208 files changed, 76067 insertions, 0 deletions
diff --git a/third_party/aom/test/accounting_test.cc b/third_party/aom/test/accounting_test.cc
new file mode 100644
index 0000000000..033499d13b
--- /dev/null
+++ b/third_party/aom/test/accounting_test.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/bitreader.h"
+#include "aom_dsp/bitwriter.h"
+
+using libaom_test::ACMRandom;
+
+TEST(AV1, TestAccounting) {
+  const int kBufferSize = 10000;
+  const int kSymbols = 1024;
+  aom_writer bw;
+  uint8_t bw_buffer[kBufferSize];
+  aom_start_encode(&bw, bw_buffer);
+  for (int i = 0; i < kSymbols; i++) {
+    aom_write(&bw, 0, 32);
+    aom_write(&bw, 0, 32);
+    aom_write(&bw, 0, 32);
+  }
+  GTEST_ASSERT_GE(aom_stop_encode(&bw), 0);
+  aom_reader br;
+  aom_reader_init(&br, bw_buffer, bw.pos);
+
+  Accounting accounting;
+  aom_accounting_init(&accounting);
+  br.accounting = &accounting;
+  for (int i = 0; i < kSymbols; i++) {
+    aom_read(&br, 32, "A");
+  }
+  // Consecutive symbols that are the same are coalesced.
+  GTEST_ASSERT_EQ(accounting.syms.num_syms, 1);
+  GTEST_ASSERT_EQ(accounting.syms.syms[0].samples, (unsigned int)kSymbols);
+
+  aom_accounting_reset(&accounting);
+  GTEST_ASSERT_EQ(accounting.syms.num_syms, 0);
+
+  // Should record 2 * kSymbols accounting symbols.
+  aom_reader_init(&br, bw_buffer, bw.pos);
+  br.accounting = &accounting;
+  for (int i = 0; i < kSymbols; i++) {
+    aom_read(&br, 32, "A");
+    aom_read(&br, 32, "B");
+    aom_read(&br, 32, "B");
+  }
+  GTEST_ASSERT_EQ(accounting.syms.num_syms, kSymbols * 2);
+  uint32_t tell_frac = aom_reader_tell_frac(&br);
+  for (int i = 0; i < accounting.syms.num_syms; i++) {
+    tell_frac -= accounting.syms.syms[i].bits;
+  }
+  GTEST_ASSERT_EQ(tell_frac, 0U);
+
+  GTEST_ASSERT_EQ(aom_accounting_dictionary_lookup(&accounting, "A"),
+                  aom_accounting_dictionary_lookup(&accounting, "A"));
+
+  // Check for collisions. The current aom_accounting_hash function returns
+  // the same hash code for AB and BA.
+  GTEST_ASSERT_NE(aom_accounting_dictionary_lookup(&accounting, "AB"),
+                  aom_accounting_dictionary_lookup(&accounting, "BA"));
+}
diff --git a/third_party/aom/test/acm_random.h b/third_party/aom/test/acm_random.h
new file mode 100644
index 0000000000..15e8c9cc2e
--- /dev/null
+++ b/third_party/aom/test/acm_random.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_ACM_RANDOM_H_
+#define AOM_TEST_ACM_RANDOM_H_
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "aom/aom_integer.h"
+
+namespace libaom_test {
+
+class ACMRandom {
+ public:
+  ACMRandom() : random_(DeterministicSeed()) {}
+
+  explicit ACMRandom(int seed) : random_(seed) {}
+
+  void Reset(int seed) { random_.Reseed(seed); }
+
+  // Generates a random 31-bit unsigned integer from [0, 2^31).
+  uint32_t Rand31() {
+    return random_.Generate(testing::internal::Random::kMaxRange);
+  }
+
+  uint16_t Rand16() {
+    const uint32_t value =
+        random_.Generate(testing::internal::Random::kMaxRange);
+    // There's a bit more entropy in the upper bits of this implementation.
+    return (value >> 15) & 0xffff;
+  }
+
+  int16_t Rand16Signed() { return static_cast<int16_t>(Rand16()); }
+
+  int16_t Rand15() {
+    const uint32_t value =
+        random_.Generate(testing::internal::Random::kMaxRange);
+    // There's a bit more entropy in the upper bits of this implementation.
+    return (value >> 16) & 0x7fff;
+  }
+
+  int16_t Rand15Signed() {
+    // Use 15 bits: values between 16383 (0x3FFF) and -16384 (0xC000).
+    return static_cast<int16_t>(Rand15()) - (1 << 14);
+  }
+
+  uint16_t Rand12() {
+    const uint32_t value =
+        random_.Generate(testing::internal::Random::kMaxRange);
+    // There's a bit more entropy in the upper bits of this implementation.
+    return (value >> 19) & 0xfff;
+  }
+
+  uint8_t Rand8() {
+    const uint32_t value =
+        random_.Generate(testing::internal::Random::kMaxRange);
+    // There's a bit more entropy in the upper bits of this implementation.
+    return (value >> 23) & 0xff;
+  }
+
+  uint8_t Rand8Extremes() {
+    // Returns a random value near 0 or near 255, to better exercise
+    // saturation behavior.
+    const uint8_t r = Rand8();
+    return static_cast<uint8_t>((r < 128) ? r << 4 : r >> 4);
+  }
+
+  int PseudoUniform(int range) { return random_.Generate(range); }
+
+  int operator()(int n) { return PseudoUniform(n); }
+
+  static int DeterministicSeed() { return 0xbaba; }
+
+ private:
+  testing::internal::Random random_;
+};
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_ACM_RANDOM_H_
diff --git a/third_party/aom/test/active_map_test.cc b/third_party/aom/test/active_map_test.cc
new file mode 100644
index 0000000000..979ee6b8b3
--- /dev/null
+++ b/third_party/aom/test/active_map_test.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <climits>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class ActiveMapTest
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  static const int kWidth = 208;
+  static const int kHeight = 144;
+
+  ActiveMapTest() : EncoderTest(GET_PARAM(0)) {}
+  ~ActiveMapTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(GET_PARAM(1));
+    cpu_used_ = GET_PARAM(2);
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AV1E_SET_ALLOW_WARPED_MOTION, 0);
+      encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
+      encoder->Control(AV1E_SET_ENABLE_OBMC, 0);
+    } else if (video->frame() == 3) {
+      aom_active_map_t map = aom_active_map_t();
+      /* clang-format off */
+      uint8_t active_map[9 * 13] = {
+        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+        0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1,
+        0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
+        0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1,
+        0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1,
+        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0,
+      };
+      /* clang-format on */
+      map.cols = (kWidth + 15) / 16;
+      map.rows = (kHeight + 15) / 16;
+      ASSERT_EQ(map.cols, 13u);
+      ASSERT_EQ(map.rows, 9u);
+      map.active_map = active_map;
+      encoder->Control(AOME_SET_ACTIVEMAP, &map);
+    } else if (video->frame() == 15) {
+      aom_active_map_t map = aom_active_map_t();
+      map.cols = (kWidth + 15) / 16;
+      map.rows = (kHeight + 15) / 16;
+      map.active_map = nullptr;
+      encoder->Control(AOME_SET_ACTIVEMAP, &map);
+    }
+  }
+
+  void DoTest() {
+    // Validate that this non multiple of 64 wide clip encodes
+    cfg_.g_lag_in_frames = 0;
+    cfg_.rc_target_bitrate = 400;
+    cfg_.rc_resize_mode = 0;
+    cfg_.g_pass = AOM_RC_ONE_PASS;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.kf_max_dist = 90000;
+    ::libaom_test::I420VideoSource video("hantro_odd.yuv", kWidth, kHeight, 30,
+                                         1, 0, 20);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  int cpu_used_;
+};
+
+TEST_P(ActiveMapTest, Test) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(ActiveMapTest,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Range(5, 9));
+
+}  // namespace
diff --git a/third_party/aom/test/allintra_end_to_end_test.cc b/third_party/aom/test/allintra_end_to_end_test.cc
new file mode 100644
index 0000000000..8ec24aa686
--- /dev/null
+++ b/third_party/aom/test/allintra_end_to_end_test.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+#include <ostream>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+const unsigned int kFrames = 20;
+const int kBitrate = 500;
+typedef struct {
+  const char *filename;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) {
+  return os << "TestVideoParam { filename:" << test_arg.filename
+            << " input_bit_depth:" << test_arg.input_bit_depth
+            << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth
+            << " profile:" << test_arg.profile << " }";
+}
+
+const TestVideoParam kTestVectors[] = {
+  { "niklas_1280_720_30.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+};
+
+// Params: test video, speed, aq mode, threads, tile columns.
+class AllIntraEndToEndTest
+    : public ::libaom_test::CodecTestWith6Params<TestVideoParam, int, int, int,
+                                                 int, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AllIntraEndToEndTest()
+      : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), psnr_(0.0), nframes_(0),
+        deltaq_mode_(GET_PARAM(3)), threads_(GET_PARAM(4)),
+        tile_columns_(GET_PARAM(5)), enable_tx_size_search_(GET_PARAM(6)) {}
+
+  ~AllIntraEndToEndTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kAllIntra);
+    cfg_.g_threads = threads_;
+  }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_ROW_MT, 1);
+      encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, tile_columns_);
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AV1E_SET_DELTAQ_MODE, deltaq_mode_);
+      encoder->Control(AV1E_SET_ENABLE_TX_SIZE_SEARCH, enable_tx_size_search_);
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  void DoTest() {
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    std::unique_ptr<libaom_test::VideoSource> video;
+    if (is_extension_y4m(test_video_param_.filename))
+      video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                  kFrames));
+    else
+      video.reset(new libaom_test::YUVVideoSource(test_video_param_.filename,
+                                                  test_video_param_.fmt, 352,
+                                                  288, 30, 1, 0, kFrames));
+    ASSERT_NE(video, nullptr);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  }
+
+  TestVideoParam test_video_param_;
+  int cpu_used_;
+
+ private:
+  double psnr_;
+  unsigned int nframes_;
+  unsigned int deltaq_mode_;
+  int threads_;
+  int tile_columns_;
+  int enable_tx_size_search_;
+};
+
+TEST_P(AllIntraEndToEndTest, EndToEndNoFailure) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(AllIntraEndToEndTest,
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::Range(5, 9), ::testing::Range(0, 4),
+                           ::testing::Values(1), ::testing::Values(1),
+                           ::testing::Values(0, 1));
+
+INSTANTIATE_TEST_SUITE_P(
+    AV1MultiThreaded, AllIntraEndToEndTest,
+    ::testing::Combine(
+        ::testing::Values(
+            static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)),
+        ::testing::ValuesIn(kTestVectors), ::testing::Range(5, 9),
+        ::testing::Range(0, 4), ::testing::Values(6), ::testing::Values(1),
+        ::testing::Values(0, 1)));
+
+}  // namespace
diff --git a/third_party/aom/test/altref_test.cc b/third_party/aom/test/altref_test.cc
new file mode 100644
index 0000000000..081123cbe4
--- /dev/null
+++ b/third_party/aom/test/altref_test.cc
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+namespace {
+typedef struct {
+  const unsigned int min_kf_dist;
+  const unsigned int max_kf_dist;
+  const unsigned int min_gf_interval;
+  const unsigned int max_gf_interval;
+  const unsigned int lag_in_frames;
+  libaom_test::TestMode encoding_mode;
+} AltRefTestParams;
+
+static const AltRefTestParams TestParams[] = {
+  { 0, 10, 4, 8, 10, ::libaom_test::kOnePassGood },
+  { 0, 30, 8, 12, 16, ::libaom_test::kOnePassGood },
+  { 30, 30, 12, 16, 25, ::libaom_test::kOnePassGood },
+  { 0, 60, 12, 20, 25, ::libaom_test::kOnePassGood },
+  { 60, 60, 16, 28, 30, ::libaom_test::kOnePassGood },
+  { 0, 100, 16, 32, 35, ::libaom_test::kOnePassGood },
+  { 0, 10, 4, 8, 10, ::libaom_test::kTwoPassGood },
+  { 0, 30, 8, 12, 16, ::libaom_test::kTwoPassGood },
+  { 30, 30, 12, 16, 25, ::libaom_test::kTwoPassGood },
+  { 0, 60, 16, 24, 25, ::libaom_test::kTwoPassGood },
+  { 60, 60, 20, 28, 30, ::libaom_test::kTwoPassGood },
+  { 0, 100, 24, 32, 35, ::libaom_test::kTwoPassGood },
+};
+
+std::ostream &operator<<(std::ostream &os, const AltRefTestParams &test_arg) {
+  return os << "AltRefTestParams { min_kf_dist:" << test_arg.min_kf_dist
+            << " max_kf_dist:" << test_arg.max_kf_dist
+            << " min_gf_interval:" << test_arg.min_gf_interval
+            << " max_gf_interval:" << test_arg.max_gf_interval
+            << " lag_in_frames:" << test_arg.lag_in_frames
+            << " encoding_mode:" << test_arg.encoding_mode << " }";
+}
+
+// This class is used to check the presence of altref frame.
+class AltRefFramePresenceTestLarge
+    : public ::libaom_test::CodecTestWith2Params<AltRefTestParams, aom_rc_mode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AltRefFramePresenceTestLarge()
+      : EncoderTest(GET_PARAM(0)), altref_test_params_(GET_PARAM(1)),
+        rc_end_usage_(GET_PARAM(2)) {
+    is_arf_frame_present_ = 0;
+  }
+  ~AltRefFramePresenceTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(altref_test_params_.encoding_mode);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = rc_end_usage_;
+    cfg_.g_threads = 1;
+    cfg_.kf_min_dist = altref_test_params_.min_kf_dist;
+    cfg_.kf_max_dist = altref_test_params_.max_kf_dist;
+    cfg_.g_lag_in_frames = altref_test_params_.lag_in_frames;
+  }
+
+  bool DoDecode() const override { return true; }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 5);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AV1E_SET_MIN_GF_INTERVAL,
+                       altref_test_params_.min_gf_interval);
+      encoder->Control(AV1E_SET_MAX_GF_INTERVAL,
+                       altref_test_params_.max_gf_interval);
+    }
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (is_arf_frame_present_ != 1 && AOM_CODEC_OK == res_dec) {
+      aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
+      AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_ALTREF_PRESENT,
+                                    &is_arf_frame_present_);
+    }
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  const AltRefTestParams altref_test_params_;
+  int is_arf_frame_present_;
+  aom_rc_mode rc_end_usage_;
+};
+
+TEST_P(AltRefFramePresenceTestLarge, AltRefFrameEncodePresenceTest) {
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 100);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(is_arf_frame_present_, 1);
+}
+
+AV1_INSTANTIATE_TEST_SUITE(AltRefFramePresenceTestLarge,
+                           ::testing::ValuesIn(TestParams),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ));
+
+typedef struct {
+  const ::libaom_test::TestMode encoding_mode;
+  const unsigned int min_gf_interval;
+  const unsigned int max_gf_interval;
+} gfIntervalParam;
+
+const gfIntervalParam gfTestParams[] = {
+  // single pass
+  { ::libaom_test::kOnePassGood, 0, 6 },
+  { ::libaom_test::kOnePassGood, 0, 8 },
+  { ::libaom_test::kOnePassGood, 5, 10 },
+  { ::libaom_test::kOnePassGood, 8, 16 },
+  { ::libaom_test::kOnePassGood, 16, 16 },
+
+  // two pass
+  { ::libaom_test::kTwoPassGood, 0, 6 },
+  { ::libaom_test::kTwoPassGood, 0, 8 },
+  { ::libaom_test::kTwoPassGood, 5, 10 },
+  { ::libaom_test::kTwoPassGood, 8, 16 },
+  { ::libaom_test::kTwoPassGood, 16, 32 },
+  { ::libaom_test::kTwoPassGood, 20, 32 },
+};
+
+// This class is used to test if the gf interval bounds configured by the user
+// are respected by the encoder.
+class GoldenFrameIntervalTestLarge
+    : public ::libaom_test::CodecTestWith2Params<gfIntervalParam, aom_rc_mode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  GoldenFrameIntervalTestLarge()
+      : EncoderTest(GET_PARAM(0)), gf_interval_param_(GET_PARAM(1)),
+        rc_end_usage_(GET_PARAM(2)) {
+    baseline_gf_interval_ = -1;
+    limit_ = 60;
+    frame_num_ = 0;
+  }
+  ~GoldenFrameIntervalTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(gf_interval_param_.encoding_mode);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = rc_end_usage_;
+    cfg_.g_threads = 1;
+    // kf_min_dist is equal to kf_max_dist to make sure that there are no scene
+    // cuts due to which the min_gf_interval may not be respected.
+    cfg_.kf_min_dist = limit_;
+    cfg_.kf_max_dist = limit_;
+    cfg_.g_limit = limit_;
+    cfg_.g_lag_in_frames = 35;
+    cfg_.rc_target_bitrate = 1000;
+  }
+
+  bool DoDecode() const override { return true; }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 5);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AV1E_SET_MIN_GF_INTERVAL,
+                       gf_interval_param_.min_gf_interval);
+      encoder->Control(AV1E_SET_MAX_GF_INTERVAL,
+                       gf_interval_param_.max_gf_interval);
+    }
+    if (frame_num_ > 0) {
+      encoder->Control(AV1E_GET_BASELINE_GF_INTERVAL, &baseline_gf_interval_);
+      ASSERT_LE(baseline_gf_interval_,
+                (int)gf_interval_param_.max_gf_interval + 1);
+      if ((frame_num_ + (int)gf_interval_param_.min_gf_interval) <= limit_) {
+        ASSERT_GE(baseline_gf_interval_,
+                  (int)gf_interval_param_.min_gf_interval);
+      }
+    }
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    (void)pkt;
+    ++frame_num_;
+  }
+
+  const gfIntervalParam gf_interval_param_;
+  int baseline_gf_interval_;
+  int limit_;
+  int frame_num_;
+  aom_rc_mode rc_end_usage_;
+};
+
+TEST_P(GoldenFrameIntervalTestLarge, GoldenFrameIntervalTest) {
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, limit_);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+AV1_INSTANTIATE_TEST_SUITE(GoldenFrameIntervalTestLarge,
+                           ::testing::ValuesIn(gfTestParams),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CQ, AOM_CBR));
+
+}  // namespace
diff --git a/third_party/aom/test/aom_image_test.cc b/third_party/aom/test/aom_image_test.cc
new file mode 100644
index 0000000000..ad48e73e3d
--- /dev/null
+++ b/third_party/aom/test/aom_image_test.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "aom/aom_image.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+TEST(AomImageTest, AomImgWrapInvalidAlign) {
+  const int kWidth = 128;
+  const int kHeight = 128;
+  unsigned char buf[kWidth * kHeight * 3];
+
+  aom_image_t img;
+  // Set img_data and img_data_owner to junk values. aom_img_wrap() should
+  // not read these values on failure.
+  img.img_data = (unsigned char *)"";
+  img.img_data_owner = 1;
+
+  aom_img_fmt_t format = AOM_IMG_FMT_I444;
+  // 'align' must be a power of 2 but is not. This causes the aom_img_wrap()
+  // call to fail. The test verifies we do not read the junk values in 'img'.
+  unsigned int align = 31;
+  EXPECT_EQ(aom_img_wrap(&img, format, kWidth, kHeight, align, buf), nullptr);
+}
+
+TEST(AomImageTest, AomImgSetRectOverflow) {
+  const int kWidth = 128;
+  const int kHeight = 128;
+  unsigned char buf[kWidth * kHeight * 3];
+
+  aom_image_t img;
+  aom_img_fmt_t format = AOM_IMG_FMT_I444;
+  unsigned int align = 32;
+  EXPECT_EQ(aom_img_wrap(&img, format, kWidth, kHeight, align, buf), &img);
+
+  EXPECT_EQ(aom_img_set_rect(&img, 0, 0, kWidth, kHeight, 0), 0);
+  // This would result in overflow because -1 is cast to UINT_MAX.
+  EXPECT_NE(aom_img_set_rect(&img, static_cast<unsigned int>(-1),
+                             static_cast<unsigned int>(-1), kWidth, kHeight, 0),
+            0);
+}
+
+TEST(AomImageTest, AomImgAllocNv12) {
+  const int kWidth = 128;
+  const int kHeight = 128;
+
+  aom_image_t img;
+  aom_img_fmt_t format = AOM_IMG_FMT_NV12;
+  unsigned int align = 32;
+  EXPECT_NE(aom_img_alloc(&img, format, kWidth, kHeight, align), nullptr);
+  EXPECT_EQ(img.stride[AOM_PLANE_U], img.stride[AOM_PLANE_Y]);
+  EXPECT_EQ(img.stride[AOM_PLANE_V], 0);
+  EXPECT_EQ(img.planes[AOM_PLANE_V], nullptr);
+  aom_img_free(&img);
+}
diff --git a/third_party/aom/test/aom_integer_test.cc b/third_party/aom/test/aom_integer_test.cc
new file mode 100644
index 0000000000..fcbbfb4d48
--- /dev/null
+++ b/third_party/aom/test/aom_integer_test.cc
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "aom/aom_integer.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+const uint64_t kMaximumLeb128CodedSize = 8;
+const uint8_t kLeb128PadByte = 0x80;  // Binary: 10000000
+const uint64_t kMaximumLeb128Value = UINT32_MAX;
+const uint32_t kSizeTestNumValues = 6;
+const uint32_t kSizeTestExpectedSizes[kSizeTestNumValues] = {
+  1, 1, 2, 3, 4, 5
+};
+const uint64_t kSizeTestInputs[kSizeTestNumValues] = { 0,        0x7f,
+                                                       0x3fff,   0x1fffff,
+                                                       0xffffff, 0x10000000 };
+
+const uint8_t kOutOfRangeLeb128Value[5] = { 0x80, 0x80, 0x80, 0x80,
+                                            0x10 };  // UINT32_MAX + 1
+}  // namespace
+
+TEST(AomLeb128, DecodeTest) {
+  const size_t num_leb128_bytes = 3;
+  const uint8_t leb128_bytes[num_leb128_bytes] = { 0xE5, 0x8E, 0x26 };
+  const uint64_t expected_value = 0x98765;  // 624485
+  const size_t expected_length = 3;
+  uint64_t value = ~0ULL;  // make sure value is cleared by the function
+  size_t length;
+  ASSERT_EQ(
+      aom_uleb_decode(&leb128_bytes[0], num_leb128_bytes, &value, &length), 0);
+  ASSERT_EQ(expected_value, value);
+  ASSERT_EQ(expected_length, length);
+
+  // Make sure the decoder stops on the last marked LEB128 byte.
+  aom_uleb_decode(&leb128_bytes[0], num_leb128_bytes + 1, &value, &length);
+  ASSERT_EQ(expected_value, value);
+  ASSERT_EQ(expected_length, length);
+}
+
+TEST(AomLeb128, EncodeTest) {
+  const uint32_t test_value = 0x98765;  // 624485
+  const uint8_t expected_bytes[3] = { 0xE5, 0x8E, 0x26 };
+  const size_t kWriteBufferSize = 4;
+  uint8_t write_buffer[kWriteBufferSize] = { 0 };
+  size_t bytes_written = 0;
+  ASSERT_EQ(aom_uleb_encode(test_value, kWriteBufferSize, &write_buffer[0],
+                            &bytes_written),
+            0);
+  ASSERT_EQ(bytes_written, 3u);
+  for (size_t i = 0; i < bytes_written; ++i) {
+    ASSERT_EQ(write_buffer[i], expected_bytes[i]);
+  }
+}
+
+TEST(AomLeb128, EncodeDecodeTest) {
+  const uint32_t value = 0x98765;  // 624485
+  const size_t kWriteBufferSize = 4;
+  uint8_t write_buffer[kWriteBufferSize] = { 0 };
+  size_t bytes_written = 0;
+  ASSERT_EQ(aom_uleb_encode(value, kWriteBufferSize, &write_buffer[0],
+                            &bytes_written),
+            0);
+  ASSERT_EQ(bytes_written, 3u);
+  uint64_t decoded_value;
+  size_t decoded_length;
+  aom_uleb_decode(&write_buffer[0], bytes_written, &decoded_value,
+                  &decoded_length);
+  ASSERT_EQ(value, decoded_value);
+  ASSERT_EQ(bytes_written, decoded_length);
+}
+
+TEST(AomLeb128, FixedSizeEncodeTest) {
+  const uint32_t test_value = 0x123;
+  const uint8_t expected_bytes[4] = { 0xa3, 0x82, 0x80, 0x00 };
+  const size_t kWriteBufferSize = 4;
+  uint8_t write_buffer[kWriteBufferSize] = { 0 };
+  size_t bytes_written = 0;
+  ASSERT_EQ(0, aom_uleb_encode_fixed_size(test_value, kWriteBufferSize,
+                                          kWriteBufferSize, &write_buffer[0],
+                                          &bytes_written));
+  ASSERT_EQ(kWriteBufferSize, bytes_written);
+  for (size_t i = 0; i < bytes_written; ++i) {
+    ASSERT_EQ(write_buffer[i], expected_bytes[i]);
+  }
+}
+
+TEST(AomLeb128, FixedSizeEncodeDecodeTest) {
+  const uint32_t value = 0x1;
+  const size_t kWriteBufferSize = 4;
+  uint8_t write_buffer[kWriteBufferSize] = { 0 };
+  size_t bytes_written = 0;
+  ASSERT_EQ(
+      aom_uleb_encode_fixed_size(value, kWriteBufferSize, kWriteBufferSize,
+                                 &write_buffer[0], &bytes_written),
+      0);
+  ASSERT_EQ(bytes_written, 4u);
+  uint64_t decoded_value;
+  size_t decoded_length;
+  aom_uleb_decode(&write_buffer[0], bytes_written, &decoded_value,
+                  &decoded_length);
+  ASSERT_EQ(value, decoded_value);
+  ASSERT_EQ(bytes_written, decoded_length);
+}
+
+TEST(AomLeb128, SizeTest) {
+  for (size_t i = 0; i < kSizeTestNumValues; ++i) {
+    ASSERT_EQ(kSizeTestExpectedSizes[i],
+              aom_uleb_size_in_bytes(kSizeTestInputs[i]));
+  }
+}
+
+TEST(AomLeb128, DecodeFailTest) {
+  // Input buffer containing what would be a valid 9 byte LEB128 encoded
+  // unsigned integer.
+  const uint8_t kAllPadBytesBuffer[kMaximumLeb128CodedSize + 1] = {
+    kLeb128PadByte, kLeb128PadByte, kLeb128PadByte,
+    kLeb128PadByte, kLeb128PadByte, kLeb128PadByte,
+    kLeb128PadByte, kLeb128PadByte, 0
+  };
+  uint64_t decoded_value;
+
+  // Test that decode fails when result would be valid 9 byte integer.
+  ASSERT_EQ(aom_uleb_decode(&kAllPadBytesBuffer[0], kMaximumLeb128CodedSize + 1,
+                            &decoded_value, nullptr),
+            -1);
+
+  // Test that encoded value missing terminator byte within available buffer
+  // range causes decode error.
+  ASSERT_EQ(aom_uleb_decode(&kAllPadBytesBuffer[0], kMaximumLeb128CodedSize,
+                            &decoded_value, nullptr),
+            -1);
+
+  // Test that LEB128 input that decodes to a value larger than 32-bits fails.
+  size_t value_size = 0;
+  ASSERT_EQ(aom_uleb_decode(&kOutOfRangeLeb128Value[0],
+                            sizeof(kOutOfRangeLeb128Value), &decoded_value,
+                            &value_size),
+            -1);
+}
+
+TEST(AomLeb128, EncodeFailTest) {
+  const size_t kWriteBufferSize = 4;
+  const uint32_t kValidTestValue = 1;
+  uint8_t write_buffer[kWriteBufferSize] = { 0 };
+  size_t coded_size = 0;
+  ASSERT_EQ(
+      aom_uleb_encode(kValidTestValue, kWriteBufferSize, nullptr, &coded_size),
+      -1);
+  ASSERT_EQ(aom_uleb_encode(kValidTestValue, kWriteBufferSize, &write_buffer[0],
+                            nullptr),
+            -1);
+
+  const uint32_t kValueOutOfRangeForBuffer = 0xFFFFFFFF;
+  ASSERT_EQ(aom_uleb_encode(kValueOutOfRangeForBuffer, kWriteBufferSize,
+                            &write_buffer[0], &coded_size),
+            -1);
+
+  const uint64_t kValueOutOfRange = kMaximumLeb128Value + 1;
+  ASSERT_EQ(aom_uleb_encode(kValueOutOfRange, kWriteBufferSize,
+                            &write_buffer[0], &coded_size),
+            -1);
+
+  const size_t kPadSizeOutOfRange = 5;
+  ASSERT_EQ(aom_uleb_encode_fixed_size(kValidTestValue, kWriteBufferSize,
+                                       kPadSizeOutOfRange, &write_buffer[0],
+                                       &coded_size),
+            -1);
+}
diff --git a/third_party/aom/test/aom_mem_test.cc b/third_party/aom/test/aom_mem_test.cc
new file mode 100644
index 0000000000..849ba64435
--- /dev/null
+++ b/third_party/aom/test/aom_mem_test.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "aom_mem/aom_mem.h"
+
+#include <cstdio>
+#include <cstddef>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+TEST(AomMemTest, Overflow) {
+  // Allocations are aligned > 1 so SIZE_MAX should always fail.
+  ASSERT_EQ(aom_malloc(SIZE_MAX), nullptr);
+  ASSERT_EQ(aom_calloc(1, SIZE_MAX), nullptr);
+  ASSERT_EQ(aom_calloc(32, SIZE_MAX / 32), nullptr);
+  ASSERT_EQ(aom_calloc(SIZE_MAX, SIZE_MAX), nullptr);
+  ASSERT_EQ(aom_memalign(1, SIZE_MAX), nullptr);
+  ASSERT_EQ(aom_memalign(64, SIZE_MAX), nullptr);
+  ASSERT_EQ(aom_memalign(64, SIZE_MAX - 64), nullptr);
+  ASSERT_EQ(aom_memalign(64, SIZE_MAX - 64 - sizeof(size_t) + 2), nullptr);
+}
+
+TEST(AomMemTest, NullParams) {
+  ASSERT_EQ(aom_memset16(nullptr, 0, 0), nullptr);
+  aom_free(nullptr);
+}
diff --git a/third_party/aom/test/aomcx_set_ref.sh b/third_party/aom/test/aomcx_set_ref.sh
new file mode 100755
index 0000000000..237e2f319c
--- /dev/null
+++ b/third_party/aom/test/aomcx_set_ref.sh
@@ -0,0 +1,58 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom aom_cx_set_ref example. To add new tests to this
+## file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to aom_cx_set_ref_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required.
+aom_cx_set_ref_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs aom_cx_set_ref and updates the reference frame before encoding frame 90.
+# $1 is the codec name, which aom_cx_set_ref does not support at present: It's
+# currently used only to name the output file.
+# TODO(tomfinegan): Pass the codec param once the example is updated to support
+# AV1.
+aom_set_ref() {
+  local encoder="${LIBAOM_BIN_PATH}/aom_cx_set_ref${AOM_TEST_EXE_SUFFIX}"
+  local codec="$1"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/aom_cx_set_ref_${codec}.ivf"
+  local ref_frame_num=4
+  local limit=10
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
+      "${ref_frame_num}" "${limit}" ${devnull} || return 1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+aom_cx_set_ref_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    aom_set_ref av1 || return 1
+  fi
+}
+
+aom_cx_set_ref_tests="aom_cx_set_ref_av1"
+
+run_tests aom_cx_set_ref_verify_environment "${aom_cx_set_ref_tests}"
+
diff --git a/third_party/aom/test/aomdec.sh b/third_party/aom/test/aomdec.sh
new file mode 100755
index 0000000000..e9738a8e89
--- /dev/null
+++ b/third_party/aom/test/aomdec.sh
@@ -0,0 +1,219 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests aomdec. To add new tests to this file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to aomdec_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+AV1_MONOCHROME_B10="${LIBAOM_TEST_DATA_PATH}/av1-1-b10-24-monochrome.ivf"
+AV1_MONOCHROME_B8="${LIBAOM_TEST_DATA_PATH}/av1-1-b8-24-monochrome.ivf"
+
+# Environment check: Make sure input is available.
+aomdec_verify_environment() {
+  if [ "$(av1_encode_available)" != "yes" ] ; then
+    if [ ! -e "${AV1_IVF_FILE}" ] || \
+       [ ! -e "${AV1_OBU_ANNEXB_FILE}" ] || \
+       [ ! -e "${AV1_OBU_SEC5_FILE}" ] || \
+       [ ! -e "${AV1_WEBM_FILE}" ]; then
+      elog "Libaom test data must exist before running this test script when " \
+           " encoding is disabled. "
+      return 1
+    fi
+  fi
+  if [ ! -e "${AV1_MONOCHROME_B10}" ] || [ ! -e "${AV1_MONOCHROME_B8}" ]; then
+    elog "Libaom test data must exist before running this test script."
+  fi
+  if [ -z "$(aom_tool_path aomdec)" ]; then
+    elog "aomdec not found. It must exist in LIBAOM_BIN_PATH or its parent."
+    return 1
+  fi
+}
+
+# Wrapper function for running aomdec with pipe input. Requires that
+# LIBAOM_BIN_PATH points to the directory containing aomdec. $1 is used as the
+# input file path and shifted away. All remaining parameters are passed through
+# to aomdec.
+aomdec_pipe() {
+  local input="$1"
+  shift
+  if [ ! -e "${input}" ]; then
+    elog "Input file ($input) missing in aomdec_pipe()"
+    return 1
+  fi
+  cat "${file}" | aomdec - "$@" ${devnull}
+}
+
+
+# Wrapper function for running aomdec. Requires that LIBAOM_BIN_PATH points to
+# the directory containing aomdec. $1 one is used as the input file path and
+# shifted away. All remaining parameters are passed through to aomdec.
+aomdec() {
+  local decoder="$(aom_tool_path aomdec)"
+  local input="$1"
+  shift
+  eval "${AOM_TEST_PREFIX}" "${decoder}" "$input" "$@" ${devnull}
+}
+
+aomdec_can_decode_av1() {
+  if [ "$(av1_decode_available)" = "yes" ]; then
+    echo yes
+  fi
+}
+
+aomdec_av1_ivf() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local file="${AV1_IVF_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --ivf || return 1
+    fi
+    aomdec "${AV1_IVF_FILE}" --summary --noblit
+  fi
+}
+
+aomdec_av1_ivf_error_resilient() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local file="av1.error-resilient.ivf"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --ivf --error-resilient=1 || return 1
+    fi
+    aomdec "${file}" --summary --noblit
+  fi
+}
+
+ivf_multithread() {
+  local row_mt="$1"
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local file="${AV1_IVF_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --ivf || return 1
+    fi
+    for threads in 2 3 4 5 6 7 8; do
+      aomdec "${file}" --summary --noblit --threads=$threads --row-mt=$row_mt \
+        || return 1
+    done
+  fi
+}
+
+aomdec_av1_ivf_multithread() {
+  ivf_multithread 0  # --row-mt=0
+}
+
+aomdec_av1_ivf_multithread_row_mt() {
+  ivf_multithread 1  # --row-mt=1
+}
+
+aomdec_aom_ivf_pipe_input() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local file="${AV1_IVF_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --ivf || return 1
+    fi
+    aomdec_pipe "${AV1_IVF_FILE}" --summary --noblit
+  fi
+}
+
+aomdec_av1_obu_annexb() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local file="${AV1_OBU_ANNEXB_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --obu --annexb=1 || return 1
+    fi
+    aomdec "${file}" --summary --noblit --annexb
+  fi
+}
+
+aomdec_av1_obu_annexb_pipe_input() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local file="${AV1_OBU_ANNEXB_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --obu --annexb=1 || return 1
+    fi
+    aomdec_pipe "${file}" --summary --noblit --annexb
+  fi
+}
+
+aomdec_av1_obu_section5() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local file="${AV1_OBU_SEC5_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --obu || return 1
+    fi
+    aomdec "${file}" --summary --noblit
+  fi
+}
+
+aomdec_av1_obu_section5_pipe_input() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local file="${AV1_OBU_SEC5_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --obu || return 1
+    fi
+    aomdec_pipe "${file}" --summary --noblit
+  fi
+}
+
+aomdec_av1_webm() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local file="${AV1_WEBM_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" || return 1
+    fi
+    aomdec "${AV1_WEBM_FILE}" --summary --noblit
+  fi
+}
+
+aomdec_av1_monochrome_yuv() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local input="$1"
+    local basename="$(basename "${input}")"
+    local output="${basename}-%wx%h-%4.i420"
+    local md5file="${AOM_TEST_OUTPUT_DIR}/${basename}.md5"
+    local decoder="$(aom_tool_path aomdec)"
+    # Note aomdec() is not used to avoid ${devnull} which may also redirect
+    # stdout.
+    eval "${AOM_TEST_PREFIX}" "${decoder}" --md5 --i420 \
+      -o "${output}" "${input}" ">" "${md5file}" 2>&1 || return 1
+    diff "${1}.md5" "${md5file}"
+  fi
+}
+
+aomdec_av1_monochrome_yuv_8bit() {
+  aomdec_av1_monochrome_yuv "${AV1_MONOCHROME_B8}"
+}
+
+aomdec_av1_monochrome_yuv_10bit() {
+  aomdec_av1_monochrome_yuv "${AV1_MONOCHROME_B10}"
+}
+
+aomdec_tests="aomdec_av1_ivf
+              aomdec_av1_ivf_multithread
+              aomdec_av1_ivf_multithread_row_mt
+              aomdec_aom_ivf_pipe_input
+              aomdec_av1_monochrome_yuv_8bit"
+
+if [ ! "$(realtime_only_build)" = "yes" ]; then
+  aomdec_tests="${aomdec_tests}
+                aomdec_av1_ivf_error_resilient
+                aomdec_av1_obu_annexb
+                aomdec_av1_obu_section5
+                aomdec_av1_obu_annexb_pipe_input
+                aomdec_av1_obu_section5_pipe_input
+                aomdec_av1_webm"
+fi
+
+if [ "$(highbitdepth_available)" = "yes" ]; then
+  aomdec_tests="${aomdec_tests}
+                aomdec_av1_monochrome_yuv_10bit"
+fi
+
+run_tests aomdec_verify_environment "${aomdec_tests}"
diff --git a/third_party/aom/test/aomenc.sh b/third_party/aom/test/aomenc.sh
new file mode 100755
index 0000000000..0bb9fba3b8
--- /dev/null
+++ b/third_party/aom/test/aomenc.sh
@@ -0,0 +1,306 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests aomenc using hantro_collage_w352h288.yuv as input. To add
+## new tests to this file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to aomenc_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: Make sure input is available.
+aomenc_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    if [ ! -e "${Y4M_NOSQ_PAR_INPUT}" ]; then
+      elog "The file ${Y4M_NOSQ_PAR_INPUT##*/} must exist in"
+      elog "LIBAOM_TEST_DATA_PATH."
+      return 1
+    fi
+  fi
+  if [ -z "$(aom_tool_path aomenc)" ]; then
+    elog "aomenc not found. It must exist in LIBAOM_BIN_PATH or its parent."
+    return 1
+  fi
+}
+
+aomenc_can_encode_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    echo yes
+  fi
+}
+
+# Utilities that echo aomenc input file parameters.
+y4m_input_non_square_par() {
+  echo ""${Y4M_NOSQ_PAR_INPUT}""
+}
+
+y4m_input_720p() {
+  echo ""${Y4M_720P_INPUT}""
+}
+
+# Wrapper function for running aomenc with pipe input. Requires that
+# LIBAOM_BIN_PATH points to the directory containing aomenc. $1 is used as the
+# input file path and shifted away. All remaining parameters are passed through
+# to aomenc.
+aomenc_pipe() {
+  local encoder="$(aom_tool_path aomenc)"
+  local input="$1"
+  shift
+  cat "${input}" | eval "${AOM_TEST_PREFIX}" "${encoder}" - \
+    --test-decode=fatal \
+    "$@" ${devnull}
+}
+
+# Wrapper function for running aomenc. Requires that LIBAOM_BIN_PATH points to
+# the directory containing aomenc. $1 one is used as the input file path and
+# shifted away. All remaining parameters are passed through to aomenc.
+aomenc() {
+  local encoder="$(aom_tool_path aomenc)"
+  local input="$1"
+  shift
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${input}" \
+    --test-decode=fatal \
+    "$@" ${devnull}
+}
+
+aomenc_av1_ivf() {
+  if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AV1_IVF_FILE}"
+    if [ -e "${AV1_IVF_FILE}" ]; then
+      output="${AOM_TEST_OUTPUT_DIR}/av1_test.ivf"
+    fi
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --ivf \
+      --output="${output}" || return 1
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_ivf_rt() {
+  if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AV1_IVF_FILE}"
+    if [ -e "${AV1_IVF_FILE}" ]; then
+      output="${AOM_TEST_OUTPUT_DIR}/av1_test.ivf"
+    fi
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_rt_params) \
+      --ivf \
+      --output="${output}" || return 1
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_ivf_use_16bit_internal() {
+  if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AV1_IVF_FILE}"
+    if [ -e "${AV1_IVF_FILE}" ]; then
+      output="${AOM_TEST_OUTPUT_DIR}/av1_test_16bit.ivf"
+    fi
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --ivf \
+      --use-16bit-internal \
+      --output="${output}" || return 1
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_obu_annexb() {
+   if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AV1_OBU_ANNEXB_FILE}"
+    if [ -e "${AV1_OBU_ANNEXB_FILE}" ]; then
+      output="${AOM_TEST_OUTPUT_DIR}/av1_test.annexb.obu"
+    fi
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --obu \
+      --annexb=1 \
+      --output="${output}" || return 1
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_obu_section5() {
+   if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AV1_OBU_SEC5_FILE}"
+    if [ -e "${AV1_OBU_SEC5_FILE}" ]; then
+      output="${AOM_TEST_OUTPUT_DIR}/av1_test.section5.obu"
+    fi
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --obu \
+      --output="${output}" || return 1
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_webm() {
+  if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local output="${AV1_WEBM_FILE}"
+    if [ -e "${AV1_WEBM_FILE}" ]; then
+      output="${AOM_TEST_OUTPUT_DIR}/av1_test.webm"
+    fi
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --output="${output}" || return 1
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_webm_1pass() {
+  if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local output="${AOM_TEST_OUTPUT_DIR}/av1_test.webm"
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --passes=1 \
+      --output="${output}" || return 1
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_ivf_lossless() {
+  if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AOM_TEST_OUTPUT_DIR}/av1_lossless.ivf"
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --ivf \
+      --output="${output}" \
+      --lossless=1 || return 1
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_ivf_minq0_maxq0() {
+  if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AOM_TEST_OUTPUT_DIR}/av1_lossless_minq0_maxq0.ivf"
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --ivf \
+      --output="${output}" \
+      --min-q=0 \
+      --max-q=0 || return 1
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_webm_lag5_frames10() {
+  if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local lag_total_frames=10
+    local lag_frames=5
+    local output="${AOM_TEST_OUTPUT_DIR}/av1_lag5_frames10.webm"
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --limit=${lag_total_frames} \
+      --lag-in-frames=${lag_frames} \
+      --output="${output}" || return 1
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+# TODO(fgalligan): Test that DisplayWidth is different than video width.
+aomenc_av1_webm_non_square_par() {
+  if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local output="${AOM_TEST_OUTPUT_DIR}/av1_non_square_par.webm"
+    aomenc $(y4m_input_non_square_par) \
+      $(aomenc_encode_test_fast_params) \
+      --output="${output}" || return 1
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_webm_cdf_update_mode() {
+  if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    for mode in 0 1 2; do
+      local output="${AOM_TEST_OUTPUT_DIR}/cdf_mode_${mode}.webm"
+      aomenc $(yuv_raw_input) \
+        $(aomenc_encode_test_fast_params) \
+        --cdf-update-mode=${mode} \
+        --output="${output}" || return 1
+
+      if [ ! -e "${output}" ]; then
+        elog "Output file does not exist."
+        return 1
+      fi
+    done
+  fi
+}
+
+if [ "$(realtime_only_build)" = "yes" ]; then
+  aomenc_tests="aomenc_av1_ivf_rt"
+else
+  aomenc_tests="aomenc_av1_ivf
+                aomenc_av1_ivf_rt
+                aomenc_av1_obu_annexb
+                aomenc_av1_obu_section5
+                aomenc_av1_webm
+                aomenc_av1_webm_1pass
+                aomenc_av1_ivf_lossless
+                aomenc_av1_ivf_minq0_maxq0
+                aomenc_av1_ivf_use_16bit_internal
+                aomenc_av1_webm_lag5_frames10
+                aomenc_av1_webm_non_square_par
+                aomenc_av1_webm_cdf_update_mode"
+fi
+
+run_tests aomenc_verify_environment "${aomenc_tests}"
diff --git a/third_party/aom/test/aq_segment_test.cc b/third_party/aom/test/aq_segment_test.cc
new file mode 100644
index 0000000000..674a883ea2
--- /dev/null
+++ b/third_party/aom/test/aq_segment_test.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "config/aom_config.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+const libaom_test::TestMode kTestModeParams[] =
+#if CONFIG_REALTIME_ONLY
+    { ::libaom_test::kRealTime };
+#else
+    { ::libaom_test::kRealTime, ::libaom_test::kOnePassGood };
+#endif
+
+class AqSegmentTest
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int,
+                                                 int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AqSegmentTest() : EncoderTest(GET_PARAM(0)) {}
+  ~AqSegmentTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
+    aq_mode_ = 0;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_SET_AQ_MODE, aq_mode_);
+      encoder->Control(AV1E_SET_DELTAQ_MODE, deltaq_mode_);
+      encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT, 100);
+      if (mode_ == ::libaom_test::kRealTime) {
+        encoder->Control(AV1E_SET_ALLOW_WARPED_MOTION, 0);
+        encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
+        encoder->Control(AV1E_SET_ENABLE_OBMC, 0);
+      }
+    }
+  }
+
+  void DoTest(int aq_mode) {
+    aq_mode_ = aq_mode;
+    deltaq_mode_ = 0;
+    cfg_.kf_max_dist = 12;
+    cfg_.rc_min_quantizer = 8;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 6;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_target_bitrate = 300;
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 15);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  int set_cpu_used_;
+  int aq_mode_;
+  int deltaq_mode_;
+};
+
+// Validate that this AQ segmentation mode (1-variance_aq, 2-complexity_aq,
+// 3-cyclic_refresh_aq) encodes and decodes without a mismatch.
+TEST_P(AqSegmentTest, TestNoMisMatch) { DoTest(GET_PARAM(3)); }
+
+#if !CONFIG_REALTIME_ONLY
+// Validate that this delta q mode
+// encodes and decodes without a mismatch.
+TEST_P(AqSegmentTest, TestNoMisMatchExtDeltaQ) {
+  cfg_.rc_end_usage = AOM_CQ;
+  aq_mode_ = 0;
+  deltaq_mode_ = 2;
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 15);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+#endif
+
+AV1_INSTANTIATE_TEST_SUITE(AqSegmentTest, ::testing::ValuesIn(kTestModeParams),
+                           ::testing::Range(5, 9), ::testing::Range(0, 4));
+
+#if !CONFIG_REALTIME_ONLY
+class AqSegmentTestLarge : public AqSegmentTest {};
+
+TEST_P(AqSegmentTestLarge, TestNoMisMatch) { DoTest(GET_PARAM(3)); }
+
+AV1_INSTANTIATE_TEST_SUITE(AqSegmentTestLarge,
+                           ::testing::Values(::libaom_test::kOnePassGood),
+                           ::testing::Range(3, 5), ::testing::Range(0, 4));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/arf_freq_test.cc b/third_party/aom/test/arf_freq_test.cc
new file mode 100644
index 0000000000..f51444da4d
--- /dev/null
+++ b/third_party/aom/test/arf_freq_test.cc
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+#include "av1/encoder/ratectrl.h"
+
+namespace {
+
+const unsigned int kFrames = 100;
+const int kBitrate = 500;
+
+#define ARF_NOT_SEEN 1000001
+#define ARF_SEEN_ONCE 1000000
+
+typedef struct {
+  const char *filename;
+  unsigned int width;
+  unsigned int height;
+  unsigned int framerate_num;
+  unsigned int framerate_den;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+typedef struct {
+  libaom_test::TestMode mode;
+  int cpu_used;
+} TestEncodeParam;
+
+const TestVideoParam kTestVectors[] = {
+  // artificially increase framerate to trigger default check
+  { "hantro_collage_w352h288.yuv", 352, 288, 5000, 1, 8, AOM_IMG_FMT_I420,
+    AOM_BITS_8, 0 },
+  { "hantro_collage_w352h288.yuv", 352, 288, 30, 1, 8, AOM_IMG_FMT_I420,
+    AOM_BITS_8, 0 },
+  { "rush_hour_444.y4m", 352, 288, 30, 1, 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
+  // Add list of profile 2/3 test videos here ...
+};
+
+const TestEncodeParam kEncodeVectors[] = {
+#if CONFIG_REALTIME_ONLY
+  { ::libaom_test::kRealTime, 5 },
+#else
+  { ::libaom_test::kRealTime, 5 },    { ::libaom_test::kOnePassGood, 2 },
+  { ::libaom_test::kOnePassGood, 5 }, { ::libaom_test::kTwoPassGood, 1 },
+  { ::libaom_test::kTwoPassGood, 2 }, { ::libaom_test::kTwoPassGood, 5 },
+#endif
+};
+
+const int kMinArfVectors[] = {
+  // NOTE: 0 refers to the default built-in logic in:
+  //       av1_rc_get_default_min_gf_interval(...)
+  0, 4, 8, 12, 15
+};
+
+class ArfFreqTestLarge
+    : public ::libaom_test::CodecTestWith3Params<TestVideoParam,
+                                                 TestEncodeParam, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  ArfFreqTestLarge()
+      : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)),
+        test_encode_param_(GET_PARAM(2)), min_arf_requested_(GET_PARAM(3)) {}
+
+  ~ArfFreqTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(test_encode_param_.mode);
+    if (test_encode_param_.mode != ::libaom_test::kRealTime) {
+      cfg_.g_lag_in_frames = 25;
+    } else {
+      cfg_.rc_buf_sz = 1000;
+      cfg_.rc_buf_initial_sz = 500;
+      cfg_.rc_buf_optimal_sz = 600;
+    }
+  }
+
+  void BeginPassHook(unsigned int) override {
+    min_run_ = ARF_NOT_SEEN;
+    run_of_visible_frames_ = 0;
+  }
+
+  int GetNumFramesInPkt(const aom_codec_cx_pkt_t *pkt) {
+    const uint8_t *buffer = reinterpret_cast<uint8_t *>(pkt->data.frame.buf);
+    const uint8_t marker = buffer[pkt->data.frame.sz - 1];
+    const int mag = ((marker >> 3) & 3) + 1;
+    int frames = (marker & 0x7) + 1;
+    const unsigned int index_sz = 2 + mag * frames;
+    // Check for superframe or not.
+    // Assume superframe has only one visible frame, the rest being
+    // invisible. If superframe index is not found, then there is only
+    // one frame.
+    if (!((marker & 0xe0) == 0xc0 && pkt->data.frame.sz >= index_sz &&
+          buffer[pkt->data.frame.sz - index_sz] == marker)) {
+      frames = 1;
+    }
+    return frames;
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return;
+    const int frames = GetNumFramesInPkt(pkt);
+    if (frames == 1) {
+      run_of_visible_frames_++;
+    } else if (frames == 2) {
+      if (min_run_ == ARF_NOT_SEEN) {
+        min_run_ = ARF_SEEN_ONCE;
+      } else if (min_run_ == ARF_SEEN_ONCE ||
+                 run_of_visible_frames_ < min_run_) {
+        min_run_ = run_of_visible_frames_;
+      }
+      run_of_visible_frames_ = 1;
+    } else {
+      min_run_ = 0;
+      run_of_visible_frames_ = 1;
+    }
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+      encoder->Control(AOME_SET_CPUUSED, test_encode_param_.cpu_used);
+      encoder->Control(AV1E_SET_MIN_GF_INTERVAL, min_arf_requested_);
+      if (test_encode_param_.mode != ::libaom_test::kRealTime) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      }
+    }
+  }
+
+  int GetMinVisibleRun() const { return min_run_; }
+
+  int GetMinArfDistanceRequested() const {
+    if (min_arf_requested_)
+      return min_arf_requested_;
+    else
+      return av1_rc_get_default_min_gf_interval(
+          test_video_param_.width, test_video_param_.height,
+          (double)test_video_param_.framerate_num /
+              test_video_param_.framerate_den);
+  }
+
+  TestVideoParam test_video_param_;
+  TestEncodeParam test_encode_param_;
+
+ private:
+  int min_arf_requested_;
+  int min_run_;
+  int run_of_visible_frames_;
+};
+
+TEST_P(ArfFreqTestLarge, MinArfFreqTest) {
+  cfg_.rc_target_bitrate = kBitrate;
+  cfg_.g_error_resilient = 0;
+  cfg_.g_profile = test_video_param_.profile;
+  cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+  cfg_.g_bit_depth = test_video_param_.bit_depth;
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+  std::unique_ptr<libaom_test::VideoSource> video;
+  if (is_extension_y4m(test_video_param_.filename)) {
+    video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                kFrames));
+  } else {
+    video.reset(new libaom_test::YUVVideoSource(
+        test_video_param_.filename, test_video_param_.fmt,
+        test_video_param_.width, test_video_param_.height,
+        test_video_param_.framerate_num, test_video_param_.framerate_den, 0,
+        kFrames));
+  }
+  ASSERT_NE(video, nullptr);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  const int min_run = GetMinVisibleRun();
+  const int min_arf_dist_requested = GetMinArfDistanceRequested();
+  if (min_run != ARF_NOT_SEEN && min_run != ARF_SEEN_ONCE) {
+    const int min_arf_dist = min_run + 1;
+    EXPECT_GE(min_arf_dist, min_arf_dist_requested);
+  }
+}
+
+#if CONFIG_AV1_ENCODER
+// TODO(angiebird): 25-29 fail in high bitdepth mode.
+// TODO(zoeliu): This ArfFreqTest does not work with BWDREF_FRAME, as
+// BWDREF_FRAME is also a non-show frame, and the minimum run between two
+// consecutive BWDREF_FRAME's may vary between 1 and any arbitrary positive
+// number as long as it does not exceed the gf_group interval.
+INSTANTIATE_TEST_SUITE_P(
+    DISABLED_AV1, ArfFreqTestLarge,
+    ::testing::Combine(
+        ::testing::Values(
+            static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)),
+        ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors),
+        ::testing::ValuesIn(kMinArfVectors)));
+#endif  // CONFIG_AV1_ENCODER
+}  // namespace
diff --git a/third_party/aom/test/av1_c_vs_simd_encode.sh b/third_party/aom/test/av1_c_vs_simd_encode.sh
new file mode 100755
index 0000000000..296204d118
--- /dev/null
+++ b/third_party/aom/test/av1_c_vs_simd_encode.sh
@@ -0,0 +1,566 @@
+#!/bin/sh
+## Copyright (c) 2023, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+##  This script checks the bit exactness between C and SIMD
+##  implementations of AV1 encoder.
+##
+. $(dirname $0)/tools_common.sh
+
+PRESETS="good rt"
+LOWBD_CIF_CLIP="yuv_raw_input"
+LOWBD_480p_CLIP="yuv_480p_raw_input"
+LOWBD_720p_CLIP="y4m_720p_input"
+HIGHBD_CLIP="y4m_360p_10bit_input"
+SC_CLIP="y4m_screen_input"
+OUT_FILE_SUFFIX=".ivf"
+SCRIPT_DIR=$(dirname "$0")
+LIBAOM_SOURCE_DIR=$(cd ${SCRIPT_DIR}/..; pwd)
+
+# Clips used in test.
+YUV_RAW_INPUT="${LIBAOM_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
+YUV_480P_RAW_INPUT="${LIBAOM_TEST_DATA_PATH}/niklas_640_480_30.yuv"
+Y4M_360P_10BIT_INPUT="${LIBAOM_TEST_DATA_PATH}/crowd_run_360p_10_150f.y4m"
+Y4M_720P_INPUT="${LIBAOM_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
+Y4M_SCREEN_INPUT="${LIBAOM_TEST_DATA_PATH}/wikipedia_420_360p_60f.y4m"
+
+# Number of frames to test.
+AV1_ENCODE_C_VS_SIMD_TEST_FRAME_LIMIT=35
+
+# Create a temporary directory for output files.
+if [ -n "${TMPDIR}" ]; then
+  AOM_TEST_TEMP_ROOT="${TMPDIR}"
+elif [ -n "${TEMPDIR}" ]; then
+  AOM_TEST_TEMP_ROOT="${TEMPDIR}"
+else
+  AOM_TEST_TEMP_ROOT=/tmp
+fi
+
+AOM_TEST_OUTPUT_DIR="${AOM_TEST_TEMP_ROOT}/av1_test_$$"
+
+if ! mkdir -p "${AOM_TEST_OUTPUT_DIR}" || \
+   [ ! -d "${AOM_TEST_OUTPUT_DIR}" ]; then
+  echo "${0##*/}: Cannot create output directory, giving up."
+  echo "${0##*/}:   AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}"
+  exit 1
+fi
+
+elog() {
+  echo "$@" 1>&2
+}
+
+# Echoes path to $1 when it's executable and exists in ${AOM_TEST_OUTPUT_DIR},
+# or an empty string. Caller is responsible for testing the string once the
+# function returns.
+av1_enc_tool_path() {
+  local target="$1"
+  local preset="$2"
+  local tool_path="${AOM_TEST_OUTPUT_DIR}/build_target_${target}/aomenc_${preset}"
+
+  if [ ! -x "${tool_path}" ]; then
+    tool_path=""
+  fi
+  echo "${tool_path}"
+}
+
+# Environment check: Make sure input and source directories are available.
+av1_c_vs_simd_enc_verify_environment () {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    elog "libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ ! -e "${Y4M_360P_10BIT_INPUT}" ]; then
+    elog "libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ ! -e "${YUV_480P_RAW_INPUT}" ]; then
+    elog "libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ ! -e "${Y4M_720P_INPUT}" ]; then
+    elog "libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ ! -e "${Y4M_SCREEN_INPUT}" ]; then
+    elog "libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ ! -d "$LIBAOM_SOURCE_DIR" ]; then
+    elog "LIBAOM_SOURCE_DIR does not exist."
+    return 1
+  fi
+}
+
+# This is not needed since tools_common.sh does the same cleanup.
+# Keep the code here for our reference.
+# cleanup() {
+#  rm -rf  ${AOM_TEST_OUTPUT_DIR}
+# }
+
+# Echo AOM_SIMD_CAPS_MASK for different instruction set architecture.
+avx512f() {
+   echo "0x1FF"
+}
+
+avx2() {
+   echo "0x0FF"
+}
+
+avx() {
+   echo "0x07F"
+}
+
+sse4_1() {
+   echo "0x03F"
+}
+
+ssse3() {
+   echo "0x01F"
+}
+
+sse3() {
+   echo "0x00F"
+}
+
+sse2() {
+   echo "0x007"
+}
+
+get_bitrates() {
+  local content=$1
+  local preset=$2
+
+  # Bit-rates:
+  local bitrate_lowres_good="300"
+  local bitrate_480p_good="500"
+  local bitrate_720p_good="1000"
+  local bitrate_scc_360p_good="500"
+  local bitrate_lowres_rt="200"
+  local bitrate_480p_rt="300"
+  local bitrate_720p_rt="600"
+  local bitrate_scc_360p_rt="300"
+  local bitrate_hbd_360p="500"
+
+  if [ "${preset}" = "good" ]; then
+    if [ "${content}" = "yuv_raw_input" ]; then
+      echo "${bitrate_lowres_good}"
+    elif [ "${content}" = "yuv_480p_raw_input" ]; then
+      echo "${bitrate_480p_good}"
+    elif [ "${content}" = "y4m_720p_input" ]; then
+      echo "${bitrate_720p_good}"
+    elif [ "${content}" = "y4m_screen_input" ]; then
+      echo "${bitrate_scc_360p_good}"
+    elif [ "${content}" = "y4m_360p_10bit_input" ]; then
+      echo "${bitrate_hbd_360p}"
+    else
+      elog "Invalid content"
+    fi
+  elif  [ "${preset}" = "rt" ]; then
+    if [ "${content}" = "yuv_raw_input" ]; then
+      echo "${bitrate_lowres_rt}"
+    elif [ "${content}" = "yuv_480p_raw_input" ]; then
+      echo "${bitrate_480p_rt}"
+    elif [ "${content}" = "y4m_720p_input" ]; then
+      echo "${bitrate_720p_rt}"
+    elif [ "${content}" = "y4m_screen_input" ]; then
+      echo "${bitrate_scc_360p_rt}"
+    elif [ "${content}" = "y4m_360p_10bit_input" ]; then
+      echo "${bitrate_hbd_360p}"
+    else
+      elog "Invalid content"
+    fi
+  else
+    elog "invalid preset"
+  fi
+}
+
+# Echo clip details to be used as input to aomenc.
+yuv_raw_input() {
+  echo ""${YUV_RAW_INPUT}"
+       --width=352
+       --height=288
+       --bit-depth=8"
+}
+
+y4m_360p_10bit_input() {
+  echo ""${Y4M_360P_10BIT_INPUT}"
+       --bit-depth=10"
+}
+
+yuv_480p_raw_input() {
+  echo ""${YUV_480P_RAW_INPUT}"
+       --width=640
+       --height=480
+       --bit-depth=8"
+}
+
+y4m_720p_input() {
+  echo ""${Y4M_720P_INPUT}"
+       --bit-depth=8"
+}
+
+y4m_screen_input() {
+  echo ""${Y4M_SCREEN_INPUT}"
+       --tune-content=screen
+       --enable-palette=1
+       --bit-depth=8"
+}
+
+has_x86_isa_extn() {
+  instruction_set=$1
+  if ! grep -q "$instruction_set" /proc/cpuinfo; then
+    # This instruction set is not supported.
+    return 1
+  fi
+}
+
+# Echo good encode params for use with AV1 encoder.
+av1_encode_good_params() {
+  echo "--good \
+  --ivf \
+  --profile=0 \
+  --static-thresh=0 \
+  --threads=1 \
+  --tile-columns=0 \
+  --tile-rows=0 \
+  --verbose \
+  --end-usage=vbr \
+  --kf-max-dist=160 \
+  --kf-min-dist=0 \
+  --max-q=63 \
+  --min-q=0 \
+  --overshoot-pct=100 \
+  --undershoot-pct=100 \
+  --passes=2 \
+  --arnr-maxframes=7 \
+  --arnr-strength=5 \
+  --auto-alt-ref=1 \
+  --drop-frame=0 \
+  --frame-parallel=0 \
+  --lag-in-frames=35 \
+  --maxsection-pct=2000 \
+  --minsection-pct=0 \
+  --sharpness=0"
+}
+
+# Echo realtime encode params for use with AV1 encoder.
+av1_encode_rt_params() {
+  echo "--rt \
+  --ivf \
+  --profile=0 \
+  --static-thresh=0 \
+  --threads=1 \
+  --tile-columns=0 \
+  --tile-rows=0 \
+  --verbose \
+  --end-usage=cbr \
+  --kf-max-dist=90000 \
+  --max-q=58 \
+  --min-q=2 \
+  --overshoot-pct=50 \
+  --undershoot-pct=50 \
+  --passes=1 \
+  --aq-mode=3 \
+  --buf-initial-sz=500 \
+  --buf-optimal-sz=600 \
+  --buf-sz=1000 \
+  --coeff-cost-upd-freq=3 \
+  --dv-cost-upd-freq=3 \
+  --mode-cost-upd-freq=3 \
+  --mv-cost-upd-freq=3 \
+  --deltaq-mode=0 \
+  --enable-global-motion=0 \
+  --enable-obmc=0 \
+  --enable-order-hint=0 \
+  --enable-ref-frame-mvs=0 \
+  --enable-tpl-model=0 \
+  --enable-warped-motion=0 \
+  --lag-in-frames=0 \
+  --max-intra-rate=300 \
+  --noise-sensitivity=0"
+}
+
+# Configures for the given target in AOM_TEST_OUTPUT_DIR/build_target_${target}
+# directory.
+av1_enc_build() {
+  local target="$1"
+  local cmake_command="$2"
+  local tmp_build_dir=${AOM_TEST_OUTPUT_DIR}/build_target_${target}
+  if [ -d "$tmp_build_dir" ]; then
+    rm -rf $tmp_build_dir
+  fi
+
+  mkdir -p $tmp_build_dir
+  cd $tmp_build_dir
+
+  local cmake_common_args="-DCONFIG_EXCLUDE_SIMD_MISMATCH=1 \
+           -DCMAKE_BUILD_TYPE=Release \
+           -DENABLE_CCACHE=1 \
+           '-DCMAKE_C_FLAGS_RELEASE=-O3 -g' \
+           '-DCMAKE_CXX_FLAGS_RELEASE=-O3 -g' \
+           -DENABLE_DOCS=0 -DENABLE_TESTS=0 -DENABLE_TOOLS=0"
+
+  for preset in $PRESETS; do
+    echo "Building target[${preset} encoding]: ${target}"
+    if [ "${preset}" = "good" ]; then
+      local cmake_extra_args="-DCONFIG_AV1_HIGHBITDEPTH=1"
+    elif [ "${preset}" = "rt" ]; then
+      local cmake_extra_args="-DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0"
+    else
+      elog "Invalid preset"
+      return 1
+    fi
+    if ! eval "$cmake_command" "${cmake_common_args}" "${cmake_extra_args}" \
+      ${devnull}; then
+      elog "cmake failure"
+      return 1
+    fi
+    if ! eval make -j$(nproc) aomenc ${devnull}; then
+      elog "build failure"
+      return 1
+    fi
+
+    mv aomenc aomenc_${preset}
+  done
+  echo "Done building target: ${target}"
+}
+
+compare_enc_output() {
+  local target=$1
+  local cpu=$2
+  local clip=$3
+  local bitrate=$4
+  local preset=$5
+  if ! diff -q ${AOM_TEST_OUTPUT_DIR}/Out-generic-"${clip}"-${preset}-${bitrate}kbps-cpu${cpu}${OUT_FILE_SUFFIX} \
+       ${AOM_TEST_OUTPUT_DIR}/Out-${target}-"${clip}"-${preset}-${bitrate}kbps-cpu${cpu}${OUT_FILE_SUFFIX}; then
+    elog "C vs ${target} encode mismatches for ${clip}, at ${bitrate} kbps, speed ${cpu}, ${preset} preset"
+    return 1
+  fi
+}
+
+av1_enc_test() {
+  local encoder="$1"
+  local arch="$2"
+  local target="$3"
+  local preset="$4"
+  if [ -z "$(av1_enc_tool_path "${target}"  "${preset}")" ]; then
+    elog "aomenc_{preset} not found. It must exist in ${AOM_TEST_OUTPUT_DIR}/build_target_${target} path"
+    return 1
+  fi
+
+  if [ "${preset}" = "good" ]; then
+    if [ "${arch}" = "x86_64" ]; then
+      local min_cpu_used=0
+      local max_cpu_used=6
+    elif [ "${arch}" = "x86" ]; then
+      local min_cpu_used=2
+      local max_cpu_used=3
+    fi
+    local test_params=av1_encode_good_params
+  elif [ "${preset}" = "rt" ]; then
+    local min_cpu_used=5
+    local max_cpu_used=11
+    local test_params=av1_encode_rt_params
+  else
+    elog "Invalid preset"
+    return 1
+  fi
+
+  for cpu in $(seq $min_cpu_used $max_cpu_used); do
+    if [ "${preset}" = "good" ]; then
+      if [ "${arch}" = "x86_64" ]; then
+        if [ "${cpu}" -lt 2 ]; then
+          local test_clips="${LOWBD_CIF_CLIP} ${HIGHBD_CLIP}"
+        elif [ "${cpu}" -lt 5 ]; then
+          local test_clips="${LOWBD_480p_CLIP} ${HIGHBD_CLIP}"
+        else
+          local test_clips="${LOWBD_720p_CLIP} ${HIGHBD_CLIP}"
+        fi
+      elif [ "${arch}" = "x86" ]; then
+        local test_clips="${LOWBD_CIF_CLIP} ${HIGHBD_CLIP}"
+      elif [ "${arch}" = "arm64" ]; then
+        local test_clips="${LOWBD_CIF_CLIP} ${HIGHBD_CLIP}"
+      fi
+    elif [ "${preset}" = "rt" ]; then
+      if [ "${cpu}" -lt 8 ]; then
+        local test_clips="${LOWBD_CIF_CLIP} ${SC_CLIP}"
+      else
+        local test_clips="${LOWBD_480p_CLIP} ${SC_CLIP}"
+      fi
+    else
+      elog "Invalid preset"
+      return 1
+    fi
+
+    for clip in ${test_clips}; do
+      local test_bitrates=$(get_bitrates ${clip} ${preset})
+      for bitrate in ${test_bitrates}; do
+        eval "${encoder}" $($clip) $($test_params) \
+        "--limit=${AV1_ENCODE_C_VS_SIMD_TEST_FRAME_LIMIT}" \
+        "--cpu-used=${cpu}" "--target-bitrate=${bitrate}" "-o" \
+        ${AOM_TEST_OUTPUT_DIR}/Out-${target}-"${clip}"-${preset}-${bitrate}kbps-cpu${cpu}${OUT_FILE_SUFFIX} \
+        ${devnull}
+
+        if [ "${target}" != "generic" ]; then
+          if ! compare_enc_output ${target} $cpu ${clip} $bitrate ${preset}; then
+            # Found a mismatch
+            return 1
+          fi
+        fi
+      done
+    done
+  done
+}
+
+av1_test_generic() {
+  local arch=$1
+  local target="generic"
+  if [ $arch = "x86_64" ]; then
+    local cmake_command="cmake $LIBAOM_SOURCE_DIR -DAOM_TARGET_CPU=${target}"
+  elif [ $arch = "x86" ]; then
+    # As AV1 encode output differs for x86 32-bit and 64-bit platforms
+    # (BUG=aomedia:3479), the x86 32-bit C-only build is generated separately.
+    # The cmake command line option -DENABLE_MMX=0 flag disables all SIMD
+    # optimizations, and generates a C-only binary.
+    local cmake_command="cmake $LIBAOM_SOURCE_DIR -DENABLE_MMX=0 \
+      -DCMAKE_TOOLCHAIN_FILE=${LIBAOM_SOURCE_DIR}/build/cmake/toolchains/i686-linux-gcc.cmake"
+  fi
+
+  echo "Build for: Generic ${arch}"
+  if ! av1_enc_build "${target}" "${cmake_command}"; then
+    return 1
+  fi
+
+  for preset in $PRESETS; do
+    local encoder="$(av1_enc_tool_path "${target}" "${preset}")"
+    av1_enc_test $encoder "${arch}" "${target}" "${preset}"
+  done
+}
+
+# This function encodes AV1 bitstream by enabling SSE2, SSE3, SSSE3, SSE4_1, AVX, AVX2 as there are
+# no functions with MMX, SSE and AVX512 specialization.
+# The value of environment variable 'AOM_SIMD_CAPS_MASK' controls enabling of different instruction
+# set extension optimizations. The value of the flag 'AOM_SIMD_CAPS_MASK' and the corresponding
+# instruction set extension optimization enabled are as follows:
+# AVX512 AVX2 AVX SSE4_1 SSSE3 SSE3 SSE2 SSE MMX
+#   1     1    1    1      1    1    1    1   1  -> 0x1FF -> Enable AVX512 and lower variants
+#   0     1    1    1      1    1    1    1   1  -> 0x0FF -> Enable AVX2 and lower variants
+#   0     0    1    1      1    1    1    1   1  -> 0x07F -> Enable AVX and lower variants
+#   0     0    0    1      1    1    1    1   1  -> 0x03F  -> Enable SSE4_1 and lower variants
+#   0     0    0    0      1    1    1    1   1  -> 0x01F  -> Enable SSSE3 and lower variants
+#   0     0    0    0      0    1    1    1   1  -> 0x00F  -> Enable SSE3 and lower variants
+#   0     0    0    0      0    0    1    1   1  -> 0x007  -> Enable SSE2 and lower variants
+#   0     0    0    0      0    0    0    1   1  -> 0x003  -> Enable SSE and lower variants
+#   0     0    0    0      0    0    0    0   1  -> 0x001  -> Enable MMX
+## NOTE: In x86_64 platform, it is not possible to enable sse/mmx/c using "AOM_SIMD_CAPS_MASK" as
+#  all x86_64 platforms implement sse2.
+av1_test_x86() {
+  local arch=$1
+
+  if ! uname -m | grep -q "x86"; then
+    elog "Machine architecture is not x86 or x86_64"
+    return 0
+  fi
+
+  if [ $arch = "x86" ]; then
+    local target="x86-linux"
+    local cmake_command="cmake \
+    $LIBAOM_SOURCE_DIR \
+    -DCMAKE_TOOLCHAIN_FILE=${LIBAOM_SOURCE_DIR}/build/cmake/toolchains/i686-linux-gcc.cmake"
+  elif [ $arch = "x86_64" ]; then
+    local target="x86_64-linux"
+    local cmake_command="cmake $LIBAOM_SOURCE_DIR"
+  fi
+
+  # Available x86 isa variants: "avx2 avx sse4_1 ssse3 sse3 sse2"
+  local x86_isa_variants="avx2 sse4_1 sse2"
+
+  echo "Build for x86: ${target}"
+  if ! av1_enc_build "${target}" "${cmake_command}"; then
+    return 1
+  fi
+
+  for preset in $PRESETS; do
+    local encoder="$(av1_enc_tool_path "${target}" "${preset}")"
+    for isa in $x86_isa_variants; do
+      # Note that if has_x86_isa_extn returns 1, it is false, and vice versa.
+      if ! has_x86_isa_extn $isa; then
+        echo "${isa} is not supported in this machine"
+        continue
+      fi
+      export AOM_SIMD_CAPS_MASK=$($isa)
+      if ! av1_enc_test $encoder "${arch}" "${target}" "${preset}"; then
+        # Found a mismatch
+        return 1
+      fi
+      unset AOM_SIMD_CAPS_MASK
+    done
+  done
+}
+
+av1_test_arm() {
+  local arch="arm64"
+  local target="arm64-linux-gcc"
+  local cmake_command="cmake $LIBAOM_SOURCE_DIR \
+        -DCMAKE_TOOLCHAIN_FILE=$LIBAOM_SOURCE_DIR/build/cmake/toolchains/${target}.cmake \
+        -DCMAKE_C_FLAGS=-Wno-maybe-uninitialized"
+  echo "Build for arm64: ${target}"
+  if ! av1_enc_build "${target}" "${cmake_command}"; then
+    return 1
+  fi
+
+  for preset in $PRESETS; do
+    local encoder="$(av1_enc_tool_path "${target}" "${preset}")"
+    if ! av1_enc_test "qemu-aarch64 -L /usr/aarch64-linux-gnu ${encoder}" "${arch}" "${target}" "${preset}"; then
+      # Found a mismatch
+      return 1
+    fi
+  done
+}
+
+av1_c_vs_simd_enc_test () {
+  # Test x86 (32 bit)
+  # x86 requires the i686-linux-gnu toolchain:
+  # $ sudo apt-get install g++-i686-linux-gnu
+  echo "av1 test for x86 (32 bit): Started."
+  # Encode 'C' only
+  av1_test_generic "x86"
+  # Encode with SIMD optimizations enabled
+  if ! av1_test_x86 "x86"; then
+    echo "av1 test for x86 (32 bit): Done, test failed."
+    return 1
+  else
+    echo "av1 test for x86 (32 bit): Done, all tests passed."
+  fi
+
+  # Test x86_64 (64 bit)
+  if [ "$(eval uname -m)" = "x86_64" ]; then
+    echo "av1 test for x86_64 (64 bit): Started."
+    # Encode 'C' only
+    av1_test_generic "x86_64"
+    # Encode with SIMD optimizations enabled
+    if ! av1_test_x86 "x86_64"; then
+      echo "av1 test for x86_64 (64 bit): Done, test failed."
+      return 1
+    else
+      echo "av1 test for x86_64 (64 bit): Done, all tests passed."
+    fi
+  fi
+
+  # Test ARM
+  echo "av1_test_arm: Started."
+  if ! av1_test_arm; then
+    echo "av1 test for arm: Done, test failed."
+    return 1
+  else
+    echo "av1 test for arm: Done, all tests passed."
+  fi
+}
+
+run_tests av1_c_vs_simd_enc_verify_environment av1_c_vs_simd_enc_test
diff --git a/third_party/aom/test/av1_common_int_test.cc b/third_party/aom/test/av1_common_int_test.cc
new file mode 100644
index 0000000000..dde2542e3d
--- /dev/null
+++ b/third_party/aom/test/av1_common_int_test.cc
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "av1/common/av1_common_int.h"
+
+TEST(AV1CommonInt, TestGetTxSize) {
+  for (int t = TX_4X4; t < TX_SIZES_ALL; t++) {
+    TX_SIZE t2 = get_tx_size(tx_size_wide[t], tx_size_high[t]);
+    GTEST_ASSERT_EQ(tx_size_wide[t], tx_size_wide[t2]);
+    GTEST_ASSERT_EQ(tx_size_high[t], tx_size_high[t2]);
+  }
+}
diff --git a/third_party/aom/test/av1_config_test.cc b/third_party/aom/test/av1_config_test.cc
new file mode 100644
index 0000000000..3ff816c163
--- /dev/null
+++ b/third_party/aom/test/av1_config_test.cc
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <string.h>
+
+#include "common/av1_config.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+//
+// Input buffers containing exactly one Sequence Header OBU.
+//
+// Each buffer is named according to the OBU storage format (Annex-B vs Low
+// Overhead Bitstream Format) and the type of Sequence Header OBU ("Full"
+// Sequence Header OBUs vs Sequence Header OBUs with the
+// reduced_still_image_flag set).
+//
+const uint8_t kAnnexBFullSequenceHeaderObu[] = { 0x0c, 0x08, 0x00, 0x00, 0x00,
+                                                 0x04, 0x45, 0x7e, 0x3e, 0xff,
+                                                 0xfc, 0xc0, 0x20 };
+const uint8_t kAnnexBReducedStillImageSequenceHeaderObu[] = {
+  0x08, 0x08, 0x18, 0x22, 0x2b, 0xf1, 0xfe, 0xc0, 0x20
+};
+
+const uint8_t kLobfFullSequenceHeaderObu[] = { 0x0a, 0x0b, 0x00, 0x00, 0x00,
+                                               0x04, 0x45, 0x7e, 0x3e, 0xff,
+                                               0xfc, 0xc0, 0x20 };
+
+const uint8_t kLobfReducedStillImageSequenceHeaderObu[] = { 0x0a, 0x07, 0x18,
+                                                            0x22, 0x2b, 0xf1,
+                                                            0xfe, 0xc0, 0x20 };
+
+const uint8_t kAv1cAllZero[] = { 0, 0, 0, 0 };
+
+// The size of AV1 config when no configOBUs are present at the end of the
+// configuration structure.
+const size_t kAv1cNoConfigObusSize = 4;
+
+bool VerifyAv1c(const uint8_t *const obu_buffer, size_t obu_buffer_length,
+                bool is_annexb) {
+  Av1Config av1_config;
+  memset(&av1_config, 0, sizeof(av1_config));
+  bool parse_ok = get_av1config_from_obu(obu_buffer, obu_buffer_length,
+                                         is_annexb, &av1_config) == 0;
+  if (parse_ok) {
+    EXPECT_EQ(1, av1_config.marker);
+    EXPECT_EQ(1, av1_config.version);
+    EXPECT_EQ(0, av1_config.seq_profile);
+    EXPECT_EQ(0, av1_config.seq_level_idx_0);
+    EXPECT_EQ(0, av1_config.seq_tier_0);
+    EXPECT_EQ(0, av1_config.high_bitdepth);
+    EXPECT_EQ(0, av1_config.twelve_bit);
+    EXPECT_EQ(0, av1_config.monochrome);
+    EXPECT_EQ(1, av1_config.chroma_subsampling_x);
+    EXPECT_EQ(1, av1_config.chroma_subsampling_y);
+    EXPECT_EQ(0, av1_config.chroma_sample_position);
+    EXPECT_EQ(0, av1_config.initial_presentation_delay_present);
+    EXPECT_EQ(0, av1_config.initial_presentation_delay_minus_one);
+  }
+  return parse_ok && ::testing::Test::HasFailure() == false;
+}
+
+TEST(Av1Config, ObuInvalidInputs) {
+  Av1Config av1_config;
+  memset(&av1_config, 0, sizeof(av1_config));
+  ASSERT_EQ(-1, get_av1config_from_obu(nullptr, 0, 0, nullptr));
+  ASSERT_EQ(-1, get_av1config_from_obu(&kLobfFullSequenceHeaderObu[0], 0, 0,
+                                       nullptr));
+  ASSERT_EQ(-1, get_av1config_from_obu(&kLobfFullSequenceHeaderObu[0],
+                                       sizeof(kLobfFullSequenceHeaderObu), 0,
+                                       nullptr));
+  ASSERT_EQ(-1, get_av1config_from_obu(
+                    nullptr, sizeof(kLobfFullSequenceHeaderObu), 0, nullptr));
+  ASSERT_EQ(-1, get_av1config_from_obu(&kLobfFullSequenceHeaderObu[0], 0, 0,
+                                       &av1_config));
+}
+
+TEST(Av1Config, ReadInvalidInputs) {
+  Av1Config av1_config;
+  memset(&av1_config, 0, sizeof(av1_config));
+  size_t bytes_read = 0;
+  ASSERT_EQ(-1, read_av1config(nullptr, 0, nullptr, nullptr));
+  ASSERT_EQ(-1, read_av1config(nullptr, 4, nullptr, nullptr));
+  ASSERT_EQ(-1, read_av1config(&kAv1cAllZero[0], 0, nullptr, nullptr));
+  ASSERT_EQ(-1, read_av1config(&kAv1cAllZero[0], 4, &bytes_read, nullptr));
+  ASSERT_EQ(-1, read_av1config(nullptr, 4, &bytes_read, &av1_config));
+}
+
+TEST(Av1Config, WriteInvalidInputs) {
+  Av1Config av1_config;
+  memset(&av1_config, 0, sizeof(av1_config));
+  size_t bytes_written = 0;
+  uint8_t av1c_buffer[4] = { 0 };
+  ASSERT_EQ(-1, write_av1config(nullptr, 0, nullptr, nullptr));
+  ASSERT_EQ(-1, write_av1config(&av1_config, 0, nullptr, nullptr));
+  ASSERT_EQ(-1, write_av1config(&av1_config, 0, &bytes_written, nullptr));
+
+  ASSERT_EQ(-1,
+            write_av1config(&av1_config, 0, &bytes_written, &av1c_buffer[0]));
+  ASSERT_EQ(-1, write_av1config(&av1_config, 4, &bytes_written, nullptr));
+}
+
+TEST(Av1Config, GetAv1ConfigFromLobfObu) {
+  // Test parsing of a Sequence Header OBU with the reduced_still_picture_header
+  // unset-- aka a full Sequence Header OBU.
+  ASSERT_TRUE(VerifyAv1c(kLobfFullSequenceHeaderObu,
+                         sizeof(kLobfFullSequenceHeaderObu), false));
+
+  // Test parsing of a reduced still image Sequence Header OBU.
+  ASSERT_TRUE(VerifyAv1c(kLobfReducedStillImageSequenceHeaderObu,
+                         sizeof(kLobfReducedStillImageSequenceHeaderObu),
+                         false));
+}
+
+TEST(Av1Config, GetAv1ConfigFromAnnexBObu) {
+  // Test parsing of a Sequence Header OBU with the reduced_still_picture_header
+  // unset-- aka a full Sequence Header OBU.
+  ASSERT_TRUE(VerifyAv1c(kAnnexBFullSequenceHeaderObu,
+                         sizeof(kAnnexBFullSequenceHeaderObu), true));
+
+  // Test parsing of a reduced still image Sequence Header OBU.
+  ASSERT_TRUE(VerifyAv1c(kAnnexBReducedStillImageSequenceHeaderObu,
+                         sizeof(kAnnexBReducedStillImageSequenceHeaderObu),
+                         true));
+}
+
+TEST(Av1Config, ReadWriteConfig) {
+  Av1Config av1_config;
+  memset(&av1_config, 0, sizeof(av1_config));
+
+  // Test writing out the AV1 config.
+  size_t bytes_written = 0;
+  uint8_t av1c_buffer[4] = { 0 };
+  ASSERT_EQ(0, write_av1config(&av1_config, sizeof(av1c_buffer), &bytes_written,
+                               &av1c_buffer[0]));
+  ASSERT_EQ(kAv1cNoConfigObusSize, bytes_written);
+  for (size_t i = 0; i < kAv1cNoConfigObusSize; ++i) {
+    ASSERT_EQ(kAv1cAllZero[i], av1c_buffer[i])
+        << "Mismatch in output Av1Config at offset=" << i;
+  }
+
+  // Test reading the AV1 config.
+  size_t bytes_read = 0;
+  ASSERT_EQ(0, read_av1config(&kAv1cAllZero[0], sizeof(kAv1cAllZero),
+                              &bytes_read, &av1_config));
+  ASSERT_EQ(kAv1cNoConfigObusSize, bytes_read);
+  ASSERT_EQ(0, write_av1config(&av1_config, sizeof(av1c_buffer), &bytes_written,
+                               &av1c_buffer[0]));
+  for (size_t i = 0; i < kAv1cNoConfigObusSize; ++i) {
+    ASSERT_EQ(kAv1cAllZero[i], av1c_buffer[i])
+        << "Mismatch in output Av1Config at offset=" << i;
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/av1_convolve_scale_test.cc b/third_party/aom/test/av1_convolve_scale_test.cc
new file mode 100644
index 0000000000..76cf77ab07
--- /dev/null
+++ b/third_party/aom/test/av1_convolve_scale_test.cc
@@ -0,0 +1,561 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+#include <vector>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "av1/common/common_data.h"
+
+namespace {
+const int kTestIters = 10;
+const int kPerfIters = 1000;
+
+const int kVPad = 32;
+const int kHPad = 32;
+const int kXStepQn = 16;
+const int kYStepQn = 20;
+
+using libaom_test::ACMRandom;
+using std::make_tuple;
+using std::tuple;
+
+enum NTaps { EIGHT_TAP, TEN_TAP, TWELVE_TAP };
+int NTapsToInt(NTaps ntaps) { return 8 + static_cast<int>(ntaps) * 2; }
+
+// A 16-bit filter with a configurable number of taps.
+class TestFilter {
+ public:
+  void set(NTaps ntaps, bool backwards);
+
+  InterpFilterParams params_;
+
+ private:
+  std::vector<int16_t> coeffs_;
+};
+
+void TestFilter::set(NTaps ntaps, bool backwards) {
+  const int n = NTapsToInt(ntaps);
+  assert(n >= 8 && n <= 12);
+
+  // The filter has n * SUBPEL_SHIFTS proper elements and an extra 8 bogus
+  // elements at the end so that convolutions can read off the end safely.
+  coeffs_.resize(n * SUBPEL_SHIFTS + 8);
+
+  // The coefficients are pretty much arbitrary, but convolutions shouldn't
+  // over or underflow. For the first filter (subpels = 0), we use an
+  // increasing or decreasing ramp (depending on the backwards parameter). We
+  // don't want any zero coefficients, so we make it have an x-intercept at -1
+  // or n. To ensure absence of under/overflow, we normalise the area under the
+  // ramp to be I = 1 << FILTER_BITS (so that convolving a constant function
+  // gives the identity).
+  //
+  // When increasing, the function has the form:
+  //
+  //   f(x) = A * (x + 1)
+  //
+  // Summing and rearranging for A gives A = 2 * I / (n * (n + 1)). If the
+  // filter is reversed, we have the same A but with formula
+  //
+  //   g(x) = A * (n - x)
+  const int I = 1 << FILTER_BITS;
+  const float A = 2.f * I / (n * (n + 1.f));
+  for (int i = 0; i < n; ++i) {
+    coeffs_[i] = static_cast<int16_t>(A * (backwards ? (n - i) : (i + 1)));
+  }
+
+  // For the other filters, make them slightly different by swapping two
+  // columns. Filter k will have the columns (k % n) and (7 * k) % n swapped.
+  const size_t filter_size = sizeof(coeffs_[0] * n);
+  int16_t *const filter0 = &coeffs_[0];
+  for (int k = 1; k < SUBPEL_SHIFTS; ++k) {
+    int16_t *filterk = &coeffs_[k * n];
+    memcpy(filterk, filter0, filter_size);
+
+    const int idx0 = k % n;
+    const int idx1 = (7 * k) % n;
+
+    const int16_t tmp = filterk[idx0];
+    filterk[idx0] = filterk[idx1];
+    filterk[idx1] = tmp;
+  }
+
+  // Finally, write some rubbish at the end to make sure we don't use it.
+  for (int i = 0; i < 8; ++i) coeffs_[n * SUBPEL_SHIFTS + i] = 123 + i;
+
+  // Fill in params
+  params_.filter_ptr = &coeffs_[0];
+  params_.taps = n;
+  // These are ignored by the functions being tested. Set them to whatever.
+  params_.interp_filter = EIGHTTAP_REGULAR;
+}
+
+template <typename SrcPixel>
+class TestImage {
+ public:
+  TestImage(int w, int h, int bd) : w_(w), h_(h), bd_(bd) {
+    assert(bd < 16);
+    assert(bd <= 8 * static_cast<int>(sizeof(SrcPixel)));
+
+    // Pad width by 2*kHPad and then round up to the next multiple of 16
+    // to get src_stride_. Add another 16 for dst_stride_ (to make sure
+    // something goes wrong if we use the wrong one)
+    src_stride_ = (w_ + 2 * kHPad + 15) & ~15;
+    dst_stride_ = src_stride_ + 16;
+
+    // Allocate image data
+    src_data_.resize(2 * src_block_size());
+    dst_data_.resize(2 * dst_block_size());
+    dst_16_data_.resize(2 * dst_block_size());
+  }
+
+  void Initialize(ACMRandom *rnd);
+  void Check() const;
+
+  int src_stride() const { return src_stride_; }
+  int dst_stride() const { return dst_stride_; }
+
+  int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); }
+  int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); }
+
+  const SrcPixel *GetSrcData(bool ref, bool borders) const {
+    const SrcPixel *block = &src_data_[ref ? 0 : src_block_size()];
+    return borders ? block : block + kHPad + src_stride_ * kVPad;
+  }
+
+  SrcPixel *GetDstData(bool ref, bool borders) {
+    SrcPixel *block = &dst_data_[ref ? 0 : dst_block_size()];
+    return borders ? block : block + kHPad + dst_stride_ * kVPad;
+  }
+
+  CONV_BUF_TYPE *GetDst16Data(bool ref, bool borders) {
+    CONV_BUF_TYPE *block = &dst_16_data_[ref ? 0 : dst_block_size()];
+    return borders ? block : block + kHPad + dst_stride_ * kVPad;
+  }
+
+ private:
+  int w_, h_, bd_;
+  int src_stride_, dst_stride_;
+
+  std::vector<SrcPixel> src_data_;
+  std::vector<SrcPixel> dst_data_;
+  std::vector<CONV_BUF_TYPE> dst_16_data_;
+};
+
+template <typename Pixel>
+void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) {
+  if (!trash) {
+    memset(data, 0, sizeof(*data) * num_pixels);
+    return;
+  }
+  const Pixel mask = (1 << bd) - 1;
+  for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask;
+}
+
+template <typename Pixel>
+void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd,
+                 bool trash_edges, Pixel *data) {
+  assert(rnd);
+  const Pixel mask = (1 << bd) - 1;
+
+  // Fill in the first buffer with random data
+  // Top border
+  FillEdge(rnd, stride * kVPad, bd, trash_edges, data);
+  for (int r = 0; r < h; ++r) {
+    Pixel *row_data = data + (kVPad + r) * stride;
+    // Left border, contents, right border
+    FillEdge(rnd, kHPad, bd, trash_edges, row_data);
+    for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask;
+    FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w);
+  }
+  // Bottom border
+  FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h));
+
+  const int bpp = sizeof(*data);
+  const int block_elts = stride * (h + 2 * kVPad);
+  const int block_size = bpp * block_elts;
+
+  // Now copy that to the second buffer
+  memcpy(data + block_elts, data, block_size);
+}
+
+template <typename SrcPixel>
+void TestImage<SrcPixel>::Initialize(ACMRandom *rnd) {
+  PrepBuffers(rnd, w_, h_, src_stride_, bd_, false, &src_data_[0]);
+  PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_data_[0]);
+  PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_16_data_[0]);
+}
+
+template <typename SrcPixel>
+void TestImage<SrcPixel>::Check() const {
+  // If memcmp returns 0, there's nothing to do.
+  const int num_pixels = dst_block_size();
+  const SrcPixel *ref_dst = &dst_data_[0];
+  const SrcPixel *tst_dst = &dst_data_[num_pixels];
+
+  const CONV_BUF_TYPE *ref_16_dst = &dst_16_data_[0];
+  const CONV_BUF_TYPE *tst_16_dst = &dst_16_data_[num_pixels];
+
+  if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) {
+    if (0 == memcmp(ref_16_dst, tst_16_dst, sizeof(*ref_16_dst) * num_pixels))
+      return;
+  }
+  // Otherwise, iterate through the buffer looking for differences (including
+  // the edges)
+  const int stride = dst_stride_;
+  for (int r = 0; r < h_ + 2 * kVPad; ++r) {
+    for (int c = 0; c < w_ + 2 * kHPad; ++c) {
+      const int32_t ref_value = ref_dst[r * stride + c];
+      const int32_t tst_value = tst_dst[r * stride + c];
+
+      EXPECT_EQ(tst_value, ref_value)
+          << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad);
+    }
+  }
+
+  for (int r = 0; r < h_ + 2 * kVPad; ++r) {
+    for (int c = 0; c < w_ + 2 * kHPad; ++c) {
+      const int32_t ref_value = ref_16_dst[r * stride + c];
+      const int32_t tst_value = tst_16_dst[r * stride + c];
+
+      EXPECT_EQ(tst_value, ref_value)
+          << "Error in 16 bit buffer "
+          << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad);
+    }
+  }
+}
+
+typedef tuple<int, int> BlockDimension;
+
+struct BaseParams {
+  BaseParams(BlockDimension dimensions, NTaps num_taps_x, NTaps num_taps_y,
+             bool average)
+      : dims(dimensions), ntaps_x(num_taps_x), ntaps_y(num_taps_y),
+        avg(average) {}
+
+  BlockDimension dims;
+  NTaps ntaps_x, ntaps_y;
+  bool avg;
+};
+
+template <typename SrcPixel>
+class ConvolveScaleTestBase : public ::testing::Test {
+ public:
+  ConvolveScaleTestBase() : image_(nullptr) {}
+  ~ConvolveScaleTestBase() override { delete image_; }
+
+  // Implemented by subclasses (SetUp depends on the parameters passed
+  // in and RunOne depends on the function to be tested. These can't
+  // be templated for low/high bit depths because they have different
+  // numbers of parameters)
+  void SetUp() override = 0;
+  virtual void RunOne(bool ref) = 0;
+
+ protected:
+  void SetParams(const BaseParams &params, int bd) {
+    width_ = std::get<0>(params.dims);
+    height_ = std::get<1>(params.dims);
+    ntaps_x_ = params.ntaps_x;
+    ntaps_y_ = params.ntaps_y;
+    bd_ = bd;
+    avg_ = params.avg;
+
+    filter_x_.set(ntaps_x_, false);
+    filter_y_.set(ntaps_y_, true);
+    convolve_params_ =
+        get_conv_params_no_round(avg_ != false, 0, nullptr, 0, 1, bd);
+
+    delete image_;
+    image_ = new TestImage<SrcPixel>(width_, height_, bd_);
+    ASSERT_NE(image_, nullptr);
+  }
+
+  void SetConvParamOffset(int i, int j, int is_compound, int do_average,
+                          int use_dist_wtd_comp_avg) {
+    if (i == -1 && j == -1) {
+      convolve_params_.use_dist_wtd_comp_avg = use_dist_wtd_comp_avg;
+      convolve_params_.is_compound = is_compound;
+      convolve_params_.do_average = do_average;
+    } else {
+      convolve_params_.use_dist_wtd_comp_avg = use_dist_wtd_comp_avg;
+      convolve_params_.fwd_offset = quant_dist_lookup_table[j][i];
+      convolve_params_.bck_offset = quant_dist_lookup_table[j][1 - i];
+      convolve_params_.is_compound = is_compound;
+      convolve_params_.do_average = do_average;
+    }
+  }
+
+  void Run() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    for (int i = 0; i < kTestIters; ++i) {
+      int is_compound = 0;
+      SetConvParamOffset(-1, -1, is_compound, 0, 0);
+      Prep(&rnd);
+      RunOne(true);
+      RunOne(false);
+      image_->Check();
+
+      is_compound = 1;
+      for (int do_average = 0; do_average < 2; do_average++) {
+        for (int use_dist_wtd_comp_avg = 0; use_dist_wtd_comp_avg < 2;
+             use_dist_wtd_comp_avg++) {
+          for (int j = 0; j < 2; ++j) {
+            for (int k = 0; k < 4; ++k) {
+              SetConvParamOffset(j, k, is_compound, do_average,
+                                 use_dist_wtd_comp_avg);
+              Prep(&rnd);
+              RunOne(true);
+              RunOne(false);
+              image_->Check();
+            }
+          }
+        }
+      }
+    }
+  }
+
+  void SpeedTest() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    Prep(&rnd);
+
+    aom_usec_timer ref_timer;
+    aom_usec_timer_start(&ref_timer);
+    for (int i = 0; i < kPerfIters; ++i) RunOne(true);
+    aom_usec_timer_mark(&ref_timer);
+    const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+    aom_usec_timer tst_timer;
+    aom_usec_timer_start(&tst_timer);
+    for (int i = 0; i < kPerfIters; ++i) RunOne(false);
+    aom_usec_timer_mark(&tst_timer);
+    const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+    std::cout << "[          ] C time = " << ref_time / 1000
+              << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+    EXPECT_GT(ref_time, tst_time)
+        << "Error: CDEFSpeedTest, SIMD slower than C.\n"
+        << "C time: " << ref_time << " us\n"
+        << "SIMD time: " << tst_time << " us\n";
+  }
+
+  static int RandomSubpel(ACMRandom *rnd) {
+    const uint8_t subpel_mode = rnd->Rand8();
+    if ((subpel_mode & 7) == 0) {
+      return 0;
+    } else if ((subpel_mode & 7) == 1) {
+      return SCALE_SUBPEL_SHIFTS - 1;
+    } else {
+      return 1 + rnd->PseudoUniform(SCALE_SUBPEL_SHIFTS - 2);
+    }
+  }
+
+  void Prep(ACMRandom *rnd) {
+    assert(rnd);
+
+    // Choose subpel_x_ and subpel_y_. They should be less than
+    // SCALE_SUBPEL_SHIFTS; we also want to add extra weight to "interesting"
+    // values: 0 and SCALE_SUBPEL_SHIFTS - 1
+    subpel_x_ = RandomSubpel(rnd);
+    subpel_y_ = RandomSubpel(rnd);
+
+    image_->Initialize(rnd);
+  }
+
+  int width_, height_, bd_;
+  NTaps ntaps_x_, ntaps_y_;
+  bool avg_;
+  int subpel_x_, subpel_y_;
+  TestFilter filter_x_, filter_y_;
+  TestImage<SrcPixel> *image_;
+  ConvolveParams convolve_params_;
+};
+
+typedef tuple<int, int> BlockDimension;
+
+typedef void (*LowbdConvolveFunc)(const uint8_t *src, int src_stride,
+                                  uint8_t *dst, int dst_stride, int w, int h,
+                                  const InterpFilterParams *filter_params_x,
+                                  const InterpFilterParams *filter_params_y,
+                                  const int subpel_x_qn, const int x_step_qn,
+                                  const int subpel_y_qn, const int y_step_qn,
+                                  ConvolveParams *conv_params);
+
+// Test parameter list:
+//  <tst_fun, dims, ntaps_x, ntaps_y, avg>
+typedef tuple<LowbdConvolveFunc, BlockDimension, NTaps, NTaps, bool>
+    LowBDParams;
+
+class LowBDConvolveScaleTest
+    : public ConvolveScaleTestBase<uint8_t>,
+      public ::testing::WithParamInterface<LowBDParams> {
+ public:
+  ~LowBDConvolveScaleTest() override = default;
+
+  void SetUp() override {
+    tst_fun_ = GET_PARAM(0);
+
+    const BlockDimension &block = GET_PARAM(1);
+    const NTaps ntaps_x = GET_PARAM(2);
+    const NTaps ntaps_y = GET_PARAM(3);
+    const int bd = 8;
+    const bool avg = GET_PARAM(4);
+
+    SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
+  }
+
+  void RunOne(bool ref) override {
+    const uint8_t *src = image_->GetSrcData(ref, false);
+    uint8_t *dst = image_->GetDstData(ref, false);
+    convolve_params_.dst = image_->GetDst16Data(ref, false);
+    const int src_stride = image_->src_stride();
+    const int dst_stride = image_->dst_stride();
+    if (ref) {
+      av1_convolve_2d_scale_c(src, src_stride, dst, dst_stride, width_, height_,
+                              &filter_x_.params_, &filter_y_.params_, subpel_x_,
+                              kXStepQn, subpel_y_, kYStepQn, &convolve_params_);
+    } else {
+      tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
+               &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
+               subpel_y_, kYStepQn, &convolve_params_);
+    }
+  }
+
+ private:
+  LowbdConvolveFunc tst_fun_;
+};
+
+const BlockDimension kBlockDim[] = {
+  make_tuple(2, 2),    make_tuple(2, 4),    make_tuple(4, 4),
+  make_tuple(4, 8),    make_tuple(8, 4),    make_tuple(8, 8),
+  make_tuple(8, 16),   make_tuple(16, 8),   make_tuple(16, 16),
+  make_tuple(16, 32),  make_tuple(32, 16),  make_tuple(32, 32),
+  make_tuple(32, 64),  make_tuple(64, 32),  make_tuple(64, 64),
+  make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
+};
+
+const NTaps kNTaps[] = { EIGHT_TAP };
+
+TEST_P(LowBDConvolveScaleTest, Check) { Run(); }
+TEST_P(LowBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(
+    C, LowBDConvolveScaleTest,
+    ::testing::Combine(::testing::Values(av1_convolve_2d_scale_c),
+                       ::testing::ValuesIn(kBlockDim),
+                       ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
+                       ::testing::Bool()));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, LowBDConvolveScaleTest,
+    ::testing::Combine(::testing::Values(av1_convolve_2d_scale_sse4_1),
+                       ::testing::ValuesIn(kBlockDim),
+                       ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
+                       ::testing::Bool()));
+#endif  // HAVE_SSE4_1
+
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef void (*HighbdConvolveFunc)(const uint16_t *src, int src_stride,
+                                   uint16_t *dst, int dst_stride, int w, int h,
+                                   const InterpFilterParams *filter_params_x,
+                                   const InterpFilterParams *filter_params_y,
+                                   const int subpel_x_qn, const int x_step_qn,
+                                   const int subpel_y_qn, const int y_step_qn,
+                                   ConvolveParams *conv_params, int bd);
+
+// Test parameter list:
+//  <tst_fun, dims, ntaps_x, ntaps_y, avg, bd>
+typedef tuple<HighbdConvolveFunc, BlockDimension, NTaps, NTaps, bool, int>
+    HighBDParams;
+
+class HighBDConvolveScaleTest
+    : public ConvolveScaleTestBase<uint16_t>,
+      public ::testing::WithParamInterface<HighBDParams> {
+ public:
+  ~HighBDConvolveScaleTest() override = default;
+
+  void SetUp() override {
+    tst_fun_ = GET_PARAM(0);
+
+    const BlockDimension &block = GET_PARAM(1);
+    const NTaps ntaps_x = GET_PARAM(2);
+    const NTaps ntaps_y = GET_PARAM(3);
+    const bool avg = GET_PARAM(4);
+    const int bd = GET_PARAM(5);
+
+    SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
+  }
+
+  void RunOne(bool ref) override {
+    const uint16_t *src = image_->GetSrcData(ref, false);
+    uint16_t *dst = image_->GetDstData(ref, false);
+    convolve_params_.dst = image_->GetDst16Data(ref, false);
+    const int src_stride = image_->src_stride();
+    const int dst_stride = image_->dst_stride();
+
+    if (ref) {
+      av1_highbd_convolve_2d_scale_c(
+          src, src_stride, dst, dst_stride, width_, height_, &filter_x_.params_,
+          &filter_y_.params_, subpel_x_, kXStepQn, subpel_y_, kYStepQn,
+          &convolve_params_, bd_);
+    } else {
+      tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
+               &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
+               subpel_y_, kYStepQn, &convolve_params_, bd_);
+    }
+  }
+
+ private:
+  HighbdConvolveFunc tst_fun_;
+};
+
+const int kBDs[] = { 8, 10, 12 };
+
+TEST_P(HighBDConvolveScaleTest, Check) { Run(); }
+TEST_P(HighBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(
+    C, HighBDConvolveScaleTest,
+    ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_c),
+                       ::testing::ValuesIn(kBlockDim),
+                       ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
+                       ::testing::Bool(), ::testing::ValuesIn(kBDs)));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, HighBDConvolveScaleTest,
+    ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_sse4_1),
+                       ::testing::ValuesIn(kBlockDim),
+                       ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
+                       ::testing::Bool(), ::testing::ValuesIn(kBDs)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, HighBDConvolveScaleTest,
+    ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_neon),
+                       ::testing::ValuesIn(kBlockDim),
+                       ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
+                       ::testing::Bool(), ::testing::ValuesIn(kBDs)));
+
+#endif  // HAVE_NEON
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/av1_convolve_test.cc b/third_party/aom/test/av1_convolve_test.cc
new file mode 100644
index 0000000000..5bbac21803
--- /dev/null
+++ b/third_party/aom/test/av1_convolve_test.cc
@@ -0,0 +1,2447 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <ostream>
+#include <set>
+#include <vector>
+#include "config/av1_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "aom_ports/aom_timer.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+// TODO(any): Remove following INTERP_FILTERS_ALL define, so that 12-tap filter
+// is tested once 12-tap filter SIMD is done.
+#undef INTERP_FILTERS_ALL
+#define INTERP_FILTERS_ALL 4
+
+// All single reference convolve tests are parameterized on block size,
+// bit-depth, and function to test.
+//
+// Note that parameterizing on these variables (and not other parameters) is
+// a conscious decision - Jenkins needs some degree of parallelization to run
+// the tests within the time limit, but if the number of parameters increases
+// too much, the gtest framework does not handle it well (increased overhead per
+// test, huge amount of output to stdout, etc.).
+//
+// Also note that the test suites must be named with the architecture, e.g.,
+// C, C_X, AVX2_X, ... The test suite that runs on Jenkins sometimes runs tests
+// that cannot deal with intrinsics (e.g., the Valgrind tests on 32-bit x86
+// binaries) and will disable tests using a filter like
+// --gtest_filter=-:SSE4_1.*. If the test suites are not named this way, the
+// testing infrastructure will not selectively filter them properly.
+class BlockSize {
+ public:
+  BlockSize(int w, int h) : width_(w), height_(h) {}
+
+  int Width() const { return width_; }
+  int Height() const { return height_; }
+
+  bool operator<(const BlockSize &other) const {
+    if (Width() == other.Width()) {
+      return Height() < other.Height();
+    }
+    return Width() < other.Width();
+  }
+
+  bool operator==(const BlockSize &other) const {
+    return Width() == other.Width() && Height() == other.Height();
+  }
+
+ private:
+  int width_;
+  int height_;
+};
+
+// Block size / bit depth / test function used to parameterize the tests.
+template <typename T>
+class TestParam {
+ public:
+  TestParam(const BlockSize &block, int bd, T test_func)
+      : block_(block), bd_(bd), test_func_(test_func) {}
+
+  const BlockSize &Block() const { return block_; }
+  int BitDepth() const { return bd_; }
+  T TestFunction() const { return test_func_; }
+
+  bool operator==(const TestParam &other) const {
+    return Block() == other.Block() && BitDepth() == other.BitDepth() &&
+           TestFunction() == other.TestFunction();
+  }
+
+ private:
+  BlockSize block_;
+  int bd_;
+  T test_func_;
+};
+
+template <typename T>
+std::ostream &operator<<(std::ostream &os, const TestParam<T> &test_arg) {
+  return os << "TestParam { width:" << test_arg.Block().Width()
+            << " height:" << test_arg.Block().Height()
+            << " bd:" << test_arg.BitDepth() << " }";
+}
+
+// Generate the list of all block widths / heights that need to be tested,
+// includes chroma and luma sizes, for the given bit-depths. The test
+// function is the same for all generated parameters.
+template <typename T>
+std::vector<TestParam<T>> GetTestParams(std::initializer_list<int> bit_depths,
+                                        T test_func) {
+  std::set<BlockSize> sizes;
+  for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) {
+    const int w = block_size_wide[b];
+    const int h = block_size_high[b];
+    sizes.insert(BlockSize(w, h));
+    // Add in smaller chroma sizes as well.
+    if (w == 4 || h == 4) {
+      sizes.insert(BlockSize(w / 2, h / 2));
+    }
+  }
+  std::vector<TestParam<T>> result;
+  for (const BlockSize &block : sizes) {
+    for (int bd : bit_depths) {
+      result.push_back(TestParam<T>(block, bd, test_func));
+    }
+  }
+  return result;
+}
+
+template <typename T>
+std::vector<TestParam<T>> GetLowbdTestParams(T test_func) {
+  return GetTestParams({ 8 }, test_func);
+}
+
+template <typename T>
+::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdParams(
+    T test_func) {
+  return ::testing::ValuesIn(GetLowbdTestParams(test_func));
+}
+
+// Test the test-parameters generators work as expected.
+class AV1ConvolveParametersTest : public ::testing::Test {};
+
+TEST_F(AV1ConvolveParametersTest, GetLowbdTestParams) {
+  auto v = GetLowbdTestParams(av1_convolve_x_sr_c);
+  ASSERT_EQ(27U, v.size());
+  for (const auto &p : v) {
+    ASSERT_EQ(8, p.BitDepth());
+    // Needed (instead of ASSERT_EQ(...) since gtest does not
+    // have built in printing for arbitrary functions, which
+    // causes a compilation error.
+    bool same_fn = av1_convolve_x_sr_c == p.TestFunction();
+    ASSERT_TRUE(same_fn);
+  }
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+template <typename T>
+std::vector<TestParam<T>> GetHighbdTestParams(T test_func) {
+  return GetTestParams({ 10, 12 }, test_func);
+}
+
+template <typename T>
+::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdParams(
+    T test_func) {
+  return ::testing::ValuesIn(GetHighbdTestParams(test_func));
+}
+
+TEST_F(AV1ConvolveParametersTest, GetHighbdTestParams) {
+  auto v = GetHighbdTestParams(av1_highbd_convolve_x_sr_c);
+  ASSERT_EQ(54U, v.size());
+  int num_10 = 0;
+  int num_12 = 0;
+  for (const auto &p : v) {
+    ASSERT_TRUE(p.BitDepth() == 10 || p.BitDepth() == 12);
+    bool same_fn = av1_highbd_convolve_x_sr_c == p.TestFunction();
+    ASSERT_TRUE(same_fn);
+    if (p.BitDepth() == 10) {
+      ++num_10;
+    } else {
+      ++num_12;
+    }
+  }
+  ASSERT_EQ(num_10, num_12);
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+// AV1ConvolveTest is the base class that all convolve tests should derive from.
+// It provides storage/methods for generating randomized buffers for both
+// low bit-depth and high bit-depth, and setup/teardown methods for clearing
+// system state. Implementors can get the bit-depth / block-size /
+// test function by calling GetParam().
+template <typename T>
+class AV1ConvolveTest : public ::testing::TestWithParam<TestParam<T>> {
+ public:
+  ~AV1ConvolveTest() override = default;
+
+  void SetUp() override {
+    rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
+  }
+
+  // Randomizes the 8-bit input buffer and returns a pointer to it. Note that
+  // the pointer is safe to use with an 8-tap filter. The stride can range
+  // from width to (width + kPadding). Also note that the pointer is to the
+  // same memory location.
+  static constexpr int kInputPadding = 12;
+
+  // Get a pointer to a buffer with stride == width. Note that we must have
+  // the test param passed in explicitly -- the gtest framework does not
+  // support calling GetParam() within a templatized class.
+  // Note that FirstRandomInput8 always returns the same pointer -- if two
+  // inputs are needed, also use SecondRandomInput8.
+  const uint8_t *FirstRandomInput8(const TestParam<T> &param) {
+    // Note we can't call GetParam() directly -- gtest does not support
+    // this for parameterized types.
+    return RandomInput8(input8_1_, param);
+  }
+
+  const uint8_t *SecondRandomInput8(const TestParam<T> &param) {
+    return RandomInput8(input8_2_, param);
+  }
+
+  // Some of the intrinsics perform writes in 32 byte chunks. Moreover, some
+  // of the instrinsics assume that the stride is also a multiple of 32.
+  // To satisfy these constraints and also remain simple, output buffer strides
+  // are assumed MAX_SB_SIZE.
+  static constexpr int kOutputStride = MAX_SB_SIZE;
+
+  // Check that two 8-bit output buffers are identical.
+  void AssertOutputBufferEq(const uint8_t *p1, const uint8_t *p2, int width,
+                            int height) {
+    ASSERT_TRUE(p1 != p2) << "Buffers must be at different memory locations";
+    for (int j = 0; j < height; ++j) {
+      if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
+        p1 += kOutputStride;
+        p2 += kOutputStride;
+        continue;
+      }
+      for (int i = 0; i < width; ++i) {
+        ASSERT_EQ(p1[i], p2[i])
+            << width << "x" << height << " Pixel mismatch at (" << i << ", "
+            << j << ")";
+      }
+    }
+  }
+
+  // Check that two 16-bit output buffers are identical.
+  void AssertOutputBufferEq(const uint16_t *p1, const uint16_t *p2, int width,
+                            int height) {
+    ASSERT_TRUE(p1 != p2) << "Buffers must be in different memory locations";
+    for (int j = 0; j < height; ++j) {
+      if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
+        p1 += kOutputStride;
+        p2 += kOutputStride;
+        continue;
+      }
+      for (int i = 0; i < width; ++i) {
+        ASSERT_EQ(p1[i], p2[i])
+            << width << "x" << height << " Pixel mismatch at (" << i << ", "
+            << j << ")";
+      }
+    }
+  }
+
+#if CONFIG_AV1_HIGHBITDEPTH
+  // Note that the randomized values are capped by bit-depth.
+  const uint16_t *FirstRandomInput16(const TestParam<T> &param) {
+    return RandomInput16(input16_1_, param);
+  }
+
+  const uint16_t *SecondRandomInput16(const TestParam<T> &param) {
+    return RandomInput16(input16_2_, param);
+  }
+#endif
+
+ private:
+  const uint8_t *RandomInput8(uint8_t *p, const TestParam<T> &param) {
+    EXPECT_EQ(8, param.BitDepth());
+    EXPECT_GE(MAX_SB_SIZE, param.Block().Width());
+    EXPECT_GE(MAX_SB_SIZE, param.Block().Height());
+    const int padded_width = param.Block().Width() + kInputPadding;
+    const int padded_height = param.Block().Height() + kInputPadding;
+    Randomize(p, padded_width * padded_height);
+    return p + (kInputPadding / 2) * padded_width + kInputPadding / 2;
+  }
+
+  void Randomize(uint8_t *p, int size) {
+    for (int i = 0; i < size; ++i) {
+      p[i] = rnd_.Rand8();
+    }
+  }
+
+#if CONFIG_AV1_HIGHBITDEPTH
+  const uint16_t *RandomInput16(uint16_t *p, const TestParam<T> &param) {
+    // Check that this is only called with high bit-depths.
+    EXPECT_TRUE(param.BitDepth() == 10 || param.BitDepth() == 12);
+    EXPECT_GE(MAX_SB_SIZE, param.Block().Width());
+    EXPECT_GE(MAX_SB_SIZE, param.Block().Height());
+    const int padded_width = param.Block().Width() + kInputPadding;
+    const int padded_height = param.Block().Height() + kInputPadding;
+    Randomize(p, padded_width * padded_height, param.BitDepth());
+    return p + (kInputPadding / 2) * padded_width + kInputPadding / 2;
+  }
+
+  void Randomize(uint16_t *p, int size, int bit_depth) {
+    for (int i = 0; i < size; ++i) {
+      p[i] = rnd_.Rand16() & ((1 << bit_depth) - 1);
+    }
+  }
+#endif
+
+  static constexpr int kInputStride = MAX_SB_SIZE + kInputPadding;
+
+  libaom_test::ACMRandom rnd_;
+  // Statically allocate all the memory that is needed for the tests. Note
+  // that we cannot allocate output memory here. It must use DECLARE_ALIGNED,
+  // which is a C99 feature and interacts badly with C++ member variables.
+  uint8_t input8_1_[kInputStride * kInputStride];
+  uint8_t input8_2_[kInputStride * kInputStride];
+#if CONFIG_AV1_HIGHBITDEPTH
+  uint16_t input16_1_[kInputStride * kInputStride];
+  uint16_t input16_2_[kInputStride * kInputStride];
+#endif
+};
+
+////////////////////////////////////////////////////////
+// Single reference convolve-x functions (low bit-depth)
+////////////////////////////////////////////////////////
+typedef void (*convolve_x_func)(const uint8_t *src, int src_stride,
+                                uint8_t *dst, int dst_stride, int w, int h,
+                                const InterpFilterParams *filter_params_x,
+                                const int subpel_x_qn,
+                                ConvolveParams *conv_params);
+
+class AV1ConvolveXTest : public AV1ConvolveTest<convolve_x_func> {
+ public:
+  void RunTest() {
+    for (int sub_x = 0; sub_x < 16; ++sub_x) {
+      for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
+           ++filter) {
+        InterpFilter f = static_cast<InterpFilter>(filter);
+        TestConvolve(sub_x, f);
+      }
+    }
+  }
+
+ public:
+  void SpeedTest() {
+    for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
+         ++filter) {
+      InterpFilter f = static_cast<InterpFilter>(filter);
+      TestConvolveSpeed(f, 10000);
+    }
+  }
+
+ private:
+  void TestConvolve(const int sub_x, const InterpFilter filter) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+
+    const InterpFilterParams *filter_params_x =
+        av1_get_interp_filter_params_with_block_size(filter, width);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    const uint8_t *input = FirstRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height,
+                        filter_params_x, sub_x, &conv_params1);
+
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    convolve_x_func test_func = GetParam().TestFunction();
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    test_func(input, width, test, kOutputStride, width, height, filter_params_x,
+              sub_x, &conv_params2);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+ private:
+  void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+
+    const InterpFilterParams *filter_params_x =
+        av1_get_interp_filter_params_with_block_size(filter, width);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    const uint8_t *input = FirstRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height,
+                          filter_params_x, 0, &conv_params1);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    convolve_x_func test_func = GetParam().TestFunction();
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      test_func(input, width, test, kOutputStride, width, height,
+                filter_params_x, 0, &conv_params2);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
+           time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1ConvolveXTest, RunTest) { RunTest(); }
+
+TEST_P(AV1ConvolveXTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXTest,
+                         BuildLowbdParams(av1_convolve_x_sr_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXTest,
+                         BuildLowbdParams(av1_convolve_x_sr_sse2));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXTest,
+                         BuildLowbdParams(av1_convolve_x_sr_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXTest,
+                         BuildLowbdParams(av1_convolve_x_sr_neon));
+#endif
+
+#if HAVE_NEON_DOTPROD
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveXTest,
+                         BuildLowbdParams(av1_convolve_x_sr_neon_dotprod));
+#endif
+
+#if HAVE_NEON_I8MM
+INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveXTest,
+                         BuildLowbdParams(av1_convolve_x_sr_neon_i8mm));
+#endif
+
+////////////////////////////////////////////////////////////////
+// Single reference convolve-x IntraBC functions (low bit-depth)
+////////////////////////////////////////////////////////////////
+
+class AV1ConvolveXIntraBCTest : public AV1ConvolveTest<convolve_x_func> {
+ public:
+  void RunTest() {
+    // IntraBC functions only operate for subpel_x_qn = 8.
+    constexpr int kSubX = 8;
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
+    const uint8_t *input = FirstRandomInput8(GetParam());
+
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    // Use a stride different from width to avoid potential storing errors that
+    // would go undetected. The input buffer is filled using a padding of 12, so
+    // the stride can be anywhere between width and width + 12.
+    av1_convolve_x_sr_intrabc_c(input, width + 2, reference, kOutputStride,
+                                width, height, filter_params_x, kSubX,
+                                &conv_params1);
+
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    convolve_x_func test_func = GetParam().TestFunction();
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    test_func(input, width + 2, test, kOutputStride, width, height,
+              filter_params_x, kSubX, &conv_params2);
+
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+  void SpeedTest() {
+    constexpr int kNumIters = 10000;
+    const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
+    const uint8_t *input = FirstRandomInput8(GetParam());
+
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      av1_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride, width,
+                                  height, filter_params_x, 0, &conv_params1);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    convolve_x_func test_func = GetParam().TestFunction();
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      test_func(input, width, test, kOutputStride, width, height,
+                filter_params_x, 0, &conv_params2);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
+           time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1ConvolveXIntraBCTest, RunTest) { RunTest(); }
+
+TEST_P(AV1ConvolveXIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXIntraBCTest,
+                         BuildLowbdParams(av1_convolve_x_sr_intrabc_c));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXIntraBCTest,
+                         BuildLowbdParams(av1_convolve_x_sr_intrabc_neon));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+/////////////////////////////////////////////////////////
+// Single reference convolve-x functions (high bit-depth)
+/////////////////////////////////////////////////////////
+typedef void (*highbd_convolve_x_func)(
+    const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
+    int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn,
+    ConvolveParams *conv_params, int bd);
+
+class AV1ConvolveXHighbdTest : public AV1ConvolveTest<highbd_convolve_x_func> {
+ public:
+  void RunTest() {
+    for (int sub_x = 0; sub_x < 16; ++sub_x) {
+      for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
+           ++filter) {
+        InterpFilter f = static_cast<InterpFilter>(filter);
+        TestConvolve(sub_x, f);
+      }
+    }
+  }
+
+ public:
+  void SpeedTest() {
+    for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
+         ++filter) {
+      InterpFilter f = static_cast<InterpFilter>(filter);
+      TestConvolveSpeed(f, 10000);
+    }
+  }
+
+ private:
+  void TestConvolve(const int sub_x, const InterpFilter filter) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_x =
+        av1_get_interp_filter_params_with_block_size(filter, width);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
+    const uint16_t *input = FirstRandomInput16(GetParam());
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
+                               height, filter_params_x, sub_x, &conv_params1,
+                               bit_depth);
+
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
+                              filter_params_x, sub_x, &conv_params2, bit_depth);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+ private:
+  void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_x =
+        av1_get_interp_filter_params_with_block_size(filter, width);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    const uint16_t *input = FirstRandomInput16(GetParam());
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
+                                 height, filter_params_x, 0, &conv_params1,
+                                 bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    highbd_convolve_x_func test_func = GetParam().TestFunction();
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      test_func(input, width, test, kOutputStride, width, height,
+                filter_params_x, 0, &conv_params2, bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
+           time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1ConvolveXHighbdTest, RunTest) { RunTest(); }
+
+TEST_P(AV1ConvolveXHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_x_sr_c));
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveXHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_x_sr_ssse3));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_x_sr_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_x_sr_neon));
+#endif
+
+/////////////////////////////////////////////////////////////////
+// Single reference convolve-x IntraBC functions (high bit-depth)
+/////////////////////////////////////////////////////////////////
+
+class AV1ConvolveXHighbdIntraBCTest
+    : public AV1ConvolveTest<highbd_convolve_x_func> {
+ public:
+  void RunTest() {
+    // IntraBC functions only operate for subpel_x_qn = 8.
+    constexpr int kSubX = 8;
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
+    const uint16_t *input = FirstRandomInput16(GetParam());
+
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    // Use a stride different from width to avoid potential storing errors that
+    // would go undetected. The input buffer is filled using a padding of 12, so
+    // the stride can be anywhere between width and width + 12.
+    av1_highbd_convolve_x_sr_intrabc_c(
+        input, width + 2, reference, kOutputStride, width, height,
+        filter_params_x, kSubX, &conv_params1, bit_depth);
+
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
+                              height, filter_params_x, kSubX, &conv_params2,
+                              bit_depth);
+
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+  void SpeedTest() {
+    constexpr int kNumIters = 10000;
+    const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
+    const uint16_t *input = FirstRandomInput16(GetParam());
+
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      av1_highbd_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride,
+                                         width, height, filter_params_x, 0,
+                                         &conv_params1, bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    highbd_convolve_x_func test_func = GetParam().TestFunction();
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      test_func(input, width, test, kOutputStride, width, height,
+                filter_params_x, 0, &conv_params2, bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
+           time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1ConvolveXHighbdIntraBCTest, RunTest) { RunTest(); }
+
+TEST_P(AV1ConvolveXHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdIntraBCTest,
+                         BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_c));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1ConvolveXHighbdIntraBCTest,
+    BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_neon));
+#endif
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+////////////////////////////////////////////////////////
+// Single reference convolve-y functions (low bit-depth)
+////////////////////////////////////////////////////////
+typedef void (*convolve_y_func)(const uint8_t *src, int src_stride,
+                                uint8_t *dst, int dst_stride, int w, int h,
+                                const InterpFilterParams *filter_params_y,
+                                const int subpel_y_qn);
+
+class AV1ConvolveYTest : public AV1ConvolveTest<convolve_y_func> {
+ public:
+  void RunTest() {
+    for (int sub_y = 0; sub_y < 16; ++sub_y) {
+      for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
+           ++filter) {
+        InterpFilter f = static_cast<InterpFilter>(filter);
+        TestConvolve(sub_y, f);
+      }
+    }
+  }
+
+ public:
+  void SpeedTest() {
+    for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
+         ++filter) {
+      InterpFilter f = static_cast<InterpFilter>(filter);
+      TestConvolveSpeed(f, 10000);
+    }
+  }
+
+ private:
+  void TestConvolve(const int sub_y, const InterpFilter filter) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(filter, height);
+    const uint8_t *input = FirstRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height,
+                        filter_params_y, sub_y);
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
+                              filter_params_y, sub_y);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+ private:
+  void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(filter, height);
+    const uint8_t *input = FirstRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height,
+                          filter_params_y, 0);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      GetParam().TestFunction()(input, width, test, kOutputStride, width,
+                                height, filter_params_y, 0);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
+           time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1ConvolveYTest, RunTest) { RunTest(); }
+
+TEST_P(AV1ConvolveYTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYTest,
+                         BuildLowbdParams(av1_convolve_y_sr_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYTest,
+                         BuildLowbdParams(av1_convolve_y_sr_sse2));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYTest,
+                         BuildLowbdParams(av1_convolve_y_sr_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYTest,
+                         BuildLowbdParams(av1_convolve_y_sr_neon));
+#endif
+
+////////////////////////////////////////////////////////////////
+// Single reference convolve-y IntraBC functions (low bit-depth)
+////////////////////////////////////////////////////////////////
+
+class AV1ConvolveYIntraBCTest : public AV1ConvolveTest<convolve_y_func> {
+ public:
+  void RunTest() {
+    // IntraBC functions only operate for subpel_y_qn = 8.
+    constexpr int kSubY = 8;
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
+    const uint8_t *input = FirstRandomInput8(GetParam());
+
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    // Use a stride different from width to avoid potential storing errors that
+    // would go undetected. The input buffer is filled using a padding of 12, so
+    // the stride can be anywhere between width and width + 12.
+    av1_convolve_y_sr_intrabc_c(input, width + 2, reference, kOutputStride,
+                                width, height, filter_params_y, kSubY);
+
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
+                              height, filter_params_y, kSubY);
+
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+  void SpeedTest() {
+    constexpr int kNumIters = 10000;
+    const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+
+    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
+    const uint8_t *input = FirstRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      av1_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride, width,
+                                  height, filter_params_y, 0);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    convolve_y_func test_func = GetParam().TestFunction();
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      test_func(input, width, test, kOutputStride, width, height,
+                filter_params_y, 0);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
+           time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1ConvolveYIntraBCTest, RunTest) { RunTest(); }
+
+TEST_P(AV1ConvolveYIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYIntraBCTest,
+                         BuildLowbdParams(av1_convolve_y_sr_intrabc_c));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYIntraBCTest,
+                         BuildLowbdParams(av1_convolve_y_sr_intrabc_neon));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+/////////////////////////////////////////////////////////
+// Single reference convolve-y functions (high bit-depth)
+/////////////////////////////////////////////////////////
+typedef void (*highbd_convolve_y_func)(
+    const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
+    int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn,
+    int bd);
+
+class AV1ConvolveYHighbdTest : public AV1ConvolveTest<highbd_convolve_y_func> {
+ public:
+  void RunTest() {
+    for (int sub_y = 0; sub_y < 16; ++sub_y) {
+      for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
+           ++filter) {
+        InterpFilter f = static_cast<InterpFilter>(filter);
+        TestConvolve(sub_y, f);
+      }
+    }
+  }
+
+ public:
+  void SpeedTest() {
+    for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
+         ++filter) {
+      InterpFilter f = static_cast<InterpFilter>(filter);
+      TestConvolveSpeed(f, 10000);
+    }
+  }
+
+ private:
+  void TestConvolve(const int sub_y, const InterpFilter filter) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(filter, height);
+    const uint16_t *input = FirstRandomInput16(GetParam());
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
+                               height, filter_params_y, sub_y, bit_depth);
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
+                              filter_params_y, sub_y, bit_depth);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+ private:
+  void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(filter, width);
+    const uint16_t *input = FirstRandomInput16(GetParam());
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
+                                 height, filter_params_y, 0, bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    highbd_convolve_y_func test_func = GetParam().TestFunction();
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      test_func(input, width, test, kOutputStride, width, height,
+                filter_params_y, 0, bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
+           time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1ConvolveYHighbdTest, RunTest) { RunTest(); }
+
+TEST_P(AV1ConvolveYHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_y_sr_c));
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveYHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_y_sr_ssse3));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_y_sr_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_y_sr_neon));
+#endif
+
+/////////////////////////////////////////////////////////////////
+// Single reference convolve-y IntraBC functions (high bit-depth)
+/////////////////////////////////////////////////////////////////
+
+class AV1ConvolveYHighbdIntraBCTest
+    : public AV1ConvolveTest<highbd_convolve_y_func> {
+ public:
+  void RunTest() {
+    // IntraBC functions only operate for subpel_y_qn = 8.
+    constexpr int kSubY = 8;
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
+    const uint16_t *input = FirstRandomInput16(GetParam());
+
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    // Use a stride different from width to avoid potential storing errors that
+    // would go undetected. The input buffer is filled using a padding of 12, so
+    // the stride can be anywhere between width and width + 12.
+    av1_highbd_convolve_y_sr_intrabc_c(input, width + 2, reference,
+                                       kOutputStride, width, height,
+                                       filter_params_y, kSubY, bit_depth);
+
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
+                              height, filter_params_y, kSubY, bit_depth);
+
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+  void SpeedTest() {
+    constexpr int kNumIters = 10000;
+    const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(filter, width);
+    const uint16_t *input = FirstRandomInput16(GetParam());
+
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      av1_highbd_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride,
+                                         width, height, filter_params_y, 0,
+                                         bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    highbd_convolve_y_func test_func = GetParam().TestFunction();
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      test_func(input, width, test, kOutputStride, width, height,
+                filter_params_y, 0, bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
+           time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1ConvolveYHighbdIntraBCTest, RunTest) { RunTest(); }
+
+TEST_P(AV1ConvolveYHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdIntraBCTest,
+                         BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_c));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1ConvolveYHighbdIntraBCTest,
+    BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_neon));
+#endif
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+//////////////////////////////////////////////////////////////
+// Single reference convolve-copy functions (low bit-depth)
+//////////////////////////////////////////////////////////////
+typedef void (*convolve_copy_func)(const uint8_t *src, ptrdiff_t src_stride,
+                                   uint8_t *dst, ptrdiff_t dst_stride, int w,
+                                   int h);
+
+class AV1ConvolveCopyTest : public AV1ConvolveTest<convolve_copy_func> {
+ public:
+  void RunTest() {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const uint8_t *input = FirstRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    aom_convolve_copy_c(input, width, reference, kOutputStride, width, height);
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+};
+
+// Note that even though these are AOM convolve functions, we are using the
+// newer AV1 test framework.
+TEST_P(AV1ConvolveCopyTest, RunTest) { RunTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyTest,
+                         BuildLowbdParams(aom_convolve_copy_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyTest,
+                         BuildLowbdParams(aom_convolve_copy_sse2));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyTest,
+                         BuildLowbdParams(aom_convolve_copy_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyTest,
+                         BuildLowbdParams(aom_convolve_copy_neon));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+///////////////////////////////////////////////////////////////
+// Single reference convolve-copy functions (high bit-depth)
+///////////////////////////////////////////////////////////////
+typedef void (*highbd_convolve_copy_func)(const uint16_t *src,
+                                          ptrdiff_t src_stride, uint16_t *dst,
+                                          ptrdiff_t dst_stride, int w, int h);
+
+class AV1ConvolveCopyHighbdTest
+    : public AV1ConvolveTest<highbd_convolve_copy_func> {
+ public:
+  void RunTest() {
+    const BlockSize &block = GetParam().Block();
+    const int width = block.Width();
+    const int height = block.Height();
+    const uint16_t *input = FirstRandomInput16(GetParam());
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    aom_highbd_convolve_copy_c(input, width, reference, kOutputStride, width,
+                               height);
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+};
+
+TEST_P(AV1ConvolveCopyHighbdTest, RunTest) { RunTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyHighbdTest,
+                         BuildHighbdParams(aom_highbd_convolve_copy_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyHighbdTest,
+                         BuildHighbdParams(aom_highbd_convolve_copy_sse2));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyHighbdTest,
+                         BuildHighbdParams(aom_highbd_convolve_copy_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyHighbdTest,
+                         BuildHighbdParams(aom_highbd_convolve_copy_neon));
+#endif
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+/////////////////////////////////////////////////////////
+// Single reference convolve-2D functions (low bit-depth)
+/////////////////////////////////////////////////////////
+typedef void (*convolve_2d_func)(const uint8_t *src, int src_stride,
+                                 uint8_t *dst, int dst_stride, int w, int h,
+                                 const InterpFilterParams *filter_params_x,
+                                 const InterpFilterParams *filter_params_y,
+                                 const int subpel_x_qn, const int subpel_y_qn,
+                                 ConvolveParams *conv_params);
+
+class AV1Convolve2DTest : public AV1ConvolveTest<convolve_2d_func> {
+ public:
+  void RunTest() {
+    for (int sub_x = 0; sub_x < 16; ++sub_x) {
+      for (int sub_y = 0; sub_y < 16; ++sub_y) {
+        for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
+          for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
+            if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
+                ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
+              continue;
+            TestConvolve(static_cast<InterpFilter>(h_f),
+                         static_cast<InterpFilter>(v_f), sub_x, sub_y);
+          }
+        }
+      }
+    }
+  }
+
+ public:
+  void SpeedTest() {
+    for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
+      for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
+        if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
+            ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
+          continue;
+        TestConvolveSpeed(static_cast<InterpFilter>(h_f),
+                          static_cast<InterpFilter>(v_f), 10000);
+      }
+    }
+  }
+
+ private:
+  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
+                    const int sub_x, const int sub_y) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const InterpFilterParams *filter_params_x =
+        av1_get_interp_filter_params_with_block_size(h_f, width);
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(v_f, height);
+    const uint8_t *input = FirstRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width, height,
+                         filter_params_x, filter_params_y, sub_x, sub_y,
+                         &conv_params1);
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
+                              filter_params_x, filter_params_y, sub_x, sub_y,
+                              &conv_params2);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+ private:
+  void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
+                         int num_iters) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const InterpFilterParams *filter_params_x =
+        av1_get_interp_filter_params_with_block_size(h_f, width);
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(v_f, height);
+    const uint8_t *input = FirstRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
+                           height, filter_params_x, filter_params_y, 0, 0,
+                           &conv_params1);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      GetParam().TestFunction()(input, width, test, kOutputStride, width,
+                                height, filter_params_x, filter_params_y, 0, 0,
+                                &conv_params2);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
+           time1, time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1Convolve2DTest, RunTest) { RunTest(); }
+
+TEST_P(AV1Convolve2DTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DTest,
+                         BuildLowbdParams(av1_convolve_2d_sr_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1Convolve2DTest,
+                         BuildLowbdParams(av1_convolve_2d_sr_sse2));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DTest,
+                         BuildLowbdParams(av1_convolve_2d_sr_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DTest,
+                         BuildLowbdParams(av1_convolve_2d_sr_neon));
+#endif
+
+#if HAVE_NEON_DOTPROD
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1Convolve2DTest,
+                         BuildLowbdParams(av1_convolve_2d_sr_neon_dotprod));
+#endif
+
+#if HAVE_NEON_I8MM
+INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1Convolve2DTest,
+                         BuildLowbdParams(av1_convolve_2d_sr_neon_i8mm));
+#endif
+
+/////////////////////////////////////////////////////////////////
+// Single reference convolve-2D IntraBC functions (low bit-depth)
+/////////////////////////////////////////////////////////////////
+
+class AV1Convolve2DIntraBCTest : public AV1ConvolveTest<convolve_2d_func> {
+ public:
+  void RunTest() {
+    // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
+    constexpr int kSubX = 8;
+    constexpr int kSubY = 8;
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
+    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
+    const uint8_t *input = FirstRandomInput8(GetParam());
+
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    // Use a stride different from width to avoid potential storing errors that
+    // would go undetected. The input buffer is filled using a padding of 12, so
+    // the stride can be anywhere between width and width + 12.
+    av1_convolve_2d_sr_intrabc_c(input, width + 2, reference, kOutputStride,
+                                 width, height, filter_params_x,
+                                 filter_params_y, kSubX, kSubY, &conv_params1);
+
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
+                              height, filter_params_x, filter_params_y, kSubX,
+                              kSubY, &conv_params2);
+
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+  void SpeedTest() {
+    constexpr int kNumIters = 10000;
+    const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
+    const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
+    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
+    const uint8_t *input = FirstRandomInput8(GetParam());
+
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      av1_convolve_2d_sr_intrabc_c(input, width, reference, kOutputStride,
+                                   width, height, filter_params_x,
+                                   filter_params_y, 8, 8, &conv_params1);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    convolve_2d_func test_func = GetParam().TestFunction();
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      test_func(input, width, test, kOutputStride, width, height,
+                filter_params_x, filter_params_y, 8, 8, &conv_params2);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
+           time1, time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1Convolve2DIntraBCTest, RunTest) { RunTest(); }
+
+TEST_P(AV1Convolve2DIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DIntraBCTest,
+                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_c));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DIntraBCTest,
+                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_neon));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+//////////////////////////////////////////////////////////
+// Single reference convolve-2d functions (high bit-depth)
+//////////////////////////////////////////////////////////
+
+typedef void (*highbd_convolve_2d_func)(
+    const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
+    int h, const InterpFilterParams *filter_params_x,
+    const InterpFilterParams *filter_params_y, const int subpel_x_qn,
+    const int subpel_y_qn, ConvolveParams *conv_params, int bd);
+
+class AV1Convolve2DHighbdTest
+    : public AV1ConvolveTest<highbd_convolve_2d_func> {
+ public:
+  void RunTest() {
+    for (int sub_x = 0; sub_x < 16; ++sub_x) {
+      for (int sub_y = 0; sub_y < 16; ++sub_y) {
+        for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
+          for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
+            if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
+                ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
+              continue;
+            TestConvolve(static_cast<InterpFilter>(h_f),
+                         static_cast<InterpFilter>(v_f), sub_x, sub_y);
+          }
+        }
+      }
+    }
+  }
+
+ public:
+  void SpeedTest() {
+    for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
+      for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
+        if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
+            ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
+          continue;
+        TestConvolveSpeed(static_cast<InterpFilter>(h_f),
+                          static_cast<InterpFilter>(v_f), 10000);
+      }
+    }
+  }
+
+ private:
+  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
+                    const int sub_x, const int sub_y) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_x =
+        av1_get_interp_filter_params_with_block_size(h_f, width);
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(v_f, height);
+    const uint16_t *input = FirstRandomInput16(GetParam());
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
+    av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
+                                height, filter_params_x, filter_params_y, sub_x,
+                                sub_y, &conv_params1, bit_depth);
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
+    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
+                              filter_params_x, filter_params_y, sub_x, sub_y,
+                              &conv_params2, bit_depth);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+  void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
+                         int num_iters) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_x =
+        av1_get_interp_filter_params_with_block_size(h_f, width);
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(v_f, height);
+    const uint16_t *input = FirstRandomInput16(GetParam());
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
+                                  height, filter_params_x, filter_params_y, 0,
+                                  0, &conv_params1, bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      GetParam().TestFunction()(input, width, test, kOutputStride, width,
+                                height, filter_params_x, filter_params_y, 0, 0,
+                                &conv_params2, bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
+           time1, time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1Convolve2DHighbdTest, RunTest) { RunTest(); }
+
+TEST_P(AV1Convolve2DHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_2d_sr_c));
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_2d_sr_ssse3));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_2d_sr_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DHighbdTest,
+                         BuildHighbdParams(av1_highbd_convolve_2d_sr_neon));
+#endif
+
+//////////////////////////////////////////////////////////////////
+// Single reference convolve-2d IntraBC functions (high bit-depth)
+//////////////////////////////////////////////////////////////////
+
+class AV1Convolve2DHighbdIntraBCTest
+    : public AV1ConvolveTest<highbd_convolve_2d_func> {
+ public:
+  void RunTest() {
+    // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
+    constexpr int kSubX = 8;
+    constexpr int kSubY = 8;
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
+    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
+    const uint16_t *input = FirstRandomInput16(GetParam());
+
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
+    // Use a stride different from width to avoid potential storing errors that
+    // would go undetected. The input buffer is filled using a padding of 12, so
+    // the stride can be anywhere between width and width + 12.
+    av1_highbd_convolve_2d_sr_intrabc_c(input, width + 2, reference,
+                                        kOutputStride, width, height,
+                                        filter_params_x, filter_params_y, kSubX,
+                                        kSubY, &conv_params1, bit_depth);
+
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
+    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
+                              height, filter_params_x, filter_params_y, kSubX,
+                              kSubY, &conv_params2, bit_depth);
+
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+  void SpeedTest() {
+    constexpr int kNumIters = 10000;
+    const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
+    const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params_x =
+        av1_get_interp_filter_params_with_block_size(h_f, width);
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(v_f, height);
+    const uint16_t *input = FirstRandomInput16(GetParam());
+
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    ConvolveParams conv_params1 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      av1_highbd_convolve_2d_sr_intrabc_c(
+          input, width, reference, kOutputStride, width, height,
+          filter_params_x, filter_params_y, 0, 0, &conv_params1, bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    highbd_convolve_2d_func test_func = GetParam().TestFunction();
+    ConvolveParams conv_params2 =
+        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < kNumIters; ++i) {
+      test_func(input, width, test, kOutputStride, width, height,
+                filter_params_x, filter_params_y, 0, 0, &conv_params2,
+                bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
+           time1, time2, time1 / time2);
+  }
+};
+
+TEST_P(AV1Convolve2DHighbdIntraBCTest, RunTest) { RunTest(); }
+
+TEST_P(AV1Convolve2DHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AV1Convolve2DHighbdIntraBCTest,
+    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_c));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1Convolve2DHighbdIntraBCTest,
+    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_neon));
+#endif
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+//////////////////////////
+// Compound Convolve Tests
+//////////////////////////
+
+// The compound functions do not work for chroma block sizes. Provide
+// a function to generate test parameters for just luma block sizes.
+template <typename T>
+std::vector<TestParam<T>> GetLumaTestParams(
+    std::initializer_list<int> bit_depths, T test_func) {
+  std::set<BlockSize> sizes;
+  for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) {
+    const int w = block_size_wide[b];
+    const int h = block_size_high[b];
+    sizes.insert(BlockSize(w, h));
+  }
+  std::vector<TestParam<T>> result;
+  for (int bit_depth : bit_depths) {
+    for (const auto &block : sizes) {
+      result.push_back(TestParam<T>(block, bit_depth, test_func));
+    }
+  }
+  return result;
+}
+
+template <typename T>
+std::vector<TestParam<T>> GetLowbdLumaTestParams(T test_func) {
+  return GetLumaTestParams({ 8 }, test_func);
+}
+
+template <typename T>
+::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdLumaParams(
+    T test_func) {
+  return ::testing::ValuesIn(GetLowbdLumaTestParams(test_func));
+}
+
+TEST_F(AV1ConvolveParametersTest, GetLowbdLumaTestParams) {
+  auto v = GetLowbdLumaTestParams(av1_dist_wtd_convolve_x_c);
+  ASSERT_EQ(22U, v.size());
+  for (const auto &e : v) {
+    ASSERT_EQ(8, e.BitDepth());
+    bool same_fn = av1_dist_wtd_convolve_x_c == e.TestFunction();
+    ASSERT_TRUE(same_fn);
+  }
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+template <typename T>
+std::vector<TestParam<T>> GetHighbdLumaTestParams(T test_func) {
+  return GetLumaTestParams({ 10, 12 }, test_func);
+}
+
+TEST_F(AV1ConvolveParametersTest, GetHighbdLumaTestParams) {
+  auto v = GetHighbdLumaTestParams(av1_highbd_dist_wtd_convolve_x_c);
+  ASSERT_EQ(44U, v.size());
+  int num_10 = 0;
+  int num_12 = 0;
+  for (const auto &e : v) {
+    ASSERT_TRUE(10 == e.BitDepth() || 12 == e.BitDepth());
+    bool same_fn = av1_highbd_dist_wtd_convolve_x_c == e.TestFunction();
+    ASSERT_TRUE(same_fn);
+    if (e.BitDepth() == 10) {
+      ++num_10;
+    } else {
+      ++num_12;
+    }
+  }
+  ASSERT_EQ(num_10, num_12);
+}
+
+template <typename T>
+::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdLumaParams(
+    T test_func) {
+  return ::testing::ValuesIn(GetHighbdLumaTestParams(test_func));
+}
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+// Compound cases also need to test different frame offsets and weightings.
+class CompoundParam {
+ public:
+  CompoundParam(bool use_dist_wtd_comp_avg, int fwd_offset, int bck_offset)
+      : use_dist_wtd_comp_avg_(use_dist_wtd_comp_avg), fwd_offset_(fwd_offset),
+        bck_offset_(bck_offset) {}
+
+  bool UseDistWtdCompAvg() const { return use_dist_wtd_comp_avg_; }
+  int FwdOffset() const { return fwd_offset_; }
+  int BckOffset() const { return bck_offset_; }
+
+ private:
+  bool use_dist_wtd_comp_avg_;
+  int fwd_offset_;
+  int bck_offset_;
+};
+
+std::vector<CompoundParam> GetCompoundParams() {
+  std::vector<CompoundParam> result;
+  result.push_back(CompoundParam(false, 0, 0));
+  for (int k = 0; k < 2; ++k) {
+    for (int l = 0; l < 4; ++l) {
+      result.push_back(CompoundParam(true, quant_dist_lookup_table[l][k],
+                                     quant_dist_lookup_table[l][1 - k]));
+    }
+  }
+  return result;
+}
+
+TEST_F(AV1ConvolveParametersTest, GetCompoundParams) {
+  auto v = GetCompoundParams();
+  ASSERT_EQ(9U, v.size());
+  ASSERT_FALSE(v[0].UseDistWtdCompAvg());
+  for (size_t i = 1; i < v.size(); ++i) {
+    ASSERT_TRUE(v[i].UseDistWtdCompAvg());
+  }
+}
+
+////////////////////////////////////////////////
+// Compound convolve-x functions (low bit-depth)
+////////////////////////////////////////////////
+
+ConvolveParams GetConvolveParams(int do_average, CONV_BUF_TYPE *conv_buf,
+                                 int width, int bit_depth,
+                                 const CompoundParam &compound) {
+  ConvolveParams conv_params =
+      get_conv_params_no_round(do_average, 0, conv_buf, width, 1, bit_depth);
+  conv_params.use_dist_wtd_comp_avg = compound.UseDistWtdCompAvg();
+  conv_params.fwd_offset = compound.FwdOffset();
+  conv_params.bck_offset = compound.BckOffset();
+  return conv_params;
+}
+
+class AV1ConvolveXCompoundTest : public AV1ConvolveTest<convolve_x_func> {
+ public:
+  void RunTest() {
+    auto compound_params = GetCompoundParams();
+    for (int sub_pix = 0; sub_pix < 16; ++sub_pix) {
+      for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) {
+        for (const auto &c : compound_params) {
+          TestConvolve(sub_pix, static_cast<InterpFilter>(f), c);
+        }
+      }
+    }
+  }
+
+ protected:
+  virtual const InterpFilterParams *FilterParams(InterpFilter f,
+                                                 const BlockSize &block) const {
+    return av1_get_interp_filter_params_with_block_size(f, block.Width());
+  }
+
+  virtual convolve_x_func ReferenceFunc() const {
+    return av1_dist_wtd_convolve_x_c;
+  }
+
+ private:
+  void TestConvolve(const int sub_pix, const InterpFilter filter,
+                    const CompoundParam &compound) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const uint8_t *input1 = FirstRandomInput8(GetParam());
+    const uint8_t *input2 = SecondRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
+    Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf,
+             compound, sub_pix, filter);
+
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
+    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
+             compound, sub_pix, filter);
+
+    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+ private:
+  void Convolve(convolve_x_func test_func, const uint8_t *src1,
+                const uint8_t *src2, uint8_t *dst, CONV_BUF_TYPE *conv_buf,
+                const CompoundParam &compound, const int sub_pix,
+                const InterpFilter filter) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const InterpFilterParams *filter_params =
+        FilterParams(filter, GetParam().Block());
+
+    ConvolveParams conv_params =
+        GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
+    test_func(src1, width, dst, kOutputStride, width, height, filter_params,
+              sub_pix, &conv_params);
+
+    conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
+    test_func(src2, width, dst, kOutputStride, width, height, filter_params,
+              sub_pix, &conv_params);
+  }
+};
+
+TEST_P(AV1ConvolveXCompoundTest, RunTest) { RunTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_sse2));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon));
+#endif
+
+#if HAVE_NEON_DOTPROD
+INSTANTIATE_TEST_SUITE_P(
+    NEON_DOTPROD, AV1ConvolveXCompoundTest,
+    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_dotprod));
+#endif
+
+#if HAVE_NEON_I8MM
+INSTANTIATE_TEST_SUITE_P(
+    NEON_I8MM, AV1ConvolveXCompoundTest,
+    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_i8mm));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+/////////////////////////////////////////////////
+// Compound convolve-x functions (high bit-depth)
+/////////////////////////////////////////////////
+class AV1ConvolveXHighbdCompoundTest
+    : public AV1ConvolveTest<highbd_convolve_x_func> {
+ public:
+  void RunTest() {
+    auto compound_params = GetCompoundParams();
+    for (int sub_pix = 0; sub_pix < 16; ++sub_pix) {
+      for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) {
+        for (const auto &c : compound_params) {
+          TestConvolve(sub_pix, static_cast<InterpFilter>(f), c);
+        }
+      }
+    }
+  }
+
+ protected:
+  virtual const InterpFilterParams *FilterParams(InterpFilter f,
+                                                 const BlockSize &block) const {
+    return av1_get_interp_filter_params_with_block_size(f, block.Width());
+  }
+
+  virtual highbd_convolve_x_func ReferenceFunc() const {
+    return av1_highbd_dist_wtd_convolve_x_c;
+  }
+
+ private:
+  void TestConvolve(const int sub_pix, const InterpFilter filter,
+                    const CompoundParam &compound) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+
+    const uint16_t *input1 = FirstRandomInput16(GetParam());
+    const uint16_t *input2 = SecondRandomInput16(GetParam());
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
+    Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf,
+             compound, sub_pix, filter);
+
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
+    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
+             compound, sub_pix, filter);
+
+    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+  void Convolve(highbd_convolve_x_func test_func, const uint16_t *src1,
+                const uint16_t *src2, uint16_t *dst, CONV_BUF_TYPE *conv_buf,
+                const CompoundParam &compound, const int sub_pix,
+                const InterpFilter filter) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+    const int bit_depth = GetParam().BitDepth();
+    const InterpFilterParams *filter_params =
+        FilterParams(filter, GetParam().Block());
+    ConvolveParams conv_params =
+        GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
+    test_func(src1, width, dst, kOutputStride, width, height, filter_params,
+              sub_pix, &conv_params, bit_depth);
+    conv_params =
+        GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
+    test_func(src2, width, dst, kOutputStride, width, height, filter_params,
+              sub_pix, &conv_params, bit_depth);
+  }
+};
+
+TEST_P(AV1ConvolveXHighbdCompoundTest, RunTest) { RunTest(); }
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AV1ConvolveXHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_c));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AV1ConvolveXHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sse4_1));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1ConvolveXHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1ConvolveXHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_neon));
+#endif
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+////////////////////////////////////////////////
+// Compound convolve-y functions (low bit-depth)
+////////////////////////////////////////////////
+
+// Note that the X and Y convolve functions have the same type signature and
+// logic; they only differentiate the filter parameters and reference function.
+class AV1ConvolveYCompoundTest : public AV1ConvolveXCompoundTest {
+ protected:
+  const InterpFilterParams *FilterParams(
+      InterpFilter f, const BlockSize &block) const override {
+    return av1_get_interp_filter_params_with_block_size(f, block.Height());
+  }
+
+  convolve_x_func ReferenceFunc() const override {
+    return av1_dist_wtd_convolve_y_c;
+  }
+};
+
+TEST_P(AV1ConvolveYCompoundTest, RunTest) { RunTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_sse2));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_neon));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+/////////////////////////////////////////////////
+// Compound convolve-y functions (high bit-depth)
+/////////////////////////////////////////////////
+
+// Again, the X and Y convolve functions have the same type signature and logic.
+class AV1ConvolveYHighbdCompoundTest : public AV1ConvolveXHighbdCompoundTest {
+  highbd_convolve_x_func ReferenceFunc() const override {
+    return av1_highbd_dist_wtd_convolve_y_c;
+  }
+  const InterpFilterParams *FilterParams(
+      InterpFilter f, const BlockSize &block) const override {
+    return av1_get_interp_filter_params_with_block_size(f, block.Height());
+  }
+};
+
+TEST_P(AV1ConvolveYHighbdCompoundTest, RunTest) { RunTest(); }
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AV1ConvolveYHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_c));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AV1ConvolveYHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sse4_1));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1ConvolveYHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1ConvolveYHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_neon));
+#endif
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+//////////////////////////////////////////////////////
+// Compound convolve-2d-copy functions (low bit-depth)
+//////////////////////////////////////////////////////
+typedef void (*compound_conv_2d_copy_func)(const uint8_t *src, int src_stride,
+                                           uint8_t *dst, int dst_stride, int w,
+                                           int h, ConvolveParams *conv_params);
+
+class AV1Convolve2DCopyCompoundTest
+    : public AV1ConvolveTest<compound_conv_2d_copy_func> {
+ public:
+  void RunTest() {
+    auto compound_params = GetCompoundParams();
+    for (const auto &compound : compound_params) {
+      TestConvolve(compound);
+    }
+  }
+  void SpeedTest() {
+    for (const auto &compound : GetCompoundParams()) {
+      TestConvolveSpeed(compound, 100000);
+    }
+  }
+
+ private:
+  void TestConvolve(const CompoundParam &compound) {
+    const BlockSize &block = GetParam().Block();
+    const int width = block.Width();
+    const int height = block.Height();
+
+    const uint8_t *input1 = FirstRandomInput8(GetParam());
+    const uint8_t *input2 = SecondRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
+    Convolve(av1_dist_wtd_convolve_2d_copy_c, input1, input2, reference,
+             reference_conv_buf, compound);
+
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
+    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
+             compound);
+
+    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+  void TestConvolveSpeed(const CompoundParam &compound, const int num_iters) {
+    const int width = GetParam().Block().Width();
+    const int height = GetParam().Block().Height();
+
+    const uint8_t *src0 = FirstRandomInput8(GetParam());
+    const uint8_t *src1 = SecondRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, dst[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, conv_buf[MAX_SB_SQUARE]);
+
+    const auto test_func = GetParam().TestFunction();
+
+    ConvolveParams conv_params_0 =
+        GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
+    ConvolveParams conv_params_1 =
+        GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      av1_dist_wtd_convolve_2d_copy_c(src0, width, dst, kOutputStride, width,
+                                      height, &conv_params_0);
+      av1_dist_wtd_convolve_2d_copy_c(src1, width, dst, kOutputStride, width,
+                                      height, &conv_params_1);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_iters; ++i) {
+      test_func(src0, width, dst, kOutputStride, width, height, &conv_params_0);
+      test_func(src1, width, dst, kOutputStride, width, height, &conv_params_1);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    printf("Dist Weighted: %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n",
+           compound.UseDistWtdCompAvg(), width, height, time1, time2,
+           time1 / time2);
+  }
+
+  void Convolve(compound_conv_2d_copy_func test_func, const uint8_t *src1,
+                const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf,
+                const CompoundParam &compound) {
+    const BlockSize &block = GetParam().Block();
+    const int width = block.Width();
+    const int height = block.Height();
+    ConvolveParams conv_params =
+        GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
+    test_func(src1, width, dst, kOutputStride, width, height, &conv_params);
+
+    conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
+    test_func(src2, width, dst, kOutputStride, width, height, &conv_params);
+  }
+};
+
+TEST_P(AV1Convolve2DCopyCompoundTest, RunTest) { RunTest(); }
+TEST_P(AV1Convolve2DCopyCompoundTest, DISABLED_SpeedTest) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DCopyCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AV1Convolve2DCopyCompoundTest,
+    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_sse2));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1Convolve2DCopyCompoundTest,
+    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1Convolve2DCopyCompoundTest,
+    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_neon));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+///////////////////////////////////////////////////////
+// Compound convolve-2d-copy functions (high bit-depth)
+///////////////////////////////////////////////////////
+typedef void (*highbd_compound_conv_2d_copy_func)(const uint16_t *src,
+                                                  int src_stride, uint16_t *dst,
+                                                  int dst_stride, int w, int h,
+                                                  ConvolveParams *conv_params,
+                                                  int bd);
+
+class AV1Convolve2DCopyHighbdCompoundTest
+    : public AV1ConvolveTest<highbd_compound_conv_2d_copy_func> {
+ public:
+  void RunTest() {
+    auto compound_params = GetCompoundParams();
+    for (const auto &compound : compound_params) {
+      TestConvolve(compound);
+    }
+  }
+
+ private:
+  void TestConvolve(const CompoundParam &compound) {
+    const BlockSize &block = GetParam().Block();
+    const int width = block.Width();
+    const int height = block.Height();
+
+    const uint16_t *input1 = FirstRandomInput16(GetParam());
+    const uint16_t *input2 = SecondRandomInput16(GetParam());
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
+    Convolve(av1_highbd_dist_wtd_convolve_2d_copy_c, input1, input2, reference,
+             reference_conv_buf, compound);
+
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
+    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
+             compound);
+
+    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+  void Convolve(highbd_compound_conv_2d_copy_func test_func,
+                const uint16_t *src1, const uint16_t *src2, uint16_t *dst,
+                uint16_t *conv_buf, const CompoundParam &compound) {
+    const BlockSize &block = GetParam().Block();
+    const int width = block.Width();
+    const int height = block.Height();
+    const int bit_depth = GetParam().BitDepth();
+
+    ConvolveParams conv_params =
+        GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
+    test_func(src1, width, dst, kOutputStride, width, height, &conv_params,
+              bit_depth);
+
+    conv_params =
+        GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
+    test_func(src2, width, dst, kOutputStride, width, height, &conv_params,
+              bit_depth);
+  }
+};
+
+TEST_P(AV1Convolve2DCopyHighbdCompoundTest, RunTest) { RunTest(); }
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AV1Convolve2DCopyHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_c));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AV1Convolve2DCopyHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_sse4_1));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1Convolve2DCopyHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1Convolve2DCopyHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_neon));
+#endif
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+/////////////////////////////////////////////////
+// Compound convolve-2d functions (low bit-depth)
+/////////////////////////////////////////////////
+
+class AV1Convolve2DCompoundTest : public AV1ConvolveTest<convolve_2d_func> {
+ public:
+  void RunTest() {
+    auto compound_params = GetCompoundParams();
+    for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) {
+      for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) {
+        for (int sub_x = 0; sub_x < 16; ++sub_x) {
+          for (int sub_y = 0; sub_y < 16; ++sub_y) {
+            for (const auto &compound : compound_params) {
+              TestConvolve(static_cast<InterpFilter>(h_f),
+                           static_cast<InterpFilter>(v_f), sub_x, sub_y,
+                           compound);
+            }
+          }
+        }
+      }
+    }
+  }
+
+ private:
+  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
+                    const int sub_x, const int sub_y,
+                    const CompoundParam &compound) {
+    const BlockSize &block = GetParam().Block();
+    const int width = block.Width();
+    const int height = block.Height();
+
+    const uint8_t *input1 = FirstRandomInput8(GetParam());
+    const uint8_t *input2 = SecondRandomInput8(GetParam());
+    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
+    Convolve(av1_dist_wtd_convolve_2d_c, input1, input2, reference,
+             reference_conv_buf, compound, h_f, v_f, sub_x, sub_y);
+
+    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
+    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
+             compound, h_f, v_f, sub_x, sub_y);
+
+    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+ private:
+  void Convolve(convolve_2d_func test_func, const uint8_t *src1,
+                const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf,
+                const CompoundParam &compound, const InterpFilter h_f,
+                const InterpFilter v_f, const int sub_x, const int sub_y) {
+    const BlockSize &block = GetParam().Block();
+    const int width = block.Width();
+    const int height = block.Height();
+
+    const InterpFilterParams *filter_params_x =
+        av1_get_interp_filter_params_with_block_size(h_f, width);
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(v_f, height);
+    ConvolveParams conv_params =
+        GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
+
+    test_func(src1, width, dst, kOutputStride, width, height, filter_params_x,
+              filter_params_y, sub_x, sub_y, &conv_params);
+
+    conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
+    test_func(src2, width, dst, kOutputStride, width, height, filter_params_x,
+              filter_params_y, sub_x, sub_y, &conv_params);
+  }
+};
+
+TEST_P(AV1Convolve2DCompoundTest, RunTest) { RunTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1Convolve2DCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_sse2));
+#endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_ssse3));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DCompoundTest,
+                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon));
+#endif
+
+#if HAVE_NEON_DOTPROD
+INSTANTIATE_TEST_SUITE_P(
+    NEON_DOTPROD, AV1Convolve2DCompoundTest,
+    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_dotprod));
+#endif
+
+#if HAVE_NEON_I8MM
+INSTANTIATE_TEST_SUITE_P(
+    NEON_I8MM, AV1Convolve2DCompoundTest,
+    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_i8mm));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+//////////////////////////////////////////////////
+// Compound convolve-2d functions (high bit-depth)
+//////////////////////////////////////////////////
+
+class AV1Convolve2DHighbdCompoundTest
+    : public AV1ConvolveTest<highbd_convolve_2d_func> {
+ public:
+  void RunTest() {
+    auto compound_params = GetCompoundParams();
+    for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) {
+      for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) {
+        for (int sub_x = 0; sub_x < 16; ++sub_x) {
+          for (int sub_y = 0; sub_y < 16; ++sub_y) {
+            for (const auto &compound : compound_params) {
+              TestConvolve(static_cast<InterpFilter>(h_f),
+                           static_cast<InterpFilter>(v_f), sub_x, sub_y,
+                           compound);
+            }
+          }
+        }
+      }
+    }
+  }
+
+ private:
+  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
+                    const int sub_x, const int sub_y,
+                    const CompoundParam &compound) {
+    const BlockSize &block = GetParam().Block();
+    const int width = block.Width();
+    const int height = block.Height();
+    const uint16_t *input1 = FirstRandomInput16(GetParam());
+    const uint16_t *input2 = SecondRandomInput16(GetParam());
+    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
+    Convolve(av1_highbd_dist_wtd_convolve_2d_c, input1, input2, reference,
+             reference_conv_buf, compound, h_f, v_f, sub_x, sub_y);
+
+    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
+    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
+             compound, h_f, v_f, sub_x, sub_y);
+
+    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
+    AssertOutputBufferEq(reference, test, width, height);
+  }
+
+ private:
+  void Convolve(highbd_convolve_2d_func test_func, const uint16_t *src1,
+                const uint16_t *src2, uint16_t *dst, uint16_t *conv_buf,
+                const CompoundParam &compound, const InterpFilter h_f,
+                const InterpFilter v_f, const int sub_x, const int sub_y) {
+    const BlockSize &block = GetParam().Block();
+    const int width = block.Width();
+    const int height = block.Height();
+
+    const InterpFilterParams *filter_params_x =
+        av1_get_interp_filter_params_with_block_size(h_f, width);
+    const InterpFilterParams *filter_params_y =
+        av1_get_interp_filter_params_with_block_size(v_f, height);
+    const int bit_depth = GetParam().BitDepth();
+    ConvolveParams conv_params =
+        GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
+    test_func(src1, width, dst, kOutputStride, width, height, filter_params_x,
+              filter_params_y, sub_x, sub_y, &conv_params, bit_depth);
+
+    conv_params =
+        GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
+    test_func(src2, width, dst, kOutputStride, width, height, filter_params_x,
+              filter_params_y, sub_x, sub_y, &conv_params, bit_depth);
+  }
+};
+
+TEST_P(AV1Convolve2DHighbdCompoundTest, RunTest) { RunTest(); }
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AV1Convolve2DHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_c));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AV1Convolve2DHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sse4_1));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1Convolve2DHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1Convolve2DHighbdCompoundTest,
+    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_neon));
+#endif
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/av1_encoder_parms_get_to_decoder.cc b/third_party/aom/test/av1_encoder_parms_get_to_decoder.cc
new file mode 100644
index 0000000000..402e70c34a
--- /dev/null
+++ b/third_party/aom/test/av1_encoder_parms_get_to_decoder.cc
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+#include "aom/aom_decoder.h"
+#include "av1/decoder/decoder.h"
+
+namespace {
+
+const int kMaxPsnr = 100;
+
+struct ParamPassingTestVideo {
+  const char *name;
+  uint32_t width;
+  uint32_t height;
+  uint32_t bitrate;
+  int frames;
+};
+
+const ParamPassingTestVideo kAV1ParamPassingTestVector = {
+  "niklas_1280_720_30.y4m", 1280, 720, 600, 3
+};
+
+struct EncodeParameters {
+  int32_t lossless;
+  aom_color_primaries_t color_primaries;
+  aom_transfer_characteristics_t transfer_characteristics;
+  aom_matrix_coefficients_t matrix_coefficients;
+  aom_color_range_t color_range;
+  aom_chroma_sample_position_t chroma_sample_position;
+  int32_t render_size[2];
+};
+
+const EncodeParameters kAV1EncodeParameterSet[] = {
+  { 1,
+    AOM_CICP_CP_BT_709,
+    AOM_CICP_TC_BT_709,
+    AOM_CICP_MC_BT_709,
+    AOM_CR_STUDIO_RANGE,
+    AOM_CSP_UNKNOWN,
+    { 0, 0 } },
+  { 0,
+    AOM_CICP_CP_BT_470_M,
+    AOM_CICP_TC_BT_470_M,
+    AOM_CICP_MC_BT_470_B_G,
+    AOM_CR_FULL_RANGE,
+    AOM_CSP_VERTICAL,
+    { 0, 0 } },
+  { 1,
+    AOM_CICP_CP_BT_601,
+    AOM_CICP_TC_BT_601,
+    AOM_CICP_MC_BT_601,
+    AOM_CR_STUDIO_RANGE,
+    AOM_CSP_COLOCATED,
+    { 0, 0 } },
+  { 0,
+    AOM_CICP_CP_BT_2020,
+    AOM_CICP_TC_BT_2020_10_BIT,
+    AOM_CICP_MC_BT_2020_NCL,
+    AOM_CR_FULL_RANGE,
+    AOM_CSP_RESERVED,
+    { 640, 480 } },
+};
+
+class AVxEncoderParmsGetToDecoder
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWithParam<EncodeParameters> {
+ protected:
+  AVxEncoderParmsGetToDecoder()
+      : EncoderTest(GET_PARAM(0)), encode_parms(GET_PARAM(1)) {}
+
+  ~AVxEncoderParmsGetToDecoder() override = default;
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kTwoPassGood);
+    cfg_.g_lag_in_frames = 25;
+    test_video_ = kAV1ParamPassingTestVector;
+    cfg_.rc_target_bitrate = test_video_.bitrate;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 3);
+      encoder->Control(AV1E_SET_COLOR_PRIMARIES, encode_parms.color_primaries);
+      encoder->Control(AV1E_SET_TRANSFER_CHARACTERISTICS,
+                       encode_parms.transfer_characteristics);
+      encoder->Control(AV1E_SET_MATRIX_COEFFICIENTS,
+                       encode_parms.matrix_coefficients);
+      encoder->Control(AV1E_SET_COLOR_RANGE, encode_parms.color_range);
+      encoder->Control(AV1E_SET_CHROMA_SAMPLE_POSITION,
+                       encode_parms.chroma_sample_position);
+      encoder->Control(AV1E_SET_LOSSLESS, encode_parms.lossless);
+      if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) {
+        encoder->Control(AV1E_SET_RENDER_SIZE, encode_parms.render_size);
+      }
+    }
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             aom_codec_pts_t pts) override {
+    (void)pts;
+    if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) {
+      EXPECT_EQ(encode_parms.render_size[0], (int)img.r_w);
+      EXPECT_EQ(encode_parms.render_size[1], (int)img.r_h);
+    }
+    EXPECT_EQ(encode_parms.color_primaries, img.cp);
+    EXPECT_EQ(encode_parms.transfer_characteristics, img.tc);
+    EXPECT_EQ(encode_parms.matrix_coefficients, img.mc);
+    EXPECT_EQ(encode_parms.color_range, img.range);
+    EXPECT_EQ(encode_parms.chroma_sample_position, img.csp);
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    if (encode_parms.lossless) {
+      EXPECT_EQ(kMaxPsnr, pkt->data.psnr.psnr[0]);
+    }
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  ParamPassingTestVideo test_video_;
+
+ private:
+  EncodeParameters encode_parms;
+};
+
+TEST_P(AVxEncoderParmsGetToDecoder, BitstreamParms) {
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  std::unique_ptr<libaom_test::VideoSource> video(
+      new libaom_test::Y4mVideoSource(test_video_.name, 0, test_video_.frames));
+  ASSERT_NE(video, nullptr);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+}
+
+AV1_INSTANTIATE_TEST_SUITE(AVxEncoderParmsGetToDecoder,
+                           ::testing::ValuesIn(kAV1EncodeParameterSet));
+}  // namespace
diff --git a/third_party/aom/test/av1_ext_tile_test.cc b/third_party/aom/test/av1_ext_tile_test.cc
new file mode 100644
index 0000000000..59c44cad12
--- /dev/null
+++ b/third_party/aom/test/av1_ext_tile_test.cc
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <string>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+
+namespace {
+// The number of frames to be encoded/decoded
+const int kLimit = 8;
+// Skip 1 frame to check the frame decoding independency.
+const int kSkip = 5;
+const int kTileSize = 1;
+const int kTIleSizeInPixels = (kTileSize << 6);
+// Fake width and height so that they can be multiples of the tile size.
+const int kImgWidth = 704;
+const int kImgHeight = 576;
+
+// This test tests large scale tile coding case. Non-large-scale tile coding
+// is tested by the tile_independence test.
+class AV1ExtTileTest
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AV1ExtTileTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        set_cpu_used_(GET_PARAM(2)) {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.w = kImgWidth;
+    cfg.h = kImgHeight;
+    cfg.allow_lowbitdepth = 1;
+
+    decoder_ = codec_->CreateDecoder(cfg, 0);
+    decoder_->Control(AV1_SET_TILE_MODE, 1);
+    decoder_->Control(AV1D_EXT_TILE_DEBUG, 1);
+    decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+    decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
+
+    // Allocate buffer to store tile image.
+    aom_img_alloc(&tile_img_, AOM_IMG_FMT_I420, kImgWidth, kImgHeight, 32);
+
+    md5_.clear();
+    tile_md5_.clear();
+  }
+
+  ~AV1ExtTileTest() override {
+    aom_img_free(&tile_img_);
+    delete decoder_;
+  }
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+
+    cfg_.g_lag_in_frames = 0;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.g_error_resilient = 1;
+
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_min_quantizer = 0;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      // Encode setting
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+
+      // TODO(yunqingwang): test single_tile_decoding = 0.
+      encoder->Control(AV1E_SET_SINGLE_TILE_DECODING, 1);
+      // Always use 64x64 max partition.
+      encoder->Control(AV1E_SET_SUPERBLOCK_SIZE, AOM_SUPERBLOCK_SIZE_64X64);
+      // Set tile_columns and tile_rows to MAX values, which guarantees the tile
+      // size of 64 x 64 pixels(i.e. 1 SB) for <= 4k resolution.
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 6);
+      encoder->Control(AV1E_SET_TILE_ROWS, 6);
+    } else if (video->frame() == 1) {
+      frame_flags_ =
+          AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF;
+    }
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             aom_codec_pts_t pts) override {
+    // Skip 1 already decoded frame to be consistent with the decoder in this
+    // test.
+    if (pts == (aom_codec_pts_t)kSkip) return;
+
+    // Calculate MD5 as the reference.
+    ::libaom_test::MD5 md5_res;
+    md5_res.Add(&img);
+    md5_.push_back(md5_res.Get());
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    // Skip decoding 1 frame.
+    if (pkt->data.frame.pts == (aom_codec_pts_t)kSkip) return;
+
+    bool IsLastFrame = (pkt->data.frame.pts == (aom_codec_pts_t)(kLimit - 1));
+
+    // Decode the first (kLimit - 1) frames as whole frame, and decode the last
+    // frame in single tiles.
+    for (int r = 0; r < kImgHeight / kTIleSizeInPixels; ++r) {
+      for (int c = 0; c < kImgWidth / kTIleSizeInPixels; ++c) {
+        if (!IsLastFrame) {
+          decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+          decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
+        } else {
+          decoder_->Control(AV1_SET_DECODE_TILE_ROW, r);
+          decoder_->Control(AV1_SET_DECODE_TILE_COL, c);
+        }
+
+        const aom_codec_err_t res = decoder_->DecodeFrame(
+            reinterpret_cast<uint8_t *>(pkt->data.frame.buf),
+            pkt->data.frame.sz);
+        if (res != AOM_CODEC_OK) {
+          abort_ = true;
+          ASSERT_EQ(AOM_CODEC_OK, res);
+        }
+        const aom_image_t *img = decoder_->GetDxData().Next();
+
+        if (!IsLastFrame) {
+          if (img) {
+            ::libaom_test::MD5 md5_res;
+            md5_res.Add(img);
+            tile_md5_.push_back(md5_res.Get());
+          }
+          break;
+        }
+
+        const int kMaxMBPlane = 3;
+        for (int plane = 0; plane < kMaxMBPlane; ++plane) {
+          const int shift = (plane == 0) ? 0 : 1;
+          int tile_height = kTIleSizeInPixels >> shift;
+          int tile_width = kTIleSizeInPixels >> shift;
+
+          for (int tr = 0; tr < tile_height; ++tr) {
+            memcpy(tile_img_.planes[plane] +
+                       tile_img_.stride[plane] * (r * tile_height + tr) +
+                       c * tile_width,
+                   img->planes[plane] + img->stride[plane] * tr, tile_width);
+          }
+        }
+      }
+
+      if (!IsLastFrame) break;
+    }
+
+    if (IsLastFrame) {
+      ::libaom_test::MD5 md5_res;
+      md5_res.Add(&tile_img_);
+      tile_md5_.push_back(md5_res.Get());
+    }
+  }
+
+  void TestRoundTrip() {
+    ::libaom_test::I420VideoSource video(
+        "hantro_collage_w352h288.yuv", kImgWidth, kImgHeight, 30, 1, 0, kLimit);
+    cfg_.rc_target_bitrate = 500;
+    cfg_.g_error_resilient = AOM_ERROR_RESILIENT_DEFAULT;
+    cfg_.large_scale_tile = 1;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_threads = 1;
+
+    // Tile encoding
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+    // Compare to check if two vectors are equal.
+    ASSERT_EQ(md5_, tile_md5_);
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  int set_cpu_used_;
+  ::libaom_test::Decoder *decoder_;
+  aom_image_t tile_img_;
+  std::vector<std::string> md5_;
+  std::vector<std::string> tile_md5_;
+};
+
+TEST_P(AV1ExtTileTest, DecoderResultTest) { TestRoundTrip(); }
+
+AV1_INSTANTIATE_TEST_SUITE(
+    // Now only test 2-pass mode.
+    AV1ExtTileTest, ::testing::Values(::libaom_test::kTwoPassGood),
+    ::testing::Range(1, 4));
+
+class AV1ExtTileTestLarge : public AV1ExtTileTest {};
+
+TEST_P(AV1ExtTileTestLarge, DecoderResultTest) { TestRoundTrip(); }
+
+AV1_INSTANTIATE_TEST_SUITE(
+    // Now only test 2-pass mode.
+    AV1ExtTileTestLarge, ::testing::Values(::libaom_test::kTwoPassGood),
+    ::testing::Range(0, 1));
+}  // namespace
diff --git a/third_party/aom/test/av1_external_partition_test.cc b/third_party/aom/test/av1_external_partition_test.cc
new file mode 100644
index 0000000000..88f6216fa5
--- /dev/null
+++ b/third_party/aom/test/av1_external_partition_test.cc
@@ -0,0 +1,702 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <fstream>
+#include <new>
+#include <sstream>
+#include <string>
+
+#include "aom/aom_codec.h"
+#include "aom/aom_external_partition.h"
+#include "av1/common/blockd.h"
+#include "av1/encoder/encodeframe_utils.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/y4m_video_source.h"
+#include "test/util.h"
+
+#if CONFIG_AV1_ENCODER
+#if !CONFIG_REALTIME_ONLY
+namespace {
+
+constexpr int kFrameNum = 8;
+constexpr int kVersion = 1;
+
+typedef struct TestData {
+  int version = kVersion;
+} TestData;
+
+typedef struct ToyModel {
+  TestData *data;
+  aom_ext_part_config_t config;
+  aom_ext_part_funcs_t funcs;
+  int mi_row;
+  int mi_col;
+  int frame_width;
+  int frame_height;
+  BLOCK_SIZE block_size;
+} ToyModel;
+
+// Note:
+// if CONFIG_PARTITION_SEARCH_ORDER = 0, we test APIs designed for the baseline
+// encoder's DFS partition search workflow.
+// if CONFIG_PARTITION_SEARCH_ORDER = 1, we test APIs designed for the new
+// ML model's partition search workflow.
+#if CONFIG_PARTITION_SEARCH_ORDER
+aom_ext_part_status_t ext_part_create_model(
+    void *priv, const aom_ext_part_config_t *part_config,
+    aom_ext_part_model_t *ext_part_model) {
+  TestData *received_data = reinterpret_cast<TestData *>(priv);
+  EXPECT_EQ(received_data->version, kVersion);
+  ToyModel *toy_model = new (std::nothrow) ToyModel;
+  if (toy_model == nullptr) {
+    EXPECT_NE(toy_model, nullptr);
+    return AOM_EXT_PART_ERROR;
+  }
+  toy_model->data = received_data;
+  *ext_part_model = toy_model;
+  EXPECT_EQ(part_config->superblock_size, BLOCK_64X64);
+  return AOM_EXT_PART_OK;
+}
+
+aom_ext_part_status_t ext_part_send_features(
+    aom_ext_part_model_t ext_part_model,
+    const aom_partition_features_t *part_features) {
+  ToyModel *toy_model = static_cast<ToyModel *>(ext_part_model);
+  toy_model->mi_row = part_features->mi_row;
+  toy_model->mi_col = part_features->mi_col;
+  toy_model->frame_width = part_features->frame_width;
+  toy_model->frame_height = part_features->frame_height;
+  toy_model->block_size = static_cast<BLOCK_SIZE>(part_features->block_size);
+  return AOM_EXT_PART_OK;
+}
+
+// The model provide the whole decision tree to the encoder.
+aom_ext_part_status_t ext_part_get_partition_decision_whole_tree(
+    aom_ext_part_model_t ext_part_model,
+    aom_partition_decision_t *ext_part_decision) {
+  ToyModel *toy_model = static_cast<ToyModel *>(ext_part_model);
+  // A toy model that always asks the encoder to encode with
+  // 4x4 blocks (the smallest).
+  ext_part_decision->is_final_decision = 1;
+  // Note: super block size is fixed to BLOCK_64X64 for the
+  // input video. It is determined inside the encoder, see the
+  // check in "ext_part_create_model".
+  const int is_last_sb_col =
+      toy_model->mi_col * 4 + 64 > toy_model->frame_width;
+  const int is_last_sb_row =
+      toy_model->mi_row * 4 + 64 > toy_model->frame_height;
+  if (is_last_sb_row && is_last_sb_col) {
+    // 64x64: 1 node
+    // 32x32: 4 nodes (only the first one will further split)
+    // 16x16: 4 nodes
+    // 8x8:   4 * 4 nodes
+    // 4x4:   4 * 4 * 4 nodes
+    const int num_blocks = 1 + 4 + 4 + 4 * 4 + 4 * 4 * 4;
+    const int num_4x4_blocks = 4 * 4 * 4;
+    ext_part_decision->num_nodes = num_blocks;
+    // 64x64
+    ext_part_decision->partition_decision[0] = PARTITION_SPLIT;
+    // 32x32, only the first one will split, the other three are
+    // out of frame boundary.
+    ext_part_decision->partition_decision[1] = PARTITION_SPLIT;
+    ext_part_decision->partition_decision[2] = PARTITION_NONE;
+    ext_part_decision->partition_decision[3] = PARTITION_NONE;
+    ext_part_decision->partition_decision[4] = PARTITION_NONE;
+    // The rest blocks inside the top-left 32x32 block.
+    for (int i = 5; i < num_blocks - num_4x4_blocks; ++i) {
+      ext_part_decision->partition_decision[0] = PARTITION_SPLIT;
+    }
+    for (int i = num_blocks - num_4x4_blocks; i < num_blocks; ++i) {
+      ext_part_decision->partition_decision[i] = PARTITION_NONE;
+    }
+  } else if (is_last_sb_row) {
+    // 64x64: 1 node
+    // 32x32: 4 nodes (only the first two will further split)
+    // 16x16: 2 * 4 nodes
+    // 8x8:   2 * 4 * 4 nodes
+    // 4x4:   2 * 4 * 4 * 4 nodes
+    const int num_blocks = 1 + 4 + 2 * 4 + 2 * 4 * 4 + 2 * 4 * 4 * 4;
+    const int num_4x4_blocks = 2 * 4 * 4 * 4;
+    ext_part_decision->num_nodes = num_blocks;
+    // 64x64
+    ext_part_decision->partition_decision[0] = PARTITION_SPLIT;
+    // 32x32, only the first two will split, the other two are out
+    // of frame boundary.
+    ext_part_decision->partition_decision[1] = PARTITION_SPLIT;
+    ext_part_decision->partition_decision[2] = PARTITION_SPLIT;
+    ext_part_decision->partition_decision[3] = PARTITION_NONE;
+    ext_part_decision->partition_decision[4] = PARTITION_NONE;
+    // The rest blocks.
+    for (int i = 5; i < num_blocks - num_4x4_blocks; ++i) {
+      ext_part_decision->partition_decision[0] = PARTITION_SPLIT;
+    }
+    for (int i = num_blocks - num_4x4_blocks; i < num_blocks; ++i) {
+      ext_part_decision->partition_decision[i] = PARTITION_NONE;
+    }
+  } else if (is_last_sb_col) {
+    // 64x64: 1 node
+    // 32x32: 4 nodes (only the top-left and bottom-left will further split)
+    // 16x16: 2 * 4 nodes
+    // 8x8:   2 * 4 * 4 nodes
+    // 4x4:   2 * 4 * 4 * 4 nodes
+    const int num_blocks = 1 + 4 + 2 * 4 + 2 * 4 * 4 + 2 * 4 * 4 * 4;
+    const int num_4x4_blocks = 2 * 4 * 4 * 4;
+    ext_part_decision->num_nodes = num_blocks;
+    // 64x64
+    ext_part_decision->partition_decision[0] = PARTITION_SPLIT;
+    // 32x32, only the top-left and bottom-left will split, the other two are
+    // out of frame boundary.
+    ext_part_decision->partition_decision[1] = PARTITION_SPLIT;
+    ext_part_decision->partition_decision[2] = PARTITION_NONE;
+    ext_part_decision->partition_decision[3] = PARTITION_SPLIT;
+    ext_part_decision->partition_decision[4] = PARTITION_NONE;
+    // The rest blocks.
+    for (int i = 5; i < num_blocks - num_4x4_blocks; ++i) {
+      ext_part_decision->partition_decision[0] = PARTITION_SPLIT;
+    }
+    for (int i = num_blocks - num_4x4_blocks; i < num_blocks; ++i) {
+      ext_part_decision->partition_decision[i] = PARTITION_NONE;
+    }
+  } else {
+    // 64x64: 1 node
+    // 32x32: 4 nodes
+    // 16x16: 4 * 4 nodes
+    // 8x8:   4 * 4 * 4 nodes
+    // 4x4:   4 * 4 * 4 * 4 nodes
+    const int num_blocks = 1 + 4 + 4 * 4 + 4 * 4 * 4 + 4 * 4 * 4 * 4;
+    const int num_4x4_blocks = 4 * 4 * 4 * 4;
+    ext_part_decision->num_nodes = num_blocks;
+    for (int i = 0; i < num_blocks - num_4x4_blocks; ++i) {
+      ext_part_decision->partition_decision[i] = PARTITION_SPLIT;
+    }
+    for (int i = num_blocks - num_4x4_blocks; i < num_blocks; ++i) {
+      ext_part_decision->partition_decision[i] = PARTITION_NONE;
+    }
+  }
+
+  return AOM_EXT_PART_OK;
+}
+
+aom_ext_part_status_t ext_part_get_partition_decision_recursive(
+    aom_ext_part_model_t ext_part_model,
+    aom_partition_decision_t *ext_part_decision) {
+  ext_part_decision->current_decision = PARTITION_NONE;
+  ext_part_decision->is_final_decision = 1;
+  ToyModel *toy_model = static_cast<ToyModel *>(ext_part_model);
+  // Note: super block size is fixed to BLOCK_64X64 for the
+  // input video. It is determined inside the encoder, see the
+  // check in "ext_part_create_model".
+  const int is_last_sb_col =
+      toy_model->mi_col * 4 + 64 > toy_model->frame_width;
+  const int is_last_sb_row =
+      toy_model->mi_row * 4 + 64 > toy_model->frame_height;
+  if (is_last_sb_row && is_last_sb_col) {
+    if (block_size_wide[toy_model->block_size] == 64) {
+      ext_part_decision->current_decision = PARTITION_SPLIT;
+    } else {
+      ext_part_decision->current_decision = PARTITION_NONE;
+    }
+  } else if (is_last_sb_row) {
+    if (block_size_wide[toy_model->block_size] == 64) {
+      ext_part_decision->current_decision = PARTITION_SPLIT;
+    } else {
+      ext_part_decision->current_decision = PARTITION_NONE;
+    }
+  } else if (is_last_sb_col) {
+    if (block_size_wide[toy_model->block_size] == 64) {
+      ext_part_decision->current_decision = PARTITION_SPLIT;
+    } else {
+      ext_part_decision->current_decision = PARTITION_NONE;
+    }
+  } else {
+    ext_part_decision->current_decision = PARTITION_NONE;
+  }
+  return AOM_EXT_PART_OK;
+}
+
+aom_ext_part_status_t ext_part_send_partition_stats(
+    aom_ext_part_model_t ext_part_model,
+    const aom_partition_stats_t *ext_part_stats) {
+  (void)ext_part_model;
+  (void)ext_part_stats;
+  return AOM_EXT_PART_OK;
+}
+
+aom_ext_part_status_t ext_part_delete_model(
+    aom_ext_part_model_t ext_part_model) {
+  ToyModel *toy_model = static_cast<ToyModel *>(ext_part_model);
+  EXPECT_EQ(toy_model->data->version, kVersion);
+  delete toy_model;
+  return AOM_EXT_PART_OK;
+}
+
+class ExternalPartitionTestAPI
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  ExternalPartitionTestAPI()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), psnr_(0.0), nframes_(0) {}
+  ~ExternalPartitionTestAPI() override {}
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.g_threads = 1;
+    cfg_.g_lag_in_frames = 4;
+    cfg_.rc_target_bitrate = 400;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+  }
+
+  bool DoDecode() const override { return false; }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  void SetExternalPartition(bool use_external_partition) {
+    use_external_partition_ = use_external_partition;
+  }
+
+  void SetPartitionControlMode(int mode) { partition_control_mode_ = mode; }
+
+  void SetDecisionMode(aom_ext_part_decision_mode_t mode) {
+    decision_mode_ = mode;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      if (decision_mode_ == AOM_EXT_PART_WHOLE_TREE) {
+        aom_ext_part_funcs_t ext_part_funcs;
+        ext_part_funcs.priv = reinterpret_cast<void *>(&test_data_);
+        ext_part_funcs.decision_mode = AOM_EXT_PART_WHOLE_TREE;
+        ext_part_funcs.create_model = ext_part_create_model;
+        ext_part_funcs.send_features = ext_part_send_features;
+        ext_part_funcs.get_partition_decision =
+            ext_part_get_partition_decision_whole_tree;
+        ext_part_funcs.send_partition_stats = ext_part_send_partition_stats;
+        ext_part_funcs.delete_model = ext_part_delete_model;
+
+        encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        if (use_external_partition_) {
+          encoder->Control(AV1E_SET_EXTERNAL_PARTITION, &ext_part_funcs);
+        }
+        if (partition_control_mode_ == -1) {
+          encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 128);
+          encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 4);
+        } else {
+          switch (partition_control_mode_) {
+            case 1:
+              encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 64);
+              encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 64);
+              break;
+            case 2:
+              encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 4);
+              encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 4);
+              break;
+            default: assert(0 && "Invalid partition control mode."); break;
+          }
+        }
+      } else if (decision_mode_ == AOM_EXT_PART_RECURSIVE) {
+        aom_ext_part_funcs_t ext_part_funcs;
+        ext_part_funcs.priv = reinterpret_cast<void *>(&test_data_);
+        ext_part_funcs.decision_mode = AOM_EXT_PART_RECURSIVE;
+        ext_part_funcs.create_model = ext_part_create_model;
+        ext_part_funcs.send_features = ext_part_send_features;
+        ext_part_funcs.get_partition_decision =
+            ext_part_get_partition_decision_recursive;
+        ext_part_funcs.send_partition_stats = ext_part_send_partition_stats;
+        ext_part_funcs.delete_model = ext_part_delete_model;
+
+        encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        if (use_external_partition_) {
+          encoder->Control(AV1E_SET_EXTERNAL_PARTITION, &ext_part_funcs);
+        }
+        if (partition_control_mode_ == -1) {
+          encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 128);
+          encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 4);
+        } else {
+          switch (partition_control_mode_) {
+            case 1:
+              encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 64);
+              encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 64);
+              break;
+            case 2:
+              encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 4);
+              encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 4);
+              break;
+            default: assert(0 && "Invalid partition control mode."); break;
+          }
+        }
+      } else {
+        assert(0 && "Invalid decision mode.");
+      }
+    }
+  }
+
+ private:
+  libaom_test::TestMode encoding_mode_;
+  int cpu_used_;
+  double psnr_;
+  unsigned int nframes_;
+  bool use_external_partition_ = false;
+  TestData test_data_;
+  int partition_control_mode_ = -1;
+  aom_ext_part_decision_mode_t decision_mode_;
+};
+
+// Encode twice and expect the same psnr value.
+// The first run is a normal encoding run with restricted partition types,
+// i.e., we use control flags to force the encoder to encode with the
+// 4x4 block size.
+// The second run is to get partition decisions from a toy model that we
+// built, which will asks the encoder to encode with the 4x4 blocks.
+// We expect the encoding results are the same.
+TEST_P(ExternalPartitionTestAPI, WholePartitionTree4x4Block) {
+  ::libaom_test::Y4mVideoSource video("paris_352_288_30.y4m", 0, kFrameNum);
+  SetExternalPartition(false);
+  SetPartitionControlMode(2);
+  SetDecisionMode(AOM_EXT_PART_WHOLE_TREE);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr = GetAveragePsnr();
+
+  SetExternalPartition(true);
+  SetPartitionControlMode(2);
+  SetDecisionMode(AOM_EXT_PART_WHOLE_TREE);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr2 = GetAveragePsnr();
+
+  EXPECT_DOUBLE_EQ(psnr, psnr2);
+}
+
+TEST_P(ExternalPartitionTestAPI, RecursivePartition) {
+  ::libaom_test::Y4mVideoSource video("paris_352_288_30.y4m", 0, kFrameNum);
+  SetExternalPartition(false);
+  SetPartitionControlMode(1);
+  SetDecisionMode(AOM_EXT_PART_RECURSIVE);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr = GetAveragePsnr();
+
+  SetExternalPartition(true);
+  SetPartitionControlMode(1);
+  SetDecisionMode(AOM_EXT_PART_RECURSIVE);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr2 = GetAveragePsnr();
+
+  const double psnr_thresh = 0.02;
+  EXPECT_NEAR(psnr, psnr2, psnr_thresh);
+}
+
+AV1_INSTANTIATE_TEST_SUITE(ExternalPartitionTestAPI,
+                           ::testing::Values(::libaom_test::kTwoPassGood),
+                           ::testing::Values(4));  // cpu_used
+
+#else   // !CONFIG_PARTITION_SEARCH_ORDER
+// Feature files written during encoding, as defined in partition_strategy.c.
+std::string feature_file_names[] = {
+  "feature_before_partition_none",
+  "feature_before_partition_none_prune_rect",
+  "feature_after_partition_none_prune",
+  "feature_after_partition_none_terminate",
+  "feature_after_partition_split_terminate",
+  "feature_after_partition_split_prune_rect",
+  "feature_after_partition_rect",
+  "feature_after_partition_ab",
+};
+
+// Files written here in the test, where the feature data is received
+// from the API.
+std::string test_feature_file_names[] = {
+  "test_feature_before_partition_none",
+  "test_feature_before_partition_none_prune_rect",
+  "test_feature_after_partition_none_prune",
+  "test_feature_after_partition_none_terminate",
+  "test_feature_after_partition_split_terminate",
+  "test_feature_after_partition_split_prune_rect",
+  "test_feature_after_partition_rect",
+  "test_feature_after_partition_ab",
+};
+
+static void write_features_to_file(const float *features,
+                                   const int feature_size, const int id) {
+  if (!WRITE_FEATURE_TO_FILE) return;
+  char filename[256];
+  snprintf(filename, sizeof(filename), "%s",
+           test_feature_file_names[id].c_str());
+  FILE *pfile = fopen(filename, "a");
+  ASSERT_NE(pfile, nullptr);
+  for (int i = 0; i < feature_size; ++i) {
+    fprintf(pfile, "%.6f", features[i]);
+    if (i < feature_size - 1) fprintf(pfile, ",");
+  }
+  fprintf(pfile, "\n");
+  fclose(pfile);
+}
+
+aom_ext_part_status_t ext_part_create_model(
+    void *priv, const aom_ext_part_config_t *part_config,
+    aom_ext_part_model_t *ext_part_model) {
+  TestData *received_data = reinterpret_cast<TestData *>(priv);
+  EXPECT_EQ(received_data->version, kVersion);
+  ToyModel *toy_model = new (std::nothrow) ToyModel;
+  if (toy_model == nullptr) {
+    EXPECT_NE(toy_model, nullptr);
+    return AOM_EXT_PART_ERROR;
+  }
+  toy_model->data = received_data;
+  *ext_part_model = toy_model;
+  EXPECT_EQ(part_config->superblock_size, BLOCK_64X64);
+  return AOM_EXT_PART_OK;
+}
+
+aom_ext_part_status_t ext_part_create_model_test(
+    void *priv, const aom_ext_part_config_t *part_config,
+    aom_ext_part_model_t *ext_part_model) {
+  (void)priv;
+  (void)ext_part_model;
+  EXPECT_EQ(part_config->superblock_size, BLOCK_64X64);
+  // Return status indicates it's a encoder test. It lets the encoder
+  // set a flag and write partition features to text files.
+  return AOM_EXT_PART_TEST;
+}
+
+aom_ext_part_status_t ext_part_send_features(
+    aom_ext_part_model_t ext_part_model,
+    const aom_partition_features_t *part_features) {
+  (void)ext_part_model;
+  (void)part_features;
+  return AOM_EXT_PART_OK;
+}
+
+aom_ext_part_status_t ext_part_send_features_test(
+    aom_ext_part_model_t ext_part_model,
+    const aom_partition_features_t *part_features) {
+  (void)ext_part_model;
+  if (part_features->id == AOM_EXT_PART_FEATURE_BEFORE_NONE) {
+    write_features_to_file(part_features->before_part_none.f,
+                           AOM_EXT_PART_SIZE_DIRECT_SPLIT, 0);
+  } else if (part_features->id == AOM_EXT_PART_FEATURE_BEFORE_NONE_PART2) {
+    write_features_to_file(part_features->before_part_none.f_part2,
+                           AOM_EXT_PART_SIZE_PRUNE_PART, 1);
+  } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_NONE) {
+    write_features_to_file(part_features->after_part_none.f,
+                           AOM_EXT_PART_SIZE_PRUNE_NONE, 2);
+  } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_NONE_PART2) {
+    write_features_to_file(part_features->after_part_none.f_terminate,
+                           AOM_EXT_PART_SIZE_TERM_NONE, 3);
+  } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_SPLIT) {
+    write_features_to_file(part_features->after_part_split.f_terminate,
+                           AOM_EXT_PART_SIZE_TERM_SPLIT, 4);
+  } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_SPLIT_PART2) {
+    write_features_to_file(part_features->after_part_split.f_prune_rect,
+                           AOM_EXT_PART_SIZE_PRUNE_RECT, 5);
+  } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_RECT) {
+    write_features_to_file(part_features->after_part_rect.f,
+                           AOM_EXT_PART_SIZE_PRUNE_AB, 6);
+  } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_AB) {
+    write_features_to_file(part_features->after_part_ab.f,
+                           AOM_EXT_PART_SIZE_PRUNE_4_WAY, 7);
+  }
+  return AOM_EXT_PART_TEST;
+}
+
+aom_ext_part_status_t ext_part_get_partition_decision(
+    aom_ext_part_model_t ext_part_model,
+    aom_partition_decision_t *ext_part_decision) {
+  (void)ext_part_model;
+  (void)ext_part_decision;
+  // Return an invalid decision such that the encoder doesn't take any
+  // partition decision from the ml model.
+  return AOM_EXT_PART_ERROR;
+}
+
+aom_ext_part_status_t ext_part_send_partition_stats(
+    aom_ext_part_model_t ext_part_model,
+    const aom_partition_stats_t *ext_part_stats) {
+  (void)ext_part_model;
+  (void)ext_part_stats;
+  return AOM_EXT_PART_OK;
+}
+
+aom_ext_part_status_t ext_part_delete_model(
+    aom_ext_part_model_t ext_part_model) {
+  ToyModel *toy_model = static_cast<ToyModel *>(ext_part_model);
+  EXPECT_EQ(toy_model->data->version, kVersion);
+  delete toy_model;
+  return AOM_EXT_PART_OK;
+}
+
+class ExternalPartitionTestDfsAPI
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  ExternalPartitionTestDfsAPI()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), psnr_(0.0), nframes_(0) {}
+  ~ExternalPartitionTestDfsAPI() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.g_threads = 1;
+    cfg_.g_lag_in_frames = 4;
+    cfg_.rc_target_bitrate = 400;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+  }
+
+  bool DoDecode() const override { return false; }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  void SetExternalPartition(bool use_external_partition) {
+    use_external_partition_ = use_external_partition;
+  }
+
+  void SetTestSendFeatures(int test_send_features) {
+    test_send_features_ = test_send_features;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      aom_ext_part_funcs_t ext_part_funcs;
+      ext_part_funcs.priv = reinterpret_cast<void *>(&test_data_);
+      if (use_external_partition_) {
+        ext_part_funcs.create_model = ext_part_create_model;
+        ext_part_funcs.send_features = ext_part_send_features;
+      }
+      if (test_send_features_ == 1) {
+        ext_part_funcs.create_model = ext_part_create_model;
+        ext_part_funcs.send_features = ext_part_send_features_test;
+      } else if (test_send_features_ == 0) {
+        ext_part_funcs.create_model = ext_part_create_model_test;
+        ext_part_funcs.send_features = ext_part_send_features;
+      }
+      ext_part_funcs.get_partition_decision = ext_part_get_partition_decision;
+      ext_part_funcs.send_partition_stats = ext_part_send_partition_stats;
+      ext_part_funcs.delete_model = ext_part_delete_model;
+
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      if (use_external_partition_) {
+        encoder->Control(AV1E_SET_EXTERNAL_PARTITION, &ext_part_funcs);
+      }
+    }
+  }
+
+ private:
+  libaom_test::TestMode encoding_mode_;
+  int cpu_used_;
+  double psnr_;
+  unsigned int nframes_;
+  bool use_external_partition_ = false;
+  int test_send_features_ = -1;
+  TestData test_data_;
+};
+
+// Encode twice and expect the same psnr value.
+// The first run is the baseline without external partition.
+// The second run is to get partition decisions from the toy model we defined.
+// Here, we let the partition decision return invalid for all stages.
+// In this case, the external partition doesn't alter the original encoder
+// behavior. So we expect the same encoding results.
+TEST_P(ExternalPartitionTestDfsAPI, EncodeMatch) {
+  ::libaom_test::Y4mVideoSource video("paris_352_288_30.y4m", 0, kFrameNum);
+  SetExternalPartition(false);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr = GetAveragePsnr();
+
+  SetExternalPartition(true);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr2 = GetAveragePsnr();
+
+  EXPECT_DOUBLE_EQ(psnr, psnr2);
+}
+
+// Encode twice to compare generated feature files.
+// The first run let the encoder write partition features to file.
+// The second run calls send partition features function to send features to
+// the external model, and we write them to file.
+// The generated files should match each other.
+TEST_P(ExternalPartitionTestDfsAPI, SendFeatures) {
+  ::libaom_test::Y4mVideoSource video("paris_352_288_30.y4m", 0, kFrameNum);
+  SetExternalPartition(true);
+  SetTestSendFeatures(0);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  SetExternalPartition(true);
+  SetTestSendFeatures(1);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  if (!WRITE_FEATURE_TO_FILE) return;
+
+  // Compare feature files by reading them into strings.
+  for (int i = 0; i < 8; ++i) {
+    std::ifstream base_file(feature_file_names[i]);
+    ASSERT_TRUE(base_file.good());
+    std::stringstream base_stream;
+    base_stream << base_file.rdbuf();
+    std::string base_string = base_stream.str();
+
+    std::ifstream test_file(test_feature_file_names[i]);
+    ASSERT_TRUE(test_file.good());
+    std::stringstream test_stream;
+    test_stream << test_file.rdbuf();
+    std::string test_string = test_stream.str();
+
+    EXPECT_STREQ(base_string.c_str(), test_string.c_str());
+  }
+
+  // Remove files.
+  std::string command("rm -f feature_* test_feature_*");
+  system(command.c_str());
+}
+
+AV1_INSTANTIATE_TEST_SUITE(ExternalPartitionTestDfsAPI,
+                           ::testing::Values(::libaom_test::kTwoPassGood),
+                           ::testing::Values(4));  // cpu_used
+#endif  // CONFIG_PARTITION_SEARCH_ORDER
+
+}  // namespace
+#endif  // !CONFIG_REALTIME_ONLY
+#endif  // CONFIG_AV1_ENCODER
diff --git a/third_party/aom/test/av1_fwd_txfm1d_test.cc b/third_party/aom/test/av1_fwd_txfm1d_test.cc
new file mode 100644
index 0000000000..6bae9f8364
--- /dev/null
+++ b/third_party/aom/test/av1_fwd_txfm1d_test.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+#include <new>
+
+#include "av1/encoder/av1_fwd_txfm1d.h"
+#include "test/av1_txfm_test.h"
+
+using libaom_test::ACMRandom;
+using libaom_test::input_base;
+using libaom_test::reference_hybrid_1d;
+using libaom_test::TYPE_ADST;
+using libaom_test::TYPE_DCT;
+using libaom_test::TYPE_IDTX;
+using libaom_test::TYPE_TXFM;
+
+namespace {
+const int txfm_type_num = 3;
+const TYPE_TXFM txfm_type_ls[txfm_type_num] = { TYPE_DCT, TYPE_ADST,
+                                                TYPE_IDTX };
+
+const int txfm_size_num = 5;
+
+const int txfm_size_ls[] = { 4, 8, 16, 32, 64 };
+
+const TxfmFunc fwd_txfm_func_ls[][txfm_type_num] = {
+  { av1_fdct4, av1_fadst4, av1_fidentity4_c },
+  { av1_fdct8, av1_fadst8, av1_fidentity8_c },
+  { av1_fdct16, av1_fadst16, av1_fidentity16_c },
+  { av1_fdct32, nullptr, av1_fidentity32_c },
+  { av1_fdct64, nullptr, nullptr },
+};
+
+// the maximum stage number of fwd/inv 1d dct/adst txfm is 12
+const int8_t cos_bit = 13;
+const int8_t range_bit[12] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 };
+
+TEST(av1_fwd_txfm1d, round_shift) {
+  EXPECT_EQ(round_shift(7, 1), 4);
+  EXPECT_EQ(round_shift(-7, 1), -3);
+
+  EXPECT_EQ(round_shift(7, 2), 2);
+  EXPECT_EQ(round_shift(-7, 2), -2);
+
+  EXPECT_EQ(round_shift(8, 2), 2);
+  EXPECT_EQ(round_shift(-8, 2), -2);
+}
+
+TEST(av1_fwd_txfm1d, av1_cospi_arr_data) {
+  for (int i = 0; i < 4; i++) {
+    for (int j = 0; j < 64; j++) {
+      EXPECT_EQ(av1_cospi_arr_data[i][j],
+                (int32_t)round(cos(PI * j / 128) * (1 << (cos_bit_min + i))));
+    }
+  }
+}
+
+TEST(av1_fwd_txfm1d, accuracy) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int si = 0; si < txfm_size_num; ++si) {
+    int txfm_size = txfm_size_ls[si];
+    std::unique_ptr<int32_t[]> input(new (std::nothrow) int32_t[txfm_size]);
+    std::unique_ptr<int32_t[]> output(new (std::nothrow) int32_t[txfm_size]);
+    std::unique_ptr<double[]> ref_input(new (std::nothrow) double[txfm_size]);
+    std::unique_ptr<double[]> ref_output(new (std::nothrow) double[txfm_size]);
+    ASSERT_NE(input, nullptr);
+    ASSERT_NE(output, nullptr);
+    ASSERT_NE(ref_input, nullptr);
+    ASSERT_NE(ref_output, nullptr);
+
+    for (int ti = 0; ti < txfm_type_num; ++ti) {
+      TYPE_TXFM txfm_type = txfm_type_ls[ti];
+      TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[si][ti];
+      int max_error = 7;
+
+      const int count_test_block = 5000;
+      if (fwd_txfm_func != nullptr) {
+        for (int i = 0; i < count_test_block; ++i) {
+          for (int ni = 0; ni < txfm_size; ++ni) {
+            input[ni] = rnd.Rand16() % input_base - rnd.Rand16() % input_base;
+            ref_input[ni] = static_cast<double>(input[ni]);
+          }
+
+          fwd_txfm_func(input.get(), output.get(), cos_bit, range_bit);
+          reference_hybrid_1d(ref_input.get(), ref_output.get(), txfm_size,
+                              txfm_type);
+
+          for (int ni = 0; ni < txfm_size; ++ni) {
+            ASSERT_LE(
+                abs(output[ni] - static_cast<int32_t>(round(ref_output[ni]))),
+                max_error)
+                << "tx size = " << txfm_size << ", tx type = " << txfm_type;
+          }
+        }
+      }
+    }
+  }
+}
+}  // namespace
diff --git a/third_party/aom/test/av1_fwd_txfm2d_test.cc b/third_party/aom/test/av1_fwd_txfm2d_test.cc
new file mode 100644
index 0000000000..2ed5d94db3
--- /dev/null
+++ b/third_party/aom/test/av1_fwd_txfm2d_test.cc
@@ -0,0 +1,692 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <tuple>
+#include <vector>
+
+#include "config/av1_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "test/av1_txfm_test.h"
+#include "av1/common/av1_txfm.h"
+#include "av1/encoder/hybrid_fwd_txfm.h"
+
+using libaom_test::ACMRandom;
+using libaom_test::bd;
+using libaom_test::compute_avg_abs_error;
+using libaom_test::input_base;
+using libaom_test::tx_type_name;
+using libaom_test::TYPE_TXFM;
+
+using std::vector;
+
+namespace {
+// tx_type_, tx_size_, max_error_, max_avg_error_
+typedef std::tuple<TX_TYPE, TX_SIZE, double, double> AV1FwdTxfm2dParam;
+
+class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
+ public:
+  void SetUp() override {
+    tx_type_ = GET_PARAM(0);
+    tx_size_ = GET_PARAM(1);
+    max_error_ = GET_PARAM(2);
+    max_avg_error_ = GET_PARAM(3);
+    count_ = 500;
+    TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg;
+    av1_get_fwd_txfm_cfg(tx_type_, tx_size_, &fwd_txfm_flip_cfg);
+    amplify_factor_ = libaom_test::get_amplification_factor(tx_type_, tx_size_);
+    tx_width_ = tx_size_wide[fwd_txfm_flip_cfg.tx_size];
+    tx_height_ = tx_size_high[fwd_txfm_flip_cfg.tx_size];
+    ud_flip_ = fwd_txfm_flip_cfg.ud_flip;
+    lr_flip_ = fwd_txfm_flip_cfg.lr_flip;
+
+    fwd_txfm_ = libaom_test::fwd_txfm_func_ls[tx_size_];
+    txfm2d_size_ = tx_width_ * tx_height_;
+    input_ = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(input_[0]) * txfm2d_size_));
+    ASSERT_NE(input_, nullptr);
+    output_ = reinterpret_cast<int32_t *>(
+        aom_memalign(16, sizeof(output_[0]) * txfm2d_size_));
+    ASSERT_NE(output_, nullptr);
+    ref_input_ = reinterpret_cast<double *>(
+        aom_memalign(16, sizeof(ref_input_[0]) * txfm2d_size_));
+    ASSERT_NE(ref_input_, nullptr);
+    ref_output_ = reinterpret_cast<double *>(
+        aom_memalign(16, sizeof(ref_output_[0]) * txfm2d_size_));
+    ASSERT_NE(ref_output_, nullptr);
+  }
+
+  void RunFwdAccuracyCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    double avg_abs_error = 0;
+    for (int ci = 0; ci < count_; ci++) {
+      for (int ni = 0; ni < txfm2d_size_; ++ni) {
+        input_[ni] = rnd.Rand16() % input_base;
+        ref_input_[ni] = static_cast<double>(input_[ni]);
+        output_[ni] = 0;
+        ref_output_[ni] = 0;
+      }
+
+      fwd_txfm_(input_, output_, tx_width_, tx_type_, bd);
+
+      if (lr_flip_ && ud_flip_) {
+        libaom_test::fliplrud(ref_input_, tx_width_, tx_height_, tx_width_);
+      } else if (lr_flip_) {
+        libaom_test::fliplr(ref_input_, tx_width_, tx_height_, tx_width_);
+      } else if (ud_flip_) {
+        libaom_test::flipud(ref_input_, tx_width_, tx_height_, tx_width_);
+      }
+
+      libaom_test::reference_hybrid_2d(ref_input_, ref_output_, tx_type_,
+                                       tx_size_);
+
+      double actual_max_error = 0;
+      for (int ni = 0; ni < txfm2d_size_; ++ni) {
+        ref_output_[ni] = round(ref_output_[ni]);
+        const double this_error =
+            fabs(output_[ni] - ref_output_[ni]) / amplify_factor_;
+        actual_max_error = AOMMAX(actual_max_error, this_error);
+      }
+      EXPECT_GE(max_error_, actual_max_error)
+          << "tx_w: " << tx_width_ << " tx_h: " << tx_height_
+          << ", tx_type = " << (int)tx_type_;
+      if (actual_max_error > max_error_) {  // exit early.
+        break;
+      }
+
+      avg_abs_error += compute_avg_abs_error<int32_t, double>(
+          output_, ref_output_, txfm2d_size_);
+    }
+
+    avg_abs_error /= amplify_factor_;
+    avg_abs_error /= count_;
+    EXPECT_GE(max_avg_error_, avg_abs_error)
+        << "tx_size = " << tx_size_ << ", tx_type = " << tx_type_;
+  }
+
+  void TearDown() override {
+    aom_free(input_);
+    aom_free(output_);
+    aom_free(ref_input_);
+    aom_free(ref_output_);
+  }
+
+ private:
+  double max_error_;
+  double max_avg_error_;
+  int count_;
+  double amplify_factor_;
+  TX_TYPE tx_type_;
+  TX_SIZE tx_size_;
+  int tx_width_;
+  int tx_height_;
+  int txfm2d_size_;
+  FwdTxfm2dFunc fwd_txfm_;
+  int16_t *input_;
+  int32_t *output_;
+  double *ref_input_;
+  double *ref_output_;
+  int ud_flip_;  // flip upside down
+  int lr_flip_;  // flip left to right
+};
+
+static double avg_error_ls[TX_SIZES_ALL] = {
+  0.5,   // 4x4 transform
+  0.5,   // 8x8 transform
+  1.2,   // 16x16 transform
+  6.1,   // 32x32 transform
+  3.4,   // 64x64 transform
+  0.57,  // 4x8 transform
+  0.68,  // 8x4 transform
+  0.92,  // 8x16 transform
+  1.1,   // 16x8 transform
+  4.1,   // 16x32 transform
+  6,     // 32x16 transform
+  3.5,   // 32x64 transform
+  5.7,   // 64x32 transform
+  0.6,   // 4x16 transform
+  0.9,   // 16x4 transform
+  1.2,   // 8x32 transform
+  1.7,   // 32x8 transform
+  2.0,   // 16x64 transform
+  4.7,   // 64x16 transform
+};
+
+static double max_error_ls[TX_SIZES_ALL] = {
+  3,    // 4x4 transform
+  5,    // 8x8 transform
+  11,   // 16x16 transform
+  70,   // 32x32 transform
+  64,   // 64x64 transform
+  3.9,  // 4x8 transform
+  4.3,  // 8x4 transform
+  12,   // 8x16 transform
+  12,   // 16x8 transform
+  32,   // 16x32 transform
+  46,   // 32x16 transform
+  136,  // 32x64 transform
+  136,  // 64x32 transform
+  5,    // 4x16 transform
+  6,    // 16x4 transform
+  21,   // 8x32 transform
+  13,   // 32x8 transform
+  30,   // 16x64 transform
+  36,   // 64x16 transform
+};
+
+vector<AV1FwdTxfm2dParam> GetTxfm2dParamList() {
+  vector<AV1FwdTxfm2dParam> param_list;
+  for (int s = 0; s < TX_SIZES; ++s) {
+    const double max_error = max_error_ls[s];
+    const double avg_error = avg_error_ls[s];
+    for (int t = 0; t < TX_TYPES; ++t) {
+      const TX_TYPE tx_type = static_cast<TX_TYPE>(t);
+      const TX_SIZE tx_size = static_cast<TX_SIZE>(s);
+      if (libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) {
+        param_list.push_back(
+            AV1FwdTxfm2dParam(tx_type, tx_size, max_error, avg_error));
+      }
+    }
+  }
+  return param_list;
+}
+
+INSTANTIATE_TEST_SUITE_P(C, AV1FwdTxfm2d,
+                         ::testing::ValuesIn(GetTxfm2dParamList()));
+
+TEST_P(AV1FwdTxfm2d, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); }
+
+TEST(AV1FwdTxfm2d, CfgTest) {
+  for (int bd_idx = 0; bd_idx < BD_NUM; ++bd_idx) {
+    int bd = libaom_test::bd_arr[bd_idx];
+    int8_t low_range = libaom_test::low_range_arr[bd_idx];
+    int8_t high_range = libaom_test::high_range_arr[bd_idx];
+    for (int tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) {
+      for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+        if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(tx_size),
+                                           static_cast<TX_TYPE>(tx_type)) ==
+            false) {
+          continue;
+        }
+        TXFM_2D_FLIP_CFG cfg;
+        av1_get_fwd_txfm_cfg(static_cast<TX_TYPE>(tx_type),
+                             static_cast<TX_SIZE>(tx_size), &cfg);
+        int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
+        int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
+        av1_gen_fwd_stage_range(stage_range_col, stage_range_row, &cfg, bd);
+        libaom_test::txfm_stage_range_check(stage_range_col, cfg.stage_num_col,
+                                            cfg.cos_bit_col, low_range,
+                                            high_range);
+        libaom_test::txfm_stage_range_check(stage_range_row, cfg.stage_num_row,
+                                            cfg.cos_bit_row, low_range,
+                                            high_range);
+      }
+    }
+  }
+}
+
+typedef void (*lowbd_fwd_txfm_func)(const int16_t *src_diff, tran_low_t *coeff,
+                                    int diff_stride, TxfmParam *txfm_param);
+
+void AV1FwdTxfm2dMatchTest(TX_SIZE tx_size, lowbd_fwd_txfm_func target_func) {
+  const int bd = 8;
+  TxfmParam param;
+  memset(&param, 0, sizeof(param));
+  const int rows = tx_size_high[tx_size];
+  const int cols = tx_size_wide[tx_size];
+  // printf("%d x %d\n", cols, rows);
+  for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+    if (libaom_test::IsTxSizeTypeValid(
+            tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
+      continue;
+    }
+
+    FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
+    if (ref_func != nullptr) {
+      DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
+      DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
+      DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
+      int input_stride = 64;
+      ACMRandom rnd(ACMRandom::DeterministicSeed());
+      for (int cnt = 0; cnt < 500; ++cnt) {
+        if (cnt == 0) {
+          for (int c = 0; c < cols; ++c) {
+            for (int r = 0; r < rows; ++r) {
+              input[r * input_stride + c] = (1 << bd) - 1;
+            }
+          }
+        } else {
+          for (int r = 0; r < rows; ++r) {
+            for (int c = 0; c < cols; ++c) {
+              input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
+            }
+          }
+        }
+        param.tx_type = (TX_TYPE)tx_type;
+        param.tx_size = (TX_SIZE)tx_size;
+        param.tx_set_type = EXT_TX_SET_ALL16;
+        param.bd = bd;
+        ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
+        target_func(input, output, input_stride, &param);
+        const int check_cols = AOMMIN(32, cols);
+        const int check_rows = AOMMIN(32, rows * cols / check_cols);
+        for (int r = 0; r < check_rows; ++r) {
+          for (int c = 0; c < check_cols; ++c) {
+            ASSERT_EQ(ref_output[r * check_cols + c],
+                      output[r * check_cols + c])
+                << "[" << r << "," << c << "] cnt:" << cnt
+                << " tx_size: " << cols << "x" << rows
+                << " tx_type: " << tx_type_name[tx_type];
+          }
+        }
+      }
+    }
+  }
+}
+
+void AV1FwdTxfm2dSpeedTest(TX_SIZE tx_size, lowbd_fwd_txfm_func target_func) {
+  TxfmParam param;
+  memset(&param, 0, sizeof(param));
+  const int rows = tx_size_high[tx_size];
+  const int cols = tx_size_wide[tx_size];
+  const int num_loops = 1000000 / (rows * cols);
+
+  const int bd = 8;
+  for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+    if (libaom_test::IsTxSizeTypeValid(
+            tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
+      continue;
+    }
+
+    FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
+    if (ref_func != nullptr) {
+      DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
+      DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
+      DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
+      int input_stride = 64;
+      ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+      for (int r = 0; r < rows; ++r) {
+        for (int c = 0; c < cols; ++c) {
+          input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
+        }
+      }
+
+      param.tx_type = (TX_TYPE)tx_type;
+      param.tx_size = (TX_SIZE)tx_size;
+      param.tx_set_type = EXT_TX_SET_ALL16;
+      param.bd = bd;
+
+      aom_usec_timer ref_timer, test_timer;
+
+      aom_usec_timer_start(&ref_timer);
+      for (int i = 0; i < num_loops; ++i) {
+        ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
+      }
+      aom_usec_timer_mark(&ref_timer);
+      const int elapsed_time_c =
+          static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+      aom_usec_timer_start(&test_timer);
+      for (int i = 0; i < num_loops; ++i) {
+        target_func(input, output, input_stride, &param);
+      }
+      aom_usec_timer_mark(&test_timer);
+      const int elapsed_time_simd =
+          static_cast<int>(aom_usec_timer_elapsed(&test_timer));
+
+      printf(
+          "txfm_size[%2dx%-2d] \t txfm_type[%d] \t c_time=%d \t"
+          "simd_time=%d \t gain=%d \n",
+          rows, cols, tx_type, elapsed_time_c, elapsed_time_simd,
+          (elapsed_time_c / elapsed_time_simd));
+    }
+  }
+}
+
+typedef std::tuple<TX_SIZE, lowbd_fwd_txfm_func> LbdFwdTxfm2dParam;
+
+class AV1FwdTxfm2dTest : public ::testing::TestWithParam<LbdFwdTxfm2dParam> {};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1FwdTxfm2dTest);
+
+TEST_P(AV1FwdTxfm2dTest, match) {
+  AV1FwdTxfm2dMatchTest(GET_PARAM(0), GET_PARAM(1));
+}
+TEST_P(AV1FwdTxfm2dTest, DISABLED_Speed) {
+  AV1FwdTxfm2dSpeedTest(GET_PARAM(0), GET_PARAM(1));
+}
+TEST(AV1FwdTxfm2dTest, DCTScaleTest) {
+  BitDepthInfo bd_info;
+  bd_info.bit_depth = 8;
+  bd_info.use_highbitdepth_buf = 0;
+  DECLARE_ALIGNED(32, int16_t, src_diff[1024]);
+  DECLARE_ALIGNED(32, tran_low_t, coeff[1024]);
+
+  const TX_SIZE tx_size_list[4] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32 };
+  const int stride_list[4] = { 4, 8, 16, 32 };
+  const int ref_scale_list[4] = { 64, 64, 64, 16 };
+
+  for (int i = 0; i < 4; i++) {
+    TX_SIZE tx_size = tx_size_list[i];
+    int stride = stride_list[i];
+    int array_size = stride * stride;
+
+    for (int j = 0; j < array_size; j++) {
+      src_diff[j] = 8;
+      coeff[j] = 0;
+    }
+
+    av1_quick_txfm(/*use_hadamard=*/0, tx_size, bd_info, src_diff, stride,
+                   coeff);
+
+    double input_sse = 0;
+    double output_sse = 0;
+    for (int j = 0; j < array_size; j++) {
+      input_sse += pow(src_diff[j], 2);
+      output_sse += pow(coeff[j], 2);
+    }
+
+    double scale = output_sse / input_sse;
+
+    EXPECT_NEAR(scale, ref_scale_list[i], 5);
+  }
+}
+TEST(AV1FwdTxfm2dTest, HadamardScaleTest) {
+  BitDepthInfo bd_info;
+  bd_info.bit_depth = 8;
+  bd_info.use_highbitdepth_buf = 0;
+  DECLARE_ALIGNED(32, int16_t, src_diff[1024]);
+  DECLARE_ALIGNED(32, tran_low_t, coeff[1024]);
+
+  const TX_SIZE tx_size_list[4] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32 };
+  const int stride_list[4] = { 4, 8, 16, 32 };
+  const int ref_scale_list[4] = { 1, 64, 64, 16 };
+
+  for (int i = 0; i < 4; i++) {
+    TX_SIZE tx_size = tx_size_list[i];
+    int stride = stride_list[i];
+    int array_size = stride * stride;
+
+    for (int j = 0; j < array_size; j++) {
+      src_diff[j] = 8;
+      coeff[j] = 0;
+    }
+
+    av1_quick_txfm(/*use_hadamard=*/1, tx_size, bd_info, src_diff, stride,
+                   coeff);
+
+    double input_sse = 0;
+    double output_sse = 0;
+    for (int j = 0; j < array_size; j++) {
+      input_sse += pow(src_diff[j], 2);
+      output_sse += pow(coeff[j], 2);
+    }
+
+    double scale = output_sse / input_sse;
+
+    EXPECT_NEAR(scale, ref_scale_list[i], 5);
+  }
+}
+using ::testing::Combine;
+using ::testing::Values;
+using ::testing::ValuesIn;
+
+#if HAVE_SSE2
+static TX_SIZE fwd_txfm_for_sse2[] = {
+  TX_4X4,
+  TX_8X8,
+  TX_16X16,
+  TX_32X32,
+  // TX_64X64,
+  TX_4X8,
+  TX_8X4,
+  TX_8X16,
+  TX_16X8,
+  TX_16X32,
+  TX_32X16,
+  // TX_32X64,
+  // TX_64X32,
+  TX_4X16,
+  TX_16X4,
+  TX_8X32,
+  TX_32X8,
+  TX_16X64,
+  TX_64X16,
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1FwdTxfm2dTest,
+                         Combine(ValuesIn(fwd_txfm_for_sse2),
+                                 Values(av1_lowbd_fwd_txfm_sse2)));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSE4_1
+static TX_SIZE fwd_txfm_for_sse41[] = {
+  TX_4X4,
+  TX_64X64,
+  TX_32X64,
+  TX_64X32,
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1FwdTxfm2dTest,
+                         Combine(ValuesIn(fwd_txfm_for_sse41),
+                                 Values(av1_lowbd_fwd_txfm_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+static TX_SIZE fwd_txfm_for_avx2[] = {
+  TX_4X4,  TX_8X8,  TX_16X16, TX_32X32, TX_64X64, TX_4X8,   TX_8X4,
+  TX_8X16, TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32, TX_4X16,
+  TX_16X4, TX_8X32, TX_32X8,  TX_16X64, TX_64X16,
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1FwdTxfm2dTest,
+                         Combine(ValuesIn(fwd_txfm_for_avx2),
+                                 Values(av1_lowbd_fwd_txfm_avx2)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+
+static TX_SIZE fwd_txfm_for_neon[] = { TX_4X4,   TX_8X8,   TX_16X16, TX_32X32,
+                                       TX_64X64, TX_4X8,   TX_8X4,   TX_8X16,
+                                       TX_16X8,  TX_16X32, TX_32X16, TX_32X64,
+                                       TX_64X32, TX_4X16,  TX_16X4,  TX_8X32,
+                                       TX_32X8,  TX_16X64, TX_64X16 };
+
+INSTANTIATE_TEST_SUITE_P(NEON, AV1FwdTxfm2dTest,
+                         Combine(ValuesIn(fwd_txfm_for_neon),
+                                 Values(av1_lowbd_fwd_txfm_neon)));
+
+#endif  // HAVE_NEON
+
+typedef void (*Highbd_fwd_txfm_func)(const int16_t *src_diff, tran_low_t *coeff,
+                                     int diff_stride, TxfmParam *txfm_param);
+
+void AV1HighbdFwdTxfm2dMatchTest(TX_SIZE tx_size,
+                                 Highbd_fwd_txfm_func target_func) {
+  const int bd_ar[2] = { 10, 12 };
+  TxfmParam param;
+  memset(&param, 0, sizeof(param));
+  const int rows = tx_size_high[tx_size];
+  const int cols = tx_size_wide[tx_size];
+  for (int i = 0; i < 2; ++i) {
+    const int bd = bd_ar[i];
+    for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+      if (libaom_test::IsTxSizeTypeValid(
+              tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
+        continue;
+      }
+
+      FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
+      if (ref_func != nullptr) {
+        DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
+        DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
+        DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
+        int input_stride = 64;
+        ACMRandom rnd(ACMRandom::DeterministicSeed());
+        for (int cnt = 0; cnt < 500; ++cnt) {
+          if (cnt == 0) {
+            for (int r = 0; r < rows; ++r) {
+              for (int c = 0; c < cols; ++c) {
+                input[r * input_stride + c] = (1 << bd) - 1;
+              }
+            }
+          } else {
+            for (int r = 0; r < rows; ++r) {
+              for (int c = 0; c < cols; ++c) {
+                input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
+              }
+            }
+          }
+          param.tx_type = (TX_TYPE)tx_type;
+          param.tx_size = (TX_SIZE)tx_size;
+          param.tx_set_type = EXT_TX_SET_ALL16;
+          param.bd = bd;
+
+          ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
+          target_func(input, output, input_stride, &param);
+          const int check_cols = AOMMIN(32, cols);
+          const int check_rows = AOMMIN(32, rows * cols / check_cols);
+          for (int r = 0; r < check_rows; ++r) {
+            for (int c = 0; c < check_cols; ++c) {
+              ASSERT_EQ(ref_output[c * check_rows + r],
+                        output[c * check_rows + r])
+                  << "[" << r << "," << c << "] cnt:" << cnt
+                  << " tx_size: " << cols << "x" << rows
+                  << " tx_type: " << tx_type;
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+void AV1HighbdFwdTxfm2dSpeedTest(TX_SIZE tx_size,
+                                 Highbd_fwd_txfm_func target_func) {
+  const int bd_ar[2] = { 10, 12 };
+  TxfmParam param;
+  memset(&param, 0, sizeof(param));
+  const int rows = tx_size_high[tx_size];
+  const int cols = tx_size_wide[tx_size];
+  const int num_loops = 1000000 / (rows * cols);
+
+  for (int i = 0; i < 2; ++i) {
+    const int bd = bd_ar[i];
+    for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+      if (libaom_test::IsTxSizeTypeValid(
+              tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
+        continue;
+      }
+
+      FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
+      if (ref_func != nullptr) {
+        DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
+        DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
+        DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
+        int input_stride = 64;
+        ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+        for (int r = 0; r < rows; ++r) {
+          for (int c = 0; c < cols; ++c) {
+            input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
+          }
+        }
+
+        param.tx_type = (TX_TYPE)tx_type;
+        param.tx_size = (TX_SIZE)tx_size;
+        param.tx_set_type = EXT_TX_SET_ALL16;
+        param.bd = bd;
+
+        aom_usec_timer ref_timer, test_timer;
+
+        aom_usec_timer_start(&ref_timer);
+        for (int j = 0; j < num_loops; ++j) {
+          ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
+        }
+        aom_usec_timer_mark(&ref_timer);
+        const int elapsed_time_c =
+            static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+        aom_usec_timer_start(&test_timer);
+        for (int j = 0; j < num_loops; ++j) {
+          target_func(input, output, input_stride, &param);
+        }
+        aom_usec_timer_mark(&test_timer);
+        const int elapsed_time_simd =
+            static_cast<int>(aom_usec_timer_elapsed(&test_timer));
+
+        printf(
+            "txfm_size[%2dx%-2d] \t txfm_type[%d] \t c_time=%d \t"
+            "simd_time=%d \t gain=%d \n",
+            cols, rows, tx_type, elapsed_time_c, elapsed_time_simd,
+            (elapsed_time_c / elapsed_time_simd));
+      }
+    }
+  }
+}
+
+typedef std::tuple<TX_SIZE, Highbd_fwd_txfm_func> HighbdFwdTxfm2dParam;
+
+class AV1HighbdFwdTxfm2dTest
+    : public ::testing::TestWithParam<HighbdFwdTxfm2dParam> {};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdFwdTxfm2dTest);
+
+TEST_P(AV1HighbdFwdTxfm2dTest, match) {
+  AV1HighbdFwdTxfm2dMatchTest(GET_PARAM(0), GET_PARAM(1));
+}
+
+TEST_P(AV1HighbdFwdTxfm2dTest, DISABLED_Speed) {
+  AV1HighbdFwdTxfm2dSpeedTest(GET_PARAM(0), GET_PARAM(1));
+}
+
+using ::testing::Combine;
+using ::testing::Values;
+using ::testing::ValuesIn;
+
+#if HAVE_SSE4_1
+static TX_SIZE Highbd_fwd_txfm_for_sse4_1[] = {
+  TX_4X4,  TX_8X8,  TX_16X16, TX_32X32, TX_64X64, TX_4X8,   TX_8X4,
+  TX_8X16, TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32,
+#if !CONFIG_REALTIME_ONLY
+  TX_4X16, TX_16X4, TX_8X32,  TX_32X8,  TX_16X64, TX_64X16,
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdFwdTxfm2dTest,
+                         Combine(ValuesIn(Highbd_fwd_txfm_for_sse4_1),
+                                 Values(av1_highbd_fwd_txfm)));
+#endif  // HAVE_SSE4_1
+#if HAVE_AVX2
+static TX_SIZE Highbd_fwd_txfm_for_avx2[] = { TX_8X8,   TX_16X16, TX_32X32,
+                                              TX_64X64, TX_8X16,  TX_16X8 };
+
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1HighbdFwdTxfm2dTest,
+                         Combine(ValuesIn(Highbd_fwd_txfm_for_avx2),
+                                 Values(av1_highbd_fwd_txfm)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+static TX_SIZE Highbd_fwd_txfm_for_neon[] = {
+  TX_4X4,  TX_8X8,  TX_16X16, TX_32X32, TX_64X64, TX_4X8,   TX_8X4,
+  TX_8X16, TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32, TX_4X16,
+  TX_16X4, TX_8X32, TX_32X8,  TX_16X64, TX_64X16
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdFwdTxfm2dTest,
+                         Combine(ValuesIn(Highbd_fwd_txfm_for_neon),
+                                 Values(av1_highbd_fwd_txfm)));
+#endif  // HAVE_NEON
+
+}  // namespace
diff --git a/third_party/aom/test/av1_highbd_iht_test.cc b/third_party/aom/test/av1_highbd_iht_test.cc
new file mode 100644
index 0000000000..2c57362a82
--- /dev/null
+++ b/third_party/aom/test/av1_highbd_iht_test.cc
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/av1_txfm_test.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/enums.h"
+#include "av1/common/scan.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_ports/mem.h"
+
+namespace {
+
+using libaom_test::ACMRandom;
+using std::tuple;
+
+typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
+                          TX_TYPE tx_type, int bd);
+
+typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
+                           TX_TYPE tx_type, int bd);
+static const char *tx_type_name[] = {
+  "DCT_DCT",
+  "ADST_DCT",
+  "DCT_ADST",
+  "ADST_ADST",
+  "FLIPADST_DCT",
+  "DCT_FLIPADST",
+  "FLIPADST_FLIPADST",
+  "ADST_FLIPADST",
+  "FLIPADST_ADST",
+  "IDTX",
+  "V_DCT",
+  "H_DCT",
+  "V_ADST",
+  "H_ADST",
+  "V_FLIPADST",
+  "H_FLIPADST",
+};
+// Test parameter argument list:
+//   <transform reference function,
+//    optimized inverse transform function,
+//    inverse transform reference function,
+//    num_coeffs,
+//    tx_type,
+//    bit_depth>
+typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, TX_TYPE, int> IHbdHtParam;
+
+class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
+ public:
+  ~AV1HighbdInvHTNxN() override = default;
+
+  void SetUp() override {
+    txfm_ref_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    inv_txfm_ref_ = GET_PARAM(2);
+    num_coeffs_ = GET_PARAM(3);
+    tx_type_ = GET_PARAM(4);
+    bit_depth_ = GET_PARAM(5);
+
+    input_ = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(input_[0]) * num_coeffs_));
+    ASSERT_NE(input_, nullptr);
+
+    // Note:
+    // Inverse transform input buffer is 32-byte aligned
+    // Refer to <root>/av1/encoder/context_tree.c, function,
+    // void alloc_mode_context().
+    coeffs_ = reinterpret_cast<int32_t *>(
+        aom_memalign(32, sizeof(coeffs_[0]) * num_coeffs_));
+    ASSERT_NE(coeffs_, nullptr);
+    output_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(32, sizeof(output_[0]) * num_coeffs_));
+    ASSERT_NE(output_, nullptr);
+    output_ref_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(32, sizeof(output_ref_[0]) * num_coeffs_));
+    ASSERT_NE(output_ref_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(input_);
+    aom_free(coeffs_);
+    aom_free(output_);
+    aom_free(output_ref_);
+  }
+
+ protected:
+  void RunBitexactCheck();
+
+ private:
+  int GetStride() const {
+    if (16 == num_coeffs_) {
+      return 4;
+    } else if (64 == num_coeffs_) {
+      return 8;
+    } else if (256 == num_coeffs_) {
+      return 16;
+    } else if (1024 == num_coeffs_) {
+      return 32;
+    } else if (4096 == num_coeffs_) {
+      return 64;
+    } else {
+      return 0;
+    }
+  }
+
+  HbdHtFunc txfm_ref_;
+  IHbdHtFunc inv_txfm_;
+  IHbdHtFunc inv_txfm_ref_;
+  int num_coeffs_;
+  TX_TYPE tx_type_;
+  int bit_depth_;
+
+  int16_t *input_;
+  int32_t *coeffs_;
+  uint16_t *output_;
+  uint16_t *output_ref_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdInvHTNxN);
+
+void AV1HighbdInvHTNxN::RunBitexactCheck() {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int stride = GetStride();
+  const int num_tests = 20000;
+  const uint16_t mask = (1 << bit_depth_) - 1;
+
+  for (int i = 0; i < num_tests; ++i) {
+    for (int j = 0; j < num_coeffs_; ++j) {
+      input_[j] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
+      output_ref_[j] = rnd.Rand16() & mask;
+      output_[j] = output_ref_[j];
+    }
+
+    txfm_ref_(input_, coeffs_, stride, tx_type_, bit_depth_);
+    inv_txfm_ref_(coeffs_, output_ref_, stride, tx_type_, bit_depth_);
+    API_REGISTER_STATE_CHECK(
+        inv_txfm_(coeffs_, output_, stride, tx_type_, bit_depth_));
+
+    for (int j = 0; j < num_coeffs_; ++j) {
+      EXPECT_EQ(output_ref_[j], output_[j])
+          << "Not bit-exact result at index: " << j << " At test block: " << i;
+    }
+  }
+}
+
+TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) { RunBitexactCheck(); }
+
+using std::make_tuple;
+
+#if HAVE_SSE4_1
+#define PARAM_LIST_4X4                                   \
+  &av1_fwd_txfm2d_4x4_c, &av1_inv_txfm2d_add_4x4_sse4_1, \
+      &av1_inv_txfm2d_add_4x4_c, 16
+
+const IHbdHtParam kArrayIhtParam[] = {
+  // 4x4
+  make_tuple(PARAM_LIST_4X4, DCT_DCT, 10),
+  make_tuple(PARAM_LIST_4X4, DCT_DCT, 12),
+  make_tuple(PARAM_LIST_4X4, ADST_DCT, 10),
+  make_tuple(PARAM_LIST_4X4, ADST_DCT, 12),
+  make_tuple(PARAM_LIST_4X4, DCT_ADST, 10),
+  make_tuple(PARAM_LIST_4X4, DCT_ADST, 12),
+  make_tuple(PARAM_LIST_4X4, ADST_ADST, 10),
+  make_tuple(PARAM_LIST_4X4, ADST_ADST, 12),
+  make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 10),
+  make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 12),
+  make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 10),
+  make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 12),
+  make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 10),
+  make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 12),
+  make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 10),
+  make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 12),
+  make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 10),
+  make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 12),
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdInvHTNxN,
+                         ::testing::ValuesIn(kArrayIhtParam));
+#endif  // HAVE_SSE4_1
+
+typedef void (*HighbdInvTxfm2dFunc)(const int32_t *input, uint8_t *output,
+                                    int stride, const TxfmParam *txfm_param);
+
+typedef std::tuple<const HighbdInvTxfm2dFunc> AV1HighbdInvTxfm2dParam;
+class AV1HighbdInvTxfm2d
+    : public ::testing::TestWithParam<AV1HighbdInvTxfm2dParam> {
+ public:
+  void SetUp() override { target_func_ = GET_PARAM(0); }
+  void RunAV1InvTxfm2dTest(TX_TYPE tx_type, TX_SIZE tx_size, int run_times,
+                           int bit_depth, int gt_int16 = 0);
+
+ private:
+  HighbdInvTxfm2dFunc target_func_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdInvTxfm2d);
+
+void AV1HighbdInvTxfm2d::RunAV1InvTxfm2dTest(TX_TYPE tx_type_, TX_SIZE tx_size_,
+                                             int run_times, int bit_depth_,
+                                             int gt_int16) {
+#if CONFIG_REALTIME_ONLY
+  if (tx_size_ >= TX_4X16) {
+    return;
+  }
+#endif
+  FwdTxfm2dFunc fwd_func_ = libaom_test::fwd_txfm_func_ls[tx_size_];
+  TxfmParam txfm_param;
+  const int BLK_WIDTH = 64;
+  const int BLK_SIZE = BLK_WIDTH * BLK_WIDTH;
+  DECLARE_ALIGNED(16, int16_t, input[BLK_SIZE]) = { 0 };
+  DECLARE_ALIGNED(32, int32_t, inv_input[BLK_SIZE]) = { 0 };
+  DECLARE_ALIGNED(32, uint16_t, output[BLK_SIZE]) = { 0 };
+  DECLARE_ALIGNED(32, uint16_t, ref_output[BLK_SIZE]) = { 0 };
+  int stride = BLK_WIDTH;
+  int rows = tx_size_high[tx_size_];
+  int cols = tx_size_wide[tx_size_];
+  const int rows_nonezero = AOMMIN(32, rows);
+  const int cols_nonezero = AOMMIN(32, cols);
+  const uint16_t mask = (1 << bit_depth_) - 1;
+  run_times /= (rows * cols);
+  run_times = AOMMAX(1, run_times);
+  const SCAN_ORDER *scan_order = get_default_scan(tx_size_, tx_type_);
+  const int16_t *scan = scan_order->scan;
+  const int16_t eobmax = rows_nonezero * cols_nonezero;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int randTimes = run_times == 1 ? (eobmax) : 1;
+
+  txfm_param.tx_type = tx_type_;
+  txfm_param.tx_size = tx_size_;
+  txfm_param.lossless = 0;
+  txfm_param.bd = bit_depth_;
+  txfm_param.is_hbd = 1;
+  txfm_param.tx_set_type = EXT_TX_SET_ALL16;
+
+  for (int cnt = 0; cnt < randTimes; ++cnt) {
+    for (int r = 0; r < BLK_WIDTH; ++r) {
+      for (int c = 0; c < BLK_WIDTH; ++c) {
+        input[r * cols + c] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
+        output[r * stride + c] = rnd.Rand16() & mask;
+
+        ref_output[r * stride + c] = output[r * stride + c];
+      }
+    }
+    fwd_func_(input, inv_input, stride, tx_type_, bit_depth_);
+
+    // produce eob input by setting high freq coeffs to zero
+    const int eob = AOMMIN(cnt + 1, eobmax);
+    for (int i = eob; i < eobmax; i++) {
+      inv_input[scan[i]] = 0;
+    }
+    txfm_param.eob = eob;
+    if (gt_int16) {
+      const uint16_t inv_input_mask =
+          static_cast<uint16_t>((1 << (bit_depth_ + 7)) - 1);
+      for (int i = 0; i < eob; i++) {
+        inv_input[scan[i]] = (rnd.Rand31() & inv_input_mask);
+      }
+    }
+
+    aom_usec_timer ref_timer, test_timer;
+    aom_usec_timer_start(&ref_timer);
+    for (int i = 0; i < run_times; ++i) {
+      av1_highbd_inv_txfm_add_c(inv_input, CONVERT_TO_BYTEPTR(ref_output),
+                                stride, &txfm_param);
+    }
+    aom_usec_timer_mark(&ref_timer);
+    const int elapsed_time_c =
+        static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+    aom_usec_timer_start(&test_timer);
+    for (int i = 0; i < run_times; ++i) {
+      target_func_(inv_input, CONVERT_TO_BYTEPTR(output), stride, &txfm_param);
+    }
+    aom_usec_timer_mark(&test_timer);
+    const int elapsed_time_simd =
+        static_cast<int>(aom_usec_timer_elapsed(&test_timer));
+    if (run_times > 10) {
+      printf(
+          "txfm_size[%d] \t txfm_type[%d] \t c_time=%d \t simd_time=%d \t "
+          "gain=%d \n",
+          tx_size_, tx_type_, elapsed_time_c, elapsed_time_simd,
+          (elapsed_time_c / elapsed_time_simd));
+    } else {
+      for (int r = 0; r < rows; ++r) {
+        for (int c = 0; c < cols; ++c) {
+          ASSERT_EQ(ref_output[r * stride + c], output[r * stride + c])
+              << "[" << r << "," << c << "] " << cnt << " tx_size: " << cols
+              << "x" << rows << " bit_depth_: " << bit_depth_
+              << " tx_type: " << tx_type_name[tx_type_] << " eob " << eob;
+        }
+      }
+    }
+  }
+}
+
+TEST_P(AV1HighbdInvTxfm2d, match) {
+  int bitdepth_ar[3] = { 8, 10, 12 };
+  for (int k = 0; k < 3; ++k) {
+    int bd = bitdepth_ar[k];
+    for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) {
+      for (int i = 0; i < (int)TX_TYPES; ++i) {
+        if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(j),
+                                           static_cast<TX_TYPE>(i))) {
+          RunAV1InvTxfm2dTest(static_cast<TX_TYPE>(i), static_cast<TX_SIZE>(j),
+                              1, bd);
+        }
+      }
+    }
+  }
+}
+
+TEST_P(AV1HighbdInvTxfm2d, gt_int16) {
+  int bitdepth_ar[3] = { 8, 10, 12 };
+  static const TX_TYPE types[] = {
+    DCT_DCT, ADST_DCT, FLIPADST_DCT, IDTX, V_DCT, H_DCT, H_ADST, H_FLIPADST
+  };
+  for (int k = 0; k < 3; ++k) {
+    int bd = bitdepth_ar[k];
+    for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) {
+      const TX_SIZE sz = static_cast<TX_SIZE>(j);
+      for (uint8_t i = 0; i < sizeof(types) / sizeof(TX_TYPE); ++i) {
+        const TX_TYPE tp = types[i];
+        if (libaom_test::IsTxSizeTypeValid(sz, tp)) {
+          RunAV1InvTxfm2dTest(tp, sz, 1, bd, 1);
+        }
+      }
+    }
+  }
+}
+
+TEST_P(AV1HighbdInvTxfm2d, DISABLED_Speed) {
+  int bitdepth_ar[2] = { 10, 12 };
+  for (int k = 0; k < 2; ++k) {
+    int bd = bitdepth_ar[k];
+    for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) {
+      for (int i = 0; i < (int)TX_TYPES; ++i) {
+        if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(j),
+                                           static_cast<TX_TYPE>(i))) {
+          RunAV1InvTxfm2dTest(static_cast<TX_TYPE>(i), static_cast<TX_SIZE>(j),
+                              1000000, bd);
+        }
+      }
+    }
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdInvTxfm2d,
+                         ::testing::Values(av1_highbd_inv_txfm_add_sse4_1));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1HighbdInvTxfm2d,
+                         ::testing::Values(av1_highbd_inv_txfm_add_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdInvTxfm2d,
+                         ::testing::Values(av1_highbd_inv_txfm_add_neon));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/av1_horz_only_frame_superres_test.cc b/third_party/aom/test/av1_horz_only_frame_superres_test.cc
new file mode 100644
index 0000000000..e9cf02e202
--- /dev/null
+++ b/third_party/aom/test/av1_horz_only_frame_superres_test.cc
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+#include <vector>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "av1/common/convolve.h"
+#include "av1/common/resize.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+const int kTestIters = 10;
+const int kPerfIters = 1000;
+
+const int kVPad = 32;
+const int kHPad = 32;
+
+using libaom_test::ACMRandom;
+using std::make_tuple;
+using std::tuple;
+
+template <typename Pixel>
+class TestImage {
+ public:
+  TestImage(int w_src, int h, int superres_denom, int x0, int bd)
+      : w_src_(w_src), h_(h), superres_denom_(superres_denom), x0_(x0),
+        bd_(bd) {
+    assert(bd < 16);
+    assert(bd <= 8 * static_cast<int>(sizeof(Pixel)));
+    assert(9 <= superres_denom && superres_denom <= 16);
+    assert(SCALE_NUMERATOR == 8);
+    assert(0 <= x0_ && x0_ <= RS_SCALE_SUBPEL_MASK);
+
+    w_dst_ = w_src_;
+    av1_calculate_unscaled_superres_size(&w_dst_, nullptr, superres_denom);
+
+    src_stride_ = ALIGN_POWER_OF_TWO(w_src_ + 2 * kHPad, 4);
+    dst_stride_ = ALIGN_POWER_OF_TWO(w_dst_ + 2 * kHPad, 4);
+
+    // Allocate image data
+    src_data_.resize(2 * src_block_size());
+    dst_data_.resize(2 * dst_block_size());
+  }
+
+  void Initialize(ACMRandom *rnd);
+  void Check() const;
+
+  int src_stride() const { return src_stride_; }
+  int dst_stride() const { return dst_stride_; }
+
+  int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); }
+  int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); }
+
+  int src_width() const { return w_src_; }
+  int dst_width() const { return w_dst_; }
+  int height() const { return h_; }
+  int x0() const { return x0_; }
+
+  const Pixel *GetSrcData(bool ref, bool borders) const {
+    const Pixel *block = &src_data_[ref ? 0 : src_block_size()];
+    return borders ? block : block + kHPad + src_stride_ * kVPad;
+  }
+
+  Pixel *GetDstData(bool ref, bool borders) {
+    Pixel *block = &dst_data_[ref ? 0 : dst_block_size()];
+    return borders ? block : block + kHPad + dst_stride_ * kVPad;
+  }
+
+ private:
+  int w_src_, w_dst_, h_, superres_denom_, x0_, bd_;
+  int src_stride_, dst_stride_;
+
+  std::vector<Pixel> src_data_;
+  std::vector<Pixel> dst_data_;
+};
+
+template <typename Pixel>
+void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) {
+  if (!trash) {
+    memset(data, 0, sizeof(*data) * num_pixels);
+    return;
+  }
+  const Pixel mask = (1 << bd) - 1;
+  for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask;
+}
+
+template <typename Pixel>
+void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd,
+                 bool trash_edges, Pixel *data) {
+  assert(rnd);
+  const Pixel mask = (1 << bd) - 1;
+
+  // Fill in the first buffer with random data
+  // Top border
+  FillEdge(rnd, stride * kVPad, bd, trash_edges, data);
+  for (int r = 0; r < h; ++r) {
+    Pixel *row_data = data + (kVPad + r) * stride;
+    // Left border, contents, right border
+    FillEdge(rnd, kHPad, bd, trash_edges, row_data);
+    for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask;
+    FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w);
+  }
+  // Bottom border
+  FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h));
+
+  const int bpp = sizeof(*data);
+  const int block_elts = stride * (h + 2 * kVPad);
+  const int block_size = bpp * block_elts;
+
+  // Now copy that to the second buffer
+  memcpy(data + block_elts, data, block_size);
+}
+
+template <typename Pixel>
+void TestImage<Pixel>::Initialize(ACMRandom *rnd) {
+  PrepBuffers(rnd, w_src_, h_, src_stride_, bd_, false, &src_data_[0]);
+  PrepBuffers(rnd, w_dst_, h_, dst_stride_, bd_, true, &dst_data_[0]);
+}
+
+template <typename Pixel>
+void TestImage<Pixel>::Check() const {
+  const int num_pixels = dst_block_size();
+  const Pixel *ref_dst = &dst_data_[0];
+  const Pixel *tst_dst = &dst_data_[num_pixels];
+
+  // If memcmp returns 0, there's nothing to do.
+  if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return;
+
+  // Otherwise, iterate through the buffer looking for differences, *ignoring
+  // the edges*
+  const int stride = dst_stride_;
+  for (int r = kVPad; r < h_ + kVPad; ++r) {
+    for (int c = kVPad; c < w_dst_ + kHPad; ++c) {
+      const int32_t ref_value = ref_dst[r * stride + c];
+      const int32_t tst_value = tst_dst[r * stride + c];
+
+      EXPECT_EQ(tst_value, ref_value)
+          << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad)
+          << ", superres_denom: " << superres_denom_ << ", height: " << h_
+          << ", src_width: " << w_src_ << ", dst_width: " << w_dst_
+          << ", x0: " << x0_;
+    }
+  }
+}
+
+template <typename Pixel>
+class ConvolveHorizRSTestBase : public ::testing::Test {
+ public:
+  ConvolveHorizRSTestBase() : image_(nullptr) {}
+  ~ConvolveHorizRSTestBase() override = default;
+
+  // Implemented by subclasses (SetUp depends on the parameters passed
+  // in and RunOne depends on the function to be tested. These can't
+  // be templated for low/high bit depths because they have different
+  // numbers of parameters)
+  void SetUp() override = 0;
+  virtual void RunOne(bool ref) = 0;
+
+ protected:
+  void SetBitDepth(int bd) { bd_ = bd; }
+
+  void CorrectnessTest() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    for (int i = 0; i < kTestIters; ++i) {
+      for (int superres_denom = 9; superres_denom <= 16; superres_denom++) {
+        // Get a random height between 512 and 767
+        int height = rnd.Rand8() + 512;
+
+        // Get a random src width between 128 and 383
+        int width_src = rnd.Rand8() + 128;
+
+        // x0 is normally calculated by get_upscale_convolve_x0 in
+        // av1/common/resize.c. However, this test should work for
+        // any value of x0 between 0 and RS_SCALE_SUBPEL_MASK
+        // (inclusive), so we choose one at random.
+        int x0 = rnd.Rand16() % (RS_SCALE_SUBPEL_MASK + 1);
+
+        image_ =
+            new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_);
+        ASSERT_NE(image_, nullptr);
+
+        Prep(&rnd);
+        RunOne(true);
+        RunOne(false);
+        image_->Check();
+
+        delete image_;
+      }
+    }
+  }
+
+  void SpeedTest() {
+    // Pick some specific parameters to test
+    int height = 767;
+    int width_src = 129;
+    int superres_denom = 13;
+    int x0 = RS_SCALE_SUBPEL_MASK >> 1;
+
+    image_ = new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_);
+    ASSERT_NE(image_, nullptr);
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    Prep(&rnd);
+
+    aom_usec_timer ref_timer;
+    aom_usec_timer_start(&ref_timer);
+    for (int i = 0; i < kPerfIters; ++i) RunOne(true);
+    aom_usec_timer_mark(&ref_timer);
+    const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+    aom_usec_timer tst_timer;
+    aom_usec_timer_start(&tst_timer);
+    for (int i = 0; i < kPerfIters; ++i) RunOne(false);
+    aom_usec_timer_mark(&tst_timer);
+    const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+    std::cout << "[          ] C time = " << ref_time / 1000
+              << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+    EXPECT_GT(ref_time, tst_time)
+        << "Error: ConvolveHorizRSTest (Speed Test), SIMD slower than C.\n"
+        << "C time: " << ref_time << " us\n"
+        << "SIMD time: " << tst_time << " us\n";
+  }
+
+  void Prep(ACMRandom *rnd) {
+    assert(rnd);
+    image_->Initialize(rnd);
+  }
+
+  int bd_;
+  TestImage<Pixel> *image_;
+};
+
+typedef void (*LowBDConvolveHorizRsFunc)(const uint8_t *src, int src_stride,
+                                         uint8_t *dst, int dst_stride, int w,
+                                         int h, const int16_t *x_filters,
+                                         const int x0_qn, const int x_step_qn);
+
+// Test parameter list:
+//  <tst_fun_>
+typedef tuple<LowBDConvolveHorizRsFunc> LowBDParams;
+
+class LowBDConvolveHorizRSTest
+    : public ConvolveHorizRSTestBase<uint8_t>,
+      public ::testing::WithParamInterface<LowBDParams> {
+ public:
+  ~LowBDConvolveHorizRSTest() override = default;
+
+  void SetUp() override {
+    tst_fun_ = GET_PARAM(0);
+    const int bd = 8;
+    SetBitDepth(bd);
+  }
+
+  void RunOne(bool ref) override {
+    const uint8_t *src = image_->GetSrcData(ref, false);
+    uint8_t *dst = image_->GetDstData(ref, false);
+    const int src_stride = image_->src_stride();
+    const int dst_stride = image_->dst_stride();
+    const int width_src = image_->src_width();
+    const int width_dst = image_->dst_width();
+    const int height = image_->height();
+    const int x0_qn = image_->x0();
+
+    const int32_t x_step_qn =
+        av1_get_upscale_convolve_step(width_src, width_dst);
+
+    if (ref) {
+      av1_convolve_horiz_rs_c(src, src_stride, dst, dst_stride, width_dst,
+                              height, &av1_resize_filter_normative[0][0], x0_qn,
+                              x_step_qn);
+    } else {
+      tst_fun_(src, src_stride, dst, dst_stride, width_dst, height,
+               &av1_resize_filter_normative[0][0], x0_qn, x_step_qn);
+    }
+  }
+
+ private:
+  LowBDConvolveHorizRsFunc tst_fun_;
+};
+
+TEST_P(LowBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); }
+TEST_P(LowBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, LowBDConvolveHorizRSTest,
+                         ::testing::Values(av1_convolve_horiz_rs_c));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE4_1, LowBDConvolveHorizRSTest,
+                         ::testing::Values(av1_convolve_horiz_rs_sse4_1));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef void (*HighBDConvolveHorizRsFunc)(const uint16_t *src, int src_stride,
+                                          uint16_t *dst, int dst_stride, int w,
+                                          int h, const int16_t *x_filters,
+                                          const int x0_qn, const int x_step_qn,
+                                          int bd);
+
+// Test parameter list:
+//  <tst_fun_, bd_>
+typedef tuple<HighBDConvolveHorizRsFunc, int> HighBDParams;
+
+class HighBDConvolveHorizRSTest
+    : public ConvolveHorizRSTestBase<uint16_t>,
+      public ::testing::WithParamInterface<HighBDParams> {
+ public:
+  ~HighBDConvolveHorizRSTest() override = default;
+
+  void SetUp() override {
+    tst_fun_ = GET_PARAM(0);
+    const int bd = GET_PARAM(1);
+    SetBitDepth(bd);
+  }
+
+  void RunOne(bool ref) override {
+    const uint16_t *src = image_->GetSrcData(ref, false);
+    uint16_t *dst = image_->GetDstData(ref, false);
+    const int src_stride = image_->src_stride();
+    const int dst_stride = image_->dst_stride();
+    const int width_src = image_->src_width();
+    const int width_dst = image_->dst_width();
+    const int height = image_->height();
+    const int x0_qn = image_->x0();
+
+    const int32_t x_step_qn =
+        av1_get_upscale_convolve_step(width_src, width_dst);
+
+    if (ref) {
+      av1_highbd_convolve_horiz_rs_c(
+          src, src_stride, dst, dst_stride, width_dst, height,
+          &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_);
+    } else {
+      tst_fun_(src, src_stride, dst, dst_stride, width_dst, height,
+               &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_);
+    }
+  }
+
+ private:
+  HighBDConvolveHorizRsFunc tst_fun_;
+};
+
+const int kBDs[] = { 8, 10, 12 };
+
+TEST_P(HighBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); }
+TEST_P(HighBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); }
+
+INSTANTIATE_TEST_SUITE_P(
+    C, HighBDConvolveHorizRSTest,
+    ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_c),
+                       ::testing::ValuesIn(kBDs)));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, HighBDConvolveHorizRSTest,
+    ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_sse4_1),
+                       ::testing::ValuesIn(kBDs)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, HighBDConvolveHorizRSTest,
+    ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_neon),
+                       ::testing::ValuesIn(kBDs)));
+#endif  // HAVE_NEON
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/av1_inv_txfm1d_test.cc b/third_party/aom/test/av1_inv_txfm1d_test.cc
new file mode 100644
index 0000000000..e70b22a35a
--- /dev/null
+++ b/third_party/aom/test/av1_inv_txfm1d_test.cc
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+
+#include "test/av1_txfm_test.h"
+#include "test/util.h"
+#include "av1/common/av1_inv_txfm1d.h"
+#include "av1/encoder/av1_fwd_txfm1d.h"
+
+typedef TX_SIZE TxSize;
+
+using libaom_test::ACMRandom;
+using libaom_test::input_base;
+
+namespace {
+const int txfm_type_num = 2;
+const int txfm_size_ls[] = { 4, 8, 16, 32, 64 };
+
+const TxfmFunc fwd_txfm_func_ls[][txfm_type_num] = {
+  { av1_fdct4, av1_fadst4 },   { av1_fdct8, av1_fadst8 },
+  { av1_fdct16, av1_fadst16 }, { av1_fdct32, nullptr },
+  { av1_fdct64, nullptr },
+};
+
+const TxfmFunc inv_txfm_func_ls[][txfm_type_num] = {
+  { av1_idct4, av1_iadst4 },   { av1_idct8, av1_iadst8 },
+  { av1_idct16, av1_iadst16 }, { av1_idct32, nullptr },
+  { av1_idct64, nullptr },
+};
+
+// the maximum stage number of fwd/inv 1d dct/adst txfm is 12
+const int8_t cos_bit = 13;
+const int8_t range_bit[12] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 };
+
+void reference_idct_1d_int(const int32_t *in, int32_t *out, int size) {
+  double input[64];
+  for (int i = 0; i < size; ++i) input[i] = in[i];
+
+  double output[64];
+  libaom_test::reference_idct_1d(input, output, size);
+
+  for (int i = 0; i < size; ++i) {
+    ASSERT_GE(output[i], INT32_MIN);
+    ASSERT_LE(output[i], INT32_MAX);
+    out[i] = static_cast<int32_t>(round(output[i]));
+  }
+}
+
+void random_matrix(int32_t *dst, int len, ACMRandom *rnd) {
+  const int bits = 16;
+  const int maxVal = (1 << (bits - 1)) - 1;
+  const int minVal = -(1 << (bits - 1));
+  for (int i = 0; i < len; ++i) {
+    if (rnd->Rand8() % 10)
+      dst[i] = minVal + rnd->Rand16() % (1 << bits);
+    else
+      dst[i] = rnd->Rand8() % 2 ? minVal : maxVal;
+  }
+}
+
+TEST(av1_inv_txfm1d, InvAccuracyCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = 20000;
+  const int max_error[] = { 6, 10, 19, 31, 40 };
+  ASSERT_EQ(NELEMENTS(max_error), TX_SIZES);
+  ASSERT_EQ(NELEMENTS(inv_txfm_func_ls), TX_SIZES);
+  for (int i = 0; i < count_test_block; ++i) {
+    // choose a random transform to test
+    const TxSize tx_size = static_cast<TxSize>(rnd.Rand8() % TX_SIZES);
+    const int txfm_size = txfm_size_ls[tx_size];
+    const TxfmFunc inv_txfm_func = inv_txfm_func_ls[tx_size][0];
+
+    int32_t input[64];
+    random_matrix(input, txfm_size, &rnd);
+
+    // 64x64 transform assumes last 32 values are zero.
+    memset(input + 32, 0, 32 * sizeof(input[0]));
+
+    int32_t ref_output[64];
+    memset(ref_output, 0, sizeof(ref_output));
+    reference_idct_1d_int(input, ref_output, txfm_size);
+
+    int32_t output[64];
+    memset(output, 0, sizeof(output));
+    inv_txfm_func(input, output, cos_bit, range_bit);
+
+    for (int ni = 0; ni < txfm_size; ++ni) {
+      EXPECT_LE(abs(output[ni] - ref_output[ni]), max_error[tx_size])
+          << "tx_size = " << tx_size << ", ni = " << ni
+          << ", output[ni] = " << output[ni]
+          << ", ref_output[ni] = " << ref_output[ni];
+    }
+  }
+}
+
+static INLINE int get_max_bit(int x) {
+  int max_bit = -1;
+  while (x) {
+    x = x >> 1;
+    max_bit++;
+  }
+  return max_bit;
+}
+
+TEST(av1_inv_txfm1d, get_max_bit) {
+  int max_bit = get_max_bit(8);
+  EXPECT_EQ(max_bit, 3);
+}
+
+TEST(av1_inv_txfm1d, round_trip) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int si = 0; si < NELEMENTS(fwd_txfm_func_ls); ++si) {
+    int txfm_size = txfm_size_ls[si];
+
+    for (int ti = 0; ti < txfm_type_num; ++ti) {
+      TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[si][ti];
+      TxfmFunc inv_txfm_func = inv_txfm_func_ls[si][ti];
+      int max_error = 2;
+
+      if (!fwd_txfm_func) continue;
+
+      const int count_test_block = 5000;
+      for (int i = 0; i < count_test_block; ++i) {
+        int32_t input[64];
+        int32_t output[64];
+        int32_t round_trip_output[64];
+
+        ASSERT_LE(txfm_size, NELEMENTS(input));
+
+        for (int ni = 0; ni < txfm_size; ++ni) {
+          input[ni] = rnd.Rand16() % input_base - rnd.Rand16() % input_base;
+        }
+
+        fwd_txfm_func(input, output, cos_bit, range_bit);
+        inv_txfm_func(output, round_trip_output, cos_bit, range_bit);
+
+        for (int ni = 0; ni < txfm_size; ++ni) {
+          int node_err =
+              abs(input[ni] - round_shift(round_trip_output[ni],
+                                          get_max_bit(txfm_size) - 1));
+          EXPECT_LE(node_err, max_error);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/av1_inv_txfm2d_test.cc b/third_party/aom/test/av1_inv_txfm2d_test.cc
new file mode 100644
index 0000000000..35a87a43b8
--- /dev/null
+++ b/third_party/aom/test/av1_inv_txfm2d_test.cc
@@ -0,0 +1,406 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <tuple>
+#include <vector>
+
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "av1/common/av1_inv_txfm1d_cfg.h"
+#include "av1/common/scan.h"
+#include "test/acm_random.h"
+#include "test/av1_txfm_test.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+using libaom_test::bd;
+using libaom_test::compute_avg_abs_error;
+using libaom_test::input_base;
+using libaom_test::InvTxfm2dFunc;
+using libaom_test::LbdInvTxfm2dFunc;
+using libaom_test::tx_type_name;
+
+using ::testing::Combine;
+using ::testing::Range;
+using ::testing::Values;
+
+using std::vector;
+
+typedef TX_TYPE TxType;
+typedef TX_SIZE TxSize;
+
+namespace {
+
+// AV1InvTxfm2dParam argument list:
+// tx_type_, tx_size_, max_error_, max_avg_error_
+typedef std::tuple<TxType, TxSize, int, double> AV1InvTxfm2dParam;
+
+class AV1InvTxfm2d : public ::testing::TestWithParam<AV1InvTxfm2dParam> {
+ public:
+  void SetUp() override {
+    tx_type_ = GET_PARAM(0);
+    tx_size_ = GET_PARAM(1);
+    max_error_ = GET_PARAM(2);
+    max_avg_error_ = GET_PARAM(3);
+  }
+
+  void RunRoundtripCheck() {
+    int tx_w = tx_size_wide[tx_size_];
+    int tx_h = tx_size_high[tx_size_];
+    int txfm2d_size = tx_w * tx_h;
+    const FwdTxfm2dFunc fwd_txfm_func = libaom_test::fwd_txfm_func_ls[tx_size_];
+    const InvTxfm2dFunc inv_txfm_func = libaom_test::inv_txfm_func_ls[tx_size_];
+    double avg_abs_error = 0;
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    const int count = 500;
+
+    for (int ci = 0; ci < count; ci++) {
+      DECLARE_ALIGNED(16, int16_t, input[64 * 64]) = { 0 };
+      ASSERT_LE(txfm2d_size, NELEMENTS(input));
+
+      for (int ni = 0; ni < txfm2d_size; ++ni) {
+        if (ci == 0) {
+          int extreme_input = input_base - 1;
+          input[ni] = extreme_input;  // extreme case
+        } else {
+          input[ni] = rnd.Rand16() % input_base;
+        }
+      }
+
+      DECLARE_ALIGNED(16, uint16_t, expected[64 * 64]) = { 0 };
+      ASSERT_LE(txfm2d_size, NELEMENTS(expected));
+      if (TxfmUsesApproximation()) {
+        // Compare reference forward HT + inverse HT vs forward HT + inverse HT.
+        double ref_input[64 * 64];
+        ASSERT_LE(txfm2d_size, NELEMENTS(ref_input));
+        for (int ni = 0; ni < txfm2d_size; ++ni) {
+          ref_input[ni] = input[ni];
+        }
+        double ref_coeffs[64 * 64] = { 0 };
+        ASSERT_LE(txfm2d_size, NELEMENTS(ref_coeffs));
+        ASSERT_EQ(tx_type_, static_cast<TxType>(DCT_DCT));
+        libaom_test::reference_hybrid_2d(ref_input, ref_coeffs, tx_type_,
+                                         tx_size_);
+        DECLARE_ALIGNED(16, int32_t, ref_coeffs_int[64 * 64]) = { 0 };
+        ASSERT_LE(txfm2d_size, NELEMENTS(ref_coeffs_int));
+        for (int ni = 0; ni < txfm2d_size; ++ni) {
+          ref_coeffs_int[ni] = (int32_t)round(ref_coeffs[ni]);
+        }
+        inv_txfm_func(ref_coeffs_int, expected, tx_w, tx_type_, bd);
+      } else {
+        // Compare original input vs forward HT + inverse HT.
+        for (int ni = 0; ni < txfm2d_size; ++ni) {
+          expected[ni] = input[ni];
+        }
+      }
+
+      DECLARE_ALIGNED(16, int32_t, coeffs[64 * 64]) = { 0 };
+      ASSERT_LE(txfm2d_size, NELEMENTS(coeffs));
+      fwd_txfm_func(input, coeffs, tx_w, tx_type_, bd);
+
+      DECLARE_ALIGNED(16, uint16_t, actual[64 * 64]) = { 0 };
+      ASSERT_LE(txfm2d_size, NELEMENTS(actual));
+      inv_txfm_func(coeffs, actual, tx_w, tx_type_, bd);
+
+      double actual_max_error = 0;
+      for (int ni = 0; ni < txfm2d_size; ++ni) {
+        const double this_error = abs(expected[ni] - actual[ni]);
+        actual_max_error = AOMMAX(actual_max_error, this_error);
+      }
+      EXPECT_GE(max_error_, actual_max_error)
+          << " tx_w: " << tx_w << " tx_h " << tx_h
+          << " tx_type: " << tx_type_name[tx_type_];
+      if (actual_max_error > max_error_) {  // exit early.
+        break;
+      }
+      avg_abs_error += compute_avg_abs_error<uint16_t, uint16_t>(
+          expected, actual, txfm2d_size);
+    }
+
+    avg_abs_error /= count;
+    EXPECT_GE(max_avg_error_, avg_abs_error)
+        << " tx_w: " << tx_w << " tx_h " << tx_h
+        << " tx_type: " << tx_type_name[tx_type_];
+  }
+
+ private:
+  bool TxfmUsesApproximation() {
+    if (tx_size_wide[tx_size_] == 64 || tx_size_high[tx_size_] == 64) {
+      return true;
+    }
+    return false;
+  }
+
+  int max_error_;
+  double max_avg_error_;
+  TxType tx_type_;
+  TxSize tx_size_;
+};
+
+static int max_error_ls[TX_SIZES_ALL] = {
+  2,  // 4x4 transform
+  2,  // 8x8 transform
+  2,  // 16x16 transform
+  4,  // 32x32 transform
+  3,  // 64x64 transform
+  2,  // 4x8 transform
+  2,  // 8x4 transform
+  2,  // 8x16 transform
+  2,  // 16x8 transform
+  3,  // 16x32 transform
+  3,  // 32x16 transform
+  5,  // 32x64 transform
+  5,  // 64x32 transform
+  2,  // 4x16 transform
+  2,  // 16x4 transform
+  2,  // 8x32 transform
+  2,  // 32x8 transform
+  3,  // 16x64 transform
+  3,  // 64x16 transform
+};
+
+static double avg_error_ls[TX_SIZES_ALL] = {
+  0.002,  // 4x4 transform
+  0.05,   // 8x8 transform
+  0.07,   // 16x16 transform
+  0.4,    // 32x32 transform
+  0.3,    // 64x64 transform
+  0.02,   // 4x8 transform
+  0.02,   // 8x4 transform
+  0.04,   // 8x16 transform
+  0.07,   // 16x8 transform
+  0.4,    // 16x32 transform
+  0.5,    // 32x16 transform
+  0.38,   // 32x64 transform
+  0.39,   // 64x32 transform
+  0.2,    // 4x16 transform
+  0.2,    // 16x4 transform
+  0.2,    // 8x32 transform
+  0.2,    // 32x8 transform
+  0.38,   // 16x64 transform
+  0.38,   // 64x16 transform
+};
+
+vector<AV1InvTxfm2dParam> GetInvTxfm2dParamList() {
+  vector<AV1InvTxfm2dParam> param_list;
+  for (int s = 0; s < TX_SIZES; ++s) {
+    const int max_error = max_error_ls[s];
+    const double avg_error = avg_error_ls[s];
+    for (int t = 0; t < TX_TYPES; ++t) {
+      const TxType tx_type = static_cast<TxType>(t);
+      const TxSize tx_size = static_cast<TxSize>(s);
+      if (libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) {
+        param_list.push_back(
+            AV1InvTxfm2dParam(tx_type, tx_size, max_error, avg_error));
+      }
+    }
+  }
+  return param_list;
+}
+
+INSTANTIATE_TEST_SUITE_P(C, AV1InvTxfm2d,
+                         ::testing::ValuesIn(GetInvTxfm2dParamList()));
+
+TEST_P(AV1InvTxfm2d, RunRoundtripCheck) { RunRoundtripCheck(); }
+
+TEST(AV1InvTxfm2d, CfgTest) {
+  for (int bd_idx = 0; bd_idx < BD_NUM; ++bd_idx) {
+    int bd = libaom_test::bd_arr[bd_idx];
+    int8_t low_range = libaom_test::low_range_arr[bd_idx];
+    int8_t high_range = libaom_test::high_range_arr[bd_idx];
+    for (int tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) {
+      for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+        if (libaom_test::IsTxSizeTypeValid(static_cast<TxSize>(tx_size),
+                                           static_cast<TxType>(tx_type)) ==
+            false) {
+          continue;
+        }
+        TXFM_2D_FLIP_CFG cfg;
+        av1_get_inv_txfm_cfg(static_cast<TxType>(tx_type),
+                             static_cast<TxSize>(tx_size), &cfg);
+        int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
+        int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
+        av1_gen_inv_stage_range(stage_range_col, stage_range_row, &cfg,
+                                static_cast<TxSize>(tx_size), bd);
+        libaom_test::txfm_stage_range_check(stage_range_col, cfg.stage_num_col,
+                                            cfg.cos_bit_col, low_range,
+                                            high_range);
+        libaom_test::txfm_stage_range_check(stage_range_row, cfg.stage_num_row,
+                                            cfg.cos_bit_row, low_range,
+                                            high_range);
+      }
+    }
+  }
+}
+
+typedef std::tuple<const LbdInvTxfm2dFunc> AV1LbdInvTxfm2dParam;
+class AV1LbdInvTxfm2d : public ::testing::TestWithParam<AV1LbdInvTxfm2dParam> {
+ public:
+  void SetUp() override { target_func_ = GET_PARAM(0); }
+  void RunAV1InvTxfm2dTest(TxType tx_type, TxSize tx_size, int run_times,
+                           int gt_int16 = 0);
+
+ private:
+  LbdInvTxfm2dFunc target_func_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1LbdInvTxfm2d);
+
+void AV1LbdInvTxfm2d::RunAV1InvTxfm2dTest(TxType tx_type, TxSize tx_size,
+                                          int run_times, int gt_int16) {
+  FwdTxfm2dFunc fwd_func_ = libaom_test::fwd_txfm_func_ls[tx_size];
+  InvTxfm2dFunc ref_func_ = libaom_test::inv_txfm_func_ls[tx_size];
+  if (fwd_func_ == nullptr || ref_func_ == nullptr || target_func_ == nullptr) {
+    return;
+  }
+  const int bd = 8;
+  const int BLK_WIDTH = 64;
+  const int BLK_SIZE = BLK_WIDTH * BLK_WIDTH;
+  DECLARE_ALIGNED(16, int16_t, input[BLK_SIZE]) = { 0 };
+  DECLARE_ALIGNED(32, int32_t, inv_input[BLK_SIZE]) = { 0 };
+  DECLARE_ALIGNED(16, uint8_t, output[BLK_SIZE]) = { 0 };
+  DECLARE_ALIGNED(16, uint16_t, ref_output[BLK_SIZE]) = { 0 };
+  int stride = BLK_WIDTH;
+  int rows = tx_size_high[tx_size];
+  int cols = tx_size_wide[tx_size];
+  const int rows_nonezero = AOMMIN(32, rows);
+  const int cols_nonezero = AOMMIN(32, cols);
+  run_times /= (rows * cols);
+  run_times = AOMMAX(1, run_times);
+  const SCAN_ORDER *scan_order = get_default_scan(tx_size, tx_type);
+  const int16_t *scan = scan_order->scan;
+  const int16_t eobmax = rows_nonezero * cols_nonezero;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int randTimes = run_times == 1 ? (eobmax + 500) : 1;
+
+  for (int cnt = 0; cnt < randTimes; ++cnt) {
+    const int16_t max_in = (1 << (bd)) - 1;
+    for (int r = 0; r < BLK_WIDTH; ++r) {
+      for (int c = 0; c < BLK_WIDTH; ++c) {
+        input[r * cols + c] = (cnt == 0) ? max_in : rnd.Rand8Extremes();
+        output[r * stride + c] = (cnt == 0) ? 128 : rnd.Rand8();
+        ref_output[r * stride + c] = output[r * stride + c];
+      }
+    }
+    fwd_func_(input, inv_input, stride, tx_type, bd);
+
+    // produce eob input by setting high freq coeffs to zero
+    const int eob = AOMMIN(cnt + 1, eobmax);
+    for (int i = eob; i < eobmax; i++) {
+      inv_input[scan[i]] = 0;
+    }
+    if (gt_int16) {
+      inv_input[scan[eob - 1]] = ((int32_t)INT16_MAX * 100 / 141);
+    }
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      ref_func_(inv_input, ref_output, stride, tx_type, bd);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      target_func_(inv_input, output, stride, tx_type, tx_size, eob);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 10) {
+      printf("txfm[%d] %3dx%-3d:%7.2f/%7.2fns", tx_type, cols, rows, time1,
+             time2);
+      printf("(%3.2f)\n", time1 / time2);
+    }
+    for (int r = 0; r < rows; ++r) {
+      for (int c = 0; c < cols; ++c) {
+        uint8_t ref_value = static_cast<uint8_t>(ref_output[r * stride + c]);
+        if (ref_value != output[r * stride + c]) {
+          printf(" ");
+        }
+        ASSERT_EQ(ref_value, output[r * stride + c])
+            << "[" << r << "," << c << "] " << cnt << " tx_size: " << cols
+            << "x" << rows << " tx_type: " << tx_type_name[tx_type] << " eob "
+            << eob;
+      }
+    }
+  }
+}
+
+TEST_P(AV1LbdInvTxfm2d, match) {
+  for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) {
+    for (int i = 0; i < (int)TX_TYPES; ++i) {
+      if (libaom_test::IsTxSizeTypeValid(static_cast<TxSize>(j),
+                                         static_cast<TxType>(i))) {
+        RunAV1InvTxfm2dTest(static_cast<TxType>(i), static_cast<TxSize>(j), 1);
+      }
+    }
+  }
+}
+
+TEST_P(AV1LbdInvTxfm2d, gt_int16) {
+  static const TxType types[] = { DCT_DCT, ADST_DCT, FLIPADST_DCT, IDTX,
+                                  V_DCT,   H_DCT,    H_ADST,       H_FLIPADST };
+  for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) {
+    const TxSize sz = static_cast<TxSize>(j);
+    for (uint8_t i = 0; i < sizeof(types) / sizeof(types[0]); ++i) {
+      const TxType tp = types[i];
+      if (libaom_test::IsTxSizeTypeValid(sz, tp)) {
+        RunAV1InvTxfm2dTest(tp, sz, 1, 1);
+      }
+    }
+  }
+}
+
+TEST_P(AV1LbdInvTxfm2d, DISABLED_Speed) {
+  for (int j = 1; j < (int)(TX_SIZES_ALL); ++j) {
+    for (int i = 0; i < (int)TX_TYPES; ++i) {
+      if (libaom_test::IsTxSizeTypeValid(static_cast<TxSize>(j),
+                                         static_cast<TxType>(i))) {
+        RunAV1InvTxfm2dTest(static_cast<TxType>(i), static_cast<TxSize>(j),
+                            10000000);
+      }
+    }
+  }
+}
+
+#if HAVE_SSSE3
+extern "C" void av1_lowbd_inv_txfm2d_add_ssse3(const int32_t *input,
+                                               uint8_t *output, int stride,
+                                               TxType tx_type, TxSize tx_size,
+                                               int eob);
+INSTANTIATE_TEST_SUITE_P(SSSE3, AV1LbdInvTxfm2d,
+                         ::testing::Values(av1_lowbd_inv_txfm2d_add_ssse3));
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX2
+extern "C" void av1_lowbd_inv_txfm2d_add_avx2(const int32_t *input,
+                                              uint8_t *output, int stride,
+                                              TxType tx_type, TxSize tx_size,
+                                              int eob);
+
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1LbdInvTxfm2d,
+                         ::testing::Values(av1_lowbd_inv_txfm2d_add_avx2));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+extern "C" void av1_lowbd_inv_txfm2d_add_neon(const int32_t *input,
+                                              uint8_t *output, int stride,
+                                              TX_TYPE tx_type, TX_SIZE tx_size,
+                                              int eob);
+
+INSTANTIATE_TEST_SUITE_P(NEON, AV1LbdInvTxfm2d,
+                         ::testing::Values(av1_lowbd_inv_txfm2d_add_neon));
+#endif  // HAVE_NEON
+
+}  // namespace
diff --git a/third_party/aom/test/av1_k_means_test.cc b/third_party/aom/test/av1_k_means_test.cc
new file mode 100644
index 0000000000..7e66a8e01d
--- /dev/null
+++ b/third_party/aom/test/av1_k_means_test.cc
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdlib>
+#include <new>
+#include <tuple>
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "av1/encoder/palette.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace AV1Kmeans {
+typedef void (*av1_calc_indices_dim1_func)(const int16_t *data,
+                                           const int16_t *centroids,
+                                           uint8_t *indices,
+                                           int64_t *total_dist, int n, int k);
+typedef void (*av1_calc_indices_dim2_func)(const int16_t *data,
+                                           const int16_t *centroids,
+                                           uint8_t *indices,
+                                           int64_t *total_dist, int n, int k);
+
+typedef std::tuple<av1_calc_indices_dim1_func, BLOCK_SIZE>
+    av1_calc_indices_dim1Param;
+
+typedef std::tuple<av1_calc_indices_dim2_func, BLOCK_SIZE>
+    av1_calc_indices_dim2Param;
+
+class AV1KmeansTest1
+    : public ::testing::TestWithParam<av1_calc_indices_dim1Param> {
+ public:
+  ~AV1KmeansTest1() override;
+  void SetUp() override;
+
+ protected:
+  void RunCheckOutput(av1_calc_indices_dim1_func test_impl, BLOCK_SIZE bsize,
+                      int centroids);
+  void RunSpeedTest(av1_calc_indices_dim1_func test_impl, BLOCK_SIZE bsize,
+                    int centroids);
+  bool CheckResult(int n) {
+    for (int idx = 0; idx < n; ++idx) {
+      if (indices1_[idx] != indices2_[idx]) {
+        printf("%d ", idx);
+        printf("%d != %d ", indices1_[idx], indices2_[idx]);
+        return false;
+      }
+    }
+    return true;
+  }
+
+  libaom_test::ACMRandom rnd_;
+  int16_t data_[4096];
+  int16_t centroids_[8];
+  uint8_t indices1_[4096];
+  uint8_t indices2_[4096];
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1KmeansTest1);
+
+AV1KmeansTest1::~AV1KmeansTest1() = default;
+
+void AV1KmeansTest1::SetUp() {
+  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
+  for (int i = 0; i < 4096; ++i) {
+    data_[i] = (int)rnd_.Rand8() << 4;
+  }
+  for (int i = 0; i < 8; i++) {
+    centroids_[i] = (int)rnd_.Rand8() << 4;
+  }
+}
+
+void AV1KmeansTest1::RunCheckOutput(av1_calc_indices_dim1_func test_impl,
+                                    BLOCK_SIZE bsize, int k) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int n = w * h;
+  int64_t total_dist_dim1, total_dist_impl;
+  av1_calc_indices_dim1_c(data_, centroids_, indices1_, &total_dist_dim1, n, k);
+  test_impl(data_, centroids_, indices2_, &total_dist_impl, n, k);
+
+  ASSERT_EQ(total_dist_dim1, total_dist_impl);
+  ASSERT_EQ(CheckResult(n), true)
+      << " block " << bsize << " index " << n << " Centroids " << k;
+}
+
+void AV1KmeansTest1::RunSpeedTest(av1_calc_indices_dim1_func test_impl,
+                                  BLOCK_SIZE bsize, int k) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int n = w * h;
+  const int num_loops = 1000000000 / n;
+
+  av1_calc_indices_dim1_func funcs[2] = { av1_calc_indices_dim1_c, test_impl };
+  double elapsed_time[2] = { 0 };
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    av1_calc_indices_dim1_func func = funcs[i];
+    for (int j = 0; j < num_loops; ++j) {
+      func(data_, centroids_, indices1_, /*total_dist=*/nullptr, n, k);
+    }
+    aom_usec_timer_mark(&timer);
+    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time / num_loops;
+  }
+  printf("av1_calc_indices_dim1 indices= %d centroids=%d: %7.2f/%7.2fns", n, k,
+         elapsed_time[0], elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+TEST_P(AV1KmeansTest1, CheckOutput) {
+  // centroids = 2..8
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 2);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 3);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 4);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 5);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 6);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 7);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 8);
+}
+
+TEST_P(AV1KmeansTest1, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 2);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 3);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 4);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 5);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 6);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 7);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 8);
+}
+
+class AV1KmeansTest2
+    : public ::testing::TestWithParam<av1_calc_indices_dim2Param> {
+ public:
+  ~AV1KmeansTest2() override;
+  void SetUp() override;
+
+ protected:
+  void RunCheckOutput(av1_calc_indices_dim2_func test_impl, BLOCK_SIZE bsize,
+                      int centroids);
+  void RunSpeedTest(av1_calc_indices_dim2_func test_impl, BLOCK_SIZE bsize,
+                    int centroids);
+  bool CheckResult(int n) {
+    bool flag = true;
+    for (int idx = 0; idx < n; ++idx) {
+      if (indices1_[idx] != indices2_[idx]) {
+        printf("%d ", idx);
+        printf("%d != %d ", indices1_[idx], indices2_[idx]);
+        flag = false;
+      }
+    }
+    if (flag == false) {
+      return false;
+    }
+    return true;
+  }
+
+  libaom_test::ACMRandom rnd_;
+  int16_t data_[4096 * 2];
+  int16_t centroids_[8 * 2];
+  uint8_t indices1_[4096];
+  uint8_t indices2_[4096];
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1KmeansTest2);
+
+AV1KmeansTest2::~AV1KmeansTest2() = default;
+
+void AV1KmeansTest2::SetUp() {
+  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
+  for (int i = 0; i < 4096 * 2; ++i) {
+    data_[i] = (int)rnd_.Rand8();
+  }
+  for (int i = 0; i < 8 * 2; i++) {
+    centroids_[i] = (int)rnd_.Rand8();
+  }
+}
+
+void AV1KmeansTest2::RunCheckOutput(av1_calc_indices_dim2_func test_impl,
+                                    BLOCK_SIZE bsize, int k) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int n = w * h;
+  int64_t total_dist_dim2, total_dist_impl;
+  av1_calc_indices_dim2_c(data_, centroids_, indices1_, &total_dist_dim2, n, k);
+  test_impl(data_, centroids_, indices2_, &total_dist_impl, n, k);
+
+  ASSERT_EQ(total_dist_dim2, total_dist_impl);
+  ASSERT_EQ(CheckResult(n), true)
+      << " block " << bsize << " index " << n << " Centroids " << k;
+}
+
+void AV1KmeansTest2::RunSpeedTest(av1_calc_indices_dim2_func test_impl,
+                                  BLOCK_SIZE bsize, int k) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int n = w * h;
+  const int num_loops = 1000000000 / n;
+
+  av1_calc_indices_dim2_func funcs[2] = { av1_calc_indices_dim2_c, test_impl };
+  double elapsed_time[2] = { 0 };
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    av1_calc_indices_dim2_func func = funcs[i];
+    for (int j = 0; j < num_loops; ++j) {
+      func(data_, centroids_, indices1_, /*total_dist=*/nullptr, n, k);
+    }
+    aom_usec_timer_mark(&timer);
+    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time / num_loops;
+  }
+  printf("av1_calc_indices_dim2 indices= %d centroids=%d: %7.2f/%7.2fns", n, k,
+         elapsed_time[0], elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+TEST_P(AV1KmeansTest2, CheckOutput) {
+  // centroids = 2..8
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 2);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 3);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 4);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 5);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 6);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 7);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 8);
+}
+
+TEST_P(AV1KmeansTest2, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 2);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 3);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 4);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 5);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 6);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 7);
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 8);
+}
+
+#if HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON
+const BLOCK_SIZE kValidBlockSize[] = { BLOCK_8X8,   BLOCK_8X16,  BLOCK_8X32,
+                                       BLOCK_16X8,  BLOCK_16X16, BLOCK_16X32,
+                                       BLOCK_32X8,  BLOCK_32X16, BLOCK_32X32,
+                                       BLOCK_32X64, BLOCK_64X32, BLOCK_64X64,
+                                       BLOCK_16X64, BLOCK_64X16 };
+#endif
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AV1KmeansTest1,
+    ::testing::Combine(::testing::Values(&av1_calc_indices_dim1_sse2),
+                       ::testing::ValuesIn(kValidBlockSize)));
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AV1KmeansTest2,
+    ::testing::Combine(::testing::Values(&av1_calc_indices_dim2_sse2),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1KmeansTest1,
+    ::testing::Combine(::testing::Values(&av1_calc_indices_dim1_avx2),
+                       ::testing::ValuesIn(kValidBlockSize)));
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1KmeansTest2,
+    ::testing::Combine(::testing::Values(&av1_calc_indices_dim2_avx2),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1KmeansTest1,
+    ::testing::Combine(::testing::Values(&av1_calc_indices_dim1_neon),
+                       ::testing::ValuesIn(kValidBlockSize)));
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1KmeansTest2,
+    ::testing::Combine(::testing::Values(&av1_calc_indices_dim2_neon),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+}  // namespace AV1Kmeans
diff --git a/third_party/aom/test/av1_key_value_api_test.cc b/third_party/aom/test/av1_key_value_api_test.cc
new file mode 100644
index 0000000000..a5734f6beb
--- /dev/null
+++ b/third_party/aom/test/av1_key_value_api_test.cc
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstring>
+#include <tuple>
+
+#include "aom/aom_codec.h"
+#include "aom/aom_decoder.h"
+#include "aom/aom_encoder.h"
+#include "aom/aomcx.h"
+#include "aom/aomdx.h"
+#include "config/aom_config.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+typedef std::tuple<const char *, const char *> KeyValParam;
+
+class BaseKeyValAPI : public testing::Test {
+ public:
+  void SetUp() override {
+#if CONFIG_AV1_ENCODER
+    aom_codec_iface_t *iface_cx = aom_codec_av1_cx();
+    aom_codec_enc_cfg_t enc_cfg;
+#if CONFIG_REALTIME_ONLY
+    const int usage = 1;
+#else
+    const int usage = 0;
+#endif
+    EXPECT_EQ(AOM_CODEC_OK,
+              aom_codec_enc_config_default(iface_cx, &enc_cfg, usage));
+    EXPECT_EQ(AOM_CODEC_OK,
+              aom_codec_enc_init(&enc_, iface_cx, &enc_cfg, usage));
+#endif
+#if CONFIG_AV1_DECODER
+    aom_codec_iface_t *iface_dx = aom_codec_av1_dx();
+    aom_codec_dec_cfg_t dec_cfg = { 0, 0, 0, !FORCE_HIGHBITDEPTH_DECODING };
+
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_dec_init(&dec_, iface_dx, &dec_cfg, 0));
+#endif
+  }
+
+  void TearDown() override {
+#if CONFIG_AV1_ENCODER
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc_));
+#endif
+#if CONFIG_AV1_DECODER
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&dec_));
+#endif
+  }
+
+ protected:
+#if CONFIG_AV1_ENCODER
+  aom_codec_ctx_t enc_;
+#endif
+#if CONFIG_AV1_DECODER
+  aom_codec_ctx_t dec_;
+#endif
+};
+
+// Tests on encoder options.
+// Need to add ones for the decoder in the future if it is also supported in the
+// key & value API.
+#if CONFIG_AV1_ENCODER
+class EncValidTest : public BaseKeyValAPI,
+                     public testing::WithParamInterface<KeyValParam> {};
+class EncInvalidTest : public BaseKeyValAPI,
+                       public testing::WithParamInterface<KeyValParam> {};
+
+TEST_P(EncValidTest, Valid) {
+  const char *key = std::get<0>(GetParam());
+  const char *val = std::get<1>(GetParam());
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_set_option(&enc_, key, val));
+}
+
+TEST_P(EncInvalidTest, NullArg) {
+  const char *key = std::get<0>(GetParam());
+  const char *val = std::get<1>(GetParam());
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_set_option(nullptr, key, val));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_set_option(&enc_, nullptr, val));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_set_option(&enc_, key, nullptr));
+}
+
+TEST_P(EncInvalidTest, InvalidParam) {
+  const char *key = std::get<0>(GetParam());
+  const char *val = std::get<1>(GetParam());
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_set_option(&enc_, key, val));
+  ASSERT_NE(aom_codec_error_detail(&enc_), nullptr);
+  EXPECT_GT(strlen(aom_codec_error_detail(&enc_)), 0u);
+}
+
+// No test for ratio / list for now since the API does not support any of the
+// parameters of these type.
+// The string type typically involves reading a path/file, which brings
+// potential fails.
+const KeyValParam enc_valid_params[] = {
+  std::make_tuple("auto-intra-tools-off", "1"),  // uint
+  std::make_tuple("min-gf-interval", "10"),      // uint
+  std::make_tuple("min-partition-size", "4"),    // int
+  std::make_tuple("tune", "psnr"),               // enum
+};
+
+const KeyValParam enc_invalid_params[] = {
+  // no match
+  std::make_tuple("a-b-c", "10"),
+  // uint
+  std::make_tuple("min-gf-interval", "-1"),
+  std::make_tuple("min-gf-interval", "1.1"),
+  std::make_tuple("min-gf-interval", "abc"),
+  // int
+  std::make_tuple("min-partition-size", "1.1"),
+  std::make_tuple("min-partition-size", "abc"),
+  // enum
+  std::make_tuple("tune", "PsnR1"),
+  // out of range
+  std::make_tuple("cq-level", "1000"),
+};
+
+INSTANTIATE_TEST_SUITE_P(KeyValAPI, EncValidTest,
+                         testing::ValuesIn(enc_valid_params));
+
+INSTANTIATE_TEST_SUITE_P(KeyValAPI, EncInvalidTest,
+                         testing::ValuesIn(enc_invalid_params));
+#endif  // CONFIG_AV1_ENCODER
+
+}  // namespace
diff --git a/third_party/aom/test/av1_nn_predict_test.cc b/third_party/aom/test/av1_nn_predict_test.cc
new file mode 100644
index 0000000000..4201ea6ce6
--- /dev/null
+++ b/third_party/aom/test/av1_nn_predict_test.cc
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "aom/aom_integer.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/encoder/ml.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+#include "test/util.h"
+#include "test/register_state_check.h"
+#include "test/acm_random.h"
+
+namespace {
+typedef void (*NnPredict_Func)(const float *const input_nodes,
+                               const NN_CONFIG *const nn_config,
+                               int reduce_prec, float *const output);
+
+typedef std::tuple<const NnPredict_Func> NnPredictTestParam;
+
+const float epsilon = 1e-3f;  // Error threshold for functional equivalence
+
+class NnPredictTest : public ::testing::TestWithParam<NnPredictTestParam> {
+ public:
+  void SetUp() override {
+    const int MAX_NODES2 = NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER;
+    // Allocate two massive buffers on the heap for edge weights and node bias
+    // Then set-up the double-dimension arrays pointing into the big buffers
+    weights_buf = (float *)aom_malloc(MAX_NODES2 * (NN_MAX_HIDDEN_LAYERS + 1) *
+                                      sizeof(*weights_buf));
+    bias_buf =
+        (float *)aom_malloc(NN_MAX_NODES_PER_LAYER *
+                            (NN_MAX_HIDDEN_LAYERS + 1) * sizeof(*bias_buf));
+    ASSERT_NE(weights_buf, nullptr);
+    ASSERT_NE(bias_buf, nullptr);
+    for (int i = 0; i < NN_MAX_HIDDEN_LAYERS + 1; i++) {
+      weights[i] = &weights_buf[i * MAX_NODES2];
+      bias[i] = &bias_buf[i * NN_MAX_NODES_PER_LAYER];
+    }
+    target_func_ = GET_PARAM(0);
+  }
+  void TearDown() override {
+    aom_free(weights_buf);
+    aom_free(bias_buf);
+  }
+  void RunNnPredictTest(const NN_CONFIG *const shape);
+  void RunNnPredictSpeedTest(const NN_CONFIG *const shape, const int run_times);
+  void RunNnPredictTest_all(const NN_CONFIG *const shapes,
+                            const int num_shapes);
+  void RunNnPredictSpeedTest_all(const NN_CONFIG *const shapes,
+                                 const int num_shapes, const int run_times);
+
+ private:
+  NnPredict_Func target_func_;
+  libaom_test::ACMRandom rng_;
+  float *weights[NN_MAX_HIDDEN_LAYERS + 1] = {};
+  float *bias[NN_MAX_HIDDEN_LAYERS + 1] = {};
+  float *weights_buf = nullptr, *bias_buf = nullptr;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(NnPredictTest);
+
+void NnPredictTest::RunNnPredictTest(const NN_CONFIG *const shape) {
+  float inputs[NN_MAX_NODES_PER_LAYER] = { 0 };
+  float outputs_test[NN_MAX_NODES_PER_LAYER] = { 0 };
+  float outputs_ref[NN_MAX_NODES_PER_LAYER] = { 0 };
+
+  NN_CONFIG nn_config;
+  memcpy(&nn_config, shape, sizeof(nn_config));
+
+  char shape_str[32] = { 0 };
+  snprintf(shape_str, sizeof(shape_str), "%d", shape->num_inputs);
+  for (int layer = 0; layer < shape->num_hidden_layers; layer++)
+    snprintf(&shape_str[strlen(shape_str)],
+             sizeof(shape_str) - strlen(shape_str), "x%d",
+             shape->num_hidden_nodes[layer]);
+  snprintf(&shape_str[strlen(shape_str)], sizeof(shape_str) - strlen(shape_str),
+           "x%d", shape->num_outputs);
+
+  for (int i = 0; i < NN_MAX_HIDDEN_LAYERS + 1; i++) {
+    nn_config.weights[i] = weights[i];
+    nn_config.bias[i] = bias[i];
+  }
+
+  for (int iter = 0; iter < 10000 && !HasFatalFailure(); ++iter) {
+    for (int node = 0; node < shape->num_inputs; node++) {
+      inputs[node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
+    }
+    for (int layer = 0; layer < shape->num_hidden_layers; layer++) {
+      for (int node = 0; node < NN_MAX_NODES_PER_LAYER; node++) {
+        bias[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
+      }
+      for (int node = 0; node < NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER;
+           node++) {
+        weights[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
+      }
+    }
+    // Now the outputs:
+    int layer = shape->num_hidden_layers;
+    for (int node = 0; node < NN_MAX_NODES_PER_LAYER; node++) {
+      bias[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
+    }
+    for (int node = 0; node < NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER;
+         node++) {
+      weights[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
+    }
+
+    av1_nn_predict_c(inputs, &nn_config, 0, outputs_ref);
+    target_func_(inputs, &nn_config, 0, outputs_test);
+
+    for (int node = 0; node < shape->num_outputs; node++) {
+      if (outputs_ref[node] < epsilon) {
+        ASSERT_LE(outputs_test[node], epsilon)
+            << "Reference output was near-zero, test output was not ("
+            << shape_str << ")";
+      } else {
+        const float error = outputs_ref[node] - outputs_test[node];
+        const float relative_error = fabsf(error / outputs_ref[node]);
+        ASSERT_LE(relative_error, epsilon)
+            << "Excessive relative error between reference and test ("
+            << shape_str << ")";
+      }
+    }
+  }
+}
+
+void NnPredictTest::RunNnPredictSpeedTest(const NN_CONFIG *const shape,
+                                          const int run_times) {
+  float inputs[NN_MAX_NODES_PER_LAYER] = { 0 };
+  float outputs_test[NN_MAX_NODES_PER_LAYER] = { 0 };
+  float outputs_ref[NN_MAX_NODES_PER_LAYER] = { 0 };
+
+  NN_CONFIG nn_config;
+  memcpy(&nn_config, shape, sizeof(nn_config));
+
+  for (int i = 0; i < NN_MAX_HIDDEN_LAYERS; i++) {
+    nn_config.weights[i] = weights[i];
+    nn_config.bias[i] = bias[i];
+  }
+  // Don't bother actually changing the values for inputs/weights/bias: it
+  // shouldn't make any difference for a speed test.
+
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_times; ++i) {
+    av1_nn_predict_c(inputs, &nn_config, 0, outputs_ref);
+  }
+  aom_usec_timer_mark(&timer);
+  const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_times; ++i) {
+    target_func_(inputs, &nn_config, 0, outputs_test);
+  }
+  aom_usec_timer_mark(&timer);
+  const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+  printf("%d", shape->num_inputs);
+  for (int layer = 0; layer < shape->num_hidden_layers; layer++)
+    printf("x%d", shape->num_hidden_nodes[layer]);
+  printf("x%d: ", shape->num_outputs);
+  printf("%7.2f/%7.2fns (%3.2f)\n", time1, time2, time1 / time2);
+}
+
+// This is all the neural network shapes observed executed in a few different
+// runs of the encoder.  It also conveniently covers all the kernels
+// implemented.
+static const NN_CONFIG kShapes[] = {
+  { 37, 1, 2, { 16, 24 }, {}, {} }, { 24, 24, 1, { 12 }, {}, {} },
+  { 10, 16, 1, { 64 }, {}, {} },    { 12, 1, 1, { 12 }, {}, {} },
+  { 12, 1, 1, { 24 }, {}, {} },     { 12, 1, 1, { 32 }, {}, {} },
+  { 18, 4, 1, { 24 }, {}, {} },     { 18, 4, 1, { 32 }, {}, {} },
+  { 4, 1, 1, { 16 }, {}, {} },      { 8, 1, 0, { 0 }, {}, {} },
+  { 8, 4, 1, { 16 }, {}, {} },      { 8, 1, 1, { 32 }, {}, {} },
+  { 9, 3, 1, { 32 }, {}, {} },      { 8, 4, 0, { 0 }, {}, {} },
+  { 8, 8, 0, { 0 }, {}, {} },       { 4, 4, 1, { 8 }, {}, {} },
+  { 4, 3, 0, { 64 }, {}, {} },
+};
+
+void NnPredictTest::RunNnPredictTest_all(const NN_CONFIG *const shapes,
+                                         const int num_shapes) {
+  for (int i = 0; i < num_shapes; i++) RunNnPredictTest(&shapes[i]);
+}
+
+void NnPredictTest::RunNnPredictSpeedTest_all(const NN_CONFIG *const shapes,
+                                              const int num_shapes,
+                                              const int run_times) {
+  for (int i = 0; i < num_shapes; i++)
+    NnPredictTest::RunNnPredictSpeedTest(&shapes[i], run_times);
+}
+
+TEST_P(NnPredictTest, RandomValues) {
+  RunNnPredictTest_all(kShapes, sizeof(kShapes) / sizeof(kShapes[0]));
+}
+
+TEST_P(NnPredictTest, DISABLED_Speed) {
+  RunNnPredictSpeedTest_all(kShapes, sizeof(kShapes) / sizeof(kShapes[0]),
+                            10000000);
+}
+
+#if !CONFIG_EXCLUDE_SIMD_MISMATCH
+#if HAVE_SSE3
+INSTANTIATE_TEST_SUITE_P(SSE3, NnPredictTest,
+                         ::testing::Values(av1_nn_predict_sse3));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, NnPredictTest,
+                         ::testing::Values(av1_nn_predict_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, NnPredictTest,
+                         ::testing::Values(av1_nn_predict_neon));
+#endif
+#endif  // !CONFIG_EXCLUDE_SIMD_MISMATCH
+
+}  // namespace
diff --git a/third_party/aom/test/av1_quantize_test.cc b/third_party/aom/test/av1_quantize_test.cc
new file mode 100644
index 0000000000..c8af14a356
--- /dev/null
+++ b/third_party/aom/test/av1_quantize_test.cc
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <stdlib.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "av1/common/scan.h"
+#include "av1/encoder/av1_quantize.h"
+
+namespace {
+
+typedef void (*QuantizeFpFunc)(
+    const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan, int log_scale);
+
+struct QuantizeFuncParams {
+  QuantizeFuncParams(QuantizeFpFunc qF = nullptr,
+                     QuantizeFpFunc qRefF = nullptr, int count = 16)
+      : qFunc(qF), qFuncRef(qRefF), coeffCount(count) {}
+  QuantizeFpFunc qFunc;
+  QuantizeFpFunc qFuncRef;
+  int coeffCount;
+};
+
+using libaom_test::ACMRandom;
+
+const int numTests = 1000;
+const int maxSize = 1024;
+const int roundFactorRange = 127;
+const int dequantRange = 32768;
+const int coeffRange = (1 << 20) - 1;
+
+class AV1QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> {
+ public:
+  void RunQuantizeTest() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, int16_t, zbin_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, round_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, quant_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[8]);
+    DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, int16_t, dequant_ptr[8]);
+    uint16_t eob;
+    uint16_t ref_eob;
+    int err_count_total = 0;
+    int first_failure = -1;
+    int count = params_.coeffCount;
+    const TX_SIZE txSize = getTxSize(count);
+    int log_scale = (txSize == TX_32X32);
+    QuantizeFpFunc quanFunc = params_.qFunc;
+    QuantizeFpFunc quanFuncRef = params_.qFuncRef;
+
+    const SCAN_ORDER scanOrder = av1_scan_orders[txSize][DCT_DCT];
+    for (int i = 0; i < numTests; i++) {
+      int err_count = 0;
+      ref_eob = eob = UINT16_MAX;
+      for (int j = 0; j < count; j++) {
+        coeff_ptr[j] = rnd(coeffRange);
+      }
+
+      for (int j = 0; j < 2; j++) {
+        zbin_ptr[j] = rnd.Rand16Signed();
+        quant_shift_ptr[j] = rnd.Rand16Signed();
+        // int16_t positive
+        dequant_ptr[j] = abs(rnd(dequantRange));
+        quant_ptr[j] = static_cast<int16_t>((1 << 16) / dequant_ptr[j]);
+        round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7;
+      }
+      for (int j = 2; j < 8; ++j) {
+        zbin_ptr[j] = zbin_ptr[1];
+        quant_shift_ptr[j] = quant_shift_ptr[1];
+        dequant_ptr[j] = dequant_ptr[1];
+        quant_ptr[j] = quant_ptr[1];
+        round_ptr[j] = round_ptr[1];
+      }
+      quanFuncRef(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
+                  quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr,
+                  &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale);
+
+      API_REGISTER_STATE_CHECK(
+          quanFunc(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
+                   quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
+                   scanOrder.scan, scanOrder.iscan, log_scale));
+
+      for (int j = 0; j < count; ++j) {
+        err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
+                     (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+        ASSERT_EQ(ref_qcoeff_ptr[j], qcoeff_ptr[j])
+            << "qcoeff error: i = " << i << " j = " << j << "\n";
+        EXPECT_EQ(ref_dqcoeff_ptr[j], dqcoeff_ptr[j])
+            << "dqcoeff error: i = " << i << " j = " << j << "\n";
+      }
+      EXPECT_EQ(ref_eob, eob) << "eob error: "
+                              << "i = " << i << "\n";
+      err_count += (ref_eob != eob);
+      if (err_count && !err_count_total) {
+        first_failure = i;
+      }
+      err_count_total += err_count;
+    }
+    EXPECT_EQ(0, err_count_total)
+        << "Error: Quantization Test, C output doesn't match SSE2 output. "
+        << "First failed at test case " << first_failure;
+  }
+
+  void RunEobTest() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, int16_t, zbin_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, round_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, quant_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[8]);
+    DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, int16_t, dequant_ptr[8]);
+    uint16_t eob;
+    uint16_t ref_eob;
+    int count = params_.coeffCount;
+    const TX_SIZE txSize = getTxSize(count);
+    int log_scale = (txSize == TX_32X32);
+    QuantizeFpFunc quanFunc = params_.qFunc;
+    QuantizeFpFunc quanFuncRef = params_.qFuncRef;
+    const SCAN_ORDER scanOrder = av1_scan_orders[txSize][DCT_DCT];
+
+    for (int i = 0; i < numTests; i++) {
+      ref_eob = eob = UINT16_MAX;
+      for (int j = 0; j < count; j++) {
+        coeff_ptr[j] = 0;
+      }
+
+      coeff_ptr[rnd(count)] = rnd(coeffRange);
+      coeff_ptr[rnd(count)] = rnd(coeffRange);
+      coeff_ptr[rnd(count)] = rnd(coeffRange);
+
+      for (int j = 0; j < 2; j++) {
+        zbin_ptr[j] = rnd.Rand16Signed();
+        quant_shift_ptr[j] = rnd.Rand16Signed();
+        // int16_t positive
+        dequant_ptr[j] = abs(rnd(dequantRange));
+        quant_ptr[j] = (1 << 16) / dequant_ptr[j];
+        round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7;
+      }
+      for (int j = 2; j < 8; ++j) {
+        zbin_ptr[j] = zbin_ptr[1];
+        quant_shift_ptr[j] = quant_shift_ptr[1];
+        dequant_ptr[j] = dequant_ptr[1];
+        quant_ptr[j] = quant_ptr[1];
+        round_ptr[j] = round_ptr[1];
+      }
+
+      quanFuncRef(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
+                  quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr,
+                  &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale);
+
+      API_REGISTER_STATE_CHECK(
+          quanFunc(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
+                   quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
+                   scanOrder.scan, scanOrder.iscan, log_scale));
+      EXPECT_EQ(ref_eob, eob) << "eob error: "
+                              << "i = " << i << "\n";
+    }
+  }
+
+  void SetUp() override { params_ = GetParam(); }
+
+  ~AV1QuantizeTest() override = default;
+
+ private:
+  TX_SIZE getTxSize(int count) {
+    switch (count) {
+      case 16: return TX_4X4;
+      case 64: return TX_8X8;
+      case 256: return TX_16X16;
+      case 1024: return TX_32X32;
+      default: return TX_4X4;
+    }
+  }
+
+  QuantizeFuncParams params_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1QuantizeTest);
+
+TEST_P(AV1QuantizeTest, BitExactCheck) { RunQuantizeTest(); }
+TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); }
+
+TEST(AV1QuantizeTest, QuantizeFpNoQmatrix) {
+  // Here we use a uniform quantizer as an example
+  const int16_t dequant_ptr[2] = { 78, 93 };  // quantize step
+  const int16_t round_ptr[2] = { 39, 46 };    // round ~= dequant / 2
+
+  // quant ~= 2^16 / dequant. This is a 16-bit fixed point representation of the
+  // inverse of quantize step.
+  const int16_t quant_ptr[2] = { 840, 704 };
+  int log_scale = 0;
+  int coeff_count = 4;
+  const tran_low_t coeff_ptr[4] = { -449, 624, -14, 24 };
+  const tran_low_t ref_qcoeff_ptr[4] = { -6, 7, 0, 0 };
+  const tran_low_t ref_dqcoeff_ptr[4] = { -468, 651, 0, 0 };
+  const int16_t scan[4] = { 0, 1, 2, 3 };
+  tran_low_t qcoeff_ptr[4];
+  tran_low_t dqcoeff_ptr[4];
+  int eob = av1_quantize_fp_no_qmatrix(quant_ptr, dequant_ptr, round_ptr,
+                                       log_scale, scan, coeff_count, coeff_ptr,
+                                       qcoeff_ptr, dqcoeff_ptr);
+  EXPECT_EQ(eob, 2);
+  for (int i = 0; i < coeff_count; ++i) {
+    EXPECT_EQ(qcoeff_ptr[i], ref_qcoeff_ptr[i]);
+    EXPECT_EQ(dqcoeff_ptr[i], ref_dqcoeff_ptr[i]);
+  }
+}
+
+#if HAVE_SSE4_1
+const QuantizeFuncParams qfps[4] = {
+  QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
+                     16),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
+                     64),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
+                     256),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
+                     1024),
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1QuantizeTest, ::testing::ValuesIn(qfps));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+const QuantizeFuncParams qfps_avx2[4] = {
+  QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
+                     16),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
+                     64),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
+                     256),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
+                     1024),
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1QuantizeTest, ::testing::ValuesIn(qfps_avx2));
+#endif  // HAVE_AVX2
+
+}  // namespace
diff --git a/third_party/aom/test/av1_round_shift_array_test.cc b/third_party/aom/test/av1_round_shift_array_test.cc
new file mode 100644
index 0000000000..937e8645a5
--- /dev/null
+++ b/third_party/aom/test/av1_round_shift_array_test.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <tuple>
+
+#include "config/av1_rtcd.h"
+
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace AV1CompRoundShift {
+
+typedef void (*comp_round_shift_array_func)(int32_t *arr, int size, int bit);
+
+#if HAVE_SSE4_1 || HAVE_NEON
+const int kValidBitCheck[] = {
+  -4, -3, -2, -1, 0, 1, 2, 3, 4,
+};
+#endif  // HAVE_SSE4_1 || HAVE_NEON
+
+typedef std::tuple<comp_round_shift_array_func, BLOCK_SIZE, int>
+    CompRoundShiftParam;
+
+class AV1CompRoundShiftTest
+    : public ::testing::TestWithParam<CompRoundShiftParam> {
+ public:
+  ~AV1CompRoundShiftTest() override;
+
+  void SetUp() override {
+    rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
+  }
+
+ protected:
+  void RunCheckOutput(comp_round_shift_array_func test_impl, BLOCK_SIZE bsize,
+                      int bit);
+  void RunSpeedTest(comp_round_shift_array_func test_impl, BLOCK_SIZE bsize,
+                    int bit);
+
+  libaom_test::ACMRandom rnd_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompRoundShiftTest);
+
+AV1CompRoundShiftTest::~AV1CompRoundShiftTest() = default;
+
+void AV1CompRoundShiftTest::RunCheckOutput(
+    comp_round_shift_array_func test_impl, BLOCK_SIZE bsize, int bit) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int blk_wd = 64;
+  DECLARE_ALIGNED(32, int32_t, pred_[blk_wd]);
+  DECLARE_ALIGNED(32, int32_t, ref_buffer_[blk_wd]);
+  for (int i = 0; i < (blk_wd); ++i) {
+    ref_buffer_[i] = pred_[i] = rnd_.Rand31() / 16;
+  }
+  av1_round_shift_array_c(ref_buffer_, w, bit);
+  test_impl(pred_, w, bit);
+  for (int x = 0; x < w; ++x) {
+    ASSERT_EQ(ref_buffer_[x], pred_[x]) << w << "x" << h << "mismatch @"
+                                        << "(" << x << ")";
+  }
+}
+
+void AV1CompRoundShiftTest::RunSpeedTest(comp_round_shift_array_func test_impl,
+                                         BLOCK_SIZE bsize, int bit) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int blk_wd = 64;
+  DECLARE_ALIGNED(32, int32_t, ref_buffer_[blk_wd]);
+  for (int i = 0; i < (blk_wd); ++i) {
+    ref_buffer_[i] = rnd_.Rand31();
+  }
+
+  const int num_loops = 1000000000 / (w + h);
+  comp_round_shift_array_func funcs[2] = { av1_round_shift_array_c, test_impl };
+  double elapsed_time[2] = { 0 };
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    comp_round_shift_array_func func = funcs[i];
+    for (int j = 0; j < num_loops; ++j) {
+      func(ref_buffer_, w, bit);
+    }
+    aom_usec_timer_mark(&timer);
+    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time / num_loops;
+  }
+  printf("av1_round_shift_array %3dx%-3d: bit : %d %7.2f/%7.2fns", w, h, bit,
+         elapsed_time[0], elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+TEST_P(AV1CompRoundShiftTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2));
+}
+
+TEST_P(AV1CompRoundShiftTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2));
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AV1CompRoundShiftTest,
+    ::testing::Combine(::testing::Values(&av1_round_shift_array_sse4_1),
+                       ::testing::ValuesIn(txsize_to_bsize),
+                       ::testing::ValuesIn(kValidBitCheck)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1CompRoundShiftTest,
+    ::testing::Combine(::testing::Values(&av1_round_shift_array_neon),
+                       ::testing::ValuesIn(txsize_to_bsize),
+                       ::testing::ValuesIn(kValidBitCheck)));
+#endif
+
+}  // namespace AV1CompRoundShift
diff --git a/third_party/aom/test/av1_softmax_test.cc b/third_party/aom/test/av1_softmax_test.cc
new file mode 100644
index 0000000000..2b04af1342
--- /dev/null
+++ b/third_party/aom/test/av1_softmax_test.cc
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+#include <new>
+#include <tuple>
+
+#include "aom/aom_integer.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/encoder/ml.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+using FastSoftmaxFn = void (*)(const float *const input, float *output);
+using FastSoftmaxTestParams = std::tuple<const FastSoftmaxFn, int>;
+
+// Error thresholds for functional equivalence
+constexpr float kRelEpsilon = 5e-2f;
+constexpr float kAbsEpsilon = 5e-3f;
+
+class FastSoftmaxTest : public ::testing::TestWithParam<FastSoftmaxTestParams> {
+ public:
+  FastSoftmaxTest() : target_fn_(GET_PARAM(0)), num_classes_(GET_PARAM(1)) {}
+  void SetUp() override {
+    ref_buf_.reset(new (std::nothrow) float[num_classes_]());
+    ASSERT_NE(ref_buf_, nullptr);
+    dst_buf_.reset(new (std::nothrow) float[num_classes_]());
+    ASSERT_NE(dst_buf_, nullptr);
+    input_.reset(new (std::nothrow) float[num_classes_]());
+    ASSERT_NE(input_, nullptr);
+  }
+  void RunSoftmaxTest();
+  void RunSoftmaxSpeedTest(const int run_times);
+  void FillInputBuf();
+
+ private:
+  const FastSoftmaxFn target_fn_;
+  const int num_classes_;
+  std::unique_ptr<float[]> ref_buf_, dst_buf_, input_;
+  libaom_test::ACMRandom rng_;
+};
+
+void FastSoftmaxTest::FillInputBuf() {
+  for (int idx = 0; idx < num_classes_; idx++) {
+    input_[idx] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 30);
+  }
+}
+
+void FastSoftmaxTest::RunSoftmaxTest() {
+  av1_nn_softmax(input_.get(), ref_buf_.get(), num_classes_);
+  target_fn_(input_.get(), dst_buf_.get());
+
+  for (int idx = 0; idx < num_classes_; idx++) {
+    if (ref_buf_[idx] < kAbsEpsilon) {
+      ASSERT_LE(dst_buf_[idx], kAbsEpsilon)
+          << "Reference output was near-zero, test output was not" << std::endl;
+    } else {
+      const float error = dst_buf_[idx] - ref_buf_[idx];
+      const float relative_error = fabsf(error / ref_buf_[idx]);
+      ASSERT_LE(relative_error, kRelEpsilon)
+          << "Excessive relative error between reference and test output"
+          << std::endl;
+      ASSERT_LE(error, kAbsEpsilon)
+          << "Excessive absolute error between reference and test output"
+          << std::endl;
+    }
+  }
+}
+
+void FastSoftmaxTest::RunSoftmaxSpeedTest(const int run_times) {
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int idx = 0; idx < run_times; idx++) {
+    target_fn_(input_.get(), dst_buf_.get());
+  }
+  aom_usec_timer_mark(&timer);
+  const int64_t time = aom_usec_timer_elapsed(&timer);
+  std::cout << "Test with " << num_classes_ << " classes took " << time
+            << " us." << std::endl;
+}
+
+TEST_P(FastSoftmaxTest, RandomValues) {
+  FillInputBuf();
+  RunSoftmaxTest();
+}
+
+TEST_P(FastSoftmaxTest, DISABLED_Speed) {
+  constexpr int kNumTimes = 1000000;
+  RunSoftmaxSpeedTest(kNumTimes);
+}
+
+void AnchorSoftmax16Fn(const float *input, float *output) {
+  av1_nn_softmax(input, output, 16);
+}
+
+const FastSoftmaxTestParams kArrayParams_c[] = {
+  FastSoftmaxTestParams(AnchorSoftmax16Fn, 16),
+  FastSoftmaxTestParams(av1_nn_fast_softmax_16_c, 16)
+};
+INSTANTIATE_TEST_SUITE_P(C, FastSoftmaxTest,
+                         ::testing::ValuesIn(kArrayParams_c));
+
+#if HAVE_SSE3 && !CONFIG_EXCLUDE_SIMD_MISMATCH
+INSTANTIATE_TEST_SUITE_P(
+    SSE3, FastSoftmaxTest,
+    ::testing::Values(FastSoftmaxTestParams(av1_nn_fast_softmax_16_sse3, 16)));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/av1_temporal_denoiser_test.cc b/third_party/aom/test/av1_temporal_denoiser_test.cc
new file mode 100644
index 0000000000..7aa8fb6a66
--- /dev/null
+++ b/third_party/aom/test/av1_temporal_denoiser_test.cc
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <tuple>
+
+#include "config/av1_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "test/register_state_check.h"
+
+#include "aom_scale/yv12config.h"
+#include "aom/aom_integer.h"
+#include "av1/common/reconinter.h"
+#include "av1/encoder/context_tree.h"
+#include "av1/encoder/av1_temporal_denoiser.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+const int kNumPixels = 128 * 128;
+
+typedef int (*Av1DenoiserFilterFunc)(const uint8_t *sig, int sig_stride,
+                                     const uint8_t *mc_avg, int mc_avg_stride,
+                                     uint8_t *avg, int avg_stride,
+                                     int increase_denoising, BLOCK_SIZE bs,
+                                     int motion_magnitude);
+typedef std::tuple<Av1DenoiserFilterFunc, BLOCK_SIZE> AV1DenoiserTestParam;
+
+class AV1DenoiserTest
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<AV1DenoiserTestParam> {
+ public:
+  ~AV1DenoiserTest() override = default;
+
+  void SetUp() override { bs_ = GET_PARAM(1); }
+
+ protected:
+  BLOCK_SIZE bs_;
+};
+
+TEST_P(AV1DenoiserTest, BitexactCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = 4000;
+
+  // Allocate the space for input and output,
+  // where sig_block is the block to be denoised,
+  // mc_avg_block is the denoised reference block,
+  // avg_block_c is the denoised result from C code,
+  // avg_block_sse2 is the denoised result from SSE2 code.
+  DECLARE_ALIGNED(16, uint8_t, sig_block[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, mc_avg_block[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, avg_block_c[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, avg_block_sse2[kNumPixels]);
+
+  for (int i = 0; i < count_test_block; ++i) {
+    // Generate random motion magnitude, 20% of which exceed the threshold.
+    const int motion_magnitude_random =
+        rnd.Rand8() % static_cast<int>(MOTION_MAGNITUDE_THRESHOLD * 1.2);
+
+    // Initialize a test block with random number in range [0, 255].
+    for (int j = 0; j < kNumPixels; ++j) {
+      int temp = 0;
+      sig_block[j] = rnd.Rand8();
+      // The pixels in mc_avg_block are generated by adding a random
+      // number in range [-19, 19] to corresponding pixels in sig_block.
+      temp =
+          sig_block[j] + ((rnd.Rand8() % 2 == 0) ? -1 : 1) * (rnd.Rand8() % 20);
+      // Clip.
+      mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp);
+    }
+
+    API_REGISTER_STATE_CHECK(
+        av1_denoiser_filter_c(sig_block, 128, mc_avg_block, 128, avg_block_c,
+                              128, 0, bs_, motion_magnitude_random));
+
+    API_REGISTER_STATE_CHECK(GET_PARAM(0)(sig_block, 128, mc_avg_block, 128,
+                                          avg_block_sse2, 128, 0, bs_,
+                                          motion_magnitude_random));
+
+    // Test bitexactness.
+    for (int h = 0; h < block_size_high[bs_]; ++h) {
+      for (int w = 0; w < block_size_wide[bs_]; ++w) {
+        EXPECT_EQ(avg_block_c[h * 128 + w], avg_block_sse2[h * 128 + w]);
+      }
+    }
+  }
+}
+
+using std::make_tuple;
+
+// Test for all block size.
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AV1DenoiserTest,
+    ::testing::Values(make_tuple(&av1_denoiser_filter_sse2, BLOCK_8X8),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_8X16),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_16X8),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_16X16),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_16X32),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_32X16),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_32X32),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_32X64),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_64X32),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_64X64),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_128X64),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_64X128),
+                      make_tuple(&av1_denoiser_filter_sse2, BLOCK_128X128)));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1DenoiserTest,
+    ::testing::Values(make_tuple(&av1_denoiser_filter_neon, BLOCK_8X8),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_8X16),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_16X8),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_16X16),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_16X32),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_32X16),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_32X32),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_32X64),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_64X32),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_64X64),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_128X64),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_64X128),
+                      make_tuple(&av1_denoiser_filter_neon, BLOCK_128X128)));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/av1_txfm_test.cc b/third_party/aom/test/av1_txfm_test.cc
new file mode 100644
index 0000000000..77c0ec1071
--- /dev/null
+++ b/third_party/aom/test/av1_txfm_test.cc
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/av1_txfm_test.h"
+
+#include <stdio.h>
+
+#include <memory>
+#include <new>
+
+namespace libaom_test {
+
+const char *tx_type_name[] = {
+  "DCT_DCT",
+  "ADST_DCT",
+  "DCT_ADST",
+  "ADST_ADST",
+  "FLIPADST_DCT",
+  "DCT_FLIPADST",
+  "FLIPADST_FLIPADST",
+  "ADST_FLIPADST",
+  "FLIPADST_ADST",
+  "IDTX",
+  "V_DCT",
+  "H_DCT",
+  "V_ADST",
+  "H_ADST",
+  "V_FLIPADST",
+  "H_FLIPADST",
+};
+
+int get_txfm1d_size(TX_SIZE tx_size) { return tx_size_wide[tx_size]; }
+
+void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1) {
+  switch (txfm2d_type) {
+    case DCT_DCT:
+      *type0 = TYPE_DCT;
+      *type1 = TYPE_DCT;
+      break;
+    case ADST_DCT:
+      *type0 = TYPE_ADST;
+      *type1 = TYPE_DCT;
+      break;
+    case DCT_ADST:
+      *type0 = TYPE_DCT;
+      *type1 = TYPE_ADST;
+      break;
+    case ADST_ADST:
+      *type0 = TYPE_ADST;
+      *type1 = TYPE_ADST;
+      break;
+    case FLIPADST_DCT:
+      *type0 = TYPE_ADST;
+      *type1 = TYPE_DCT;
+      break;
+    case DCT_FLIPADST:
+      *type0 = TYPE_DCT;
+      *type1 = TYPE_ADST;
+      break;
+    case FLIPADST_FLIPADST:
+      *type0 = TYPE_ADST;
+      *type1 = TYPE_ADST;
+      break;
+    case ADST_FLIPADST:
+      *type0 = TYPE_ADST;
+      *type1 = TYPE_ADST;
+      break;
+    case FLIPADST_ADST:
+      *type0 = TYPE_ADST;
+      *type1 = TYPE_ADST;
+      break;
+    case IDTX:
+      *type0 = TYPE_IDTX;
+      *type1 = TYPE_IDTX;
+      break;
+    case H_DCT:
+      *type0 = TYPE_IDTX;
+      *type1 = TYPE_DCT;
+      break;
+    case V_DCT:
+      *type0 = TYPE_DCT;
+      *type1 = TYPE_IDTX;
+      break;
+    case H_ADST:
+      *type0 = TYPE_IDTX;
+      *type1 = TYPE_ADST;
+      break;
+    case V_ADST:
+      *type0 = TYPE_ADST;
+      *type1 = TYPE_IDTX;
+      break;
+    case H_FLIPADST:
+      *type0 = TYPE_IDTX;
+      *type1 = TYPE_ADST;
+      break;
+    case V_FLIPADST:
+      *type0 = TYPE_ADST;
+      *type1 = TYPE_IDTX;
+      break;
+    default:
+      *type0 = TYPE_DCT;
+      *type1 = TYPE_DCT;
+      assert(0);
+      break;
+  }
+}
+
+double Sqrt2 = pow(2, 0.5);
+double invSqrt2 = 1 / pow(2, 0.5);
+
+double dct_matrix(double n, double k, int size) {
+  return cos(PI * (2 * n + 1) * k / (2 * size));
+}
+
+void reference_dct_1d(const double *in, double *out, int size) {
+  for (int k = 0; k < size; ++k) {
+    out[k] = 0;
+    for (int n = 0; n < size; ++n) {
+      out[k] += in[n] * dct_matrix(n, k, size);
+    }
+    if (k == 0) out[k] = out[k] * invSqrt2;
+  }
+}
+
+void reference_idct_1d(const double *in, double *out, int size) {
+  for (int k = 0; k < size; ++k) {
+    out[k] = 0;
+    for (int n = 0; n < size; ++n) {
+      if (n == 0)
+        out[k] += invSqrt2 * in[n] * dct_matrix(k, n, size);
+      else
+        out[k] += in[n] * dct_matrix(k, n, size);
+    }
+  }
+}
+
+// TODO(any): Copied from the old 'fadst4' (same as the new 'av1_fadst4'
+// function). Should be replaced by a proper reference function that takes
+// 'double' input & output.
+static void fadst4_new(const tran_low_t *input, tran_low_t *output) {
+  tran_high_t x0, x1, x2, x3;
+  tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+  x0 = input[0];
+  x1 = input[1];
+  x2 = input[2];
+  x3 = input[3];
+
+  if (!(x0 | x1 | x2 | x3)) {
+    output[0] = output[1] = output[2] = output[3] = 0;
+    return;
+  }
+
+  s0 = sinpi_1_9 * x0;
+  s1 = sinpi_4_9 * x0;
+  s2 = sinpi_2_9 * x1;
+  s3 = sinpi_1_9 * x1;
+  s4 = sinpi_3_9 * x2;
+  s5 = sinpi_4_9 * x3;
+  s6 = sinpi_2_9 * x3;
+  s7 = x0 + x1 - x3;
+
+  x0 = s0 + s2 + s5;
+  x1 = sinpi_3_9 * s7;
+  x2 = s1 - s3 + s6;
+  x3 = s4;
+
+  s0 = x0 + x3;
+  s1 = x1;
+  s2 = x2 - x3;
+  s3 = x2 - x0 + x3;
+
+  // 1-D transform scaling factor is sqrt(2).
+  output[0] = (tran_low_t)fdct_round_shift(s0);
+  output[1] = (tran_low_t)fdct_round_shift(s1);
+  output[2] = (tran_low_t)fdct_round_shift(s2);
+  output[3] = (tran_low_t)fdct_round_shift(s3);
+}
+
+void reference_adst_1d(const double *in, double *out, int size) {
+  if (size == 4) {  // Special case.
+    tran_low_t int_input[4];
+    for (int i = 0; i < 4; ++i) {
+      int_input[i] = static_cast<tran_low_t>(round(in[i]));
+    }
+    tran_low_t int_output[4];
+    fadst4_new(int_input, int_output);
+    for (int i = 0; i < 4; ++i) {
+      out[i] = int_output[i];
+    }
+    return;
+  }
+
+  for (int k = 0; k < size; ++k) {
+    out[k] = 0;
+    for (int n = 0; n < size; ++n) {
+      out[k] += in[n] * sin(PI * (2 * n + 1) * (2 * k + 1) / (4 * size));
+    }
+  }
+}
+
+void reference_idtx_1d(const double *in, double *out, int size) {
+  double scale = 0;
+  if (size == 4)
+    scale = Sqrt2;
+  else if (size == 8)
+    scale = 2;
+  else if (size == 16)
+    scale = 2 * Sqrt2;
+  else if (size == 32)
+    scale = 4;
+  else if (size == 64)
+    scale = 4 * Sqrt2;
+  for (int k = 0; k < size; ++k) {
+    out[k] = in[k] * scale;
+  }
+}
+
+void reference_hybrid_1d(double *in, double *out, int size, int type) {
+  if (type == TYPE_DCT)
+    reference_dct_1d(in, out, size);
+  else if (type == TYPE_ADST)
+    reference_adst_1d(in, out, size);
+  else
+    reference_idtx_1d(in, out, size);
+}
+
+double get_amplification_factor(TX_TYPE tx_type, TX_SIZE tx_size) {
+  TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg;
+  av1_get_fwd_txfm_cfg(tx_type, tx_size, &fwd_txfm_flip_cfg);
+  const int tx_width = tx_size_wide[fwd_txfm_flip_cfg.tx_size];
+  const int tx_height = tx_size_high[fwd_txfm_flip_cfg.tx_size];
+  const int8_t *shift = fwd_txfm_flip_cfg.shift;
+  const int amplify_bit = shift[0] + shift[1] + shift[2];
+  double amplify_factor =
+      amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
+
+  // For rectangular transforms, we need to multiply by an extra factor.
+  const int rect_type = get_rect_tx_log_ratio(tx_width, tx_height);
+  if (abs(rect_type) == 1) {
+    amplify_factor *= pow(2, 0.5);
+  }
+  return amplify_factor;
+}
+
+void reference_hybrid_2d(double *in, double *out, TX_TYPE tx_type,
+                         TX_SIZE tx_size) {
+  // Get transform type and size of each dimension.
+  TYPE_TXFM type0;
+  TYPE_TXFM type1;
+  get_txfm1d_type(tx_type, &type0, &type1);
+  const int tx_width = tx_size_wide[tx_size];
+  const int tx_height = tx_size_high[tx_size];
+
+  std::unique_ptr<double[]> temp_in(
+      new (std::nothrow) double[AOMMAX(tx_width, tx_height)]);
+  std::unique_ptr<double[]> temp_out(
+      new (std::nothrow) double[AOMMAX(tx_width, tx_height)]);
+  std::unique_ptr<double[]> out_interm(
+      new (std::nothrow) double[tx_width * tx_height]);
+  ASSERT_NE(temp_in, nullptr);
+  ASSERT_NE(temp_out, nullptr);
+  ASSERT_NE(out_interm, nullptr);
+
+  // Transform columns.
+  for (int c = 0; c < tx_width; ++c) {
+    for (int r = 0; r < tx_height; ++r) {
+      temp_in[r] = in[r * tx_width + c];
+    }
+    reference_hybrid_1d(temp_in.get(), temp_out.get(), tx_height, type0);
+    for (int r = 0; r < tx_height; ++r) {
+      out_interm[r * tx_width + c] = temp_out[r];
+    }
+  }
+
+  // Transform rows.
+  for (int r = 0; r < tx_height; ++r) {
+    reference_hybrid_1d(out_interm.get() + r * tx_width, temp_out.get(),
+                        tx_width, type1);
+    for (int c = 0; c < tx_width; ++c) {
+      out[c * tx_height + r] = temp_out[c];
+    }
+  }
+
+  // These transforms use an approximate 2D DCT transform, by only keeping the
+  // top-left quarter of the coefficients, and repacking them in the first
+  // quarter indices.
+  // TODO(urvang): Refactor this code.
+  if (tx_width == 64 && tx_height == 64) {  // tx_size == TX_64X64
+    // Zero out top-right 32x32 area.
+    for (int col = 0; col < 32; ++col) {
+      memset(out + col * 64 + 32, 0, 32 * sizeof(*out));
+    }
+    // Zero out the bottom 64x32 area.
+    memset(out + 32 * 64, 0, 32 * 64 * sizeof(*out));
+    // Re-pack non-zero coeffs in the first 32x32 indices.
+    for (int col = 1; col < 32; ++col) {
+      memcpy(out + col * 32, out + col * 64, 32 * sizeof(*out));
+    }
+  } else if (tx_width == 32 && tx_height == 64) {  // tx_size == TX_32X64
+    // Zero out right 32x32 area.
+    for (int col = 0; col < 32; ++col) {
+      memset(out + col * 64 + 32, 0, 32 * sizeof(*out));
+    }
+    // Re-pack non-zero coeffs in the first 32x32 indices.
+    for (int col = 1; col < 32; ++col) {
+      memcpy(out + col * 32, out + col * 64, 32 * sizeof(*out));
+    }
+  } else if (tx_width == 64 && tx_height == 32) {  // tx_size == TX_64X32
+    // Zero out the bottom 32x32 area.
+    memset(out + 32 * 32, 0, 32 * 32 * sizeof(*out));
+    // Note: no repacking needed here.
+  } else if (tx_width == 16 && tx_height == 64) {  // tx_size == TX_16X64
+    // Note: no repacking needed here.
+    // Zero out right 32x16 area.
+    for (int col = 0; col < 16; ++col) {
+      memset(out + col * 64 + 32, 0, 32 * sizeof(*out));
+    }
+    // Re-pack non-zero coeffs in the first 32x16 indices.
+    for (int col = 1; col < 16; ++col) {
+      memcpy(out + col * 32, out + col * 64, 32 * sizeof(*out));
+    }
+  } else if (tx_width == 64 && tx_height == 16) {  // tx_size == TX_64X16
+    // Zero out the bottom 16x32 area.
+    memset(out + 16 * 32, 0, 16 * 32 * sizeof(*out));
+  }
+
+  // Apply appropriate scale.
+  const double amplify_factor = get_amplification_factor(tx_type, tx_size);
+  for (int c = 0; c < tx_width; ++c) {
+    for (int r = 0; r < tx_height; ++r) {
+      out[c * tx_height + r] *= amplify_factor;
+    }
+  }
+}
+
+template <typename Type>
+void fliplr(Type *dest, int width, int height, int stride) {
+  for (int r = 0; r < height; ++r) {
+    for (int c = 0; c < width / 2; ++c) {
+      const Type tmp = dest[r * stride + c];
+      dest[r * stride + c] = dest[r * stride + width - 1 - c];
+      dest[r * stride + width - 1 - c] = tmp;
+    }
+  }
+}
+
+template <typename Type>
+void flipud(Type *dest, int width, int height, int stride) {
+  for (int c = 0; c < width; ++c) {
+    for (int r = 0; r < height / 2; ++r) {
+      const Type tmp = dest[r * stride + c];
+      dest[r * stride + c] = dest[(height - 1 - r) * stride + c];
+      dest[(height - 1 - r) * stride + c] = tmp;
+    }
+  }
+}
+
+template <typename Type>
+void fliplrud(Type *dest, int width, int height, int stride) {
+  for (int r = 0; r < height / 2; ++r) {
+    for (int c = 0; c < width; ++c) {
+      const Type tmp = dest[r * stride + c];
+      dest[r * stride + c] = dest[(height - 1 - r) * stride + width - 1 - c];
+      dest[(height - 1 - r) * stride + width - 1 - c] = tmp;
+    }
+  }
+}
+
+template void fliplr<double>(double *dest, int width, int height, int stride);
+template void flipud<double>(double *dest, int width, int height, int stride);
+template void fliplrud<double>(double *dest, int width, int height, int stride);
+
+int bd_arr[BD_NUM] = { 8, 10, 12 };
+
+int8_t low_range_arr[BD_NUM] = { 18, 32, 32 };
+int8_t high_range_arr[BD_NUM] = { 32, 32, 32 };
+
+void txfm_stage_range_check(const int8_t *stage_range, int stage_num,
+                            int8_t cos_bit, int low_range, int high_range) {
+  for (int i = 0; i < stage_num; ++i) {
+    EXPECT_LE(stage_range[i], low_range);
+    ASSERT_LE(stage_range[i] + cos_bit, high_range) << "stage = " << i;
+  }
+  for (int i = 0; i < stage_num - 1; ++i) {
+    // make sure there is no overflow while doing half_btf()
+    ASSERT_LE(stage_range[i + 1] + cos_bit, high_range) << "stage = " << i;
+  }
+}
+}  // namespace libaom_test
diff --git a/third_party/aom/test/av1_txfm_test.h b/third_party/aom/test/av1_txfm_test.h
new file mode 100644
index 0000000000..d285e3d637
--- /dev/null
+++ b/third_party/aom/test/av1_txfm_test.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_AV1_TXFM_TEST_H_
+#define AOM_TEST_AV1_TXFM_TEST_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef _MSC_VER
+#define _USE_MATH_DEFINES
+#endif
+#include <math.h>
+
+#include "config/av1_rtcd.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "av1/common/av1_txfm.h"
+#include "av1/common/blockd.h"
+#include "av1/common/enums.h"
+
+namespace libaom_test {
+
+extern const char *tx_type_name[];
+
+enum {
+  TYPE_DCT = 0,
+  TYPE_ADST,
+  TYPE_IDTX,
+  TYPE_IDCT,
+  TYPE_IADST,
+  TYPE_LAST
+} UENUM1BYTE(TYPE_TXFM);
+
+int get_txfm1d_size(TX_SIZE tx_size);
+
+void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1);
+
+void reference_dct_1d(const double *in, double *out, int size);
+void reference_idct_1d(const double *in, double *out, int size);
+
+void reference_adst_1d(const double *in, double *out, int size);
+
+void reference_hybrid_1d(double *in, double *out, int size, int type);
+
+double get_amplification_factor(TX_TYPE tx_type, TX_SIZE tx_size);
+
+void reference_hybrid_2d(double *in, double *out, TX_TYPE tx_type,
+                         TX_SIZE tx_size);
+template <typename Type1, typename Type2>
+static double compute_avg_abs_error(const Type1 *a, const Type2 *b,
+                                    const int size) {
+  double error = 0;
+  for (int i = 0; i < size; i++) {
+    error += fabs(static_cast<double>(a[i]) - static_cast<double>(b[i]));
+  }
+  error = error / size;
+  return error;
+}
+
+template <typename Type>
+void fliplr(Type *dest, int width, int height, int stride);
+
+template <typename Type>
+void flipud(Type *dest, int width, int height, int stride);
+
+template <typename Type>
+void fliplrud(Type *dest, int width, int height, int stride);
+
+typedef void (*TxfmFunc)(const int32_t *in, int32_t *out, const int8_t cos_bit,
+                         const int8_t *range_bit);
+
+typedef void (*InvTxfm2dFunc)(const int32_t *, uint16_t *, int, TX_TYPE, int);
+typedef void (*LbdInvTxfm2dFunc)(const int32_t *, uint8_t *, int, TX_TYPE,
+                                 TX_SIZE, int);
+
+static const int bd = 10;
+static const int input_base = (1 << bd);
+
+static INLINE bool IsTxSizeTypeValid(TX_SIZE tx_size, TX_TYPE tx_type) {
+  const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
+  TxSetType tx_set_type;
+  if (tx_size_sqr_up > TX_32X32) {
+    tx_set_type = EXT_TX_SET_DCTONLY;
+  } else if (tx_size_sqr_up == TX_32X32) {
+    tx_set_type = EXT_TX_SET_DCT_IDTX;
+  } else {
+    tx_set_type = EXT_TX_SET_ALL16;
+  }
+  return av1_ext_tx_used[tx_set_type][tx_type] != 0;
+}
+
+#if CONFIG_AV1_ENCODER
+#if !CONFIG_REALTIME_ONLY
+static const FwdTxfm2dFunc fwd_txfm_func_ls[TX_SIZES_ALL] = {
+  av1_fwd_txfm2d_4x4_c,   av1_fwd_txfm2d_8x8_c,   av1_fwd_txfm2d_16x16_c,
+  av1_fwd_txfm2d_32x32_c, av1_fwd_txfm2d_64x64_c, av1_fwd_txfm2d_4x8_c,
+  av1_fwd_txfm2d_8x4_c,   av1_fwd_txfm2d_8x16_c,  av1_fwd_txfm2d_16x8_c,
+  av1_fwd_txfm2d_16x32_c, av1_fwd_txfm2d_32x16_c, av1_fwd_txfm2d_32x64_c,
+  av1_fwd_txfm2d_64x32_c, av1_fwd_txfm2d_4x16_c,  av1_fwd_txfm2d_16x4_c,
+  av1_fwd_txfm2d_8x32_c,  av1_fwd_txfm2d_32x8_c,  av1_fwd_txfm2d_16x64_c,
+  av1_fwd_txfm2d_64x16_c,
+};
+#else
+static const FwdTxfm2dFunc fwd_txfm_func_ls[TX_SIZES_ALL] = {
+  av1_fwd_txfm2d_4x4_c,
+  av1_fwd_txfm2d_8x8_c,
+  av1_fwd_txfm2d_16x16_c,
+  av1_fwd_txfm2d_32x32_c,
+  av1_fwd_txfm2d_64x64_c,
+  av1_fwd_txfm2d_4x8_c,
+  av1_fwd_txfm2d_8x4_c,
+  av1_fwd_txfm2d_8x16_c,
+  av1_fwd_txfm2d_16x8_c,
+  av1_fwd_txfm2d_16x32_c,
+  av1_fwd_txfm2d_32x16_c,
+  av1_fwd_txfm2d_32x64_c,
+  av1_fwd_txfm2d_64x32_c,
+  nullptr,
+  av1_fwd_txfm2d_16x4_c,
+  nullptr,
+  nullptr,
+  nullptr,
+  nullptr,
+};
+#endif
+#endif
+
+static const InvTxfm2dFunc inv_txfm_func_ls[TX_SIZES_ALL] = {
+  av1_inv_txfm2d_add_4x4_c,   av1_inv_txfm2d_add_8x8_c,
+  av1_inv_txfm2d_add_16x16_c, av1_inv_txfm2d_add_32x32_c,
+  av1_inv_txfm2d_add_64x64_c, av1_inv_txfm2d_add_4x8_c,
+  av1_inv_txfm2d_add_8x4_c,   av1_inv_txfm2d_add_8x16_c,
+  av1_inv_txfm2d_add_16x8_c,  av1_inv_txfm2d_add_16x32_c,
+  av1_inv_txfm2d_add_32x16_c, av1_inv_txfm2d_add_32x64_c,
+  av1_inv_txfm2d_add_64x32_c, av1_inv_txfm2d_add_4x16_c,
+  av1_inv_txfm2d_add_16x4_c,  av1_inv_txfm2d_add_8x32_c,
+  av1_inv_txfm2d_add_32x8_c,  av1_inv_txfm2d_add_16x64_c,
+  av1_inv_txfm2d_add_64x16_c,
+};
+
+#define BD_NUM 3
+
+extern int bd_arr[];
+extern int8_t low_range_arr[];
+extern int8_t high_range_arr[];
+
+void txfm_stage_range_check(const int8_t *stage_range, int stage_num,
+                            const int8_t cos_bit, int low_range,
+                            int high_range);
+}  // namespace libaom_test
+#endif  // AOM_TEST_AV1_TXFM_TEST_H_
diff --git a/third_party/aom/test/av1_wedge_utils_test.cc b/third_party/aom/test/av1_wedge_utils_test.cc
new file mode 100644
index 0000000000..1055ff35b2
--- /dev/null
+++ b/third_party/aom/test/av1_wedge_utils_test.cc
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_dsp/aom_dsp_common.h"
+
+#include "av1/common/enums.h"
+
+#include "test/acm_random.h"
+#include "test/function_equivalence_test.h"
+#include "test/register_state_check.h"
+
+#define WEDGE_WEIGHT_BITS 6
+#define MAX_MASK_VALUE (1 << (WEDGE_WEIGHT_BITS))
+
+using libaom_test::ACMRandom;
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+static const int16_t kInt13Max = (1 << 12) - 1;
+
+//////////////////////////////////////////////////////////////////////////////
+// av1_wedge_sse_from_residuals - functionality
+//////////////////////////////////////////////////////////////////////////////
+
+class WedgeUtilsSSEFuncTest : public testing::Test {
+ protected:
+  WedgeUtilsSSEFuncTest() : rng_(ACMRandom::DeterministicSeed()) {}
+
+  static const int kIterations = 1000;
+
+  ACMRandom rng_;
+};
+
+static void equiv_blend_residuals(int16_t *r, const int16_t *r0,
+                                  const int16_t *r1, const uint8_t *m, int N) {
+  for (int i = 0; i < N; i++) {
+    const int32_t m0 = m[i];
+    const int32_t m1 = MAX_MASK_VALUE - m0;
+    const int16_t R = m0 * r0[i] + m1 * r1[i];
+    // Note that this rounding is designed to match the result
+    // you would get when actually blending the 2 predictors and computing
+    // the residuals.
+    r[i] = ROUND_POWER_OF_TWO(R - 1, WEDGE_WEIGHT_BITS);
+  }
+}
+
+static uint64_t equiv_sse_from_residuals(const int16_t *r0, const int16_t *r1,
+                                         const uint8_t *m, int N) {
+  uint64_t acc = 0;
+  for (int i = 0; i < N; i++) {
+    const int32_t m0 = m[i];
+    const int32_t m1 = MAX_MASK_VALUE - m0;
+    const int16_t R = m0 * r0[i] + m1 * r1[i];
+    const int32_t r = ROUND_POWER_OF_TWO(R - 1, WEDGE_WEIGHT_BITS);
+    acc += r * r;
+  }
+  return acc;
+}
+
+TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingEquiv) {
+  DECLARE_ALIGNED(32, uint8_t, s[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, p0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, p1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, p[MAX_SB_SQUARE]);
+
+  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, r_ref[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, r_tst[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      s[i] = rng_.Rand8();
+      m[i] = rng_(MAX_MASK_VALUE + 1);
+    }
+
+    const int w = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3);
+    const int h = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3);
+    const int N = w * h;
+
+    for (int j = 0; j < N; j++) {
+      p0[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX);
+      p1[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX);
+    }
+
+    aom_blend_a64_mask(p, w, p0, w, p1, w, m, w, w, h, 0, 0);
+
+    aom_subtract_block(h, w, r0, w, s, w, p0, w);
+    aom_subtract_block(h, w, r1, w, s, w, p1, w);
+
+    aom_subtract_block(h, w, r_ref, w, s, w, p, w);
+    equiv_blend_residuals(r_tst, r0, r1, m, N);
+
+    for (int i = 0; i < N; ++i) ASSERT_EQ(r_ref[i], r_tst[i]);
+
+    uint64_t ref_sse = aom_sum_squares_i16(r_ref, N);
+    uint64_t tst_sse = equiv_sse_from_residuals(r0, r1, m, N);
+
+    ASSERT_EQ(ref_sse, tst_sse);
+  }
+}
+
+static uint64_t sse_from_residuals(const int16_t *r0, const int16_t *r1,
+                                   const uint8_t *m, int N) {
+  uint64_t acc = 0;
+  for (int i = 0; i < N; i++) {
+    const int32_t m0 = m[i];
+    const int32_t m1 = MAX_MASK_VALUE - m0;
+    const int32_t r = m0 * r0[i] + m1 * r1[i];
+    acc += r * r;
+  }
+  return ROUND_POWER_OF_TWO(acc, 2 * WEDGE_WEIGHT_BITS);
+}
+
+TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingMethod) {
+  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      r1[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN;
+      d[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN;
+      m[i] = rng_(MAX_MASK_VALUE + 1);
+    }
+
+    const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
+
+    for (int i = 0; i < N; i++) r0[i] = r1[i] + d[i];
+
+    const uint64_t ref_res = sse_from_residuals(r0, r1, m, N);
+    const uint64_t tst_res = av1_wedge_sse_from_residuals(r1, d, m, N);
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// av1_wedge_sse_from_residuals - optimizations
+//////////////////////////////////////////////////////////////////////////////
+
+typedef uint64_t (*FSSE)(const int16_t *r1, const int16_t *d, const uint8_t *m,
+                         int N);
+typedef libaom_test::FuncParam<FSSE> TestFuncsFSSE;
+
+class WedgeUtilsSSEOptTest : public FunctionEquivalenceTest<FSSE> {
+ protected:
+  static const int kIterations = 10000;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(WedgeUtilsSSEOptTest);
+
+TEST_P(WedgeUtilsSSEOptTest, RandomValues) {
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      r1[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
+      d[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
+      m[i] = rng_(MAX_MASK_VALUE + 1);
+    }
+
+    const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
+
+    const uint64_t ref_res = params_.ref_func(r1, d, m, N);
+    uint64_t tst_res;
+    API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+TEST_P(WedgeUtilsSSEOptTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    if (rng_(2)) {
+      for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = kInt13Max;
+    } else {
+      for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = -kInt13Max;
+    }
+
+    if (rng_(2)) {
+      for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = kInt13Max;
+    } else {
+      for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = -kInt13Max;
+    }
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE;
+
+    const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
+
+    const uint64_t ref_res = params_.ref_func(r1, d, m, N);
+    uint64_t tst_res;
+    API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// av1_wedge_sign_from_residuals
+//////////////////////////////////////////////////////////////////////////////
+
+typedef int8_t (*FSign)(const int16_t *ds, const uint8_t *m, int N,
+                        int64_t limit);
+typedef libaom_test::FuncParam<FSign> TestFuncsFSign;
+
+class WedgeUtilsSignOptTest : public FunctionEquivalenceTest<FSign> {
+ protected:
+  static const int kIterations = 10000;
+  static const int kMaxSize = 8196;  // Size limited by SIMD implementation.
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(WedgeUtilsSignOptTest);
+
+TEST_P(WedgeUtilsSignOptTest, RandomValues) {
+  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      r0[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
+      r1[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
+      m[i] = rng_(MAX_MASK_VALUE + 1);
+    }
+
+    const int maxN = AOMMIN(kMaxSize, MAX_SB_SQUARE);
+    const int N = 64 * (rng_(maxN / 64 - 1) + 1);
+
+    int64_t limit;
+    limit = (int64_t)aom_sum_squares_i16(r0, N);
+    limit -= (int64_t)aom_sum_squares_i16(r1, N);
+    limit *= (1 << WEDGE_WEIGHT_BITS) / 2;
+
+    for (int i = 0; i < N; i++)
+      ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX);
+
+    const int ref_res = params_.ref_func(ds, m, N, limit);
+    int tst_res;
+    API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+TEST_P(WedgeUtilsSignOptTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    switch (rng_(4)) {
+      case 0:
+        for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+          r0[i] = 0;
+          r1[i] = kInt13Max;
+        }
+        break;
+      case 1:
+        for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+          r0[i] = kInt13Max;
+          r1[i] = 0;
+        }
+        break;
+      case 2:
+        for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+          r0[i] = 0;
+          r1[i] = -kInt13Max;
+        }
+        break;
+      default:
+        for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+          r0[i] = -kInt13Max;
+          r1[i] = 0;
+        }
+        break;
+    }
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE;
+
+    const int maxN = AOMMIN(kMaxSize, MAX_SB_SQUARE);
+    const int N = 64 * (rng_(maxN / 64 - 1) + 1);
+
+    int64_t limit;
+    limit = (int64_t)aom_sum_squares_i16(r0, N);
+    limit -= (int64_t)aom_sum_squares_i16(r1, N);
+    limit *= (1 << WEDGE_WEIGHT_BITS) / 2;
+
+    for (int i = 0; i < N; i++)
+      ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX);
+
+    const int ref_res = params_.ref_func(ds, m, N, limit);
+    int tst_res;
+    API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// av1_wedge_compute_delta_squares
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*FDS)(int16_t *d, const int16_t *a, const int16_t *b, int N);
+typedef libaom_test::FuncParam<FDS> TestFuncsFDS;
+
+class WedgeUtilsDeltaSquaresOptTest : public FunctionEquivalenceTest<FDS> {
+ protected:
+  static const int kIterations = 10000;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(WedgeUtilsDeltaSquaresOptTest);
+
+TEST_P(WedgeUtilsDeltaSquaresOptTest, RandomValues) {
+  DECLARE_ALIGNED(32, int16_t, a[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, b[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d_ref[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d_tst[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      a[i] = rng_.Rand16Signed();
+      b[i] = rng_(2 * INT16_MAX + 1) - INT16_MAX;
+    }
+
+    const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
+
+    memset(&d_ref, INT16_MAX, sizeof(d_ref));
+    memset(&d_tst, INT16_MAX, sizeof(d_tst));
+
+    params_.ref_func(d_ref, a, b, N);
+    API_REGISTER_STATE_CHECK(params_.tst_func(d_tst, a, b, N));
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) ASSERT_EQ(d_ref[i], d_tst[i]);
+  }
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, WedgeUtilsSSEOptTest,
+    ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_c,
+                                    av1_wedge_sse_from_residuals_sse2)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, WedgeUtilsSignOptTest,
+    ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_c,
+                                     av1_wedge_sign_from_residuals_sse2)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, WedgeUtilsDeltaSquaresOptTest,
+    ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_c,
+                                   av1_wedge_compute_delta_squares_sse2)));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, WedgeUtilsSSEOptTest,
+    ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_c,
+                                    av1_wedge_sse_from_residuals_neon)));
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, WedgeUtilsSignOptTest,
+    ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_c,
+                                     av1_wedge_sign_from_residuals_neon)));
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, WedgeUtilsDeltaSquaresOptTest,
+    ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_c,
+                                   av1_wedge_compute_delta_squares_neon)));
+#endif  // HAVE_NEON
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, WedgeUtilsSSEOptTest,
+    ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_sse2,
+                                    av1_wedge_sse_from_residuals_avx2)));
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, WedgeUtilsSignOptTest,
+    ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_sse2,
+                                     av1_wedge_sign_from_residuals_avx2)));
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, WedgeUtilsDeltaSquaresOptTest,
+    ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_sse2,
+                                   av1_wedge_compute_delta_squares_avx2)));
+#endif  // HAVE_AVX2
+
+}  // namespace
diff --git a/third_party/aom/test/avg_test.cc b/third_party/aom/test/avg_test.cc
new file mode 100644
index 0000000000..6f4c2ff332
--- /dev/null
+++ b/third_party/aom/test/avg_test.cc
@@ -0,0 +1,1150 @@
+/*
+ *  Copyright (c) 2019, Alliance for Open Media. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <ostream>
+#include <string>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+
+using libaom_test::ACMRandom;
+
+template <typename Pixel>
+class AverageTestBase : public ::testing::Test {
+ public:
+  AverageTestBase(int width, int height, int bit_depth = 8)
+      : width_(width), height_(height), source_data_(nullptr),
+        source_stride_(0), bit_depth_(bit_depth) {}
+
+  void TearDown() override {
+    aom_free(source_data_);
+    source_data_ = nullptr;
+  }
+
+ protected:
+  // Handle blocks up to 4 blocks 64x64 with stride up to 128
+  static const int kDataAlignment = 16;
+  static const int kDataBlockWidth = 128;
+  static const int kDataBlockHeight = 128;
+  static const int kDataBlockSize = kDataBlockWidth * kDataBlockHeight;
+
+  void SetUp() override {
+    const testing::TestInfo *const test_info =
+        testing::UnitTest::GetInstance()->current_test_info();
+    // Skip the speed test for C code as the baseline uses the same function.
+    if (std::string(test_info->test_suite_name()).find("C/") == 0 &&
+        std::string(test_info->name()).find("DISABLED_Speed") !=
+            std::string::npos) {
+      GTEST_SKIP();
+    }
+
+    source_data_ = static_cast<Pixel *>(
+        aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
+    ASSERT_NE(source_data_, nullptr);
+    memset(source_data_, 0, kDataBlockSize * sizeof(source_data_[0]));
+    source_stride_ = (width_ + 31) & ~31;
+    bit_depth_ = 8;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  // Sum Pixels
+  static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) {
+    unsigned int average = 0;
+    for (int h = 0; h < 8; ++h) {
+      for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
+    }
+    return (average + 32) >> 6;
+  }
+
+  static void ReferenceAverage8x8_quad(const uint8_t *source, int pitch,
+                                       int x16_idx, int y16_idx, int *avg) {
+    for (int k = 0; k < 4; k++) {
+      int average = 0;
+      int x8_idx = x16_idx + ((k & 1) << 3);
+      int y8_idx = y16_idx + ((k >> 1) << 3);
+      for (int h = 0; h < 8; ++h) {
+        for (int w = 0; w < 8; ++w)
+          average += source[(h + y8_idx) * pitch + w + x8_idx];
+      }
+      avg[k] = (average + 32) >> 6;
+    }
+  }
+
+  static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) {
+    unsigned int average = 0;
+    for (int h = 0; h < 4; ++h) {
+      for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
+    }
+    return (average + 8) >> 4;
+  }
+
+  void FillConstant(Pixel fill_constant) {
+    for (int i = 0; i < width_ * height_; ++i) {
+      source_data_[i] = fill_constant;
+    }
+  }
+
+  void FillRandom() {
+    for (int i = 0; i < width_ * height_; ++i) {
+      source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1);
+    }
+  }
+
+  int width_, height_;
+  Pixel *source_data_;
+  int source_stride_;
+  int bit_depth_;
+
+  ACMRandom rnd_;
+};
+typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch);
+
+// Arguments: width, height, bit_depth, buffer start offset, block size, avg
+// function.
+typedef std::tuple<int, int, int, int, int, AverageFunction> AvgFunc;
+
+template <typename Pixel>
+class AverageTest : public AverageTestBase<Pixel>,
+                    public ::testing::WithParamInterface<AvgFunc> {
+ public:
+  AverageTest()
+      : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
+
+ protected:
+  using AverageTestBase<Pixel>::source_data_;
+  using AverageTestBase<Pixel>::source_stride_;
+  using AverageTestBase<Pixel>::ReferenceAverage8x8;
+  using AverageTestBase<Pixel>::ReferenceAverage4x4;
+  using AverageTestBase<Pixel>::FillConstant;
+  using AverageTestBase<Pixel>::FillRandom;
+
+  void CheckAverages() {
+    const int block_size = GET_PARAM(4);
+    unsigned int expected = 0;
+
+    // The reference frame, but not the source frame, may be unaligned for
+    // certain types of searches.
+    const Pixel *const src = source_data_ + GET_PARAM(3);
+    if (block_size == 8) {
+      expected = ReferenceAverage8x8(src, source_stride_);
+    } else if (block_size == 4) {
+      expected = ReferenceAverage4x4(src, source_stride_);
+    }
+
+    aom_usec_timer timer;
+    unsigned int actual;
+    if (sizeof(Pixel) == 2) {
+#if CONFIG_AV1_HIGHBITDEPTH
+      AverageFunction avg_c =
+          (block_size == 8) ? aom_highbd_avg_8x8_c : aom_highbd_avg_4x4_c;
+      // To avoid differences in optimization with the local Reference*()
+      // functions the C implementation is used as a baseline.
+      aom_usec_timer_start(&timer);
+      avg_c(CONVERT_TO_BYTEPTR(src), source_stride_);
+      aom_usec_timer_mark(&timer);
+      ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
+
+      AverageFunction avg_opt = GET_PARAM(5);
+      API_REGISTER_STATE_CHECK(
+          aom_usec_timer_start(&timer);
+          actual = avg_opt(CONVERT_TO_BYTEPTR(src), source_stride_);
+          aom_usec_timer_mark(&timer));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+    } else {
+      ASSERT_EQ(sizeof(Pixel), 1u);
+
+      AverageFunction avg_c = (block_size == 8) ? aom_avg_8x8_c : aom_avg_4x4_c;
+      aom_usec_timer_start(&timer);
+      avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_);
+      aom_usec_timer_mark(&timer);
+      ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
+
+      AverageFunction avg_opt = GET_PARAM(5);
+      API_REGISTER_STATE_CHECK(
+          aom_usec_timer_start(&timer);
+          actual =
+              avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_);
+          aom_usec_timer_mark(&timer));
+    }
+    opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
+
+    EXPECT_EQ(expected, actual);
+  }
+
+  void TestConstantValue(Pixel value) {
+    FillConstant(value);
+    CheckAverages();
+  }
+
+  void TestRandom(int iterations = 1000) {
+    for (int i = 0; i < iterations; i++) {
+      FillRandom();
+      CheckAverages();
+    }
+  }
+
+  void PrintTimingStats() const {
+    printf(
+        "block_size = %d \t ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
+        GET_PARAM(4), static_cast<int>(ref_elapsed_time_),
+        static_cast<int>(opt_elapsed_time_),
+        (static_cast<float>(ref_elapsed_time_) /
+         static_cast<float>(opt_elapsed_time_)));
+  }
+
+  int64_t ref_elapsed_time_ = 0;
+  int64_t opt_elapsed_time_ = 0;
+};
+
+typedef void (*AverageFunction_8x8_quad)(const uint8_t *s, int pitch, int x_idx,
+                                         int y_idx, int *avg);
+
+// Arguments: width, height, bit_depth, buffer start offset, block size, avg
+// function.
+typedef std::tuple<int, int, int, int, int, AverageFunction_8x8_quad>
+    AvgFunc_8x8_quad;
+
+template <typename Pixel>
+class AverageTest_8x8_quad
+    : public AverageTestBase<Pixel>,
+      public ::testing::WithParamInterface<AvgFunc_8x8_quad> {
+ public:
+  AverageTest_8x8_quad()
+      : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
+
+ protected:
+  using AverageTestBase<Pixel>::source_data_;
+  using AverageTestBase<Pixel>::source_stride_;
+  using AverageTestBase<Pixel>::ReferenceAverage8x8_quad;
+  using AverageTestBase<Pixel>::FillConstant;
+  using AverageTestBase<Pixel>::FillRandom;
+
+  void CheckAveragesAt(int iterations, int x16_idx, int y16_idx) {
+    ASSERT_EQ(sizeof(Pixel), 1u);
+    const int block_size = GET_PARAM(4);
+    (void)block_size;
+    int expected[4] = { 0 };
+
+    // The reference frame, but not the source frame, may be unaligned for
+    // certain types of searches.
+    const Pixel *const src = source_data_ + GET_PARAM(3);
+    ReferenceAverage8x8_quad(src, source_stride_, x16_idx, y16_idx, expected);
+
+    aom_usec_timer timer;
+    int expected_c[4] = { 0 };
+    int actual[4] = { 0 };
+    AverageFunction_8x8_quad avg_c = aom_avg_8x8_quad_c;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < iterations; i++) {
+      avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
+            y16_idx, expected_c);
+    }
+    aom_usec_timer_mark(&timer);
+    ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
+
+    AverageFunction_8x8_quad avg_opt = GET_PARAM(5);
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < iterations; i++) {
+      avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
+              y16_idx, actual);
+    }
+    aom_usec_timer_mark(&timer);
+    opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
+
+    for (int k = 0; k < 4; k++) {
+      EXPECT_EQ(expected[k], actual[k]);
+      EXPECT_EQ(expected_c[k], actual[k]);
+    }
+
+    // Print scaling information only when Speed test is called.
+    if (iterations > 1) {
+      printf("ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
+             static_cast<int>(ref_elapsed_time_),
+             static_cast<int>(opt_elapsed_time_),
+             (static_cast<float>(ref_elapsed_time_) /
+              static_cast<float>(opt_elapsed_time_)));
+    }
+  }
+
+  void CheckAverages() {
+    for (int x16_idx = 0; x16_idx < this->kDataBlockWidth / 8; x16_idx += 2)
+      for (int y16_idx = 0; y16_idx < this->kDataBlockHeight / 8; y16_idx += 2)
+        CheckAveragesAt(1, x16_idx, y16_idx);
+  }
+
+  void TestConstantValue(Pixel value) {
+    FillConstant(value);
+    CheckAverages();
+  }
+
+  void TestRandom() {
+    FillRandom();
+    CheckAverages();
+  }
+
+  void TestSpeed() {
+    FillRandom();
+    CheckAveragesAt(1000000, 0, 0);
+  }
+
+  int64_t ref_elapsed_time_ = 0;
+  int64_t opt_elapsed_time_ = 0;
+};
+
+using AverageTest8bpp = AverageTest<uint8_t>;
+
+TEST_P(AverageTest8bpp, MinValue) { TestConstantValue(0); }
+
+TEST_P(AverageTest8bpp, MaxValue) { TestConstantValue(255); }
+
+TEST_P(AverageTest8bpp, Random) { TestRandom(); }
+
+TEST_P(AverageTest8bpp, DISABLED_Speed) {
+  TestRandom(1000000);
+  PrintTimingStats();
+}
+
+using AvgTest8bpp_avg_8x8_quad = AverageTest_8x8_quad<uint8_t>;
+
+TEST_P(AvgTest8bpp_avg_8x8_quad, MinValue) { TestConstantValue(0); }
+
+TEST_P(AvgTest8bpp_avg_8x8_quad, MaxValue) { TestConstantValue(255); }
+
+TEST_P(AvgTest8bpp_avg_8x8_quad, Random) { TestRandom(); }
+
+TEST_P(AvgTest8bpp_avg_8x8_quad, DISABLED_Speed) { TestSpeed(); }
+
+#if CONFIG_AV1_HIGHBITDEPTH
+using AverageTestHbd = AverageTest<uint16_t>;
+
+TEST_P(AverageTestHbd, MinValue) { TestConstantValue(0); }
+
+TEST_P(AverageTestHbd, MaxValue10bit) { TestConstantValue(1023); }
+TEST_P(AverageTestHbd, MaxValue12bit) { TestConstantValue(4095); }
+
+TEST_P(AverageTestHbd, Random) { TestRandom(); }
+
+TEST_P(AverageTestHbd, DISABLED_Speed) {
+  TestRandom(1000000);
+  PrintTimingStats();
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+typedef void (*IntProRowFunc)(int16_t *hbuf, uint8_t const *ref,
+                              const int ref_stride, const int width,
+                              const int height, int norm_factor);
+
+// Params: width, height, asm function, c function.
+typedef std::tuple<int, int, IntProRowFunc, IntProRowFunc> IntProRowParam;
+
+class IntProRowTest : public AverageTestBase<uint8_t>,
+                      public ::testing::WithParamInterface<IntProRowParam> {
+ public:
+  IntProRowTest()
+      : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), hbuf_asm_(nullptr),
+        hbuf_c_(nullptr) {
+    asm_func_ = GET_PARAM(2);
+    c_func_ = GET_PARAM(3);
+  }
+
+  void set_norm_factor() {
+    if (height_ == 128)
+      norm_factor_ = 6;
+    else if (height_ == 64)
+      norm_factor_ = 5;
+    else if (height_ == 32)
+      norm_factor_ = 4;
+    else if (height_ == 16)
+      norm_factor_ = 3;
+  }
+
+ protected:
+  void SetUp() override {
+    source_data_ = static_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
+    ASSERT_NE(source_data_, nullptr);
+
+    hbuf_asm_ = static_cast<int16_t *>(
+        aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * width_));
+    ASSERT_NE(hbuf_asm_, nullptr);
+    hbuf_c_ = static_cast<int16_t *>(
+        aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * width_));
+    ASSERT_NE(hbuf_c_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(source_data_);
+    source_data_ = nullptr;
+    aom_free(hbuf_c_);
+    hbuf_c_ = nullptr;
+    aom_free(hbuf_asm_);
+    hbuf_asm_ = nullptr;
+  }
+
+  void RunComparison() {
+    set_norm_factor();
+    API_REGISTER_STATE_CHECK(
+        c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_));
+    API_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, width_, width_,
+                                       height_, norm_factor_));
+    EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
+        << "Output mismatch\n";
+  }
+
+  void RunSpeedTest() {
+    const int numIter = 5000000;
+    set_norm_factor();
+    printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
+           numIter);
+    aom_usec_timer c_timer_;
+    aom_usec_timer_start(&c_timer_);
+    for (int i = 0; i < numIter; i++) {
+      c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_);
+    }
+    aom_usec_timer_mark(&c_timer_);
+
+    aom_usec_timer asm_timer_;
+    aom_usec_timer_start(&asm_timer_);
+
+    for (int i = 0; i < numIter; i++) {
+      asm_func_(hbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
+    }
+    aom_usec_timer_mark(&asm_timer_);
+
+    const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
+    const int asm_sum_time =
+        static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
+
+    printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
+           asm_sum_time,
+           (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
+
+    EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
+        << "Output mismatch\n";
+  }
+
+ private:
+  IntProRowFunc asm_func_;
+  IntProRowFunc c_func_;
+  int16_t *hbuf_asm_;
+  int16_t *hbuf_c_;
+  int norm_factor_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProRowTest);
+
+typedef void (*IntProColFunc)(int16_t *vbuf, uint8_t const *ref,
+                              const int ref_stride, const int width,
+                              const int height, int norm_factor);
+
+// Params: width, height, asm function, c function.
+typedef std::tuple<int, int, IntProColFunc, IntProColFunc> IntProColParam;
+
+class IntProColTest : public AverageTestBase<uint8_t>,
+                      public ::testing::WithParamInterface<IntProColParam> {
+ public:
+  IntProColTest()
+      : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), vbuf_asm_(nullptr),
+        vbuf_c_(nullptr) {
+    asm_func_ = GET_PARAM(2);
+    c_func_ = GET_PARAM(3);
+  }
+
+ protected:
+  void SetUp() override {
+    source_data_ = static_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
+    ASSERT_NE(source_data_, nullptr);
+
+    vbuf_asm_ = static_cast<int16_t *>(
+        aom_memalign(kDataAlignment, sizeof(*vbuf_asm_) * width_));
+    ASSERT_NE(vbuf_asm_, nullptr);
+    vbuf_c_ = static_cast<int16_t *>(
+        aom_memalign(kDataAlignment, sizeof(*vbuf_c_) * width_));
+    ASSERT_NE(vbuf_c_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(source_data_);
+    source_data_ = nullptr;
+    aom_free(vbuf_c_);
+    vbuf_c_ = nullptr;
+    aom_free(vbuf_asm_);
+    vbuf_asm_ = nullptr;
+  }
+
+  void RunComparison() {
+    int norm_factor_ = 3 + (width_ >> 5);
+    API_REGISTER_STATE_CHECK(
+        c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_));
+    API_REGISTER_STATE_CHECK(asm_func_(vbuf_asm_, source_data_, width_, width_,
+                                       height_, norm_factor_));
+    EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
+        << "Output mismatch\n";
+  }
+  void RunSpeedTest() {
+    const int numIter = 5000000;
+    printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
+           numIter);
+    int norm_factor_ = 3 + (width_ >> 5);
+    aom_usec_timer c_timer_;
+    aom_usec_timer_start(&c_timer_);
+    for (int i = 0; i < numIter; i++) {
+      c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_);
+    }
+    aom_usec_timer_mark(&c_timer_);
+
+    aom_usec_timer asm_timer_;
+    aom_usec_timer_start(&asm_timer_);
+
+    for (int i = 0; i < numIter; i++) {
+      asm_func_(vbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
+    }
+    aom_usec_timer_mark(&asm_timer_);
+
+    const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
+    const int asm_sum_time =
+        static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
+
+    printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
+           asm_sum_time,
+           (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
+
+    EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
+        << "Output mismatch\n";
+  }
+
+ private:
+  IntProColFunc asm_func_;
+  IntProColFunc c_func_;
+  int16_t *vbuf_asm_;
+  int16_t *vbuf_c_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProColTest);
+
+TEST_P(IntProRowTest, MinValue) {
+  FillConstant(0);
+  RunComparison();
+}
+
+TEST_P(IntProRowTest, MaxValue) {
+  FillConstant(255);
+  RunComparison();
+}
+
+TEST_P(IntProRowTest, Random) {
+  FillRandom();
+  RunComparison();
+}
+
+TEST_P(IntProRowTest, DISABLED_Speed) {
+  FillRandom();
+  RunSpeedTest();
+}
+
+TEST_P(IntProColTest, MinValue) {
+  FillConstant(0);
+  RunComparison();
+}
+
+TEST_P(IntProColTest, MaxValue) {
+  FillConstant(255);
+  RunComparison();
+}
+
+TEST_P(IntProColTest, Random) {
+  FillRandom();
+  RunComparison();
+}
+
+TEST_P(IntProColTest, DISABLED_Speed) {
+  FillRandom();
+  RunSpeedTest();
+}
+class VectorVarTestBase : public ::testing::Test {
+ public:
+  explicit VectorVarTestBase(int bwl) { m_bwl = bwl; }
+  VectorVarTestBase() = default;
+  ~VectorVarTestBase() override = default;
+
+ protected:
+  static const int kDataAlignment = 16;
+
+  void SetUp() override {
+    width = 4 << m_bwl;
+
+    ref_vector = static_cast<int16_t *>(
+        aom_memalign(kDataAlignment, width * sizeof(ref_vector[0])));
+    ASSERT_NE(ref_vector, nullptr);
+    src_vector = static_cast<int16_t *>(
+        aom_memalign(kDataAlignment, width * sizeof(src_vector[0])));
+    ASSERT_NE(src_vector, nullptr);
+
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+  void TearDown() override {
+    aom_free(ref_vector);
+    ref_vector = nullptr;
+    aom_free(src_vector);
+    src_vector = nullptr;
+  }
+
+  void FillConstant(int16_t fill_constant_ref, int16_t fill_constant_src) {
+    for (int i = 0; i < width; ++i) {
+      ref_vector[i] = fill_constant_ref;
+      src_vector[i] = fill_constant_src;
+    }
+  }
+
+  void FillRandom() {
+    for (int i = 0; i < width; ++i) {
+      ref_vector[i] =
+          rnd_.Rand16() % max_range;  // acc. aom_vector_var_c brief.
+      src_vector[i] = rnd_.Rand16() % max_range;
+    }
+  }
+
+  int width;
+  int m_bwl;
+  int16_t *ref_vector;
+  int16_t *src_vector;
+  ACMRandom rnd_;
+
+  static const int max_range = 510;
+  static const int num_random_cmp = 50;
+};
+
+typedef int (*VectorVarFunc)(const int16_t *ref, const int16_t *src,
+                             const int bwl);
+
+typedef std::tuple<int, VectorVarFunc, VectorVarFunc> VecVarFunc;
+
+class VectorVarTest : public VectorVarTestBase,
+                      public ::testing::WithParamInterface<VecVarFunc> {
+ public:
+  VectorVarTest()
+      : VectorVarTestBase(GET_PARAM(0)), c_func(GET_PARAM(1)),
+        simd_func(GET_PARAM(2)) {}
+
+ protected:
+  int calcVarC() { return c_func(ref_vector, src_vector, m_bwl); }
+  int calcVarSIMD() { return simd_func(ref_vector, src_vector, m_bwl); }
+
+  VectorVarFunc c_func;
+  VectorVarFunc simd_func;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VectorVarTest);
+
+TEST_P(VectorVarTest, MaxVar) {
+  FillConstant(0, max_range);
+  int c_var = calcVarC();
+  int simd_var = calcVarSIMD();
+  ASSERT_EQ(c_var, simd_var);
+}
+TEST_P(VectorVarTest, MaxVarRev) {
+  FillConstant(max_range, 0);
+  int c_var = calcVarC();
+  int simd_var = calcVarSIMD();
+  ASSERT_EQ(c_var, simd_var);
+}
+TEST_P(VectorVarTest, ZeroDiff) {
+  FillConstant(0, 0);
+  int c_var = calcVarC();
+  int simd_var = calcVarSIMD();
+  ASSERT_EQ(c_var, simd_var);
+}
+TEST_P(VectorVarTest, ZeroDiff2) {
+  FillConstant(max_range, max_range);
+  int c_var = calcVarC();
+  int simd_var = calcVarSIMD();
+  ASSERT_EQ(c_var, simd_var);
+}
+TEST_P(VectorVarTest, Constant) {
+  FillConstant(30, 90);
+  int c_var = calcVarC();
+  int simd_var = calcVarSIMD();
+  ASSERT_EQ(c_var, simd_var);
+}
+TEST_P(VectorVarTest, Random) {
+  for (size_t i = 0; i < num_random_cmp; i++) {
+    FillRandom();
+    int c_var = calcVarC();
+    int simd_var = calcVarSIMD();
+    ASSERT_EQ(c_var, simd_var);
+  }
+}
+TEST_P(VectorVarTest, DISABLED_Speed) {
+  FillRandom();
+  const int numIter = 5000000;
+  printf("Width = %d number of iteration is %d \n", width, numIter);
+
+  int sum_c_var = 0;
+  int c_var = 0;
+
+  aom_usec_timer c_timer_;
+  aom_usec_timer_start(&c_timer_);
+  for (size_t i = 0; i < numIter; i++) {
+    c_var = calcVarC();
+    sum_c_var += c_var;
+  }
+  aom_usec_timer_mark(&c_timer_);
+
+  int simd_var = 0;
+  int sum_simd_var = 0;
+  aom_usec_timer simd_timer_;
+  aom_usec_timer_start(&simd_timer_);
+  for (size_t i = 0; i < numIter; i++) {
+    simd_var = calcVarSIMD();
+    sum_simd_var += simd_var;
+  }
+  aom_usec_timer_mark(&simd_timer_);
+
+  const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
+  const int simd_sum_time =
+      static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
+
+  printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
+         simd_sum_time,
+         (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
+
+  EXPECT_EQ(c_var, simd_var) << "Output mismatch \n";
+  EXPECT_EQ(sum_c_var, sum_simd_var) << "Output mismatch \n";
+}
+
+using std::make_tuple;
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AverageTest8bpp,
+    ::testing::Values(make_tuple(16, 16, 8, 1, 8, &aom_avg_8x8_c),
+                      make_tuple(16, 16, 8, 1, 4, &aom_avg_4x4_c)));
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AvgTest8bpp_avg_8x8_quad,
+    ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_c),
+                      make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_c),
+                      make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_c)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AverageTest8bpp,
+    ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_sse2),
+                      make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_sse2),
+                      make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_sse2),
+                      make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_sse2),
+                      make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_sse2),
+                      make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_sse2)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AvgTest8bpp_avg_8x8_quad,
+    ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_sse2),
+                      make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_sse2),
+                      make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_sse2)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, IntProRowTest,
+    ::testing::Values(
+        make_tuple(16, 16, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
+        make_tuple(32, 32, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
+        make_tuple(64, 64, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
+        make_tuple(128, 128, &aom_int_pro_row_sse2, &aom_int_pro_row_c)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, IntProColTest,
+    ::testing::Values(
+        make_tuple(16, 16, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
+        make_tuple(32, 32, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
+        make_tuple(64, 64, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
+        make_tuple(128, 128, &aom_int_pro_col_sse2, &aom_int_pro_col_c)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AvgTest8bpp_avg_8x8_quad,
+    ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_avx2),
+                      make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_avx2),
+                      make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_avx2)));
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, IntProRowTest,
+    ::testing::Values(
+        make_tuple(16, 16, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
+        make_tuple(32, 32, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
+        make_tuple(64, 64, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
+        make_tuple(128, 128, &aom_int_pro_row_avx2, &aom_int_pro_row_c)));
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, IntProColTest,
+    ::testing::Values(
+        make_tuple(16, 16, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
+        make_tuple(32, 32, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
+        make_tuple(64, 64, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
+        make_tuple(128, 128, &aom_int_pro_col_avx2, &aom_int_pro_col_c)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AverageTest8bpp,
+    ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_neon),
+                      make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_neon),
+                      make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_neon),
+                      make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_neon),
+                      make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_neon),
+                      make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_neon)));
+INSTANTIATE_TEST_SUITE_P(
+    NEON, IntProRowTest,
+    ::testing::Values(
+        make_tuple(16, 16, &aom_int_pro_row_neon, &aom_int_pro_row_c),
+        make_tuple(32, 32, &aom_int_pro_row_neon, &aom_int_pro_row_c),
+        make_tuple(64, 64, &aom_int_pro_row_neon, &aom_int_pro_row_c),
+        make_tuple(128, 128, &aom_int_pro_row_neon, &aom_int_pro_row_c)));
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, IntProColTest,
+    ::testing::Values(
+        make_tuple(16, 16, &aom_int_pro_col_neon, &aom_int_pro_col_c),
+        make_tuple(32, 32, &aom_int_pro_col_neon, &aom_int_pro_col_c),
+        make_tuple(64, 64, &aom_int_pro_col_neon, &aom_int_pro_col_c),
+        make_tuple(128, 128, &aom_int_pro_col_neon, &aom_int_pro_col_c)));
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AvgTest8bpp_avg_8x8_quad,
+    ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_neon),
+                      make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_neon),
+                      make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_neon)));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(
+    C, AverageTestHbd,
+    ::testing::Values(make_tuple(16, 16, 10, 1, 8, &aom_highbd_avg_8x8_c),
+                      make_tuple(16, 16, 10, 1, 4, &aom_highbd_avg_4x4_c),
+                      make_tuple(16, 16, 12, 1, 8, &aom_highbd_avg_8x8_c),
+                      make_tuple(16, 16, 12, 1, 4, &aom_highbd_avg_4x4_c)));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AverageTestHbd,
+    ::testing::Values(make_tuple(16, 16, 10, 0, 4, &aom_highbd_avg_4x4_neon),
+                      make_tuple(16, 16, 10, 5, 4, &aom_highbd_avg_4x4_neon),
+                      make_tuple(32, 32, 10, 15, 4, &aom_highbd_avg_4x4_neon),
+                      make_tuple(16, 16, 12, 0, 4, &aom_highbd_avg_4x4_neon),
+                      make_tuple(16, 16, 12, 5, 4, &aom_highbd_avg_4x4_neon),
+                      make_tuple(32, 32, 12, 15, 4, &aom_highbd_avg_4x4_neon),
+                      make_tuple(16, 16, 10, 0, 8, &aom_highbd_avg_8x8_neon),
+                      make_tuple(16, 16, 10, 5, 8, &aom_highbd_avg_8x8_neon),
+                      make_tuple(32, 32, 10, 15, 8, &aom_highbd_avg_8x8_neon),
+                      make_tuple(16, 16, 12, 0, 8, &aom_highbd_avg_8x8_neon),
+                      make_tuple(16, 16, 12, 5, 8, &aom_highbd_avg_8x8_neon),
+                      make_tuple(32, 32, 12, 15, 8, &aom_highbd_avg_8x8_neon)));
+#endif  // HAVE_NEON
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+typedef int (*SatdFunc)(const tran_low_t *coeffs, int length);
+typedef int (*SatdLpFunc)(const int16_t *coeffs, int length);
+
+template <typename SatdFuncType>
+struct SatdTestParam {
+  SatdTestParam(int s, SatdFuncType f1, SatdFuncType f2)
+      : satd_size(s), func_ref(f1), func_simd(f2) {}
+  friend std::ostream &operator<<(std::ostream &os,
+                                  const SatdTestParam<SatdFuncType> &param) {
+    return os << "satd_size: " << param.satd_size;
+  }
+  int satd_size;
+  SatdFuncType func_ref;
+  SatdFuncType func_simd;
+};
+
+template <typename CoeffType, typename SatdFuncType>
+class SatdTestBase
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<SatdTestParam<SatdFuncType>> {
+ protected:
+  explicit SatdTestBase(const SatdTestParam<SatdFuncType> &func_param) {
+    satd_size_ = func_param.satd_size;
+    satd_func_ref_ = func_param.func_ref;
+    satd_func_simd_ = func_param.func_simd;
+  }
+  void SetUp() override {
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<CoeffType *>(
+        aom_memalign(32, sizeof(*src_) * satd_size_));
+    ASSERT_NE(src_, nullptr);
+  }
+  void TearDown() override { aom_free(src_); }
+  void FillConstant(const CoeffType val) {
+    for (int i = 0; i < satd_size_; ++i) src_[i] = val;
+  }
+  void FillRandom() {
+    for (int i = 0; i < satd_size_; ++i) {
+      src_[i] = static_cast<int16_t>(rnd_.Rand16());
+    }
+  }
+  void Check(int expected) {
+    int total_ref;
+    API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
+    EXPECT_EQ(expected, total_ref);
+
+    int total_simd;
+    API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
+    EXPECT_EQ(expected, total_simd);
+  }
+  void RunComparison() {
+    int total_ref;
+    API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
+
+    int total_simd;
+    API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
+
+    EXPECT_EQ(total_ref, total_simd);
+  }
+  void RunSpeedTest() {
+    const int numIter = 500000;
+    printf("size = %d number of iteration is %d \n", satd_size_, numIter);
+
+    int total_ref;
+    aom_usec_timer c_timer_;
+    aom_usec_timer_start(&c_timer_);
+    for (int i = 0; i < numIter; i++) {
+      total_ref = satd_func_ref_(src_, satd_size_);
+    }
+    aom_usec_timer_mark(&c_timer_);
+
+    int total_simd;
+    aom_usec_timer simd_timer_;
+    aom_usec_timer_start(&simd_timer_);
+
+    for (int i = 0; i < numIter; i++) {
+      total_simd = satd_func_simd_(src_, satd_size_);
+    }
+    aom_usec_timer_mark(&simd_timer_);
+
+    const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
+    const int simd_sum_time =
+        static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
+
+    printf(
+        "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
+        simd_sum_time,
+        (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
+
+    EXPECT_EQ(total_ref, total_simd) << "Output mismatch \n";
+  }
+  int satd_size_;
+
+ private:
+  CoeffType *src_;
+  SatdFuncType satd_func_ref_;
+  SatdFuncType satd_func_simd_;
+  ACMRandom rnd_;
+};
+
+class SatdTest : public SatdTestBase<tran_low_t, SatdFunc> {
+ public:
+  SatdTest() : SatdTestBase(GetParam()) {}
+};
+
+TEST_P(SatdTest, MinValue) {
+  const int kMin = -524287;
+  const int expected = -kMin * satd_size_;
+  FillConstant(kMin);
+  Check(expected);
+}
+TEST_P(SatdTest, MaxValue) {
+  const int kMax = 524287;
+  const int expected = kMax * satd_size_;
+  FillConstant(kMax);
+  Check(expected);
+}
+TEST_P(SatdTest, Random) {
+  int expected;
+  switch (satd_size_) {
+    case 16: expected = 205298; break;
+    case 64: expected = 1113950; break;
+    case 256: expected = 4268415; break;
+    case 1024: expected = 16954082; break;
+    default:
+      FAIL() << "Invalid satd size (" << satd_size_
+             << ") valid: 16/64/256/1024";
+  }
+  FillRandom();
+  Check(expected);
+}
+TEST_P(SatdTest, Match) {
+  FillRandom();
+  RunComparison();
+}
+TEST_P(SatdTest, DISABLED_Speed) {
+  FillRandom();
+  RunSpeedTest();
+}
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdTest);
+
+INSTANTIATE_TEST_SUITE_P(
+    C, SatdTest,
+    ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_c),
+                      SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_c),
+                      SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_c),
+                      SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_c)));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, SatdTest,
+    ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_neon),
+                      SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_neon),
+                      SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_neon),
+                      SatdTestParam<SatdFunc>(1024, &aom_satd_c,
+                                              &aom_satd_neon)));
+INSTANTIATE_TEST_SUITE_P(
+    NEON, VectorVarTest,
+    ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_neon),
+                      make_tuple(3, &aom_vector_var_c, &aom_vector_var_neon),
+                      make_tuple(4, &aom_vector_var_c, &aom_vector_var_neon),
+                      make_tuple(5, &aom_vector_var_c, &aom_vector_var_neon)));
+#endif
+
+#if HAVE_SVE
+INSTANTIATE_TEST_SUITE_P(
+    SVE, VectorVarTest,
+    ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_sve),
+                      make_tuple(3, &aom_vector_var_c, &aom_vector_var_sve),
+                      make_tuple(4, &aom_vector_var_c, &aom_vector_var_sve),
+                      make_tuple(5, &aom_vector_var_c, &aom_vector_var_sve)));
+#endif  // HAVE_SVE
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, VectorVarTest,
+    ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_sse4_1),
+                      make_tuple(3, &aom_vector_var_c, &aom_vector_var_sse4_1),
+                      make_tuple(4, &aom_vector_var_c, &aom_vector_var_sse4_1),
+                      make_tuple(5, &aom_vector_var_c,
+                                 &aom_vector_var_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, SatdTest,
+    ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_avx2),
+                      SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_avx2),
+                      SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_avx2),
+                      SatdTestParam<SatdFunc>(1024, &aom_satd_c,
+                                              &aom_satd_avx2)));
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, VectorVarTest,
+    ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_avx2),
+                      make_tuple(3, &aom_vector_var_c, &aom_vector_var_avx2),
+                      make_tuple(4, &aom_vector_var_c, &aom_vector_var_avx2),
+                      make_tuple(5, &aom_vector_var_c, &aom_vector_var_avx2)));
+#endif  // HAVE_AVX2
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, SatdTest,
+    ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_sse2),
+                      SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_sse2),
+                      SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_sse2),
+                      SatdTestParam<SatdFunc>(1024, &aom_satd_c,
+                                              &aom_satd_sse2)));
+#endif
+
+class SatdLpTest : public SatdTestBase<int16_t, SatdLpFunc> {
+ public:
+  SatdLpTest() : SatdTestBase(GetParam()) {}
+};
+
+TEST_P(SatdLpTest, MinValue) {
+  const int kMin = -32640;
+  const int expected = -kMin * satd_size_;
+  FillConstant(kMin);
+  Check(expected);
+}
+TEST_P(SatdLpTest, MaxValue) {
+  const int kMax = 32640;
+  const int expected = kMax * satd_size_;
+  FillConstant(kMax);
+  Check(expected);
+}
+TEST_P(SatdLpTest, Random) {
+  int expected;
+  switch (satd_size_) {
+    case 16: expected = 205298; break;
+    case 64: expected = 1113950; break;
+    case 256: expected = 4268415; break;
+    case 1024: expected = 16954082; break;
+    default:
+      FAIL() << "Invalid satd size (" << satd_size_
+             << ") valid: 16/64/256/1024";
+  }
+  FillRandom();
+  Check(expected);
+}
+TEST_P(SatdLpTest, Match) {
+  FillRandom();
+  RunComparison();
+}
+TEST_P(SatdLpTest, DISABLED_Speed) {
+  FillRandom();
+  RunSpeedTest();
+}
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdLpTest);
+
+// Add the following c test to avoid gtest uninitialized warning.
+INSTANTIATE_TEST_SUITE_P(
+    C, SatdLpTest,
+    ::testing::Values(
+        SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_c),
+        SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_c),
+        SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_c),
+        SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_c)));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, SatdLpTest,
+    ::testing::Values(
+        SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_neon),
+        SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_neon),
+        SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_neon),
+        SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_neon)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, SatdLpTest,
+    ::testing::Values(
+        SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_avx2),
+        SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_avx2),
+        SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_avx2),
+        SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_avx2)));
+#endif
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, SatdLpTest,
+    ::testing::Values(
+        SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_sse2),
+        SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_sse2),
+        SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_sse2),
+        SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_sse2)));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/avif_progressive_test.cc b/third_party/aom/test/avif_progressive_test.cc
new file mode 100644
index 0000000000..2a28ca368b
--- /dev/null
+++ b/third_party/aom/test/avif_progressive_test.cc
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2023, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstddef>
+#include <vector>
+
+#include "aom/aomcx.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_encoder.h"
+#include "aom/aom_image.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+// This test emulates how libavif calls libaom functions to encode a
+// progressive AVIF image in libavif's ProgressiveTest.QualityChange test.
+TEST(AVIFProgressiveTest, QualityChange) {
+  constexpr int kWidth = 256;
+  constexpr int kHeight = 256;
+  // Dummy buffer of neutral gray samples.
+  constexpr size_t kBufferSize = 3 * kWidth * kHeight;
+  std::vector<unsigned char> buffer(kBufferSize,
+                                    static_cast<unsigned char>(128));
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I444, kWidth, kHeight, 1,
+                               buffer.data()));
+  img.cp = AOM_CICP_CP_UNSPECIFIED;
+  img.tc = AOM_CICP_TC_UNSPECIFIED;
+  img.mc = AOM_CICP_MC_UNSPECIFIED;
+  img.range = AOM_CR_FULL_RANGE;
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY));
+  cfg.g_profile = 1;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_bit_depth = AOM_BITS_8;
+  cfg.g_input_bit_depth = 8;
+  cfg.g_lag_in_frames = 0;
+  cfg.rc_end_usage = AOM_Q;
+  cfg.rc_min_quantizer = 50;
+  cfg.rc_max_quantizer = 50;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 50));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_NUMBER_SPATIAL_LAYERS, 2));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 6));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM));
+
+  // First frame (layer 0)
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Second frame (layer 1)
+  cfg.rc_min_quantizer = 0;
+  cfg.rc_max_quantizer = 0;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AV1E_SET_LOSSLESS, 1));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 1));
+  aom_enc_frame_flags_t encode_flags =
+      AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD |
+      AOM_EFLAG_NO_REF_ARF2 | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, encode_flags));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+// This test emulates how libavif calls libaom functions to encode a
+// progressive AVIF image in libavif's ProgressiveTest.DimensionChange test.
+TEST(AVIFProgressiveTest, DimensionChange) {
+  constexpr int kWidth = 256;
+  constexpr int kHeight = 256;
+  // Dummy buffer of neutral gray samples.
+  constexpr size_t kBufferSize = 3 * kWidth * kHeight;
+  std::vector<unsigned char> buffer(kBufferSize,
+                                    static_cast<unsigned char>(128));
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I444, kWidth, kHeight, 1,
+                               buffer.data()));
+  img.cp = AOM_CICP_CP_UNSPECIFIED;
+  img.tc = AOM_CICP_TC_UNSPECIFIED;
+  img.mc = AOM_CICP_MC_UNSPECIFIED;
+  img.range = AOM_CR_FULL_RANGE;
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY));
+  cfg.g_profile = 1;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_bit_depth = AOM_BITS_8;
+  cfg.g_input_bit_depth = 8;
+  cfg.g_lag_in_frames = 0;
+  cfg.rc_end_usage = AOM_Q;
+  cfg.rc_min_quantizer = 0;
+  cfg.rc_max_quantizer = 0;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AV1E_SET_LOSSLESS, 1));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_NUMBER_SPATIAL_LAYERS, 2));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 6));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM));
+
+  // First frame (layer 0)
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 0));
+  aom_scaling_mode_t scaling_mode = { AOME_ONETWO, AOME_ONETWO };
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_SCALEMODE, &scaling_mode));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Second frame (layer 1)
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 1));
+  aom_enc_frame_flags_t encode_flags =
+      AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD |
+      AOM_EFLAG_NO_REF_ARF2 | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, encode_flags));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+// This test reproduces bug aomedia:3382. Certain parameters such as width,
+// height, g_threads, usage, etc. were carefully chosen based on the
+// complicated logic of av1_select_sb_size() to cause an inconsistent sb_size.
+TEST(AVIFProgressiveTest, DimensionChangeLargeImageMultiThread) {
+  constexpr int kWidth = 1920;
+  constexpr int kHeight = 1080;
+  // Dummy buffer of neutral gray samples.
+  constexpr size_t kBufferSize = 2 * kWidth * kHeight;
+  std::vector<unsigned char> buffer(kBufferSize,
+                                    static_cast<unsigned char>(128));
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1,
+                               buffer.data()));
+  img.cp = AOM_CICP_CP_UNSPECIFIED;
+  img.tc = AOM_CICP_TC_UNSPECIFIED;
+  img.mc = AOM_CICP_MC_UNSPECIFIED;
+  img.range = AOM_CR_FULL_RANGE;
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY));
+  cfg.g_profile = 0;
+  cfg.g_w = img.w;
+  cfg.g_h = img.h;
+  cfg.g_bit_depth = AOM_BITS_8;
+  cfg.g_input_bit_depth = 8;
+  cfg.g_lag_in_frames = 0;
+  cfg.g_threads = 2;  // MultiThread
+  cfg.rc_end_usage = AOM_Q;
+  cfg.rc_min_quantizer = 0;
+  cfg.rc_max_quantizer = 63;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 31));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_NUMBER_SPATIAL_LAYERS, 2));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 6));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_ROW_MT, 1));  // MultiThread
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM));
+
+  // First frame (layer 0)
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 0));
+  aom_scaling_mode_t scaling_mode = { AOME_ONETWO, AOME_ONETWO };
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_SCALEMODE, &scaling_mode));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Second frame (layer 1)
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 1));
+  aom_enc_frame_flags_t encode_flags =
+      AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD |
+      AOM_EFLAG_NO_REF_ARF2 | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, encode_flags));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+}  // namespace
diff --git a/third_party/aom/test/best_encode.sh b/third_party/aom/test/best_encode.sh
new file mode 100755
index 0000000000..d29fdaed52
--- /dev/null
+++ b/third_party/aom/test/best_encode.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+#
+# Copyright (c) 2016, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and
+# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+# was not distributed with this source code in the LICENSE file, you can
+# obtain it at www.aomedia.org/license/software. If the Alliance for Open
+# Media Patent License 1.0 was not distributed with this source code in the
+# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+#
+# Author: jimbankoski@google.com (Jim Bankoski)
+
+if [[ $# -ne 2 ]]; then
+  echo "Encodes a file using best known settings (slow!)"
+  echo "  Usage:    be [FILE] [BITRATE]"
+  echo "  Example:  be akiyo_cif.y4m 200"
+  exit
+fi
+
+f=$1  # file is first parameter
+b=$2  # bitrate is second parameter
+
+if [[ -e $f.fpf ]]; then
+  # First-pass file found, do second pass only
+  aomenc \
+    $f \
+    -o $f-$b.av1.webm \
+    -p 2 \
+    --pass=2 \
+    --fpf=$f.fpf \
+    --good \
+    --cpu-used=0 \
+    --target-bitrate=$b \
+    --auto-alt-ref=1 \
+    -v \
+    --minsection-pct=0 \
+    --maxsection-pct=800 \
+    --lag-in-frames=25 \
+    --kf-min-dist=0 \
+    --kf-max-dist=99999 \
+    --static-thresh=0 \
+    --min-q=0 \
+    --max-q=63 \
+    --drop-frame=0 \
+    --bias-pct=50 \
+    --minsection-pct=0 \
+    --maxsection-pct=800 \
+    --psnr \
+    --arnr-maxframes=7 \
+    --arnr-strength=3
+else
+  # No first-pass file found, do 2-pass encode
+  aomenc \
+    $f \
+    -o $f-$b.av1.webm \
+    -p 2 \
+    --pass=1 \
+    --fpf=$f.fpf \
+    --good \
+    --cpu-used=0 \
+    --target-bitrate=$b \
+    --auto-alt-ref=1 \
+    -v \
+    --minsection-pct=0 \
+    --maxsection-pct=800 \
+    --lag-in-frames=25 \
+    --kf-min-dist=0 \
+    --kf-max-dist=99999 \
+    --static-thresh=0 \
+    --min-q=0 \
+    --max-q=63 \
+    --drop-frame=0
+
+  aomenc \
+    $f \
+    -o $f-$b.av1.webm \
+    -p 2 \
+    --pass=2 \
+    --fpf=$f.fpf \
+    --good \
+    --cpu-used=0 \
+    --target-bitrate=$b \
+    --auto-alt-ref=1 \
+    -v \
+    --minsection-pct=0 \
+    --maxsection-pct=800 \
+    --lag-in-frames=25 \
+    --kf-min-dist=0 \
+    --kf-max-dist=99999 \
+    --static-thresh=0 \
+    --min-q=0 \
+    --max-q=63 \
+    --drop-frame=0 \
+    --bias-pct=50 \
+    --minsection-pct=0 \
+    --maxsection-pct=800 \
+    --psnr \
+    --arnr-maxframes=7 \
+    --arnr-strength=3
+fi
diff --git a/third_party/aom/test/binary_codes_test.cc b/third_party/aom/test/binary_codes_test.cc
new file mode 100644
index 0000000000..2c2dfb45a8
--- /dev/null
+++ b/third_party/aom/test/binary_codes_test.cc
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+
+#include "test/acm_random.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/bitreader.h"
+#include "aom_dsp/bitwriter.h"
+#include "aom_dsp/binary_codes_reader.h"
+#include "aom_dsp/binary_codes_writer.h"
+
+#define ACCT_STR __func__
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+// Test for Finite subexponential code with reference
+TEST(AV1, TestPrimitiveRefsubexpfin) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int kBufferSize = 65536;
+  aom_writer bw;
+  uint8_t bw_buffer[kBufferSize];
+  const uint16_t kRanges = 8;
+  const uint16_t kSubexpParams = 6;
+  const uint16_t kReferences = 8;
+  const uint16_t kValues = 16;
+  uint16_t enc_values[kRanges][kSubexpParams][kReferences][kValues][4];
+  const uint16_t range_vals[kRanges] = { 1, 13, 64, 120, 230, 420, 1100, 8000 };
+  aom_start_encode(&bw, bw_buffer);
+  for (int n = 0; n < kRanges; ++n) {
+    const uint16_t range = range_vals[n];
+    for (int k = 0; k < kSubexpParams; ++k) {
+      for (int r = 0; r < kReferences; ++r) {
+        const uint16_t ref = rnd(range);
+        for (int v = 0; v < kValues; ++v) {
+          const uint16_t value = rnd(range);
+          enc_values[n][k][r][v][0] = range;
+          enc_values[n][k][r][v][1] = k;
+          enc_values[n][k][r][v][2] = ref;
+          enc_values[n][k][r][v][3] = value;
+          aom_write_primitive_refsubexpfin(&bw, range, k, ref, value);
+        }
+      }
+    }
+  }
+  GTEST_ASSERT_GE(aom_stop_encode(&bw), 0);
+  aom_reader br;
+  aom_reader_init(&br, bw_buffer, bw.pos);
+  GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
+  GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
+  for (int n = 0; n < kRanges; ++n) {
+    for (int k = 0; k < kSubexpParams; ++k) {
+      for (int r = 0; r < kReferences; ++r) {
+        for (int v = 0; v < kValues; ++v) {
+          const uint16_t range = enc_values[n][k][r][v][0];
+          assert(k == enc_values[n][k][r][v][1]);
+          const uint16_t ref = enc_values[n][k][r][v][2];
+          const uint16_t value =
+              aom_read_primitive_refsubexpfin(&br, range, k, ref, ACCT_STR);
+          GTEST_ASSERT_EQ(value, enc_values[n][k][r][v][3]);
+        }
+      }
+    }
+  }
+}
+// TODO(debargha): Adds tests for other primitives
+}  // namespace
diff --git a/third_party/aom/test/blend_a64_mask_1d_test.cc b/third_party/aom/test/blend_a64_mask_1d_test.cc
new file mode 100644
index 0000000000..f9549bccb2
--- /dev/null
+++ b/third_party/aom/test/blend_a64_mask_1d_test.cc
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/register_state_check.h"
+#include "test/function_equivalence_test.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+
+#include "av1/common/enums.h"
+
+#include "aom_dsp/blend.h"
+
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+template <typename F, typename T>
+class BlendA64Mask1DTest : public FunctionEquivalenceTest<F> {
+ public:
+  static const int kIterations = 10000;
+  static const int kMaxWidth = MAX_SB_SIZE * 5;  // * 5 to cover longer strides
+  static const int kMaxHeight = MAX_SB_SIZE;
+  static const int kBufSize = kMaxWidth * kMaxHeight;
+  static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
+  static const int kMaxMaskSize = kMaxMaskWidth;
+
+  ~BlendA64Mask1DTest() override = default;
+
+  virtual void Execute(const T *p_src0, const T *p_src1) = 0;
+
+  void Common(int block_size) {
+    w_ = block_size_wide[block_size];
+    h_ = block_size_high[block_size];
+
+    dst_offset_ = this->rng_(33);
+    dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src0_offset_ = this->rng_(33);
+    src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src1_offset_ = this->rng_(33);
+    src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    T *p_src0;
+    T *p_src1;
+
+    switch (this->rng_(3)) {
+      case 0:  // Separate sources
+        p_src0 = src0_;
+        p_src1 = src1_;
+        break;
+      case 1:  // src0 == dst
+        p_src0 = dst_tst_;
+        src0_stride_ = dst_stride_;
+        src0_offset_ = dst_offset_;
+        p_src1 = src1_;
+        break;
+      case 2:  // src1 == dst
+        p_src0 = src0_;
+        p_src1 = dst_tst_;
+        src1_stride_ = dst_stride_;
+        src1_offset_ = dst_offset_;
+        break;
+      default: FAIL();
+    }
+
+    Execute(p_src0, p_src1);
+
+    for (int r = 0; r < h_; ++r) {
+      for (int c = 0; c < w_; ++c) {
+        ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
+                  dst_tst_[dst_offset_ + r * dst_stride_ + c]);
+      }
+    }
+  }
+
+  T dst_ref_[kBufSize];
+  T dst_tst_[kBufSize];
+  uint32_t dst_stride_;
+  uint32_t dst_offset_;
+
+  T src0_[kBufSize];
+  uint32_t src0_stride_;
+  uint32_t src0_offset_;
+
+  T src1_[kBufSize];
+  uint32_t src1_stride_;
+  uint32_t src1_offset_;
+
+  uint8_t mask_[kMaxMaskSize];
+
+  int w_;
+  int h_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                    uint32_t src0_stride, const uint8_t *src1,
+                    uint32_t src1_stride, const uint8_t *mask, int w, int h);
+typedef libaom_test::FuncParam<F8B> TestFuncs;
+
+class BlendA64Mask1DTest8B : public BlendA64Mask1DTest<F8B, uint8_t> {
+ protected:
+  void Execute(const uint8_t *p_src0, const uint8_t *p_src1) override {
+    params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
+                     src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_,
+                     w_, h_);
+    API_REGISTER_STATE_CHECK(params_.tst_func(
+        dst_tst_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
+        src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, w_, h_));
+  }
+};
+
+TEST_P(BlendA64Mask1DTest8B, RandomValues) {
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand8();
+      src1_[i] = rng_.Rand8();
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    Common(bsize);
+  }
+}
+
+TEST_P(BlendA64Mask1DTest8B, ExtremeValues) {
+  for (int i = 0; i < kBufSize; ++i) {
+    dst_ref_[i] = rng_(2) + 254;
+    dst_tst_[i] = rng_(2) + 254;
+    src0_[i] = rng_(2) + 254;
+    src1_[i] = rng_(2) + 254;
+  }
+
+  for (int i = 0; i < kMaxMaskSize; ++i)
+    mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+    Common(bsize);
+  }
+}
+
+static void blend_a64_hmask_ref(uint8_t *dst, uint32_t dst_stride,
+                                const uint8_t *src0, uint32_t src0_stride,
+                                const uint8_t *src1, uint32_t src1_stride,
+                                const uint8_t *mask, int w, int h) {
+  uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize]
+                [BlendA64Mask1DTest8B::kMaxMaskSize];
+
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[col];
+
+  aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
+                       &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, w, h,
+                       0, 0);
+}
+
+static void blend_a64_vmask_ref(uint8_t *dst, uint32_t dst_stride,
+                                const uint8_t *src0, uint32_t src0_stride,
+                                const uint8_t *src1, uint32_t src1_stride,
+                                const uint8_t *mask, int w, int h) {
+  uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize]
+                [BlendA64Mask1DTest8B::kMaxMaskSize];
+
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[row];
+
+  aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
+                       &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, w, h,
+                       0, 0);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    C, BlendA64Mask1DTest8B,
+    ::testing::Values(TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_c),
+                      TestFuncs(blend_a64_vmask_ref, aom_blend_a64_vmask_c)));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BlendA64Mask1DTest8B,
+    ::testing::Values(
+        TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_sse4_1),
+        TestFuncs(blend_a64_vmask_ref, aom_blend_a64_vmask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, BlendA64Mask1DTest8B,
+    ::testing::Values(TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_neon),
+                      TestFuncs(blend_a64_vmask_ref,
+                                aom_blend_a64_vmask_neon)));
+#endif  // HAVE_NEON
+
+//////////////////////////////////////////////////////////////////////////////
+// High bit-depth version
+//////////////////////////////////////////////////////////////////////////////
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                     uint32_t src0_stride, const uint8_t *src1,
+                     uint32_t src1_stride, const uint8_t *mask, int w, int h,
+                     int bd);
+typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
+
+class BlendA64Mask1DTestHBD : public BlendA64Mask1DTest<FHBD, uint16_t> {
+ protected:
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1) override {
+    params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
+                     CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+                     CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
+                     mask_, w_, h_, bit_depth_);
+    API_REGISTER_STATE_CHECK(params_.tst_func(
+        CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
+        CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+        CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, mask_, w_, h_,
+        bit_depth_));
+  }
+
+  int bit_depth_;
+};
+
+TEST_P(BlendA64Mask1DTestHBD, RandomValues) {
+  for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+      const int hi = 1 << bit_depth_;
+
+      for (int i = 0; i < kBufSize; ++i) {
+        dst_ref_[i] = rng_(hi);
+        dst_tst_[i] = rng_(hi);
+        src0_[i] = rng_(hi);
+        src1_[i] = rng_(hi);
+      }
+
+      for (int i = 0; i < kMaxMaskSize; ++i)
+        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+      Common(bsize);
+    }
+  }
+}
+
+TEST_P(BlendA64Mask1DTestHBD, ExtremeValues) {
+  for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
+    const int hi = 1 << bit_depth_;
+    const int lo = hi - 2;
+
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_(hi - lo) + lo;
+      dst_tst_[i] = rng_(hi - lo) + lo;
+      src0_[i] = rng_(hi - lo) + lo;
+      src1_[i] = rng_(hi - lo) + lo;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+      Common(bsize);
+    }
+  }
+}
+
+static void highbd_blend_a64_hmask_ref(
+    uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+    uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
+    const uint8_t *mask, int w, int h, int bd) {
+  uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize]
+                [BlendA64Mask1DTestHBD::kMaxMaskSize];
+
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[col];
+
+  aom_highbd_blend_a64_mask_c(
+      dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0],
+      BlendA64Mask1DTestHBD::kMaxMaskSize, w, h, 0, 0, bd);
+}
+
+static void highbd_blend_a64_vmask_ref(
+    uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+    uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
+    const uint8_t *mask, int w, int h, int bd) {
+  uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize]
+                [BlendA64Mask1DTestHBD::kMaxMaskSize];
+
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[row];
+
+  aom_highbd_blend_a64_mask_c(
+      dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0],
+      BlendA64Mask1DTestHBD::kMaxMaskSize, w, h, 0, 0, bd);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    C, BlendA64Mask1DTestHBD,
+    ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref,
+                                   aom_highbd_blend_a64_hmask_c),
+                      TestFuncsHBD(highbd_blend_a64_vmask_ref,
+                                   aom_highbd_blend_a64_vmask_c)));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BlendA64Mask1DTestHBD,
+    ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref,
+                                   aom_highbd_blend_a64_hmask_sse4_1),
+                      TestFuncsHBD(highbd_blend_a64_vmask_ref,
+                                   aom_highbd_blend_a64_vmask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, BlendA64Mask1DTestHBD,
+    ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref,
+                                   aom_highbd_blend_a64_hmask_neon),
+                      TestFuncsHBD(highbd_blend_a64_vmask_ref,
+                                   aom_highbd_blend_a64_vmask_neon)));
+#endif  // HAVE_NEON
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/blend_a64_mask_test.cc b/third_party/aom/test/blend_a64_mask_test.cc
new file mode 100644
index 0000000000..fafc7f0329
--- /dev/null
+++ b/third_party/aom/test/blend_a64_mask_test.cc
@@ -0,0 +1,649 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/register_state_check.h"
+#include "test/function_equivalence_test.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+
+#include "av1/common/enums.h"
+
+#include "aom_dsp/blend.h"
+
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+template <typename BlendA64Func, typename SrcPixel, typename DstPixel>
+class BlendA64MaskTest : public FunctionEquivalenceTest<BlendA64Func> {
+ protected:
+  static const int kIterations = 10000;
+  static const int kMaxWidth = MAX_SB_SIZE * 5;  // * 5 to cover longer strides
+  static const int kMaxHeight = MAX_SB_SIZE;
+  static const int kBufSize = kMaxWidth * kMaxHeight;
+  static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
+  static const int kMaxMaskSize = kMaxMaskWidth * kMaxMaskWidth;
+
+  ~BlendA64MaskTest() override = default;
+
+  virtual void Execute(const SrcPixel *p_src0, const SrcPixel *p_src1,
+                       int run_times) = 0;
+
+  template <typename Pixel>
+  void GetSources(Pixel **src0, Pixel **src1, Pixel * /*dst*/, int run_times) {
+    if (run_times > 1) {
+      *src0 = src0_;
+      *src1 = src1_;
+      return;
+    }
+    switch (this->rng_(3)) {
+      case 0:  // Separate sources
+        *src0 = src0_;
+        *src1 = src1_;
+        break;
+      case 1:  // src0 == dst
+        *src0 = dst_tst_;
+        src0_stride_ = dst_stride_;
+        src0_offset_ = dst_offset_;
+        *src1 = src1_;
+        break;
+      case 2:  // src1 == dst
+        *src0 = src0_;
+        *src1 = dst_tst_;
+        src1_stride_ = dst_stride_;
+        src1_offset_ = dst_offset_;
+        break;
+      default: FAIL();
+    }
+  }
+
+  void GetSources(uint16_t **src0, uint16_t **src1, uint8_t * /*dst*/,
+                  int /*run_times*/) {
+    *src0 = src0_;
+    *src1 = src1_;
+  }
+
+  uint8_t Rand1() { return this->rng_.Rand8() & 1; }
+
+  void RunOneTest(int block_size, int subx, int suby, int run_times) {
+    w_ = block_size_wide[block_size];
+    h_ = block_size_high[block_size];
+    run_times = run_times > 1 ? run_times / w_ : 1;
+    ASSERT_GT(run_times, 0);
+    subx_ = subx;
+    suby_ = suby;
+
+    dst_offset_ = this->rng_(33);
+    dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src0_offset_ = this->rng_(33);
+    src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src1_offset_ = this->rng_(33);
+    src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    mask_stride_ =
+        this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 2 : 1);
+
+    SrcPixel *p_src0;
+    SrcPixel *p_src1;
+
+    p_src0 = src0_;
+    p_src1 = src1_;
+
+    GetSources(&p_src0, &p_src1, &dst_ref_[0], run_times);
+
+    Execute(p_src0, p_src1, run_times);
+
+    for (int r = 0; r < h_; ++r) {
+      for (int c = 0; c < w_; ++c) {
+        ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
+                  dst_tst_[dst_offset_ + r * dst_stride_ + c])
+            << w_ << "x" << h_ << " subx " << subx_ << " suby " << suby_
+            << " r: " << r << " c: " << c;
+      }
+    }
+  }
+
+  void RunTest(int block_size, int run_times) {
+    for (subx_ = 0; subx_ <= 1; subx_++) {
+      for (suby_ = 0; suby_ <= 1; suby_++) {
+        RunOneTest(block_size, subx_, suby_, run_times);
+      }
+    }
+  }
+
+  DstPixel dst_ref_[kBufSize];
+  DstPixel dst_tst_[kBufSize];
+  uint32_t dst_stride_;
+  uint32_t dst_offset_;
+
+  SrcPixel src0_[kBufSize];
+  uint32_t src0_stride_;
+  uint32_t src0_offset_;
+
+  SrcPixel src1_[kBufSize];
+  uint32_t src1_stride_;
+  uint32_t src1_offset_;
+
+  uint8_t mask_[kMaxMaskSize];
+  size_t mask_stride_;
+
+  int w_;
+  int h_;
+
+  int suby_;
+  int subx_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                    uint32_t src0_stride, const uint8_t *src1,
+                    uint32_t src1_stride, const uint8_t *mask,
+                    uint32_t mask_stride, int w, int h, int subx, int suby);
+typedef libaom_test::FuncParam<F8B> TestFuncs;
+
+class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t, uint8_t> {
+ protected:
+  void Execute(const uint8_t *p_src0, const uint8_t *p_src1,
+               int run_times) override {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
+                       p_src0 + src0_offset_, src0_stride_,
+                       p_src1 + src1_offset_, src1_stride_, mask_,
+                       kMaxMaskWidth, w_, h_, subx_, suby_);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
+                       p_src0 + src0_offset_, src0_stride_,
+                       p_src1 + src1_offset_, src1_stride_, mask_,
+                       kMaxMaskWidth, w_, h_, subx_, suby_);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 1) {
+      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
+             time1, time2);
+      printf("(%3.2f)\n", time1 / time2);
+    }
+  }
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B);
+
+TEST_P(BlendA64MaskTest8B, RandomValues) {
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand8();
+      src1_[i] = rng_.Rand8();
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    RunTest(bsize, 1);
+  }
+}
+
+TEST_P(BlendA64MaskTest8B, ExtremeValues) {
+  for (int i = 0; i < kBufSize; ++i) {
+    dst_ref_[i] = rng_(2) + 254;
+    dst_tst_[i] = rng_(2) + 254;
+    src0_[i] = rng_(2) + 254;
+    src1_[i] = rng_(2) + 254;
+  }
+
+  for (int i = 0; i < kMaxMaskSize; ++i)
+    mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize)
+    RunTest(bsize, 1);
+}
+
+TEST_P(BlendA64MaskTest8B, DISABLED_Speed) {
+  const int kRunTimes = 10000000;
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand8();
+      src1_[i] = rng_.Rand8();
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    RunTest(bsize, kRunTimes);
+  }
+}
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE4_1, BlendA64MaskTest8B,
+                         ::testing::Values(TestFuncs(
+                             aom_blend_a64_mask_c, aom_blend_a64_mask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, BlendA64MaskTest8B,
+                         ::testing::Values(TestFuncs(aom_blend_a64_mask_sse4_1,
+                                                     aom_blend_a64_mask_avx2)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, BlendA64MaskTest8B,
+                         ::testing::Values(TestFuncs(aom_blend_a64_mask_c,
+                                                     aom_blend_a64_mask_neon)));
+#endif  // HAVE_NEON
+
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit _d16 version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*F8B_D16)(uint8_t *dst, uint32_t dst_stride, const uint16_t *src0,
+                        uint32_t src0_stride, const uint16_t *src1,
+                        uint32_t src1_stride, const uint8_t *mask,
+                        uint32_t mask_stride, int w, int h, int subx, int suby,
+                        ConvolveParams *conv_params);
+typedef libaom_test::FuncParam<F8B_D16> TestFuncs_d16;
+
+class BlendA64MaskTest8B_d16
+    : public BlendA64MaskTest<F8B_D16, uint16_t, uint8_t> {
+ protected:
+  // max number of bits used by the source
+  static const int kSrcMaxBitsMask = 0x3fff;
+
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
+               int run_times) override {
+    ConvolveParams conv_params;
+    conv_params.round_0 = ROUND0_BITS;
+    conv_params.round_1 = COMPOUND_ROUND1_BITS;
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
+                       p_src0 + src0_offset_, src0_stride_,
+                       p_src1 + src1_offset_, src1_stride_, mask_,
+                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
+                       p_src0 + src0_offset_, src0_stride_,
+                       p_src1 + src1_offset_, src1_stride_, mask_,
+                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 1) {
+      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
+             time1, time2);
+      printf("(%3.2f)\n", time1 / time2);
+    }
+  }
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B_d16);
+
+TEST_P(BlendA64MaskTest8B_d16, RandomValues) {
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand16() & kSrcMaxBitsMask;
+      src1_[i] = rng_.Rand16() & kSrcMaxBitsMask;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    RunTest(bsize, 1);
+  }
+}
+
+TEST_P(BlendA64MaskTest8B_d16, ExtremeValues) {
+  for (int i = 0; i < kBufSize; ++i) {
+    dst_ref_[i] = 255;
+    dst_tst_[i] = 255;
+
+    src0_[i] = kSrcMaxBitsMask;
+    src1_[i] = kSrcMaxBitsMask;
+  }
+
+  for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA - 1;
+
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize)
+    RunTest(bsize, 1);
+}
+
+TEST_P(BlendA64MaskTest8B_d16, DISABLED_Speed) {
+  const int kRunTimes = 10000000;
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand16() & kSrcMaxBitsMask;
+      src1_[i] = rng_.Rand16() & kSrcMaxBitsMask;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    RunTest(bsize, kRunTimes);
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BlendA64MaskTest8B_d16,
+    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
+                                    aom_lowbd_blend_a64_d16_mask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, BlendA64MaskTest8B_d16,
+    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
+                                    aom_lowbd_blend_a64_d16_mask_avx2)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, BlendA64MaskTest8B_d16,
+    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
+                                    aom_lowbd_blend_a64_d16_mask_neon)));
+#endif  // HAVE_NEON
+
+//////////////////////////////////////////////////////////////////////////////
+// High bit-depth version
+//////////////////////////////////////////////////////////////////////////////
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                     uint32_t src0_stride, const uint8_t *src1,
+                     uint32_t src1_stride, const uint8_t *mask,
+                     uint32_t mask_stride, int w, int h, int subx, int suby,
+                     int bd);
+typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
+
+class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t, uint16_t> {
+ protected:
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
+               int run_times) override {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
+                       CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+                       CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
+                       mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
+                       CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+                       CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
+                       mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 1) {
+      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
+             time1, time2);
+      printf("(%3.2f)\n", time1 / time2);
+    }
+  }
+
+  int bit_depth_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTestHBD);
+
+TEST_P(BlendA64MaskTestHBD, RandomValues) {
+  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
+       bit_depth_ += 2) {
+    const int hi = 1 << bit_depth_;
+
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+      for (int i = 0; i < kBufSize; ++i) {
+        dst_ref_[i] = rng_(hi);
+        dst_tst_[i] = rng_(hi);
+        src0_[i] = rng_(hi);
+        src1_[i] = rng_(hi);
+      }
+
+      for (int i = 0; i < kMaxMaskSize; ++i)
+        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+      RunTest(bsize, 1);
+    }
+  }
+}
+
+TEST_P(BlendA64MaskTestHBD, ExtremeValues) {
+  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
+       bit_depth_ += 2) {
+    const int hi = 1 << bit_depth_;
+    const int lo = hi - 2;
+
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
+         ++bsize) {
+      for (int i = 0; i < kBufSize; ++i) {
+        dst_ref_[i] = rng_(hi - lo) + lo;
+        dst_tst_[i] = rng_(hi - lo) + lo;
+        src0_[i] = rng_(hi - lo) + lo;
+        src1_[i] = rng_(hi - lo) + lo;
+      }
+
+      for (int i = 0; i < kMaxMaskSize; ++i)
+        mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+      RunTest(bsize, 1);
+    }
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BlendA64MaskTestHBD,
+    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
+                                   aom_highbd_blend_a64_mask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, BlendA64MaskTestHBD,
+    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
+                                   aom_highbd_blend_a64_mask_neon)));
+#endif  // HAVE_NEON
+
+//////////////////////////////////////////////////////////////////////////////
+// HBD _d16 version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*FHBD_D16)(uint8_t *dst, uint32_t dst_stride,
+                         const CONV_BUF_TYPE *src0, uint32_t src0_stride,
+                         const CONV_BUF_TYPE *src1, uint32_t src1_stride,
+                         const uint8_t *mask, uint32_t mask_stride, int w,
+                         int h, int subx, int suby, ConvolveParams *conv_params,
+                         const int bd);
+typedef libaom_test::FuncParam<FHBD_D16> TestFuncsHBD_d16;
+
+class BlendA64MaskTestHBD_d16
+    : public BlendA64MaskTest<FHBD_D16, uint16_t, uint16_t> {
+ protected:
+  // max number of bits used by the source
+  static const int kSrcMaxBitsMask = (1 << 14) - 1;
+  static const int kSrcMaxBitsMaskHBD = (1 << 16) - 1;
+
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
+               int run_times) override {
+    ASSERT_GT(run_times, 0) << "Cannot run 0 iterations of the test.";
+    ConvolveParams conv_params;
+    conv_params.round_0 = (bit_depth_ == 12) ? ROUND0_BITS + 2 : ROUND0_BITS;
+    conv_params.round_1 = COMPOUND_ROUND1_BITS;
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
+                       p_src0 + src0_offset_, src0_stride_,
+                       p_src1 + src1_offset_, src1_stride_, mask_,
+                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
+                       bit_depth_);
+    }
+    if (params_.tst_func) {
+      aom_usec_timer_mark(&timer);
+      const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+      aom_usec_timer_start(&timer);
+      for (int i = 0; i < run_times; ++i) {
+        params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_),
+                         dst_stride_, p_src0 + src0_offset_, src0_stride_,
+                         p_src1 + src1_offset_, src1_stride_, mask_,
+                         kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
+                         bit_depth_);
+      }
+      aom_usec_timer_mark(&timer);
+      const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+      if (run_times > 1) {
+        printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
+               time1, time2);
+        printf("(%3.2f)\n", time1 / time2);
+      }
+    }
+  }
+
+  int bit_depth_;
+  int src_max_bits_mask_;
+};
+
+TEST_P(BlendA64MaskTestHBD_d16, RandomValues) {
+  if (params_.tst_func == nullptr) return;
+  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
+       bit_depth_ += 2) {
+    src_max_bits_mask_ =
+        (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;
+
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
+         ++bsize) {
+      for (int i = 0; i < kBufSize; ++i) {
+        dst_ref_[i] = rng_.Rand8();
+        dst_tst_[i] = rng_.Rand8();
+
+        src0_[i] = rng_.Rand16() & src_max_bits_mask_;
+        src1_[i] = rng_.Rand16() & src_max_bits_mask_;
+      }
+
+      for (int i = 0; i < kMaxMaskSize; ++i)
+        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+      RunTest(bsize, 1);
+    }
+  }
+}
+
+TEST_P(BlendA64MaskTestHBD_d16, ExtremeValues) {
+  for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
+    src_max_bits_mask_ =
+        (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;
+
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = 0;
+      dst_tst_[i] = (1 << bit_depth_) - 1;
+
+      src0_[i] = src_max_bits_mask_;
+      src1_[i] = src_max_bits_mask_;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA;
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+      RunTest(bsize, 1);
+    }
+  }
+}
+
+TEST_P(BlendA64MaskTestHBD_d16, DISABLED_Speed) {
+  const int kRunTimes = 10000000;
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+    for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
+      for (int i = 0; i < kBufSize; ++i) {
+        dst_ref_[i] = rng_.Rand12() % (1 << bit_depth_);
+        dst_tst_[i] = rng_.Rand12() % (1 << bit_depth_);
+
+        src0_[i] = rng_.Rand16();
+        src1_[i] = rng_.Rand16();
+      }
+
+      for (int i = 0; i < kMaxMaskSize; ++i)
+        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+      RunTest(bsize, kRunTimes);
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    C, BlendA64MaskTestHBD_d16,
+    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
+                                       aom_highbd_blend_a64_d16_mask_c)));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BlendA64MaskTestHBD_d16,
+    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
+                                       aom_highbd_blend_a64_d16_mask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, BlendA64MaskTestHBD_d16,
+    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
+                                       aom_highbd_blend_a64_d16_mask_avx2)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, BlendA64MaskTestHBD_d16,
+    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
+                                       aom_highbd_blend_a64_d16_mask_neon)));
+#endif  // HAVE_NEON
+
+// TODO(slavarnway): Enable the following in the avx2 commit. (56501)
+#if 0
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BlendA64MaskTestHBD,
+    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
+                                   aom_highbd_blend_a64_mask_avx2)));
+#endif  // HAVE_AVX2
+#endif
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/block_test.cc b/third_party/aom/test/block_test.cc
new file mode 100644
index 0000000000..686180cf87
--- /dev/null
+++ b/third_party/aom/test/block_test.cc
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "aom/aom_codec.h"
+#include "av1/common/blockd.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/y4m_video_source.h"
+#include "test/util.h"
+
+// Verify the optimized implementation of get_partition_subsize() produces the
+// same results as the Partition_Subsize lookup table in the spec.
+TEST(BlockdTest, GetPartitionSubsize) {
+  // The Partition_Subsize table in the spec (Section 9.3. Conversion tables).
+  /* clang-format off */
+  static const BLOCK_SIZE kPartitionSubsize[10][BLOCK_SIZES_ALL] = {
+    {
+                                    BLOCK_4X4,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X128,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
+    }, {
+                                    BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
+    }, {
+                                    BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
+    }, {
+                                    BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
+    }, {
+                                    BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
+    }, {
+                                    BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
+    }, {
+                                    BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
+    }, {
+                                    BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
+    }, {
+                                    BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X4,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X8,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X16,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
+    }, {
+                                    BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X16,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X32,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X64,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+      BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
+    }
+  };
+  /* clang-format on */
+
+  for (int partition = 0; partition < 10; partition++) {
+    for (int bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
+      EXPECT_EQ(kPartitionSubsize[partition][bsize],
+                get_partition_subsize(static_cast<BLOCK_SIZE>(bsize),
+                                      static_cast<PARTITION_TYPE>(partition)));
+    }
+  }
+}
+
+#if CONFIG_AV1_DECODER && CONFIG_AV1_ENCODER
+namespace {
+// This class is used to validate if sb_size configured is respected
+// in the bitstream
+class SuperBlockSizeTestLarge
+    : public ::libaom_test::CodecTestWith3Params<
+          libaom_test::TestMode, aom_superblock_size_t, aom_rc_mode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  SuperBlockSizeTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        superblock_size_(GET_PARAM(2)), rc_end_usage_(GET_PARAM(3)) {
+    sb_size_violated_ = false;
+  }
+  ~SuperBlockSizeTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = rc_end_usage_;
+    cfg_.g_threads = 1;
+    cfg_.g_lag_in_frames = 35;
+    cfg_.rc_target_bitrate = 1000;
+  }
+
+  bool DoDecode() const override { return true; }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 5);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AV1E_SET_SUPERBLOCK_SIZE, superblock_size_);
+    }
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (AOM_CODEC_OK == res_dec &&
+        superblock_size_ != AOM_SUPERBLOCK_SIZE_DYNAMIC) {
+      aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
+      aom_superblock_size_t sb_size;
+      AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_SB_SIZE, &sb_size);
+      if (superblock_size_ != sb_size) {
+        sb_size_violated_ = true;
+      }
+    }
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  aom_superblock_size_t superblock_size_;
+  bool sb_size_violated_;
+  aom_rc_mode rc_end_usage_;
+};
+
+TEST_P(SuperBlockSizeTestLarge, SuperBlockSizeTest) {
+  ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 1);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(sb_size_violated_, false)
+      << "Failed for SB size " << superblock_size_;
+}
+
+const ::libaom_test::TestMode kTestModes[] = {
+#if CONFIG_REALTIME_ONLY
+  ::libaom_test::kRealTime
+#else
+  ::libaom_test::kRealTime, ::libaom_test::kOnePassGood,
+  ::libaom_test::kTwoPassGood
+#endif
+};
+
+AV1_INSTANTIATE_TEST_SUITE(SuperBlockSizeTestLarge,
+                           ::testing::ValuesIn(kTestModes),
+                           ::testing::Values(AOM_SUPERBLOCK_SIZE_64X64,
+                                             AOM_SUPERBLOCK_SIZE_128X128),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ));
+}  // namespace
+#endif
diff --git a/third_party/aom/test/boolcoder_test.cc b/third_party/aom/test/boolcoder_test.cc
new file mode 100644
index 0000000000..52c58e0b2e
--- /dev/null
+++ b/third_party/aom/test/boolcoder_test.cc
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/bitreader.h"
+#include "aom_dsp/bitwriter.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+const int num_tests = 10;
+}  // namespace
+
+TEST(AV1, TestBitIO) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int n = 0; n < num_tests; ++n) {
+    for (int method = 0; method <= 7; ++method) {  // we generate various proba
+      const int kBitsToTest = 1000;
+      uint8_t probas[kBitsToTest];
+
+      for (int i = 0; i < kBitsToTest; ++i) {
+        const int parity = i & 1;
+        /* clang-format off */
+        probas[i] =
+          (method == 0) ? 0 : (method == 1) ? 255 :
+          (method == 2) ? 128 :
+          (method == 3) ? rnd.Rand8() :
+          (method == 4) ? (parity ? 0 : 255) :
+            // alternate between low and high proba:
+            (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) :
+            (method == 6) ?
+            (parity ? rnd(64) : 255 - rnd(64)) :
+            (parity ? rnd(32) : 255 - rnd(32));
+        /* clang-format on */
+      }
+      for (int bit_method = 0; bit_method <= 3; ++bit_method) {
+        const int random_seed = 6432;
+        const int kBufferSize = 10000;
+        ACMRandom bit_rnd(random_seed);
+        aom_writer bw;
+        uint8_t bw_buffer[kBufferSize];
+        aom_start_encode(&bw, bw_buffer);
+
+        int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
+        for (int i = 0; i < kBitsToTest; ++i) {
+          if (bit_method == 2) {
+            bit = (i & 1);
+          } else if (bit_method == 3) {
+            bit = bit_rnd(2);
+          }
+          aom_write(&bw, bit, static_cast<int>(probas[i]));
+        }
+
+        GTEST_ASSERT_GE(aom_stop_encode(&bw), 0);
+
+        aom_reader br;
+        aom_reader_init(&br, bw_buffer, bw.pos);
+        bit_rnd.Reset(random_seed);
+        for (int i = 0; i < kBitsToTest; ++i) {
+          if (bit_method == 2) {
+            bit = (i & 1);
+          } else if (bit_method == 3) {
+            bit = bit_rnd(2);
+          }
+          GTEST_ASSERT_EQ(aom_read(&br, probas[i], nullptr), bit)
+              << "pos: " << i << " / " << kBitsToTest
+              << " bit_method: " << bit_method << " method: " << method;
+        }
+      }
+    }
+  }
+}
+
+#define FRAC_DIFF_TOTAL_ERROR 0.18
+
+TEST(AV1, TestTell) {
+  const int kBufferSize = 10000;
+  aom_writer bw;
+  uint8_t bw_buffer[kBufferSize];
+  const int kSymbols = 1024;
+  // Coders are noisier at low probabilities, so we start at p = 4.
+  for (int p = 4; p < 256; p++) {
+    double probability = p / 256.;
+    aom_start_encode(&bw, bw_buffer);
+    for (int i = 0; i < kSymbols; i++) {
+      aom_write(&bw, 0, p);
+    }
+    GTEST_ASSERT_GE(aom_stop_encode(&bw), 0);
+    aom_reader br;
+    aom_reader_init(&br, bw_buffer, bw.pos);
+    uint32_t last_tell = aom_reader_tell(&br);
+    uint32_t last_tell_frac = aom_reader_tell_frac(&br);
+    double frac_diff_total = 0;
+    GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
+    GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
+    ASSERT_FALSE(aom_reader_has_overflowed(&br));
+    for (int i = 0; i < kSymbols; i++) {
+      aom_read(&br, p, nullptr);
+      uint32_t tell = aom_reader_tell(&br);
+      uint32_t tell_frac = aom_reader_tell_frac(&br);
+      GTEST_ASSERT_GE(tell, last_tell)
+          << "tell: " << tell << ", last_tell: " << last_tell;
+      GTEST_ASSERT_GE(tell_frac, last_tell_frac)
+          << "tell_frac: " << tell_frac
+          << ", last_tell_frac: " << last_tell_frac;
+      // Frac tell should round up to tell.
+      GTEST_ASSERT_EQ(tell, (tell_frac + 7) >> 3);
+      last_tell = tell;
+      frac_diff_total +=
+          fabs(((tell_frac - last_tell_frac) / 8.0) + log2(probability));
+      last_tell_frac = tell_frac;
+    }
+    const uint32_t expected = (uint32_t)(-kSymbols * log2(probability));
+    // Last tell should be close to the expected value.
+    GTEST_ASSERT_LE(last_tell, expected + 20) << " last_tell: " << last_tell;
+    // The average frac_diff error should be pretty small.
+    GTEST_ASSERT_LE(frac_diff_total / kSymbols, FRAC_DIFF_TOTAL_ERROR)
+        << " frac_diff_total: " << frac_diff_total;
+    ASSERT_FALSE(aom_reader_has_overflowed(&br));
+  }
+}
+
+TEST(AV1, TestHasOverflowed) {
+  const int kBufferSize = 10000;
+  aom_writer bw;
+  uint8_t bw_buffer[kBufferSize];
+  const int kSymbols = 1024;
+  // Coders are noisier at low probabilities, so we start at p = 4.
+  for (int p = 4; p < 256; p++) {
+    aom_start_encode(&bw, bw_buffer);
+    for (int i = 0; i < kSymbols; i++) {
+      aom_write(&bw, 1, p);
+    }
+    GTEST_ASSERT_GE(aom_stop_encode(&bw), 0);
+    aom_reader br;
+    aom_reader_init(&br, bw_buffer, bw.pos);
+    ASSERT_FALSE(aom_reader_has_overflowed(&br));
+    for (int i = 0; i < kSymbols; i++) {
+      GTEST_ASSERT_EQ(aom_read(&br, p, nullptr), 1);
+      ASSERT_FALSE(aom_reader_has_overflowed(&br));
+    }
+    // In the worst case, the encoder uses just a tiny fraction of the last
+    // byte in the buffer. So to guarantee that aom_reader_has_overflowed()
+    // returns true, we have to consume very nearly 8 additional bits of data.
+    // In the worse case, one of the bits in that byte will be 1, and the rest
+    // will be zero. Once we are past that 1 bit, when the probability of
+    // reading zero symbol from aom_read() is high, each additional symbol read
+    // will consume very little additional data (in the case that p == 255,
+    // approximately -log_2(255/256) ~= 0.0056 bits). In that case it would
+    // take around 178 calls to consume more than 8 bits. That is only an upper
+    // bound. In practice we are not guaranteed to hit the worse case and can
+    // get away with 174 calls.
+    for (int i = 0; i < 174; i++) {
+      aom_read(&br, p, nullptr);
+    }
+    ASSERT_TRUE(aom_reader_has_overflowed(&br));
+  }
+}
diff --git a/third_party/aom/test/borders_test.cc b/third_party/aom/test/borders_test.cc
new file mode 100644
index 0000000000..594c3e8429
--- /dev/null
+++ b/third_party/aom/test/borders_test.cc
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <climits>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class BordersTestLarge
+    : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  BordersTestLarge() : EncoderTest(GET_PARAM(0)) {}
+  ~BordersTestLarge() override = default;
+
+  void SetUp() override { InitializeConfig(GET_PARAM(1)); }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 1);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    if (pkt->data.frame.flags & AOM_FRAME_IS_KEY) {
+    }
+  }
+};
+
+TEST_P(BordersTestLarge, TestEncodeHighBitrate) {
+  // Validate that this non multiple of 64 wide clip encodes and decodes
+  // without a mismatch when passing in a very low max q.  This pushes
+  // the encoder to producing lots of big partitions which will likely
+  // extend into the border and test the border condition.
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.rc_max_quantizer = 10;
+
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       10);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+TEST_P(BordersTestLarge, TestLowBitrate) {
+  // Validate that this clip encodes and decodes without a mismatch
+  // when passing in a very high min q.  This pushes the encoder to producing
+  // lots of small partitions which might will test the other condition.
+
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 200;
+  cfg_.rc_min_quantizer = 40;
+
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       10);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+AV1_INSTANTIATE_TEST_SUITE(BordersTestLarge,
+                           ::testing::Values(::libaom_test::kTwoPassGood));
+}  // namespace
diff --git a/third_party/aom/test/cdef_test.cc b/third_party/aom/test/cdef_test.cc
new file mode 100644
index 0000000000..ad54407ca7
--- /dev/null
+++ b/third_party/aom/test/cdef_test.cc
@@ -0,0 +1,962 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <array>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "av1/common/cdef_block.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+using CdefFilterBlockFunctions = std::array<cdef_filter_block_func, 4>;
+
+typedef std::tuple<CdefFilterBlockFunctions, CdefFilterBlockFunctions,
+                   BLOCK_SIZE, int, int>
+    cdef_dir_param_t;
+
+class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> {
+ public:
+  ~CDEFBlockTest() override = default;
+  void SetUp() override {
+    cdef = GET_PARAM(0);
+    ref_cdef = GET_PARAM(1);
+    bsize = GET_PARAM(2);
+    boundary = GET_PARAM(3);
+    depth = GET_PARAM(4);
+  }
+
+ protected:
+  BLOCK_SIZE bsize;
+  int boundary;
+  int depth;
+  CdefFilterBlockFunctions cdef;
+  CdefFilterBlockFunctions ref_cdef;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockTest);
+
+typedef CDEFBlockTest CDEFBlockHighbdTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockHighbdTest);
+
+typedef CDEFBlockTest CDEFSpeedTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedTest);
+
+typedef CDEFBlockTest CDEFSpeedHighbdTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedHighbdTest);
+
+int64_t test_cdef(BLOCK_SIZE bsize, int iterations,
+                  CdefFilterBlockFunctions cdef,
+                  CdefFilterBlockFunctions ref_cdef, int boundary, int depth) {
+  aom_usec_timer ref_timer;
+  int64_t ref_elapsed_time = 0;
+  const int size = 8;
+  const int ysize = size + 2 * CDEF_VBORDER;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
+  DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
+  DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
+  memset(ref_d, 0, sizeof(ref_d));
+  memset(d, 0, sizeof(d));
+
+  int error = 0, pristrength = 0, secstrength, dir;
+  int pridamping, secdamping, bits, level, count,
+      errdepth = 0, errpristrength = 0, errsecstrength = 0, errboundary = 0,
+      errpridamping = 0, errsecdamping = 0;
+  unsigned int pos = 0;
+
+  const int block_width =
+      ((bsize == BLOCK_8X8) || (bsize == BLOCK_8X4)) ? 8 : 4;
+  const int block_height =
+      ((bsize == BLOCK_8X8) || (bsize == BLOCK_4X8)) ? 8 : 4;
+  const unsigned int max_pos = size * size >> static_cast<int>(depth == 8);
+  for (pridamping = 3 + depth - 8; pridamping < 7 - 3 * !!boundary + depth - 8;
+       pridamping++) {
+    for (secdamping = 3 + depth - 8;
+         secdamping < 7 - 3 * !!boundary + depth - 8; secdamping++) {
+      for (count = 0; count < iterations; count++) {
+        for (level = 0; level < (1 << depth) && !error;
+             level += (2 + 6 * !!boundary) << (depth - 8)) {
+          for (bits = 1; bits <= depth && !error; bits += 1 + 3 * !!boundary) {
+            for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
+              s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
+                           (1 << depth) - 1);
+            if (boundary) {
+              if (boundary & 1) {  // Left
+                for (int i = 0; i < ysize; i++)
+                  for (int j = 0; j < CDEF_HBORDER; j++)
+                    s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+              }
+              if (boundary & 2) {  // Right
+                for (int i = 0; i < ysize; i++)
+                  for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
+                    s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+              }
+              if (boundary & 4) {  // Above
+                for (int i = 0; i < CDEF_VBORDER; i++)
+                  for (int j = 0; j < CDEF_BSTRIDE; j++)
+                    s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+              }
+              if (boundary & 8) {  // Below
+                for (int i = CDEF_VBORDER + size; i < ysize; i++)
+                  for (int j = 0; j < CDEF_BSTRIDE; j++)
+                    s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+              }
+            }
+            for (dir = 0; dir < 8; dir++) {
+              for (pristrength = 0; pristrength <= 19 << (depth - 8) && !error;
+                   pristrength += (1 + 4 * !!boundary) << (depth - 8)) {
+                if (pristrength == 16) pristrength = 19;
+                for (secstrength = 0; secstrength <= 4 << (depth - 8) && !error;
+                     secstrength += 1 << (depth - 8)) {
+                  if (secstrength == 3 << (depth - 8)) continue;
+
+                  const int strength_index =
+                      (secstrength == 0) | ((pristrength == 0) << 1);
+
+                  aom_usec_timer_start(&ref_timer);
+                  ref_cdef[strength_index](
+                      ref_d, size,
+                      s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
+                      pristrength, secstrength, dir, pridamping, secdamping,
+                      depth - 8, block_width, block_height);
+                  aom_usec_timer_mark(&ref_timer);
+                  ref_elapsed_time += aom_usec_timer_elapsed(&ref_timer);
+                  // If cdef and ref_cdef are the same, we're just testing
+                  // speed
+                  if (cdef[0] != ref_cdef[0])
+                    API_REGISTER_STATE_CHECK(cdef[strength_index](
+                        d, size, s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
+                        pristrength, secstrength, dir, pridamping, secdamping,
+                        depth - 8, block_width, block_height));
+                  if (ref_cdef[0] != cdef[0]) {
+                    for (pos = 0; pos < max_pos && !error; pos++) {
+                      error = ref_d[pos] != d[pos];
+                      errdepth = depth;
+                      errpristrength = pristrength;
+                      errsecstrength = secstrength;
+                      errboundary = boundary;
+                      errpridamping = pridamping;
+                      errsecdamping = secdamping;
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  pos--;
+  EXPECT_EQ(0, error) << "Error: CDEFBlockTest, SIMD and C mismatch."
+                      << std::endl
+                      << "First error at " << pos % size << "," << pos / size
+                      << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
+                      << ") " << std::endl
+                      << "pristrength: " << errpristrength << std::endl
+                      << "pridamping: " << errpridamping << std::endl
+                      << "secstrength: " << errsecstrength << std::endl
+                      << "secdamping: " << errsecdamping << std::endl
+                      << "depth: " << errdepth << std::endl
+                      << "size: " << bsize << std::endl
+                      << "boundary: " << errboundary << std::endl
+                      << std::endl;
+
+  return ref_elapsed_time;
+}
+
+void test_cdef_speed(BLOCK_SIZE bsize, int iterations,
+                     CdefFilterBlockFunctions cdef,
+                     CdefFilterBlockFunctions ref_cdef, int boundary,
+                     int depth) {
+  int64_t ref_elapsed_time =
+      test_cdef(bsize, iterations, ref_cdef, ref_cdef, boundary, depth);
+
+  int64_t elapsed_time =
+      test_cdef(bsize, iterations, cdef, cdef, boundary, depth);
+
+  std::cout << "C time: " << ref_elapsed_time << " us" << std::endl
+            << "SIMD time: " << elapsed_time << " us" << std::endl;
+
+  EXPECT_GT(ref_elapsed_time, elapsed_time)
+      << "Error: CDEFSpeedTest, SIMD slower than C." << std::endl
+      << "C time: " << ref_elapsed_time << " us" << std::endl
+      << "SIMD time: " << elapsed_time << " us" << std::endl;
+}
+
+typedef int (*find_dir_t)(const uint16_t *img, int stride, int32_t *var,
+                          int coeff_shift);
+
+typedef std::tuple<find_dir_t, find_dir_t> find_dir_param_t;
+
+class CDEFFindDirTest : public ::testing::TestWithParam<find_dir_param_t> {
+ public:
+  ~CDEFFindDirTest() override = default;
+  void SetUp() override {
+    finddir = GET_PARAM(0);
+    ref_finddir = GET_PARAM(1);
+  }
+
+ protected:
+  find_dir_t finddir;
+  find_dir_t ref_finddir;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirTest);
+
+typedef CDEFFindDirTest CDEFFindDirSpeedTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirSpeedTest);
+
+void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
+                                 int coeff_shift),
+                  int (*ref_finddir)(const uint16_t *img, int stride,
+                                     int32_t *var, int coeff_shift)) {
+  const int size = 8;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint16_t, s[size * size]);
+
+  int error = 0;
+  int depth, bits, level, count, errdepth = 0;
+  int ref_res = 0, res = 0;
+  int32_t ref_var = 0, var = 0;
+
+  for (depth = 8; depth <= 12 && !error; depth += 2) {
+    for (count = 0; count < 512 && !error; count++) {
+      for (level = 0; level < (1 << depth) && !error;
+           level += 1 << (depth - 8)) {
+        for (bits = 1; bits <= depth && !error; bits++) {
+          for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
+            s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
+                         (1 << depth) - 1);
+          for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
+            ref_res = ref_finddir(s, size, &ref_var, depth - 8);
+          if (finddir != ref_finddir)
+            API_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
+          if (ref_finddir != finddir) {
+            if (res != ref_res || var != ref_var) error = 1;
+            errdepth = depth;
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch."
+                      << std::endl
+                      << "return: " << res << " : " << ref_res << std::endl
+                      << "var: " << var << " : " << ref_var << std::endl
+                      << "depth: " << errdepth << std::endl
+                      << std::endl;
+}
+
+void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
+                                       int32_t *var, int coeff_shift),
+                        int (*ref_finddir)(const uint16_t *img, int stride,
+                                           int32_t *var, int coeff_shift)) {
+  aom_usec_timer ref_timer;
+  aom_usec_timer timer;
+
+  aom_usec_timer_start(&ref_timer);
+  test_finddir(ref_finddir, ref_finddir);
+  aom_usec_timer_mark(&ref_timer);
+  int64_t ref_elapsed_time = aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer_start(&timer);
+  test_finddir(finddir, finddir);
+  aom_usec_timer_mark(&timer);
+  int64_t elapsed_time = aom_usec_timer_elapsed(&timer);
+
+  EXPECT_GT(ref_elapsed_time, elapsed_time)
+      << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl
+      << "C time: " << ref_elapsed_time << " us" << std::endl
+      << "SIMD time: " << elapsed_time << " us" << std::endl;
+}
+
+typedef void (*find_dir_dual_t)(const uint16_t *img1, const uint16_t *img2,
+                                int stride, int32_t *var1, int32_t *var2,
+                                int coeff_shift, int *out1, int *out2);
+
+typedef std::tuple<find_dir_dual_t, find_dir_dual_t> find_dir_dual_param_t;
+
+class CDEFFindDirDualTest
+    : public ::testing::TestWithParam<find_dir_dual_param_t> {
+ public:
+  ~CDEFFindDirDualTest() override = default;
+  void SetUp() override {
+    finddir = GET_PARAM(0);
+    ref_finddir = GET_PARAM(1);
+  }
+
+ protected:
+  find_dir_dual_t finddir;
+  find_dir_dual_t ref_finddir;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirDualTest);
+
+typedef CDEFFindDirDualTest CDEFFindDirDualSpeedTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirDualSpeedTest);
+
+void test_finddir_dual(
+    void (*finddir)(const uint16_t *img1, const uint16_t *img2, int stride,
+                    int32_t *var1, int32_t *var2, int coeff_shift, int *out1,
+                    int *out2),
+    void (*ref_finddir)(const uint16_t *img1, const uint16_t *img2, int stride,
+                        int32_t *var1, int32_t *var2, int coeff_shift,
+                        int *out1, int *out2)) {
+  const int size_wd = 16;
+  const int size_ht = 8;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint16_t, s[size_ht * size_wd]);
+
+  int error = 0, errdepth = 0;
+  int32_t ref_var[2] = { 0 };
+  int ref_dir[2] = { 0 };
+  int32_t var[2] = { 0 };
+  int dir[2] = { 0 };
+
+  for (int depth = 8; depth <= 12 && !error; depth += 2) {
+    for (int count = 0; count < 512 && !error; count++) {
+      for (int level = 0; level < (1 << depth) && !error;
+           level += 1 << (depth - 8)) {
+        for (int bits = 1; bits <= depth && !error; bits++) {
+          for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
+            s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
+                         (1 << depth) - 1);
+          for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
+            ref_finddir(s, s + 8, size_wd, &ref_var[0], &ref_var[1], depth - 8,
+                        &ref_dir[0], &ref_dir[1]);
+          if (finddir != ref_finddir)
+            API_REGISTER_STATE_CHECK(finddir(s, s + 8, size_wd, &var[0],
+                                             &var[1], depth - 8, &dir[0],
+                                             &dir[1]));
+          if (ref_finddir != finddir) {
+            for (int j = 0; j < 2; j++) {
+              if (ref_dir[j] != dir[j] || ref_var[j] != var[j]) error = 1;
+            }
+            errdepth = depth;
+          }
+        }
+      }
+    }
+  }
+
+  for (int j = 0; j < 2; j++) {
+    EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch."
+                        << std::endl
+                        << "direction: " << dir[j] << " : " << ref_dir[j]
+                        << std::endl
+                        << "variance: " << var[j] << " : " << ref_var[j]
+                        << std::endl
+                        << "depth: " << errdepth << std::endl
+                        << std::endl;
+  }
+}
+
+void test_finddir_dual_speed(
+    void (*finddir)(const uint16_t *img1, const uint16_t *img2, int stride,
+                    int32_t *var1, int32_t *var2, int coeff_shift, int *out1,
+                    int *out2),
+    void (*ref_finddir)(const uint16_t *img1, const uint16_t *img2, int stride,
+                        int32_t *var1, int32_t *var2, int coeff_shift,
+                        int *out1, int *out2)) {
+  aom_usec_timer ref_timer;
+  aom_usec_timer timer;
+
+  aom_usec_timer_start(&ref_timer);
+  test_finddir_dual(ref_finddir, ref_finddir);
+  aom_usec_timer_mark(&ref_timer);
+  const double ref_elapsed_time =
+      static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
+
+  aom_usec_timer_start(&timer);
+  test_finddir_dual(finddir, finddir);
+  aom_usec_timer_mark(&timer);
+  const double elapsed_time =
+      static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+  printf(
+      "ref_time=%lf \t simd_time=%lf \t "
+      "gain=%lf \n",
+      ref_elapsed_time, elapsed_time, ref_elapsed_time / elapsed_time);
+}
+
+#define MAX_CDEF_BLOCK 256
+
+constexpr int kIterations = 100;
+
+using CDEFCopyRect8To16 = void (*)(uint16_t *dst, int dstride,
+                                   const uint8_t *src, int sstride, int width,
+                                   int height);
+
+using CDEFCopyRect8To16Param = std::tuple<CDEFCopyRect8To16, CDEFCopyRect8To16>;
+
+class CDEFCopyRect8to16Test
+    : public ::testing::TestWithParam<CDEFCopyRect8To16Param> {
+ public:
+  CDEFCopyRect8to16Test()
+      : rnd_(libaom_test::ACMRandom::DeterministicSeed()),
+        test_func_(GET_PARAM(0)), ref_func_(GET_PARAM(1)) {}
+  ~CDEFCopyRect8to16Test() override = default;
+  void SetUp() override {
+    src_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(8, sizeof(uint8_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK));
+    ASSERT_NE(src_, nullptr);
+    ref_dst_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK));
+    ASSERT_NE(ref_dst_, nullptr);
+    test_dst_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK));
+    ASSERT_NE(test_dst_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(src_);
+    aom_free(ref_dst_);
+    aom_free(test_dst_);
+  }
+
+  void test_copy_rect_8_to_16(CDEFCopyRect8To16 test_func,
+                              CDEFCopyRect8To16 ref_func) {
+    constexpr int stride = MAX_CDEF_BLOCK;
+    int error = 0;
+    for (int k = 0; k < kIterations && !error; k++) {
+      // This function operates on values of width that are either 4 or a
+      // multiple of 8. For height, generate a random value between 1 and 256,
+      // making sure it is even.
+      const int width = k == 0 ? 4 : (rnd_.Rand8() % 32 + 1) * 8;
+      const int height = k == 0 ? 4 : (rnd_.Rand8() % 128 + 1) * 2;
+      for (int i = 0; i < height; i++) {
+        for (int j = 0; j < width; j++) {
+          src_[i * stride + j] = rnd_.Rand8();
+        }
+      }
+
+      ref_func(ref_dst_, stride, src_, stride, width, height);
+      test_func(test_dst_, stride, src_, stride, width, height);
+
+      int i, j;
+      for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+          if (test_dst_[i * stride + j] != ref_dst_[i * stride + j]) {
+            error = 1;
+            break;
+          }
+        }
+        if (error) {
+          break;
+        }
+      }
+      EXPECT_EQ(0, error)
+          << "Error: CDEFCopyRect8to16Test, SIMD and C mismatch." << std::endl
+          << "First error at " << i << "," << j << " ("
+          << ref_dst_[i * stride + j] << " : " << test_dst_[i * stride + j]
+          << ") " << std::endl
+          << "width: " << width << std::endl
+          << "height: " << height << std::endl
+          << std::endl;
+    }
+  }
+
+ protected:
+  libaom_test::ACMRandom rnd_;
+  uint8_t *src_;
+  uint16_t *ref_dst_;
+  uint16_t *test_dst_;
+  CDEFCopyRect8To16 test_func_;
+  CDEFCopyRect8To16 ref_func_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFCopyRect8to16Test);
+
+using CDEFCopyRect16To16 = void (*)(uint16_t *dst, int dstride,
+                                    const uint16_t *src, int sstride, int width,
+                                    int height);
+
+using CDEFCopyRect16To16Param =
+    std::tuple<CDEFCopyRect16To16, CDEFCopyRect16To16>;
+
+class CDEFCopyRect16to16Test
+    : public ::testing::TestWithParam<CDEFCopyRect16To16Param> {
+ public:
+  CDEFCopyRect16to16Test()
+      : rnd_(libaom_test::ACMRandom::DeterministicSeed()),
+        test_func_(GET_PARAM(0)), ref_func_(GET_PARAM(1)) {}
+  ~CDEFCopyRect16to16Test() override = default;
+  void SetUp() override {
+    src_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK));
+    ASSERT_NE(src_, nullptr);
+    ref_dst_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK));
+    ASSERT_NE(ref_dst_, nullptr);
+    test_dst_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK));
+    ASSERT_NE(test_dst_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(src_);
+    aom_free(ref_dst_);
+    aom_free(test_dst_);
+  }
+
+  void test_copy_rect_16_to_16(CDEFCopyRect16To16 test_func,
+                               CDEFCopyRect16To16 ref_func) {
+    constexpr int stride = MAX_CDEF_BLOCK;
+    int error = 0;
+    for (int k = 0; k < kIterations && !error; k++) {
+      // This function operates on values of width that are either 4 or a
+      // multiple of 8. For height, generate a random value between 1 and 256,
+      // making sure it is even.
+      const int width = k == 0 ? 4 : (rnd_.Rand8() % 32 + 1) * 8;
+      const int height = k == 0 ? 4 : (rnd_.Rand8() % 128 + 1) * 2;
+      for (int i = 0; i < height; i++) {
+        for (int j = 0; j < width; j++) {
+          src_[i * stride + j] = rnd_.Rand16();
+        }
+      }
+
+      ref_func(ref_dst_, stride, src_, stride, width, height);
+      test_func(test_dst_, stride, src_, stride, width, height);
+
+      int i, j;
+      for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+          if (test_dst_[i * stride + j] != ref_dst_[i * stride + j]) {
+            error = 1;
+            break;
+          }
+        }
+        if (error) {
+          break;
+        }
+      }
+      EXPECT_EQ(0, error)
+          << "Error: CDEFCopyRect16to16Test, SIMD and C mismatch." << std::endl
+          << "First error at " << i << "," << j << " ("
+          << ref_dst_[i * stride + j] << " : " << test_dst_[i * stride + j]
+          << ") " << std::endl
+          << "width: " << width << std::endl
+          << "height: " << height << std::endl
+          << std::endl;
+    }
+  }
+
+ protected:
+  libaom_test::ACMRandom rnd_;
+  uint16_t *src_;
+  uint16_t *ref_dst_;
+  uint16_t *test_dst_;
+  CDEFCopyRect16To16 test_func_;
+  CDEFCopyRect16To16 ref_func_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFCopyRect16to16Test);
+
+TEST_P(CDEFBlockTest, TestSIMDNoMismatch) {
+  test_cdef(bsize, 1, cdef, ref_cdef, boundary, depth);
+}
+
+TEST_P(CDEFBlockHighbdTest, TestSIMDHighbdNoMismatch) {
+  test_cdef(bsize, 1, cdef, ref_cdef, boundary, depth);
+}
+
+TEST_P(CDEFSpeedTest, DISABLED_TestSpeed) {
+  test_cdef_speed(bsize, 4, cdef, ref_cdef, boundary, depth);
+}
+
+TEST_P(CDEFSpeedHighbdTest, DISABLED_TestSpeed) {
+  test_cdef_speed(bsize, 4, cdef, ref_cdef, boundary, depth);
+}
+
+TEST_P(CDEFFindDirTest, TestSIMDNoMismatch) {
+  test_finddir(finddir, ref_finddir);
+}
+
+TEST_P(CDEFFindDirSpeedTest, DISABLED_TestSpeed) {
+  test_finddir_speed(finddir, ref_finddir);
+}
+
+TEST_P(CDEFFindDirDualTest, TestSIMDNoMismatch) {
+  test_finddir_dual(finddir, ref_finddir);
+}
+
+TEST_P(CDEFFindDirDualSpeedTest, DISABLED_TestSpeed) {
+  test_finddir_dual_speed(finddir, ref_finddir);
+}
+
+TEST_P(CDEFCopyRect8to16Test, TestSIMDNoMismatch) {
+  test_copy_rect_8_to_16(test_func_, ref_func_);
+}
+
+TEST_P(CDEFCopyRect16to16Test, TestSIMDNoMismatch) {
+  test_copy_rect_16_to_16(test_func_, ref_func_);
+}
+
+using std::make_tuple;
+
+#if (HAVE_SSE2 || HAVE_SSSE3 || HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON)
+static const CdefFilterBlockFunctions kCdefFilterFuncC[] = {
+  { &cdef_filter_8_0_c, &cdef_filter_8_1_c, &cdef_filter_8_2_c,
+    &cdef_filter_8_3_c }
+};
+
+static const CdefFilterBlockFunctions kCdefFilterHighbdFuncC[] = {
+  { &cdef_filter_16_0_c, &cdef_filter_16_0_c, &cdef_filter_16_0_c,
+    &cdef_filter_16_0_c }
+};
+#endif
+
+#if HAVE_SSE2
+static const CdefFilterBlockFunctions kCdefFilterFuncSse2[] = {
+  { &cdef_filter_8_0_sse2, &cdef_filter_8_1_sse2, &cdef_filter_8_2_sse2,
+    &cdef_filter_8_3_sse2 }
+};
+
+static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSse2[] = {
+  { &cdef_filter_16_0_sse2, &cdef_filter_16_1_sse2, &cdef_filter_16_2_sse2,
+    &cdef_filter_16_3_sse2 }
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, CDEFBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse2),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, CDEFBlockHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse2),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
+INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_sse2,
+                                                      &cdef_find_dir_c)));
+INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirDualTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_dual_sse2,
+                                                      &cdef_find_dir_dual_c)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, CDEFCopyRect8to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c,
+                                 &cdef_copy_rect8_8bit_to_16bit_sse2)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, CDEFCopyRect16to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c,
+                                 &cdef_copy_rect8_16bit_to_16bit_sse2)));
+#endif
+
+#if HAVE_SSSE3
+static const CdefFilterBlockFunctions kCdefFilterFuncSsse3[] = {
+  { &cdef_filter_8_0_ssse3, &cdef_filter_8_1_ssse3, &cdef_filter_8_2_ssse3,
+    &cdef_filter_8_3_ssse3 }
+};
+
+static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSsse3[] = {
+  { &cdef_filter_16_0_ssse3, &cdef_filter_16_1_ssse3, &cdef_filter_16_2_ssse3,
+    &cdef_filter_16_3_ssse3 }
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    SSSE3, CDEFBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSsse3),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    SSSE3, CDEFBlockHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSsse3),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
+INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
+                                                      &cdef_find_dir_c)));
+INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirDualTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_dual_ssse3,
+                                                      &cdef_find_dir_dual_c)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSSE3, CDEFCopyRect8to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c,
+                                 &cdef_copy_rect8_8bit_to_16bit_ssse3)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSSE3, CDEFCopyRect16to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c,
+                                 &cdef_copy_rect8_16bit_to_16bit_ssse3)));
+#endif
+
+#if HAVE_SSE4_1
+static const CdefFilterBlockFunctions kCdefFilterFuncSse4_1[] = {
+  { &cdef_filter_8_0_sse4_1, &cdef_filter_8_1_sse4_1, &cdef_filter_8_2_sse4_1,
+    &cdef_filter_8_3_sse4_1 }
+};
+
+static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSse4_1[] = {
+  { &cdef_filter_16_0_sse4_1, &cdef_filter_16_1_sse4_1,
+    &cdef_filter_16_2_sse4_1, &cdef_filter_16_3_sse4_1 }
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, CDEFBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse4_1),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, CDEFBlockHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse4_1),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
+INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
+                                                      &cdef_find_dir_c)));
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, CDEFFindDirDualTest,
+    ::testing::Values(make_tuple(&cdef_find_dir_dual_sse4_1,
+                                 &cdef_find_dir_dual_c)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, CDEFCopyRect8to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c,
+                                 &cdef_copy_rect8_8bit_to_16bit_sse4_1)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, CDEFCopyRect16to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c,
+                                 &cdef_copy_rect8_16bit_to_16bit_sse4_1)));
+#endif
+
+#if HAVE_AVX2
+static const CdefFilterBlockFunctions kCdefFilterFuncAvx2[] = {
+  { &cdef_filter_8_0_avx2, &cdef_filter_8_1_avx2, &cdef_filter_8_2_avx2,
+    &cdef_filter_8_3_avx2 }
+};
+
+static const CdefFilterBlockFunctions kCdefFilterHighbdFuncAvx2[] = {
+  { &cdef_filter_16_0_avx2, &cdef_filter_16_1_avx2, &cdef_filter_16_2_avx2,
+    &cdef_filter_16_3_avx2 }
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, CDEFBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncAvx2),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, CDEFBlockHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncAvx2),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
+INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_avx2,
+                                                      &cdef_find_dir_c)));
+INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirDualTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_dual_avx2,
+                                                      &cdef_find_dir_dual_c)));
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, CDEFCopyRect8to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c,
+                                 &cdef_copy_rect8_8bit_to_16bit_avx2)));
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, CDEFCopyRect16to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c,
+                                 &cdef_copy_rect8_16bit_to_16bit_avx2)));
+#endif
+
+#if HAVE_NEON
+static const CdefFilterBlockFunctions kCdefFilterFuncNeon[] = {
+  { &cdef_filter_8_0_neon, &cdef_filter_8_1_neon, &cdef_filter_8_2_neon,
+    &cdef_filter_8_3_neon }
+};
+
+static const CdefFilterBlockFunctions kCdefFilterHighbdFuncNeon[] = {
+  { &cdef_filter_16_0_neon, &cdef_filter_16_1_neon, &cdef_filter_16_2_neon,
+    &cdef_filter_16_3_neon }
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, CDEFBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncNeon),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    NEON, CDEFBlockHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncNeon),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
+INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_neon,
+                                                      &cdef_find_dir_c)));
+INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirDualTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_dual_neon,
+                                                      &cdef_find_dir_dual_c)));
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, CDEFCopyRect8to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c,
+                                 &cdef_copy_rect8_8bit_to_16bit_neon)));
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, CDEFCopyRect16to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c,
+                                 &cdef_copy_rect8_16bit_to_16bit_neon)));
+#endif
+
+// Test speed for all supported architectures
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, CDEFSpeedTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse2),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, CDEFSpeedHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse2),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(10)));
+INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirSpeedTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_sse2,
+                                                      &cdef_find_dir_c)));
+INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirDualSpeedTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_dual_sse2,
+                                                      &cdef_find_dir_dual_c)));
+#endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_SUITE_P(
+    SSSE3, CDEFSpeedTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSsse3),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    SSSE3, CDEFSpeedHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSsse3),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(10)));
+INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirSpeedTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
+                                                      &cdef_find_dir_c)));
+INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirDualSpeedTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_dual_ssse3,
+                                                      &cdef_find_dir_dual_c)));
+#endif
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, CDEFSpeedTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse4_1),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, CDEFSpeedHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse4_1),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(10)));
+INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirSpeedTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
+                                                      &cdef_find_dir_c)));
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, CDEFFindDirDualSpeedTest,
+    ::testing::Values(make_tuple(&cdef_find_dir_dual_sse4_1,
+                                 &cdef_find_dir_dual_c)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, CDEFSpeedTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncAvx2),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, CDEFSpeedHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncAvx2),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(10)));
+INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirSpeedTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_avx2,
+                                                      &cdef_find_dir_c)));
+INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirDualSpeedTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_dual_avx2,
+                                                      &cdef_find_dir_dual_c)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, CDEFSpeedTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncNeon),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    NEON, CDEFSpeedHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncNeon),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(10)));
+INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirSpeedTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_neon,
+                                                      &cdef_find_dir_c)));
+INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirDualSpeedTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_dual_neon,
+                                                      &cdef_find_dir_dual_c)));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/cfl_test.cc b/third_party/aom/test/cfl_test.cc
new file mode 100644
index 0000000000..7fdea04c36
--- /dev/null
+++ b/third_party/aom/test/cfl_test.cc
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "test/util.h"
+#include "test/acm_random.h"
+
+using std::make_tuple;
+
+using libaom_test::ACMRandom;
+
+#define NUM_ITERATIONS (100)
+#define NUM_ITERATIONS_SPEED (INT16_MAX)
+
+#define ALL_CFL_TX_SIZES(function)                           \
+  make_tuple(static_cast<TX_SIZE>(TX_4X4), &function),       \
+      make_tuple(static_cast<TX_SIZE>(TX_4X8), &function),   \
+      make_tuple(static_cast<TX_SIZE>(TX_4X16), &function),  \
+      make_tuple(static_cast<TX_SIZE>(TX_8X4), &function),   \
+      make_tuple(static_cast<TX_SIZE>(TX_8X8), &function),   \
+      make_tuple(static_cast<TX_SIZE>(TX_8X16), &function),  \
+      make_tuple(static_cast<TX_SIZE>(TX_8X32), &function),  \
+      make_tuple(static_cast<TX_SIZE>(TX_16X4), &function),  \
+      make_tuple(static_cast<TX_SIZE>(TX_16X8), &function),  \
+      make_tuple(static_cast<TX_SIZE>(TX_16X16), &function), \
+      make_tuple(static_cast<TX_SIZE>(TX_16X32), &function), \
+      make_tuple(static_cast<TX_SIZE>(TX_32X8), &function),  \
+      make_tuple(static_cast<TX_SIZE>(TX_32X16), &function), \
+      make_tuple(static_cast<TX_SIZE>(TX_32X32), &function)
+
+#define ALL_CFL_TX_SIZES_SUBSAMPLE(fun420, fun422, fun444)                   \
+  make_tuple(static_cast<TX_SIZE>(TX_4X4), &fun420, &fun422, &fun444),       \
+      make_tuple(static_cast<TX_SIZE>(TX_4X8), &fun420, &fun422, &fun444),   \
+      make_tuple(static_cast<TX_SIZE>(TX_4X16), &fun420, &fun422, &fun444),  \
+      make_tuple(static_cast<TX_SIZE>(TX_8X4), &fun420, &fun422, &fun444),   \
+      make_tuple(static_cast<TX_SIZE>(TX_8X8), &fun420, &fun422, &fun444),   \
+      make_tuple(static_cast<TX_SIZE>(TX_8X16), &fun420, &fun422, &fun444),  \
+      make_tuple(static_cast<TX_SIZE>(TX_8X32), &fun420, &fun422, &fun444),  \
+      make_tuple(static_cast<TX_SIZE>(TX_16X4), &fun420, &fun422, &fun444),  \
+      make_tuple(static_cast<TX_SIZE>(TX_16X8), &fun420, &fun422, &fun444),  \
+      make_tuple(static_cast<TX_SIZE>(TX_16X16), &fun420, &fun422, &fun444), \
+      make_tuple(static_cast<TX_SIZE>(TX_16X32), &fun420, &fun422, &fun444), \
+      make_tuple(static_cast<TX_SIZE>(TX_32X8), &fun420, &fun422, &fun444),  \
+      make_tuple(static_cast<TX_SIZE>(TX_32X16), &fun420, &fun422, &fun444), \
+      make_tuple(static_cast<TX_SIZE>(TX_32X32), &fun420, &fun422, &fun444)
+
+namespace {
+
+template <typename A>
+static void assert_eq(const A *a, const A *b, int width, int height) {
+  for (int j = 0; j < height; j++) {
+    for (int i = 0; i < width; i++) {
+      ASSERT_EQ(a[j * CFL_BUF_LINE + i], b[j * CFL_BUF_LINE + i]);
+    }
+  }
+}
+
+static void assertFaster(int ref_elapsed_time, int elapsed_time) {
+  EXPECT_GT(ref_elapsed_time, elapsed_time)
+      << "Error: CFLSubtractSpeedTest, SIMD slower than C." << std::endl
+      << "C time: " << ref_elapsed_time << " us" << std::endl
+      << "SIMD time: " << elapsed_time << " us" << std::endl;
+}
+
+static void printSpeed(int ref_elapsed_time, int elapsed_time, int width,
+                       int height) {
+  std::cout.precision(2);
+  std::cout << "[          ] " << width << "x" << height
+            << ": C time = " << ref_elapsed_time
+            << " us, SIMD time = " << elapsed_time << " us"
+            << " (~" << ref_elapsed_time / (double)elapsed_time << "x) "
+            << std::endl;
+}
+
+class CFLTest {
+ public:
+  virtual ~CFLTest() = default;
+  void init(TX_SIZE tx) {
+    tx_size = tx;
+    width = tx_size_wide[tx_size];
+    height = tx_size_high[tx_size];
+    rnd.Reset(ACMRandom::DeterministicSeed());
+  }
+
+ protected:
+  TX_SIZE tx_size;
+  int width;
+  int height;
+  ACMRandom rnd;
+};
+
+template <typename I>
+class CFLTestWithData : public CFLTest {
+ public:
+  ~CFLTestWithData() override = default;
+
+ protected:
+  I data[CFL_BUF_SQUARE];
+  I data_ref[CFL_BUF_SQUARE];
+  void randData(I (ACMRandom::*random)()) {
+    for (int j = 0; j < this->height; j++) {
+      for (int i = 0; i < this->width; i++) {
+        const I d = (this->rnd.*random)();
+        data[j * CFL_BUF_LINE + i] = d;
+        data_ref[j * CFL_BUF_LINE + i] = d;
+      }
+    }
+  }
+};
+
+template <typename I>
+class CFLTestWithAlignedData : public CFLTest {
+ public:
+  ~CFLTestWithAlignedData() override {
+    aom_free(chroma_pels_ref);
+    aom_free(sub_luma_pels_ref);
+    aom_free(chroma_pels);
+    aom_free(sub_luma_pels);
+  }
+
+ protected:
+  void init() {
+    chroma_pels_ref =
+        reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
+    ASSERT_NE(chroma_pels_ref, nullptr);
+    chroma_pels =
+        reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
+    ASSERT_NE(chroma_pels, nullptr);
+    sub_luma_pels_ref = reinterpret_cast<int16_t *>(
+        aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
+    ASSERT_NE(sub_luma_pels_ref, nullptr);
+    sub_luma_pels = reinterpret_cast<int16_t *>(
+        aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
+    ASSERT_NE(sub_luma_pels, nullptr);
+    memset(chroma_pels_ref, 0, sizeof(I) * CFL_BUF_SQUARE);
+    memset(chroma_pels, 0, sizeof(I) * CFL_BUF_SQUARE);
+    memset(sub_luma_pels_ref, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
+    memset(sub_luma_pels, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
+  }
+
+  I *chroma_pels_ref;
+  I *chroma_pels;
+  int16_t *sub_luma_pels_ref;
+  int16_t *sub_luma_pels;
+  int alpha_q3;
+  I dc;
+  void randData(int bd) {
+    alpha_q3 = this->rnd(33) - 16;
+    dc = this->rnd(1 << bd);
+    for (int j = 0; j < this->height; j++) {
+      for (int i = 0; i < this->width; i++) {
+        chroma_pels[j * CFL_BUF_LINE + i] = dc;
+        chroma_pels_ref[j * CFL_BUF_LINE + i] = dc;
+        sub_luma_pels_ref[j * CFL_BUF_LINE + i] =
+            sub_luma_pels[j * CFL_BUF_LINE + i] = this->rnd(1 << (bd + 3));
+      }
+    }
+  }
+};
+
+typedef cfl_subtract_average_fn (*sub_avg_fn)(TX_SIZE tx_size);
+typedef std::tuple<TX_SIZE, sub_avg_fn> sub_avg_param;
+class CFLSubAvgTest : public ::testing::TestWithParam<sub_avg_param>,
+                      public CFLTestWithData<int16_t> {
+ public:
+  void SetUp() override {
+    CFLTest::init(std::get<0>(this->GetParam()));
+    sub_avg = std::get<1>(this->GetParam())(tx_size);
+    sub_avg_ref = cfl_get_subtract_average_fn_c(tx_size);
+  }
+  ~CFLSubAvgTest() override = default;
+
+ protected:
+  cfl_subtract_average_fn sub_avg;
+  cfl_subtract_average_fn sub_avg_ref;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubAvgTest);
+
+TEST_P(CFLSubAvgTest, SubAvgTest) {
+  for (int it = 0; it < NUM_ITERATIONS; it++) {
+    randData(&ACMRandom::Rand15);
+    sub_avg((uint16_t *)data, data);
+    sub_avg_ref((uint16_t *)data_ref, data_ref);
+    assert_eq<int16_t>(data, data_ref, width, height);
+  }
+}
+
+TEST_P(CFLSubAvgTest, DISABLED_SubAvgSpeedTest) {
+  aom_usec_timer ref_timer;
+  aom_usec_timer timer;
+  randData(&ACMRandom::Rand15);
+  aom_usec_timer_start(&ref_timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    sub_avg_ref((uint16_t *)data_ref, data_ref);
+  }
+  aom_usec_timer_mark(&ref_timer);
+  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+  aom_usec_timer_start(&timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    sub_avg((uint16_t *)data, data);
+  }
+  aom_usec_timer_mark(&timer);
+  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+  printSpeed(ref_elapsed_time, elapsed_time, width, height);
+  assertFaster(ref_elapsed_time, elapsed_time);
+}
+
+template <typename S, typename T, typename I>
+class CFLSubsampleTest : public ::testing::TestWithParam<S>,
+                         public CFLTestWithData<I> {
+ public:
+  void SetUp() override {
+    CFLTest::init(std::get<0>(this->GetParam()));
+    fun_420 = std::get<1>(this->GetParam())(this->tx_size);
+    fun_422 = std::get<2>(this->GetParam())(this->tx_size);
+    fun_444 = std::get<3>(this->GetParam())(this->tx_size);
+  }
+
+ protected:
+  T fun_420;
+  T fun_422;
+  T fun_444;
+  T fun_420_ref;
+  T fun_422_ref;
+  T fun_444_ref;
+
+  void subsampleTest(T fun, T fun_ref, int sub_width, int sub_height,
+                     I (ACMRandom::*random)()) {
+    uint16_t sub_luma_pels[CFL_BUF_SQUARE];
+    uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
+
+    for (int it = 0; it < NUM_ITERATIONS; it++) {
+      CFLTestWithData<I>::randData(random);
+      fun(this->data, CFL_BUF_LINE, sub_luma_pels);
+      fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels_ref);
+      assert_eq<uint16_t>(sub_luma_pels, sub_luma_pels_ref, sub_width,
+                          sub_height);
+    }
+  }
+
+  void subsampleSpeedTest(T fun, T fun_ref, I (ACMRandom::*random)()) {
+    uint16_t sub_luma_pels[CFL_BUF_SQUARE];
+    uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
+    aom_usec_timer ref_timer;
+    aom_usec_timer timer;
+
+    CFLTestWithData<I>::randData(random);
+    aom_usec_timer_start(&ref_timer);
+    for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+      fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels);
+    }
+    aom_usec_timer_mark(&ref_timer);
+    int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+    aom_usec_timer_start(&timer);
+    for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+      fun(this->data, CFL_BUF_LINE, sub_luma_pels_ref);
+    }
+    aom_usec_timer_mark(&timer);
+    int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+    printSpeed(ref_elapsed_time, elapsed_time, this->width, this->height);
+    assertFaster(ref_elapsed_time, elapsed_time);
+  }
+};
+
+typedef cfl_subsample_lbd_fn (*get_subsample_lbd_fn)(TX_SIZE tx_size);
+typedef std::tuple<TX_SIZE, get_subsample_lbd_fn, get_subsample_lbd_fn,
+                   get_subsample_lbd_fn>
+    subsample_lbd_param;
+class CFLSubsampleLBDTest
+    : public CFLSubsampleTest<subsample_lbd_param, cfl_subsample_lbd_fn,
+                              uint8_t> {
+ public:
+  ~CFLSubsampleLBDTest() override = default;
+  void SetUp() override {
+    CFLSubsampleTest::SetUp();
+    fun_420_ref = cfl_get_luma_subsampling_420_lbd_c(tx_size);
+    fun_422_ref = cfl_get_luma_subsampling_422_lbd_c(tx_size);
+    fun_444_ref = cfl_get_luma_subsampling_444_lbd_c(tx_size);
+  }
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubsampleLBDTest);
+
+TEST_P(CFLSubsampleLBDTest, SubsampleLBD420Test) {
+  subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1,
+                &ACMRandom::Rand8);
+}
+
+TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD420SpeedTest) {
+  subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand8);
+}
+
+TEST_P(CFLSubsampleLBDTest, SubsampleLBD422Test) {
+  subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand8);
+}
+
+TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD422SpeedTest) {
+  subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand8);
+}
+
+TEST_P(CFLSubsampleLBDTest, SubsampleLBD444Test) {
+  subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand8);
+}
+
+TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD444SpeedTest) {
+  subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand8);
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef cfl_subsample_hbd_fn (*get_subsample_hbd_fn)(TX_SIZE tx_size);
+typedef std::tuple<TX_SIZE, get_subsample_hbd_fn, get_subsample_hbd_fn,
+                   get_subsample_hbd_fn>
+    subsample_hbd_param;
+class CFLSubsampleHBDTest
+    : public CFLSubsampleTest<subsample_hbd_param, cfl_subsample_hbd_fn,
+                              uint16_t> {
+ public:
+  ~CFLSubsampleHBDTest() override = default;
+  void SetUp() override {
+    CFLSubsampleTest::SetUp();
+    fun_420_ref = cfl_get_luma_subsampling_420_hbd_c(tx_size);
+    fun_422_ref = cfl_get_luma_subsampling_422_hbd_c(tx_size);
+    fun_444_ref = cfl_get_luma_subsampling_444_hbd_c(tx_size);
+  }
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubsampleHBDTest);
+
+TEST_P(CFLSubsampleHBDTest, SubsampleHBD420Test) {
+  subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1,
+                &ACMRandom::Rand12);
+}
+
+TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD420SpeedTest) {
+  subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand12);
+}
+
+TEST_P(CFLSubsampleHBDTest, SubsampleHBD422Test) {
+  subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand12);
+}
+
+TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD422SpeedTest) {
+  subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand12);
+}
+
+TEST_P(CFLSubsampleHBDTest, SubsampleHBD444Test) {
+  subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand12);
+}
+
+TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD444SpeedTest) {
+  subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand12);
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+typedef cfl_predict_lbd_fn (*get_predict_fn)(TX_SIZE tx_size);
+typedef std::tuple<TX_SIZE, get_predict_fn> predict_param;
+class CFLPredictTest : public ::testing::TestWithParam<predict_param>,
+                       public CFLTestWithAlignedData<uint8_t> {
+ public:
+  void SetUp() override {
+    CFLTest::init(std::get<0>(this->GetParam()));
+    CFLTestWithAlignedData::init();
+    predict = std::get<1>(this->GetParam())(tx_size);
+    predict_ref = cfl_get_predict_lbd_fn_c(tx_size);
+  }
+  ~CFLPredictTest() override = default;
+
+ protected:
+  cfl_predict_lbd_fn predict;
+  cfl_predict_lbd_fn predict_ref;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLPredictTest);
+
+TEST_P(CFLPredictTest, PredictTest) {
+  for (int it = 0; it < NUM_ITERATIONS; it++) {
+    randData(8);
+    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
+    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
+    assert_eq<uint8_t>(chroma_pels, chroma_pels_ref, width, height);
+  }
+}
+TEST_P(CFLPredictTest, DISABLED_PredictSpeedTest) {
+  aom_usec_timer ref_timer;
+  aom_usec_timer timer;
+  randData(8);
+  aom_usec_timer_start(&ref_timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
+  }
+  aom_usec_timer_mark(&ref_timer);
+  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer_start(&timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
+  }
+  aom_usec_timer_mark(&timer);
+  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+  printSpeed(ref_elapsed_time, elapsed_time, width, height);
+  assertFaster(ref_elapsed_time, elapsed_time);
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef cfl_predict_hbd_fn (*get_predict_fn_hbd)(TX_SIZE tx_size);
+typedef std::tuple<TX_SIZE, get_predict_fn_hbd> predict_param_hbd;
+class CFLPredictHBDTest : public ::testing::TestWithParam<predict_param_hbd>,
+                          public CFLTestWithAlignedData<uint16_t> {
+ public:
+  void SetUp() override {
+    CFLTest::init(std::get<0>(this->GetParam()));
+    CFLTestWithAlignedData::init();
+    predict = std::get<1>(this->GetParam())(tx_size);
+    predict_ref = cfl_get_predict_hbd_fn_c(tx_size);
+  }
+  ~CFLPredictHBDTest() override = default;
+
+ protected:
+  cfl_predict_hbd_fn predict;
+  cfl_predict_hbd_fn predict_ref;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLPredictHBDTest);
+
+TEST_P(CFLPredictHBDTest, PredictHBDTest) {
+  int bd = 12;
+  for (int it = 0; it < NUM_ITERATIONS; it++) {
+    randData(bd);
+    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
+    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd);
+    assert_eq<uint16_t>(chroma_pels, chroma_pels_ref, width, height);
+  }
+}
+TEST_P(CFLPredictHBDTest, DISABLED_PredictHBDSpeedTest) {
+  aom_usec_timer ref_timer;
+  aom_usec_timer timer;
+  const int bd = 12;
+  randData(bd);
+  aom_usec_timer_start(&ref_timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd);
+  }
+  aom_usec_timer_mark(&ref_timer);
+  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer_start(&timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
+  }
+  aom_usec_timer_mark(&timer);
+  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+  printSpeed(ref_elapsed_time, elapsed_time, width, height);
+  assertFaster(ref_elapsed_time, elapsed_time);
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#if HAVE_SSE2
+const sub_avg_param sub_avg_sizes_sse2[] = { ALL_CFL_TX_SIZES(
+    cfl_get_subtract_average_fn_sse2) };
+
+INSTANTIATE_TEST_SUITE_P(SSE2, CFLSubAvgTest,
+                         ::testing::ValuesIn(sub_avg_sizes_sse2));
+
+#endif
+
+#if HAVE_SSSE3
+const subsample_lbd_param subsample_lbd_sizes_ssse3[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_ssse3,
+                             cfl_get_luma_subsampling_422_lbd_ssse3,
+                             cfl_get_luma_subsampling_444_lbd_ssse3)
+};
+
+const predict_param predict_sizes_ssse3[] = { ALL_CFL_TX_SIZES(
+    cfl_get_predict_lbd_fn_ssse3) };
+
+INSTANTIATE_TEST_SUITE_P(SSSE3, CFLSubsampleLBDTest,
+                         ::testing::ValuesIn(subsample_lbd_sizes_ssse3));
+
+INSTANTIATE_TEST_SUITE_P(SSSE3, CFLPredictTest,
+                         ::testing::ValuesIn(predict_sizes_ssse3));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+const subsample_hbd_param subsample_hbd_sizes_ssse3[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_ssse3,
+                             cfl_get_luma_subsampling_422_hbd_ssse3,
+                             cfl_get_luma_subsampling_444_hbd_ssse3)
+};
+
+const predict_param_hbd predict_sizes_hbd_ssse3[] = { ALL_CFL_TX_SIZES(
+    cfl_get_predict_hbd_fn_ssse3) };
+
+INSTANTIATE_TEST_SUITE_P(SSSE3, CFLSubsampleHBDTest,
+                         ::testing::ValuesIn(subsample_hbd_sizes_ssse3));
+
+INSTANTIATE_TEST_SUITE_P(SSSE3, CFLPredictHBDTest,
+                         ::testing::ValuesIn(predict_sizes_hbd_ssse3));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX2
+const sub_avg_param sub_avg_sizes_avx2[] = { ALL_CFL_TX_SIZES(
+    cfl_get_subtract_average_fn_avx2) };
+
+const subsample_lbd_param subsample_lbd_sizes_avx2[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_avx2,
+                             cfl_get_luma_subsampling_422_lbd_avx2,
+                             cfl_get_luma_subsampling_444_lbd_avx2)
+};
+
+const predict_param predict_sizes_avx2[] = { ALL_CFL_TX_SIZES(
+    cfl_get_predict_lbd_fn_avx2) };
+
+INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubAvgTest,
+                         ::testing::ValuesIn(sub_avg_sizes_avx2));
+
+INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubsampleLBDTest,
+                         ::testing::ValuesIn(subsample_lbd_sizes_avx2));
+
+INSTANTIATE_TEST_SUITE_P(AVX2, CFLPredictTest,
+                         ::testing::ValuesIn(predict_sizes_avx2));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+const subsample_hbd_param subsample_hbd_sizes_avx2[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_avx2,
+                             cfl_get_luma_subsampling_422_hbd_avx2,
+                             cfl_get_luma_subsampling_444_hbd_avx2)
+};
+
+const predict_param_hbd predict_sizes_hbd_avx2[] = { ALL_CFL_TX_SIZES(
+    cfl_get_predict_hbd_fn_avx2) };
+
+INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubsampleHBDTest,
+                         ::testing::ValuesIn(subsample_hbd_sizes_avx2));
+
+INSTANTIATE_TEST_SUITE_P(AVX2, CFLPredictHBDTest,
+                         ::testing::ValuesIn(predict_sizes_hbd_avx2));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+const sub_avg_param sub_avg_sizes_neon[] = { ALL_CFL_TX_SIZES(
+    cfl_get_subtract_average_fn_neon) };
+
+const predict_param predict_sizes_neon[] = { ALL_CFL_TX_SIZES(
+    cfl_get_predict_lbd_fn_neon) };
+
+const subsample_lbd_param subsample_lbd_sizes_neon[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_neon,
+                             cfl_get_luma_subsampling_422_lbd_neon,
+                             cfl_get_luma_subsampling_444_lbd_neon)
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, CFLSubAvgTest,
+                         ::testing::ValuesIn(sub_avg_sizes_neon));
+
+INSTANTIATE_TEST_SUITE_P(NEON, CFLSubsampleLBDTest,
+                         ::testing::ValuesIn(subsample_lbd_sizes_neon));
+
+INSTANTIATE_TEST_SUITE_P(NEON, CFLPredictTest,
+                         ::testing::ValuesIn(predict_sizes_neon));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+const subsample_hbd_param subsample_hbd_sizes_neon[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_neon,
+                             cfl_get_luma_subsampling_422_hbd_neon,
+                             cfl_get_luma_subsampling_444_hbd_neon)
+};
+
+const predict_param_hbd predict_sizes_hbd_neon[] = { ALL_CFL_TX_SIZES(
+    cfl_get_predict_hbd_fn_neon) };
+
+INSTANTIATE_TEST_SUITE_P(NEON, CFLSubsampleHBDTest,
+                         ::testing::ValuesIn(subsample_hbd_sizes_neon));
+
+INSTANTIATE_TEST_SUITE_P(NEON, CFLPredictHBDTest,
+                         ::testing::ValuesIn(predict_sizes_hbd_neon));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_NEON
+
+#if HAVE_VSX
+const sub_avg_param sub_avg_sizes_vsx[] = { ALL_CFL_TX_SIZES(
+    cfl_get_subtract_average_fn_vsx) };
+
+INSTANTIATE_TEST_SUITE_P(VSX, CFLSubAvgTest,
+                         ::testing::ValuesIn(sub_avg_sizes_vsx));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/cnn_test.cc b/third_party/aom/test/cnn_test.cc
new file mode 100644
index 0000000000..e5114b56ce
--- /dev/null
+++ b/third_party/aom/test/cnn_test.cc
@@ -0,0 +1,2661 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "av1/encoder/cnn.h"
+#include "av1/encoder/partition_cnn_weights.h"
+#include "test/acm_random.h"
+#include "test/function_equivalence_test.h"
+#include "test/util.h"
+
+#define SQR(x) ((x) * (x))
+
+// Best possible pixelwise guaranteed precision given each float has at most
+// 3 specified decimals.
+#define PIXELWISE_FLOAT_TOL 1E-2
+
+#define MSE_FLOAT_TOL 1E-6
+#define MSE_INT_TOL 0
+
+// CNN convolve pixelwise error threshold for functional equivalence.
+#define CNN_CONVOLVE_PIXELWISE_FLOAT_TOL 1E-3f
+
+namespace {
+
+class CNNTest : public ::testing::Test {
+ protected:
+  static void RunCNNTest(int image_width, int image_height, const float *input,
+                         const float *expected, const CNN_CONFIG *cnn_config,
+                         int in_stride, CNN_THREAD_DATA *thread_data,
+                         double tolerance) {
+    int out_width, out_height, out_channels;
+    av1_find_cnn_output_size(image_width, image_height, cnn_config, &out_width,
+                             &out_height, &out_channels);
+
+    const int out_size = out_width * out_height;
+    const int out_stride = out_width;
+
+    float *output_ =
+        (float *)aom_malloc(sizeof(*output_) * out_size * out_channels);
+    ASSERT_NE(output_, nullptr);
+    float *output[CNN_MAX_CHANNELS] = { nullptr };
+    for (int channel = 0; channel < out_channels; ++channel) {
+      output[channel] = output_ + (channel * out_size);
+    }
+    const int num_outputs = 1;
+    const int output_chs[1] = { out_channels };
+    const int output_strides[1] = { out_stride };
+    CNN_MULTI_OUT output_struct = { num_outputs, output_chs, output_strides,
+                                    output };
+
+    RunMultiOutCNNTest(&input, image_width, image_height, in_stride, cnn_config,
+                       thread_data, &output_struct, &expected, tolerance);
+
+    aom_free(output_);
+  }
+
+  static void RunMultiOutCNNTest(const float **input, int image_width,
+                                 int image_height, int in_stride,
+                                 const CNN_CONFIG *cnn_config,
+                                 CNN_THREAD_DATA *thread_data,
+                                 CNN_MULTI_OUT *output, const float **expected,
+                                 double tolerance) {
+    const int num_outputs = output->num_outputs;
+    const int *output_chs = output->output_channels;
+
+    int *out_widths = (int *)aom_calloc(num_outputs, sizeof(*out_widths));
+    int *out_heights = (int *)aom_calloc(num_outputs, sizeof(*out_heights));
+    int *not_used = (int *)aom_calloc(num_outputs, sizeof(*not_used));
+    ASSERT_NE(out_widths, nullptr);
+    ASSERT_NE(out_heights, nullptr);
+    ASSERT_NE(not_used, nullptr);
+
+    av1_find_cnn_output_size(image_width, image_height, cnn_config, out_widths,
+                             out_heights, not_used);
+    ASSERT_TRUE(av1_cnn_predict(input, image_width, image_height, in_stride,
+                                cnn_config, thread_data, output));
+
+    int channel_offset = 0;
+    for (int output_idx = 0; output_idx < num_outputs; output_idx++) {
+      const float *expected_out = expected[output_idx];
+      const int curr_output_chs = output_chs[output_idx];
+      const int out_size = out_widths[output_idx] * out_heights[output_idx];
+
+      double mse = 0;
+      int expected_ite = 0;
+      for (int channel = 0; channel < curr_output_chs; ++channel) {
+        const float *buf_out = output->output_buffer[channel_offset];
+
+        for (int i = 0; i < out_size; ++i) {
+          EXPECT_NEAR(expected_out[expected_ite], buf_out[i],
+                      PIXELWISE_FLOAT_TOL)
+              << " output " << output_idx << " channel " << channel << " pixel "
+              << expected_ite % out_size << ": " << expected_out[expected_ite]
+              << "/" << buf_out[i] << std::endl;
+          mse += SQR(expected_out[expected_ite] - buf_out[i]);
+          expected_ite++;
+        }
+
+        channel_offset++;
+      }
+      mse /= (out_size * curr_output_chs);
+      EXPECT_LE(mse, tolerance) << " output " << output_idx << std::endl;
+    }
+
+    aom_free(out_widths);
+    aom_free(out_heights);
+    aom_free(not_used);
+  }
+
+  static void AssignLayerWeightsBiases(CNN_CONFIG *cnn_config, float *weights,
+                                       float *bias) {
+    size_t weight_offset = 0;
+    size_t bias_offset = 0;
+    for (int layer = 0; layer < cnn_config->num_layers; ++layer) {
+      CNN_LAYER_CONFIG *layer_config = &cnn_config->layer_config[layer];
+      layer_config->weights = weights + weight_offset;
+      layer_config->bias = bias + bias_offset;
+      weight_offset += layer_config->filter_width *
+                       layer_config->filter_height * layer_config->in_channels *
+                       layer_config->out_channels;
+      bias_offset += layer_config->out_channels;
+
+      ASSERT_NE(layer_config->weights, nullptr);
+      ASSERT_NE(layer_config->bias, nullptr);
+    }
+  }
+};
+
+}  // namespace
+
+TEST_F(CNNTest, TestMultilayerConvolution) {
+  int image_height = 16;
+  int image_width = 16;
+  int filter_height = 5;
+  int filter_width = 4;
+
+  float input[] = {
+    -3, 1,  -3, 2,  -2, -2, 2,  -2, 1,  -2, -3, 1,  2,  2,  2,  -2, 0,  1,  -1,
+    -3, -1, -1, 1,  0,  -3, 1,  0,  -1, 1,  0,  0,  -3, -3, -3, 0,  2,  1,  -1,
+    2,  0,  1,  -3, -1, 2,  2,  1,  -2, 0,  -1, 0,  -2, -2, -1, 1,  0,  0,  0,
+    -2, -2, -2, 1,  1,  -2, 1,  1,  -2, -2, 1,  -2, -1, -2, -3, 2,  -3, -1, 1,
+    0,  -2, -2, -2, 1,  -2, -2, -1, -1, 2,  2,  2,  -1, 1,  -3, -3, 0,  2,  0,
+    2,  1,  -3, -3, 1,  2,  2,  1,  -2, -3, 0,  -3, 0,  -3, -2, 0,  1,  1,  0,
+    -3, 2,  -1, 2,  1,  0,  1,  -2, 1,  -1, -1, 2,  0,  -2, -3, 1,  1,  -2, -1,
+    -3, -3, -1, 0,  -3, -2, 0,  0,  1,  0,  -3, -2, -1, 1,  0,  2,  1,  0,  -3,
+    -2, -3, -3, -1, 0,  -2, 2,  -1, -3, 0,  -1, -1, 2,  0,  -3, -2, -1, 0,  0,
+    1,  -2, 1,  2,  1,  2,  2,  -3, 2,  -1, 0,  0,  -1, 0,  2,  2,  -1, 2,  -2,
+    1,  1,  -3, -3, 1,  -1, -1, -2, 2,  -2, -2, 2,  -1, -3, 2,  -3, 1,  -1, -1,
+    -3, 1,  -1, 1,  0,  -3, -3, 1,  -3, -3, 0,  2,  2,  -2, -1, 2,  0,  2,  1,
+    -1, -3, 0,  0,  -1, -1, 1,  0,  2,  0,  -3, 2,  1,  0,  1,  -3, 2,  -3, -3,
+    -1, -3, -3, 2,  0,  2,  -2, 1,  -1,
+  };
+
+  float weights[] = {
+    -2, 2,  -2, 2,  -1, -3, 2,  2,  0,  0,  -3, -1, -2, -3, 1,  -1, 0,  0,  0,
+    2,  -2, 2,  -2, -3, 1,  1,  1,  -3, -1, 0,  1,  2,  -2, 0,  -1, -3, -1, -2,
+    2,  -3, -3, 1,  -2, -3, 0,  2,  1,  -3, -3, -1, -3, -2, -1, -3, -1, -3, -2,
+    -1, -3, -1, -2, -2, -3, 2,  0,  -3, 0,  -3, -3, 1,  -3, -1, 0,  -1, 1,  1,
+    -1, 1,  -2, 0,  2,  0,  -3, 1,  -1, -1, 2,  0,  1,  -3, -3, 1,  2,  -3, -3,
+    1,  -3, 2,  0,  -3, 1,  2,  2,  -2, -1, -2, 1,  1,  0,  -2, -2, 1,  2,  -1,
+    -3, 1,  -2, 2,  -3, -2, -3, 2,  1,  0,  -2, 0,  1,  -3, 2,  -2, -2, 0,  2,
+    -3, 2,  0,  0,  1,  -2, 1,  1,  -2, -1, -2, 1,  -2, 0,  -2, -2, 0,  -1, -1,
+    -3, -3, -3, 1,  -3, -2, 2,  -1, 2,  0,  2,  -2, 2,  -2, 1,  -3, -3, -1, 0,
+    2,  2,  1,  -1, -3, -1, -3, 2,  1,  -2, 0,  -3, -1, -3, -1, 2,  1,  0,  2,
+    -1, 1,  0,  1,  2,  -1, -2, 2,  1,  -3, -1, -3, 0,  1,  -2, 0,  -2, -3, 0,
+    -2, 2,  2,  0,  0,  2,  -3, 2,  -3, -2, 1,  2,  -3, -3, -1, -3, 0,  -3, -3,
+    -2, -2, -2, 0,  0,  1,  0,  0,  -1, 0,  0,  -3, 0,  -3, -1, -2, 1,  -2, -1,
+    2,  -2, 0,  0,  1,  0,  -2, -1, 0,  -3, 1,  0,  -1, -3, 1,  -1, 1,  -1, -3,
+    1,  0,  1,  1,  -1, 2,  2,  0,  0,  1,  -3, 2,  -2, -2, -3, -2, -1, -2, 2,
+    0,  2,  -2, -3, -1, -3, 2,  2,  -1, 2,  2,  -1, 0,  -3, 1,
+  };
+
+  float bias[] = {
+    1, -1, 0, 1, 1, 1, -2,
+  };
+
+  float expected_same[] = {
+    -1125, 2926,  6406,  631,   -1244, 97,    -1454, 2526,  1065,  3292,  3464,
+    2553,  -330,  532,   1038,  1182,  -402,  3758,  3392,  9854,  4365,  1408,
+    4736,  3134,  3838,  2409,  3221,  4350,  6750,  4045,  815,   1188,  2959,
+    9802,  9590,  4572,  5740,  4253,  1701,  7974,  7012,  6854,  7093,  3907,
+    4539,  3886,  4267,  3505,  465,   7824,  9219,  10026, 7968,  957,   2295,
+    5594,  10811, 9641,  5950,  10043, 8783,  3132,  1421,  1110,  4108,  13929,
+    10660, -84,   -61,   3932,  -180,  6811,  13393, 15147, 15640, 9337,  6961,
+    3808,  1604,  1398,  1047,  6739,  10144, 6517,  4698,  2678,  7389,  2595,
+    5248,  12075, 11272, 13951, 8820,  1090,  2199,  2206,  2788,  12116, 6683,
+    2612,  -291,  3183,  9414,  12316, 14524, 12333, 13208, 7832,  4664,  4657,
+    3534,  1298,  -666,  4250,  7707,  9103,  5760,  688,   9571,  15782, 14203,
+    14878, 17339, 14684, 8690,  5671,  875,   1429,  1531,  6173,  2984,  5558,
+    2996,  7928,  6733,  16117, 15262, 12757, 7980,  3923,  4795,  5973,  2051,
+    455,   -1922, 1816,  5906,  3321,  10908, 10910, 7377,  12204, 12809, 11195,
+    7451,  6666,  74,    -1645, -35,   -391,  3813,  7324,  892,   1656,  6095,
+    12193, 14648, 12156, 14663, 10251, 10325, 7821,  3925,  323,   697,   442,
+    1324,  4669,  7002,  5485,  5171,  5086,  10582, 11053, 9709,  11353, 8543,
+    5256,  2873,  235,   -628,  1496,  1878,  -867,  3420,  6865,  5937,  10182,
+    13277, 10069, 10789, 5998,  624,   -2082, 4417,  1258,  -1080, -819,  -1430,
+    1033,  5220,  6335,  8471,  8980,  11908, 14430, 12584, 8404,  1576,  -803,
+    985,   1481,  1367,  -193,  873,   3684,  2288,  6676,  9477,  11155, 9602,
+    9707,  10507, 4739,  3174,  -575,  -178,  3002,  1710,  423,   -477,  554,
+    3088,  2029,  5113,  5000,  3771,  6090,  5365,  1185,  2855,  399,   -312,
+    -1577, 176,   955,
+  };
+
+  float expected_replicate[] = {
+    13768, 13528, 12999, 6906,  4618,  4043,  2611,  9955,  6685,  4776,  2753,
+    1036,  3063,  4544,  5183,  7349,  12451, 12501, 9131,  12753, 8908,  4058,
+    6299,  7542,  7115,  3307,  3360,  3543,  9754,  7808,  5991,  9019,  14320,
+    14919, 12492, 6871,  7373,  3336,  2085,  10604, 9377,  6882,  5009,  3103,
+    6220,  6278,  7588,  10196, 11045, 11563, 11842, 11911, 8279,  2030,  1858,
+    6368,  12123, 9909,  6347,  10345, 9365,  4038,  1673,  3051,  16492, 16649,
+    12276, 408,   -301,  4122,  -654,  7864,  14038, 15279, 15315, 9744,  8243,
+    5298,  746,   380,   9824,  9124,  10895, 6640,  4712,  2669,  6980,  2759,
+    5385,  12345, 11336, 13129, 8600,  2370,  3682,  5219,  12407, 13123, 6784,
+    2612,  -291,  3183,  9414,  12316, 14524, 12333, 13397, 7543,  3916,  4153,
+    4477,  4314,  7983,  8418,  9163,  9103,  5760,  688,   9571,  15782, 14203,
+    14878, 17718, 14570, 7940,  6642,  5094,  7133,  9964,  10219, 3224,  5558,
+    2996,  7928,  6733,  16117, 15262, 12757, 7958,  4401,  5187,  5476,  5529,
+    6055,  2206,  3909,  6015,  3321,  10908, 10910, 7377,  12204, 12809, 11195,
+    6967,  6840,  481,   -1600, 274,   1,     10373, 8514,  1123,  2117,  6758,
+    12736, 16223, 13585, 15988, 11771, 10600, 7918,  4156,  2840,  3111,  3287,
+    6359,  7652,  8813,  6530,  6967,  7789,  13671, 13990, 13247, 13241, 9836,
+    5251,  3024,  2313,  1834,  4187,  2637,  -1312, 2139,  7378,  7665,  11933,
+    15591, 15314, 15678, 9531,  2820,  -1516, 3400,  1314,  22,    363,   -2896,
+    -898,  5906,  7308,  10650, 12975, 16978, 20370, 18817, 12381, 4118,  -861,
+    -137,  236,   1802,  1632,  -350,  2334,  3400,  8680,  14064, 18216, 18675,
+    21765, 22871, 11491, 4937,  -1555, -11,   1669,  2392,  3265,  -5254, -217,
+    5001,  8063,  13444, 18884, 19706, 22794, 21064, 9545,  6689,  -7,    289,
+    -2021, 504,   2347,
+  };
+
+  float expected_valid[] = {
+    2612,  -291,  3183,  9414,  12316, 14524, 12333, 9103,  5760,  688,
+    9571,  15782, 14203, 14878, 5558,  2996,  7928,  6733,  16117, 15262,
+    12757, 3321,  10908, 10910, 7377,  12204, 12809, 11195,
+  };
+
+  CNN_CONFIG cnn_config = { 3,
+                            0,
+                            0,
+                            0,
+                            0,
+                            {
+                                {
+                                    1,
+                                    filter_width,
+                                    filter_height,
+                                    3,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_NOC,
+                                    {},
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    3,
+                                    filter_width,
+                                    filter_height,
+                                    3,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_NOC,
+                                    {},
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    3,
+                                    filter_width,
+                                    filter_height,
+                                    1,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_NOC,
+                                    {},
+                                    {},
+                                    0,
+                                },
+                            } };
+
+  // Weights and biases need to be specified separately because
+  // of the offset.
+  AssignLayerWeightsBiases(&cnn_config, weights, bias);
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected_same, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  for (int i = 0; i < cnn_config.num_layers; ++i) {
+    cnn_config.layer_config[i].pad = PADDING_SAME_REPLICATE;
+  }
+
+  RunCNNTest(image_width, image_height, input, expected_replicate, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  for (int i = 0; i < cnn_config.num_layers; ++i) {
+    cnn_config.layer_config[i].pad = PADDING_VALID;
+  }
+
+  RunCNNTest(image_width, image_height, input, expected_valid, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+}
+
+TEST_F(CNNTest, TestRELUSingleLayer) {
+  int image_width = 8;
+  int image_height = 8;
+  int filter_height = 5;
+  int filter_width = 4;
+  float input[] = {
+    0, -2, -3, 1,  -1, 2,  -2, 1,  -3, -1, 0,  1,  -2, -3, -2, -2,
+    1, -3, 2,  -3, -1, -1, 2,  0,  -2, -3, 0,  -2, -3, 1,  -1, -1,
+    2, -2, 0,  -2, -3, -3, 1,  1,  -1, 1,  0,  1,  -3, 0,  2,  2,
+    0, -3, 1,  -3, 2,  -2, 1,  -1, -1, -2, -3, -2, -1, -3, -2, -1,
+  };
+  float expected_same[] = {
+    9,  0,  1,  1,  0,  3,  0,  19, 0,  12, 10, 0,  0,  0,  5, 0,
+    0,  18, 21, 7,  19, 4,  3,  0,  0,  9,  16, 0,  11, 16, 0, 11,
+    12, 2,  0,  11, 0,  16, 6,  0,  8,  22, 13, 10, 12, 0,  0, 0,
+    0,  1,  2,  12, 29, 6,  10, 0,  13, 0,  0,  5,  8,  10, 0, 0,
+  };
+  float expected_replicate[] = {
+    18, 17, 12, 2,  0,  0,  5,  11, 0,  17, 22, 6,  0,  0,  17, 0,
+    0,  18, 21, 7,  19, 4,  3,  5,  3,  9,  16, 0,  11, 16, 0,  3,
+    3,  2,  0,  11, 0,  16, 6,  0,  17, 22, 13, 10, 12, 0,  0,  0,
+    0,  4,  1,  10, 30, 7,  10, 0,  23, 8,  0,  13, 15, 19, 8,  10,
+  };
+  float expected_valid[] = {
+    18, 21, 7, 19, 4, 9, 16, 0, 11, 16, 2, 0, 11, 0, 16, 22, 13, 10, 12, 0,
+  };
+  float weights[] = {
+    -2, -3, 1, 2, 2, -2, -3, 0, -3, 2, 2, -3, -3, -2, 0, 1, 2, 0, -1, -1,
+  };
+  float bias[] = { -3 };
+
+  CNN_CONFIG cnn_config = { 1,
+                            0,
+                            0,
+                            0,
+                            0,
+                            { {
+                                1,
+                                filter_width,
+                                filter_height,
+                                1,
+                                1,
+                                1,
+                                0,
+                                weights,
+                                bias,
+                                PADDING_SAME_ZERO,
+                                RELU,
+                                0,
+                                0,
+                                BRANCH_NO_COPY,
+                                BRANCH_NOC,
+                                {},
+                                {},
+                                0,
+                            } } };
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected_same, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  cnn_config.layer_config[0].pad = PADDING_SAME_REPLICATE;
+
+  RunCNNTest(image_width, image_height, input, expected_replicate, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  cnn_config.layer_config[0].pad = PADDING_VALID;
+
+  RunCNNTest(image_width, image_height, input, expected_valid, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+}
+
+TEST_F(CNNTest, TestVaryingStridesVaryingDimImages) {
+  float weights[] = {
+    1,  -5, -3, -4, -1, 1,  2,  -3, 2,  2,  -1, 1,  -5, 1,  1,
+    -3, -5, 3,  1,  4,  -2, -5, -2, -3, -5, 0,  -1, -5, 2,  -2,
+    -2, 1,  -2, -4, 1,  3,  -2, 2,  0,  -3, 2,  -3, -2, -3,
+  };
+  float bias[] = { 2 };
+
+  CNN_CONFIG cnn_config = { 1,
+                            0,
+                            0,
+                            0,
+                            0,
+                            {
+                                {
+                                    1,
+                                    4,
+                                    11,
+                                    1,
+                                    7,
+                                    6,
+                                    0,
+                                    weights,
+                                    bias,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_NOC,
+                                    {},
+                                    {},
+                                    0,
+                                },
+                            } };
+
+  int image_height = 24;
+  int image_width = 17;
+  float input[] = {
+    -1, -3, 4,  4,  -5, 4,  3,  -5, -1, -3, 4,  -4, 2,  -3, 3,  -5, 2,  -1, -5,
+    1,  -1, 3,  1,  -3, -3, 4,  0,  2,  -3, -5, -5, -4, 0,  -5, -2, -3, -1, -2,
+    2,  -5, 4,  4,  0,  -4, -3, 1,  -3, -5, -4, -4, 1,  -2, -3, 3,  -3, -3, -1,
+    -5, -5, -2, 3,  1,  -1, -5, -5, 1,  -4, -2, -1, -2, -4, -4, 2,  -2, 2,  1,
+    -2, -4, -1, 1,  -2, -5, 3,  -2, -1, -1, -5, -3, 1,  -2, -2, -3, -1, -2, -4,
+    -2, 1,  -4, -1, 4,  3,  -4, 0,  4,  2,  2,  4,  -3, -5, 2,  2,  1,  -1, -4,
+    -2, 1,  3,  2,  0,  4,  -1, -3, 2,  1,  -4, 2,  2,  -4, -2, 0,  -2, -1, 4,
+    4,  2,  3,  -4, 2,  -4, -5, 4,  -1, -3, -1, 0,  -4, 1,  3,  -1, -3, -5, 3,
+    -2, -4, 1,  2,  -2, -3, -3, -5, 1,  -3, -1, 0,  -1, 3,  -4, -1, -5, -5, 1,
+    0,  0,  -2, -2, 2,  -2, 0,  0,  2,  0,  -3, 0,  -1, -4, -4, -1, 3,  -4, -4,
+    -1, 0,  -5, -3, -2, 4,  -3, -4, -4, 0,  -5, 1,  -2, -3, -3, -4, 4,  3,  4,
+    3,  3,  -1, 3,  1,  -3, -2, 3,  3,  0,  2,  -4, -3, 2,  2,  0,  -2, 4,  -2,
+    2,  -2, -1, -4, -2, 2,  -4, 3,  -1, 4,  1,  1,  4,  -1, -4, -4, 1,  1,  -2,
+    4,  -1, 3,  2,  -3, 4,  3,  1,  4,  0,  -4, 2,  0,  2,  4,  -2, -2, 4,  2,
+    -1, -2, 1,  -3, 2,  3,  -5, -3, 4,  4,  2,  -5, -4, -5, -2, -4, 2,  0,  2,
+    -5, 4,  -4, -2, -5, 2,  1,  0,  4,  1,  -2, -3, -4, -3, -4, 3,  3,  2,  0,
+    -3, 1,  -5, 4,  0,  4,  -1, 3,  -5, -5, -2, -1, -1, 4,  3,  3,  4,  3,  -4,
+    4,  -3, -3, -1, -4, -1, -4, -1, -2, 4,  -2, -4, 4,  4,  -3, -4, -1, 1,  2,
+    -1, -2, -2, 3,  2,  2,  -3, 0,  -1, 0,  3,  2,  -5, 0,  -4, 0,  0,  2,  -4,
+    -1, -1, 0,  -2, 0,  1,  0,  0,  4,  -5, -1, -5, 2,  -1, 0,  2,  -1, 1,  3,
+    -3, -5, -2, -3, 4,  -2, -2, -1, -3, -4, -1, -2, -4, 1,  4,  -3, -2, -1, 3,
+    -3, -2, 3,  2,  1,  -4, -3, -5, 1,
+  };
+  float expected_1[] = {
+    41, -26, 5, 76, 13, 83, -21, 53, -54, -14, 21, 121,
+  };
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected_1, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  cnn_config.layer_config[0].skip_width = 6;
+  cnn_config.layer_config[0].skip_height = 7;
+
+  float expected_2[] = {
+    21, -50, 41, 20, 72, 127, -21, 103, 62, -37, 83, -3,
+  };
+  RunCNNTest(image_width, image_height, input, expected_2, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  cnn_config.layer_config[0].skip_width = 3;
+  cnn_config.layer_config[0].skip_height = 10;
+
+  float expected_3[] = {
+    -26, -21, -35, 69, 49,  4,  -51, -43, -56,
+    -41, 15,  -44, 40, -62, 63, 38,  27,  47,
+  };
+  RunCNNTest(image_width, image_height, input, expected_3, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  cnn_config.layer_config[0].skip_width = 10;
+  cnn_config.layer_config[0].skip_height = 3;
+
+  float expected_4[] = {
+    21, 49, 28, 87, 50, 40, 102, 81, 58, 85, 51, 66, 36, 19, -37, -45,
+  };
+
+  RunCNNTest(image_width, image_height, input, expected_4, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+}
+
+TEST_F(CNNTest, TestMaxPool) {
+  int image_width = 8;
+  int image_height = 8;
+  int stride = 3;
+  float input[] = {
+    1,  -4, -4, 8, 0, 7, -5, -2, 8, 2, 2, 8,  5,  -1, -1, 9,
+    -3, 0,  -2, 0, 6, 3, -4, 8,  7, 8, 7, -1, 4,  -1, 0,  2,
+    -5, -2, 8,  5, 5, 4, 2,  7,  4, 6, 2, 8,  8,  -4, -3, -4,
+    -3, -1, 2,  3, 3, 6, -5, 8,  9, 5, 0, -2, -1, 6,  5,  7,
+  };
+
+  float expected[] = {
+    49, 58, 70, 68, 68, 70, 48, 57, 88,
+  };
+
+  float weights[] = {
+    3, 1, 3, 4, -1, 5, -2, 1, -4,
+  };
+
+  float bias[] = {
+    -3,
+  };
+
+  CNN_CONFIG cnn_config = { 1,
+                            0,
+                            0,
+                            0,
+                            0,
+                            { {
+                                1,
+                                3,
+                                3,
+                                1,
+                                stride,
+                                stride,
+                                1,
+                                weights,
+                                bias,
+                                PADDING_SAME_ZERO,
+                                NONE,
+                                0,
+                                0,
+                                BRANCH_NO_COPY,
+                                BRANCH_NOC,
+                                {},
+                                {},
+                                0,
+                            } } };
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+}
+
+TEST_F(CNNTest, TestDeconvolveNonActivationSingleLayerSingleKernel) {
+  int image_width = 4;
+  int image_height = 7;
+  float input[] = {
+    9,  6,   181, 9,  218, 30, 80,  108, 68,  216, 70, 128, 179, 228,
+    33, 212, 34,  14, 48,  27, 230, 23,  202, 113, 80, 56,  122, 112,
+  };
+
+  float expected_1_same[] = {
+    15,   -30,  36,   -525,  377, -193, 558, 531,  6,   -24,  -15,  124,
+    166,  -561, -356, -754,  -3,  -3,   -3,  -3,   -3,  -3,   -3,   -3,
+    433,  -311, 711,  381,   247, -317, 453, 129,  215, -627, -409, -885,
+    17,   -255, -55,  -647,  -3,  -3,   -3,  -3,   -3,  -3,   -3,   -3,
+    133,  -719, 633,  -225,  785, 191,  463, 79,   65,  9,    77,   -853,
+    -365, -949, -15,  -667,  -3,  -3,   -3,  -3,   -3,  -3,   -3,   -3,
+    355,  -866, 990,  207,   747, 12,   520, -116, 176, -312, -133, -1370,
+    -426, -802, 143,  -771,  -3,  -3,   -3,  -3,   -3,  -3,   -3,   -3,
+    65,   -79,  127,  -59,   135, -90,  195, 114,  31,  -91,  -57,  -133,
+    17,   -176, -72,  -276,  -3,  -3,   -3,  -3,   -3,  -3,   -3,   -3,
+    457,  -302, 733,  58,    470, -475, 829, 490,  227, -670, -440, -790,
+    153,  -588, -294, -1150, -3,  -3,   -3,  -3,   -3,  -3,   -3,   -3,
+    157,  -251, 349,  -185,  409, -293, 587, 251,  77,  -187, -107, -369,
+    7,    -481, -135, -827,  -3,  -3,   -3,  -3,   -3,  -3,   -3,   -3,
+  };
+  float expected_1_valid[] = {
+    -30,  15,   -30,  36,   -525,  377,  -193,  558,  531,  24,   24,   6,
+    6,    -24,  -15,  124,  166,   -561, -356,  -754, -21,  -39,  -3,   -3,
+    -3,   -3,   -3,   -3,   -3,    -3,   -3,    -3,   -3,   -657, 433,  -311,
+    711,  381,  247,  -317, 453,   129,  321,   321,  215,  215,  -627, -409,
+    -885, 17,   -255, -55,  -647,  -219, -435,  -3,   -3,   -3,   -3,   -3,
+    -3,   -3,   -3,   -3,   -3,    -3,   -207,  133,  -719, 633,  -225, 785,
+    191,  463,  79,   381,  381,   65,   65,    9,    77,   -853, -365, -949,
+    -15,  -667, -259, -515, -3,    -3,   -3,    -3,   -3,   -3,   -3,   -3,
+    -3,   -3,   -3,   -540, 355,   -866, 990,   207,  747,  12,   520,  -116,
+    633,  633,  176,  176,  -312,  -133, -1370, -426, -802, 143,  -771, -427,
+    -851, -3,   -3,   -3,   -3,    -3,   -3,    -3,   -3,   -3,   -3,   -3,
+    -105, 65,   -79,  127,  -59,   135,  -90,   195,  114,  78,   78,   31,
+    31,   -91,  -57,  -133, 17,    -176, -72,   -276, -57,  -111, -3,   -3,
+    -3,   -3,   -3,   -3,   -3,    -3,   -3,    -3,   -3,   -693, 457,  -302,
+    733,  58,   470,  -475, 829,   490,  336,   336,  227,  227,  -670, -440,
+    -790, 153,  -588, -294, -1150, -229, -455,  -3,   -3,   -3,   -3,   -3,
+    -3,   -3,   -3,   -3,   -3,    -3,   -243,  157,  -251, 349,  -185, 409,
+    -293, 587,  251,  333,  333,   77,   77,    -187, -107, -369, 7,    -481,
+    -135, -827, -227, -451,
+  };
+  float weights_1[] = { -3, 2, -1, 3, 3, 1, 1, -3, -2, -4 };
+  float bias_1[] = { -3 };
+
+  CNN_CONFIG cnn_config = { 1,
+                            0,
+                            0,
+                            0,
+                            0,
+                            { {
+                                1,
+                                5,
+                                2,
+                                1,
+                                2,
+                                3,
+                                0,
+                                weights_1,
+                                bias_1,
+                                PADDING_SAME_ZERO,
+                                NONE,
+                                1,
+                                0,
+                                BRANCH_NO_COPY,
+                                BRANCH_NOC,
+                                {},
+                                {},
+                                0,
+                            } } };
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected_1_same, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  // Change padding to valid
+  cnn_config.layer_config[0].pad = PADDING_VALID;
+
+  RunCNNTest(image_width, image_height, input, expected_1_valid, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  float expected_12_same[] = {
+    15,  -12,  6,    36,   -9,   -528, 377,  -184, 513,  558,  -12,  24,
+    6,   -30,  -15,  -33,  -21,  166,  154,  -546, -356, -718, -30,  -21,
+    433, -221, 561,  711,  -33,  -153, 247,  -83,  -87,  453,  -111, 321,
+    215, -657, -409, -845, -93,  17,   -43,  -243, -55,  -215, -327, -219,
+    133, -71,  -447, 633,  -219, 435,  785,  -73,  -177, 463,  -131, 381,
+    65,  -207, 77,   -59,  -651, -365, -797, -213, -15,  -155, -387, -259,
+    355, -182, -150, 990,  -231, 582,  747,  -36,  -540, 520,  -215, 633,
+    176, -540, -133, -491, -687, -426, -882, -102, 143,  77,   -639, -427,
+    65,  -37,  57,   127,  -17,  -105, 135,  -51,  60,   195,  -30,  78,
+    31,  -105, -57,  -125, -45,  17,   -11,  -147, -72,  -168, -84,  -57,
+    457, -233, 618,  733,  -26,  -540, 470,  -205, 264,  829,  -116, 336,
+    227, -693, -440, -900, -72,  153,  107,  -609, -294, -698, -342, -229,
+    157, -83,  69,   349,  -59,  -201, 409,  -125, 27,   587,  -115, 333,
+    77,  -243, -107, -267, -171, 7,    -105, -369, -135, -379, -339, -227,
+  };
+  float expected_12_valid[] = {
+    -30,  15,   -12,  6,    36,   -9,   -528, 377,  -184, 513,  558,  -12,
+    24,   24,   6,    6,    -30,  -15,  -33,  -21,  166,  154,  -546, -356,
+    -718, -30,  -21,  -39,  -657, 433,  -221, 561,  711,  -33,  -153, 247,
+    -83,  -87,  453,  -111, 321,  321,  215,  215,  -657, -409, -845, -93,
+    17,   -43,  -243, -55,  -215, -327, -219, -435, -207, 133,  -71,  -447,
+    633,  -219, 435,  785,  -73,  -177, 463,  -131, 381,  381,  65,   65,
+    -207, 77,   -59,  -651, -365, -797, -213, -15,  -155, -387, -259, -515,
+    -540, 355,  -182, -150, 990,  -231, 582,  747,  -36,  -540, 520,  -215,
+    633,  633,  176,  176,  -540, -133, -491, -687, -426, -882, -102, 143,
+    77,   -639, -427, -851, -105, 65,   -37,  57,   127,  -17,  -105, 135,
+    -51,  60,   195,  -30,  78,   78,   31,   31,   -105, -57,  -125, -45,
+    17,   -11,  -147, -72,  -168, -84,  -57,  -111, -693, 457,  -233, 618,
+    733,  -26,  -540, 470,  -205, 264,  829,  -116, 336,  336,  227,  227,
+    -693, -440, -900, -72,  153,  107,  -609, -294, -698, -342, -229, -455,
+    -243, 157,  -83,  69,   349,  -59,  -201, 409,  -125, 27,   587,  -115,
+    333,  333,  77,   77,   -243, -107, -267, -171, 7,    -105, -369, -135,
+    -379, -339, -227, -451,
+  };
+
+  // Change skip_width, skip_height to {2, 3}
+  cnn_config.layer_config[0].skip_width = 3;
+  cnn_config.layer_config[0].skip_height = 2;
+  // Set padding to same
+  cnn_config.layer_config[0].pad = PADDING_SAME_ZERO;
+
+  RunCNNTest(image_width, image_height, input, expected_12_same, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  // Change padding to valid
+  cnn_config.layer_config[0].pad = PADDING_VALID;
+  RunCNNTest(image_width, image_height, input, expected_12_valid, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  cnn_config.layer_config[0].filter_width = 4;
+  cnn_config.layer_config[0].filter_height = 3;
+  float weights_2[] = { -1, -3, -1, -3, 0, 2, -2, 4, 3, 0, 1, 4 };
+  float bias_2[] = { -4 };
+  cnn_config.layer_config[0].weights = weights_2;
+  cnn_config.layer_config[0].bias = bias_2;
+
+  cnn_config.layer_config[0].skip_width = 5;
+  cnn_config.layer_config[0].skip_height = 2;
+  float expected_2_same[] = {
+    -13,  -31,  -13,  -31,  -4,   -10,  -22,  -10,  -22,  -4,   -185, -547,
+    -185, -547, -4,   -13,  -31,  -13,  -31,  -4,   -4,   14,   -22,  32,
+    -4,   -4,   8,    -16,  20,   -4,   -4,   358,  -366, 720,  -4,   -4,
+    14,   -22,  32,   -4,   -195, -658, -213, -622, -4,   -16,  -94,  -28,
+    -70,  -4,   459,  -244, 97,   480,  -4,   -85,  -328, -103, -292, -4,
+    -4,   432,  -440, 868,  -4,   -4,   56,   -64,  116,  -4,   -4,   156,
+    -164, 316,  -4,   -4,   212,  -220, 428,  -4,   582,  -208, 146,  664,
+    -4,   -130, -652, -190, -532, -4,   166,  -214, 6,    106,  -4,   192,
+    -388, -24,  44,   -4,   -4,   132,  -140, 268,  -4,   -4,   428,  -436,
+    860,  -4,   -4,   136,  -144, 276,  -4,   -4,   252,  -260, 508,  -4,
+    21,   -541, -115, -269, -4,   416,  -688, -16,  176,  -4,   173,  -103,
+    33,   177,  -4,   168,  -640, -88,  -128, -4,   -4,   354,  -362, 712,
+    -4,   -4,   452,  -460, 908,  -4,   -4,   62,   -70,  128,  -4,   -4,
+    420,  -428, 844,  -4,   499,  -106, 141,  610,  -4,   666,  -46,  210,
+    866,  -4,   47,   -148, -19,  -16,  -4,   605,  -85,  181,  763,  -4,
+    -4,   64,   -72,  132,  -4,   -4,   24,   -32,  52,   -4,   -4,   92,
+    -100, 188,  -4,   -4,   50,   -58,  104,  -4,   -132, -694, -200, -558,
+    -4,   15,   -73,  -13,  -17,  -4,   -62,  -610, -158, -418, -4,   -36,
+    -343, -90,  -235, -4,   -4,   456,  -464, 916,  -4,   -4,   42,   -50,
+    88,   -4,   -4,   400,  -408, 804,  -4,   -4,   222,  -230, 448,  -4,
+    606,  -244, 146,  676,  -4,   9,    -172, -37,  -80,  -4,   480,  -370,
+    76,   438,  -4,   223,  -340, -3,   112,  -4,   -4,   156,  -164, 316,
+    -4,   -4,   108,  -116, 220,  -4,   -4,   240,  -248, 484,  -4,   -4,
+    220,  -228, 444,  -4,
+  };
+  float expected_2_valid[] = {
+    -13,  -31,  -13,  -31,  -4,   -10,  -22,  -10,  -22,  -4,   -185, -547,
+    -185, -547, -4,   -13,  -31,  -13,  -31,  -4,   14,   -22,  32,   -4,
+    -4,   8,    -16,  20,   -4,   -4,   358,  -366, 720,  -4,   -4,   14,
+    -22,  32,   -195, -658, -213, -622, -4,   -16,  -94,  -28,  -70,  -4,
+    459,  -244, 97,   480,  -4,   -85,  -328, -103, -292, -4,   432,  -440,
+    868,  -4,   -4,   56,   -64,  116,  -4,   -4,   156,  -164, 316,  -4,
+    -4,   212,  -220, 428,  582,  -208, 146,  664,  -4,   -130, -652, -190,
+    -532, -4,   166,  -214, 6,    106,  -4,   192,  -388, -24,  44,   -4,
+    132,  -140, 268,  -4,   -4,   428,  -436, 860,  -4,   -4,   136,  -144,
+    276,  -4,   -4,   252,  -260, 508,  21,   -541, -115, -269, -4,   416,
+    -688, -16,  176,  -4,   173,  -103, 33,   177,  -4,   168,  -640, -88,
+    -128, -4,   354,  -362, 712,  -4,   -4,   452,  -460, 908,  -4,   -4,
+    62,   -70,  128,  -4,   -4,   420,  -428, 844,  499,  -106, 141,  610,
+    -4,   666,  -46,  210,  866,  -4,   47,   -148, -19,  -16,  -4,   605,
+    -85,  181,  763,  -4,   64,   -72,  132,  -4,   -4,   24,   -32,  52,
+    -4,   -4,   92,   -100, 188,  -4,   -4,   50,   -58,  104,  -132, -694,
+    -200, -558, -4,   15,   -73,  -13,  -17,  -4,   -62,  -610, -158, -418,
+    -4,   -36,  -343, -90,  -235, -4,   456,  -464, 916,  -4,   -4,   42,
+    -50,  88,   -4,   -4,   400,  -408, 804,  -4,   -4,   222,  -230, 448,
+    606,  -244, 146,  676,  -4,   9,    -172, -37,  -80,  -4,   480,  -370,
+    76,   438,  -4,   223,  -340, -3,   112,  -4,   156,  -164, 316,  -4,
+    -4,   108,  -116, 220,  -4,   -4,   240,  -248, 484,  -4,   -4,   220,
+    -228, 444,  236,  -4,   76,   316,  -4,   164,  -4,   52,   220,  -4,
+    362,  -4,   118,  484,  -4,   332,  -4,   108,  444,
+  };
+  // Set padding to same
+  cnn_config.layer_config[0].pad = PADDING_SAME_ZERO;
+
+  RunCNNTest(image_width, image_height, input, expected_2_same, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  cnn_config.layer_config[0].pad = PADDING_VALID;
+
+  RunCNNTest(image_width, image_height, input, expected_2_valid, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  cnn_config.layer_config[0].skip_width = 2;
+  cnn_config.layer_config[0].skip_height = 5;
+  float expected_21_same[] = {
+    -31,  -19,  -49,   -191, -565, -194, -574, -13,  14,   -22,  44,   -16,
+    382,  -366, 738,   -22,  -4,   23,   32,   545,  20,   204,  720,  5,
+    -4,   -4,   -4,    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,
+    -4,   -4,   -4,    -4,   -658, -252, -748, -114, -334, -192, -568, -112,
+    432,  -440, 928,   -64,  276,  -164, 532,  -220, -4,   304,  868,  266,
+    116,  400,  316,   104,  -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,
+    -4,   -4,   -4,    -4,   -4,   -4,   -4,   -4,   -208, -288, -856, -290,
+    -862, -202, -598,  -132, 132,  -140, 700,  -436, 1000, -144, 532,  -260,
+    -4,   712,  268,   422,  860,  450,  276,  124,  -4,   -4,   -4,   -4,
+    -4,   -4,   -4,    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,
+    -541, -411, -1225, -265, -787, -249, -739, -216, 354,  -362, 1168, -460,
+    974,  -70,  552,   -428, -4,   859,  712,  323,  908,  665,  128,  208,
+    -4,   -4,   -4,    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,
+    -4,   -4,   -4,    -4,   -106, -52,  -148, -66,  -190, -79,  -229, -31,
+    64,   -72,  160,   -32,  148,  -100, 242,  -58,  -4,   72,   132,  154,
+    52,   125,  188,   23,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,
+    -4,   -4,   -4,    -4,   -4,   -4,   -4,   -4,   -694, -257, -763, -229,
+    -679, -319, -949,  -117, 456,  -464, 962,  -50,  492,  -408, 1030, -230,
+    -4,   295,  916,   625,  88,   537,  804,  109,  -4,   -4,   -4,   -4,
+    -4,   -4,   -4,    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,
+    -244, -140, -412,  -182, -538, -238, -706, -116, 156,  -164, 428,  -116,
+    464,  -248, 708,   -228, -4,   244,  316,  418,  220,  454,  484,  108,
+    -4,   -4,   -4,    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,
+    -4,   -4,   -4,    -4,
+  };
+  float expected_21_valid[] = {
+    -13,  -31,  -19,  -49,  -191, -565, -194, -574, -13,  -31,   -4,   14,
+    -22,  44,   -16,  382,  -366, 738,  -22,  32,   23,   -4,    23,   32,
+    545,  20,   204,  720,  5,    32,   -4,   -4,   -4,   -4,    -4,   -4,
+    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,    -4,   -4,
+    -4,   -4,   -222, -658, -252, -748, -114, -334, -192, -568,  -112, -328,
+    -4,   432,  -440, 928,  -64,  276,  -164, 532,  -220, 428,   650,  -4,
+    304,  868,  266,  116,  400,  316,  104,  428,  -4,   -4,    -4,   -4,
+    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,    -4,   -4,
+    -4,   -4,   -4,   -4,   -72,  -208, -288, -856, -290, -862,  -202, -598,
+    -132, -388, -4,   132,  -140, 700,  -436, 1000, -144, 532,   -260, 508,
+    200,  -4,   712,  268,  422,  860,  450,  276,  124,  508,   -4,   -4,
+    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,    -4,   -4,
+    -4,   -4,   -4,   -4,   -4,   -4,   -183, -541, -411, -1225, -265, -787,
+    -249, -739, -216, -640, -4,   354,  -362, 1168, -460, 974,   -70,  552,
+    -428, 844,  533,  -4,   859,  712,  323,  908,  665,  128,   208,  844,
+    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,    -4,   -4,
+    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -38,  -106,  -52,  -148,
+    -66,  -190, -79,  -229, -31,  -85,  -4,   64,   -72,  160,   -32,  148,
+    -100, 242,  -58,  104,  98,   -4,   72,   132,  154,  52,    125,  188,
+    23,   104,  -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,    -4,   -4,
+    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,    -234, -694,
+    -257, -763, -229, -679, -319, -949, -117, -343, -4,   456,   -464, 962,
+    -50,  492,  -408, 1030, -230, 448,  686,  -4,   295,  916,   625,  88,
+    537,  804,  109,  448,  -4,   -4,   -4,   -4,   -4,   -4,    -4,   -4,
+    -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,   -4,    -4,   -4,
+    -84,  -244, -140, -412, -182, -538, -238, -706, -116, -340,  -4,   156,
+    -164, 428,  -116, 464,  -248, 708,  -228, 444,  236,  -4,    244,  316,
+    418,  220,  454,  484,  108,  444,
+  };
+
+  cnn_config.layer_config[0].pad = PADDING_SAME_ZERO;
+
+  RunCNNTest(image_width, image_height, input, expected_21_same, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+
+  cnn_config.layer_config[0].pad = PADDING_VALID;
+
+  RunCNNTest(image_width, image_height, input, expected_21_valid, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+}
+
+TEST_F(CNNTest, TestLargeKernelsAndStrides) {
+  float input_10x11[] = {
+    4,  4,  2,  4,  2,  -5, -2, 3, -1, 0,  0,  1,  2,  0,  -5, -2, -5, 1,  -3,
+    -1, 4,  -3, 2,  -2, 1,  0,  1, -3, -3, -4, -2, -2, 1,  -4, -1, 4,  1,  -4,
+    -4, -4, 3,  2,  -5, 3,  -5, 1, 2,  -4, 1,  -1, 3,  4,  -2, 3,  -3, 3,  0,
+    2,  -4, -5, -5, -2, -1, -2, 1, 1,  1,  -2, 4,  -5, 4,  -1, -1, 2,  3,  -4,
+    2,  2,  3,  0,  0,  1,  0,  3, 2,  3,  1,  -2, 3,  -4, 3,  2,  4,  -2, 0,
+    4,  -4, 1,  -3, -3, -3, -5, 1, -3, -5, 0,  4,  -1, -3, 2,
+  };
+
+  float weights_10x11[] = {
+    -3, 4,  -4, -3, -5, 1,  -2, 3,  1,  -4, -4, 0,  -1, 0,  3,  1,  -3, -2, 0,
+    -1, 1,  3,  -4, -4, -3, -3, -2, 4,  3,  -5, 4,  2,  -3, 4,  -2, -1, 2,  -1,
+    -5, 0,  -3, 0,  3,  -5, -5, 3,  -4, -1, -5, 3,  4,  0,  4,  -5, 2,  -1, 2,
+    -1, -1, -1, -5, 0,  -4, 3,  -1, 1,  1,  -1, 3,  2,  -5, -4, 0,  -4, 4,  -5,
+    -3, 4,  -5, 2,  -5, -4, -4, -1, 3,  3,  0,  2,  -4, 1,  -2, 1,  1,  0,  3,
+    -2, 0,  1,  2,  4,  -3, -1, -5, -5, 2,  -4, 1,  1,  2,  -4, -2, -2, 2,  1,
+    3,  4,  -5, 1,  -1, -3, -3, -1, -2, -5, 1,  -1, 0,  1,  4,  4,  0,  0,  4,
+    -3, -1, -5, -3, 0,  1,  1,  1,  -5, 3,  4,  3,  -5, 3,  -2, -2, 0,  -4, 0,
+    0,  -2, 1,  -4, -1, 0,  -5, -2, -2, -5, -3, -3, 1,  1,  -3, 2,  4,  2,  4,
+    -4, -3, 3,  1,  1,  3,  -4, 4,  -2, -3, -3, -3, -3, -4, -2, 3,  -5, 2,  4,
+    -1, -4, -4, 4,  -2, -1, 3,  -3, -4, -4, -2, 4,  1,  0,  2,  -1, 4,  -3, 1,
+    4,  -3, 4,  4,  0,  -4, 3,  -2, -3, 2,  3,  -1, -3, 2,  1,  4,  -2, -3, 1,
+    4,  -2, 2,  -2, -5, -2, 1,  4,  -1, -4, 4,  -5, 2,  -5, -4, -1, -2, 3,  1,
+    2,  1,  -5, 1,  -5, -4, -1, -2, 2,  -2, -4, -3, -2, -2, 4,  -1, 2,  2,  -4,
+    2,  -2, 4,  -4, -2, -2, 1,  -1, 1,  1,  1,  -4, -5, -2, 3,  -4, -1, 3,  -2,
+    3,  2,  -5, -4, 0,  3,  -2, -4, -5, 3,  -2, -4, 2,  -2, 1,  -4, 0,  2,  -5,
+    1,  -4, -1, -1, 4,  -5, -4, 0,  -5, -4, -3, -5, -4, 0,  2,  0,  -4, 2,  -2,
+    1,  1,  -3, 2,  0,  -4, 0,  -4, 1,  0,  -5, -1, -1, -1, -5, 4,  2,  2,  -4,
+    3,  -2, -2, 2,  -3, -2, -1, 2,  -4, -5, 2,  -2, -4, -5, -5, -1, 2,  -1, 0,
+    -5, -2, -2, -5, 0,  1,  -1, -5, 0,  3,  2,  3,  0,  -3, -2, 0,  -5, -1, -2,
+    2,  -4, -1, 2,  2,  -5, 2,  -4, 0,  3,  -3, 1,  0,  0,  1,  -5, -3, 1,  -1,
+    0,  -4, -3, 2,  -4, -4, 4,  -1, 0,  1,  2,  -4, -5, 4,  -2, 1,  -4, -4, -3,
+    -1, -1, 1,  -1, -4, -1, -4, -3, 2,  -1, -2, -4, 1,  1,  0,  -2, 0,  -4, 3,
+    -3, 0,  -4, -1, -4, 2,  -1, -2, -5, -1, -2, -3, 3,  -1, 0,  -3, 0,  1,  -5,
+    1,  -5, 0,  1,
+  };
+
+  float bias_10x11[] = { 3 };
+
+  float expected_10x11[] = {
+    118,
+  };
+
+  CNN_CONFIG cnn_config = { 1,
+                            0,
+                            0,
+                            0,
+                            0,
+                            { {
+                                1,
+                                23,
+                                20,
+                                1,
+                                15,
+                                20,
+                                0,
+                                weights_10x11,
+                                bias_10x11,
+                                PADDING_SAME_ZERO,
+                                NONE,
+                                0,
+                                0,
+                                BRANCH_NO_COPY,
+                                BRANCH_NOC,
+                                {},
+                                {},
+                                0,
+                            } } };
+
+  int image_height = 10;
+  int image_width = 11;
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input_10x11, expected_10x11,
+             &cnn_config, image_width, &thread_data, MSE_INT_TOL);
+
+  float input_11x10[] = {
+    -2, -2, 3,  -5, -1, -3, 1,  3,  2,  1,  1,  -5, 4,  1,  3,  -5, 3,  -3, -5,
+    0,  -1, -3, -3, 1,  1,  -5, -1, -5, -5, -3, 0,  1,  -3, -1, -3, -3, 0,  3,
+    4,  -4, -1, 3,  -3, -1, -3, 1,  -3, -2, -1, -4, -3, 2,  -4, 1,  -4, -1, -3,
+    -5, -1, 2,  3,  0,  2,  2,  -5, 4,  1,  2,  -1, -4, 4,  -4, -4, 0,  -1, 1,
+    -1, 1,  -3, -3, -2, 1,  2,  4,  4,  4,  -3, -3, 0,  1,  0,  1,  4,  1,  3,
+    4,  -3, -2, -4, 4,  2,  0,  3,  4,  -1, 2,  -2, 1,  -3, -2,
+  };
+
+  float weights_11x10[] = {
+    4,  -1, 1,  -1, 2,  4,  3,  3,  -4, 3,  -5, 1,  -1, -1, -2, -2, 0,  2,  -3,
+    -2, 3,  -5, -1, 0,  -1, -2, -2, -1, 2,  4,  3,  1,  0,  0,  -3, 3,  -4, -1,
+    -5, 4,  -2, -2, 1,  2,  -1, -3, 1,  2,  -5, 1,  -3, 3,  3,  0,  -4, -4, -5,
+    -3, -4, -4, 4,  -2, 4,  4,  -2, 2,  -5, -1, -2, -5, -1, 4,  -3, 3,  -2, 0,
+    -4, -3, 0,  -1, -2, 4,  2,  0,  -2, -5, -4, 1,  4,  -4, -2, 2,  -2, 1,  1,
+    -4, 1,  -4, -4, -2, 4,  2,  -1, -5, -5, 1,  -3, -3, 3,  -3, -5, -3, 4,  -1,
+    -1, -3, 0,  -4, 3,  -1, 0,  -2, 0,  -5, -2, -5, 2,  0,  -5, 2,  3,  -2, 2,
+    4,  -1, 1,  -3, 2,  3,  2,  0,  -5, -4, -5, 2,  1,  1,  -1, -2, 3,  4,  2,
+    -2, 4,  -2, 3,  1,  -4, -3, -1, 4,  4,  -3, -5, -2, 2,  0,  3,  -2, 3,  -1,
+    -4, 0,  -2, 0,  3,  4,  -2, -3, -2, 0,  3,  4,  2,  -4, 0,  1,  2,  2,  -1,
+    -1, 4,  1,  4,  -2, -1, -1, -5, 1,  -3, 3,  3,  -1, -4, 3,  -5, 0,  0,  -1,
+    -4, -1, -2, 4,  -2, 3,  3,  -3, 1,  -1, 2,  -1, 4,  4,  -2, -2, 4,  -2, 0,
+    3,  -3, -5, -1, -2, 4,  -4, 2,  -4, 0,  -2, 3,  -3, 2,  2,  -2, -5, -1, 4,
+    3,  -2, -1, 3,  3,  -1, 3,  0,  -3, 0,  4,  2,  0,  -1, 4,  1,  1,  2,  1,
+    3,  1,  1,  1,  -3, -5, -4, 4,  -4, 2,  0,  0,  -4, 1,  4,  -5, 4,  4,  0,
+    1,  0,  -2, -4, -4, -3, 0,  1,  -5, 4,  0,  -3, -2, -4, 2,  4,  1,  -5, 1,
+    -4, 1,  0,  -3, -3, 0,  2,  -5, 4,  3,  -2, -5, 3,  1,  -1, 0,  3,  -2, -2,
+    3,  -2, -5, 4,  1,  -2, 2,  -1, 0,  4,  0,  -5, 3,  -2, 1,  2,  1,  -5, -3,
+    -2, -5, 4,  -4, 0,  3,  2,  -1, -4, -1, 2,  1,  -2, 3,  -1, -4, 2,  0,  -3,
+    1,  -1, 2,  -5, -4, -1, -5, 1,  4,  3,  4,  2,  -3, 1,  -5, -1, 3,  0,  -1,
+    -4, 3,  4,  -5, 4,  4,  -3, 2,  -3, -1, -3, -5, -3, 2,  -3, -2, 1,  1,  0,
+    -5, 3,  2,  1,  -5, 1,  1,  1,  3,  4,  -4, -1, -2, 0,  -5, -3, -5, -2, -4,
+    3,  3,  3,  4,  0,  -4, -1, -5, 0,  -3, 1,  4,  4,  -4, 4,  -5, -5, -1, -2,
+    -5, 3,  -4, 4,  3,  0,  -3, 2,  -2, 0,  0,  4,  4,  0,  -2, 1,  -1, -3, 2,
+    -1, 1,  -3, -5,
+  };
+
+  float bias_11x10[] = {
+    -5,
+  };
+
+  float expected_11x10[] = {
+    36,  -84,  95,   45,  18,   46,   77,  -54, -99,  -149, 66,  49,  161, 11,
+    39,  61,   -66,  61,  4,    -3,   34,  -44, -23,  31,   64,  29,  47,  72,
+    -27, -27,  121,  -3,  100,  1,    30,  -78, -12,  -89,  -59, 8,   -16, 112,
+    91,  -102, -26,  -4,  30,   54,   4,   -84, -24,  -58,  27,  -53, -33, 5,
+    53,  -26,  63,   50,  -103, -130, -23, 6,   -104, -207, 73,  23,  77,  132,
+    38,  32,   -130, -44, -60,  7,    27,  176, 45,   -32,  -2,  99,  -97, 63,
+    69,  126,  47,   63,  136,  -57,  5,   16,  -40,  -157, 8,   38,  -44, -10,
+    91,  7,    122,  140, 30,   -105, 4,   -1,  113,  64,   180, 141,
+  };
+
+  cnn_config.layer_config[0].weights = weights_11x10;
+  cnn_config.layer_config[0].bias = bias_11x10;
+  cnn_config.layer_config[0].filter_width = 20;
+  cnn_config.layer_config[0].filter_height = 23;
+  cnn_config.layer_config[0].skip_width = 1;
+  cnn_config.layer_config[0].skip_height = 1;
+  image_height = 11;
+  image_width = 10;
+
+  RunCNNTest(image_width, image_height, input_11x10, expected_11x10,
+             &cnn_config, image_width, &thread_data, MSE_INT_TOL);
+}
+
+TEST_F(CNNTest, TestSoftsignSingleLayer) {
+  int image_width = 8;
+  int image_height = 8;
+  int filter_height = 5;
+  int filter_width = 4;
+  float input[] = {
+    -0.5220f, 0.8410f,  -0.8990f, -0.0090f, 0.6710f,  -0.9470f, -0.8240f,
+    -0.0870f, 0.5380f,  0.4750f,  0.570f,   -0.3760f, -0.6960f, -0.5940f,
+    -0.3830f, 0.080f,   -0.0980f, -0.4940f, -0.4030f, 0.9460f,  -0.6020f,
+    0.4220f,  0.6190f,  0.6640f,  -0.9210f, -0.1470f, -0.2480f, -0.1120f,
+    -0.580f,  -0.0650f, 0.3330f,  0.9860f,  -0.7430f, 0.7610f,  0.4840f,
+    0.1030f,  0.9570f,  0.6120f,  -0.5240f, -0.1220f, -0.5850f, -0.270f,
+    0.7840f,  -0.9790f, 0.7290f,  -0.30f,   -0.6460f, 0.0780f,  0.4750f,
+    -0.0510f, 0.4550f,  0.3850f,  -0.7230f, 0.4460f,  -0.6260f, -0.810f,
+    0.8720f,  -0.2120f, -0.580f,  -0.9510f, -0.8430f, -0.1340f, -0.0850f,
+    0.9190f,
+  };
+  float expected_same[] = {
+    0.430f,   0.660f,  0.5510f,  -0.610f,  0.450f,  -0.1610f, 0.0520f,  0.3240f,
+    0.6820f,  0.3820f, 0.6360f,  0.7480f,  0.3080f, 0.090f,   0.3910f,  0.1730f,
+    0.340f,   0.6660f, -0.4990f, 0.4280f,  0.1540f, 0.120f,   0.4670f,  0.6150f,
+    -0.3880f, 0.7590f, 0.4190f,  0.7350f,  0.5310f, -0.5160f, -0.1760f, 0.6790f,
+    -0.6780f, 0.5470f, 0.5750f,  -0.6420f, 0.7210f, -0.4620f, 0.5430f,  0.770f,
+    -0.1990f, 0.3950f, 0.7860f,  -0.4380f, 0.7540f, 0.2640f,  -0.6430f, 0.4510f,
+    -0.1260f, 0.1590f, -0.2110f, -0.0560f, 0.6570f, 0.680f,   0.5870f,  0.4720f,
+    0.4040f,  0.3630f, 0.670f,   0.2360f,  0.410f,  0.6980f,  -0.5350f, 0.3940f,
+  };
+  float expected_replicate[] = {
+    0.540f,   0.7230f,  -0.3530f, -0.2130f, 0.7440f,  -0.4470f, -0.6260f,
+    -0.2050f, 0.7230f,  0.4630f,  0.5920f,  0.7440f,  0.6080f,  0.3130f,
+    -0.5670f, -0.4720f, 0.5480f,  0.6660f,  -0.4990f, 0.4280f,  0.1540f,
+    0.120f,   0.3390f,  0.6090f,  0.4160f,  0.7590f,  0.4190f,  0.7350f,
+    0.5310f,  -0.5160f, -0.490f,  0.4450f,  -0.610f,  0.5470f,  0.5750f,
+    -0.6420f, 0.7210f,  -0.4620f, 0.3150f,  0.7370f,  -0.5820f, 0.3950f,
+    0.7860f,  -0.4380f, 0.7540f,  0.2640f,  -0.7430f, -0.5340f, -0.6270f,
+    0.4430f,  0.4730f,  0.4570f,  0.7450f,  0.630f,   0.2620f,  0.3140f,
+    -0.1840f, 0.1810f,  0.7210f,  0.2760f,  0.6430f,  0.6720f,  -0.4390f,
+    0.2040f,
+  };
+  float expected_valid[] = {
+    0.6660f,  -0.4990f, 0.4280f,  0.1540f,  0.120f,  0.7590f,  0.4190f,
+    0.7350f,  0.5310f,  -0.5160f, 0.5470f,  0.5750f, -0.6420f, 0.7210f,
+    -0.4620f, 0.3950f,  0.7860f,  -0.4380f, 0.7540f, 0.2640f,
+  };
+  float weights[] = {
+    0.6210f,  0.3710f,  -0.2770f, -0.7230f, -0.2450f, 0.6770f,  0.3080f,
+    -0.9880f, -0.080f,  0.7190f,  -0.6760f, -0.0170f, -0.8970f, 0.8260f,
+    0.7390f,  -0.4550f, -0.4260f, -0.6330f, 0.0880f,  -0.9390f,
+  };
+  float bias[] = {
+    0.750f,
+  };
+
+  CNN_CONFIG cnn_config = { 1,
+                            0,
+                            0,
+                            0,
+                            0,
+                            { {
+                                1,
+                                filter_width,
+                                filter_height,
+                                1,
+                                1,
+                                1,
+                                0,
+                                weights,
+                                bias,
+                                PADDING_SAME_ZERO,
+                                SOFTSIGN,
+                                0,
+                                0,
+                                BRANCH_NO_COPY,
+                                BRANCH_NOC,
+                                {},
+                                {},
+                                0,
+                            } } };
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected_same, &cnn_config,
+             image_width, &thread_data, MSE_FLOAT_TOL);
+
+  cnn_config.layer_config[0].pad = PADDING_SAME_REPLICATE;
+
+  RunCNNTest(image_width, image_height, input, expected_replicate, &cnn_config,
+             image_width, &thread_data, MSE_FLOAT_TOL);
+
+  cnn_config.layer_config[0].pad = PADDING_VALID;
+
+  RunCNNTest(image_width, image_height, input, expected_valid, &cnn_config,
+             image_width, &thread_data, MSE_FLOAT_TOL);
+}
+
+TEST_F(CNNTest, TestBranchTensorAdd) {
+  int filter_width = 2;
+  int filter_height = 3;
+
+  int image_width = 4;
+  int image_height = 4;
+
+  float input[] = {
+    -3, -2, -2, 0, -1, 3, 2, -2, 1, 3, 4, 0, 2, -5, -4, 0,
+  };
+
+  float weights[] = {
+    -3, -1, 4,  -1, -3, 3,  3,  0,  2,  0,  3,  2,  4,  4, 4,  -5, 1, -4,
+    2,  -4, 1,  -3, 0,  4,  -5, 4,  0,  -4, -3, -1, 0,  0, -2, 0,  0, 2,
+    -5, -1, 1,  -3, 3,  4,  3,  0,  1,  -1, 1,  1,  2,  4, -2, -5, 2, -2,
+    3,  -2, 4,  -1, 0,  2,  3,  2,  -2, -1, -3, 1,  3,  4, -1, -3, 0, -4,
+    4,  2,  -3, -3, -1, 0,  1,  0,  3,  3,  -3, 0,  3,  2, -5, -3, 4, -5,
+    3,  -1, -1, -3, 0,  1,  -1, -4, 2,  4,  -1, 4,  -1, 1, 3,  4,  4, 4,
+    0,  -1, -3, -3, -3, -3, 2,  -3, -2, 2,  3,  -3,
+  };
+
+  float bias[] = {
+    3, 4, -1, -1, 2, 1, -2, 1, 4, 1, 3,
+  };
+
+  float expected[] = {
+    -11502, -4101, -3424, 668,   -17950, -5470, -5504, 626,
+    4835,   446,   1779,  -3483, 3679,   -4214, 4578,  -105,
+  };
+
+  int channels = 2;
+
+  CNN_CONFIG cnn_config = { 6,
+                            0,
+                            0,
+                            0,
+                            0,
+                            { {
+                                  1,
+                                  filter_width,
+                                  filter_height,
+                                  channels,
+                                  1,
+                                  1,
+                                  0,
+                                  weights,
+                                  bias,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  0,
+                                  BRANCH_NO_COPY,
+                                  BRANCH_NOC,
+                                  {},
+                                  {},
+                                  -1,
+                              },
+                              {
+                                  channels,
+                                  filter_width,
+                                  filter_height,
+                                  channels,
+                                  1,
+                                  1,
+                                  0,
+                                  nullptr,
+                                  nullptr,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  0,
+                                  BRANCH_INPUT,
+                                  BRANCH_NOC,
+                                  {
+                                      0x02,
+                                      0,
+                                      0x00,
+                                  },
+                                  {},
+                                  -1,
+                              },
+                              {
+                                  channels,
+                                  filter_width,
+                                  filter_height,
+                                  channels,
+                                  1,
+                                  1,
+                                  0,
+                                  nullptr,
+                                  nullptr,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  1,
+                                  BRANCH_NO_COPY,
+                                  BRANCH_NOC,
+                                  {},
+                                  {},
+                                  -1,
+                              },
+                              {
+                                  channels,
+                                  filter_width,
+                                  filter_height,
+                                  channels,
+                                  1,
+                                  1,
+                                  0,
+                                  nullptr,
+                                  nullptr,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  1,
+                                  BRANCH_NO_COPY,
+                                  BRANCH_NOC,
+                                  {},
+                                  {},
+                                  -1,
+                              },
+                              {
+                                  channels,
+                                  filter_width,
+                                  filter_height,
+                                  channels,
+                                  1,
+                                  1,
+                                  0,
+                                  nullptr,
+                                  nullptr,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  0,
+                                  BRANCH_NO_COPY,
+                                  BRANCH_ADD,
+                                  {
+                                      0x00,
+                                      0,
+                                      0x02,
+                                  },
+                                  {},
+                                  -1,
+                              },
+                              {
+                                  channels,
+                                  filter_width,
+                                  filter_height,
+                                  1,
+                                  1,
+                                  1,
+                                  0,
+                                  nullptr,
+                                  nullptr,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  0,
+                                  BRANCH_NO_COPY,
+                                  BRANCH_NOC,
+                                  {},
+                                  {},
+                                  0,
+                              } } };
+
+  // Weights and biases need to be specified separately because
+  // of the offset.
+  AssignLayerWeightsBiases(&cnn_config, weights, bias);
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+}
+
+TEST_F(CNNTest, TestBranchTensorConcatenation) {
+  int filter_width = 2;
+  int filter_height = 3;
+
+  int image_width = 4;
+  int image_height = 4;
+
+  float input[] = {
+    -3, -2, -2, 0, -1, 3, 2, -2, 1, 3, 4, 0, 2, -5, -4, 0,
+  };
+
+  float weights[] = {
+    3,  0,  2,  0,  2,  3,  1,  -3, 1,  -5, -3, 0,  -4, 4,  0,  -5, 0,  -5, -1,
+    -2, -5, 0,  -3, 2,  -4, 2,  0,  2,  -1, 0,  -4, 3,  0,  0,  -1, -5, 2,  -1,
+    4,  -4, -2, -3, -3, 3,  4,  -2, -1, -4, -1, 4,  4,  -1, 4,  3,  -4, 2,  -2,
+    -4, -3, -2, 3,  -3, -5, -1, 3,  -2, 4,  1,  -4, -3, -5, -5, -3, 4,  -2, -2,
+    -1, -5, -5, 0,  -1, -2, -3, 3,  -4, -5, 2,  -3, 1,  0,  -5, 2,  2,  -2, 0,
+    2,  2,  -2, 4,  2,  2,  0,  1,  -5, -3, 0,  2,  -2, 1,  2,  -5, 2,  3,  3,
+    -1, 3,  0,  -3, 3,  -4, -4, 3,  3,  -4, -2, 2,  -2, 2,  -2, -1, 3,  0,
+  };
+
+  float bias[] = {
+    -3, -5, 4, -4, -3, -2, 0, 3, -4, 4, -3,
+  };
+
+  float expected[] = {
+    -33533, -32087, -6741,  -2124, 39979, 41453, 14034, 689,
+    -22611, -42203, -14882, -239,  15781, 15963, 9524,  837,
+  };
+
+  int channels = 2;
+
+  CNN_CONFIG cnn_config = { 6,
+                            0,
+                            0,
+                            0,
+                            0,
+                            { {
+                                  1,
+                                  filter_width,
+                                  filter_height,
+                                  channels,
+                                  1,
+                                  1,
+                                  0,
+                                  weights,
+                                  bias,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  0,
+                                  BRANCH_NO_COPY,
+                                  BRANCH_NOC,
+                                  {},
+                                  {},
+                                  -1,
+                              },
+                              {
+                                  channels,
+                                  filter_width,
+                                  filter_height,
+                                  channels,
+                                  1,
+                                  1,
+                                  0,
+                                  nullptr,
+                                  nullptr,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  0,
+                                  BRANCH_INPUT,
+                                  BRANCH_NOC,
+                                  {
+                                      0x02,
+                                      0,
+                                      0x00,
+                                  },
+                                  {},
+                                  -1,
+                              },
+                              {
+                                  channels,
+                                  filter_width,
+                                  filter_height,
+                                  channels,
+                                  1,
+                                  1,
+                                  0,
+                                  nullptr,
+                                  nullptr,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  1,
+                                  BRANCH_NO_COPY,
+                                  BRANCH_NOC,
+                                  {},
+                                  {},
+                                  -1,
+                              },
+                              {
+                                  channels,
+                                  filter_width,
+                                  filter_height,
+                                  channels,
+                                  1,
+                                  1,
+                                  0,
+                                  nullptr,
+                                  nullptr,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  1,
+                                  BRANCH_NO_COPY,
+                                  BRANCH_NOC,
+                                  {},
+                                  {},
+                                  -1,
+                              },
+                              {
+                                  channels,
+                                  filter_width,
+                                  filter_height,
+                                  channels,
+                                  1,
+                                  1,
+                                  0,
+                                  nullptr,
+                                  nullptr,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  0,
+                                  BRANCH_NO_COPY,
+                                  BRANCH_CAT,
+                                  {
+                                      0x00,
+                                      0,
+                                      0x02,
+                                  },
+                                  {},
+                                  -1,
+                              },
+                              {
+                                  channels + channels,
+                                  filter_width,
+                                  filter_height,
+                                  1,
+                                  1,
+                                  1,
+                                  0,
+                                  nullptr,
+                                  nullptr,
+                                  PADDING_SAME_ZERO,
+                                  NONE,
+                                  0,
+                                  0,
+                                  BRANCH_NO_COPY,
+                                  BRANCH_NOC,
+                                  {},
+                                  {},
+                                  0,
+                              } } };
+
+  // Weights and biases need to be specified separately because
+  // of the offset.
+  AssignLayerWeightsBiases(&cnn_config, weights, bias);
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+}
+
+// TODO(logangw): Add test to test all combinations of branch_copy_type.
+
+TEST_F(CNNTest, TestBranchCombinations) {
+  int filter_width = 2;
+  int filter_height = 3;
+
+  int image_width = 4;
+  int image_height = 4;
+
+  float input[] = {
+    3, 2, -5, -4, 4, -2, -4, -3, 4, 2, -3, 2, -3, 1, -5, -1,
+  };
+
+  float weights[] = {
+    2,  3,  0,  4,  4,  3,  1,  0,  1,  -5, 4,  -3, 3,  0,  4,  -1, -1, -5,
+    2,  1,  -3, -5, 3,  -1, -3, -2, 0,  -2, 3,  0,  -2, -4, -2, -2, 2,  -5,
+    4,  -5, 0,  1,  -5, -4, -3, -4, 2,  -2, 1,  0,  3,  -2, -4, 3,  4,  -4,
+    -1, -1, -3, -2, -2, -1, 2,  0,  2,  -1, 2,  -4, -4, -1, 2,  0,  3,  -2,
+    -2, 3,  -3, 4,  -2, 4,  3,  4,  1,  0,  -2, -3, -5, 1,  -3, 2,  0,  -2,
+    -2, -1, -1, -5, -2, -3, -1, 3,  3,  4,  4,  0,  2,  1,  3,  -3, 2,  -5,
+    -5, 1,  -5, -1, 3,  3,  2,  -4, -1, 3,  -4, -2, -5, -2, 1,  3,  2,  2,
+    -5, -2, -3, -1, -2, -4, -1, -2, 2,  1,  -4, -4, 2,  0,  2,  0,  2,  -3,
+    -2, -4, 4,  0,  1,  -3, -5, 4,  -1, 2,  3,  -5, -1, 0,  4,  -1, -1, 3,
+    -1, -3, 3,  1,  4,  3,  4,  3,  -4, -5, -1, 3,  3,  -4, 3,  1,  3,  -5,
+    3,  4,  -5, 4,  2,  -1, -5, 2,  1,  0,  4,  0,  -3, 2,  0,  2,  -2, 1,
+    -1, -2, -1, -5, 4,  3,  3,  -2, 2,  4,  -5, -5, -3, -2, 4,  0,  -4, 1,
+  };
+
+  float bias[] = {
+    -1, 4, 0, 2, 2, -2, 0, -4, -5, -1, 1, -2, 3, 0, 4, -2, 1, 0, 0,
+  };
+
+  float expected[] = {
+    149496, 15553,  -24193, -20956, 134094, 86432,  -68283, -6366,
+    -53031, 133739, 67407,  -13539, -53205, -58635, -20033, 1979,
+  };
+
+  int channels = 2;
+
+  CNN_CONFIG cnn_config = { 10,
+                            0,
+                            0,
+                            0,
+                            0,
+                            {
+                                {
+                                    1,
+                                    filter_width,
+                                    filter_height,
+                                    channels,
+                                    1,
+                                    1,
+                                    0,
+                                    weights,
+                                    bias,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_NOC,
+                                    {},
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    channels,
+                                    filter_width,
+                                    filter_height,
+                                    channels,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_INPUT,
+                                    BRANCH_NOC,
+                                    {
+                                        0x06,
+                                        0,
+                                        0x00,
+                                    },
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    channels,
+                                    filter_width,
+                                    filter_height,
+                                    channels,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    2,
+                                    BRANCH_OUTPUT,
+                                    BRANCH_NOC,
+                                    {
+                                        0x08,
+                                        0,
+                                        0x00,
+                                    },
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    channels,
+                                    filter_width,
+                                    filter_height,
+                                    channels,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    3,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_NOC,
+                                    {},
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    channels,
+                                    filter_width,
+                                    filter_height,
+                                    channels,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    2,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_ADD,
+                                    {
+                                        0x00,
+                                        0,
+                                        0x08,
+                                    },
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    channels,
+                                    filter_width,
+                                    filter_height,
+                                    channels,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    2,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_NOC,
+                                    {},
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    channels,
+                                    filter_width,
+                                    filter_height,
+                                    channels,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    1,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_NOC,
+                                    {},
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    channels,
+                                    filter_width,
+                                    filter_height,
+                                    channels,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    1,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_ADD,
+                                    {
+                                        0x00,
+                                        0,
+                                        0x0C,
+                                    },
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    channels,
+                                    filter_width,
+                                    filter_height,
+                                    channels,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_ADD,
+                                    {
+                                        0x00,
+                                        0,
+                                        0x02,
+                                    },
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    channels,
+                                    filter_width,
+                                    filter_height,
+                                    1,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_NOC,
+                                    {},
+                                    {},
+                                    0,
+                                },
+                            } };
+
+  // Weights and biases need to be specified separately because
+  // of the offset.
+  AssignLayerWeightsBiases(&cnn_config, weights, bias);
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+}
+
+TEST_F(CNNTest, TestSplittingTensors) {
+  int filter_width = 2;
+  int filter_height = 3;
+
+  int image_width = 4;
+  int image_height = 4;
+
+  float input[] = {
+    -1, -1, 2, 1, 3, 2, 4, -3, -4, -2, 2, -3, 1, -3, 4, -2,
+  };
+
+  float weights[] = {
+    -4, 1,  0,  2,  3,  4,  4,  -4, -5, -3, 2,  2,  -4, -3, 3,  2,
+    4,  -4, -3, -4, -4, 1,  -3, -5, -3, 4,  2,  -2, 2,  -1, -4, -1,
+    -2, -3, 1,  1,  0,  -5, -1, 3,  3,  -5, -3, 0,  -3, 1,  -3, -1,
+    1,  -3, -2, -2, 4,  -2, 0,  1,  2,  2,  -4, 2,  4,  0,  -5, -2,
+    4,  4,  -5, 1,  0,  2,  -2, -5, -5, -3, -5, -5, 4,  -3, 0,  0,
+    -4, -4, 0,  -5, -4, 0,  0,  -3, -5, -3, -1, 2,  -1, 4,  -1, 2,
+  };
+
+  float bias[] = {
+    -4, -2, -3, -3, 3, 1, -2,
+  };
+
+  float expected[] = {
+    530,  -762,  1469, 777,  849,   -771, -1698, 600,
+    -658, -1821, 98,   -668, -1798, 30,   887,   -971,
+  };
+
+  CNN_CONFIG cnn_config = { 3,
+                            0,
+                            0,
+                            0,
+                            0,
+                            {
+                                {
+                                    1,
+                                    filter_width,
+                                    filter_height,
+                                    4,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_OUTPUT,
+                                    BRANCH_NOC,
+                                    {
+                                        0x02,
+                                        2,
+                                        0x00,
+                                    },
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    4,
+                                    filter_width,
+                                    filter_height,
+                                    2,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_CAT,
+                                    {
+                                        0x00,
+                                        0,
+                                        0x02,
+                                    },
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    4,
+                                    filter_width,
+                                    filter_height,
+                                    1,
+                                    1,
+                                    1,
+                                    0,
+                                    nullptr,
+                                    nullptr,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_NOC,
+                                    {},
+                                    {},
+                                    0,
+                                },
+                            } };
+
+  // Weights and biases need to be specified separately because
+  // of the offset.
+  AssignLayerWeightsBiases(&cnn_config, weights, bias);
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected, &cnn_config,
+             image_width, &thread_data, MSE_INT_TOL);
+}
+
+TEST_F(CNNTest, TestOutputChannelsCount) {
+  int filter_width = 1;
+  int filter_height = 1;
+
+  int image_width = 2;
+  int image_height = 2;
+
+  float input[] = { 0, 0, 0, 0 };
+
+  float weights[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+  float bias[] = { 0, 0, 0, 0, 0, 0 };
+
+  float expected[] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  };
+
+  CNN_CONFIG cnn_config = { 3,
+                            0,
+                            0,
+                            0,
+                            0,
+                            {
+                                {
+                                    1,
+                                    filter_width,
+                                    filter_height,
+                                    2,
+                                    1,
+                                    1,
+                                    0,
+                                    weights,
+                                    bias,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_INPUT,
+                                    BRANCH_NOC,
+                                    {
+                                        0x06,
+                                        0,
+                                        0x00,
+                                    },
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    1,
+                                    filter_width,
+                                    filter_height,
+                                    2,
+                                    1,
+                                    1,
+                                    0,
+                                    weights,
+                                    bias,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    2,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_CAT,
+                                    {
+                                        0x00,
+                                        0,
+                                        0x03,
+                                    },
+                                    {},
+                                    -1,
+                                },
+                                {
+                                    2,
+                                    filter_width,
+                                    filter_height,
+                                    2,
+                                    1,
+                                    1,
+                                    0,
+                                    weights,
+                                    bias,
+                                    PADDING_SAME_ZERO,
+                                    NONE,
+                                    0,
+                                    0,
+                                    BRANCH_NO_COPY,
+                                    BRANCH_CAT,
+                                    {
+                                        0x00,
+                                        0,
+                                        0x04,
+                                    },
+                                    {},
+                                    0,
+                                },
+                            } };
+
+  // Weights and biases need to be specified separately because
+  // of the offset.
+  AssignLayerWeightsBiases(&cnn_config, weights, bias);
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected, &cnn_config,
+             image_width, &thread_data, MSE_FLOAT_TOL);
+}
+
+TEST_F(CNNTest, TestBatchNorm) {
+  int image_width = 28;
+  int image_height = 28;
+  int filter_height = 7;
+  int filter_width = 7;
+  float input[] = {
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0117647f,  0.0705882f,  0.0705882f,  0.0705882f,
+    0.494118f,  0.533333f,  0.686275f,   0.101961f,   0.65098f,    1.0f,
+    0.968627f,  0.498039f,  0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.117647f,   0.141176f,   0.368627f,   0.603922f,
+    0.666667f,  0.992157f,  0.992157f,   0.992157f,   0.992157f,   0.992157f,
+    0.882353f,  0.67451f,   0.992157f,   0.94902f,    0.764706f,   0.25098f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.192157f,
+    0.933333f,  0.992157f,  0.992157f,   0.992157f,   0.992157f,   0.992157f,
+    0.992157f,  0.992157f,  0.992157f,   0.984314f,   0.364706f,   0.321569f,
+    0.321569f,  0.219608f,  0.152941f,   0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0705882f,  0.858824f,   0.992157f,
+    0.992157f,  0.992157f,  0.992157f,   0.992157f,   0.776471f,   0.713725f,
+    0.968627f,  0.945098f,  0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.313725f,   0.611765f,   0.419608f,   0.992157f,
+    0.992157f,  0.803922f,  0.0431373f,  0.0f,        0.168627f,   0.603922f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.054902f,  0.00392157f, 0.603922f,   0.992157f,   0.352941f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.545098f,  0.992157f,   0.745098f,   0.00784314f, 0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0431373f,
+    0.745098f,  0.992157f,  0.27451f,    0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.137255f,   0.945098f,
+    0.882353f,  0.627451f,  0.423529f,   0.00392157f, 0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.317647f,   0.941176f,   0.992157f,
+    0.992157f,  0.466667f,  0.0980392f,  0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.176471f,   0.729412f,   0.992157f,   0.992157f,
+    0.588235f,  0.105882f,  0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0627451f, 0.364706f,   0.988235f,   0.992157f,   0.733333f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.976471f,  0.992157f,   0.976471f,   0.25098f,    0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.180392f,   0.509804f,   0.717647f,   0.992157f,
+    0.992157f,  0.811765f,  0.00784314f, 0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.152941f,   0.580392f,
+    0.898039f,  0.992157f,  0.992157f,   0.992157f,   0.980392f,   0.713725f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0941176f, 0.447059f,  0.866667f,   0.992157f,   0.992157f,   0.992157f,
+    0.992157f,  0.788235f,  0.305882f,   0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0901961f,  0.258824f,   0.835294f,   0.992157f,
+    0.992157f,  0.992157f,  0.992157f,   0.776471f,   0.317647f,   0.00784314f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0705882f,  0.670588f,
+    0.858824f,  0.992157f,  0.992157f,   0.992157f,   0.992157f,   0.764706f,
+    0.313725f,  0.0352941f, 0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.215686f,  0.67451f,   0.886275f,   0.992157f,   0.992157f,   0.992157f,
+    0.992157f,  0.956863f,  0.521569f,   0.0431373f,  0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.533333f,   0.992157f,
+    0.992157f,  0.992157f,  0.831373f,   0.529412f,   0.517647f,   0.0627451f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f,        0.0f,        0.0f,
+    0.0f,       0.0f,       0.0f,        0.0f
+  };
+  float expected[] = {
+    -0.836424f, -0.857365f, -1.62739f,  -1.62739f,  -0.836424f, 5.40742f,
+    0.920853f,  -0.692567f, -0.836424f, -0.534405f, -1.62739f,  -0.836424f,
+    1.32602f,   1.36312f,   0.112766f,  -0.836424f, -0.192962f, 1.56975f,
+    2.45777f,   0.944414f,  -0.192962f, -1.5519f,   -1.5519f,   -0.554006f,
+    -0.192962f, 1.4231f,    -1.5519f,   -0.192962f, 1.3661f,    -1.5519f,
+    -1.5519f,   -0.192962f, -0.843708f, -0.359025f, -0.843708f, -0.843708f,
+    -0.843708f, 4.53065f,   0.0429584f, -0.796804f, -0.843708f, 0.3473f,
+    -0.843708f, -0.843708f, -0.114439f, 3.14817f,   0.0811934f, -0.843708f
+  };
+  float kernel[] = {
+    0.119643f,    -0.237864f,   0.0462892f,   0.0502297f,   -0.0134528f,
+    0.146347f,    0.153133f,    0.0513307f,   0.0752369f,   0.0135557f,
+    -0.111434f,   0.0941854f,   0.0788362f,   0.0299412f,   0.111762f,
+    0.144066f,    0.00431504f,  -0.0177954f,  0.0738092f,   -0.0344215f,
+    0.0832582f,   0.053989f,    -0.112691f,   0.0962145f,   0.0186525f,
+    -0.00660205f, -0.111962f,   -0.126801f,   -0.231625f,   0.17309f,
+    0.0748875f,   -0.179569f,   -0.00513812f, -0.156579f,   -0.147322f,
+    0.184168f,    0.189308f,    -0.200359f,   -0.0156733f,  0.140649f,
+    0.0858496f,   -0.0263217f,  -0.0740749f,  -0.112563f,   0.107528f,
+    0.0609729f,   -0.221625f,   0.0769944f,   -0.00900815f, -0.00136441f,
+    -0.0236521f,  -0.0418025f,  -0.00286299f, 0.12241f,     0.0964093f,
+    -0.0150897f,  0.0532171f,   0.0625916f,   0.116939f,    0.118024f,
+    0.161918f,    -0.00909767f, 0.100897f,    -0.054563f,   -0.175179f,
+    -0.0687892f,  0.00734235f,  0.109833f,    -0.113776f,   0.0595405f,
+    -0.170255f,   0.0124815f,   -0.0363301f,  -0.0127038f,  0.0445554f,
+    -0.0729894f,  0.107428f,    -0.0341417f,  0.132619f,    0.00984557f,
+    -0.00443654f, 0.202929f,    0.0945134f,   0.0148725f,   0.00998574f,
+    -0.0226449f,  0.0478197f,   -0.0793442f,  0.0707599f,   -0.084225f,
+    0.0865795f,   0.071104f,    -0.047894f,   0.0838322f,   0.0635493f,
+    -0.00370265f, -0.157247f,   -0.0289622f,  -0.0590963f,  0.13207f,
+    0.00468011f,  -0.0345372f,  0.217939f,    0.18861f,     -0.0290393f,
+    -0.0440664f,  0.0126197f,   -0.129132f,   -0.124943f,   0.0968156f,
+    -0.0853643f,  -0.182305f,   0.00461618f,  -0.147095f,   -0.230282f,
+    0.00856019f,  0.0278893f,   -0.0300229f,  0.0417871f,   0.0804717f,
+    -0.0768571f,  -0.0397085f,  -0.0601096f,  0.100901f,    -0.0184926f,
+    0.0350673f,   0.0971094f,   -0.0171837f,  -0.289644f,   -0.0899041f,
+    0.08998f,     -0.160319f,   -0.0195103f,  0.0392167f,   -0.137864f,
+    -0.0136294f,  0.0330886f,   -0.0409244f,  -0.092533f,   -0.0427934f,
+    -0.191144f,   -0.0969461f,  0.112035f,    0.138611f,    0.128717f,
+    0.191184f,    0.197462f
+  };
+  float bias[] = { 0.186703f, 0.204358f, -0.0230452f };
+
+  float bn_gamma[] = { 1.32173f, 1.26171f, 1.21966f };
+  float bn_beta[] = { -0.232595f, -0.222652f, -0.232209f };
+  float bn_mean[] = { 0.329233f, 0.199894f, 0.12389f };
+  float bn_std[] = { 0.311986f, 0.189737f, 0.247104f };
+
+  CNN_BATCHNORM_PARAMS bn_params = {
+    bn_gamma,
+    bn_beta,
+    bn_mean,
+    bn_std,
+  };
+
+  CNN_CONFIG cnn_config = {
+    1,
+    0,
+    0,
+    0,
+    0,
+    {
+        {
+            1,
+            filter_width,
+            filter_height,
+            3,
+            7,
+            7,
+            0,
+            kernel,
+            bias,
+            PADDING_VALID,
+            RELU,
+            0,
+            0,
+            BRANCH_NO_COPY,
+            BRANCH_NOC,
+            {},
+            bn_params,
+            0,
+        },
+    },
+  };
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected, &cnn_config,
+             image_width, &thread_data, MSE_FLOAT_TOL);
+}
+
+TEST_F(CNNTest, TestMultithreading) {
+  int image_height = 2;
+  int image_width = 2;
+  int filter_height = 3;
+  int filter_width = 3;
+
+  float input[] = {
+    -2,
+    4,
+    1,
+    0,
+  };
+
+  float weights[] = {
+    -4, 2, -2, 0,  -4, 4, -3, -3, -3, -1, 1,  0,  -5, -3, 0, -5, 0, 0,
+    -1, 0, 2,  -5, 0,  1, 4,  2,  1,  0,  -2, -1, -5, -3, 2, -2, 1, -5,
+  };
+
+  float bias[] = {
+    -4,
+    -3,
+    -2,
+    3,
+  };
+
+  float expected[] = {
+    2, 10, -8, -17, -24, 5, -15, 6, -5, -5, 7, -10, 4, 13, 9, -14,
+  };
+
+  CNN_CONFIG cnn_config = {
+    1,
+    0,
+    0,
+    0,
+    0,
+    {
+        {
+            1,
+            filter_width,
+            filter_height,
+            4,
+            1,
+            1,
+            0,
+            weights,
+            bias,
+            PADDING_SAME_ZERO,
+            NONE,
+            0,
+            0,
+            BRANCH_NO_COPY,
+            BRANCH_NOC,
+            {},
+            {},
+            0,
+        },
+    },
+  };
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  RunCNNTest(image_width, image_height, input, expected, &cnn_config,
+             image_width, &thread_data, MSE_FLOAT_TOL);
+
+  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
+  AVxWorker workers[4];
+
+  for (int i = 0; i < 4; ++i) {
+    winterface->init(&workers[i]);
+  }
+
+  thread_data = { 4, workers };
+
+  RunCNNTest(image_width, image_height, input, expected, &cnn_config,
+             image_width, &thread_data, MSE_FLOAT_TOL);
+
+  for (int i = 0; i < 4; ++i) {
+    winterface->end(&workers[i]);
+  }
+}
+
+TEST_F(CNNTest, TestMultiOutput) {
+  const int image_dim = 8;
+  const int image_ch = 3;
+  const int filter_dim = 2;
+  const int stride = 2;
+  const int num_filters = 2;
+
+  const float input_[] = {
+    1.7537929121f,     0.134331551012f,    0.123580039877f,   0.957731845246f,
+    0.391006834217f,   1.00699352042f,     -0.778177955829f,  -0.814166433059f,
+    -0.656374394915f,  0.321967305228f,    -2.19455719176f,   0.708035038966f,
+    0.409148822266f,   -0.318254408902f,   0.152450211189f,   -0.250210793369f,
+    0.826811563186f,   1.6804156584f,      0.273626975978f,   0.437936241887f,
+    -0.329935520167f,  -0.288761611645f,   0.156937008304f,   0.271054157295f,
+    -0.0224828854332f, 1.70110336895f,     -0.989066699309f,  1.30863131729f,
+    -0.165813705702f,  0.00380178619265f,  -0.0837342367587f, 0.760954783156f,
+    -0.413610373524f,  1.17968204175f,     0.720295719536f,   0.308718974472f,
+    -1.10091337671f,   0.693160033687f,    -0.0202862320697f, 1.0221927503f,
+    -1.24521801881f,   -0.478501952308f,   -1.71648619442f,   -0.182571723636f,
+    0.339292649504f,   2.0806519131f,      0.967974033444f,   0.175248672328f,
+    0.0658124561472f,  0.795504169496f,    0.750592557361f,   -1.46631013249f,
+    -1.79052846838f,   -1.03672179515f,    -0.841985521653f,  1.20995011489f,
+    0.140859718215f,   -0.651552622661f,   0.451065110806f,   1.1189443693f,
+    0.100213260593f,   -0.834076868118f,   -1.28734321611f,   1.22064420095f,
+    -0.364143084361f,  0.750961509335f,    -0.888689074553f,  -0.8253547106f,
+    -1.21800999027f,   -0.966670603566f,   1.37384014741f,    0.47281264834f,
+    -0.420416235531f,  0.520163906493f,    0.501296589423f,   1.53418976951f,
+    0.715234751485f,   0.644551588907f,    0.0763504863375f,  -0.0018541943723f,
+    0.322853189656f,   -0.795099723224f,   -0.125177096675f,  1.4476577471f,
+    -0.585888410088f,  -1.44391754955f,    -0.610543221933f,  -0.221859179799f,
+    0.252060200774f,   -0.86287169623f,    -0.0350246229157f, 1.0932311997f,
+    0.899464648842f,   -0.468806951704f,   -0.300861137168f,  1.15776414206f,
+    1.03268544738f,    -0.171579585622f,   -0.179136557119f,  -0.354091003368f,
+    -0.612298249394f,  -1.20237379258f,    1.54604109659f,    0.130664370287f,
+    0.885225111868f,   1.0362799581f,      0.980561720868f,   -0.619379186999f,
+    -1.33818929924f,   -0.237233737961f,   -1.89335425073f,   0.567821011321f,
+    0.862420368465f,   -1.37380916821f,    0.352190056666f,   0.611261516274f,
+    0.393237747152f,   0.894686247967f,    0.190405182149f,   0.264872662911f,
+    -0.0657009133797f, 0.0580512653493f,   -0.401825294366f,  0.4106081318f,
+    0.49484512188f,    -0.0751103149442f,  -1.43243736382f,   1.79855656009f,
+    -1.1075351975f,    0.000354882733011f, -0.950716438608f,  1.27129831688f,
+    1.00495189838f,    0.110358656713f,    1.08315032822f,    -0.972676676218f,
+    -0.0757668962831f, 1.88932045165f,     -0.0672638136275f, 0.425913010161f,
+    -0.781540372017f,  0.976000248609f,    0.687218504122f,   1.31374513445f,
+    -0.932658930672f,  -1.25339468479f,    0.422071294078f,   -0.24189927912f,
+    0.216906604642f,   -1.88720997548f,    1.99252872889f,    0.353943735777f,
+    0.737434784132f,   -1.17848645017f,    1.70424254896f,    0.775297112968f,
+    -0.516392797501f,  0.398130609129f,    0.737248101457f,   0.166282500886f,
+    1.24699015468f,    0.47116183125f,     1.19091180182f,    -0.372695424578f,
+    0.219773209389f,   -0.829467838962f,   -0.52533122724f,   1.98707754595f,
+    0.553692606972f,   -0.933228902369f,   1.55427751643f,    -1.08813399144f,
+    -0.325686682094f,  0.205091443796f,    -1.70381666435f,   0.466465327942f,
+    1.73126863447f,    -0.939133672634f,   1.48318077459f,    -0.599414038168f,
+    -1.1583078687f,    0.518116190201f,    0.133571482458f,   0.84958342672f,
+    1.02205000597f,    -0.0772082009087f,  -1.69567503859f,   1.4697939436f,
+    1.67813743122f,    -0.627911582938f,   0.131380509137f,   -1.35717850726f,
+  };
+  const float *input[3] = { input_, &input_[image_dim * image_dim],
+                            &input_[2 * image_dim * image_dim] };
+
+  const float bias[] = { 0.0f, 0.0f };
+
+  const float weights_1[] = {
+    -0.489547413618f, 0.141916424749f,  -0.279286485585f,  -0.115322211094f,
+    0.299572786936f,  0.205289980785f,  -0.536254480088f,  -0.253626313744f,
+    -0.422883815849f, -0.169702966298f, -0.540104704793f,  0.495319646763f,
+    0.298799079422f,  -0.10054550901f,  -0.306085047056f,  0.171061886165f,
+    -0.108058703878f, -0.410734629888f, -0.0640674673049f, -0.386524840979f,
+    -0.157203423678f, -0.362138920529f, -0.216206085209f,  0.147502517971f,
+  };
+
+  const float weights_2[] = {
+    0.207580604357f,  0.480821146263f,  -0.29111909562f,   0.47422567493f,
+    0.206892553253f,  -0.235067084092f, 0.354516800602f,   -0.212399370252f,
+    -0.419071343731f, -0.050350731631f, -0.0516457320279f, -0.0359310500731f,
+    0.567044864811f,  -0.060341127522f, 0.0501464839637f,  -0.437785677916f,
+  };
+
+  const float weights_3[] = {
+    -0.0690452401448f, -0.356657338763f,   -0.219464031809f, 0.551288365843f,
+    0.181372090853f,   -0.00245268542109f, 0.409000696276f,  -0.593209108763f,
+    0.587352566749f,   -0.243720660227f,   0.266232713887f,  -0.00439285245097f,
+    0.252883228305f,   0.152646192631f,    0.0918944932026f, 0.398853715057f,
+  };
+
+  const float weights_4[] = {
+    0.207560791573f,   0.194201350401f,   0.227802322443f,  0.206533663345f,
+    0.0557331066805f,  0.0224159800424f,  -0.143939197467f, -0.27703361602f,
+    0.130643888389f,   -0.269456557461f,  0.186242862864f,  -0.162879944774f,
+    -0.145503996718f,  -0.0768822987581f, -0.203127976359f, -0.238119922873f,
+    -0.258806479994f,  0.0357957680385f,  -0.1027606976f,   -0.287920082345f,
+    0.189047820993f,   0.250711538481f,   -0.272815714175f, -0.0431449742024f,
+    0.207261230996f,   -0.0396472677451f, 0.131236557412f,  0.174291832499f,
+    -0.251515885765f,  -0.107164007499f,  0.185824534748f,  -0.00561585838161f,
+    0.273393799578f,   -0.139563699075f,  -0.263922456031f, -0.118859844081f,
+    0.109230982597f,   -0.170170294794f,  0.0123025648515f, -0.0839368964355f,
+    -0.0774058234297f, 0.255847138286f,   -0.208430879637f, 0.279170114319f,
+    -0.272890330712f,  -0.217725903006f,  -0.295923275459f, -0.17008723953f,
+    -0.284281803405f,  0.281406323629f,   0.266910044663f,  -0.209963914338f,
+    0.271980962964f,   0.142013581699f,   -0.143896509026f, -0.290509242975f,
+    -0.305768180935f,  0.196902832117f,   -0.090424189662f, -0.147460802346f,
+    0.217722016651f,   0.12353848977f,    -0.169177363577f, -0.0454230918512f,
+  };
+
+  const float expected_0[] = {
+    -2.04858441055f,  -2.12883075791f,    -0.045177363807f, 0.763949675768f,
+    -0.544361512821f, -1.58123168032f,    1.89319847039f,   0.16859080901f,
+    -1.16023321135f,  -0.396988107751f,   1.76637090744f,   -1.40434786514f,
+    0.908227575669f,  0.817064817605f,    0.215631134908f,  -0.848605613428f,
+    -0.106756747018f, 0.0193027166685f,   0.801345615113f,  -0.395407237598f,
+    -1.79983795658f,  -1.73054496242f,    0.0584392594454f, -0.388786095569f,
+    -0.237269619354f, 0.000843578271263f, -1.24043512104f,  0.487839445893f,
+    -0.394259726605f, 0.559632843424f,    -0.527224052291f, -1.53792340282f,
+  };
+
+  const float expected_1[] = {
+    0.0f, 0.0f,           0.0f, 0.0f, 0.4057888292f, 0.325309571755f,
+    0.0f, 1.22013465602f,
+  };
+
+  const float expected_2[] = {
+    0.156119444687f,
+    0.517385299817f,
+  };
+
+  const float expected_3[] = {
+    0.224177852984f,
+    0.503384419034f,
+    0.156119444687f,
+    0.517385299817f,
+  };
+
+  const float *expected[] = { expected_0, expected_1, expected_2, expected_3 };
+
+  CNN_CONFIG cnn_config = {
+    4,  // num_layers
+    0,  // is_residue
+    0,  // ext_width
+    0,  // ext_height
+    0,  // strict_bounds
+    {
+        // layer_config
+        {
+            image_ch,           // in_channels
+            filter_dim,         // filter_width
+            filter_dim,         // filter_height
+            num_filters,        // out_channels
+            stride,             // skip_width
+            stride,             // skip_height
+            0,                  // max_pool
+            weights_1,          // weights
+            bias,               // bias
+            PADDING_SAME_ZERO,  // pad
+            NONE,               // activation
+            0,                  // deconvolve
+            0,                  // branch
+            BRANCH_OUTPUT,      // branch_copy_type
+            BRANCH_NOC,         // branch_combine_type
+            { 2, 0, 0 },        // branch_config
+            {},                 // bn_params
+            0,                  // output_num
+        },
+        {
+            num_filters,        // in_channels
+            filter_dim,         // filter_width
+            filter_dim,         // filter_height
+            num_filters,        // out_channels
+            stride,             // skip_width
+            stride,             // skip_height
+            0,                  // max_pool
+            weights_2,          // weights
+            bias,               // bias
+            PADDING_SAME_ZERO,  // pad
+            RELU,               // activation
+            0,                  // deconvolve
+            0,                  // branch
+            BRANCH_NO_COPY,     // branch_copy_type
+            BRANCH_NOC,         // branch_combine_type
+            {},                 // branch_config
+            {},                 // bn_params
+            1,                  // output_num
+        },
+        {
+            num_filters,        // in_channels
+            filter_dim,         // filter_width
+            filter_dim,         // filter_height
+            num_filters,        // out_channels
+            stride,             // skip_width
+            stride,             // skip_height
+            0,                  // max_pool
+            weights_3,          // weights
+            bias,               // bias
+            PADDING_SAME_ZERO,  // pad
+            RELU,               // activation
+            0,                  // deconvolve
+            0,                  // branch
+            BRANCH_NO_COPY,     // branch_copy_type
+            BRANCH_NOC,         // branch_combine_type
+            {},                 // branch_config
+            {},                 // bn_params
+            2,                  // output_num
+        },
+        {
+            num_filters,     // in_channels
+            2 * filter_dim,  // filter_width
+            2 * filter_dim,  // filter_height
+            num_filters,     // out_channels
+            2 * stride,      // skip_width
+            2 * stride,      // skip_height
+            0,               // max_pool
+            weights_4,       // weights
+            bias,            // bias
+            PADDING_VALID,   // pad
+            RELU,            // activation
+            0,               // deconvolve
+            1,               // branch
+            BRANCH_NO_COPY,  // branch_copy_type
+            BRANCH_CAT,      // branch_combine_type
+            { 0, 0, 1 },     // branch_config
+            {},              // bn_params
+            3,               // output_num
+        },
+    },
+  };
+
+  CNN_THREAD_DATA thread_data = { 1, nullptr };
+
+  const int num_outputs = 4;
+  const int output_chs[4] = { filter_dim, filter_dim, filter_dim,
+                              2 * filter_dim };
+  const int output_dims[4] = { 4, 2, 1, 1 };
+  const int output_sizes[4] = {
+    output_chs[0] * output_dims[0] * output_dims[0],
+    output_chs[1] * output_dims[1] * output_dims[1],
+    output_chs[2] * output_dims[2] * output_dims[2],
+    output_chs[3] * output_dims[3] * output_dims[3],
+  };
+  float *const output_ = (float *)aom_malloc(
+      sizeof(*output_) *
+      (output_sizes[0] + output_sizes[1] + output_sizes[2] + output_sizes[3]));
+  ASSERT_NE(output_, nullptr);
+  float *output[CNN_MAX_CHANNELS] = { nullptr };
+  int ch_ite = 0;
+  float *output_ite = output_;
+  for (int output_idx = 0; output_idx < num_outputs; output_idx++) {
+    for (int channel = 0; channel < output_chs[output_idx]; ++channel) {
+      output[ch_ite++] = output_ite;
+      output_ite += output_dims[output_idx] * output_dims[output_idx];
+    }
+  }
+  CNN_MULTI_OUT output_struct = { num_outputs, output_chs, output_dims,
+                                  output };
+
+  RunMultiOutCNNTest(input, image_dim, image_dim, image_dim, &cnn_config,
+                     &thread_data, &output_struct, expected, MSE_FLOAT_TOL);
+
+  aom_free(output_);
+}
+
+namespace {
+
+typedef void (*CNNConvolveNoMaxpoolPaddingValidFunc)(
+    const float **input, int in_width, int in_height, int in_stride,
+    const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride,
+    int start_idx, int cstep, int channel_step);
+
+typedef libaom_test::FuncParam<CNNConvolveNoMaxpoolPaddingValidFunc>
+    CNNConvolveTestFuncs;
+
+class CNNConvolveTest : public ::testing::TestWithParam<CNNConvolveTestFuncs> {
+ protected:
+  void SetUp() override { params_ = GetParam(); }
+
+  void RunCNNConvolveSetup(int run_times) {
+    int in_width = 65;
+    int in_height = 65;
+
+    const CNN_CONFIG *cnn_config = &av1_intra_mode_cnn_partition_cnn_config;
+
+    for (int layer = 0; layer < cnn_config->num_layers; ++layer) {
+      int out_width = 0, out_height = 0;
+      int in_size = in_width * in_height;
+      // Get current layer output width and height.
+      av1_find_cnn_layer_output_size(in_height, in_width,
+                                     &cnn_config->layer_config[layer],
+                                     &out_width, &out_height);
+
+      int out_size = out_width * out_height;
+      float *input[20], *output_ref[20], *output_mod[20];
+
+      float *input_data =
+          (float *)aom_malloc(sizeof(*input_data) * in_size *
+                              cnn_config->layer_config[layer].in_channels);
+      float *temp_ptr = input_data;
+      ASSERT_NE(temp_ptr, nullptr);
+      for (int i = 0; i < cnn_config->layer_config[layer].in_channels; ++i) {
+        input[i] = temp_ptr;
+        for (int j = 0; j < in_size; j++) {
+          *(temp_ptr++) = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
+        }
+      }
+
+      float *out_data_ref = (float *)aom_calloc(
+          sizeof(*out_data_ref),
+          out_size * cnn_config->layer_config[layer].out_channels);
+      ASSERT_NE(out_data_ref, nullptr);
+      float *out_data_mod = (float *)aom_calloc(
+          sizeof(*out_data_mod),
+          out_size * cnn_config->layer_config[layer].out_channels);
+      ASSERT_NE(out_data_mod, nullptr);
+      float *temp_ptr1 = out_data_ref;
+      float *temp_ptr2 = out_data_mod;
+      for (int i = 0; i < cnn_config->layer_config[layer].out_channels; ++i) {
+        output_ref[i] = temp_ptr1;
+        output_mod[i] = temp_ptr2;
+        temp_ptr1 += out_size;
+        temp_ptr2 += out_size;
+      }
+
+      RunCNNConvolveTest(input, in_width, in_height, out_size,
+                         &cnn_config->layer_config[layer], 0, 1, run_times,
+                         layer, output_ref, output_mod, out_width);
+
+      // Set current layer output width and height as next layer input width and
+      // height.
+      in_width = out_width;
+      in_height = out_height;
+
+      aom_free(input_data);
+      aom_free(out_data_ref);
+      aom_free(out_data_mod);
+    }
+  }
+
+  void RunCNNConvolveTest(float **input, int in_width, int in_height,
+                          int out_size, const CNN_LAYER_CONFIG *layer_config,
+                          int start_idx, int step, int run_times, int layer,
+                          float **output_ref, float **output_mod,
+                          int out_stride) {
+    const int cstep = layer_config->in_channels * layer_config->out_channels;
+    const int channel_step = AOMMAX(step, 1);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.ref_func((const float **)input, in_width, in_height, in_width,
+                       layer_config, output_ref, out_stride, start_idx, cstep,
+                       channel_step);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.tst_func((const float **)input, in_width, in_height, in_width,
+                       layer_config, output_mod, out_stride, start_idx, cstep,
+                       channel_step);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    if (run_times > 1) {
+      printf("layer : %d \n", layer);
+      printf("%7.2f/%7.2fns (%3.2f)\n", time1, time2, time1 / time2);
+    } else {
+      for (int channel = 0; channel < layer_config->out_channels; ++channel) {
+        const float *buf_ref = output_ref[channel];
+        const float *buf_mod = output_mod[channel];
+
+        for (int i = 0; i < out_size; ++i) {
+          if (buf_ref[i] < CNN_CONVOLVE_PIXELWISE_FLOAT_TOL) {
+            ASSERT_LE(buf_ref[i], CNN_CONVOLVE_PIXELWISE_FLOAT_TOL)
+                << "Reference output was near-zero, test output was not ("
+                << buf_mod[i] << ")";
+          } else {
+            const float error = buf_ref[i] - buf_mod[i];
+            const float relative_error = fabsf(error / buf_ref[i]);
+            ASSERT_LE(relative_error, CNN_CONVOLVE_PIXELWISE_FLOAT_TOL)
+                << " channel " << channel << " pixel " << i << ": "
+                << buf_ref[i] << "/" << buf_mod[i] << std::endl;
+          }
+        }
+      }
+    }
+  }
+
+ private:
+  CNNConvolveTestFuncs params_;
+  libaom_test::ACMRandom rng_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CNNConvolveTest);
+
+TEST_P(CNNConvolveTest, CheckOutput) { RunCNNConvolveSetup(1); }
+
+TEST_P(CNNConvolveTest, DISABLED_Speed) { RunCNNConvolveSetup(100000); }
+
+#if HAVE_AVX2 && !CONFIG_EXCLUDE_SIMD_MISMATCH
+INSTANTIATE_TEST_SUITE_P(AVX2, CNNConvolveTest,
+                         ::testing::Values(CNNConvolveTestFuncs(
+                             &av1_cnn_convolve_no_maxpool_padding_valid_c,
+                             &av1_cnn_convolve_no_maxpool_padding_valid_avx2)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, CNNConvolveTest,
+                         ::testing::Values(CNNConvolveTestFuncs(
+                             &av1_cnn_convolve_no_maxpool_padding_valid_c,
+                             &av1_cnn_convolve_no_maxpool_padding_valid_neon)));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/codec_factory.h b/third_party/aom/test/codec_factory.h
new file mode 100644
index 0000000000..7ffc465a7b
--- /dev/null
+++ b/third_party/aom/test/codec_factory.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AOM_TEST_CODEC_FACTORY_H_
+#define AOM_TEST_CODEC_FACTORY_H_
+
+#include <tuple>
+
+#include "config/aom_config.h"
+
+#include "aom/aom_decoder.h"
+#include "aom/aom_encoder.h"
+#if CONFIG_AV1_ENCODER
+#include "aom/aomcx.h"
+#endif
+#if CONFIG_AV1_DECODER
+#include "aom/aomdx.h"
+#endif
+
+#include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
+namespace libaom_test {
+
+const int kCodecFactoryParam = 0;
+
+class CodecFactory {
+ public:
+  CodecFactory() = default;
+
+  virtual ~CodecFactory() = default;
+
+  virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg) const = 0;
+
+  virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg,
+                                 const aom_codec_flags_t flags) const = 0;
+
+  virtual Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg,
+                                 const aom_codec_flags_t init_flags,
+                                 TwopassStatsStore *stats) const = 0;
+
+  virtual aom_codec_err_t DefaultEncoderConfig(aom_codec_enc_cfg_t *cfg,
+                                               unsigned int usage) const = 0;
+};
+
+/* Provide CodecTestWith<n>Params classes for a variable number of parameters
+ * to avoid having to include a pointer to the CodecFactory in every test
+ * definition.
+ */
+template <class T1>
+class CodecTestWithParam
+    : public ::testing::TestWithParam<
+          std::tuple<const libaom_test::CodecFactory *, T1> > {};
+
+template <class T1, class T2>
+class CodecTestWith2Params
+    : public ::testing::TestWithParam<
+          std::tuple<const libaom_test::CodecFactory *, T1, T2> > {};
+
+template <class T1, class T2, class T3>
+class CodecTestWith3Params
+    : public ::testing::TestWithParam<
+          std::tuple<const libaom_test::CodecFactory *, T1, T2, T3> > {};
+
+template <class T1, class T2, class T3, class T4>
+class CodecTestWith4Params
+    : public ::testing::TestWithParam<
+          std::tuple<const libaom_test::CodecFactory *, T1, T2, T3, T4> > {};
+
+template <class T1, class T2, class T3, class T4, class T5>
+class CodecTestWith5Params
+    : public ::testing::TestWithParam<
+          std::tuple<const libaom_test::CodecFactory *, T1, T2, T3, T4, T5> > {
+};
+
+template <class T1, class T2, class T3, class T4, class T5, class T6>
+class CodecTestWith6Params
+    : public ::testing::TestWithParam<std::tuple<
+          const libaom_test::CodecFactory *, T1, T2, T3, T4, T5, T6> > {};
+
+/*
+ * AV1 Codec Definitions
+ */
+class AV1Decoder : public Decoder {
+ public:
+  explicit AV1Decoder(aom_codec_dec_cfg_t cfg) : Decoder(cfg) {}
+
+  AV1Decoder(aom_codec_dec_cfg_t cfg, const aom_codec_flags_t flag)
+      : Decoder(cfg, flag) {}
+
+ protected:
+  aom_codec_iface_t *CodecInterface() const override {
+#if CONFIG_AV1_DECODER
+    return aom_codec_av1_dx();
+#else
+    return nullptr;
+#endif
+  }
+};
+
+class AV1Encoder : public Encoder {
+ public:
+  AV1Encoder(aom_codec_enc_cfg_t cfg, const aom_codec_flags_t init_flags,
+             TwopassStatsStore *stats)
+      : Encoder(cfg, init_flags, stats) {}
+
+ protected:
+  aom_codec_iface_t *CodecInterface() const override {
+#if CONFIG_AV1_ENCODER
+    return aom_codec_av1_cx();
+#else
+    return nullptr;
+#endif
+  }
+};
+
+class AV1CodecFactory : public CodecFactory {
+ public:
+  AV1CodecFactory() : CodecFactory() {}
+
+  Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg) const override {
+    return CreateDecoder(cfg, 0);
+  }
+
+  Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg,
+                         const aom_codec_flags_t flags) const override {
+#if CONFIG_AV1_DECODER
+    return new AV1Decoder(cfg, flags);
+#else
+    (void)cfg;
+    (void)flags;
+    return nullptr;
+#endif
+  }
+
+  Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg,
+                         const aom_codec_flags_t init_flags,
+                         TwopassStatsStore *stats) const override {
+#if CONFIG_AV1_ENCODER
+    return new AV1Encoder(cfg, init_flags, stats);
+#else
+    (void)cfg;
+    (void)init_flags;
+    (void)stats;
+    return nullptr;
+#endif
+  }
+
+  aom_codec_err_t DefaultEncoderConfig(aom_codec_enc_cfg_t *cfg,
+                                       unsigned int usage) const override {
+#if CONFIG_AV1_ENCODER
+    return aom_codec_enc_config_default(aom_codec_av1_cx(), cfg, usage);
+#else
+    (void)cfg;
+    (void)usage;
+    return AOM_CODEC_INCAPABLE;
+#endif
+  }
+};
+
+const libaom_test::AV1CodecFactory kAV1;
+
+#define AV1_INSTANTIATE_TEST_SUITE(test, ...)                               \
+  INSTANTIATE_TEST_SUITE_P(                                                 \
+      AV1, test,                                                            \
+      ::testing::Combine(                                                   \
+          ::testing::Values(static_cast<const libaom_test::CodecFactory *>( \
+              &libaom_test::kAV1)),                                         \
+          __VA_ARGS__))
+
+}  // namespace libaom_test
+#endif  // AOM_TEST_CODEC_FACTORY_H_
diff --git a/third_party/aom/test/coding_path_sync.cc b/third_party/aom/test/coding_path_sync.cc
new file mode 100644
index 0000000000..f7b7eace90
--- /dev/null
+++ b/third_party/aom/test/coding_path_sync.cc
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+
+#include "config/aom_config.h"
+
+#include "aom/aomcx.h"
+#include "aom/aomdx.h"
+#include "aom/aom_encoder.h"
+#include "aom/aom_decoder.h"
+
+#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
+
+using libaom_test::ACMRandom;
+namespace {
+
+class CompressedSource {
+ public:
+  explicit CompressedSource(int seed) : rnd_(seed), frame_count_(0) {
+    aom_codec_iface_t *algo = aom_codec_av1_cx();
+
+    aom_codec_enc_cfg_t cfg;
+#if CONFIG_REALTIME_ONLY
+    aom_codec_enc_config_default(algo, &cfg, 1);
+#else
+    aom_codec_enc_config_default(algo, &cfg, 0);
+#endif
+
+    // force the quantizer, to reduce the sensitivity on encoding choices.
+    // e.g, we don't want this test to break when the rate control is modified.
+    {
+      const int max_q = cfg.rc_max_quantizer;
+      const int min_q = cfg.rc_min_quantizer;
+      const int q = rnd_.PseudoUniform(max_q - min_q + 1) + min_q;
+
+      cfg.rc_end_usage = AOM_Q;
+      cfg.rc_max_quantizer = q;
+      cfg.rc_min_quantizer = q;
+    }
+
+    // choose the picture size
+    {
+      width_ = rnd_.PseudoUniform(kWidth - 8) + 8;
+      height_ = rnd_.PseudoUniform(kHeight - 8) + 8;
+    }
+
+    // choose the chroma subsampling
+    {
+      const aom_img_fmt_t fmts[] = {
+        AOM_IMG_FMT_I420,
+        AOM_IMG_FMT_I422,
+        AOM_IMG_FMT_I444,
+      };
+
+      format_ = fmts[rnd_.PseudoUniform(NELEMENTS(fmts))];
+    }
+
+    cfg.g_w = width_;
+    cfg.g_h = height_;
+    cfg.g_lag_in_frames = 0;
+    if (format_ == AOM_IMG_FMT_I420)
+      cfg.g_profile = 0;
+    else if (format_ == AOM_IMG_FMT_I444)
+      cfg.g_profile = 1;
+    else if (format_ == AOM_IMG_FMT_I422)
+      cfg.g_profile = 2;
+
+    aom_codec_enc_init(&enc_, algo, &cfg, 0);
+  }
+
+  ~CompressedSource() { aom_codec_destroy(&enc_); }
+
+  const aom_codec_cx_pkt_t *ReadFrame() {
+    uint8_t buf[kWidth * kHeight * 3] = { 0 };
+
+    // render regular pattern
+    const int period = rnd_.Rand8() % 32 + 1;
+    const int phase = rnd_.Rand8() % period;
+
+    const int val_a = rnd_.Rand8();
+    const int val_b = rnd_.Rand8();
+
+    for (int i = 0; i < (int)sizeof buf; ++i)
+      buf[i] = (i + phase) % period < period / 2 ? val_a : val_b;
+
+    aom_image_t img;
+    aom_img_wrap(&img, format_, width_, height_, 0, buf);
+    aom_codec_encode(&enc_, &img, frame_count_++, 1, 0);
+
+    aom_codec_iter_t iter = nullptr;
+
+    const aom_codec_cx_pkt_t *pkt = nullptr;
+
+    do {
+      pkt = aom_codec_get_cx_data(&enc_, &iter);
+    } while (pkt && pkt->kind != AOM_CODEC_CX_FRAME_PKT);
+
+    return pkt;
+  }
+
+ private:
+  static const int kWidth = 128;
+  static const int kHeight = 128;
+
+  ACMRandom rnd_;
+  aom_img_fmt_t format_;
+  aom_codec_ctx_t enc_;
+  int frame_count_;
+  int width_, height_;
+};
+
+// lowers an aom_image_t to an easily comparable/printable form
+std::vector<uint16_t> Serialize(const aom_image_t *img) {
+  std::vector<uint16_t> bytes;
+  bytes.reserve(img->d_w * img->d_h * 3);
+  for (int plane = 0; plane < 3; ++plane) {
+    const int w = aom_img_plane_width(img, plane);
+    const int h = aom_img_plane_height(img, plane);
+
+    for (int r = 0; r < h; ++r) {
+      for (int c = 0; c < w; ++c) {
+        const unsigned char *row = img->planes[plane] + r * img->stride[plane];
+        if (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
+          const uint16_t *row16 = reinterpret_cast<const uint16_t *>(row);
+          bytes.push_back(row16[c]);
+        } else {
+          bytes.push_back(row[c]);
+        }
+      }
+    }
+  }
+
+  return bytes;
+}
+
+class Decoder {
+ public:
+  explicit Decoder(int allowLowbitdepth) {
+    aom_codec_iface_t *algo = aom_codec_av1_dx();
+
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.allow_lowbitdepth = allowLowbitdepth;
+
+    aom_codec_dec_init(&dec_, algo, &cfg, 0);
+  }
+
+  ~Decoder() { aom_codec_destroy(&dec_); }
+
+  std::vector<uint16_t> decode(const aom_codec_cx_pkt_t *pkt) {
+    aom_codec_decode(&dec_, static_cast<uint8_t *>(pkt->data.frame.buf),
+                     pkt->data.frame.sz, nullptr);
+
+    aom_codec_iter_t iter = nullptr;
+    return Serialize(aom_codec_get_frame(&dec_, &iter));
+  }
+
+ private:
+  aom_codec_ctx_t dec_;
+};
+
+// Try to reveal a mismatch between LBD and HBD coding paths.
+TEST(CodingPathSync, SearchForHbdLbdMismatch) {
+  const int count_tests = 10;
+  for (int i = 0; i < count_tests; ++i) {
+    Decoder dec_hbd(0);
+    Decoder dec_lbd(1);
+
+    CompressedSource enc(i);
+
+    for (int k = 0; k < 3; ++k) {
+      const aom_codec_cx_pkt_t *frame = enc.ReadFrame();
+
+      std::vector<uint16_t> lbd_yuv = dec_lbd.decode(frame);
+      std::vector<uint16_t> hbd_yuv = dec_hbd.decode(frame);
+
+      ASSERT_EQ(lbd_yuv, hbd_yuv);
+    }
+  }
+}
+
+TEST(CodingPathSyncLarge, SearchForHbdLbdMismatchLarge) {
+  const int count_tests = 100;
+  const int seed = 1234;
+  for (int i = 0; i < count_tests; ++i) {
+    Decoder dec_hbd(0);
+    Decoder dec_lbd(1);
+
+    CompressedSource enc(seed + i);
+
+    for (int k = 0; k < 5; ++k) {
+      const aom_codec_cx_pkt_t *frame = enc.ReadFrame();
+
+      std::vector<uint16_t> lbd_yuv = dec_lbd.decode(frame);
+      std::vector<uint16_t> hbd_yuv = dec_hbd.decode(frame);
+
+      ASSERT_EQ(lbd_yuv, hbd_yuv);
+    }
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/comp_avg_pred_test.cc b/third_party/aom/test/comp_avg_pred_test.cc
new file mode 100644
index 0000000000..2f81d7e9b7
--- /dev/null
+++ b/third_party/aom/test/comp_avg_pred_test.cc
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/comp_avg_pred_test.h"
+
+using libaom_test::ACMRandom;
+using libaom_test::AV1DISTWTDCOMPAVG::AV1DISTWTDCOMPAVGTest;
+using libaom_test::AV1DISTWTDCOMPAVG::DistWtdCompAvgParam;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1DISTWTDCOMPAVGTest);
+using libaom_test::AV1DISTWTDCOMPAVG::AV1DISTWTDCOMPAVGUPSAMPLEDTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1DISTWTDCOMPAVGUPSAMPLEDTest);
+using libaom_test::AV1DISTWTDCOMPAVG::DistWtdCompAvgTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DistWtdCompAvgTest);
+#if CONFIG_AV1_HIGHBITDEPTH
+using libaom_test::AV1DISTWTDCOMPAVG::AV1HighBDDISTWTDCOMPAVGTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighBDDISTWTDCOMPAVGTest);
+using libaom_test::AV1DISTWTDCOMPAVG::AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(
+    AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest);
+#endif
+using std::make_tuple;
+using std::tuple;
+
+uint8_t *DistWtdCompAvgTest::reference_data_ = nullptr;
+uint8_t *DistWtdCompAvgTest::second_pred_ = nullptr;
+uint8_t *DistWtdCompAvgTest::comp_pred_ = nullptr;
+uint8_t *DistWtdCompAvgTest::comp_pred_test_ = nullptr;
+uint8_t *DistWtdCompAvgTest::reference_data8_ = nullptr;
+uint8_t *DistWtdCompAvgTest::second_pred8_ = nullptr;
+uint8_t *DistWtdCompAvgTest::comp_pred8_ = nullptr;
+uint8_t *DistWtdCompAvgTest::comp_pred8_test_ = nullptr;
+uint16_t *DistWtdCompAvgTest::reference_data16_ = nullptr;
+uint16_t *DistWtdCompAvgTest::second_pred16_ = nullptr;
+uint16_t *DistWtdCompAvgTest::comp_pred16_ = nullptr;
+uint16_t *DistWtdCompAvgTest::comp_pred16_test_ = nullptr;
+
+namespace {
+
+TEST_P(AV1DISTWTDCOMPAVGTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
+
+TEST_P(AV1DISTWTDCOMPAVGTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_SUITE_P(SSSE3, AV1DISTWTDCOMPAVGTest,
+                         libaom_test::AV1DISTWTDCOMPAVG::BuildParams(
+                             aom_dist_wtd_comp_avg_pred_ssse3));
+#endif
+
+TEST_P(AV1DISTWTDCOMPAVGUPSAMPLEDTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0));
+}
+
+TEST_P(AV1DISTWTDCOMPAVGUPSAMPLEDTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(0));
+}
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_SUITE_P(SSSE3, AV1DISTWTDCOMPAVGUPSAMPLEDTest,
+                         libaom_test::AV1DISTWTDCOMPAVG::BuildParams(
+                             aom_dist_wtd_comp_avg_upsampled_pred_ssse3));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1DISTWTDCOMPAVGUPSAMPLEDTest,
+                         libaom_test::AV1DISTWTDCOMPAVG::BuildParams(
+                             aom_dist_wtd_comp_avg_upsampled_pred_neon));
+#endif  // HAVE_NEON
+
+TEST_P(DistWtdCompAvgTest, MaxRef) {
+  FillConstant(reference_data_, reference_stride_, mask_);
+  FillConstant(second_pred_, width_, 0);
+  CheckCompAvg();
+}
+
+TEST_P(DistWtdCompAvgTest, MaxSecondPred) {
+  FillConstant(reference_data_, reference_stride_, 0);
+  FillConstant(second_pred_, width_, mask_);
+  CheckCompAvg();
+}
+
+TEST_P(DistWtdCompAvgTest, ShortRef) {
+  const int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckCompAvg();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(DistWtdCompAvgTest, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  const int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckCompAvg();
+  reference_stride_ = tmp_stride;
+}
+
+// TODO(chengchen): add highbd tests
+const DistWtdCompAvgParam dist_wtd_comp_avg_c_tests[] = {
+  make_tuple(128, 128, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(128, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(64, 128, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(64, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(64, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(32, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(32, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(32, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(16, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(16, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(8, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(8, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(8, 4, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(4, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(4, 4, &aom_dist_wtd_comp_avg_pred_c, -1),
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(16, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(32, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(8, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(16, 4, &aom_dist_wtd_comp_avg_pred_c, -1),
+  make_tuple(4, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(C, DistWtdCompAvgTest,
+                         ::testing::ValuesIn(dist_wtd_comp_avg_c_tests));
+
+#if HAVE_SSSE3
+const DistWtdCompAvgParam dist_wtd_comp_avg_ssse3_tests[] = {
+  make_tuple(128, 128, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(128, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(64, 128, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(64, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(64, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(32, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(32, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(32, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(16, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(16, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(8, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(8, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(8, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(4, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(4, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(16, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(32, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(8, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(16, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+  make_tuple(4, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(SSSE3, DistWtdCompAvgTest,
+                         ::testing::ValuesIn(dist_wtd_comp_avg_ssse3_tests));
+#endif  // HAVE_SSSE3
+
+#if HAVE_NEON
+const DistWtdCompAvgParam dist_wtd_comp_avg_neon_tests[] = {
+  make_tuple(128, 128, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(128, 64, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(64, 128, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(64, 64, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(64, 32, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(32, 64, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(32, 32, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(32, 16, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(16, 32, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(16, 8, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(8, 16, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(8, 8, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(8, 4, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(4, 8, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(4, 4, &aom_dist_wtd_comp_avg_pred_neon, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(16, 64, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(32, 8, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(8, 32, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(16, 4, &aom_dist_wtd_comp_avg_pred_neon, -1),
+  make_tuple(4, 16, &aom_dist_wtd_comp_avg_pred_neon, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, DistWtdCompAvgTest,
+                         ::testing::ValuesIn(dist_wtd_comp_avg_neon_tests));
+#endif  // HAVE_NEON
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST_P(AV1HighBDDISTWTDCOMPAVGTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(1));
+}
+
+TEST_P(AV1HighBDDISTWTDCOMPAVGTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(1));
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1HighBDDISTWTDCOMPAVGTest,
+                         libaom_test::AV1DISTWTDCOMPAVG::BuildParams(
+                             aom_highbd_dist_wtd_comp_avg_pred_sse2, 1));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1HighBDDISTWTDCOMPAVGTest,
+                         libaom_test::AV1DISTWTDCOMPAVG::BuildParams(
+                             aom_highbd_dist_wtd_comp_avg_pred_neon, 1));
+#endif
+
+TEST_P(AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(1));
+}
+
+TEST_P(AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(1));
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest,
+                         libaom_test::AV1DISTWTDCOMPAVG::BuildParams(
+                             aom_highbd_dist_wtd_comp_avg_upsampled_pred_sse2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest,
+                         libaom_test::AV1DISTWTDCOMPAVG::BuildParams(
+                             aom_highbd_dist_wtd_comp_avg_upsampled_pred_neon));
+#endif
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/comp_avg_pred_test.h b/third_party/aom/test/comp_avg_pred_test.h
new file mode 100644
index 0000000000..396df2e2dd
--- /dev/null
+++ b/third_party/aom/test/comp_avg_pred_test.h
@@ -0,0 +1,757 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_COMP_AVG_PRED_TEST_H_
+#define AOM_TEST_COMP_AVG_PRED_TEST_H_
+
+#include <tuple>
+
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "test/register_state_check.h"
+#include "av1/common/common_data.h"
+#include "aom_ports/aom_timer.h"
+
+namespace libaom_test {
+const int kMaxSize = 128 + 32;  // padding
+
+namespace AV1DISTWTDCOMPAVG {
+
+typedef void (*distwtdcompavg_func)(uint8_t *comp_pred, const uint8_t *pred,
+                                    int width, int height, const uint8_t *ref,
+                                    int ref_stride,
+                                    const DIST_WTD_COMP_PARAMS *jcp_param);
+
+typedef void (*distwtdcompavgupsampled_func)(
+    MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
+    const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
+    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
+    int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search);
+
+typedef void (*DistWtdCompAvgFunc)(uint8_t *comp_pred, const uint8_t *pred,
+                                   int width, int height, const uint8_t *ref,
+                                   int ref_stride,
+                                   const DIST_WTD_COMP_PARAMS *jcp_param);
+
+typedef std::tuple<distwtdcompavg_func, BLOCK_SIZE> DISTWTDCOMPAVGParam;
+
+typedef std::tuple<distwtdcompavgupsampled_func, BLOCK_SIZE>
+    DISTWTDCOMPAVGUPSAMPLEDParam;
+
+typedef std::tuple<int, int, DistWtdCompAvgFunc, int> DistWtdCompAvgParam;
+
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef void (*highbddistwtdcompavgupsampled_func)(
+    MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
+    const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
+    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
+    int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param,
+    int subpel_search);
+
+typedef std::tuple<int, highbddistwtdcompavgupsampled_func, BLOCK_SIZE>
+    HighbdDISTWTDCOMPAVGUPSAMPLEDParam;
+
+typedef std::tuple<int, distwtdcompavg_func, BLOCK_SIZE>
+    HighbdDISTWTDCOMPAVGParam;
+
+::testing::internal::ParamGenerator<HighbdDISTWTDCOMPAVGParam> BuildParams(
+    distwtdcompavg_func filter, int is_hbd) {
+  (void)is_hbd;
+  return ::testing::Combine(::testing::Range(8, 13, 2),
+                            ::testing::Values(filter),
+                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
+}
+
+::testing::internal::ParamGenerator<HighbdDISTWTDCOMPAVGUPSAMPLEDParam>
+BuildParams(highbddistwtdcompavgupsampled_func filter) {
+  return ::testing::Combine(::testing::Range(8, 13, 2),
+                            ::testing::Values(filter),
+                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+::testing::internal::ParamGenerator<DISTWTDCOMPAVGParam> BuildParams(
+    distwtdcompavg_func filter) {
+  return ::testing::Combine(::testing::Values(filter),
+                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
+}
+
+::testing::internal::ParamGenerator<DISTWTDCOMPAVGUPSAMPLEDParam> BuildParams(
+    distwtdcompavgupsampled_func filter) {
+  return ::testing::Combine(::testing::Values(filter),
+                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
+}
+
+class AV1DISTWTDCOMPAVGTest
+    : public ::testing::TestWithParam<DISTWTDCOMPAVGParam> {
+ public:
+  ~AV1DISTWTDCOMPAVGTest() override = default;
+  void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+ protected:
+  void RunCheckOutput(distwtdcompavg_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(1);
+
+    uint8_t pred8[kMaxSize * kMaxSize];
+    uint8_t ref8[kMaxSize * kMaxSize];
+    uint8_t output[kMaxSize * kMaxSize];
+    uint8_t output2[kMaxSize * kMaxSize];
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand8();
+        ref8[i * w + j] = rnd_.Rand8();
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
+    dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
+
+    for (int ii = 0; ii < 2; ii++) {
+      for (int jj = 0; jj < 4; jj++) {
+        dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
+        dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[jj][1 - ii];
+
+        const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
+        const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
+        aom_dist_wtd_comp_avg_pred_c(output, pred8 + offset_r * w + offset_c,
+                                     in_w, in_h, ref8 + offset_r * w + offset_c,
+                                     in_w, &dist_wtd_comp_params);
+        test_impl(output2, pred8 + offset_r * w + offset_c, in_w, in_h,
+                  ref8 + offset_r * w + offset_c, in_w, &dist_wtd_comp_params);
+
+        for (int i = 0; i < in_h; ++i) {
+          for (int j = 0; j < in_w; ++j) {
+            int idx = i * in_w + j;
+            ASSERT_EQ(output[idx], output2[idx])
+                << "Mismatch at unit tests for AV1DISTWTDCOMPAVGTest\n"
+                << in_w << "x" << in_h << " Pixel mismatch at index " << idx
+                << " = (" << i << ", " << j << ")";
+          }
+        }
+      }
+    }
+  }
+  void RunSpeedTest(distwtdcompavg_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(1);
+
+    uint8_t pred8[kMaxSize * kMaxSize];
+    uint8_t ref8[kMaxSize * kMaxSize];
+    uint8_t output[kMaxSize * kMaxSize];
+    uint8_t output2[kMaxSize * kMaxSize];
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand8();
+        ref8[i * w + j] = rnd_.Rand8();
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
+    dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
+
+    dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
+    dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
+
+    const int num_loops = 1000000000 / (in_w + in_h);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      aom_dist_wtd_comp_avg_pred_c(output, pred8, in_w, in_h, ref8, in_w,
+                                   &dist_wtd_comp_params);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("distwtdcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time / num_loops);
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+
+    for (int i = 0; i < num_loops; ++i)
+      test_impl(output2, pred8, in_w, in_h, ref8, in_w, &dist_wtd_comp_params);
+
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("distwtdcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time1 / num_loops);
+  }
+
+  libaom_test::ACMRandom rnd_;
+};  // class AV1DISTWTDCOMPAVGTest
+
+class AV1DISTWTDCOMPAVGUPSAMPLEDTest
+    : public ::testing::TestWithParam<DISTWTDCOMPAVGUPSAMPLEDParam> {
+ public:
+  ~AV1DISTWTDCOMPAVGUPSAMPLEDTest() override = default;
+  void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+ protected:
+  void RunCheckOutput(distwtdcompavgupsampled_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(1);
+
+    uint8_t pred8[kMaxSize * kMaxSize];
+    uint8_t ref8[kMaxSize * kMaxSize];
+    DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]);
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand8();
+        ref8[i * w + j] = rnd_.Rand8();
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
+    dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
+    int sub_x_q3, sub_y_q3;
+    int subpel_search;
+    for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
+         ++subpel_search) {
+      for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
+        for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
+          for (int ii = 0; ii < 2; ii++) {
+            for (int jj = 0; jj < 4; jj++) {
+              dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
+              dist_wtd_comp_params.bck_offset =
+                  quant_dist_lookup_table[jj][1 - ii];
+
+              const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
+              const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
+
+              aom_dist_wtd_comp_avg_upsampled_pred_c(
+                  nullptr, nullptr, 0, 0, nullptr, output,
+                  pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
+                  sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
+                  &dist_wtd_comp_params, subpel_search);
+              test_impl(nullptr, nullptr, 0, 0, nullptr, output2,
+                        pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
+                        sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
+                        &dist_wtd_comp_params, subpel_search);
+
+              for (int i = 0; i < in_h; ++i) {
+                for (int j = 0; j < in_w; ++j) {
+                  int idx = i * in_w + j;
+                  ASSERT_EQ(output[idx], output2[idx])
+                      << "Mismatch at unit tests for "
+                         "AV1DISTWTDCOMPAVGUPSAMPLEDTest\n"
+                      << in_w << "x" << in_h << " Pixel mismatch at index "
+                      << idx << " = (" << i << ", " << j
+                      << "), sub pixel offset = (" << sub_y_q3 << ", "
+                      << sub_x_q3 << ")";
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  void RunSpeedTest(distwtdcompavgupsampled_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(1);
+
+    uint8_t pred8[kMaxSize * kMaxSize];
+    uint8_t ref8[kMaxSize * kMaxSize];
+    DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]);
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand8();
+        ref8[i * w + j] = rnd_.Rand8();
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
+    dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
+
+    dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
+    dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
+
+    int sub_x_q3 = 0;
+    int sub_y_q3 = 0;
+
+    const int num_loops = 1000000000 / (in_w + in_h);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    int subpel_search = USE_8_TAPS;  // set to USE_4_TAPS to test 4-tap filter.
+
+    for (int i = 0; i < num_loops; ++i)
+      aom_dist_wtd_comp_avg_upsampled_pred_c(
+          nullptr, nullptr, 0, 0, nullptr, output, pred8, in_w, in_h, sub_x_q3,
+          sub_y_q3, ref8, in_w, &dist_wtd_comp_params, subpel_search);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("distwtdcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time / num_loops);
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+
+    for (int i = 0; i < num_loops; ++i)
+      test_impl(nullptr, nullptr, 0, 0, nullptr, output2, pred8, in_w, in_h,
+                sub_x_q3, sub_y_q3, ref8, in_w, &dist_wtd_comp_params,
+                subpel_search);
+
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("distwtdcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time1 / num_loops);
+  }
+
+  libaom_test::ACMRandom rnd_;
+};  // class AV1DISTWTDCOMPAVGUPSAMPLEDTest
+
+class DistWtdCompAvgTest
+    : public ::testing::WithParamInterface<DistWtdCompAvgParam>,
+      public ::testing::Test {
+ public:
+  DistWtdCompAvgTest()
+      : width_(GET_PARAM(0)), height_(GET_PARAM(1)), bd_(GET_PARAM(3)) {}
+
+  static void SetUpTestSuite() {
+    reference_data8_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kDataBufferSize));
+    ASSERT_NE(reference_data8_, nullptr);
+    second_pred8_ =
+        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
+    ASSERT_NE(second_pred8_, nullptr);
+    comp_pred8_ =
+        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
+    ASSERT_NE(comp_pred8_, nullptr);
+    comp_pred8_test_ =
+        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
+    ASSERT_NE(comp_pred8_test_, nullptr);
+    reference_data16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, kDataBufferSize * sizeof(uint16_t)));
+    ASSERT_NE(reference_data16_, nullptr);
+    second_pred16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
+    ASSERT_NE(second_pred16_, nullptr);
+    comp_pred16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
+    ASSERT_NE(comp_pred16_, nullptr);
+    comp_pred16_test_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
+    ASSERT_NE(comp_pred16_test_, nullptr);
+  }
+
+  static void TearDownTestSuite() {
+    aom_free(reference_data8_);
+    reference_data8_ = nullptr;
+    aom_free(second_pred8_);
+    second_pred8_ = nullptr;
+    aom_free(comp_pred8_);
+    comp_pred8_ = nullptr;
+    aom_free(comp_pred8_test_);
+    comp_pred8_test_ = nullptr;
+    aom_free(reference_data16_);
+    reference_data16_ = nullptr;
+    aom_free(second_pred16_);
+    second_pred16_ = nullptr;
+    aom_free(comp_pred16_);
+    comp_pred16_ = nullptr;
+    aom_free(comp_pred16_test_);
+    comp_pred16_test_ = nullptr;
+  }
+
+ protected:
+  // Handle up to 4 128x128 blocks, with stride up to 256
+  static const int kDataAlignment = 16;
+  static const int kDataBlockSize = 128 * 256;
+  static const int kDataBufferSize = 4 * kDataBlockSize;
+
+  void SetUp() override {
+    if (bd_ == -1) {
+      use_high_bit_depth_ = false;
+      bit_depth_ = AOM_BITS_8;
+      reference_data_ = reference_data8_;
+      second_pred_ = second_pred8_;
+      comp_pred_ = comp_pred8_;
+      comp_pred_test_ = comp_pred8_test_;
+    } else {
+      use_high_bit_depth_ = true;
+      bit_depth_ = static_cast<aom_bit_depth_t>(bd_);
+      reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
+      second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
+      comp_pred_ = CONVERT_TO_BYTEPTR(comp_pred16_);
+      comp_pred_test_ = CONVERT_TO_BYTEPTR(comp_pred16_test_);
+    }
+    mask_ = (1 << bit_depth_) - 1;
+    reference_stride_ = width_ * 2;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  virtual uint8_t *GetReference(int block_idx) {
+    if (use_high_bit_depth_)
+      return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) +
+                                block_idx * kDataBlockSize);
+    return reference_data_ + block_idx * kDataBlockSize;
+  }
+
+  void ReferenceDistWtdCompAvg(int block_idx) {
+    const uint8_t *const reference8 = GetReference(block_idx);
+    const uint8_t *const second_pred8 = second_pred_;
+    uint8_t *const comp_pred8 = comp_pred_;
+    const uint16_t *const reference16 =
+        CONVERT_TO_SHORTPTR(GetReference(block_idx));
+    const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
+    uint16_t *const comp_pred16 = CONVERT_TO_SHORTPTR(comp_pred_);
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          const int tmp =
+              second_pred8[h * width_ + w] * jcp_param_.bck_offset +
+              reference8[h * reference_stride_ + w] * jcp_param_.fwd_offset;
+          comp_pred8[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4);
+        } else {
+          const int tmp =
+              second_pred16[h * width_ + w] * jcp_param_.bck_offset +
+              reference16[h * reference_stride_ + w] * jcp_param_.fwd_offset;
+          comp_pred16[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4);
+        }
+      }
+    }
+  }
+
+  void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) {
+    uint8_t *data8 = data;
+    uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          data8[h * stride + w] = static_cast<uint8_t>(fill_constant);
+        } else {
+          data16[h * stride + w] = fill_constant;
+        }
+      }
+    }
+  }
+
+  void FillRandom(uint8_t *data, int stride) {
+    uint8_t *data8 = data;
+    uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          data8[h * stride + w] = rnd_.Rand8();
+        } else {
+          data16[h * stride + w] = rnd_.Rand16() & mask_;
+        }
+      }
+    }
+  }
+
+  void dist_wtd_comp_avg(int block_idx) {
+    const uint8_t *const reference = GetReference(block_idx);
+
+    API_REGISTER_STATE_CHECK(GET_PARAM(2)(comp_pred_test_, second_pred_, width_,
+                                          height_, reference, reference_stride_,
+                                          &jcp_param_));
+  }
+
+  void CheckCompAvg() {
+    for (int j = 0; j < 2; ++j) {
+      for (int i = 0; i < 4; ++i) {
+        jcp_param_.fwd_offset = quant_dist_lookup_table[i][j];
+        jcp_param_.bck_offset = quant_dist_lookup_table[i][1 - j];
+
+        ReferenceDistWtdCompAvg(0);
+        dist_wtd_comp_avg(0);
+
+        for (int y = 0; y < height_; ++y)
+          for (int x = 0; x < width_; ++x)
+            ASSERT_EQ(comp_pred_[y * width_ + x],
+                      comp_pred_test_[y * width_ + x]);
+      }
+    }
+  }
+
+  int width_, height_, mask_, bd_;
+  aom_bit_depth_t bit_depth_;
+  static uint8_t *reference_data_;
+  static uint8_t *second_pred_;
+  bool use_high_bit_depth_;
+  static uint8_t *reference_data8_;
+  static uint8_t *second_pred8_;
+  static uint16_t *reference_data16_;
+  static uint16_t *second_pred16_;
+  int reference_stride_;
+  static uint8_t *comp_pred_;
+  static uint8_t *comp_pred8_;
+  static uint16_t *comp_pred16_;
+  static uint8_t *comp_pred_test_;
+  static uint8_t *comp_pred8_test_;
+  static uint16_t *comp_pred16_test_;
+  DIST_WTD_COMP_PARAMS jcp_param_;
+
+  ACMRandom rnd_;
+};
+
+#if CONFIG_AV1_HIGHBITDEPTH
+class AV1HighBDDISTWTDCOMPAVGTest
+    : public ::testing::TestWithParam<HighbdDISTWTDCOMPAVGParam> {
+ public:
+  ~AV1HighBDDISTWTDCOMPAVGTest() override = default;
+  void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+ protected:
+  void RunCheckOutput(distwtdcompavg_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(2);
+    const int bd = GET_PARAM(0);
+    uint16_t pred8[kMaxSize * kMaxSize];
+    uint16_t ref8[kMaxSize * kMaxSize];
+    uint16_t output[kMaxSize * kMaxSize];
+    uint16_t output2[kMaxSize * kMaxSize];
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+        ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
+    dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
+
+    for (int ii = 0; ii < 2; ii++) {
+      for (int jj = 0; jj < 4; jj++) {
+        dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
+        dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[jj][1 - ii];
+
+        const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
+        const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
+        aom_highbd_dist_wtd_comp_avg_pred_c(
+            CONVERT_TO_BYTEPTR(output),
+            CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w, in_h,
+            CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w,
+            &dist_wtd_comp_params);
+        test_impl(CONVERT_TO_BYTEPTR(output2),
+                  CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
+                  in_h, CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c,
+                  in_w, &dist_wtd_comp_params);
+
+        for (int i = 0; i < in_h; ++i) {
+          for (int j = 0; j < in_w; ++j) {
+            int idx = i * in_w + j;
+            ASSERT_EQ(output[idx], output2[idx])
+                << "Mismatch at unit tests for AV1HighBDDISTWTDCOMPAVGTest\n"
+                << in_w << "x" << in_h << " Pixel mismatch at index " << idx
+                << " = (" << i << ", " << j << ")";
+          }
+        }
+      }
+    }
+  }
+  void RunSpeedTest(distwtdcompavg_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(2);
+    const int bd = GET_PARAM(0);
+    uint16_t pred8[kMaxSize * kMaxSize];
+    uint16_t ref8[kMaxSize * kMaxSize];
+    uint16_t output[kMaxSize * kMaxSize];
+    uint16_t output2[kMaxSize * kMaxSize];
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+        ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
+    dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
+
+    dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
+    dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
+
+    const int num_loops = 1000000000 / (in_w + in_h);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      aom_highbd_dist_wtd_comp_avg_pred_c(
+          CONVERT_TO_BYTEPTR(output), CONVERT_TO_BYTEPTR(pred8), in_w, in_h,
+          CONVERT_TO_BYTEPTR(ref8), in_w, &dist_wtd_comp_params);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("highbddistwtdcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time / num_loops);
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+
+    for (int i = 0; i < num_loops; ++i)
+      test_impl(CONVERT_TO_BYTEPTR(output2), CONVERT_TO_BYTEPTR(pred8), in_w,
+                in_h, CONVERT_TO_BYTEPTR(ref8), in_w, &dist_wtd_comp_params);
+
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("highbddistwtdcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time1 / num_loops);
+  }
+
+  libaom_test::ACMRandom rnd_;
+};  // class AV1HighBDDISTWTDCOMPAVGTest
+
+class AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest
+    : public ::testing::TestWithParam<HighbdDISTWTDCOMPAVGUPSAMPLEDParam> {
+ public:
+  ~AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest() override = default;
+  void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+ protected:
+  void RunCheckOutput(highbddistwtdcompavgupsampled_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(2);
+    const int bd = GET_PARAM(0);
+    uint16_t pred8[kMaxSize * kMaxSize];
+    uint16_t ref8[kMaxSize * kMaxSize];
+    DECLARE_ALIGNED(16, uint16_t, output[kMaxSize * kMaxSize]);
+    DECLARE_ALIGNED(16, uint16_t, output2[kMaxSize * kMaxSize]);
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+        ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
+    dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
+    int sub_x_q3, sub_y_q3;
+    int subpel_search;
+    for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
+         ++subpel_search) {
+      for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
+        for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
+          for (int ii = 0; ii < 2; ii++) {
+            for (int jj = 0; jj < 4; jj++) {
+              dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
+              dist_wtd_comp_params.bck_offset =
+                  quant_dist_lookup_table[jj][1 - ii];
+
+              const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
+              const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
+
+              aom_highbd_dist_wtd_comp_avg_upsampled_pred_c(
+                  nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output),
+                  CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
+                  in_h, sub_x_q3, sub_y_q3,
+                  CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w, bd,
+                  &dist_wtd_comp_params, subpel_search);
+              test_impl(nullptr, nullptr, 0, 0, nullptr,
+                        CONVERT_TO_BYTEPTR(output2),
+                        CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c,
+                        in_w, in_h, sub_x_q3, sub_y_q3,
+                        CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c,
+                        in_w, bd, &dist_wtd_comp_params, subpel_search);
+
+              for (int i = 0; i < in_h; ++i) {
+                for (int j = 0; j < in_w; ++j) {
+                  int idx = i * in_w + j;
+                  ASSERT_EQ(output[idx], output2[idx])
+                      << "Mismatch at unit tests for "
+                         "AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest\n"
+                      << in_w << "x" << in_h << " Pixel mismatch at index "
+                      << idx << " = (" << i << ", " << j
+                      << "), sub pixel offset = (" << sub_y_q3 << ", "
+                      << sub_x_q3 << ")";
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  void RunSpeedTest(highbddistwtdcompavgupsampled_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(2);
+    const int bd = GET_PARAM(0);
+    uint16_t pred8[kMaxSize * kMaxSize];
+    uint16_t ref8[kMaxSize * kMaxSize];
+    DECLARE_ALIGNED(16, uint16_t, output[kMaxSize * kMaxSize]);
+    DECLARE_ALIGNED(16, uint16_t, output2[kMaxSize * kMaxSize]);
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+        ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
+    dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
+
+    dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
+    dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
+    int sub_x_q3 = 0;
+    int sub_y_q3 = 0;
+    const int num_loops = 1000000000 / (in_w + in_h);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    int subpel_search = USE_8_TAPS;  // set to USE_4_TAPS to test 4-tap filter.
+    for (int i = 0; i < num_loops; ++i)
+      aom_highbd_dist_wtd_comp_avg_upsampled_pred_c(
+          nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output),
+          CONVERT_TO_BYTEPTR(pred8), in_w, in_h, sub_x_q3, sub_y_q3,
+          CONVERT_TO_BYTEPTR(ref8), in_w, bd, &dist_wtd_comp_params,
+          subpel_search);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("highbddistwtdcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w,
+           in_h, 1000.0 * elapsed_time / num_loops);
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+
+    for (int i = 0; i < num_loops; ++i)
+      test_impl(nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output2),
+                CONVERT_TO_BYTEPTR(pred8), in_w, in_h, sub_x_q3, sub_y_q3,
+                CONVERT_TO_BYTEPTR(ref8), in_w, bd, &dist_wtd_comp_params,
+                subpel_search);
+
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("highbddistwtdcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w,
+           in_h, 1000.0 * elapsed_time1 / num_loops);
+  }
+
+  libaom_test::ACMRandom rnd_;
+};      // class AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+}  // namespace AV1DISTWTDCOMPAVG
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_COMP_AVG_PRED_TEST_H_
diff --git a/third_party/aom/test/comp_mask_pred_test.cc b/third_party/aom/test/comp_mask_pred_test.cc
new file mode 100644
index 0000000000..b65730aa57
--- /dev/null
+++ b/third_party/aom/test/comp_mask_pred_test.cc
@@ -0,0 +1,856 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdlib>
+#include <new>
+#include <tuple>
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/variance.h"
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
+#include "av1/common/reconinter.h"
+#include "av1/encoder/reconinter_enc.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+typedef void (*comp_mask_pred_func)(uint8_t *comp_pred, const uint8_t *pred,
+                                    int width, int height, const uint8_t *ref,
+                                    int ref_stride, const uint8_t *mask,
+                                    int mask_stride, int invert_mask);
+
+typedef void (*comp_avg_pred_func)(uint8_t *comp_pred, const uint8_t *pred,
+                                   int width, int height, const uint8_t *ref,
+                                   int ref_stride);
+
+#if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON
+const BLOCK_SIZE kCompMaskPredParams[] = {
+  BLOCK_8X8,   BLOCK_8X16, BLOCK_8X32,  BLOCK_16X8, BLOCK_16X16,
+  BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32
+};
+#endif
+
+class AV1CompMaskPredBase : public ::testing::Test {
+ public:
+  ~AV1CompMaskPredBase() override;
+  void SetUp() override;
+
+  void TearDown() override;
+
+ protected:
+  bool CheckResult(int width, int height) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        const int idx = y * width + x;
+        if (comp_pred1_[idx] != comp_pred2_[idx]) {
+          printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
+          printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  libaom_test::ACMRandom rnd_;
+  uint8_t *comp_pred1_;
+  uint8_t *comp_pred2_;
+  uint8_t *pred_;
+  uint8_t *ref_buffer_;
+  uint8_t *ref_;
+};
+
+AV1CompMaskPredBase::~AV1CompMaskPredBase() = default;
+
+void AV1CompMaskPredBase::SetUp() {
+  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
+  av1_init_wedge_masks();
+  comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
+  ASSERT_NE(comp_pred1_, nullptr);
+  comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
+  ASSERT_NE(comp_pred2_, nullptr);
+  pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
+  ASSERT_NE(pred_, nullptr);
+  // The biggest block size is MAX_SB_SQUARE(128*128), however for the
+  // convolution we need to access 3 bytes before and 4 bytes after (for an
+  // 8-tap filter), in both directions, so we need to allocate
+  // (128 + 7) * (128 + 7) = MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49
+  ref_buffer_ =
+      (uint8_t *)aom_memalign(16, MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49);
+  ASSERT_NE(ref_buffer_, nullptr);
+  // Start of the actual block where the convolution will be computed
+  ref_ = ref_buffer_ + (3 * MAX_SB_SIZE + 3);
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    pred_[i] = rnd_.Rand8();
+  }
+  for (int i = 0; i < MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49; ++i) {
+    ref_buffer_[i] = rnd_.Rand8();
+  }
+}
+
+void AV1CompMaskPredBase::TearDown() {
+  aom_free(comp_pred1_);
+  aom_free(comp_pred2_);
+  aom_free(pred_);
+  aom_free(ref_buffer_);
+}
+
+typedef std::tuple<comp_mask_pred_func, BLOCK_SIZE> CompMaskPredParam;
+
+class AV1CompMaskPredTest
+    : public AV1CompMaskPredBase,
+      public ::testing::WithParamInterface<CompMaskPredParam> {
+ protected:
+  void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
+  void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
+};
+
+void AV1CompMaskPredTest::RunCheckOutput(comp_mask_pred_func test_impl,
+                                         BLOCK_SIZE bsize, int inv) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int wedge_types = get_wedge_types_lookup(bsize);
+  for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+    const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
+
+    aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w,
+                         inv);
+    test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv);
+
+    ASSERT_EQ(CheckResult(w, h), true)
+        << " wedge " << wedge_index << " inv " << inv;
+  }
+}
+
+void AV1CompMaskPredTest::RunSpeedTest(comp_mask_pred_func test_impl,
+                                       BLOCK_SIZE bsize) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int wedge_types = get_wedge_types_lookup(bsize);
+  int wedge_index = wedge_types / 2;
+  const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
+  const int num_loops = 1000000000 / (w + h);
+
+  comp_mask_pred_func funcs[2] = { aom_comp_mask_pred_c, test_impl };
+  double elapsed_time[2] = { 0 };
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    comp_mask_pred_func func = funcs[i];
+    for (int j = 0; j < num_loops; ++j) {
+      func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 0);
+    }
+    aom_usec_timer_mark(&timer);
+    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time / num_loops;
+  }
+  printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
+         elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompMaskPredTest);
+
+TEST_P(AV1CompMaskPredTest, CheckOutput) {
+  // inv = 0, 1
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
+}
+
+TEST_P(AV1CompMaskPredTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
+}
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_SUITE_P(
+    SSSE3, AV1CompMaskPredTest,
+    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
+                       ::testing::ValuesIn(kCompMaskPredParams)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1CompMaskPredTest,
+    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2),
+                       ::testing::ValuesIn(kCompMaskPredParams)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1CompMaskPredTest,
+    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_neon),
+                       ::testing::ValuesIn(kCompMaskPredParams)));
+#endif
+
+#if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON
+const BLOCK_SIZE kValidBlockSize[] = {
+  BLOCK_4X4,     BLOCK_8X8,   BLOCK_8X16,  BLOCK_8X32,   BLOCK_16X8,
+  BLOCK_16X16,   BLOCK_16X32, BLOCK_32X8,  BLOCK_32X16,  BLOCK_32X32,
+  BLOCK_32X64,   BLOCK_64X32, BLOCK_64X64, BLOCK_64X128, BLOCK_128X64,
+  BLOCK_128X128, BLOCK_16X64, BLOCK_64X16
+};
+#endif
+
+typedef void (*upsampled_pred_func)(MACROBLOCKD *xd, const AV1_COMMON *const cm,
+                                    int mi_row, int mi_col, const MV *const mv,
+                                    uint8_t *comp_pred, int width, int height,
+                                    int subpel_x_q3, int subpel_y_q3,
+                                    const uint8_t *ref, int ref_stride,
+                                    int subpel_search);
+
+typedef std::tuple<upsampled_pred_func, BLOCK_SIZE> UpsampledPredParam;
+
+class AV1UpsampledPredTest
+    : public AV1CompMaskPredBase,
+      public ::testing::WithParamInterface<UpsampledPredParam> {
+ protected:
+  void RunCheckOutput(upsampled_pred_func test_impl, BLOCK_SIZE bsize);
+  void RunSpeedTest(upsampled_pred_func test_impl, BLOCK_SIZE bsize,
+                    int havSub);
+};
+
+void AV1UpsampledPredTest::RunCheckOutput(upsampled_pred_func test_impl,
+                                          BLOCK_SIZE bsize) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  for (int subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
+       ++subpel_search) {
+    // loop through subx and suby
+    for (int sub = 0; sub < 8 * 8; ++sub) {
+      int subx = sub & 0x7;
+      int suby = (sub >> 3);
+
+      aom_upsampled_pred_c(nullptr, nullptr, 0, 0, nullptr, comp_pred1_, w, h,
+                           subx, suby, ref_, MAX_SB_SIZE, subpel_search);
+
+      test_impl(nullptr, nullptr, 0, 0, nullptr, comp_pred2_, w, h, subx, suby,
+                ref_, MAX_SB_SIZE, subpel_search);
+      ASSERT_EQ(CheckResult(w, h), true)
+          << "sub (" << subx << "," << suby << ")";
+    }
+  }
+}
+
+void AV1UpsampledPredTest::RunSpeedTest(upsampled_pred_func test_impl,
+                                        BLOCK_SIZE bsize, int havSub) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int subx = havSub ? 3 : 0;
+  const int suby = havSub ? 4 : 0;
+
+  const int num_loops = 1000000000 / (w + h);
+  upsampled_pred_func funcs[2] = { aom_upsampled_pred_c, test_impl };
+  double elapsed_time[2] = { 0 };
+  int subpel_search = USE_8_TAPS;  // set to USE_4_TAPS to test 4-tap filter.
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    upsampled_pred_func func = funcs[i];
+    for (int j = 0; j < num_loops; ++j) {
+      func(nullptr, nullptr, 0, 0, nullptr, comp_pred1_, w, h, subx, suby, ref_,
+           MAX_SB_SIZE, subpel_search);
+    }
+    aom_usec_timer_mark(&timer);
+    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time / num_loops;
+  }
+  printf("UpsampledPred[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h,
+         elapsed_time[0], elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1UpsampledPredTest);
+
+TEST_P(AV1UpsampledPredTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1));
+}
+
+TEST_P(AV1UpsampledPredTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AV1UpsampledPredTest,
+    ::testing::Combine(::testing::Values(&aom_upsampled_pred_sse2),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1UpsampledPredTest,
+    ::testing::Combine(::testing::Values(&aom_upsampled_pred_neon),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+typedef std::tuple<comp_avg_pred_func, BLOCK_SIZE> CompAvgPredParam;
+
+class AV1CompAvgPredTest : public ::testing::TestWithParam<CompAvgPredParam> {
+ public:
+  ~AV1CompAvgPredTest() override;
+  void SetUp() override;
+
+  void TearDown() override;
+
+ protected:
+  void RunCheckOutput(comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
+  void RunSpeedTest(comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
+  bool CheckResult(int width, int height) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        const int idx = y * width + x;
+        if (comp_pred1_[idx] != comp_pred2_[idx]) {
+          printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, x, y);
+          printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  libaom_test::ACMRandom rnd_;
+  uint8_t *comp_pred1_;
+  uint8_t *comp_pred2_;
+  uint8_t *pred_;
+  uint8_t *ref_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompAvgPredTest);
+
+AV1CompAvgPredTest::~AV1CompAvgPredTest() = default;
+
+void AV1CompAvgPredTest::SetUp() {
+  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
+
+  comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
+  ASSERT_NE(comp_pred1_, nullptr);
+  comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
+  ASSERT_NE(comp_pred2_, nullptr);
+  pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
+  ASSERT_NE(pred_, nullptr);
+  ref_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
+  ASSERT_NE(ref_, nullptr);
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    pred_[i] = rnd_.Rand8();
+  }
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    ref_[i] = rnd_.Rand8();
+  }
+}
+
+void AV1CompAvgPredTest::TearDown() {
+  aom_free(comp_pred1_);
+  aom_free(comp_pred2_);
+  aom_free(pred_);
+  aom_free(ref_);
+}
+
+void AV1CompAvgPredTest::RunCheckOutput(comp_avg_pred_func test_impl,
+                                        BLOCK_SIZE bsize) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  aom_comp_avg_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE);
+  test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE);
+
+  ASSERT_EQ(CheckResult(w, h), true);
+}
+
+void AV1CompAvgPredTest::RunSpeedTest(comp_avg_pred_func test_impl,
+                                      BLOCK_SIZE bsize) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int num_loops = 1000000000 / (w + h);
+
+  comp_avg_pred_func functions[2] = { aom_comp_avg_pred_c, test_impl };
+  double elapsed_time[2] = { 0.0 };
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    comp_avg_pred_func func = functions[i];
+    for (int j = 0; j < num_loops; ++j) {
+      func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time;
+  }
+  printf("CompAvgPred %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
+         elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+TEST_P(AV1CompAvgPredTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1));
+}
+
+TEST_P(AV1CompAvgPredTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
+}
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1CompAvgPredTest,
+    ::testing::Combine(::testing::Values(&aom_comp_avg_pred_avx2),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1CompAvgPredTest,
+    ::testing::Combine(::testing::Values(&aom_comp_avg_pred_neon),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+class AV1HighbdCompMaskPredTestBase : public ::testing::Test {
+ public:
+  ~AV1HighbdCompMaskPredTestBase() override;
+  void SetUp() override;
+
+  void TearDown() override;
+
+ protected:
+  bool CheckResult(int width, int height) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        const int idx = y * width + x;
+        if (comp_pred1_[idx] != comp_pred2_[idx]) {
+          printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
+          printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  libaom_test::ACMRandom rnd_;
+  uint16_t *comp_pred1_;
+  uint16_t *comp_pred2_;
+  uint16_t *pred_;
+  uint16_t *ref_buffer_;
+  uint16_t *ref_;
+};
+
+AV1HighbdCompMaskPredTestBase::~AV1HighbdCompMaskPredTestBase() = default;
+
+void AV1HighbdCompMaskPredTestBase::SetUp() {
+  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
+  av1_init_wedge_masks();
+
+  comp_pred1_ =
+      (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_));
+  ASSERT_NE(comp_pred1_, nullptr);
+  comp_pred2_ =
+      (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_));
+  ASSERT_NE(comp_pred2_, nullptr);
+  pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_));
+  ASSERT_NE(pred_, nullptr);
+  // The biggest block size is MAX_SB_SQUARE(128*128), however for the
+  // convolution we need to access 3 elements before and 4 elements after (for
+  // an 8-tap filter), in both directions, so we need to allocate (128 + 7) *
+  // (128 + 7) = (MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49) *
+  // sizeof(*ref_buffer_)
+  ref_buffer_ = (uint16_t *)aom_memalign(
+      16, (MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49) * sizeof(*ref_buffer_));
+  ASSERT_NE(ref_buffer_, nullptr);
+  // Start of the actual block where the convolution will be computed
+  ref_ = ref_buffer_ + (3 * MAX_SB_SIZE + 3);
+}
+
+void AV1HighbdCompMaskPredTestBase::TearDown() {
+  aom_free(comp_pred1_);
+  aom_free(comp_pred2_);
+  aom_free(pred_);
+  aom_free(ref_buffer_);
+}
+
+typedef void (*highbd_comp_mask_pred_func)(uint8_t *comp_pred8,
+                                           const uint8_t *pred8, int width,
+                                           int height, const uint8_t *ref8,
+                                           int ref_stride, const uint8_t *mask,
+                                           int mask_stride, int invert_mask);
+
+typedef std::tuple<highbd_comp_mask_pred_func, BLOCK_SIZE, int>
+    HighbdCompMaskPredParam;
+
+class AV1HighbdCompMaskPredTest
+    : public AV1HighbdCompMaskPredTestBase,
+      public ::testing::WithParamInterface<HighbdCompMaskPredParam> {
+ public:
+  ~AV1HighbdCompMaskPredTest() override;
+
+ protected:
+  void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
+  void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
+};
+
+AV1HighbdCompMaskPredTest::~AV1HighbdCompMaskPredTest() = default;
+
+void AV1HighbdCompMaskPredTest::RunCheckOutput(
+    highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
+  int bd_ = GET_PARAM(2);
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int wedge_types = get_wedge_types_lookup(bsize);
+
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
+  }
+  for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
+    ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
+  }
+
+  for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+    const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
+
+    aom_highbd_comp_mask_pred_c(
+        CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
+        CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
+
+    test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
+              CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
+
+    ASSERT_EQ(CheckResult(w, h), true)
+        << " wedge " << wedge_index << " inv " << inv;
+  }
+}
+
+void AV1HighbdCompMaskPredTest::RunSpeedTest(
+    highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize) {
+  int bd_ = GET_PARAM(2);
+
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int wedge_types = get_wedge_types_lookup(bsize);
+  int wedge_index = wedge_types / 2;
+
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
+  }
+  for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
+    ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
+  }
+
+  const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
+  const int num_loops = 1000000000 / (w + h);
+
+  highbd_comp_mask_pred_func funcs[2] = { aom_highbd_comp_mask_pred_c,
+                                          test_impl };
+  double elapsed_time[2] = { 0 };
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    highbd_comp_mask_pred_func func = funcs[i];
+    for (int j = 0; j < num_loops; ++j) {
+      func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
+           CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0);
+    }
+    aom_usec_timer_mark(&timer);
+    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time / num_loops;
+  }
+  printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
+         elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompMaskPredTest);
+
+TEST_P(AV1HighbdCompMaskPredTest, CheckOutput) {
+  // inv = 0, 1
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
+}
+
+TEST_P(AV1HighbdCompMaskPredTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
+}
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1HighbdCompMaskPredTest,
+    ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_neon),
+                       ::testing::ValuesIn(kCompMaskPredParams),
+                       ::testing::Range(8, 13, 2)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1HighbdCompMaskPredTest,
+    ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_avx2),
+                       ::testing::ValuesIn(kCompMaskPredParams),
+                       ::testing::Range(8, 13, 2)));
+#endif
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AV1HighbdCompMaskPredTest,
+    ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_sse2),
+                       ::testing::ValuesIn(kCompMaskPredParams),
+                       ::testing::Range(8, 13, 2)));
+#endif
+
+typedef void (*highbd_upsampled_pred_func)(
+    MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
+    const MV *const mv, uint8_t *comp_pred8, int width, int height,
+    int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride,
+    int bd, int subpel_search);
+
+typedef std::tuple<highbd_upsampled_pred_func, BLOCK_SIZE, int>
+    HighbdUpsampledPredParam;
+
+class AV1HighbdUpsampledPredTest
+    : public AV1HighbdCompMaskPredTestBase,
+      public ::testing::WithParamInterface<HighbdUpsampledPredParam> {
+ public:
+  ~AV1HighbdUpsampledPredTest() override;
+
+ protected:
+  void RunCheckOutput(highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize);
+  void RunSpeedTest(highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize,
+                    int havSub);
+};
+
+AV1HighbdUpsampledPredTest::~AV1HighbdUpsampledPredTest() = default;
+
+void AV1HighbdUpsampledPredTest::RunCheckOutput(
+    highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize) {
+  int bd_ = GET_PARAM(2);
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
+  }
+  for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
+    ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
+  }
+
+  for (int subpel_search = 1; subpel_search <= 2; ++subpel_search) {
+    // loop through subx and suby
+    for (int sub = 0; sub < 8 * 8; ++sub) {
+      int subx = sub & 0x7;
+      int suby = (sub >> 3);
+
+      aom_highbd_upsampled_pred_c(nullptr, nullptr, 0, 0, nullptr,
+                                  CONVERT_TO_BYTEPTR(comp_pred1_), w, h, subx,
+                                  suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE,
+                                  bd_, subpel_search);
+
+      test_impl(nullptr, nullptr, 0, 0, nullptr,
+                CONVERT_TO_BYTEPTR(comp_pred2_), w, h, subx, suby,
+                CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search);
+
+      ASSERT_EQ(CheckResult(w, h), true)
+          << "sub (" << subx << "," << suby << ")";
+    }
+  }
+}
+
+void AV1HighbdUpsampledPredTest::RunSpeedTest(
+    highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize, int havSub) {
+  int bd_ = GET_PARAM(2);
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int subx = havSub ? 3 : 0;
+  const int suby = havSub ? 4 : 0;
+
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
+  }
+  for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
+    ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
+  }
+
+  const int num_loops = 1000000000 / (w + h);
+  highbd_upsampled_pred_func funcs[2] = { &aom_highbd_upsampled_pred_c,
+                                          test_impl };
+  double elapsed_time[2] = { 0 };
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    highbd_upsampled_pred_func func = funcs[i];
+    int subpel_search = 2;  // set to 1 to test 4-tap filter.
+    for (int j = 0; j < num_loops; ++j) {
+      func(nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(comp_pred1_), w,
+           h, subx, suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_,
+           subpel_search);
+    }
+    aom_usec_timer_mark(&timer);
+    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time / num_loops;
+  }
+  printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
+         elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdUpsampledPredTest);
+
+TEST_P(AV1HighbdUpsampledPredTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1));
+}
+
+TEST_P(AV1HighbdUpsampledPredTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AV1HighbdUpsampledPredTest,
+    ::testing::Combine(::testing::Values(&aom_highbd_upsampled_pred_sse2),
+                       ::testing::ValuesIn(kValidBlockSize),
+                       ::testing::Range(8, 13, 2)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1HighbdUpsampledPredTest,
+    ::testing::Combine(::testing::Values(&aom_highbd_upsampled_pred_neon),
+                       ::testing::ValuesIn(kValidBlockSize),
+                       ::testing::Range(8, 13, 2)));
+#endif
+
+typedef void (*highbd_comp_avg_pred_func)(uint8_t *comp_pred,
+                                          const uint8_t *pred, int width,
+                                          int height, const uint8_t *ref,
+                                          int ref_stride);
+
+typedef std::tuple<highbd_comp_avg_pred_func, BLOCK_SIZE, int>
+    HighbdCompAvgPredParam;
+
+class AV1HighbdCompAvgPredTest
+    : public ::testing::TestWithParam<HighbdCompAvgPredParam> {
+ public:
+  ~AV1HighbdCompAvgPredTest() override;
+  void SetUp() override;
+
+ protected:
+  void RunCheckOutput(highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
+  void RunSpeedTest(highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
+  bool CheckResult(int width, int height) const {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        const int idx = y * width + x;
+        if (comp_pred1_[idx] != comp_pred2_[idx]) {
+          printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, x, y);
+          printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  libaom_test::ACMRandom rnd_;
+  uint16_t *comp_pred1_;
+  uint16_t *comp_pred2_;
+  uint16_t *pred_;
+  uint16_t *ref_;
+};
+
+AV1HighbdCompAvgPredTest::~AV1HighbdCompAvgPredTest() {
+  aom_free(comp_pred1_);
+  aom_free(comp_pred2_);
+  aom_free(pred_);
+  aom_free(ref_);
+}
+
+void AV1HighbdCompAvgPredTest::SetUp() {
+  int bd_ = GET_PARAM(2);
+  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
+
+  comp_pred1_ =
+      (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_));
+  ASSERT_NE(comp_pred1_, nullptr);
+  comp_pred2_ =
+      (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_));
+  ASSERT_NE(comp_pred2_, nullptr);
+  pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_));
+  ASSERT_NE(pred_, nullptr);
+  ref_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*ref_));
+  ASSERT_NE(ref_, nullptr);
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
+  }
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    ref_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
+  }
+}
+
+void AV1HighbdCompAvgPredTest::RunCheckOutput(
+    highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(comp_pred1_),
+                             CONVERT_TO_BYTEPTR(pred_), w, h,
+                             CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE);
+  test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
+            CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE);
+
+  ASSERT_EQ(CheckResult(w, h), true);
+}
+
+void AV1HighbdCompAvgPredTest::RunSpeedTest(highbd_comp_avg_pred_func test_impl,
+                                            BLOCK_SIZE bsize) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int num_loops = 1000000000 / (w + h);
+
+  highbd_comp_avg_pred_func functions[2] = { aom_highbd_comp_avg_pred_c,
+                                             test_impl };
+  double elapsed_time[2] = { 0.0 };
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    highbd_comp_avg_pred_func func = functions[i];
+    for (int j = 0; j < num_loops; ++j) {
+      func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
+           CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time;
+  }
+  printf("HighbdCompAvg %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
+         elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompAvgPredTest);
+
+TEST_P(AV1HighbdCompAvgPredTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1));
+}
+
+TEST_P(AV1HighbdCompAvgPredTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
+}
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1HighbdCompAvgPredTest,
+    ::testing::Combine(::testing::Values(&aom_highbd_comp_avg_pred_neon),
+                       ::testing::ValuesIn(kValidBlockSize),
+                       ::testing::Range(8, 13, 2)));
+#endif
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/convolve_test.cc b/third_party/aom/test/convolve_test.cc
new file mode 100644
index 0000000000..c97f814057
--- /dev/null
+++ b/third_party/aom/test/convolve_test.cc
@@ -0,0 +1,922 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <string.h>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
+#include "av1/common/filter.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+
+static const unsigned int kMaxDimension = MAX_SB_SIZE;
+
+static const int16_t kInvalidFilter[8] = {};
+static const int kNumFilterBanks = SWITCHABLE_FILTERS;
+static const int kNumFilters = 16;
+
+typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
+                             uint8_t *dst, ptrdiff_t dst_stride,
+                             const int16_t *filter_x, int filter_x_stride,
+                             const int16_t *filter_y, int filter_y_stride,
+                             int w, int h);
+
+struct ConvolveFunctions {
+  ConvolveFunctions(ConvolveFunc h8, ConvolveFunc v8, int bd)
+      : h8_(h8), v8_(v8), use_highbd_(bd) {}
+
+  ConvolveFunc h8_;
+  ConvolveFunc v8_;
+  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
+};
+
+typedef std::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
+
+#define ALL_SIZES_64(convolve_fn)                                         \
+  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
+      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
+      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
+      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
+      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
+      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
+      make_tuple(64, 64, &convolve_fn)
+
+#define ALL_SIZES(convolve_fn)                                          \
+  make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
+      make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
+
+// Reference 8-tap subpixel filter, slightly modified to fit into this test.
+#define AV1_FILTER_WEIGHT 128
+#define AV1_FILTER_SHIFT 7
+uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }
+
+void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride,
+                        const int16_t *HFilter, const int16_t *VFilter,
+                        uint8_t *dst_ptr, unsigned int dst_stride,
+                        unsigned int output_width, unsigned int output_height) {
+  // Between passes, we use an intermediate buffer whose height is extended to
+  // have enough horizontally filtered values as input for the vertical pass.
+  // This buffer is allocated to be big enough for the largest block type we
+  // support.
+  const int kInterp_Extend = 4;
+  const unsigned int intermediate_height =
+      (kInterp_Extend - 1) + output_height + kInterp_Extend;
+  unsigned int i, j;
+
+  assert(intermediate_height > 7);
+
+  // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
+  // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
+  //                                 + kInterp_Extend
+  //                               = 3 + 16 + 4
+  //                               = 23
+  // and filter_max_width          = 16
+  //
+  uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
+  const int intermediate_next_stride =
+      1 - static_cast<int>(intermediate_height * output_width);
+
+  // Horizontal pass (src -> transposed intermediate).
+  uint8_t *output_ptr = intermediate_buffer;
+  const int src_next_row_stride = src_stride - output_width;
+  src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
+  for (i = 0; i < intermediate_height; ++i) {
+    for (j = 0; j < output_width; ++j) {
+      // Apply filter...
+      const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
+                       (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
+                       (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
+                       (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
+                       (AV1_FILTER_WEIGHT >> 1);  // Rounding
+
+      // Normalize back to 0-255...
+      *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT);
+      ++src_ptr;
+      output_ptr += intermediate_height;
+    }
+    src_ptr += src_next_row_stride;
+    output_ptr += intermediate_next_stride;
+  }
+
+  // Vertical pass (transposed intermediate -> dst).
+  src_ptr = intermediate_buffer;
+  const int dst_next_row_stride = dst_stride - output_width;
+  for (i = 0; i < output_height; ++i) {
+    for (j = 0; j < output_width; ++j) {
+      // Apply filter...
+      const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
+                       (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
+                       (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
+                       (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
+                       (AV1_FILTER_WEIGHT >> 1);  // Rounding
+
+      // Normalize back to 0-255...
+      *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT);
+      src_ptr += intermediate_height;
+    }
+    src_ptr += intermediate_next_stride;
+    dst_ptr += dst_next_row_stride;
+  }
+}
+
+void block2d_average_c(uint8_t *src, unsigned int src_stride,
+                       uint8_t *output_ptr, unsigned int output_stride,
+                       unsigned int output_width, unsigned int output_height) {
+  unsigned int i, j;
+  for (i = 0; i < output_height; ++i) {
+    for (j = 0; j < output_width; ++j) {
+      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
+    }
+    output_ptr += output_stride;
+  }
+}
+
+void filter_average_block2d_8_c(const uint8_t *src_ptr,
+                                const unsigned int src_stride,
+                                const int16_t *HFilter, const int16_t *VFilter,
+                                uint8_t *dst_ptr, unsigned int dst_stride,
+                                unsigned int output_width,
+                                unsigned int output_height) {
+  uint8_t tmp[kMaxDimension * kMaxDimension];
+
+  assert(output_width <= kMaxDimension);
+  assert(output_height <= kMaxDimension);
+  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
+                     output_width, output_height);
+  block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width,
+                    output_height);
+}
+
+void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
+                               const unsigned int src_stride,
+                               const int16_t *HFilter, const int16_t *VFilter,
+                               uint16_t *dst_ptr, unsigned int dst_stride,
+                               unsigned int output_width,
+                               unsigned int output_height, int bd) {
+  // Between passes, we use an intermediate buffer whose height is extended to
+  // have enough horizontally filtered values as input for the vertical pass.
+  // This buffer is allocated to be big enough for the largest block type we
+  // support.
+  const int kInterp_Extend = 4;
+  const unsigned int intermediate_height =
+      (kInterp_Extend - 1) + output_height + kInterp_Extend;
+
+  /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
+   * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
+   *                                 + kInterp_Extend
+   *                               = 3 + 16 + 4
+   *                               = 23
+   * and filter_max_width = 16
+   */
+  uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 };
+  const int intermediate_next_stride =
+      1 - static_cast<int>(intermediate_height * output_width);
+
+  // Horizontal pass (src -> transposed intermediate).
+  {
+    uint16_t *output_ptr = intermediate_buffer;
+    const int src_next_row_stride = src_stride - output_width;
+    unsigned int i, j;
+    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
+    for (i = 0; i < intermediate_height; ++i) {
+      for (j = 0; j < output_width; ++j) {
+        // Apply filter...
+        const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
+                         (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
+                         (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
+                         (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
+                         (AV1_FILTER_WEIGHT >> 1);  // Rounding
+
+        // Normalize back to 0-255...
+        *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
+        ++src_ptr;
+        output_ptr += intermediate_height;
+      }
+      src_ptr += src_next_row_stride;
+      output_ptr += intermediate_next_stride;
+    }
+  }
+
+  // Vertical pass (transposed intermediate -> dst).
+  {
+    const uint16_t *interm_ptr = intermediate_buffer;
+    const int dst_next_row_stride = dst_stride - output_width;
+    unsigned int i, j;
+    for (i = 0; i < output_height; ++i) {
+      for (j = 0; j < output_width; ++j) {
+        // Apply filter...
+        const int temp =
+            (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
+            (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
+            (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
+            (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
+            (AV1_FILTER_WEIGHT >> 1);  // Rounding
+
+        // Normalize back to 0-255...
+        *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
+        interm_ptr += intermediate_height;
+      }
+      interm_ptr += intermediate_next_stride;
+      dst_ptr += dst_next_row_stride;
+    }
+  }
+}
+
+void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
+                              uint16_t *output_ptr, unsigned int output_stride,
+                              unsigned int output_width,
+                              unsigned int output_height) {
+  unsigned int i, j;
+  for (i = 0; i < output_height; ++i) {
+    for (j = 0; j < output_width; ++j) {
+      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
+    }
+    output_ptr += output_stride;
+  }
+}
+
+void highbd_filter_average_block2d_8_c(
+    const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
+    const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride,
+    unsigned int output_width, unsigned int output_height, int bd) {
+  uint16_t tmp[kMaxDimension * kMaxDimension];
+
+  assert(output_width <= kMaxDimension);
+  assert(output_height <= kMaxDimension);
+  highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp,
+                            kMaxDimension, output_width, output_height, bd);
+  highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
+                           output_width, output_height);
+}
+
+class ConvolveTestBase : public ::testing::TestWithParam<ConvolveParam> {
+ public:
+  static void SetUpTestSuite() {
+    // Force input_ to be unaligned, output to be 16 byte aligned.
+    input_ = reinterpret_cast<uint8_t *>(
+                 aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
+             1;
+    ASSERT_NE(input_, nullptr);
+    ref8_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kOutputStride * kMaxDimension));
+    ASSERT_NE(ref8_, nullptr);
+    output_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kOutputBufferSize));
+    ASSERT_NE(output_, nullptr);
+    output_ref_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kOutputBufferSize));
+    ASSERT_NE(output_ref_, nullptr);
+    input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
+                   kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
+               1;
+    ASSERT_NE(input16_, nullptr);
+    ref16_ = reinterpret_cast<uint16_t *>(aom_memalign(
+        kDataAlignment, kOutputStride * kMaxDimension * sizeof(uint16_t)));
+    ASSERT_NE(ref16_, nullptr);
+    output16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
+    ASSERT_NE(output16_, nullptr);
+    output16_ref_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
+    ASSERT_NE(output16_ref_, nullptr);
+  }
+
+  static void TearDownTestSuite() {
+    aom_free(input_ - 1);
+    input_ = nullptr;
+    aom_free(ref8_);
+    ref8_ = nullptr;
+    aom_free(output_);
+    output_ = nullptr;
+    aom_free(output_ref_);
+    output_ref_ = nullptr;
+    aom_free(input16_ - 1);
+    input16_ = nullptr;
+    aom_free(ref16_);
+    ref16_ = nullptr;
+    aom_free(output16_);
+    output16_ = nullptr;
+    aom_free(output16_ref_);
+    output16_ref_ = nullptr;
+  }
+
+ protected:
+  static const int kDataAlignment = 16;
+  static const int kOuterBlockSize = 4 * kMaxDimension;
+  static const int kInputStride = kOuterBlockSize;
+  static const int kOutputStride = kOuterBlockSize;
+  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
+  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
+
+  int Width() const { return GET_PARAM(0); }
+  int Height() const { return GET_PARAM(1); }
+  int BorderLeft() const {
+    const int center = (kOuterBlockSize - Width()) / 2;
+    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
+  }
+  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
+
+  bool IsIndexInBorder(int i) {
+    return (i < BorderTop() * kOuterBlockSize ||
+            i >= (BorderTop() + Height()) * kOuterBlockSize ||
+            i % kOuterBlockSize < BorderLeft() ||
+            i % kOuterBlockSize >= (BorderLeft() + Width()));
+  }
+
+  void SetUp() override {
+    UUT_ = GET_PARAM(2);
+    if (UUT_->use_highbd_ != 0)
+      mask_ = (1 << UUT_->use_highbd_) - 1;
+    else
+      mask_ = 255;
+    /* Set up guard blocks for an inner block centered in the outer block */
+    for (int i = 0; i < kOutputBufferSize; ++i) {
+      if (IsIndexInBorder(i)) {
+        output_[i] = 255;
+        output16_[i] = mask_;
+      } else {
+        output_[i] = 0;
+        output16_[i] = 0;
+      }
+    }
+
+    ::libaom_test::ACMRandom prng;
+    for (int i = 0; i < kInputBufferSize; ++i) {
+      if (i & 1) {
+        input_[i] = 255;
+        input16_[i] = mask_;
+      } else {
+        input_[i] = prng.Rand8Extremes();
+        input16_[i] = prng.Rand16() & mask_;
+      }
+    }
+  }
+
+  void SetConstantInput(int value) {
+    memset(input_, value, kInputBufferSize);
+    aom_memset16(input16_, value, kInputBufferSize);
+  }
+
+  void CopyOutputToRef() {
+    memcpy(output_ref_, output_, kOutputBufferSize);
+    // Copy 16-bit pixels values. The effective number of bytes is double.
+    memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
+  }
+
+  void CheckGuardBlocks() {
+    for (int i = 0; i < kOutputBufferSize; ++i) {
+      if (IsIndexInBorder(i)) {
+        EXPECT_EQ(255, output_[i]);
+      }
+    }
+  }
+
+  uint8_t *input() const {
+    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
+    if (UUT_->use_highbd_ == 0) {
+      return input_ + offset;
+    } else {
+      return CONVERT_TO_BYTEPTR(input16_) + offset;
+    }
+  }
+
+  uint8_t *output() const {
+    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
+    if (UUT_->use_highbd_ == 0) {
+      return output_ + offset;
+    } else {
+      return CONVERT_TO_BYTEPTR(output16_) + offset;
+    }
+  }
+
+  uint8_t *output_ref() const {
+    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
+    if (UUT_->use_highbd_ == 0) {
+      return output_ref_ + offset;
+    } else {
+      return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
+    }
+  }
+
+  uint16_t lookup(uint8_t *list, int index) const {
+    if (UUT_->use_highbd_ == 0) {
+      return list[index];
+    } else {
+      return CONVERT_TO_SHORTPTR(list)[index];
+    }
+  }
+
+  void assign_val(uint8_t *list, int index, uint16_t val) const {
+    if (UUT_->use_highbd_ == 0) {
+      list[index] = (uint8_t)val;
+    } else {
+      CONVERT_TO_SHORTPTR(list)[index] = val;
+    }
+  }
+
+  void wrapper_filter_average_block2d_8_c(
+      const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
+      const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
+      unsigned int output_width, unsigned int output_height) {
+    if (UUT_->use_highbd_ == 0) {
+      filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
+                                 dst_stride, output_width, output_height);
+    } else {
+      highbd_filter_average_block2d_8_c(
+          CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
+          CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
+          UUT_->use_highbd_);
+    }
+  }
+
+  void wrapper_filter_block2d_8_c(
+      const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
+      const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
+      unsigned int output_width, unsigned int output_height) {
+    if (UUT_->use_highbd_ == 0) {
+      filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
+                         dst_stride, output_width, output_height);
+    } else {
+      highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
+                                HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
+                                dst_stride, output_width, output_height,
+                                UUT_->use_highbd_);
+    }
+  }
+
+  void MatchesReferenceSubpixelFilter() {
+    uint8_t *const in = input();
+    uint8_t *const out = output();
+    uint8_t *ref;
+    if (UUT_->use_highbd_ == 0) {
+      ref = ref8_;
+    } else {
+      ref = CONVERT_TO_BYTEPTR(ref16_);
+    }
+    int subpel_search;
+    for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
+         ++subpel_search) {
+      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+        const InterpFilter filter = (InterpFilter)filter_bank;
+        const InterpKernel *filters =
+            (const InterpKernel *)av1_get_interp_filter_kernel(filter,
+                                                               subpel_search);
+        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+            wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
+                                       filters[filter_y], ref, kOutputStride,
+                                       Width(), Height());
+
+            if (filter_x && filter_y)
+              continue;
+            else if (filter_y)
+              UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
+                        16, filters[filter_y], 16, Width(), Height());
+            else if (filter_x)
+              API_REGISTER_STATE_CHECK(UUT_->h8_(
+                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+                  kInvalidFilter, 16, Width(), Height()));
+            else
+              continue;
+
+            CheckGuardBlocks();
+
+            for (int y = 0; y < Height(); ++y)
+              for (int x = 0; x < Width(); ++x)
+                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
+                          lookup(out, y * kOutputStride + x))
+                    << "mismatch at (" << x << "," << y << "), "
+                    << "filters (" << filter_bank << "," << filter_x << ","
+                    << filter_y << ")";
+          }
+        }
+      }
+    }
+  }
+
+  void FilterExtremes() {
+    uint8_t *const in = input();
+    uint8_t *const out = output();
+    uint8_t *ref;
+    if (UUT_->use_highbd_ == 0) {
+      ref = ref8_;
+    } else {
+      ref = CONVERT_TO_BYTEPTR(ref16_);
+    }
+
+    // Populate ref and out with some random data
+    ::libaom_test::ACMRandom prng;
+    for (int y = 0; y < Height(); ++y) {
+      for (int x = 0; x < Width(); ++x) {
+        uint16_t r;
+        if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
+          r = prng.Rand8Extremes();
+        } else {
+          r = prng.Rand16() & mask_;
+        }
+        assign_val(out, y * kOutputStride + x, r);
+        assign_val(ref, y * kOutputStride + x, r);
+      }
+    }
+
+    for (int axis = 0; axis < 2; axis++) {
+      int seed_val = 0;
+      while (seed_val < 256) {
+        for (int y = 0; y < 8; ++y) {
+          for (int x = 0; x < 8; ++x) {
+            assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
+                       ((seed_val >> (axis ? y : x)) & 1) * mask_);
+            if (axis) seed_val++;
+          }
+          if (axis)
+            seed_val -= 8;
+          else
+            seed_val++;
+        }
+        if (axis) seed_val += 8;
+        int subpel_search;
+        for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
+             ++subpel_search) {
+          for (int filter_bank = 0; filter_bank < kNumFilterBanks;
+               ++filter_bank) {
+            const InterpFilter filter = (InterpFilter)filter_bank;
+            const InterpKernel *filters =
+                (const InterpKernel *)av1_get_interp_filter_kernel(
+                    filter, subpel_search);
+            for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+              for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+                wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
+                                           filters[filter_y], ref,
+                                           kOutputStride, Width(), Height());
+                if (filter_x && filter_y)
+                  continue;
+                else if (filter_y)
+                  API_REGISTER_STATE_CHECK(UUT_->v8_(
+                      in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
+                      filters[filter_y], 16, Width(), Height()));
+                else if (filter_x)
+                  API_REGISTER_STATE_CHECK(UUT_->h8_(
+                      in, kInputStride, out, kOutputStride, filters[filter_x],
+                      16, kInvalidFilter, 16, Width(), Height()));
+                else
+                  continue;
+
+                for (int y = 0; y < Height(); ++y)
+                  for (int x = 0; x < Width(); ++x)
+                    ASSERT_EQ(lookup(ref, y * kOutputStride + x),
+                              lookup(out, y * kOutputStride + x))
+                        << "mismatch at (" << x << "," << y << "), "
+                        << "filters (" << filter_bank << "," << filter_x << ","
+                        << filter_y << ")";
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  void SpeedTest() {
+    uint8_t *const in = input();
+    uint8_t *const out = output();
+    uint8_t *ref;
+    if (UUT_->use_highbd_ == 0) {
+      ref = ref8_;
+    } else {
+      ref = CONVERT_TO_BYTEPTR(ref16_);
+    }
+
+    // Populate ref and out with some random data
+    ::libaom_test::ACMRandom prng;
+    for (int y = 0; y < Height(); ++y) {
+      for (int x = 0; x < Width(); ++x) {
+        uint16_t r;
+        if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
+          r = prng.Rand8Extremes();
+        } else {
+          r = prng.Rand16() & mask_;
+        }
+        assign_val(out, y * kOutputStride + x, r);
+        assign_val(ref, y * kOutputStride + x, r);
+      }
+    }
+
+    InterpFilter filter = (InterpFilter)1;
+    const InterpKernel *filters =
+        (const InterpKernel *)av1_get_interp_filter_kernel(filter, USE_8_TAPS);
+    wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1],
+                                       out, kOutputStride, Width(), Height());
+
+    aom_usec_timer timer;
+    int tests_num = 1000;
+
+    aom_usec_timer_start(&timer);
+    while (tests_num > 0) {
+      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+        filter = (InterpFilter)filter_bank;
+        filters = (const InterpKernel *)av1_get_interp_filter_kernel(
+            filter, USE_8_TAPS);
+        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+            if (filter_x && filter_y) continue;
+            if (filter_y)
+              API_REGISTER_STATE_CHECK(UUT_->v8_(
+                  in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
+                  filters[filter_y], 16, Width(), Height()));
+            else if (filter_x)
+              API_REGISTER_STATE_CHECK(UUT_->h8_(
+                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+                  kInvalidFilter, 16, Width(), Height()));
+          }
+        }
+      }
+      tests_num--;
+    }
+    aom_usec_timer_mark(&timer);
+
+    const int elapsed_time =
+        static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
+    printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(),
+           UUT_->use_highbd_, elapsed_time);
+  }
+
+  const ConvolveFunctions *UUT_;
+  static uint8_t *input_;
+  static uint8_t *ref8_;
+  static uint8_t *output_;
+  static uint8_t *output_ref_;
+  static uint16_t *input16_;
+  static uint16_t *ref16_;
+  static uint16_t *output16_;
+  static uint16_t *output16_ref_;
+  int mask_;
+};
+
+uint8_t *ConvolveTestBase::input_ = nullptr;
+uint8_t *ConvolveTestBase::ref8_ = nullptr;
+uint8_t *ConvolveTestBase::output_ = nullptr;
+uint8_t *ConvolveTestBase::output_ref_ = nullptr;
+uint16_t *ConvolveTestBase::input16_ = nullptr;
+uint16_t *ConvolveTestBase::ref16_ = nullptr;
+uint16_t *ConvolveTestBase::output16_ = nullptr;
+uint16_t *ConvolveTestBase::output16_ref_ = nullptr;
+
+using LowbdConvolveTest = ConvolveTestBase;
+
+TEST_P(LowbdConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
+
+void FiltersWontSaturateWhenAddedPairwise() {
+  int subpel_search;
+  for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
+       ++subpel_search) {
+    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+      const InterpFilter filter = (InterpFilter)filter_bank;
+      const InterpKernel *filters =
+          (const InterpKernel *)av1_get_interp_filter_kernel(filter,
+                                                             subpel_search);
+      for (int i = 0; i < kNumFilters; i++) {
+        const int p0 = filters[i][0] + filters[i][1];
+        const int p1 = filters[i][2] + filters[i][3];
+        const int p2 = filters[i][4] + filters[i][5];
+        const int p3 = filters[i][6] + filters[i][7];
+        EXPECT_LE(p0, 128);
+        EXPECT_LE(p1, 128);
+        EXPECT_LE(p2, 128);
+        EXPECT_LE(p3, 128);
+        EXPECT_LE(p0 + p3, 128);
+        EXPECT_LE(p0 + p3 + p1, 128);
+        EXPECT_LE(p0 + p3 + p1 + p2, 128);
+        EXPECT_EQ(p0 + p1 + p2 + p3, 128);
+      }
+    }
+  }
+}
+
+TEST(LowbdConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
+  FiltersWontSaturateWhenAddedPairwise();
+}
+
+TEST_P(LowbdConvolveTest, MatchesReferenceSubpixelFilter) {
+  MatchesReferenceSubpixelFilter();
+}
+
+TEST_P(LowbdConvolveTest, FilterExtremes) { FilterExtremes(); }
+
+TEST_P(LowbdConvolveTest, DISABLED_Speed) { SpeedTest(); }
+
+using std::make_tuple;
+
+// WRAP macro is only used for high bitdepth build.
+#if CONFIG_AV1_HIGHBITDEPTH
+#define WRAP(func, bd)                                                       \
+  static void wrap_##func##_##bd(                                            \
+      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
+      ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,    \
+      const int16_t *filter_y, int filter_y_stride, int w, int h) {          \
+    aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x,            \
+                      filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
+  }
+#if HAVE_SSE2 && AOM_ARCH_X86_64
+WRAP(convolve8_horiz_sse2, 8)
+WRAP(convolve8_vert_sse2, 8)
+WRAP(convolve8_horiz_sse2, 10)
+WRAP(convolve8_vert_sse2, 10)
+WRAP(convolve8_horiz_sse2, 12)
+WRAP(convolve8_vert_sse2, 12)
+#endif  // HAVE_SSE2 && AOM_ARCH_X86_64
+
+WRAP(convolve8_horiz_c, 8)
+WRAP(convolve8_vert_c, 8)
+WRAP(convolve8_horiz_c, 10)
+WRAP(convolve8_vert_c, 10)
+WRAP(convolve8_horiz_c, 12)
+WRAP(convolve8_vert_c, 12)
+
+#if HAVE_AVX2
+WRAP(convolve8_horiz_avx2, 8)
+WRAP(convolve8_vert_avx2, 8)
+
+WRAP(convolve8_horiz_avx2, 10)
+WRAP(convolve8_vert_avx2, 10)
+
+WRAP(convolve8_horiz_avx2, 12)
+WRAP(convolve8_vert_avx2, 12)
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+WRAP(convolve8_horiz_neon, 8)
+WRAP(convolve8_vert_neon, 8)
+
+WRAP(convolve8_horiz_neon, 10)
+WRAP(convolve8_vert_neon, 10)
+
+WRAP(convolve8_horiz_neon, 12)
+WRAP(convolve8_vert_neon, 12)
+#endif  // HAVE_NEON
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#undef WRAP
+
+#if CONFIG_AV1_HIGHBITDEPTH
+
+using HighbdConvolveTest = ConvolveTestBase;
+
+TEST_P(HighbdConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
+
+TEST(HighbdConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
+  FiltersWontSaturateWhenAddedPairwise();
+}
+
+TEST_P(HighbdConvolveTest, MatchesReferenceSubpixelFilter) {
+  MatchesReferenceSubpixelFilter();
+}
+
+TEST_P(HighbdConvolveTest, FilterExtremes) { FilterExtremes(); }
+
+TEST_P(HighbdConvolveTest, DISABLED_Speed) { SpeedTest(); }
+
+const ConvolveFunctions wrap_convolve8_c(wrap_convolve8_horiz_c_8,
+                                         wrap_convolve8_vert_c_8, 8);
+const ConvolveFunctions wrap_convolve10_c(wrap_convolve8_horiz_c_10,
+                                          wrap_convolve8_vert_c_10, 10);
+const ConvolveFunctions wrap_convolve12_c(wrap_convolve8_horiz_c_12,
+                                          wrap_convolve8_vert_c_12, 12);
+const ConvolveParam kArrayHighbdConvolve_c[] = { ALL_SIZES(wrap_convolve8_c),
+                                                 ALL_SIZES(wrap_convolve10_c),
+                                                 ALL_SIZES(wrap_convolve12_c) };
+
+INSTANTIATE_TEST_SUITE_P(C, HighbdConvolveTest,
+                         ::testing::ValuesIn(kArrayHighbdConvolve_c));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+const ConvolveFunctions convolve8_c(aom_convolve8_horiz_c, aom_convolve8_vert_c,
+                                    0);
+const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
+
+INSTANTIATE_TEST_SUITE_P(C, LowbdConvolveTest,
+                         ::testing::ValuesIn(kArrayConvolve_c));
+
+#if HAVE_SSE2 && AOM_ARCH_X86_64
+#if CONFIG_AV1_HIGHBITDEPTH
+const ConvolveFunctions wrap_convolve8_sse2(wrap_convolve8_horiz_sse2_8,
+                                            wrap_convolve8_vert_sse2_8, 8);
+const ConvolveFunctions wrap_convolve10_sse2(wrap_convolve8_horiz_sse2_10,
+                                             wrap_convolve8_vert_sse2_10, 10);
+const ConvolveFunctions wrap_convolve12_sse2(wrap_convolve8_horiz_sse2_12,
+                                             wrap_convolve8_vert_sse2_12, 12);
+const ConvolveParam kArrayHighbdConvolve_sse2[] = {
+  ALL_SIZES(wrap_convolve8_sse2), ALL_SIZES(wrap_convolve10_sse2),
+  ALL_SIZES(wrap_convolve12_sse2)
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, HighbdConvolveTest,
+                         ::testing::ValuesIn(kArrayHighbdConvolve_sse2));
+#endif
+const ConvolveFunctions convolve8_sse2(aom_convolve8_horiz_sse2,
+                                       aom_convolve8_vert_sse2, 0);
+const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
+
+INSTANTIATE_TEST_SUITE_P(SSE2, LowbdConvolveTest,
+                         ::testing::ValuesIn(kArrayConvolve_sse2));
+#endif
+
+#if HAVE_SSSE3
+const ConvolveFunctions convolve8_ssse3(aom_convolve8_horiz_ssse3,
+                                        aom_convolve8_vert_ssse3, 0);
+
+const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
+
+INSTANTIATE_TEST_SUITE_P(SSSE3, LowbdConvolveTest,
+                         ::testing::ValuesIn(kArrayConvolve8_ssse3));
+#endif
+
+#if HAVE_AVX2
+#if CONFIG_AV1_HIGHBITDEPTH
+const ConvolveFunctions wrap_convolve8_avx2(wrap_convolve8_horiz_avx2_8,
+                                            wrap_convolve8_vert_avx2_8, 8);
+const ConvolveFunctions wrap_convolve10_avx2(wrap_convolve8_horiz_avx2_10,
+                                             wrap_convolve8_vert_avx2_10, 10);
+const ConvolveFunctions wrap_convolve12_avx2(wrap_convolve8_horiz_avx2_12,
+                                             wrap_convolve8_vert_avx2_12, 12);
+const ConvolveParam kArray_HighbdConvolve8_avx2[] = {
+  ALL_SIZES_64(wrap_convolve8_avx2), ALL_SIZES_64(wrap_convolve10_avx2),
+  ALL_SIZES_64(wrap_convolve12_avx2)
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, HighbdConvolveTest,
+                         ::testing::ValuesIn(kArray_HighbdConvolve8_avx2));
+#endif
+const ConvolveFunctions convolve8_avx2(aom_convolve8_horiz_avx2,
+                                       aom_convolve8_vert_avx2, 0);
+const ConvolveParam kArray_Convolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
+
+INSTANTIATE_TEST_SUITE_P(AVX2, LowbdConvolveTest,
+                         ::testing::ValuesIn(kArray_Convolve8_avx2));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+#if CONFIG_AV1_HIGHBITDEPTH
+const ConvolveFunctions wrap_convolve8_neon(wrap_convolve8_horiz_neon_8,
+                                            wrap_convolve8_vert_neon_8, 8);
+const ConvolveFunctions wrap_convolve10_neon(wrap_convolve8_horiz_neon_10,
+                                             wrap_convolve8_vert_neon_10, 10);
+const ConvolveFunctions wrap_convolve12_neon(wrap_convolve8_horiz_neon_12,
+                                             wrap_convolve8_vert_neon_12, 12);
+const ConvolveParam kArray_HighbdConvolve8_neon[] = {
+  ALL_SIZES_64(wrap_convolve8_neon), ALL_SIZES_64(wrap_convolve10_neon),
+  ALL_SIZES_64(wrap_convolve12_neon)
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, HighbdConvolveTest,
+                         ::testing::ValuesIn(kArray_HighbdConvolve8_neon));
+#endif
+const ConvolveFunctions convolve8_neon(aom_convolve8_horiz_neon,
+                                       aom_convolve8_vert_neon, 0);
+const ConvolveParam kArray_Convolve8_neon[] = { ALL_SIZES(convolve8_neon) };
+
+INSTANTIATE_TEST_SUITE_P(NEON, LowbdConvolveTest,
+                         ::testing::ValuesIn(kArray_Convolve8_neon));
+#endif  // HAVE_NEON
+
+#if HAVE_NEON_DOTPROD
+const ConvolveFunctions convolve8_neon_dotprod(aom_convolve8_horiz_neon_dotprod,
+                                               aom_convolve8_vert_neon_dotprod,
+                                               0);
+const ConvolveParam kArray_Convolve8_neon_dotprod[] = { ALL_SIZES(
+    convolve8_neon_dotprod) };
+
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, LowbdConvolveTest,
+                         ::testing::ValuesIn(kArray_Convolve8_neon_dotprod));
+#endif  // HAVE_NEON_DOTPROD
+
+#if HAVE_NEON_I8MM
+const ConvolveFunctions convolve8_neon_i8mm(aom_convolve8_horiz_neon_i8mm,
+                                            aom_convolve8_vert_neon_i8mm, 0);
+const ConvolveParam kArray_Convolve8_neon_i8mm[] = { ALL_SIZES(
+    convolve8_neon_i8mm) };
+
+INSTANTIATE_TEST_SUITE_P(NEON_I8MM, LowbdConvolveTest,
+                         ::testing::ValuesIn(kArray_Convolve8_neon_i8mm));
+#endif  // HAVE_NEON_I8MM
+
+}  // namespace
diff --git a/third_party/aom/test/corner_match_test.cc b/third_party/aom/test/corner_match_test.cc
new file mode 100644
index 0000000000..9733732180
--- /dev/null
+++ b/third_party/aom/test/corner_match_test.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <memory>
+#include <new>
+#include <tuple>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "test/register_state_check.h"
+
+#include "aom_dsp/flow_estimation/corner_match.h"
+
+namespace test_libaom {
+
+namespace AV1CornerMatch {
+
+using libaom_test::ACMRandom;
+
+typedef double (*ComputeCrossCorrFunc)(const unsigned char *im1, int stride1,
+                                       int x1, int y1, const unsigned char *im2,
+                                       int stride2, int x2, int y2);
+
+using std::make_tuple;
+using std::tuple;
+typedef tuple<int, ComputeCrossCorrFunc> CornerMatchParam;
+
+class AV1CornerMatchTest : public ::testing::TestWithParam<CornerMatchParam> {
+ public:
+  ~AV1CornerMatchTest() override;
+  void SetUp() override;
+
+ protected:
+  void RunCheckOutput(int run_times);
+  ComputeCrossCorrFunc target_func;
+
+  libaom_test::ACMRandom rnd_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CornerMatchTest);
+
+AV1CornerMatchTest::~AV1CornerMatchTest() = default;
+void AV1CornerMatchTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+  target_func = GET_PARAM(1);
+}
+
+void AV1CornerMatchTest::RunCheckOutput(int run_times) {
+  const int w = 128, h = 128;
+  const int num_iters = 10000;
+  int i, j;
+  aom_usec_timer ref_timer, test_timer;
+
+  std::unique_ptr<uint8_t[]> input1(new (std::nothrow) uint8_t[w * h]);
+  std::unique_ptr<uint8_t[]> input2(new (std::nothrow) uint8_t[w * h]);
+  ASSERT_NE(input1, nullptr);
+  ASSERT_NE(input2, nullptr);
+
+  // Test the two extreme cases:
+  // i) Random data, should have correlation close to 0
+  // ii) Linearly related data + noise, should have correlation close to 1
+  int mode = GET_PARAM(0);
+  if (mode == 0) {
+    for (i = 0; i < h; ++i)
+      for (j = 0; j < w; ++j) {
+        input1[i * w + j] = rnd_.Rand8();
+        input2[i * w + j] = rnd_.Rand8();
+      }
+  } else if (mode == 1) {
+    for (i = 0; i < h; ++i)
+      for (j = 0; j < w; ++j) {
+        int v = rnd_.Rand8();
+        input1[i * w + j] = v;
+        input2[i * w + j] = (v / 2) + (rnd_.Rand8() & 15);
+      }
+  }
+
+  for (i = 0; i < num_iters; ++i) {
+    int x1 = MATCH_SZ_BY2 + rnd_.PseudoUniform(w - 2 * MATCH_SZ_BY2);
+    int y1 = MATCH_SZ_BY2 + rnd_.PseudoUniform(h - 2 * MATCH_SZ_BY2);
+    int x2 = MATCH_SZ_BY2 + rnd_.PseudoUniform(w - 2 * MATCH_SZ_BY2);
+    int y2 = MATCH_SZ_BY2 + rnd_.PseudoUniform(h - 2 * MATCH_SZ_BY2);
+
+    double res_c = av1_compute_cross_correlation_c(input1.get(), w, x1, y1,
+                                                   input2.get(), w, x2, y2);
+    double res_simd =
+        target_func(input1.get(), w, x1, y1, input2.get(), w, x2, y2);
+
+    if (run_times > 1) {
+      aom_usec_timer_start(&ref_timer);
+      for (j = 0; j < run_times; j++) {
+        av1_compute_cross_correlation_c(input1.get(), w, x1, y1, input2.get(),
+                                        w, x2, y2);
+      }
+      aom_usec_timer_mark(&ref_timer);
+      const int elapsed_time_c =
+          static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+      aom_usec_timer_start(&test_timer);
+      for (j = 0; j < run_times; j++) {
+        target_func(input1.get(), w, x1, y1, input2.get(), w, x2, y2);
+      }
+      aom_usec_timer_mark(&test_timer);
+      const int elapsed_time_simd =
+          static_cast<int>(aom_usec_timer_elapsed(&test_timer));
+
+      printf(
+          "c_time=%d \t simd_time=%d \t "
+          "gain=%d\n",
+          elapsed_time_c, elapsed_time_simd,
+          (elapsed_time_c / elapsed_time_simd));
+    } else {
+      ASSERT_EQ(res_simd, res_c);
+    }
+  }
+}
+
+TEST_P(AV1CornerMatchTest, CheckOutput) { RunCheckOutput(1); }
+TEST_P(AV1CornerMatchTest, DISABLED_Speed) { RunCheckOutput(100000); }
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AV1CornerMatchTest,
+    ::testing::Values(make_tuple(0, &av1_compute_cross_correlation_sse4_1),
+                      make_tuple(1, &av1_compute_cross_correlation_sse4_1)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1CornerMatchTest,
+    ::testing::Values(make_tuple(0, &av1_compute_cross_correlation_avx2),
+                      make_tuple(1, &av1_compute_cross_correlation_avx2)));
+#endif
+}  // namespace AV1CornerMatch
+
+}  // namespace test_libaom
diff --git a/third_party/aom/test/cpu_speed_test.cc b/third_party/aom/test/cpu_speed_test.cc
new file mode 100644
index 0000000000..b5f5d2974d
--- /dev/null
+++ b/third_party/aom/test/cpu_speed_test.cc
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+
+const int kMaxPSNR = 100;
+
+class CpuSpeedTest
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  CpuSpeedTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        set_cpu_used_(GET_PARAM(2)), min_psnr_(kMaxPSNR),
+        tune_content_(AOM_CONTENT_DEFAULT) {}
+  ~CpuSpeedTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    if (encoding_mode_ != ::libaom_test::kRealTime) {
+      cfg_.g_lag_in_frames = 25;
+    }
+  }
+
+  void BeginPassHook(unsigned int /*pass*/) override { min_psnr_ = kMaxPSNR; }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_SET_TUNE_CONTENT, tune_content_);
+      if (encoding_mode_ != ::libaom_test::kRealTime) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      }
+    }
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    if (pkt->data.psnr.psnr[0] < min_psnr_) min_psnr_ = pkt->data.psnr.psnr[0];
+  }
+
+  void TestQ0();
+  void TestScreencastQ0();
+  void TestTuneScreen();
+  void TestEncodeHighBitrate();
+  void TestLowBitrate();
+
+  ::libaom_test::TestMode encoding_mode_;
+  int set_cpu_used_;
+  double min_psnr_;
+  int tune_content_;
+};
+
+void CpuSpeedTest::TestQ0() {
+  // Validate that this non multiple of 64 wide clip encodes and decodes
+  // without a mismatch when passing in a very low max q.  This pushes
+  // the encoder to producing lots of big partitions which will likely
+  // extend into the border and test the border condition.
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 400;
+  cfg_.rc_max_quantizer = 0;
+  cfg_.rc_min_quantizer = 0;
+
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       10);
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  EXPECT_GE(min_psnr_, kMaxPSNR);
+}
+
+void CpuSpeedTest::TestScreencastQ0() {
+  ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 3);
+  cfg_.g_timebase = video.timebase();
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 400;
+  cfg_.rc_max_quantizer = 0;
+  cfg_.rc_min_quantizer = 0;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  EXPECT_GE(min_psnr_, kMaxPSNR);
+}
+
+void CpuSpeedTest::TestTuneScreen() {
+  ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 3);
+  cfg_.g_timebase = video.timebase();
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_minsection_pct = 2000;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_min_quantizer = 0;
+  tune_content_ = AOM_CONTENT_SCREEN;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+void CpuSpeedTest::TestEncodeHighBitrate() {
+  // Validate that this non multiple of 64 wide clip encodes and decodes
+  // without a mismatch when passing in a very low max q.  This pushes
+  // the encoder to producing lots of big partitions which will likely
+  // extend into the border and test the border condition.
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 12000;
+  cfg_.rc_max_quantizer = 10;
+  cfg_.rc_min_quantizer = 0;
+
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       10);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+void CpuSpeedTest::TestLowBitrate() {
+  // Validate that this clip encodes and decodes without a mismatch
+  // when passing in a very high min q.  This pushes the encoder to producing
+  // lots of small partitions which might will test the other condition.
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 200;
+  cfg_.rc_min_quantizer = 40;
+
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       10);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+TEST_P(CpuSpeedTest, TestQ0) { TestQ0(); }
+TEST_P(CpuSpeedTest, TestScreencastQ0) { TestScreencastQ0(); }
+TEST_P(CpuSpeedTest, TestTuneScreen) { TestTuneScreen(); }
+TEST_P(CpuSpeedTest, TestEncodeHighBitrate) { TestEncodeHighBitrate(); }
+TEST_P(CpuSpeedTest, TestLowBitrate) { TestLowBitrate(); }
+
+class CpuSpeedTestLarge : public CpuSpeedTest {};
+
+TEST_P(CpuSpeedTestLarge, TestQ0) { TestQ0(); }
+TEST_P(CpuSpeedTestLarge, TestScreencastQ0) { TestScreencastQ0(); }
+TEST_P(CpuSpeedTestLarge, TestTuneScreen) { TestTuneScreen(); }
+TEST_P(CpuSpeedTestLarge, TestEncodeHighBitrate) { TestEncodeHighBitrate(); }
+TEST_P(CpuSpeedTestLarge, TestLowBitrate) { TestLowBitrate(); }
+
+AV1_INSTANTIATE_TEST_SUITE(CpuSpeedTest,
+                           ::testing::Values(::libaom_test::kTwoPassGood,
+                                             ::libaom_test::kOnePassGood),
+                           ::testing::Range(1, 3));
+AV1_INSTANTIATE_TEST_SUITE(CpuSpeedTestLarge,
+                           ::testing::Values(::libaom_test::kTwoPassGood,
+                                             ::libaom_test::kOnePassGood),
+                           ::testing::Range(0, 1));
+}  // namespace
diff --git a/third_party/aom/test/cpu_used_firstpass_test.cc b/third_party/aom/test/cpu_used_firstpass_test.cc
new file mode 100644
index 0000000000..53db8b0d13
--- /dev/null
+++ b/third_party/aom/test/cpu_used_firstpass_test.cc
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdlib>
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+const double kPsnrDiffThreshold = 0.1;
+
+// Params: first pass cpu used, second pass cpu used
+class CpuUsedFirstpassTest
+    : public ::libaom_test::CodecTestWith2Params<int, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  CpuUsedFirstpassTest()
+      : EncoderTest(GET_PARAM(0)), second_pass_cpu_used_(GET_PARAM(2)) {}
+  ~CpuUsedFirstpassTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kTwoPassGood);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.rc_target_bitrate = 1000;
+    cfg_.g_lag_in_frames = 19;
+    cfg_.g_threads = 0;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+  }
+
+  void BeginPassHook(unsigned int pass) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+
+    if (pass == 0)
+      cpu_used_ = first_pass_cpu_used_;
+    else
+      cpu_used_ = second_pass_cpu_used_;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  double GetPsnrDiffThreshold() { return kPsnrDiffThreshold; }
+
+  void DoTest() {
+    libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480,
+                                       cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                       0, 30);
+    double ref_psnr;
+    double psnr_diff;
+
+    first_pass_cpu_used_ = second_pass_cpu_used_;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));  // same preset case ref_psnr
+    ref_psnr = GetAveragePsnr();
+
+    first_pass_cpu_used_ = GET_PARAM(1);
+    if (first_pass_cpu_used_ == second_pass_cpu_used_) return;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    psnr_diff = std::abs(ref_psnr - GetAveragePsnr());
+    EXPECT_LT(psnr_diff, GetPsnrDiffThreshold())
+        << "first pass cpu used = " << first_pass_cpu_used_
+        << ", second pass cpu used = " << second_pass_cpu_used_;
+  }
+
+  int cpu_used_;
+  int first_pass_cpu_used_;
+  int second_pass_cpu_used_;
+  unsigned int nframes_;
+  double psnr_;
+};
+
+TEST_P(CpuUsedFirstpassTest, FirstPassTest) { DoTest(); }
+
+class CpuUsedFirstpassTestLarge : public CpuUsedFirstpassTest {};
+
+TEST_P(CpuUsedFirstpassTestLarge, FirstPassTest) { DoTest(); }
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+static const int kSecondPassCpuUsedLarge[] = { 2, 4 };
+static const int kSecondPassCpuUsed[] = { 6 };
+#else
+static const int kSecondPassCpuUsedLarge[] = { 2 };
+static const int kSecondPassCpuUsed[] = { 4, 6 };
+#endif
+#else
+static const int kSecondPassCpuUsedLarge[] = { 2 };
+static const int kSecondPassCpuUsed[] = { 4, 6 };
+#endif
+
+AV1_INSTANTIATE_TEST_SUITE(
+    CpuUsedFirstpassTestLarge, ::testing::Values(2, 4, 6),
+    ::testing::ValuesIn(kSecondPassCpuUsedLarge));  // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(
+    CpuUsedFirstpassTest, ::testing::Values(2, 4, 6),
+    ::testing::ValuesIn(kSecondPassCpuUsed));  // cpu_used
+
+}  // namespace
diff --git a/third_party/aom/test/datarate_test.cc b/third_party/aom/test/datarate_test.cc
new file mode 100644
index 0000000000..a75a72fab6
--- /dev/null
+++ b/third_party/aom/test/datarate_test.cc
@@ -0,0 +1,712 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "config/aom_config.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/codec_factory.h"
+#include "test/datarate_test.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "aom/aom_codec.h"
+
+namespace datarate_test {
+namespace {
+
+// Params: test mode, speed, aq mode and index for bitrate array.
+class DatarateTestLarge
+    : public ::libaom_test::CodecTestWith4Params<libaom_test::TestMode, int,
+                                                 unsigned int, int>,
+      public DatarateTest {
+ public:
+  DatarateTestLarge() : DatarateTest(GET_PARAM(0)) {
+    set_cpu_used_ = GET_PARAM(2);
+    aq_mode_ = GET_PARAM(3);
+  }
+
+ protected:
+  ~DatarateTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(GET_PARAM(1));
+    ResetModel();
+  }
+
+  virtual void BasicRateTargetingVBRTest() {
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.g_error_resilient = 0;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.g_lag_in_frames = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 140);
+    const int bitrate_array[2] = { 400, 800 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.7)
+        << " The datarate for the file is lower than target by too much!";
+    // FIXME(jingning): Lower this test threshold after vbr mode can render
+    // sufficiently accurate bit rate.
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.45)
+        << " The datarate for the file is greater than target by too much!";
+  }
+
+  virtual void BasicRateTargetingCBRTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 1;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 140);
+    const int bitrate_array[2] = { 150, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
+        << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.19)
+        << " The datarate for the file is greater than target by too much!";
+  }
+
+  virtual void BasicRateTargetingCBRSpikeTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.kf_max_dist = 3000;
+    cfg_.kf_min_dist = 3000;
+
+    ::libaom_test::I420VideoSource video("desktopqvga2.320_240.yuv", 320, 240,
+                                         30, 1, 0, 800);
+    const int bitrate_array[2] = { 100, 200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    max_perc_spike_ = 3.0;
+    max_perc_spike_high_ = 8.0;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
+        << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.19)
+        << " The datarate for the file is greater than target by too much!";
+    ASSERT_LE(num_spikes_, 8);
+    ASSERT_LT(num_spikes_high_, 1);
+  }
+
+  virtual void BasicRateTargetingCBRDynamicBitrateTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.kf_max_dist = 3000;
+    cfg_.kf_min_dist = 3000;
+
+    ::libaom_test::I420VideoSource video("desktop1.320_180.yuv", 320, 180, 30,
+                                         1, 0, 800);
+    const int bitrate_array[2] = { 100, 200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    target_bitrate_update_[0] = cfg_.rc_target_bitrate;
+    target_bitrate_update_[1] = static_cast<int>(1.3 * cfg_.rc_target_bitrate);
+    target_bitrate_update_[2] = static_cast<int>(0.7 * cfg_.rc_target_bitrate);
+    frame_update_bitrate_ = 250;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < 3; i++) {
+      ASSERT_GE(effective_datarate_dynamic_[i],
+                target_bitrate_update_[i] * 0.85)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_dynamic_[i],
+                target_bitrate_update_[i] * 1.20)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingMultiThreadCBRTest() {
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, 400);
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 1;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_threads = 4;
+
+    const int bitrate_array[2] = { 250, 650 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    tile_column_ = 2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate),
+              effective_datarate_ * 0.85)
+        << " The datarate for the file exceeds the target by too much!";
+    ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate),
+              effective_datarate_ * 1.15)
+        << " The datarate for the file missed the target!"
+        << cfg_.rc_target_bitrate << " " << effective_datarate_;
+  }
+
+  virtual void ErrorResilienceOnSceneCuts() {
+    if (GET_PARAM(4) > 0) return;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.g_error_resilient = 1;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    cfg_.rc_target_bitrate = 500;
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
+        << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15)
+        << " The datarate for the file is greater than target by too much!";
+  }
+
+  virtual void BasicRateTargetingCBRPeriodicKeyFrameTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 1;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    // Periodic keyframe
+    cfg_.kf_max_dist = 50;
+
+    ::libaom_test::I420VideoSource video("pixel_capture_w320h240.yuv", 320, 240,
+                                         30, 1, 0, 310);
+    const int bitrate_array[2] = { 150, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
+        << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15)
+        << " The datarate for the file is greater than target by too much!";
+  }
+
+  virtual void CBRPeriodicKeyFrameOnSceneCuts() {
+    if (GET_PARAM(4) > 0) return;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    // Periodic keyframe
+    cfg_.kf_max_dist = 30;
+    cfg_.kf_min_dist = 30;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    cfg_.rc_target_bitrate = 500;
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
+        << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.3)
+        << " The datarate for the file is greater than target by too much!";
+  }
+
+  virtual void BasicRateTargetingAQModeOnOffCBRTest() {
+    if (GET_PARAM(4) > 0) return;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_pass = AOM_RC_ONE_PASS;
+    cfg_.g_usage = AOM_USAGE_REALTIME;
+    cfg_.kf_mode = AOM_KF_DISABLED;
+
+    ::libaom_test::I420VideoSource video("pixel_capture_w320h240.yuv", 320, 240,
+                                         30, 1, 0, 310);
+    cfg_.rc_target_bitrate = 60;
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
+        << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15)
+        << " The datarate for the file is greater than target by too much!";
+  }
+
+  virtual void BasicRateTargeting444CBRScreenTest() {
+    ::libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
+
+    cfg_.g_profile = 1;
+    cfg_.g_timebase = video.timebase();
+
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 1;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+
+    const int bitrate_array[2] = { 250, 650 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    screen_mode_ = true;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate),
+              effective_datarate_ * 0.85)
+        << " The datarate for the file exceeds the target by too much!";
+    ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate),
+              effective_datarate_ * 1.15)
+        << " The datarate for the file missed the target!"
+        << cfg_.rc_target_bitrate << " " << effective_datarate_;
+  }
+
+  virtual void BasicRateTargetingSuperresCBR() {
+    ::libaom_test::I420VideoSource video("desktopqvga2.320_240.yuv", 320, 240,
+                                         30, 1, 0, 800);
+
+    cfg_.g_profile = 0;
+    cfg_.g_timebase = video.timebase();
+
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 1;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+
+    cfg_.rc_superres_mode = AOM_SUPERRES_FIXED;
+    cfg_.rc_superres_denominator = 16;
+    cfg_.rc_superres_kf_denominator = 16;
+
+    const int bitrate_array[2] = { 250, 650 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate),
+              effective_datarate_ * 0.85)
+        << " The datarate for the file exceeds the target by too much!";
+    ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate),
+              effective_datarate_ * 1.15)
+        << " The datarate for the file missed the target!"
+        << cfg_.rc_target_bitrate << " " << effective_datarate_;
+  }
+
+  virtual void BasicRateTargetingSuperresCBRMultiThreads() {
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, 400);
+
+    cfg_.g_profile = 0;
+    cfg_.g_timebase = video.timebase();
+
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 1;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_threads = 2;
+
+    cfg_.rc_superres_mode = AOM_SUPERRES_FIXED;
+    cfg_.rc_superres_denominator = 16;
+    cfg_.rc_superres_kf_denominator = 16;
+
+    const int bitrate_array[2] = { 250, 650 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    tile_column_ = 1;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate),
+              effective_datarate_ * 0.85)
+        << " The datarate for the file exceeds the target by too much!";
+    ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate),
+              effective_datarate_ * 1.15)
+        << " The datarate for the file missed the target!"
+        << cfg_.rc_target_bitrate << " " << effective_datarate_;
+  }
+};
+
+// Params: test mode, speed, aq mode.
+class DatarateTestFrameDropLarge
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int,
+                                                 unsigned int>,
+      public DatarateTest {
+ public:
+  DatarateTestFrameDropLarge() : DatarateTest(GET_PARAM(0)) {
+    set_cpu_used_ = GET_PARAM(2);
+    aq_mode_ = GET_PARAM(3);
+  }
+
+ protected:
+  ~DatarateTestFrameDropLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(GET_PARAM(1));
+    ResetModel();
+  }
+
+  virtual void ChangingDropFrameThreshTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_undershoot_pct = 20;
+    cfg_.rc_undershoot_pct = 20;
+    cfg_.rc_dropframe_thresh = 10;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 50;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.rc_target_bitrate = 200;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 1;
+    // TODO(marpan): Investigate datarate target failures with a smaller
+    // keyframe interval (128).
+    cfg_.kf_max_dist = 9999;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 100);
+
+    const int kDropFrameThreshTestStep = 30;
+    aom_codec_pts_t last_drop = 140;
+    int last_num_drops = 0;
+    for (int i = 40; i < 100; i += kDropFrameThreshTestStep) {
+      cfg_.rc_dropframe_thresh = i;
+      ResetModel();
+      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+      ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.40)
+          << " The datarate for the file is greater than target by too much!";
+      if (last_drop > 0) {
+        ASSERT_LE(first_drop_, last_drop)
+            << " The first dropped frame for drop_thresh " << i
+            << " > first dropped frame for drop_thresh "
+            << i - kDropFrameThreshTestStep;
+      }
+      ASSERT_GE(num_drops_, last_num_drops * 0.7)
+          << " The number of dropped frames for drop_thresh " << i
+          << " < number of dropped frames for drop_thresh "
+          << i - kDropFrameThreshTestStep;
+      last_drop = first_drop_;
+      last_num_drops = num_drops_;
+    }
+  }
+};
+
+// Check basic rate targeting for VBR mode.
+TEST_P(DatarateTestLarge, BasicRateTargetingVBR) {
+  BasicRateTargetingVBRTest();
+}
+
+// Check basic rate targeting for CBR.
+TEST_P(DatarateTestLarge, BasicRateTargetingCBR) {
+  BasicRateTargetingCBRTest();
+}
+
+// Check basic rate targeting for CBR, with 4 threads
+TEST_P(DatarateTestLarge, BasicRateTargetingMultiThreadCBR) {
+  BasicRateTargetingMultiThreadCBRTest();
+}
+
+// Check basic rate targeting for periodic key frame.
+TEST_P(DatarateTestLarge, PeriodicKeyFrameCBR) {
+  BasicRateTargetingCBRPeriodicKeyFrameTest();
+}
+
+// Check basic rate targeting for periodic key frame, aligned with scene change.
+TEST_P(DatarateTestLarge, PeriodicKeyFrameCBROnSceneCuts) {
+  CBRPeriodicKeyFrameOnSceneCuts();
+}
+
+// Check basic rate targeting with error resilience on for scene cuts.
+TEST_P(DatarateTestLarge, ErrorResilienceOnSceneCuts) {
+  ErrorResilienceOnSceneCuts();
+}
+
+// Check basic rate targeting for CBR, for 444 input screen mode.
+#if defined(CONFIG_MAX_DECODE_PROFILE) && CONFIG_MAX_DECODE_PROFILE < 1
+TEST_P(DatarateTestLarge, DISABLED_BasicRateTargeting444CBRScreen) {
+#else
+TEST_P(DatarateTestLarge, BasicRateTargeting444CBRScreen) {
+#endif
+  BasicRateTargeting444CBRScreenTest();
+}
+
+// Check basic rate targeting for Superres mode with CBR.
+TEST_P(DatarateTestLarge, BasicRateTargetingSuperresCBR) {
+  BasicRateTargetingSuperresCBR();
+}
+
+// Check basic rate targeting for Superres mode with CBR and multi-threads.
+TEST_P(DatarateTestLarge, BasicRateTargetingSuperresCBRMultiThreads) {
+  BasicRateTargetingSuperresCBRMultiThreads();
+}
+
+// Check that (1) the first dropped frame gets earlier and earlier
+// as the drop frame threshold is increased, and (2) that the total number of
+// frame drops does not decrease as we increase frame drop threshold.
+// Use a lower qp-max to force some frame drops.
+TEST_P(DatarateTestFrameDropLarge, ChangingDropFrameThresh) {
+  ChangingDropFrameThreshTest();
+}
+
+TEST_P(DatarateTestLarge, BasicRateTargetingAQModeOnOffCBR) {
+  BasicRateTargetingAQModeOnOffCBRTest();
+}
+
+class DatarateTestRealtime : public DatarateTestLarge {};
+
+class DatarateTestFrameDropRealtime : public DatarateTestFrameDropLarge {};
+
+// Params: aq mode.
+class DatarateTestSpeedChangeRealtime
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode,
+                                                 unsigned int>,
+      public DatarateTest {
+ public:
+  DatarateTestSpeedChangeRealtime() : DatarateTest(GET_PARAM(0)) {
+    aq_mode_ = GET_PARAM(1);
+    speed_change_test_ = true;
+  }
+
+ protected:
+  ~DatarateTestSpeedChangeRealtime() override = default;
+
+  void SetUp() override {
+    InitializeConfig(GET_PARAM(1));
+    ResetModel();
+  }
+
+  virtual void ChangingSpeedTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_undershoot_pct = 20;
+    cfg_.rc_undershoot_pct = 20;
+    cfg_.rc_dropframe_thresh = 10;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 50;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.rc_target_bitrate = 200;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 1;
+    // TODO(marpan): Investigate datarate target failures with a smaller
+    // keyframe interval (128).
+    cfg_.kf_max_dist = 9999;
+    cfg_.rc_dropframe_thresh = 0;
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 100);
+
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.83)
+        << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.35)
+        << " The datarate for the file is greater than target by too much!";
+  }
+};
+
+// Check basic rate targeting for VBR mode.
+TEST_P(DatarateTestRealtime, BasicRateTargetingVBR) {
+  BasicRateTargetingVBRTest();
+}
+
+// Check basic rate targeting for CBR.
+TEST_P(DatarateTestRealtime, BasicRateTargetingCBR) {
+  BasicRateTargetingCBRTest();
+}
+
+// Check basic rate targeting for CBR. Use a longer clip,
+// and verify #encode size spikes above threshold.
+TEST_P(DatarateTestRealtime, BasicRateTargetingCBRSpike) {
+  BasicRateTargetingCBRSpikeTest();
+}
+
+// Check basic rate targeting for CBR. Use a longer clip,
+// and verify encoder can respnd and hit new bitrates updated
+// within the stream.
+TEST_P(DatarateTestRealtime, BasicRateTargetingCBRDynamicBitrate) {
+  BasicRateTargetingCBRDynamicBitrateTest();
+}
+
+// Check basic rate targeting for CBR, with 4 threads
+TEST_P(DatarateTestRealtime, BasicRateTargetingMultiThreadCBR) {
+  BasicRateTargetingMultiThreadCBRTest();
+}
+
+// Check basic rate targeting for periodic key frame.
+TEST_P(DatarateTestRealtime, PeriodicKeyFrameCBR) {
+  BasicRateTargetingCBRPeriodicKeyFrameTest();
+}
+
+// Check basic rate targeting for periodic key frame, aligned with scene change.
+TEST_P(DatarateTestRealtime, PeriodicKeyFrameCBROnSceneCuts) {
+  CBRPeriodicKeyFrameOnSceneCuts();
+}
+
+// Check basic rate targeting with error resilience on for scene cuts.
+TEST_P(DatarateTestRealtime, ErrorResilienceOnSceneCuts) {
+  ErrorResilienceOnSceneCuts();
+}
+
+// Check basic rate targeting for CBR for 444 screen mode.
+#if defined(CONFIG_MAX_DECODE_PROFILE) && CONFIG_MAX_DECODE_PROFILE < 1
+TEST_P(DatarateTestRealtime, DISABLED_BasicRateTargeting444CBRScreen) {
+#else
+TEST_P(DatarateTestRealtime, BasicRateTargeting444CBRScreen) {
+#endif
+  BasicRateTargeting444CBRScreenTest();
+}
+
+// Check basic rate targeting for Superres mode with CBR.
+TEST_P(DatarateTestRealtime, BasicRateTargetingSuperresCBR) {
+  BasicRateTargetingSuperresCBR();
+}
+
+// Check basic rate targeting for Superres mode with CBR and multi-threads.
+TEST_P(DatarateTestRealtime, BasicRateTargetingSuperresCBRMultiThreads) {
+  BasicRateTargetingSuperresCBRMultiThreads();
+}
+
+// Check that (1) the first dropped frame gets earlier and earlier
+// as the drop frame threshold is increased, and (2) that the total number of
+// frame drops does not decrease as we increase frame drop threshold.
+// Use a lower qp-max to force some frame drops.
+TEST_P(DatarateTestFrameDropRealtime, ChangingDropFrameThresh) {
+  ChangingDropFrameThreshTest();
+}
+
+TEST_P(DatarateTestSpeedChangeRealtime, ChangingSpeedTest) {
+  ChangingSpeedTest();
+}
+
+class DatarateTestSetFrameQpRealtime
+    : public DatarateTest,
+      public ::testing::TestWithParam<const libaom_test::AV1CodecFactory *> {
+ public:
+  DatarateTestSetFrameQpRealtime() : DatarateTest(GetParam()), frame_(0) {}
+
+ protected:
+  ~DatarateTestSetFrameQpRealtime() override = default;
+
+  void SetUp() override {
+    InitializeConfig(libaom_test::kRealTime);
+    ResetModel();
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    set_cpu_used_ = 7;
+    DatarateTest::PreEncodeFrameHook(video, encoder);
+    frame_qp_ = rnd_.PseudoUniform(63);
+    encoder->Control(AV1E_SET_QUANTIZER_ONE_PASS, frame_qp_);
+    frame_++;
+  }
+
+  void PostEncodeFrameHook(::libaom_test::Encoder *encoder) override {
+    if (frame_ >= total_frames_) return;
+    int qp = 0;
+    encoder->Control(AOME_GET_LAST_QUANTIZER_64, &qp);
+    ASSERT_EQ(qp, frame_qp_);
+  }
+
+ protected:
+  int total_frames_;
+
+ private:
+  int frame_qp_;
+  int frame_;
+  libaom_test::ACMRandom rnd_;
+};
+
+TEST_P(DatarateTestSetFrameQpRealtime, SetFrameQpOnePass) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_undershoot_pct = 20;
+  cfg_.rc_undershoot_pct = 20;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 50;
+  cfg_.rc_end_usage = AOM_CBR;
+  cfg_.rc_target_bitrate = 200;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.g_error_resilient = 1;
+  cfg_.kf_max_dist = 9999;
+  cfg_.rc_dropframe_thresh = 0;
+
+  total_frames_ = 100;
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ResetModel();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+AV1_INSTANTIATE_TEST_SUITE(DatarateTestLarge,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Range(5, 7), ::testing::Values(0, 3),
+                           ::testing::Values(0, 1));
+
+AV1_INSTANTIATE_TEST_SUITE(DatarateTestFrameDropLarge,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Range(5, 7), ::testing::Values(0, 3));
+
+AV1_INSTANTIATE_TEST_SUITE(DatarateTestRealtime,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Range(7, 12), ::testing::Values(0, 3),
+                           ::testing::Values(0, 1));
+
+AV1_INSTANTIATE_TEST_SUITE(DatarateTestFrameDropRealtime,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Range(7, 12), ::testing::Values(0, 3));
+
+AV1_INSTANTIATE_TEST_SUITE(DatarateTestSpeedChangeRealtime,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Values(0, 3));
+
+INSTANTIATE_TEST_SUITE_P(
+    AV1, DatarateTestSetFrameQpRealtime,
+    ::testing::Values(
+        static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)));
+
+}  // namespace
+}  // namespace datarate_test
diff --git a/third_party/aom/test/datarate_test.h b/third_party/aom/test/datarate_test.h
new file mode 100644
index 0000000000..accc1ad86b
--- /dev/null
+++ b/third_party/aom/test/datarate_test.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "config/aom_config.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "aom/aom_codec.h"
+
+namespace datarate_test {
+namespace {
+class DatarateTest : public ::libaom_test::EncoderTest {
+ public:
+  explicit DatarateTest(const ::libaom_test::CodecFactory *codec)
+      : EncoderTest(codec), set_cpu_used_(0), aq_mode_(0),
+        speed_change_test_(false) {}
+
+ protected:
+  ~DatarateTest() override = default;
+
+  virtual void ResetModel() {
+    last_pts_ = 0;
+    bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
+    frame_number_ = 0;
+    tot_frame_number_ = 0;
+    first_drop_ = 0;
+    num_drops_ = 0;
+    // Denoiser is off by default.
+    denoiser_on_ = 0;
+    bits_total_ = 0;
+    denoiser_offon_test_ = 0;
+    denoiser_offon_period_ = -1;
+    tile_column_ = 0;
+    screen_mode_ = false;
+    max_perc_spike_ = 1.0;
+    max_perc_spike_high_ = 1.0;
+    num_spikes_ = 0;
+    num_spikes_high_ = 0;
+    frame_update_bitrate_ = 0;
+    for (int i = 0; i < 3; i++) {
+      target_bitrate_update_[i] = 0;
+      frame_number_dynamic_[i] = 0;
+      bits_total_dynamic_[i] = 0;
+      effective_datarate_dynamic_[i] = 0.0;
+    }
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_SET_AQ_MODE, aq_mode_);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, tile_column_);
+      encoder->Control(AV1E_SET_ROW_MT, 1);
+      if (cfg_.g_usage == AOM_USAGE_REALTIME) {
+        encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
+        encoder->Control(AV1E_SET_ENABLE_WARPED_MOTION, 0);
+        encoder->Control(AV1E_SET_ENABLE_RESTORATION, 0);
+        encoder->Control(AV1E_SET_ENABLE_OBMC, 0);
+        encoder->Control(AV1E_SET_DELTAQ_MODE, 0);
+        encoder->Control(AV1E_SET_ENABLE_TPL_MODEL, 0);
+        encoder->Control(AV1E_SET_ENABLE_CDEF, 1);
+        encoder->Control(AV1E_SET_COEFF_COST_UPD_FREQ, 2);
+        encoder->Control(AV1E_SET_MODE_COST_UPD_FREQ, 2);
+        encoder->Control(AV1E_SET_MV_COST_UPD_FREQ, 2);
+        encoder->Control(AV1E_SET_DV_COST_UPD_FREQ, 2);
+      }
+      if (screen_mode_) {
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
+        encoder->Control(AV1E_SET_ENABLE_PALETTE, 1);
+        encoder->Control(AV1E_SET_ENABLE_INTRABC, 0);
+      }
+    }
+
+    if (speed_change_test_) {
+      if (video->frame() == 0) {
+        encoder->Control(AOME_SET_CPUUSED, 8);
+      } else if (video->frame() == 30) {
+        encoder->Control(AOME_SET_CPUUSED, 7);
+      } else if (video->frame() == 60) {
+        encoder->Control(AOME_SET_CPUUSED, 6);
+      } else if (video->frame() == 90) {
+        encoder->Control(AOME_SET_CPUUSED, 7);
+      }
+    }
+
+    if (frame_update_bitrate_ > 0) {
+      if (frame_number_ == frame_update_bitrate_) {
+        cfg_.rc_target_bitrate = target_bitrate_update_[1];
+        encoder->Config(&cfg_);
+      } else if (frame_number_ == 2 * frame_update_bitrate_) {
+        cfg_.rc_target_bitrate = target_bitrate_update_[2];
+        encoder->Config(&cfg_);
+      }
+    }
+
+    if (denoiser_offon_test_) {
+      ASSERT_GT(denoiser_offon_period_, 0)
+          << "denoiser_offon_period_ is not positive.";
+      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
+        // Flip denoiser_on_ periodically
+        denoiser_on_ ^= 1;
+      }
+    }
+
+    encoder->Control(AV1E_SET_NOISE_SENSITIVITY, denoiser_on_);
+
+    const aom_rational_t tb = video->timebase();
+    timebase_ = static_cast<double>(tb.num) / tb.den;
+    duration_ = 0;
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    // Time since last timestamp = duration.
+    aom_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
+
+    if (duration > 1) {
+      // If first drop not set and we have a drop set it to this time.
+      if (!first_drop_) first_drop_ = last_pts_ + 1;
+      // Update the number of frame drops.
+      num_drops_ += static_cast<int>(duration - 1);
+      // Update counter for total number of frames (#frames input to encoder).
+      // Needed for setting the proper layer_id below.
+      tot_frame_number_ += static_cast<int>(duration - 1);
+    }
+
+    // Add to the buffer the bits we'd expect from a constant bitrate server.
+    bits_in_buffer_model_ += static_cast<int64_t>(
+        duration * timebase_ * cfg_.rc_target_bitrate * 1000);
+
+    // Buffer should not go negative.
+    ASSERT_GE(bits_in_buffer_model_, 0)
+        << "Buffer Underrun at frame " << pkt->data.frame.pts;
+
+    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
+
+    // Update the total encoded bits.
+    bits_total_ += frame_size_in_bits;
+
+    // Update the most recent pts.
+    last_pts_ = pkt->data.frame.pts;
+    ++frame_number_;
+    ++tot_frame_number_;
+    const int per_frame_bandwidth = (cfg_.rc_target_bitrate * 1000) / 30;
+    if (frame_size_in_bits > max_perc_spike_ * per_frame_bandwidth &&
+        frame_number_ > 1)
+      num_spikes_++;
+    if (frame_size_in_bits > max_perc_spike_high_ * per_frame_bandwidth &&
+        frame_number_ > 1)
+      num_spikes_high_++;
+
+    if (frame_update_bitrate_ > 0) {
+      if (frame_number_ < frame_update_bitrate_) {
+        bits_total_dynamic_[0] += frame_size_in_bits;
+        frame_number_dynamic_[0]++;
+      } else if (frame_number_ >= frame_update_bitrate_ &&
+                 frame_number_ < 2 * frame_update_bitrate_) {
+        bits_total_dynamic_[1] += frame_size_in_bits;
+        frame_number_dynamic_[1]++;
+      } else {
+        bits_total_dynamic_[2] += frame_size_in_bits;
+        frame_number_dynamic_[2]++;
+      }
+    }
+  }
+
+  void EndPassHook() override {
+    duration_ = (last_pts_ + 1) * timebase_;
+    // Effective file datarate:
+    effective_datarate_ = (bits_total_ / 1000.0) / duration_;
+    if (frame_update_bitrate_ > 0) {
+      for (int i = 0; i < 3; i++)
+        effective_datarate_dynamic_[i] =
+            30 * (bits_total_dynamic_[i] / 1000.0) / frame_number_dynamic_[i];
+    }
+  }
+
+  aom_codec_pts_t last_pts_;
+  double timebase_;
+  int frame_number_;      // Counter for number of non-dropped/encoded frames.
+  int tot_frame_number_;  // Counter for total number of input frames.
+  int64_t bits_total_;
+  double duration_;
+  double effective_datarate_;
+  int set_cpu_used_;
+  int64_t bits_in_buffer_model_;
+  aom_codec_pts_t first_drop_;
+  int num_drops_;
+  int denoiser_on_;
+  int denoiser_offon_test_;
+  int denoiser_offon_period_;
+  unsigned int aq_mode_;
+  bool speed_change_test_;
+  int tile_column_;
+  bool screen_mode_;
+  double max_perc_spike_;
+  double max_perc_spike_high_;
+  int num_spikes_;
+  int num_spikes_high_;
+  // These are use for test with dynamic bitrate change.
+  // Used to verify that the encoder can respond and hit bitrate that is updated
+  // during the sequence.
+  int frame_update_bitrate_;
+  int target_bitrate_update_[3];
+  double effective_datarate_dynamic_[3];
+  int64_t bits_total_dynamic_[3];
+  int frame_number_dynamic_[3];
+};
+
+}  // namespace
+}  // namespace datarate_test
diff --git a/third_party/aom/test/decode_api_test.cc b/third_party/aom/test/decode_api_test.cc
new file mode 100644
index 0000000000..591a167e94
--- /dev/null
+++ b/third_party/aom/test/decode_api_test.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+
+#include "aom/aomdx.h"
+#include "aom/aom_decoder.h"
+
+namespace {
+
+TEST(DecodeAPI, InvalidParams) {
+  uint8_t buf[1] = { 0 };
+  aom_codec_ctx_t dec;
+
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_dec_init(nullptr, nullptr, nullptr, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_dec_init(&dec, nullptr, nullptr, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_decode(nullptr, nullptr, 0, nullptr));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_decode(nullptr, buf, 0, nullptr));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_decode(nullptr, buf, sizeof(buf), nullptr));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_decode(nullptr, nullptr, sizeof(buf), nullptr));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(nullptr));
+  EXPECT_NE(aom_codec_error(nullptr), nullptr);
+  EXPECT_EQ(aom_codec_error_detail(nullptr), nullptr);
+
+  aom_codec_iface_t *iface = aom_codec_av1_dx();
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_dec_init(nullptr, iface, nullptr, 0));
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_dec_init(&dec, iface, nullptr, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_decode(&dec, nullptr, sizeof(buf), nullptr));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(&dec, buf, 0, nullptr));
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&dec));
+}
+
+TEST(DecodeAPI, InvalidControlId) {
+  aom_codec_iface_t *iface = aom_codec_av1_dx();
+  aom_codec_ctx_t dec;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_dec_init(&dec, iface, nullptr, 0));
+  EXPECT_EQ(AOM_CODEC_ERROR, aom_codec_control(&dec, -1, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_control(&dec, 0, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&dec));
+}
+
+}  // namespace
diff --git a/third_party/aom/test/decode_multithreaded_test.cc b/third_party/aom/test/decode_multithreaded_test.cc
new file mode 100644
index 0000000000..4e06f1afac
--- /dev/null
+++ b/third_party/aom/test/decode_multithreaded_test.cc
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include "aom_mem/aom_mem.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+static const int kNumMultiThreadDecoders = 3;
+
+class AV1DecodeMultiThreadedTest
+    : public ::libaom_test::CodecTestWith5Params<int, int, int, int, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AV1DecodeMultiThreadedTest()
+      : EncoderTest(GET_PARAM(0)), md5_single_thread_(), md5_multi_thread_(),
+        n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)),
+        n_tile_groups_(GET_PARAM(3)), set_cpu_used_(GET_PARAM(4)),
+        row_mt_(GET_PARAM(5)) {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.w = 704;
+    cfg.h = 576;
+    cfg.threads = 1;
+    cfg.allow_lowbitdepth = 1;
+    single_thread_dec_ = codec_->CreateDecoder(cfg, 0);
+
+    // Test cfg.threads == powers of 2.
+    for (int i = 0; i < kNumMultiThreadDecoders; ++i) {
+      cfg.threads <<= 1;
+      multi_thread_dec_[i] = codec_->CreateDecoder(cfg, 0);
+      multi_thread_dec_[i]->Control(AV1D_SET_ROW_MT, row_mt_);
+    }
+
+    if (single_thread_dec_->IsAV1()) {
+      single_thread_dec_->Control(AV1D_EXT_TILE_DEBUG, 1);
+      single_thread_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      single_thread_dec_->Control(AV1_SET_DECODE_TILE_COL, -1);
+    }
+    for (int i = 0; i < kNumMultiThreadDecoders; ++i) {
+      if (multi_thread_dec_[i]->IsAV1()) {
+        multi_thread_dec_[i]->Control(AV1D_EXT_TILE_DEBUG, 1);
+        multi_thread_dec_[i]->Control(AV1_SET_DECODE_TILE_ROW, -1);
+        multi_thread_dec_[i]->Control(AV1_SET_DECODE_TILE_COL, -1);
+      }
+    }
+  }
+
+  ~AV1DecodeMultiThreadedTest() override {
+    delete single_thread_dec_;
+    for (int i = 0; i < kNumMultiThreadDecoders; ++i)
+      delete multi_thread_dec_[i];
+  }
+
+  void SetUp() override { InitializeConfig(libaom_test::kTwoPassGood); }
+
+  void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                          libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
+      encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
+      encoder->Control(AV1E_SET_NUM_TG, n_tile_groups_);
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+    }
+  }
+
+  void UpdateMD5(::libaom_test::Decoder *dec, const aom_codec_cx_pkt_t *pkt,
+                 ::libaom_test::MD5 *md5) {
+    const aom_codec_err_t res = dec->DecodeFrame(
+        reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz);
+    if (res != AOM_CODEC_OK) {
+      abort_ = true;
+      ASSERT_EQ(AOM_CODEC_OK, res);
+    }
+    const aom_image_t *img = dec->GetDxData().Next();
+    md5->Add(img);
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    UpdateMD5(single_thread_dec_, pkt, &md5_single_thread_);
+
+    for (int i = 0; i < kNumMultiThreadDecoders; ++i)
+      UpdateMD5(multi_thread_dec_[i], pkt, &md5_multi_thread_[i]);
+  }
+
+  void DoTest() {
+    const aom_rational timebase = { 33333333, 1000000000 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_target_bitrate = 500;
+    cfg_.g_lag_in_frames = 12;
+    cfg_.rc_end_usage = AOM_VBR;
+
+    libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 576,
+                                       timebase.den, timebase.num, 0, 2);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+    const char *md5_single_thread_str = md5_single_thread_.Get();
+
+    for (int i = 0; i < kNumMultiThreadDecoders; ++i) {
+      const char *md5_multi_thread_str = md5_multi_thread_[i].Get();
+      ASSERT_STREQ(md5_single_thread_str, md5_multi_thread_str);
+    }
+  }
+
+  ::libaom_test::MD5 md5_single_thread_;
+  ::libaom_test::MD5 md5_multi_thread_[kNumMultiThreadDecoders];
+  ::libaom_test::Decoder *single_thread_dec_;
+  ::libaom_test::Decoder *multi_thread_dec_[kNumMultiThreadDecoders];
+
+ private:
+  int n_tile_cols_;
+  int n_tile_rows_;
+  int n_tile_groups_;
+  int set_cpu_used_;
+  int row_mt_;
+};
+
+// run an encode and do the decode both in single thread
+// and multi thread. Ensure that the MD5 of the output in both cases
+// is identical. If so, the test passes.
+TEST_P(AV1DecodeMultiThreadedTest, MD5Match) {
+  cfg_.large_scale_tile = 0;
+  single_thread_dec_->Control(AV1_SET_TILE_MODE, 0);
+  for (int i = 0; i < kNumMultiThreadDecoders; ++i)
+    multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 0);
+  DoTest();
+}
+
+class AV1DecodeMultiThreadedTestLarge : public AV1DecodeMultiThreadedTest {};
+
+TEST_P(AV1DecodeMultiThreadedTestLarge, MD5Match) {
+  cfg_.large_scale_tile = 0;
+  single_thread_dec_->Control(AV1_SET_TILE_MODE, 0);
+  for (int i = 0; i < kNumMultiThreadDecoders; ++i)
+    multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 0);
+  DoTest();
+}
+
+// TODO(ranjit): More tests have to be added using pre-generated MD5.
+AV1_INSTANTIATE_TEST_SUITE(AV1DecodeMultiThreadedTest, ::testing::Values(1, 2),
+                           ::testing::Values(1, 2), ::testing::Values(1),
+                           ::testing::Values(3), ::testing::Values(0, 1));
+AV1_INSTANTIATE_TEST_SUITE(AV1DecodeMultiThreadedTestLarge,
+                           ::testing::Values(0, 1, 2, 6),
+                           ::testing::Values(0, 1, 2, 6),
+                           ::testing::Values(1, 4), ::testing::Values(0),
+                           ::testing::Values(0, 1));
+
+class AV1DecodeMultiThreadedLSTestLarge
+    : public AV1DecodeMultiThreadedTestLarge {};
+
+TEST_P(AV1DecodeMultiThreadedLSTestLarge, MD5Match) {
+  cfg_.large_scale_tile = 1;
+  single_thread_dec_->Control(AV1_SET_TILE_MODE, 1);
+  for (int i = 0; i < kNumMultiThreadDecoders; ++i)
+    multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 1);
+  DoTest();
+}
+
+AV1_INSTANTIATE_TEST_SUITE(AV1DecodeMultiThreadedLSTestLarge,
+                           ::testing::Values(6), ::testing::Values(6),
+                           ::testing::Values(1), ::testing::Values(0, 3),
+                           ::testing::Values(0, 1));
+
+}  // namespace
diff --git a/third_party/aom/test/decode_perf_test.cc b/third_party/aom/test/decode_perf_test.cc
new file mode 100644
index 0000000000..030035466c
--- /dev/null
+++ b/third_party/aom/test/decode_perf_test.cc
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <string>
+#include <tuple>
+
+#include "config/aom_version.h"
+
+#include "aom_ports/aom_timer.h"
+#include "common/ivfenc.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/ivf_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/webm_video_source.h"
+
+using std::make_tuple;
+
+namespace {
+
+#define VIDEO_NAME 0
+#define THREADS 1
+
+const double kUsecsInSec = 1000000.0;
+const char kNewEncodeOutputFile[] = "new_encode.ivf";
+
+/*
+ DecodePerfTest takes a tuple of filename + number of threads to decode with
+ */
+typedef std::tuple<const char *, unsigned> DecodePerfParam;
+
+// TODO(jimbankoski): Add actual test vectors here when available.
+// const DecodePerfParam kAV1DecodePerfVectors[] = {};
+
+/*
+ In order to reflect real world performance as much as possible, Perf tests
+ *DO NOT* do any correctness checks. Please run them alongside correctness
+ tests to ensure proper codec integrity. Furthermore, in this test we
+ deliberately limit the amount of system calls we make to avoid OS
+ preemption.
+
+ TODO(joshualitt) create a more detailed perf measurement test to collect
+   power/temp/min max frame decode times/etc
+ */
+
+class DecodePerfTest : public ::testing::TestWithParam<DecodePerfParam> {};
+
+TEST_P(DecodePerfTest, PerfTest) {
+  const char *const video_name = GET_PARAM(VIDEO_NAME);
+  const unsigned threads = GET_PARAM(THREADS);
+
+  libaom_test::WebMVideoSource video(video_name);
+  video.Init();
+
+  aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+  cfg.threads = threads;
+  cfg.allow_lowbitdepth = 1;
+  libaom_test::AV1Decoder decoder(cfg, 0);
+
+  aom_usec_timer t;
+  aom_usec_timer_start(&t);
+
+  for (video.Begin(); video.cxdata() != nullptr; video.Next()) {
+    decoder.DecodeFrame(video.cxdata(), video.frame_size());
+  }
+
+  aom_usec_timer_mark(&t);
+  const double elapsed_secs = double(aom_usec_timer_elapsed(&t)) / kUsecsInSec;
+  const unsigned frames = video.frame_number();
+  const double fps = double(frames) / elapsed_secs;
+
+  printf("{\n");
+  printf("\t\"type\" : \"decode_perf_test\",\n");
+  printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+  printf("\t\"videoName\" : \"%s\",\n", video_name);
+  printf("\t\"threadCount\" : %u,\n", threads);
+  printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs);
+  printf("\t\"totalFrames\" : %u,\n", frames);
+  printf("\t\"framesPerSecond\" : %f\n", fps);
+  printf("}\n");
+}
+
+// TODO(jimbankoski): Enabled when we have actual AV1 Decode vectors.
+// INSTANTIATE_TEST_SUITE_P(AV1, DecodePerfTest,
+//                        ::testing::ValuesIn(kAV1DecodePerfVectors));
+
+class AV1NewEncodeDecodePerfTest
+    : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AV1NewEncodeDecodePerfTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), speed_(0),
+        outfile_(nullptr), out_frames_(0) {}
+
+  ~AV1NewEncodeDecodePerfTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+
+    cfg_.g_lag_in_frames = 25;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_undershoot_pct = 50;
+    cfg_.rc_overshoot_pct = 50;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.rc_end_usage = AOM_VBR;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, speed_);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 2);
+    }
+  }
+
+  void BeginPassHook(unsigned int /*pass*/) override {
+    const char *const env = getenv("LIBAOM_TEST_DATA_PATH");
+    const std::string data_path(env ? env : ".");
+    const std::string path_to_source = data_path + "/" + kNewEncodeOutputFile;
+    outfile_ = fopen(path_to_source.c_str(), "wb");
+    ASSERT_NE(outfile_, nullptr);
+  }
+
+  void EndPassHook() override {
+    if (outfile_ != nullptr) {
+      if (!fseek(outfile_, 0, SEEK_SET))
+        ivf_write_file_header(outfile_, &cfg_, AV1_FOURCC, out_frames_);
+      fclose(outfile_);
+      outfile_ = nullptr;
+    }
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    ++out_frames_;
+
+    // Write initial file header if first frame.
+    if (pkt->data.frame.pts == 0)
+      ivf_write_file_header(outfile_, &cfg_, AV1_FOURCC, out_frames_);
+
+    // Write frame header and data.
+    ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz);
+    ASSERT_EQ(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_),
+              pkt->data.frame.sz);
+  }
+
+  bool DoDecode() const override { return false; }
+
+  void set_speed(unsigned int speed) { speed_ = speed; }
+
+ private:
+  libaom_test::TestMode encoding_mode_;
+  uint32_t speed_;
+  FILE *outfile_;
+  uint32_t out_frames_;
+};
+
+struct EncodePerfTestVideo {
+  EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
+                      uint32_t bitrate_, int frames_)
+      : name(name_), width(width_), height(height_), bitrate(bitrate_),
+        frames(frames_) {}
+  const char *name;
+  uint32_t width;
+  uint32_t height;
+  uint32_t bitrate;
+  int frames;
+};
+
+const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = {
+  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
+};
+
+TEST_P(AV1NewEncodeDecodePerfTest, PerfTest) {
+  SetUp();
+
+  // TODO(JBB): Make this work by going through the set of given files.
+  const int i = 0;
+  const aom_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = kAV1EncodePerfTestVectors[i].bitrate;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  const char *video_name = kAV1EncodePerfTestVectors[i].name;
+  libaom_test::I420VideoSource video(
+      video_name, kAV1EncodePerfTestVectors[i].width,
+      kAV1EncodePerfTestVectors[i].height, timebase.den, timebase.num, 0,
+      kAV1EncodePerfTestVectors[i].frames);
+  set_speed(2);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  const uint32_t threads = 4;
+
+  libaom_test::IVFVideoSource decode_video(kNewEncodeOutputFile);
+  decode_video.Init();
+
+  aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+  cfg.threads = threads;
+  cfg.allow_lowbitdepth = 1;
+  libaom_test::AV1Decoder decoder(cfg, 0);
+
+  aom_usec_timer t;
+  aom_usec_timer_start(&t);
+
+  for (decode_video.Begin(); decode_video.cxdata() != nullptr;
+       decode_video.Next()) {
+    decoder.DecodeFrame(decode_video.cxdata(), decode_video.frame_size());
+  }
+
+  aom_usec_timer_mark(&t);
+  const double elapsed_secs =
+      static_cast<double>(aom_usec_timer_elapsed(&t)) / kUsecsInSec;
+  const unsigned decode_frames = decode_video.frame_number();
+  const double fps = static_cast<double>(decode_frames) / elapsed_secs;
+
+  printf("{\n");
+  printf("\t\"type\" : \"decode_perf_test\",\n");
+  printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+  printf("\t\"videoName\" : \"%s\",\n", kNewEncodeOutputFile);
+  printf("\t\"threadCount\" : %u,\n", threads);
+  printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs);
+  printf("\t\"totalFrames\" : %u,\n", decode_frames);
+  printf("\t\"framesPerSecond\" : %f\n", fps);
+  printf("}\n");
+}
+
+AV1_INSTANTIATE_TEST_SUITE(AV1NewEncodeDecodePerfTest,
+                           ::testing::Values(::libaom_test::kTwoPassGood));
+}  // namespace
diff --git a/third_party/aom/test/decode_scalability_test.cc b/third_party/aom/test/decode_scalability_test.cc
new file mode 100644
index 0000000000..d66c8ec719
--- /dev/null
+++ b/third_party/aom/test/decode_scalability_test.cc
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <ostream>
+
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/util.h"
+#include "test/video_source.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+struct ObuExtensionHeader {
+  int temporal_id;
+  int spatial_id;
+};
+
+struct DecodeParam {
+  const char *filename;
+  const ObuExtensionHeader *headers;
+  size_t num_headers;
+};
+
+std::ostream &operator<<(std::ostream &os, const DecodeParam &dp) {
+  return os << "file: " << dp.filename;
+}
+
+class DecodeScalabilityTest
+    : public ::libaom_test::DecoderTest,
+      public ::libaom_test::CodecTestWithParam<DecodeParam> {
+ protected:
+  DecodeScalabilityTest()
+      : DecoderTest(GET_PARAM(0)), headers_(GET_PARAM(1).headers),
+        num_headers_(GET_PARAM(1).num_headers) {}
+
+  ~DecodeScalabilityTest() override = default;
+
+  void PreDecodeFrameHook(const libaom_test::CompressedVideoSource &video,
+                          libaom_test::Decoder *decoder) override {
+    if (video.frame_number() == 0)
+      decoder->Control(AV1D_SET_OUTPUT_ALL_LAYERS, 1);
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             const unsigned int /*frame_number*/) override {
+    const ObuExtensionHeader &header = headers_[header_index_];
+    EXPECT_EQ(img.temporal_id, header.temporal_id);
+    EXPECT_EQ(img.spatial_id, header.spatial_id);
+    header_index_ = (header_index_ + 1) % num_headers_;
+  }
+
+  void RunTest() {
+    const DecodeParam input = GET_PARAM(1);
+    aom_codec_dec_cfg_t cfg = { 1, 0, 0, !FORCE_HIGHBITDEPTH_DECODING };
+    libaom_test::IVFVideoSource decode_video(input.filename);
+    decode_video.Init();
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&decode_video, cfg));
+  }
+
+ private:
+  const ObuExtensionHeader *const headers_;
+  const size_t num_headers_;
+  size_t header_index_ = 0;
+};
+
+TEST_P(DecodeScalabilityTest, ObuExtensionHeader) { RunTest(); }
+
+// For all test files, we have:
+//   operatingPoint = 0
+//   OperatingPointIdc = operating_point_idc[ 0 ]
+
+// av1-1-b8-01-size-16x16.ivf:
+//   operating_points_cnt_minus_1 = 0
+//   operating_point_idc[ 0 ] = 0x0
+const ObuExtensionHeader kSize16x16Headers[1] = { { 0, 0 } };
+
+// av1-1-b8-22-svc-L1T2.ivf:
+//   operating_points_cnt_minus_1 = 1
+//   operating_point_idc[ 0 ] = 0x103
+//   operating_point_idc[ 1 ] = 0x101
+const ObuExtensionHeader kL1T2Headers[2] = { { 0, 0 }, { 1, 0 } };
+
+// av1-1-b8-22-svc-L2T1.ivf:
+//   operating_points_cnt_minus_1 = 1
+//   operating_point_idc[ 0 ] = 0x301
+//   operating_point_idc[ 1 ] = 0x101
+const ObuExtensionHeader kL2T1Headers[2] = { { 0, 0 }, { 0, 1 } };
+
+// av1-1-b8-22-svc-L2T2.ivf:
+//   operating_points_cnt_minus_1 = 3
+//   operating_point_idc[ 0 ] = 0x303
+//   operating_point_idc[ 1 ] = 0x301
+//   operating_point_idc[ 2 ] = 0x103
+//   operating_point_idc[ 3 ] = 0x101
+const ObuExtensionHeader kL2T2Headers[4] = {
+  { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 }
+};
+
+const DecodeParam kAV1DecodeScalabilityTests[] = {
+  // { filename, headers, num_headers }
+  { "av1-1-b8-01-size-16x16.ivf", kSize16x16Headers, 1 },
+  { "av1-1-b8-22-svc-L1T2.ivf", kL1T2Headers, 2 },
+  { "av1-1-b8-22-svc-L2T1.ivf", kL2T1Headers, 2 },
+  { "av1-1-b8-22-svc-L2T2.ivf", kL2T2Headers, 4 },
+};
+
+AV1_INSTANTIATE_TEST_SUITE(DecodeScalabilityTest,
+                           ::testing::ValuesIn(kAV1DecodeScalabilityTests));
+
+}  // namespace
diff --git a/third_party/aom/test/decode_test_driver.cc b/third_party/aom/test/decode_test_driver.cc
new file mode 100644
index 0000000000..f44d670556
--- /dev/null
+++ b/third_party/aom/test/decode_test_driver.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/register_state_check.h"
+#include "test/video_source.h"
+
+namespace libaom_test {
+
+const char kAV1Name[] = "AOMedia Project AV1 Decoder";
+
+aom_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size,
+                                    aom_codec_stream_info_t *stream_info) {
+  return aom_codec_peek_stream_info(CodecInterface(), cxdata, size,
+                                    stream_info);
+}
+
+aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) {
+  return DecodeFrame(cxdata, size, nullptr);
+}
+
+aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size,
+                                     void *user_priv) {
+  aom_codec_err_t res_dec;
+  InitOnce();
+  API_REGISTER_STATE_CHECK(
+      res_dec = aom_codec_decode(&decoder_, cxdata, size, user_priv));
+  return res_dec;
+}
+
+bool Decoder::IsAV1() const {
+  const char *codec_name = GetDecoderName();
+  return strncmp(kAV1Name, codec_name, sizeof(kAV1Name) - 1) == 0;
+}
+
+void DecoderTest::HandlePeekResult(Decoder *const /*decoder*/,
+                                   CompressedVideoSource * /*video*/,
+                                   const aom_codec_err_t res_peek) {
+  /* The Av1 implementation of PeekStream returns an error only if the
+   * data passed to it isn't a valid Av1 chunk. */
+  ASSERT_EQ(AOM_CODEC_OK, res_peek)
+      << "Peek return failed: " << aom_codec_err_to_string(res_peek);
+}
+
+void DecoderTest::RunLoop(CompressedVideoSource *video,
+                          const aom_codec_dec_cfg_t &dec_cfg) {
+  Decoder *const decoder = codec_->CreateDecoder(dec_cfg, flags_);
+  ASSERT_NE(decoder, nullptr);
+  bool end_of_file = false;
+  bool peeked_stream = false;
+
+  // Decode frames.
+  for (video->Begin(); !::testing::Test::HasFailure() && !end_of_file;
+       video->Next()) {
+    PreDecodeFrameHook(*video, decoder);
+
+    aom_codec_stream_info_t stream_info;
+    stream_info.is_annexb = 0;
+
+    if (video->cxdata() != nullptr) {
+      if (!peeked_stream) {
+        // TODO(yaowu): PeekStream returns error for non-sequence_header_obu,
+        // therefore should only be tried once per sequence, this shall be fixed
+        // once PeekStream is updated to properly operate on other obus.
+        const aom_codec_err_t res_peek = decoder->PeekStream(
+            video->cxdata(), video->frame_size(), &stream_info);
+        HandlePeekResult(decoder, video, res_peek);
+        ASSERT_FALSE(::testing::Test::HasFailure());
+        peeked_stream = true;
+      }
+
+      aom_codec_err_t res_dec =
+          decoder->DecodeFrame(video->cxdata(), video->frame_size());
+      if (!HandleDecodeResult(res_dec, *video, decoder)) break;
+    } else {
+      // Signal end of the file to the decoder.
+      const aom_codec_err_t res_dec = decoder->DecodeFrame(nullptr, 0);
+      ASSERT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+      end_of_file = true;
+    }
+
+    DxDataIterator dec_iter = decoder->GetDxData();
+    const aom_image_t *img = nullptr;
+
+    // Get decompressed data
+    while (!::testing::Test::HasFailure() && (img = dec_iter.Next()))
+      DecompressedFrameHook(*img, video->frame_number());
+  }
+  delete decoder;
+}
+
+void DecoderTest::RunLoop(CompressedVideoSource *video) {
+  aom_codec_dec_cfg_t dec_cfg = aom_codec_dec_cfg_t();
+  RunLoop(video, dec_cfg);
+}
+
+void DecoderTest::set_cfg(const aom_codec_dec_cfg_t &dec_cfg) {
+  memcpy(&cfg_, &dec_cfg, sizeof(cfg_));
+}
+
+void DecoderTest::set_flags(const aom_codec_flags_t flags) { flags_ = flags; }
+
+}  // namespace libaom_test
diff --git a/third_party/aom/test/decode_test_driver.h b/third_party/aom/test/decode_test_driver.h
new file mode 100644
index 0000000000..311898ecf0
--- /dev/null
+++ b/third_party/aom/test/decode_test_driver.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_DECODE_TEST_DRIVER_H_
+#define AOM_TEST_DECODE_TEST_DRIVER_H_
+#include <cstring>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+
+#include "aom/aom_decoder.h"
+
+namespace libaom_test {
+
+class CodecFactory;
+class CompressedVideoSource;
+
+// Provides an object to handle decoding output
+class DxDataIterator {
+ public:
+  explicit DxDataIterator(aom_codec_ctx_t *decoder)
+      : decoder_(decoder), iter_(nullptr) {}
+
+  const aom_image_t *Next() { return aom_codec_get_frame(decoder_, &iter_); }
+
+ private:
+  aom_codec_ctx_t *decoder_;
+  aom_codec_iter_t iter_;
+};
+
+// Provides a simplified interface to manage one video decoding.
+// Similar to Encoder class, the exact services should be added
+// as more tests are added.
+class Decoder {
+ public:
+  explicit Decoder(aom_codec_dec_cfg_t cfg)
+      : cfg_(cfg), flags_(0), init_done_(false) {
+    memset(&decoder_, 0, sizeof(decoder_));
+  }
+
+  Decoder(aom_codec_dec_cfg_t cfg, const aom_codec_flags_t flag)
+      : cfg_(cfg), flags_(flag), init_done_(false) {
+    memset(&decoder_, 0, sizeof(decoder_));
+  }
+
+  virtual ~Decoder() { aom_codec_destroy(&decoder_); }
+
+  aom_codec_err_t PeekStream(const uint8_t *cxdata, size_t size,
+                             aom_codec_stream_info_t *stream_info);
+
+  aom_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size);
+
+  aom_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size,
+                              void *user_priv);
+
+  DxDataIterator GetDxData() { return DxDataIterator(&decoder_); }
+
+  void Control(int ctrl_id, int arg) { Control(ctrl_id, arg, AOM_CODEC_OK); }
+
+  void Control(int ctrl_id, const void *arg) {
+    InitOnce();
+    const aom_codec_err_t res = aom_codec_control(&decoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << DecodeError();
+  }
+
+  void Control(int ctrl_id, int arg, aom_codec_err_t expected_value) {
+    InitOnce();
+    const aom_codec_err_t res = aom_codec_control(&decoder_, ctrl_id, arg);
+    ASSERT_EQ(expected_value, res) << DecodeError();
+  }
+
+  const char *DecodeError() {
+    const char *detail = aom_codec_error_detail(&decoder_);
+    return detail ? detail : aom_codec_error(&decoder_);
+  }
+
+  // Passes the external frame buffer information to libaom.
+  aom_codec_err_t SetFrameBufferFunctions(
+      aom_get_frame_buffer_cb_fn_t cb_get,
+      aom_release_frame_buffer_cb_fn_t cb_release, void *user_priv) {
+    InitOnce();
+    return aom_codec_set_frame_buffer_functions(&decoder_, cb_get, cb_release,
+                                                user_priv);
+  }
+
+  const char *GetDecoderName() const {
+    return aom_codec_iface_name(CodecInterface());
+  }
+
+  bool IsAV1() const;
+
+  aom_codec_ctx_t *GetDecoder() { return &decoder_; }
+
+ protected:
+  virtual aom_codec_iface_t *CodecInterface() const = 0;
+
+  void InitOnce() {
+    if (!init_done_) {
+      const aom_codec_err_t res =
+          aom_codec_dec_init(&decoder_, CodecInterface(), &cfg_, flags_);
+      ASSERT_EQ(AOM_CODEC_OK, res) << DecodeError();
+      init_done_ = true;
+    }
+  }
+
+  aom_codec_ctx_t decoder_;
+  aom_codec_dec_cfg_t cfg_;
+  aom_codec_flags_t flags_;
+  bool init_done_;
+};
+
+// Common test functionality for all Decoder tests.
+class DecoderTest {
+ public:
+  // Main decoding loop
+  virtual void RunLoop(CompressedVideoSource *video);
+  virtual void RunLoop(CompressedVideoSource *video,
+                       const aom_codec_dec_cfg_t &dec_cfg);
+
+  virtual void set_cfg(const aom_codec_dec_cfg_t &dec_cfg);
+  virtual void set_flags(const aom_codec_flags_t flags);
+
+  // Hook to be called before decompressing every frame.
+  virtual void PreDecodeFrameHook(const CompressedVideoSource & /*video*/,
+                                  Decoder * /*decoder*/) {}
+
+  // Hook to be called to handle decode result. Return true to continue.
+  virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                                  const CompressedVideoSource & /*video*/,
+                                  Decoder *decoder) {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  // Hook to be called on every decompressed frame.
+  virtual void DecompressedFrameHook(const aom_image_t & /*img*/,
+                                     const unsigned int /*frame_number*/) {}
+
+  // Hook to be called on peek result
+  virtual void HandlePeekResult(Decoder *const decoder,
+                                CompressedVideoSource *video,
+                                const aom_codec_err_t res_peek);
+
+ protected:
+  explicit DecoderTest(const CodecFactory *codec)
+      : codec_(codec), cfg_(), flags_(0) {}
+
+  virtual ~DecoderTest() = default;
+
+  const CodecFactory *codec_;
+  aom_codec_dec_cfg_t cfg_;
+  aom_codec_flags_t flags_;
+};
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_DECODE_TEST_DRIVER_H_
diff --git a/third_party/aom/test/decode_to_md5.sh b/third_party/aom/test/decode_to_md5.sh
new file mode 100755
index 0000000000..214755f216
--- /dev/null
+++ b/third_party/aom/test/decode_to_md5.sh
@@ -0,0 +1,77 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom decode_to_md5 example. To add new tests to this
+## file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to decode_to_md5_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: Make sure input is available:
+#   $AV1_IVF_FILE is required.
+decode_to_md5_verify_environment() {
+  if [ "$(av1_encode_available)" != "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then
+    return 1
+  fi
+}
+
+# Runs decode_to_md5 on $1 and captures the md5 sum for the final frame. $2 is
+# interpreted as codec name and used solely to name the output file. $3 is the
+# expected md5 sum: It must match that of the final frame.
+decode_to_md5() {
+  local decoder="$(aom_tool_path decode_to_md5)"
+  local input_file="$1"
+  local codec="$2"
+  local expected_md5="$3"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/decode_to_md5_${codec}"
+
+  if [ ! -x "${decoder}" ]; then
+    elog "${decoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
+      ${devnull} || return 1
+
+  [ -e "${output_file}" ] || return 1
+
+  local md5_last_frame="$(tail -n1 "${output_file}" | awk '{print $1}')"
+  local actual_md5="$(echo "${md5_last_frame}" | awk '{print $1}')"
+  if [ "${actual_md5}" = "${expected_md5}" ]; then
+    return 0
+  else
+    elog "MD5 mismatch:"
+    elog "Expected: ${expected_md5}"
+    elog "Actual: ${actual_md5}"
+    return 1
+  fi
+}
+
+DISABLED_decode_to_md5_av1() {
+  # expected MD5 sum for the last frame.
+  local expected_md5="567dd6d4b7a7170edddbf58bbcc3aff1"
+  local file="${AV1_IVF_FILE}"
+
+  # TODO(urvang): Check in the encoded file (like libvpx does) to avoid
+  # encoding every time.
+  if [ "$(av1_decode_available)" = "yes" ]; then
+    if [ ! -e "${AV1_IVF_FILE}" ]; then
+      file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
+      encode_yuv_raw_input_av1 "${file}" --ivf || return 1
+    fi
+    decode_to_md5 "${file}" "av1" "${expected_md5}"
+  fi
+}
+
+# TODO(tomfinegan): Enable when the bitstream stabilizes.
+decode_to_md5_tests="DISABLED_decode_to_md5_av1"
+
+run_tests decode_to_md5_verify_environment "${decode_to_md5_tests}"
diff --git a/third_party/aom/test/decode_with_drops.sh b/third_party/aom/test/decode_with_drops.sh
new file mode 100755
index 0000000000..1fc13ced35
--- /dev/null
+++ b/third_party/aom/test/decode_with_drops.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom decode_with_drops example. To add new tests to
+## this file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to decode_with_drops_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: Make sure input is available:
+#   $AV1_IVF_FILE is required.
+decode_with_drops_verify_environment() {
+  if [ "$(av1_encode_available)" != "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then
+    return 1
+  fi
+}
+
+# Runs decode_with_drops on $1, $2 is interpreted as codec name and used solely
+# to name the output file. $3 is the drop mode, and is passed directly to
+# decode_with_drops.
+decode_with_drops() {
+  local decoder="$(aom_tool_path decode_with_drops)"
+  local input_file="$1"
+  local codec="$2"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/decode_with_drops_${codec}"
+  local drop_mode="$3"
+
+  if [ ! -x "${decoder}" ]; then
+    elog "${decoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
+      "${drop_mode}" ${devnull} || return 1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+
+# Decodes $AV1_IVF_FILE while dropping frames, twice: once in sequence mode,
+# and once in pattern mode.
+DISABLED_decode_with_drops_av1() {
+  if [ "$(av1_decode_available)" = "yes" ]; then
+    local file="${AV1_IVF_FILE}"
+    if [ ! -e "${AV1_IVF_FILE}" ]; then
+      file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
+      encode_yuv_raw_input_av1 "${file}" --ivf || return 1
+    fi
+    # Drop frames 3 and 4.
+    decode_with_drops "${file}" "av1" "3-4" || return 1
+
+    # Test pattern mode: Drop 3 of every 4 frames.
+    decode_with_drops "${file}" "av1" "3/4" || return 1
+  fi
+}
+
+# TODO(yaowu): Disable this test as trailing_bit check is expected to fail
+decode_with_drops_tests="DISABLED_decode_with_drops_av1"
+
+run_tests decode_with_drops_verify_environment "${decode_with_drops_tests}"
diff --git a/third_party/aom/test/deltaq_mode_test.cc b/third_party/aom/test/deltaq_mode_test.cc
new file mode 100644
index 0000000000..5960d276d1
--- /dev/null
+++ b/third_party/aom/test/deltaq_mode_test.cc
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2023, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include "aom/aomcx.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_encoder.h"
+#include "aom/aom_image.h"
+#include "config/aom_config.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+/*
+   Reproduces https://crbug.com/aomedia/3376. Emulates the command line:
+
+   ./aomenc --cpu-used=6 --threads=10 --cq-level=14 --passes=1 --limit=1 \
+     --lag-in-frames=0 --end-usage=q --deltaq-mode=3 --min-q=0 --max-q=63 \
+     -o output.av1 niklas_1280_720_30.y4m
+*/
+TEST(DeltaqModeTest, DeltaqMode3MultiThread) {
+  constexpr int kWidth = 1280;
+  constexpr int kHeight = 720;
+  // Dummy buffer of neutral gray samples.
+  constexpr size_t kBufferSize = kWidth * kHeight + kWidth * kHeight / 2;
+  std::vector<unsigned char> buffer(kBufferSize,
+                                    static_cast<unsigned char>(128));
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1,
+                               buffer.data()));
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY),
+            AOM_CODEC_OK);
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_threads = 10;
+  cfg.rc_end_usage = AOM_Q;
+  cfg.g_profile = 0;
+  cfg.g_bit_depth = AOM_BITS_8;
+  cfg.g_input_bit_depth = 8;
+  cfg.g_lag_in_frames = 0;
+  cfg.rc_min_quantizer = 0;
+  cfg.rc_max_quantizer = 63;
+  cfg.g_pass = AOM_RC_ONE_PASS;
+  cfg.g_limit = 1;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 6), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 14), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_DELTAQ_MODE, 3), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_set_option(&enc, "passes", "1"), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_STUDIO_RANGE),
+            AOM_CODEC_OK);
+
+  EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK);
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+// The implementation of multi-threading for deltaq-mode=3 in allintra
+// mode is based on row multi-threading.
+// The test ensures that When row mt is turned off,
+// deltaq-mode = 3 can still properly encode and decode.
+TEST(DeltaqModeTest, DeltaqMode3MultiThreadNoRowMT) {
+  constexpr int kWidth = 1280;
+  constexpr int kHeight = 720;
+  // Dummy buffer of neutral gray samples.
+  constexpr size_t kBufferSize = kWidth * kHeight + kWidth * kHeight / 2;
+  std::vector<unsigned char> buffer(kBufferSize,
+                                    static_cast<unsigned char>(128));
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1,
+                               buffer.data()));
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY),
+            AOM_CODEC_OK);
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_threads = 10;
+  cfg.rc_end_usage = AOM_Q;
+  cfg.g_profile = 0;
+  cfg.g_bit_depth = AOM_BITS_8;
+  cfg.g_input_bit_depth = 8;
+  cfg.g_lag_in_frames = 0;
+  cfg.rc_min_quantizer = 0;
+  cfg.rc_max_quantizer = 63;
+  cfg.g_pass = AOM_RC_ONE_PASS;
+  cfg.g_limit = 1;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_ROW_MT, 0), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 6), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 14), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_DELTAQ_MODE, 3), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_set_option(&enc, "passes", "1"), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_STUDIO_RANGE),
+            AOM_CODEC_OK);
+
+  EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK);
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+// 10-bit version of the DeltaqMode3MultiThread test.
+TEST(DeltaqModeTest, DeltaqMode3MultiThreadHighbd) {
+  constexpr int kWidth = 1280;
+  constexpr int kHeight = 720;
+  // Dummy buffer of 10-bit neutral gray samples.
+  constexpr size_t kBufferSize = kWidth * kHeight + kWidth * kHeight / 2;
+  std::vector<uint16_t> buffer(kBufferSize, 512);
+
+  aom_image_t img;
+  EXPECT_EQ(&img,
+            aom_img_wrap(&img, AOM_IMG_FMT_I42016, kWidth, kHeight, 1,
+                         reinterpret_cast<unsigned char *>(buffer.data())));
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY),
+            AOM_CODEC_OK);
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_threads = 10;
+  cfg.rc_end_usage = AOM_Q;
+  cfg.g_profile = 0;
+  cfg.g_bit_depth = AOM_BITS_10;
+  cfg.g_input_bit_depth = 10;
+  cfg.g_lag_in_frames = 0;
+  cfg.rc_min_quantizer = 0;
+  cfg.rc_max_quantizer = 63;
+  cfg.g_pass = AOM_RC_ONE_PASS;
+  cfg.g_limit = 1;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 6), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 14), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_DELTAQ_MODE, 3), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_set_option(&enc, "passes", "1"), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_STUDIO_RANGE),
+            AOM_CODEC_OK);
+
+  EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK);
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/disflow_test.cc b/third_party/aom/test/disflow_test.cc
new file mode 100644
index 0000000000..124c9a96c7
--- /dev/null
+++ b/third_party/aom/test/disflow_test.cc
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2023, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "aom_dsp/flow_estimation/disflow.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+using ComputeFlowAtPointFunc = void (*)(const uint8_t *src, const uint8_t *ref,
+                                        int x, int y, int width, int height,
+                                        int stride, double *u, double *v);
+
+class ComputeFlowTest
+    : public ::testing::TestWithParam<ComputeFlowAtPointFunc> {
+ public:
+  ComputeFlowTest()
+      : target_func_(GetParam()),
+        rnd_(libaom_test::ACMRandom::DeterministicSeed()) {}
+
+ protected:
+  void RunCheckOutput(int run_times);
+  ComputeFlowAtPointFunc target_func_;
+
+  libaom_test::ACMRandom rnd_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ComputeFlowTest);
+
+void ComputeFlowTest::RunCheckOutput(int run_times) {
+  constexpr int kWidth = 352;
+  constexpr int kHeight = 288;
+
+  ::libaom_test::YUVVideoSource video("bus_352x288_420_f20_b8.yuv",
+                                      AOM_IMG_FMT_I420, kWidth, kHeight, 30, 1,
+                                      0, 2);
+  // Use Y (Luminance) plane.
+  video.Begin();
+  uint8_t *src = video.img()->planes[0];
+  ASSERT_NE(src, nullptr);
+  video.Next();
+  uint8_t *ref = video.img()->planes[0];
+  ASSERT_NE(ref, nullptr);
+
+  // Pick a random value between -5 and 5. The range was chosen arbitrarily as
+  // u and v can take any kind of value in practise, but it shouldn't change the
+  // outcome of the tests.
+  const double u_rand = (static_cast<double>(rnd_.Rand8()) / 255) * 10 - 5;
+  double u_ref = u_rand;
+  double u_test = u_rand;
+
+  const double v_rand = (static_cast<double>(rnd_.Rand8()) / 255) * 10 - 5;
+  double v_ref = v_rand;
+  double v_test = v_rand;
+
+  // Pick a random point in the frame. If the frame is 352x288, that means we
+  // can call the function on all values of x comprised between 8 and 344, and
+  // all values of y comprised between 8 and 280.
+  const int x = rnd_((kWidth - 8) - 8 + 1) + 8;
+  const int y = rnd_((kHeight - 8) - 8 + 1) + 8;
+
+  aom_usec_timer ref_timer, test_timer;
+
+  aom_compute_flow_at_point_c(src, ref, x, y, kWidth, kHeight, kWidth, &u_ref,
+                              &v_ref);
+
+  target_func_(src, ref, x, y, kWidth, kHeight, kWidth, &u_test, &v_test);
+
+  if (run_times > 1) {
+    aom_usec_timer_start(&ref_timer);
+    for (int i = 0; i < run_times; ++i) {
+      aom_compute_flow_at_point_c(src, ref, x, y, kWidth, kHeight, kWidth,
+                                  &u_ref, &v_ref);
+    }
+    aom_usec_timer_mark(&ref_timer);
+    const double elapsed_time_c =
+        static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
+
+    aom_usec_timer_start(&test_timer);
+    for (int i = 0; i < run_times; ++i) {
+      target_func_(src, ref, x, y, kWidth, kHeight, kWidth, &u_test, &v_test);
+    }
+    aom_usec_timer_mark(&test_timer);
+    const double elapsed_time_simd =
+        static_cast<double>(aom_usec_timer_elapsed(&test_timer));
+
+    printf("c_time=%fns \t simd_time=%fns \t speedup=%.2f\n", elapsed_time_c,
+           elapsed_time_simd, (elapsed_time_c / elapsed_time_simd));
+  } else {
+    ASSERT_EQ(u_ref, u_test);
+    ASSERT_EQ(v_ref, v_test);
+  }
+}
+
+TEST_P(ComputeFlowTest, CheckOutput) { RunCheckOutput(1); }
+
+TEST_P(ComputeFlowTest, DISABLED_Speed) { RunCheckOutput(10000000); }
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE4_1, ComputeFlowTest,
+                         ::testing::Values(aom_compute_flow_at_point_sse4_1));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, ComputeFlowTest,
+                         ::testing::Values(aom_compute_flow_at_point_neon));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/divu_small_test.cc b/third_party/aom/test/divu_small_test.cc
new file mode 100644
index 0000000000..496fbc1f8e
--- /dev/null
+++ b/third_party/aom/test/divu_small_test.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdlib.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "aom_dsp/odintrin.h"
+
+using libaom_test::ACMRandom;
+
+TEST(DivuSmallTest, TestDIVUuptoMAX) {
+  for (int d = 1; d <= OD_DIVU_DMAX; d++) {
+    for (uint32_t x = 1; x <= 1000000; x++) {
+      GTEST_ASSERT_EQ(x / d, OD_DIVU_SMALL(x, d))
+          << "x=" << x << " d=" << d << " x/d=" << (x / d)
+          << " != " << OD_DIVU_SMALL(x, d);
+    }
+  }
+}
+
+TEST(DivuSmallTest, TestDIVUrandI31) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int d = 1; d < OD_DIVU_DMAX; d++) {
+    for (int i = 0; i < 1000000; i++) {
+      uint32_t x = rnd.Rand31();
+      GTEST_ASSERT_EQ(x / d, OD_DIVU_SMALL(x, d))
+          << "x=" << x << " d=" << d << " x/d=" << (x / d)
+          << " != " << OD_DIVU_SMALL(x, d);
+    }
+  }
+}
diff --git a/third_party/aom/test/dr_prediction_test.cc b/third_party/aom/test/dr_prediction_test.cc
new file mode 100644
index 0000000000..3865810e9b
--- /dev/null
+++ b/third_party/aom/test/dr_prediction_test.cc
@@ -0,0 +1,542 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/common/blockd.h"
+#include "av1/common/pred_common.h"
+#include "av1/common/reconintra.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+
+const int kZ1Start = 0;
+const int kZ2Start = 90;
+const int kZ3Start = 180;
+
+const TX_SIZE kTxSize[] = { TX_4X4,   TX_8X8,   TX_16X16, TX_32X32, TX_64X64,
+                            TX_4X8,   TX_8X4,   TX_8X16,  TX_16X8,  TX_16X32,
+                            TX_32X16, TX_32X64, TX_64X32, TX_4X16,  TX_16X4,
+                            TX_8X32,  TX_32X8,  TX_16X64, TX_64X16 };
+
+const char *const kTxSizeStrings[] = {
+  "TX_4X4",   "TX_8X8",   "TX_16X16", "TX_32X32", "TX_64X64",
+  "TX_4X8",   "TX_8X4",   "TX_8X16",  "TX_16X8",  "TX_16X32",
+  "TX_32X16", "TX_32X64", "TX_64X32", "TX_4X16",  "TX_16X4",
+  "TX_8X32",  "TX_32X8",  "TX_16X64", "TX_64X16"
+};
+
+using libaom_test::ACMRandom;
+
+typedef void (*DrPred_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                           const uint16_t *above, const uint16_t *left,
+                           int upsample_above, int upsample_left, int dx,
+                           int dy, int bd);
+
+typedef void (*DrPred)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint8_t *above, const uint8_t *left,
+                       int upsample_above, int upsample_left, int dx, int dy,
+                       int bd);
+
+typedef void (*Z1_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint8_t *above, const uint8_t *left,
+                       int upsample_above, int dx, int dy);
+template <Z1_Lbd fn>
+void z1_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                const uint8_t *above, const uint8_t *left, int upsample_above,
+                int upsample_left, int dx, int dy, int bd) {
+  (void)bd;
+  (void)upsample_left;
+  fn(dst, stride, bw, bh, above, left, upsample_above, dx, dy);
+}
+
+typedef void (*Z2_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint8_t *above, const uint8_t *left,
+                       int upsample_above, int upsample_left, int dx, int dy);
+template <Z2_Lbd fn>
+void z2_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                const uint8_t *above, const uint8_t *left, int upsample_above,
+                int upsample_left, int dx, int dy, int bd) {
+  (void)bd;
+  (void)upsample_left;
+  fn(dst, stride, bw, bh, above, left, upsample_above, upsample_left, dx, dy);
+}
+
+typedef void (*Z3_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint8_t *above, const uint8_t *left,
+                       int upsample_left, int dx, int dy);
+template <Z3_Lbd fn>
+void z3_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                const uint8_t *above, const uint8_t *left, int upsample_above,
+                int upsample_left, int dx, int dy, int bd) {
+  (void)bd;
+  (void)upsample_above;
+  fn(dst, stride, bw, bh, above, left, upsample_left, dx, dy);
+}
+
+typedef void (*Z1_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint16_t *above, const uint16_t *left,
+                       int upsample_above, int dx, int dy, int bd);
+template <Z1_Hbd fn>
+void z1_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                    const uint16_t *above, const uint16_t *left,
+                    int upsample_above, int upsample_left, int dx, int dy,
+                    int bd) {
+  (void)bd;
+  (void)upsample_left;
+  fn(dst, stride, bw, bh, above, left, upsample_above, dx, dy, bd);
+}
+
+typedef void (*Z2_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint16_t *above, const uint16_t *left,
+                       int upsample_above, int upsample_left, int dx, int dy,
+                       int bd);
+template <Z2_Hbd fn>
+void z2_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                    const uint16_t *above, const uint16_t *left,
+                    int upsample_above, int upsample_left, int dx, int dy,
+                    int bd) {
+  (void)bd;
+  fn(dst, stride, bw, bh, above, left, upsample_above, upsample_left, dx, dy,
+     bd);
+}
+
+typedef void (*Z3_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint16_t *above, const uint16_t *left,
+                       int upsample_left, int dx, int dy, int bd);
+template <Z3_Hbd fn>
+void z3_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                    const uint16_t *above, const uint16_t *left,
+                    int upsample_above, int upsample_left, int dx, int dy,
+                    int bd) {
+  (void)bd;
+  (void)upsample_above;
+  fn(dst, stride, bw, bh, above, left, upsample_left, dx, dy, bd);
+}
+
+template <typename FuncType>
+struct DrPredFunc {
+  DrPredFunc(FuncType pred = nullptr, FuncType tst = nullptr,
+             int bit_depth_value = 0, int start_angle_value = 0)
+      : ref_fn(pred), tst_fn(tst), bit_depth(bit_depth_value),
+        start_angle(start_angle_value) {}
+
+  FuncType ref_fn;
+  FuncType tst_fn;
+  int bit_depth;
+  int start_angle;
+};
+
+template <typename Pixel, typename FuncType>
+class DrPredTest : public ::testing::TestWithParam<DrPredFunc<FuncType> > {
+ protected:
+  static const int kMaxNumTests = 10000;
+  static const int kIterations = 10;
+  static const int kDstStride = 64;
+  static const int kDstSize = kDstStride * kDstStride;
+  static const int kOffset = 16;
+  static const int kBufSize = ((2 * MAX_TX_SIZE) << 1) + 16;
+
+  DrPredTest()
+      : enable_upsample_(0), upsample_above_(0), upsample_left_(0), bw_(0),
+        bh_(0), dx_(1), dy_(1), bd_(8), txsize_(TX_4X4) {
+    params_ = this->GetParam();
+    start_angle_ = params_.start_angle;
+    stop_angle_ = start_angle_ + 90;
+
+    dst_ref_ = &dst_ref_data_[0];
+    dst_tst_ = &dst_tst_data_[0];
+    dst_stride_ = kDstStride;
+    above_ = &above_data_[kOffset];
+    left_ = &left_data_[kOffset];
+
+    for (int i = 0; i < kBufSize; ++i) {
+      above_data_[i] = rng_.Rand8();
+      left_data_[i] = rng_.Rand8();
+    }
+
+    for (int i = 0; i < kDstSize; ++i) {
+      dst_ref_[i] = 0;
+      dst_tst_[i] = 0;
+    }
+  }
+
+  ~DrPredTest() override = default;
+
+  void Predict(bool speedtest, int tx) {
+    const int kNumTests = speedtest ? kMaxNumTests : 1;
+    aom_usec_timer timer;
+    int tst_time = 0;
+
+    bd_ = params_.bit_depth;
+
+    aom_usec_timer_start(&timer);
+    for (int k = 0; k < kNumTests; ++k) {
+      params_.ref_fn(dst_ref_, dst_stride_, bw_, bh_, above_, left_,
+                     upsample_above_, upsample_left_, dx_, dy_, bd_);
+    }
+    aom_usec_timer_mark(&timer);
+    const int ref_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+    if (params_.tst_fn) {
+      aom_usec_timer_start(&timer);
+      for (int k = 0; k < kNumTests; ++k) {
+        API_REGISTER_STATE_CHECK(params_.tst_fn(dst_tst_, dst_stride_, bw_, bh_,
+                                                above_, left_, upsample_above_,
+                                                upsample_left_, dx_, dy_, bd_));
+      }
+      aom_usec_timer_mark(&timer);
+      tst_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    } else {
+      for (int i = 0; i < kDstSize; ++i) {
+        dst_ref_[i] = dst_tst_[i];
+      }
+    }
+
+    OutputTimes(kNumTests, ref_time, tst_time, tx);
+  }
+
+  void RunTest(bool speedtest, bool needsaturation, int p_angle) {
+    bd_ = params_.bit_depth;
+
+    if (needsaturation) {
+      for (int i = 0; i < kBufSize; ++i) {
+        above_data_[i] = left_data_[i] = (1 << bd_) - 1;
+      }
+    }
+    for (int tx = 0; tx < TX_SIZES_ALL; ++tx) {
+      if (params_.tst_fn == nullptr) {
+        for (int i = 0; i < kDstSize; ++i) {
+          dst_tst_[i] = (1 << bd_) - 1;
+          dst_ref_[i] = (1 << bd_) - 1;
+        }
+      } else {
+        for (int i = 0; i < kDstSize; ++i) {
+          dst_ref_[i] = 0;
+          dst_tst_[i] = 0;
+        }
+      }
+
+      bw_ = tx_size_wide[kTxSize[tx]];
+      bh_ = tx_size_high[kTxSize[tx]];
+
+      if (enable_upsample_) {
+        upsample_above_ =
+            av1_use_intra_edge_upsample(bw_, bh_, p_angle - 90, 0);
+        upsample_left_ =
+            av1_use_intra_edge_upsample(bw_, bh_, p_angle - 180, 0);
+      } else {
+        upsample_above_ = upsample_left_ = 0;
+      }
+
+      Predict(speedtest, tx);
+
+      for (int r = 0; r < bh_; ++r) {
+        for (int c = 0; c < bw_; ++c) {
+          ASSERT_EQ(dst_ref_[r * dst_stride_ + c],
+                    dst_tst_[r * dst_stride_ + c])
+              << bw_ << "x" << bh_ << " r: " << r << " c: " << c
+              << " dx: " << dx_ << " dy: " << dy_
+              << " upsample_above: " << upsample_above_
+              << " upsample_left: " << upsample_left_;
+        }
+      }
+    }
+  }
+
+  void OutputTimes(int num_tests, int ref_time, int tst_time, int tx) {
+    if (num_tests > 1) {
+      if (params_.tst_fn) {
+        const float x = static_cast<float>(ref_time) / tst_time;
+        printf("\t[%8s] :: ref time %6d, tst time %6d     %3.2f\n",
+               kTxSizeStrings[tx], ref_time, tst_time, x);
+      } else {
+        printf("\t[%8s] :: ref time %6d\n", kTxSizeStrings[tx], ref_time);
+      }
+    }
+  }
+
+  void RundrPredTest(const int speed) {
+    if (params_.tst_fn == nullptr) return;
+    const int angles[] = { 3, 45, 87 };
+    const int start_angle = speed ? 0 : start_angle_;
+    const int stop_angle = speed ? 3 : stop_angle_;
+    for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) {
+      for (int i = start_angle; i < stop_angle; ++i) {
+        const int angle = speed ? angles[i] + start_angle_ : i;
+        dx_ = av1_get_dx(angle);
+        dy_ = av1_get_dy(angle);
+        if (speed) {
+          printf("enable_upsample: %d angle: %d ~~~~~~~~~~~~~~~\n",
+                 enable_upsample_, angle);
+        }
+        if (dx_ && dy_) RunTest(speed, false, angle);
+      }
+    }
+  }
+
+  Pixel dst_ref_data_[kDstSize];
+  Pixel dst_tst_data_[kDstSize];
+
+  Pixel left_data_[kBufSize];
+  Pixel dummy_data_[kBufSize];
+  Pixel above_data_[kBufSize];
+
+  Pixel *dst_ref_;
+  Pixel *dst_tst_;
+  Pixel *above_;
+  Pixel *left_;
+  int dst_stride_;
+
+  int enable_upsample_;
+  int upsample_above_;
+  int upsample_left_;
+  int bw_;
+  int bh_;
+  int dx_;
+  int dy_;
+  int bd_;
+  TX_SIZE txsize_;
+
+  int start_angle_;
+  int stop_angle_;
+
+  ACMRandom rng_;
+
+  DrPredFunc<FuncType> params_;
+};
+
+class LowbdDrPredTest : public DrPredTest<uint8_t, DrPred> {};
+
+TEST_P(LowbdDrPredTest, SaturatedValues) {
+  for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) {
+    for (int angle = start_angle_; angle < stop_angle_; ++angle) {
+      dx_ = av1_get_dx(angle);
+      dy_ = av1_get_dy(angle);
+      if (dx_ && dy_) RunTest(false, true, angle);
+    }
+  }
+}
+
+using std::make_tuple;
+
+INSTANTIATE_TEST_SUITE_P(
+    C, LowbdDrPredTest,
+    ::testing::Values(DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>,
+                                         nullptr, AOM_BITS_8, kZ1Start),
+                      DrPredFunc<DrPred>(&z2_wrapper<av1_dr_prediction_z2_c>,
+                                         nullptr, AOM_BITS_8, kZ2Start),
+                      DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>,
+                                         nullptr, AOM_BITS_8, kZ3Start)));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+class HighbdDrPredTest : public DrPredTest<uint16_t, DrPred_Hbd> {};
+
+TEST_P(HighbdDrPredTest, SaturatedValues) {
+  for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) {
+    for (int angle = start_angle_; angle < stop_angle_; ++angle) {
+      dx_ = av1_get_dx(angle);
+      dy_ = av1_get_dy(angle);
+      if (dx_ && dy_) RunTest(false, true, angle);
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    C, HighbdDrPredTest,
+    ::testing::Values(
+        DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                               nullptr, AOM_BITS_8, kZ1Start),
+        DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                               nullptr, AOM_BITS_10, kZ1Start),
+        DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                               nullptr, AOM_BITS_12, kZ1Start),
+        DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                               nullptr, AOM_BITS_8, kZ2Start),
+        DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                               nullptr, AOM_BITS_10, kZ2Start),
+        DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                               nullptr, AOM_BITS_12, kZ2Start),
+        DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                               nullptr, AOM_BITS_8, kZ3Start),
+        DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                               nullptr, AOM_BITS_10, kZ3Start),
+        DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                               nullptr, AOM_BITS_12, kZ3Start)));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+TEST_P(LowbdDrPredTest, OperationCheck) { RundrPredTest(0); }
+
+TEST_P(LowbdDrPredTest, DISABLED_Speed) { RundrPredTest(1); }
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST_P(HighbdDrPredTest, OperationCheck) {
+  if (params_.tst_fn == nullptr) return;
+  for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) {
+    for (int angle = start_angle_; angle < stop_angle_; angle++) {
+      dx_ = av1_get_dx(angle);
+      dy_ = av1_get_dy(angle);
+      if (dx_ && dy_) RunTest(false, false, angle);
+    }
+  }
+}
+
+TEST_P(HighbdDrPredTest, DISABLED_Speed) {
+  const int angles[] = { 3, 45, 87 };
+  for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) {
+    for (int i = 0; i < 3; ++i) {
+      int angle = angles[i] + start_angle_;
+      dx_ = av1_get_dx(angle);
+      dy_ = av1_get_dy(angle);
+      printf("enable_upsample: %d angle: %d ~~~~~~~~~~~~~~~\n",
+             enable_upsample_, angle);
+      if (dx_ && dy_) RunTest(true, false, angle);
+    }
+  }
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, LowbdDrPredTest,
+    ::testing::Values(
+        DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>,
+                           &z1_wrapper<av1_dr_prediction_z1_sse4_1>, AOM_BITS_8,
+                           kZ1Start),
+        DrPredFunc<DrPred>(&z2_wrapper<av1_dr_prediction_z2_c>,
+                           &z2_wrapper<av1_dr_prediction_z2_sse4_1>, AOM_BITS_8,
+                           kZ2Start),
+        DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>,
+                           &z3_wrapper<av1_dr_prediction_z3_sse4_1>, AOM_BITS_8,
+                           kZ3Start)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, LowbdDrPredTest,
+    ::testing::Values(DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>,
+                                         &z1_wrapper<av1_dr_prediction_z1_avx2>,
+                                         AOM_BITS_8, kZ1Start),
+                      DrPredFunc<DrPred>(&z2_wrapper<av1_dr_prediction_z2_c>,
+                                         &z2_wrapper<av1_dr_prediction_z2_avx2>,
+                                         AOM_BITS_8, kZ2Start),
+                      DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>,
+                                         &z3_wrapper<av1_dr_prediction_z3_avx2>,
+                                         AOM_BITS_8, kZ3Start)));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, HighbdDrPredTest,
+    ::testing::Values(DrPredFunc<DrPred_Hbd>(
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_avx2>,
+                          AOM_BITS_8, kZ1Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_avx2>,
+                          AOM_BITS_10, kZ1Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_avx2>,
+                          AOM_BITS_12, kZ1Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_avx2>,
+                          AOM_BITS_8, kZ2Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_avx2>,
+                          AOM_BITS_10, kZ2Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_avx2>,
+                          AOM_BITS_12, kZ2Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_avx2>,
+                          AOM_BITS_8, kZ3Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_avx2>,
+                          AOM_BITS_10, kZ3Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_avx2>,
+                          AOM_BITS_12, kZ3Start)));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, LowbdDrPredTest,
+    ::testing::Values(DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>,
+                                         &z1_wrapper<av1_dr_prediction_z1_neon>,
+                                         AOM_BITS_8, kZ1Start),
+                      DrPredFunc<DrPred>(&z2_wrapper<av1_dr_prediction_z2_c>,
+                                         &z2_wrapper<av1_dr_prediction_z2_neon>,
+                                         AOM_BITS_8, kZ2Start),
+                      DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>,
+                                         &z3_wrapper<av1_dr_prediction_z3_neon>,
+                                         AOM_BITS_8, kZ3Start)));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(
+    NEON, HighbdDrPredTest,
+    ::testing::Values(DrPredFunc<DrPred_Hbd>(
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_neon>,
+                          AOM_BITS_8, kZ1Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_neon>,
+                          AOM_BITS_10, kZ1Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                          &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_neon>,
+                          AOM_BITS_12, kZ1Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_neon>,
+                          AOM_BITS_8, kZ2Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_neon>,
+                          AOM_BITS_10, kZ2Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                          &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_neon>,
+                          AOM_BITS_12, kZ2Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_neon>,
+                          AOM_BITS_8, kZ3Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_neon>,
+                          AOM_BITS_10, kZ3Start),
+                      DrPredFunc<DrPred_Hbd>(
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                          &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_neon>,
+                          AOM_BITS_12, kZ3Start)));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#endif  // HAVE_NEON
+
+}  // namespace
diff --git a/third_party/aom/test/dropframe_encode_test.cc b/third_party/aom/test/dropframe_encode_test.cc
new file mode 100644
index 0000000000..4a54c0b95c
--- /dev/null
+++ b/third_party/aom/test/dropframe_encode_test.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+// Params: test mode, threads.
+class DropFrameEncodeTestLarge
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode,
+                                                 unsigned int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  DropFrameEncodeTestLarge()
+      : EncoderTest(GET_PARAM(0)), frame_number_(0), threads_(GET_PARAM(2)) {}
+
+  void SetUp() override { InitializeConfig(GET_PARAM(1)); }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    frame_number_ = video->frame();
+    if (frame_number_ == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 1);
+    }
+  }
+
+  unsigned int frame_number_;
+  unsigned int threads_;
+};
+
+// Test to reproduce the assertion failure related to buf->display_idx in
+// init_gop_frames_for_tpl() and segmentation fault reported in aomedia:3372
+// while encoding with --drop-frame=1.
+TEST_P(DropFrameEncodeTestLarge, TestNoMisMatch) {
+  cfg_.rc_end_usage = AOM_CBR;
+  cfg_.rc_buf_sz = 1;
+  cfg_.g_pass = AOM_RC_ONE_PASS;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.g_threads = threads_;
+
+  ::libaom_test::I420VideoSource video("desktopqvga2.320_240.yuv", 320, 240, 30,
+                                       1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+AV1_INSTANTIATE_TEST_SUITE(DropFrameEncodeTestLarge,
+                           ::testing::Values(::libaom_test::kOnePassGood),
+                           ::testing::Values(1, 4));
+
+}  // namespace
diff --git a/third_party/aom/test/dump_obu.sh b/third_party/aom/test/dump_obu.sh
new file mode 100755
index 0000000000..933db64a6a
--- /dev/null
+++ b/third_party/aom/test/dump_obu.sh
@@ -0,0 +1,77 @@
+#!/bin/sh
+## Copyright (c) 2018, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom dump_obu tool. To add new tests to this
+## file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to dump_obu_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+readonly dump_obu_test_file="${AOM_TEST_OUTPUT_DIR}/av1_obu_test.ivf"
+
+dump_obu_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ "$(dump_obu_available)" = "yes" ]; then
+    if [ -z "$(aom_tool_path dump_obu)" ]; then
+      elog "dump_obu not found in LIBAOM_BIN_PATH, its parent, or child tools/."
+    fi
+  fi
+}
+
+dump_obu_available() {
+  if [ "$(av1_decode_available)" = "yes" ] && \
+     [ "$(av1_encode_available)" = "yes" ]; then
+    echo yes
+  fi
+}
+
+aomenc_available() {
+  if [ -x "$(aom_tool_path aomenc)" ]; then
+    echo yes
+  fi
+}
+
+encode_test_file() {
+  if [ "$(aomenc_available)" = "yes" ]; then
+    local encoder="$(aom_tool_path aomenc)"
+    if [ "$(realtime_only_build)" = "yes" ]; then
+      eval "${encoder}" \
+        $(aomenc_encode_test_rt_params) \
+        $(yuv_raw_input) \
+        --ivf \
+        --output=${dump_obu_test_file} \
+        ${devnull} || return 1
+    else
+      eval "${encoder}" \
+        $(aomenc_encode_test_fast_params) \
+        $(yuv_raw_input) \
+        --ivf \
+        --output=${dump_obu_test_file} \
+        ${devnull} || return 1
+    fi
+    if [ ! -e "${dump_obu_test_file}" ]; then
+      elog "dump_obu test input encode failed."
+      return 1
+    fi
+  fi
+}
+
+dump_obu() {
+  encode_test_file || return 1
+  eval $(aom_tool_path dump_obu) "${dump_obu_test_file}" ${devnull}
+}
+
+dump_obu_tests="dump_obu"
+
+run_tests dump_obu_verify_environment "${dump_obu_tests}"
diff --git a/third_party/aom/test/ec_test.cc b/third_party/aom/test/ec_test.cc
new file mode 100644
index 0000000000..a5284deac0
--- /dev/null
+++ b/third_party/aom/test/ec_test.cc
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include <cstdlib>
+#include <memory>
+#include <new>
+
+#include "aom_dsp/entenc.h"
+#include "aom_dsp/entdec.h"
+
+TEST(EC_TEST, random_ec_test) {
+  od_ec_enc enc;
+  od_ec_dec dec;
+  int sz;
+  int i;
+  int ret;
+  unsigned int seed;
+  unsigned char *ptr;
+  uint32_t ptr_sz;
+  char *seed_str;
+  ret = 0;
+  seed_str = getenv("EC_TEST_SEED");
+  if (seed_str) {
+    seed = atoi(seed_str);
+  } else {
+    seed = 0xdaa1a;
+  }
+  srand(seed);
+  od_ec_enc_init(&enc, 1);
+  /*Test compatibility between multiple different encode/decode routines.*/
+  for (i = 0; i < 409600; i++) {
+    int j;
+    sz = rand() / ((RAND_MAX >> (rand() % 9U)) + 1U);
+    std::unique_ptr<unsigned[]> fz(new (std::nothrow) unsigned[sz]);
+    ASSERT_NE(fz, nullptr);
+    std::unique_ptr<unsigned[]> fts(new (std::nothrow) unsigned[sz]);
+    ASSERT_NE(fts, nullptr);
+    std::unique_ptr<unsigned[]> data(new (std::nothrow) unsigned[sz]);
+    ASSERT_NE(data, nullptr);
+    std::unique_ptr<unsigned[]> tell(new (std::nothrow) unsigned[sz + 1]);
+    ASSERT_NE(tell, nullptr);
+    std::unique_ptr<unsigned[]> enc_method(new (std::nothrow) unsigned[sz]);
+    ASSERT_NE(enc_method, nullptr);
+    od_ec_enc_reset(&enc);
+    tell[0] = od_ec_enc_tell_frac(&enc);
+    for (j = 0; j < sz; j++) {
+      data[j] = rand() / ((RAND_MAX >> 1) + 1);
+
+      fts[j] = CDF_PROB_BITS;
+      fz[j] = (rand() % (CDF_PROB_TOP - 2)) >> (CDF_PROB_BITS - fts[j]);
+      fz[j] = OD_MAXI(fz[j], 1);
+      enc_method[j] = 3 + (rand() & 1);
+      switch (enc_method[j]) {
+        case 3: {
+          od_ec_encode_bool_q15(&enc, data[j],
+                                OD_ICDF(fz[j] << (CDF_PROB_BITS - fts[j])));
+          break;
+        }
+        case 4: {
+          uint16_t cdf[2];
+          cdf[0] = OD_ICDF(fz[j]);
+          cdf[1] = OD_ICDF(1U << fts[j]);
+          od_ec_encode_cdf_q15(&enc, data[j], cdf, 2);
+          break;
+        }
+      }
+
+      tell[j + 1] = od_ec_enc_tell_frac(&enc);
+    }
+    ptr = od_ec_enc_done(&enc, &ptr_sz);
+    ASSERT_NE(ptr, nullptr);
+    EXPECT_GE(((od_ec_enc_tell(&enc) + 7U) >> 3), ptr_sz)
+        << "od_ec_enc_tell() lied: "
+           "there's "
+        << ptr_sz << " bytes instead of " << ((od_ec_enc_tell(&enc) + 7) >> 3)
+        << " (Random seed: " << seed << ")\n";
+    od_ec_dec_init(&dec, ptr, ptr_sz);
+    EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[0])
+        << "od_ec_dec_tell() mismatch between encoder and decoder "
+           "at symbol 0: "
+        << (unsigned)od_ec_dec_tell_frac(&dec) << " instead of " << tell[0]
+        << " (Random seed: " << seed << ").\n";
+    for (j = 0; j < sz; j++) {
+      int dec_method;
+      unsigned int sym = data[j] + 1;  // Initialize sym to an invalid value.
+
+      dec_method = 3 + (rand() & 1);
+
+      switch (dec_method) {
+        case 3: {
+          sym = od_ec_decode_bool_q15(
+              &dec, OD_ICDF(fz[j] << (CDF_PROB_BITS - fts[j])));
+          break;
+        }
+        case 4: {
+          uint16_t cdf[2];
+          cdf[0] = OD_ICDF(fz[j]);
+          cdf[1] = OD_ICDF(1U << fts[j]);
+          sym = od_ec_decode_cdf_q15(&dec, cdf, 2);
+          break;
+        }
+      }
+
+      EXPECT_EQ(sym, data[j])
+          << "Decoded " << sym << " instead of " << data[j]
+          << " with fz=" << fz[j] << " and ftb=" << fts[j] << "at position "
+          << j << " of " << sz << " (Random seed: " << seed << ").\n"
+          << "Encoding method: " << enc_method[j]
+          << " decoding method: " << dec_method << "\n";
+      EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[j + 1])
+          << "od_ec_dec_tell() mismatch between encoder and "
+             "decoder at symbol "
+          << j + 1 << ": " << (unsigned)od_ec_dec_tell_frac(&dec)
+          << " instead of " << tell[j + 1] << " (Random seed: " << seed
+          << ").\n";
+    }
+  }
+  od_ec_enc_reset(&enc);
+  od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+  od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+  od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+  od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+  od_ec_encode_bool_q15(&enc, 0, OD_ICDF(24576));
+  od_ec_enc_patch_initial_bits(&enc, 3, 2);
+  EXPECT_FALSE(enc.error) << "od_ec_enc_patch_initial_bits() failed.\n";
+  od_ec_enc_patch_initial_bits(&enc, 0, 5);
+  EXPECT_TRUE(enc.error)
+      << "od_ec_enc_patch_initial_bits() didn't fail when it should have.\n";
+  od_ec_enc_reset(&enc);
+  od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+  od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+  od_ec_encode_bool_q15(&enc, 1, OD_ICDF(32256));
+  od_ec_encode_bool_q15(&enc, 0, OD_ICDF(24576));
+  od_ec_enc_patch_initial_bits(&enc, 0, 2);
+  EXPECT_FALSE(enc.error) << "od_ec_enc_patch_initial_bits() failed.\n";
+  ptr = od_ec_enc_done(&enc, &ptr_sz);
+  ASSERT_NE(ptr, nullptr);
+  EXPECT_EQ(ptr_sz, 2u);
+  EXPECT_EQ(ptr[0], 63)
+      << "Got " << ptr[0]
+      << " when expecting 63 for od_ec_enc_patch_initial_bits().\n";
+  od_ec_enc_clear(&enc);
+  EXPECT_EQ(ret, 0);
+}
diff --git a/third_party/aom/test/encode_api_test.cc b/third_party/aom/test/encode_api_test.cc
new file mode 100644
index 0000000000..aa4084f9e4
--- /dev/null
+++ b/third_party/aom/test/encode_api_test.cc
@@ -0,0 +1,659 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+
+#include "aom/aomcx.h"
+#include "aom/aom_encoder.h"
+#include "aom/aom_image.h"
+
+namespace {
+
+#if CONFIG_REALTIME_ONLY
+const unsigned int kUsage = AOM_USAGE_REALTIME;
+#else
+const unsigned int kUsage = AOM_USAGE_GOOD_QUALITY;
+#endif
+
+static void *Memset16(void *dest, int val, size_t length) {
+  uint16_t *dest16 = (uint16_t *)dest;
+  for (size_t i = 0; i < length; ++i) *dest16++ = val;
+  return dest;
+}
+
+TEST(EncodeAPI, InvalidParams) {
+  uint8_t buf[1] = { 0 };
+  aom_image_t img;
+  aom_codec_ctx_t enc;
+  aom_codec_enc_cfg_t cfg;
+
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, 1, 1, 1, buf));
+
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_enc_init(nullptr, nullptr, nullptr, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_enc_init(&enc, nullptr, nullptr, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_encode(nullptr, nullptr, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(nullptr, &img, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(nullptr));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_enc_config_default(nullptr, nullptr, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_enc_config_default(nullptr, &cfg, 0));
+  EXPECT_NE(aom_codec_error(nullptr), nullptr);
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  SCOPED_TRACE(aom_codec_iface_name(iface));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_enc_init(nullptr, iface, nullptr, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_enc_init(&enc, iface, nullptr, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_enc_config_default(iface, &cfg, 3));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage));
+  cfg.g_w = 1 << 16;
+  cfg.g_h = (1 << 14) + 1;
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage));
+  cfg.g_w = (1 << 14) + 1;
+  cfg.g_h = 1 << 16;
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage));
+  cfg.g_forced_max_frame_width = 1 << 16;
+  cfg.g_forced_max_frame_height = (1 << 14) + 1;
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage));
+  cfg.g_forced_max_frame_width = (1 << 14) + 1;
+  cfg.g_forced_max_frame_height = 1 << 16;
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(nullptr, aom_codec_get_global_headers(nullptr));
+
+  aom_fixed_buf_t *glob_headers = aom_codec_get_global_headers(&enc);
+  EXPECT_NE(glob_headers->buf, nullptr);
+  if (glob_headers) {
+    free(glob_headers->buf);
+    free(glob_headers);
+  }
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+TEST(EncodeAPI, InvalidControlId) {
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_ctx_t enc;
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_ERROR, aom_codec_control(&enc, -1, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_control(&enc, 0, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+void EncodeSetSFrameOnFirstFrame(aom_img_fmt fmt, aom_codec_flags_t flag) {
+  constexpr int kWidth = 2;
+  constexpr int kHeight = 128;
+  unsigned char kBuffer[kWidth * kHeight * 3] = { 0 };
+  aom_image_t img;
+  ASSERT_EQ(aom_img_wrap(&img, fmt, kWidth, kHeight, 1, kBuffer), &img);
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, kUsage), AOM_CODEC_OK);
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+
+  aom_codec_ctx_t enc;
+  ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, flag), AOM_CODEC_OK);
+  // One of these aom_codec_encode() calls should fail.
+  if (aom_codec_encode(&enc, &img, 0, 1, AOM_EFLAG_SET_S_FRAME) ==
+      AOM_CODEC_OK) {
+    EXPECT_NE(aom_codec_encode(&enc, nullptr, 0, 0, 0), AOM_CODEC_OK);
+  }
+  EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
+
+TEST(EncodeAPI, SetSFrameOnFirstFrame) {
+  EncodeSetSFrameOnFirstFrame(AOM_IMG_FMT_I420, 0);
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST(EncodeAPI, SetSFrameOnFirstFrameHighbd) {
+  EncodeSetSFrameOnFirstFrame(AOM_IMG_FMT_I42016, AOM_CODEC_USE_HIGHBITDEPTH);
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+TEST(EncodeAPI, MonochromeInProfiles) {
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage));
+  cfg.g_w = 128;
+  cfg.g_h = 128;
+  cfg.monochrome = 1;
+  aom_codec_ctx_t enc;
+
+  // Test Profile 0
+  cfg.g_profile = 0;
+  ASSERT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+
+  // Test Profile 1
+  cfg.g_profile = 1;
+  ASSERT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0));
+
+  // Test Profile 3
+  cfg.g_profile = 2;
+  ASSERT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+TEST(EncodeAPI, LowBDEncoderLowBDImage) {
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, kUsage), AOM_CODEC_OK);
+
+  aom_codec_ctx_t enc;
+  ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK);
+
+  aom_image_t *image =
+      aom_img_alloc(nullptr, AOM_IMG_FMT_I420, cfg.g_w, cfg.g_h, 0);
+  ASSERT_NE(image, nullptr);
+
+  // Set the image to two colors so that av1_set_screen_content_options() will
+  // call av1_get_perpixel_variance().
+  int luma_value = 0;
+  for (unsigned int i = 0; i < image->d_h; ++i) {
+    memset(image->planes[0] + i * image->stride[0], luma_value, image->d_w);
+    luma_value = 255 - luma_value;
+  }
+  unsigned int uv_h = (image->d_h + 1) / 2;
+  unsigned int uv_w = (image->d_w + 1) / 2;
+  for (unsigned int i = 0; i < uv_h; ++i) {
+    memset(image->planes[1] + i * image->stride[1], 128, uv_w);
+    memset(image->planes[2] + i * image->stride[2], 128, uv_w);
+  }
+
+  ASSERT_EQ(aom_codec_encode(&enc, image, 0, 1, 0), AOM_CODEC_OK);
+
+  aom_img_free(image);
+  ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
+
+TEST(EncodeAPI, HighBDEncoderHighBDImage) {
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, kUsage), AOM_CODEC_OK);
+
+  aom_codec_ctx_t enc;
+  aom_codec_err_t init_status =
+      aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH);
+#if !CONFIG_AV1_HIGHBITDEPTH
+  ASSERT_EQ(init_status, AOM_CODEC_INCAPABLE);
+#else
+  ASSERT_EQ(init_status, AOM_CODEC_OK);
+
+  aom_image_t *image =
+      aom_img_alloc(nullptr, AOM_IMG_FMT_I42016, cfg.g_w, cfg.g_h, 0);
+  ASSERT_NE(image, nullptr);
+
+  // Set the image to two colors so that av1_set_screen_content_options() will
+  // call av1_get_perpixel_variance().
+  int luma_value = 0;
+  for (unsigned int i = 0; i < image->d_h; ++i) {
+    Memset16(image->planes[0] + i * image->stride[0], luma_value, image->d_w);
+    luma_value = 255 - luma_value;
+  }
+  unsigned int uv_h = (image->d_h + 1) / 2;
+  unsigned int uv_w = (image->d_w + 1) / 2;
+  for (unsigned int i = 0; i < uv_h; ++i) {
+    Memset16(image->planes[1] + i * image->stride[1], 128, uv_w);
+    Memset16(image->planes[2] + i * image->stride[2], 128, uv_w);
+  }
+
+  ASSERT_EQ(aom_codec_encode(&enc, image, 0, 1, 0), AOM_CODEC_OK);
+
+  aom_img_free(image);
+  ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+#endif
+}
+
+TEST(EncodeAPI, HighBDEncoderLowBDImage) {
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, kUsage), AOM_CODEC_OK);
+
+  aom_codec_ctx_t enc;
+  aom_codec_err_t init_status =
+      aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH);
+#if !CONFIG_AV1_HIGHBITDEPTH
+  ASSERT_EQ(init_status, AOM_CODEC_INCAPABLE);
+#else
+  ASSERT_EQ(init_status, AOM_CODEC_OK);
+
+  aom_image_t *image =
+      aom_img_alloc(nullptr, AOM_IMG_FMT_I420, cfg.g_w, cfg.g_h, 0);
+  ASSERT_NE(image, nullptr);
+
+  // Set the image to two colors so that av1_set_screen_content_options() will
+  // call av1_get_perpixel_variance().
+  int luma_value = 0;
+  for (unsigned int i = 0; i < image->d_h; ++i) {
+    memset(image->planes[0] + i * image->stride[0], luma_value, image->d_w);
+    luma_value = 255 - luma_value;
+  }
+  unsigned int uv_h = (image->d_h + 1) / 2;
+  unsigned int uv_w = (image->d_w + 1) / 2;
+  for (unsigned int i = 0; i < uv_h; ++i) {
+    memset(image->planes[1] + i * image->stride[1], 128, uv_w);
+    memset(image->planes[2] + i * image->stride[2], 128, uv_w);
+  }
+
+  ASSERT_EQ(aom_codec_encode(&enc, image, 0, 1, 0), AOM_CODEC_INVALID_PARAM);
+
+  aom_img_free(image);
+  ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+#endif
+}
+
+TEST(EncodeAPI, LowBDEncoderHighBDImage) {
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, kUsage), AOM_CODEC_OK);
+
+  aom_codec_ctx_t enc;
+  ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK);
+
+  aom_image_t *image =
+      aom_img_alloc(nullptr, AOM_IMG_FMT_I42016, cfg.g_w, cfg.g_h, 0);
+  ASSERT_NE(image, nullptr);
+
+  // Set the image to two colors so that av1_set_screen_content_options() will
+  // call av1_get_perpixel_variance().
+  int luma_value = 0;
+  for (unsigned int i = 0; i < image->d_h; ++i) {
+    Memset16(image->planes[0] + i * image->stride[0], luma_value, image->d_w);
+    luma_value = 255 - luma_value;
+  }
+  unsigned int uv_h = (image->d_h + 1) / 2;
+  unsigned int uv_w = (image->d_w + 1) / 2;
+  for (unsigned int i = 0; i < uv_h; ++i) {
+    Memset16(image->planes[1] + i * image->stride[1], 128, uv_w);
+    Memset16(image->planes[2] + i * image->stride[2], 128, uv_w);
+  }
+
+  ASSERT_EQ(aom_codec_encode(&enc, image, 0, 1, 0), AOM_CODEC_INVALID_PARAM);
+
+  aom_img_free(image);
+  ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
+
+aom_image_t *CreateGrayImage(aom_img_fmt_t fmt, unsigned int w,
+                             unsigned int h) {
+  aom_image_t *const image = aom_img_alloc(nullptr, fmt, w, h, 1);
+  if (!image) return image;
+
+  for (unsigned int i = 0; i < image->d_h; ++i) {
+    memset(image->planes[0] + i * image->stride[0], 128, image->d_w);
+  }
+  const unsigned int uv_h = (image->d_h + 1) / 2;
+  const unsigned int uv_w = (image->d_w + 1) / 2;
+  for (unsigned int i = 0; i < uv_h; ++i) {
+    memset(image->planes[1] + i * image->stride[1], 128, uv_w);
+    memset(image->planes[2] + i * image->stride[2], 128, uv_w);
+  }
+  return image;
+}
+
+TEST(EncodeAPI, Buganizer310548198) {
+  aom_codec_iface_t *const iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  const unsigned int usage = AOM_USAGE_REALTIME;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage), AOM_CODEC_OK);
+  cfg.g_w = 1;
+  cfg.g_h = 444;
+  cfg.g_pass = AOM_RC_ONE_PASS;
+  cfg.g_lag_in_frames = 0;
+
+  aom_codec_ctx_t enc;
+  ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK);
+
+  const int speed = 6;
+  ASSERT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, speed), AOM_CODEC_OK);
+
+  const aom_enc_frame_flags_t flags = 0;
+  int frame_index = 0;
+
+  // Encode a frame.
+  aom_image_t *image = CreateGrayImage(AOM_IMG_FMT_I420, cfg.g_w, cfg.g_h);
+  ASSERT_NE(image, nullptr);
+  ASSERT_EQ(aom_codec_encode(&enc, image, frame_index, 1, flags), AOM_CODEC_OK);
+  frame_index++;
+  const aom_codec_cx_pkt_t *pkt;
+  aom_codec_iter_t iter = nullptr;
+  while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) {
+    ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  }
+  aom_img_free(image);
+
+  cfg.g_w = 1;
+  cfg.g_h = 254;
+  ASSERT_EQ(aom_codec_enc_config_set(&enc, &cfg), AOM_CODEC_OK)
+      << aom_codec_error_detail(&enc);
+
+  cfg.g_w = 1;
+  cfg.g_h = 154;
+  ASSERT_EQ(aom_codec_enc_config_set(&enc, &cfg), AOM_CODEC_OK)
+      << aom_codec_error_detail(&enc);
+
+  // Encode a frame.
+  image = CreateGrayImage(AOM_IMG_FMT_I420, cfg.g_w, cfg.g_h);
+  ASSERT_EQ(aom_codec_encode(&enc, image, frame_index, 1, flags), AOM_CODEC_OK);
+  frame_index++;
+  iter = nullptr;
+  while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) {
+    ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  }
+  aom_img_free(image);
+
+  // Flush the encoder.
+  bool got_data;
+  do {
+    ASSERT_EQ(aom_codec_encode(&enc, nullptr, 0, 0, 0), AOM_CODEC_OK);
+    got_data = false;
+    iter = nullptr;
+    while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) {
+      ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+      got_data = true;
+    }
+  } while (got_data);
+
+  ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
+
+// Emulates the WebCodecs VideoEncoder interface.
+class AV1Encoder {
+ public:
+  explicit AV1Encoder(int speed) : speed_(speed) {}
+  ~AV1Encoder();
+
+  void Configure(unsigned int threads, unsigned int width, unsigned int height,
+                 aom_rc_mode end_usage, unsigned int usage);
+  void Encode(bool key_frame);
+
+ private:
+  // Flushes the encoder. Should be called after all the Encode() calls.
+  void Flush();
+
+  const int speed_;
+  bool initialized_ = false;
+  aom_codec_enc_cfg_t cfg_;
+  aom_codec_ctx_t enc_;
+  int frame_index_ = 0;
+};
+
+AV1Encoder::~AV1Encoder() {
+  if (initialized_) {
+    Flush();
+    EXPECT_EQ(aom_codec_destroy(&enc_), AOM_CODEC_OK);
+  }
+}
+
+void AV1Encoder::Configure(unsigned int threads, unsigned int width,
+                           unsigned int height, aom_rc_mode end_usage,
+                           unsigned int usage) {
+  if (!initialized_) {
+    aom_codec_iface_t *const iface = aom_codec_av1_cx();
+    ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg_, usage), AOM_CODEC_OK);
+    cfg_.g_threads = threads;
+    cfg_.g_w = width;
+    cfg_.g_h = height;
+    cfg_.g_forced_max_frame_width = cfg_.g_w;
+    cfg_.g_forced_max_frame_height = cfg_.g_h;
+    cfg_.g_timebase.num = 1;
+    cfg_.g_timebase.den = 1000 * 1000;  // microseconds
+    cfg_.g_pass = AOM_RC_ONE_PASS;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.rc_end_usage = end_usage;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 58;
+    ASSERT_EQ(aom_codec_enc_init(&enc_, iface, &cfg_, 0), AOM_CODEC_OK);
+    ASSERT_EQ(aom_codec_control(&enc_, AOME_SET_CPUUSED, speed_), AOM_CODEC_OK);
+    initialized_ = true;
+    return;
+  }
+
+  ASSERT_EQ(usage, cfg_.g_usage);
+  cfg_.g_threads = threads;
+  cfg_.g_w = width;
+  cfg_.g_h = height;
+  cfg_.rc_end_usage = end_usage;
+  ASSERT_EQ(aom_codec_enc_config_set(&enc_, &cfg_), AOM_CODEC_OK)
+      << aom_codec_error_detail(&enc_);
+}
+
+void AV1Encoder::Encode(bool key_frame) {
+  assert(initialized_);
+  // TODO(wtc): Support high bit depths and other YUV formats.
+  aom_image_t *const image =
+      CreateGrayImage(AOM_IMG_FMT_I420, cfg_.g_w, cfg_.g_h);
+  ASSERT_NE(image, nullptr);
+  const aom_enc_frame_flags_t flags = key_frame ? AOM_EFLAG_FORCE_KF : 0;
+  ASSERT_EQ(aom_codec_encode(&enc_, image, frame_index_, 1, flags),
+            AOM_CODEC_OK);
+  frame_index_++;
+  const aom_codec_cx_pkt_t *pkt;
+  aom_codec_iter_t iter = nullptr;
+  while ((pkt = aom_codec_get_cx_data(&enc_, &iter)) != nullptr) {
+    ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+    if (key_frame) {
+      ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+    }
+  }
+  aom_img_free(image);
+}
+
+void AV1Encoder::Flush() {
+  bool got_data;
+  do {
+    ASSERT_EQ(aom_codec_encode(&enc_, nullptr, 0, 0, 0), AOM_CODEC_OK);
+    got_data = false;
+    const aom_codec_cx_pkt_t *pkt;
+    aom_codec_iter_t iter = nullptr;
+    while ((pkt = aom_codec_get_cx_data(&enc_, &iter)) != nullptr) {
+      ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+      got_data = true;
+    }
+  } while (got_data);
+}
+
+TEST(EncodeAPI, Buganizer314858909) {
+  AV1Encoder encoder(7);
+
+  encoder.Configure(6, 1582, 750, AOM_CBR, AOM_USAGE_REALTIME);
+
+  // Encode a frame.
+  encoder.Encode(false);
+
+  encoder.Configure(0, 1582, 23, AOM_CBR, AOM_USAGE_REALTIME);
+
+  // Encode a frame..
+  encoder.Encode(false);
+
+  encoder.Configure(16, 1542, 363, AOM_CBR, AOM_USAGE_REALTIME);
+
+  // Encode a frame..
+  encoder.Encode(false);
+}
+
+// Run this test to reproduce the bug in fuzz test: ASSERT: cpi->rec_sse !=
+// UINT64_MAX in av1_rc_bits_per_mb.
+TEST(EncodeAPI, Buganizer310766628) {
+  AV1Encoder encoder(7);
+
+  encoder.Configure(16, 759, 383, AOM_CBR, AOM_USAGE_REALTIME);
+
+  // Encode a frame.
+  encoder.Encode(false);
+
+  encoder.Configure(2, 759, 383, AOM_VBR, AOM_USAGE_REALTIME);
+
+  // Encode a frame. This will trigger the assertion failure.
+  encoder.Encode(false);
+}
+
+// This test covers a possible use case where the change of frame sizes and
+// thread numbers happens before and after the first frame coding.
+TEST(EncodeAPI, Buganizer310455204) {
+  AV1Encoder encoder(7);
+
+  encoder.Configure(0, 1915, 503, AOM_VBR, AOM_USAGE_REALTIME);
+
+  encoder.Configure(4, 1, 1, AOM_VBR, AOM_USAGE_REALTIME);
+
+  encoder.Configure(6, 559, 503, AOM_CBR, AOM_USAGE_REALTIME);
+
+  // Encode a frame.
+  encoder.Encode(false);
+
+  // Increase the number of threads.
+  encoder.Configure(16, 1915, 503, AOM_CBR, AOM_USAGE_REALTIME);
+
+  // Encode a frame.
+  encoder.Encode(false);
+}
+
+// Run this test to reproduce the bug in fuzz test: Float-cast-overflow in
+// av1_rc_bits_per_mb.
+TEST(EncodeAPI, Buganizer310457427) {
+  AV1Encoder encoder(7);
+
+  encoder.Configure(12, 896, 1076, AOM_CBR, AOM_USAGE_REALTIME);
+
+  encoder.Configure(6, 609, 1076, AOM_VBR, AOM_USAGE_REALTIME);
+
+  // Encode a frame.
+  encoder.Encode(false);
+
+  // Encode a frame. This will trigger the float-cast-overflow bug which was
+  // caused by division by zero.
+  encoder.Encode(false);
+}
+
+class EncodeAPIParameterized
+    : public testing::TestWithParam<std::tuple<
+          /*usage=*/unsigned int, /*speed=*/int, /*aq_mode=*/unsigned int>> {};
+
+// Encodes two frames at a given usage, speed, and aq_mode setting.
+// Reproduces b/303023614
+TEST_P(EncodeAPIParameterized, HighBDEncoderHighBDFrames) {
+  const unsigned int usage = std::get<0>(GetParam());
+  int speed = std::get<1>(GetParam());
+
+  if (speed == 10 && usage != AOM_USAGE_REALTIME) {
+    speed = 9;  // 10 is only allowed in AOM_USAGE_REALTIME
+  }
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage), AOM_CODEC_OK);
+  cfg.g_w = 500;
+  cfg.g_h = 400;
+
+  aom_codec_ctx_t enc;
+  aom_codec_err_t init_status =
+      aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH);
+#if !CONFIG_AV1_HIGHBITDEPTH
+  ASSERT_EQ(init_status, AOM_CODEC_INCAPABLE);
+#else
+  ASSERT_EQ(init_status, AOM_CODEC_OK);
+
+  const unsigned int aq_mode = std::get<2>(GetParam());
+
+  ASSERT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, speed), AOM_CODEC_OK);
+  ASSERT_EQ(aom_codec_control(&enc, AV1E_SET_AQ_MODE, aq_mode), AOM_CODEC_OK);
+
+  aom_image_t *image =
+      aom_img_alloc(nullptr, AOM_IMG_FMT_I42016, cfg.g_w, cfg.g_h, 0);
+  ASSERT_NE(image, nullptr);
+
+  for (unsigned int i = 0; i < image->d_h; ++i) {
+    Memset16(image->planes[0] + i * image->stride[0], 128, image->d_w);
+  }
+  unsigned int uv_h = (image->d_h + 1) / 2;
+  unsigned int uv_w = (image->d_w + 1) / 2;
+  for (unsigned int i = 0; i < uv_h; ++i) {
+    Memset16(image->planes[1] + i * image->stride[1], 128, uv_w);
+    Memset16(image->planes[2] + i * image->stride[2], 128, uv_w);
+  }
+
+  // Encode two frames.
+  ASSERT_EQ(
+      aom_codec_encode(&enc, image, /*pts=*/0, /*duration=*/1, /*flags=*/0),
+      AOM_CODEC_OK);
+  ASSERT_EQ(
+      aom_codec_encode(&enc, image, /*pts=*/1, /*duration=*/1, /*flags=*/0),
+      AOM_CODEC_OK);
+
+  aom_img_free(image);
+  ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+#endif
+}
+
+const unsigned int kUsages[] = {
+  AOM_USAGE_REALTIME,
+#if !CONFIG_REALTIME_ONLY
+  AOM_USAGE_GOOD_QUALITY,
+  AOM_USAGE_ALL_INTRA,
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(All, EncodeAPIParameterized,
+                         testing::Combine(
+                             /*usage=*/testing::ValuesIn(kUsages),
+                             /*speed=*/testing::Values(6, 7, 10),
+                             /*aq_mode=*/testing::Values(0, 1, 2, 3)));
+
+#if !CONFIG_REALTIME_ONLY
+TEST(EncodeAPI, AllIntraMode) {
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_ctx_t enc;
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+
+  // Set g_lag_in_frames to a nonzero value. This should cause
+  // aom_codec_enc_init() to fail.
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA));
+  cfg.g_lag_in_frames = 1;
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0));
+
+  // Set kf_max_dist to a nonzero value. This should cause aom_codec_enc_init()
+  // to fail.
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA));
+  cfg.kf_max_dist = 1;
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0));
+}
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/encode_perf_test.cc b/third_party/aom/test/encode_perf_test.cc
new file mode 100644
index 0000000000..b52cf3392c
--- /dev/null
+++ b/third_party/aom/test/encode_perf_test.cc
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <string>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_version.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "aom_ports/aom_timer.h"
+
+namespace {
+
+const int kMaxPsnr = 100;
+const double kUsecsInSec = 1000000.0;
+
+struct EncodePerfTestVideo {
+  EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
+                      uint32_t bitrate_, int frames_)
+      : name(name_), width(width_), height(height_), bitrate(bitrate_),
+        frames(frames_) {}
+  const char *name;
+  uint32_t width;
+  uint32_t height;
+  uint32_t bitrate;
+  int frames;
+};
+
+const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = {
+  EncodePerfTestVideo("desktop_640_360_30.yuv", 640, 360, 200, 2484),
+  EncodePerfTestVideo("kirland_640_480_30.yuv", 640, 480, 200, 300),
+  EncodePerfTestVideo("macmarcomoving_640_480_30.yuv", 640, 480, 200, 987),
+  EncodePerfTestVideo("macmarcostationary_640_480_30.yuv", 640, 480, 200, 718),
+  EncodePerfTestVideo("niklas_640_480_30.yuv", 640, 480, 200, 471),
+  EncodePerfTestVideo("tacomanarrows_640_480_30.yuv", 640, 480, 200, 300),
+  EncodePerfTestVideo("tacomasmallcameramovement_640_480_30.yuv", 640, 480, 200,
+                      300),
+  EncodePerfTestVideo("thaloundeskmtg_640_480_30.yuv", 640, 480, 200, 300),
+  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
+};
+
+const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 };
+const int kEncodePerfTestThreads[] = { 1, 2, 4 };
+
+class AV1EncodePerfTest
+    : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AV1EncodePerfTest()
+      : EncoderTest(GET_PARAM(0)), min_psnr_(kMaxPsnr), nframes_(0),
+        encoding_mode_(GET_PARAM(1)), speed_(0), threads_(1) {}
+
+  ~AV1EncodePerfTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+
+    cfg_.g_lag_in_frames = 0;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_undershoot_pct = 50;
+    cfg_.rc_overshoot_pct = 50;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_error_resilient = 1;
+    cfg_.g_threads = threads_;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      const int log2_tile_columns = 3;
+      encoder->Control(AOME_SET_CPUUSED, speed_);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, log2_tile_columns);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
+    }
+  }
+
+  void BeginPassHook(unsigned int /*pass*/) override {
+    min_psnr_ = kMaxPsnr;
+    nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    if (pkt->data.psnr.psnr[0] < min_psnr_) {
+      min_psnr_ = pkt->data.psnr.psnr[0];
+    }
+  }
+
+  // for performance reasons don't decode
+  bool DoDecode() const override { return false; }
+
+  double min_psnr() const { return min_psnr_; }
+
+  void set_speed(unsigned int speed) { speed_ = speed; }
+
+  void set_threads(unsigned int threads) { threads_ = threads; }
+
+ private:
+  double min_psnr_;
+  unsigned int nframes_;
+  libaom_test::TestMode encoding_mode_;
+  unsigned speed_;
+  unsigned int threads_;
+};
+
+TEST_P(AV1EncodePerfTest, PerfTest) {
+  for (const EncodePerfTestVideo &test_video : kAV1EncodePerfTestVectors) {
+    for (int speed : kEncodePerfTestSpeeds) {
+      for (int threads : kEncodePerfTestThreads) {
+        if (test_video.width < 512 && threads > 1)
+          continue;
+        else if (test_video.width < 1024 && threads > 2)
+          continue;
+
+        set_threads(threads);
+        SetUp();
+
+        const aom_rational timebase = { 33333333, 1000000000 };
+        cfg_.g_timebase = timebase;
+        cfg_.rc_target_bitrate = test_video.bitrate;
+
+        init_flags_ = AOM_CODEC_USE_PSNR;
+
+        const unsigned frames = test_video.frames;
+        const char *video_name = test_video.name;
+        libaom_test::I420VideoSource video(video_name, test_video.width,
+                                           test_video.height, timebase.den,
+                                           timebase.num, 0, test_video.frames);
+        set_speed(speed);
+
+        aom_usec_timer t;
+        aom_usec_timer_start(&t);
+
+        ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+        aom_usec_timer_mark(&t);
+        const double elapsed_secs = aom_usec_timer_elapsed(&t) / kUsecsInSec;
+        const double fps = frames / elapsed_secs;
+        const double minimum_psnr = min_psnr();
+        std::string display_name(video_name);
+        if (threads > 1) {
+          char thread_count[32];
+          snprintf(thread_count, sizeof(thread_count), "_t-%d", threads);
+          display_name += thread_count;
+        }
+
+        printf("{\n");
+        printf("\t\"type\" : \"encode_perf_test\",\n");
+        printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+        printf("\t\"videoName\" : \"%s\",\n", display_name.c_str());
+        printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs);
+        printf("\t\"totalFrames\" : %u,\n", frames);
+        printf("\t\"framesPerSecond\" : %f,\n", fps);
+        printf("\t\"minPsnr\" : %f,\n", minimum_psnr);
+        printf("\t\"speed\" : %d,\n", speed);
+        printf("\t\"threads\" : %d\n", threads);
+        printf("}\n");
+      }
+    }
+  }
+}
+
+AV1_INSTANTIATE_TEST_SUITE(AV1EncodePerfTest,
+                           ::testing::Values(::libaom_test::kRealTime));
+}  // namespace
diff --git a/third_party/aom/test/encode_small_width_height_test.cc b/third_party/aom/test/encode_small_width_height_test.cc
new file mode 100644
index 0000000000..22f69396d9
--- /dev/null
+++ b/third_party/aom/test/encode_small_width_height_test.cc
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+// Tests for https://crbug.com/aomedia/2777.
+//
+// Encode images with a small width (<= two AV1 superblocks) or a small height
+// (<= one AV1 superblock) with multiple threads. aom_codec_encode() should
+// not crash.
+
+#include <memory>
+
+#include "aom/aomcx.h"
+#include "aom/aom_encoder.h"
+#include "config/aom_config.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+// Dummy buffer of zero samples.
+constexpr unsigned char kBuffer[2 * (256 * 512 + 2 * 128 * 256)] = { 0 };
+#if CONFIG_REALTIME_ONLY
+const int kUsage = 1;
+#else
+const int kUsage = 0;
+#endif
+
+void EncodeSmallWidthMultiThreaded(aom_img_fmt fmt, aom_codec_flags_t flag) {
+  // The image has only one tile and the tile is two AV1 superblocks wide.
+  // For speed >= 1, superblock size is 64x64 (see av1_select_sb_size()).
+  constexpr int kWidth = 128;
+  constexpr int kHeight = 512;
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, fmt, kWidth, kHeight, 1,
+                               const_cast<unsigned char *>(kBuffer)));
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage));
+  cfg.g_threads = 2;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, flag));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 5));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+TEST(EncodeSmallWidthHeight, SmallWidthMultiThreaded) {
+  EncodeSmallWidthMultiThreaded(AOM_IMG_FMT_I420, 0);
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST(HighbdEncodeSmallWidthHeight, SmallWidthMultiThreaded) {
+  EncodeSmallWidthMultiThreaded(AOM_IMG_FMT_I42016, AOM_CODEC_USE_HIGHBITDEPTH);
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#if !CONFIG_REALTIME_ONLY
+void EncodeSmallWidthMultiThreadedSpeed0(aom_img_fmt fmt,
+                                         aom_codec_flags_t flag) {
+  // The image has only one tile and the tile is two AV1 superblocks wide.
+  // For speed 0, superblock size is 128x128 (see av1_select_sb_size()).
+  constexpr int kWidth = 256;
+  constexpr int kHeight = 512;
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, fmt, kWidth, kHeight, 1,
+                               const_cast<unsigned char *>(kBuffer)));
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, 0));
+  cfg.g_threads = 2;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, flag));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+TEST(EncodeSmallWidthHeight, SmallWidthMultiThreadedSpeed0) {
+  EncodeSmallWidthMultiThreadedSpeed0(AOM_IMG_FMT_I420, 0);
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST(HighbdEncodeSmallWidthHeight, SmallWidthMultiThreadedSpeed0) {
+  EncodeSmallWidthMultiThreadedSpeed0(AOM_IMG_FMT_I42016,
+                                      AOM_CODEC_USE_HIGHBITDEPTH);
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#endif
+
+void EncodeSmallHeightMultiThreaded(aom_img_fmt fmt, aom_codec_flags_t flag) {
+  // The image has only one tile and the tile is one AV1 superblock tall.
+  // For speed >= 1, superblock size is 64x64 (see av1_select_sb_size()).
+  constexpr int kWidth = 512;
+  constexpr int kHeight = 64;
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, fmt, kWidth, kHeight, 1,
+                               const_cast<unsigned char *>(kBuffer)));
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage));
+  cfg.g_threads = 2;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, flag));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 5));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+TEST(EncodeSmallWidthHeight, SmallHeightMultiThreaded) {
+  EncodeSmallHeightMultiThreaded(AOM_IMG_FMT_I420, 0);
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST(HighbdEncodeSmallWidthHeight, SmallHeightMultiThreaded) {
+  EncodeSmallHeightMultiThreaded(AOM_IMG_FMT_I42016,
+                                 AOM_CODEC_USE_HIGHBITDEPTH);
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#if !CONFIG_REALTIME_ONLY
+void EncodeSmallHeightMultiThreadedSpeed0(aom_img_fmt fmt,
+                                          aom_codec_flags_t flag) {
+  // The image has only one tile and the tile is one AV1 superblock tall.
+  // For speed 0, superblock size is 128x128 (see av1_select_sb_size()).
+  constexpr int kWidth = 512;
+  constexpr int kHeight = 128;
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, fmt, kWidth, kHeight, 1,
+                               const_cast<unsigned char *>(kBuffer)));
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, 0));
+  cfg.g_threads = 2;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, flag));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+TEST(EncodeSmallWidthHeight, SmallHeightMultiThreadedSpeed0) {
+  EncodeSmallHeightMultiThreadedSpeed0(AOM_IMG_FMT_I420, 0);
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST(HighbdEncodeSmallWidthHeight, SmallHeightMultiThreadedSpeed0) {
+  EncodeSmallHeightMultiThreadedSpeed0(AOM_IMG_FMT_I42016,
+                                       AOM_CODEC_USE_HIGHBITDEPTH);
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif
+
+// A reproducer test for aomedia:3113. The test should complete without any
+// memory errors.
+void Encode1x1(aom_img_fmt fmt, int bitdepth, aom_codec_flags_t flags) {
+  constexpr int kWidth = 1;
+  constexpr int kHeight = 1;
+
+  // This test cannot use aom_img_alloc() or aom_img_wrap() because they call
+  // align_image_dimension() to align img.w and img.h to the next even number
+  // (2). In this test it is important to set img.w and img.h to 1. Therefore we
+  // set up img manually.
+  aom_image_t img;
+  memset(&img, 0, sizeof(img));
+  img.fmt = fmt;
+  img.bit_depth = bitdepth;
+  img.w = kWidth;
+  img.h = kHeight;
+  img.d_w = kWidth;
+  img.d_h = kHeight;
+  img.x_chroma_shift = 1;
+  img.y_chroma_shift = 1;
+  img.bps = 12;
+  const int y_stride = kWidth;
+  const int uv_stride = (kWidth + 1) >> 1;
+  int y_height = kHeight;
+  int uv_height = (kHeight + 1) >> 1;
+  if (bitdepth > 8) {
+    y_height <<= 1;
+    uv_height <<= 1;
+  }
+  img.stride[AOM_PLANE_Y] = y_stride;
+  img.stride[AOM_PLANE_U] = img.stride[AOM_PLANE_V] = uv_stride;
+  std::unique_ptr<unsigned char[]> y_plane(
+      new unsigned char[y_height * y_stride]());
+  ASSERT_NE(y_plane, nullptr);
+  std::unique_ptr<unsigned char[]> u_plane(
+      new unsigned char[uv_height * uv_stride]());
+  ASSERT_NE(u_plane, nullptr);
+  std::unique_ptr<unsigned char[]> v_plane(
+      new unsigned char[uv_height * uv_stride]());
+  ASSERT_NE(v_plane, nullptr);
+  img.planes[AOM_PLANE_Y] = y_plane.get();
+  img.planes[AOM_PLANE_U] = u_plane.get();
+  img.planes[AOM_PLANE_V] = v_plane.get();
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage));
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, flags));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 5));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+TEST(EncodeSmallWidthHeight, 1x1) { Encode1x1(AOM_IMG_FMT_I420, 8, 0); }
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST(HighbdEncodeSmallWidthHeight, 1x1) {
+  Encode1x1(AOM_IMG_FMT_I42016, 12, AOM_CODEC_USE_HIGHBITDEPTH);
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/encode_test_driver.cc b/third_party/aom/test/encode_test_driver.cc
new file mode 100644
index 0000000000..b5c506c6d3
--- /dev/null
+++ b/third_party/aom/test/encode_test_driver.cc
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+
+#include "aom_ports/mem.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
+#include "test/register_state_check.h"
+#include "test/video_source.h"
+
+namespace libaom_test {
+void Encoder::InitEncoder(VideoSource *video) {
+  aom_codec_err_t res;
+  const aom_image_t *img = video->img();
+
+  if (video->img() && !encoder_.priv) {
+    cfg_.g_w = img->d_w;
+    cfg_.g_h = img->d_h;
+    cfg_.g_timebase = video->timebase();
+    cfg_.rc_twopass_stats_in = stats_->buf();
+
+    res = aom_codec_enc_init(&encoder_, CodecInterface(), &cfg_, init_flags_);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+}
+
+void Encoder::EncodeFrame(VideoSource *video,
+                          const aom_enc_frame_flags_t frame_flags) {
+  if (video->img())
+    EncodeFrameInternal(*video, frame_flags);
+  else
+    Flush();
+
+  // Handle twopass stats
+  CxDataIterator iter = GetCxData();
+
+  while (const aom_codec_cx_pkt_t *pkt = iter.Next()) {
+    if (pkt->kind != AOM_CODEC_STATS_PKT) continue;
+
+    stats_->Append(*pkt);
+  }
+}
+
+void Encoder::EncodeFrameInternal(const VideoSource &video,
+                                  const aom_enc_frame_flags_t frame_flags) {
+  aom_codec_err_t res;
+  const aom_image_t *img = video.img();
+
+  // Handle frame resizing
+  if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) {
+    cfg_.g_w = img->d_w;
+    cfg_.g_h = img->d_h;
+    res = aom_codec_enc_config_set(&encoder_, &cfg_);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  // Encode the frame
+  API_REGISTER_STATE_CHECK(res =
+                               aom_codec_encode(&encoder_, img, video.pts(),
+                                                video.duration(), frame_flags));
+  ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+}
+
+void Encoder::Flush() {
+  const aom_codec_err_t res = aom_codec_encode(&encoder_, nullptr, 0, 0, 0);
+  if (!encoder_.priv)
+    ASSERT_EQ(AOM_CODEC_ERROR, res) << EncoderError();
+  else
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+}
+
+void EncoderTest::InitializeConfig(TestMode mode) {
+  int usage = AOM_USAGE_GOOD_QUALITY;
+  switch (mode) {
+    case kOnePassGood:
+    case kTwoPassGood: break;
+    case kRealTime: usage = AOM_USAGE_REALTIME; break;
+    case kAllIntra: usage = AOM_USAGE_ALL_INTRA; break;
+    default: ASSERT_TRUE(false) << "Unexpected mode " << mode;
+  }
+  mode_ = mode;
+  passes_ = (mode == kTwoPassGood) ? 2 : 1;
+
+  const aom_codec_err_t res = codec_->DefaultEncoderConfig(&cfg_, usage);
+  ASSERT_EQ(AOM_CODEC_OK, res);
+}
+
+static bool compare_plane(const uint8_t *const buf1, int stride1,
+                          const uint8_t *const buf2, int stride2, int w, int h,
+                          int *const mismatch_row, int *const mismatch_col,
+                          int *const mismatch_pix1, int *const mismatch_pix2) {
+  int r, c;
+
+  for (r = 0; r < h; ++r) {
+    for (c = 0; c < w; ++c) {
+      const int pix1 = buf1[r * stride1 + c];
+      const int pix2 = buf2[r * stride2 + c];
+
+      if (pix1 != pix2) {
+        if (mismatch_row != nullptr) *mismatch_row = r;
+        if (mismatch_col != nullptr) *mismatch_col = c;
+        if (mismatch_pix1 != nullptr) *mismatch_pix1 = pix1;
+        if (mismatch_pix2 != nullptr) *mismatch_pix2 = pix2;
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+// The function should return "true" most of the time, therefore no early
+// break-out is implemented within the match checking process.
+static bool compare_img(const aom_image_t *img1, const aom_image_t *img2,
+                        int *const mismatch_row, int *const mismatch_col,
+                        int *const mismatch_plane, int *const mismatch_pix1,
+                        int *const mismatch_pix2) {
+  if (img1->fmt != img2->fmt || img1->cp != img2->cp || img1->tc != img2->tc ||
+      img1->mc != img2->mc || img1->d_w != img2->d_w ||
+      img1->d_h != img2->d_h || img1->monochrome != img2->monochrome) {
+    if (mismatch_row != nullptr) *mismatch_row = -1;
+    if (mismatch_col != nullptr) *mismatch_col = -1;
+    return false;
+  }
+
+  const int num_planes = img1->monochrome ? 1 : 3;
+  for (int plane = 0; plane < num_planes; plane++) {
+    if (!compare_plane(img1->planes[plane], img1->stride[plane],
+                       img2->planes[plane], img2->stride[plane],
+                       aom_img_plane_width(img1, plane),
+                       aom_img_plane_height(img1, plane), mismatch_row,
+                       mismatch_col, mismatch_pix1, mismatch_pix2)) {
+      if (mismatch_plane != nullptr) *mismatch_plane = plane;
+      return false;
+    }
+  }
+
+  return true;
+}
+
+void EncoderTest::MismatchHook(const aom_image_t *img_enc,
+                               const aom_image_t *img_dec) {
+  int mismatch_row = 0;
+  int mismatch_col = 0;
+  int mismatch_plane = 0;
+  int mismatch_pix_enc = 0;
+  int mismatch_pix_dec = 0;
+
+  ASSERT_FALSE(compare_img(img_enc, img_dec, &mismatch_row, &mismatch_col,
+                           &mismatch_plane, &mismatch_pix_enc,
+                           &mismatch_pix_dec));
+
+  GTEST_FAIL() << "Encode/Decode mismatch found:" << std::endl
+               << "  pixel value enc/dec: " << mismatch_pix_enc << "/"
+               << mismatch_pix_dec << std::endl
+               << "                plane: " << mismatch_plane << std::endl
+               << "              row/col: " << mismatch_row << "/"
+               << mismatch_col << std::endl;
+}
+
+void EncoderTest::RunLoop(VideoSource *video) {
+  stats_.Reset();
+
+  ASSERT_TRUE(passes_ == 1 || passes_ == 2);
+  for (unsigned int pass = 0; pass < passes_; pass++) {
+    aom_codec_pts_t last_pts = 0;
+
+    if (passes_ == 1)
+      cfg_.g_pass = AOM_RC_ONE_PASS;
+    else if (pass == 0)
+      cfg_.g_pass = AOM_RC_FIRST_PASS;
+    else
+      cfg_.g_pass = AOM_RC_LAST_PASS;
+
+    BeginPassHook(pass);
+    std::unique_ptr<Encoder> encoder(
+        codec_->CreateEncoder(cfg_, init_flags_, &stats_));
+    ASSERT_NE(encoder, nullptr);
+
+    ASSERT_NO_FATAL_FAILURE(video->Begin());
+    encoder->InitEncoder(video);
+
+    if (mode_ == kRealTime) {
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
+    }
+
+    ASSERT_FALSE(::testing::Test::HasFatalFailure());
+#if CONFIG_AV1_DECODER
+    aom_codec_dec_cfg_t dec_cfg = aom_codec_dec_cfg_t();
+    dec_cfg.allow_lowbitdepth = 1;
+    std::unique_ptr<Decoder> decoder(
+        codec_->CreateDecoder(dec_cfg, 0 /* flags */));
+    if (decoder->IsAV1()) {
+      // Set dec_cfg.tile_row = -1 and dec_cfg.tile_col = -1 so that the whole
+      // frame is decoded.
+      decoder->Control(AV1_SET_TILE_MODE, cfg_.large_scale_tile);
+      decoder->Control(AV1D_EXT_TILE_DEBUG, 1);
+      decoder->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      decoder->Control(AV1_SET_DECODE_TILE_COL, -1);
+    }
+#endif
+
+    int number_spatial_layers = GetNumSpatialLayers();
+
+    bool again;
+    for (again = true; again; video->Next()) {
+      again = (video->img() != nullptr);
+
+      for (int sl = 0; sl < number_spatial_layers; sl++) {
+        PreEncodeFrameHook(video, encoder.get());
+        encoder->EncodeFrame(video, frame_flags_);
+        PostEncodeFrameHook(encoder.get());
+        CxDataIterator iter = encoder->GetCxData();
+        bool has_cxdata = false;
+
+#if CONFIG_AV1_DECODER
+        bool has_dxdata = false;
+#endif
+        while (const aom_codec_cx_pkt_t *pkt = iter.Next()) {
+          pkt = MutateEncoderOutputHook(pkt);
+          again = true;
+          switch (pkt->kind) {
+            case AOM_CODEC_CX_FRAME_PKT:  //
+              has_cxdata = true;
+#if CONFIG_AV1_DECODER
+              if (decoder.get() != nullptr && DoDecode()) {
+                aom_codec_err_t res_dec;
+                if (DoDecodeInvisible()) {
+                  res_dec = decoder->DecodeFrame(
+                      (const uint8_t *)pkt->data.frame.buf, pkt->data.frame.sz);
+                } else {
+                  res_dec = decoder->DecodeFrame(
+                      (const uint8_t *)pkt->data.frame.buf +
+                          (pkt->data.frame.sz - pkt->data.frame.vis_frame_size),
+                      pkt->data.frame.vis_frame_size);
+                }
+
+                if (!HandleDecodeResult(res_dec, decoder.get())) break;
+
+                has_dxdata = true;
+              }
+#endif
+              ASSERT_GE(pkt->data.frame.pts, last_pts);
+              if (sl == number_spatial_layers - 1)
+                last_pts = pkt->data.frame.pts;
+              FramePktHook(pkt);
+              break;
+
+            case AOM_CODEC_PSNR_PKT: PSNRPktHook(pkt); break;
+
+            case AOM_CODEC_STATS_PKT: StatsPktHook(pkt); break;
+
+            default: break;
+          }
+        }
+        if (has_cxdata) {
+          const aom_image_t *img_enc = encoder->GetPreviewFrame();
+          if (img_enc) {
+            CalculateFrameLevelSSIM(video->img(), img_enc, cfg_.g_bit_depth,
+                                    cfg_.g_input_bit_depth);
+          }
+#if CONFIG_AV1_DECODER
+          if (has_dxdata) {
+            DxDataIterator dec_iter = decoder->GetDxData();
+            const aom_image_t *img_dec = dec_iter.Next();
+            if (img_enc && img_dec) {
+              const bool res = compare_img(img_enc, img_dec, nullptr, nullptr,
+                                           nullptr, nullptr, nullptr);
+              if (!res) {  // Mismatch
+                MismatchHook(img_enc, img_dec);
+              }
+            }
+            if (img_dec) DecompressedFrameHook(*img_dec, video->pts());
+          }
+#endif
+        }
+        if (!Continue()) break;
+      }  // Loop over spatial layers
+    }
+
+    EndPassHook();
+
+    if (!Continue()) break;
+  }
+}
+
+}  // namespace libaom_test
diff --git a/third_party/aom/test/encode_test_driver.h b/third_party/aom/test/encode_test_driver.h
new file mode 100644
index 0000000000..d1e6615cd7
--- /dev/null
+++ b/third_party/aom/test/encode_test_driver.h
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AOM_TEST_ENCODE_TEST_DRIVER_H_
+#define AOM_TEST_ENCODE_TEST_DRIVER_H_
+
+#include <string>
+#include <vector>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+
+#if CONFIG_AV1_ENCODER
+#include "aom/aomcx.h"
+#endif
+#include "aom/aom_encoder.h"
+
+namespace libaom_test {
+
+class CodecFactory;
+class VideoSource;
+
+enum TestMode { kRealTime, kOnePassGood, kTwoPassGood, kAllIntra };
+#define ALL_TEST_MODES                                                     \
+  ::testing::Values(::libaom_test::kRealTime, ::libaom_test::kOnePassGood, \
+                    ::libaom_test::kTwoPassGood)
+
+#define ONE_PASS_TEST_MODES \
+  ::testing::Values(::libaom_test::kRealTime, ::libaom_test::kOnePassGood)
+
+#define TWO_PASS_TEST_MODES ::testing::Values(::libaom_test::kTwoPassGood)
+
+#define NONREALTIME_TEST_MODES \
+  ::testing::Values(::libaom_test::kOnePassGood, ::libaom_test::kTwoPassGood)
+
+// Provides an object to handle the libaom get_cx_data() iteration pattern
+class CxDataIterator {
+ public:
+  explicit CxDataIterator(aom_codec_ctx_t *encoder)
+      : encoder_(encoder), iter_(nullptr) {}
+
+  const aom_codec_cx_pkt_t *Next() {
+    return aom_codec_get_cx_data(encoder_, &iter_);
+  }
+
+ private:
+  aom_codec_ctx_t *encoder_;
+  aom_codec_iter_t iter_;
+};
+
+// Implements an in-memory store for libaom twopass statistics
+class TwopassStatsStore {
+ public:
+  void Append(const aom_codec_cx_pkt_t &pkt) {
+    buffer_.append(reinterpret_cast<char *>(pkt.data.twopass_stats.buf),
+                   pkt.data.twopass_stats.sz);
+  }
+
+  aom_fixed_buf_t buf() {
+    const aom_fixed_buf_t buf = { &buffer_[0], buffer_.size() };
+    return buf;
+  }
+
+  void Reset() { buffer_.clear(); }
+
+ protected:
+  std::string buffer_;
+};
+
+// Provides a simplified interface to manage one video encoding pass, given
+// a configuration and video source.
+//
+// TODO(jkoleszar): The exact services it provides and the appropriate
+// level of abstraction will be fleshed out as more tests are written.
+class Encoder {
+ public:
+  Encoder(aom_codec_enc_cfg_t cfg, const aom_codec_flags_t init_flags,
+          TwopassStatsStore *stats)
+      : cfg_(cfg), init_flags_(init_flags), stats_(stats) {
+    memset(&encoder_, 0, sizeof(encoder_));
+  }
+
+  virtual ~Encoder() { aom_codec_destroy(&encoder_); }
+
+  CxDataIterator GetCxData() { return CxDataIterator(&encoder_); }
+
+  void InitEncoder(VideoSource *video);
+
+  const aom_image_t *GetPreviewFrame() {
+    return aom_codec_get_preview_frame(&encoder_);
+  }
+  // This is a thin wrapper around aom_codec_encode(), so refer to
+  // aom_encoder.h for its semantics.
+  void EncodeFrame(VideoSource *video, aom_enc_frame_flags_t frame_flags);
+
+  // Convenience wrapper for EncodeFrame()
+  void EncodeFrame(VideoSource *video) { EncodeFrame(video, 0); }
+
+  void Control(int ctrl_id, int arg) {
+    const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  void Control(int ctrl_id, int *arg) {
+    const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  void Control(int ctrl_id, struct aom_scaling_mode *arg) {
+    const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  void Control(int ctrl_id, struct aom_svc_layer_id *arg) {
+    const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  void Control(int ctrl_id, struct aom_svc_ref_frame_config *arg) {
+    const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  void Control(int ctrl_id, struct aom_svc_ref_frame_comp_pred *arg) {
+    const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  void Control(int ctrl_id, struct aom_svc_params *arg) {
+    const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  void Control(int ctrl_id, struct aom_ext_part_funcs *arg) {
+    const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+#if CONFIG_AV1_ENCODER
+  void Control(int ctrl_id, aom_active_map_t *arg) {
+    const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+#endif
+
+  void SetOption(const char *name, const char *value) {
+    const aom_codec_err_t res = aom_codec_set_option(&encoder_, name, value);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  void Config(const aom_codec_enc_cfg_t *cfg) {
+    const aom_codec_err_t res = aom_codec_enc_config_set(&encoder_, cfg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+    cfg_ = *cfg;
+  }
+
+ protected:
+  virtual aom_codec_iface_t *CodecInterface() const = 0;
+
+  const char *EncoderError() {
+    const char *detail = aom_codec_error_detail(&encoder_);
+    return detail ? detail : aom_codec_error(&encoder_);
+  }
+
+  // Encode an image
+  void EncodeFrameInternal(const VideoSource &video,
+                           aom_enc_frame_flags_t frame_flags);
+
+  // Flush the encoder on EOS
+  void Flush();
+
+  aom_codec_ctx_t encoder_;
+  aom_codec_enc_cfg_t cfg_;
+  aom_codec_flags_t init_flags_;
+  TwopassStatsStore *stats_;
+};
+
+// Common test functionality for all Encoder tests.
+//
+// This class is a mixin which provides the main loop common to all
+// encoder tests. It provides hooks which can be overridden by subclasses
+// to implement each test's specific behavior, while centralizing the bulk
+// of the boilerplate. Note that it doesn't inherit the gtest testing
+// classes directly, so that tests can be parameterized differently.
+class EncoderTest {
+ protected:
+  explicit EncoderTest(const CodecFactory *codec)
+      : codec_(codec), abort_(false), init_flags_(0), frame_flags_(0),
+        mode_(kRealTime) {
+    // Default to 1 thread.
+    cfg_.g_threads = 1;
+  }
+
+  virtual ~EncoderTest() = default;
+
+  // Initialize the cfg_ member with the default configuration for the
+  // TestMode enum and maps the TestMode enum to the passes_ variable.
+  void InitializeConfig(TestMode mode);
+
+  // Set encoder flag.
+  void set_init_flags(aom_codec_flags_t flag) { init_flags_ = flag; }
+
+  // Main loop
+  virtual void RunLoop(VideoSource *video);
+
+  // Hook to be called at the beginning of a pass.
+  virtual void BeginPassHook(unsigned int /*pass*/) {}
+
+  // Hook to be called at the end of a pass.
+  virtual void EndPassHook() {}
+
+  // Hook to be called before encoding a frame.
+  virtual void PreEncodeFrameHook(VideoSource * /*video*/,
+                                  Encoder * /*encoder*/) {}
+
+  virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {}
+
+  // Hook to be called on every compressed data packet.
+  virtual void FramePktHook(const aom_codec_cx_pkt_t * /*pkt*/) {}
+
+  // Hook to be called on every PSNR packet.
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t * /*pkt*/) {}
+
+  // Hook to be called on every first pass stats packet.
+  virtual void StatsPktHook(const aom_codec_cx_pkt_t * /*pkt*/) {}
+
+  // Calculates SSIM at frame level.
+  virtual void CalculateFrameLevelSSIM(const aom_image_t * /*img_src*/,
+                                       const aom_image_t * /*img_enc*/,
+                                       aom_bit_depth_t /*bit_depth*/,
+                                       unsigned int /*input_bit_depth*/) {}
+
+  // Hook to determine whether the encode loop should continue.
+  virtual bool Continue() const {
+    return !(::testing::Test::HasFatalFailure() || abort_);
+  }
+
+  // Hook to determine whether to decode frame after encoding
+  virtual bool DoDecode() const { return true; }
+
+  // Hook to determine whether to decode invisible frames after encoding
+  virtual bool DoDecodeInvisible() const { return true; }
+
+  // Hook to handle encode/decode mismatch
+  virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2);
+
+  // Hook to be called on every decompressed frame.
+  virtual void DecompressedFrameHook(const aom_image_t & /*img*/,
+                                     aom_codec_pts_t /*pts*/) {}
+
+  // Hook to be called to handle decode result. Return true to continue.
+  virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                                  Decoder *decoder) {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  virtual int GetNumSpatialLayers() { return 1; }
+
+  // Hook that can modify the encoder's output data
+  virtual const aom_codec_cx_pkt_t *MutateEncoderOutputHook(
+      const aom_codec_cx_pkt_t *pkt) {
+    return pkt;
+  }
+
+  const CodecFactory *codec_;
+  bool abort_;
+  aom_codec_enc_cfg_t cfg_;
+  unsigned int passes_;
+  TwopassStatsStore stats_;
+  aom_codec_flags_t init_flags_;
+  aom_enc_frame_flags_t frame_flags_;
+  TestMode mode_;
+};
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_ENCODE_TEST_DRIVER_H_
diff --git a/third_party/aom/test/encodemb_test.cc b/third_party/aom/test/encodemb_test.cc
new file mode 100644
index 0000000000..6165fc33f5
--- /dev/null
+++ b/third_party/aom/test/encodemb_test.cc
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdint.h>
+#include <vector>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "av1/encoder/block.h"
+#include "av1/encoder/encodemb.h"
+#include "av1/common/scan.h"
+
+namespace {
+
+// Reorders 'qcoeff_lexico', which is in lexicographic order (row by row), into
+// scan order (zigzag) in 'qcoeff_scan'.
+void ToScanOrder(TX_SIZE tx_size, TX_TYPE tx_type, tran_low_t *qcoeff_lexico,
+                 tran_low_t *qcoeff_scan) {
+  const int max_eob = av1_get_max_eob(tx_size);
+  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
+  for (int i = 0; i < max_eob; ++i) {
+    qcoeff_scan[i] = qcoeff_lexico[scan_order->scan[i]];
+  }
+}
+
+// Reorders 'qcoeff_scan', which is in scan order (zigzag), into lexicographic
+// order (row by row) in 'qcoeff_lexico'.
+void ToLexicoOrder(TX_SIZE tx_size, TX_TYPE tx_type, tran_low_t *qcoeff_scan,
+                   tran_low_t *qcoeff_lexico) {
+  const int max_eob = av1_get_max_eob(tx_size);
+  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
+  for (int i = 0; i < max_eob; ++i) {
+    qcoeff_lexico[scan_order->scan[i]] = qcoeff_scan[i];
+  }
+}
+
+// Runs coefficient dropout on 'qcoeff_scan'.
+void Dropout(TX_SIZE tx_size, TX_TYPE tx_type, int dropout_num_before,
+             int dropout_num_after, tran_low_t *qcoeff_scan) {
+  tran_low_t qcoeff[MAX_TX_SQUARE];
+  // qcoeff_scan is assumed to be in scan order, since tests are easier to
+  // understand this way, but av1_dropout_qcoeff expects coeffs in lexico order
+  // so we convert to lexico then back to scan afterwards.
+  ToLexicoOrder(tx_size, tx_type, qcoeff_scan, qcoeff);
+
+  const int max_eob = av1_get_max_eob(tx_size);
+  const int kDequantFactor = 10;
+  tran_low_t dqcoeff[MAX_TX_SQUARE];
+  for (int i = 0; i < max_eob; ++i) {
+    dqcoeff[i] = qcoeff[i] * kDequantFactor;
+  }
+
+  uint16_t eob = max_eob;
+  while (eob > 0 && qcoeff_scan[eob - 1] == 0) --eob;
+
+  MACROBLOCK mb;
+  const int kPlane = 0;
+  const int kBlock = 0;
+  memset(&mb, 0, sizeof(mb));
+  uint16_t eobs[] = { eob };
+  mb.plane[kPlane].eobs = eobs;
+  mb.plane[kPlane].qcoeff = qcoeff;
+  mb.plane[kPlane].dqcoeff = dqcoeff;
+  uint8_t txb_entropy_ctx[1];
+  mb.plane[kPlane].txb_entropy_ctx = txb_entropy_ctx;
+
+  av1_dropout_qcoeff_num(&mb, kPlane, kBlock, tx_size, tx_type,
+                         dropout_num_before, dropout_num_after);
+
+  ToScanOrder(tx_size, tx_type, qcoeff, qcoeff_scan);
+
+  // Check updated eob value is valid.
+  uint16_t new_eob = max_eob;
+  while (new_eob > 0 && qcoeff_scan[new_eob - 1] == 0) --new_eob;
+  EXPECT_EQ(new_eob, mb.plane[kPlane].eobs[0]);
+
+  // Check dqcoeff is still valid.
+  for (int i = 0; i < max_eob; ++i) {
+    EXPECT_EQ(qcoeff[i] * kDequantFactor, dqcoeff[i]);
+  }
+}
+
+void ExpectArrayEq(tran_low_t *actual, std::vector<tran_low_t> expected) {
+  for (size_t i = 0; i < expected.size(); ++i) {
+    EXPECT_EQ(expected[i], actual[i]) << "Arrays differ at index " << i;
+  }
+}
+
+static constexpr TX_TYPE kTxType = DCT_DCT;
+
+TEST(DropoutTest, KeepsLargeCoeffs) {
+  const TX_SIZE tx_size = TX_8X4;
+  const uint32_t dropout_num_before = 4;
+  const uint32_t dropout_num_after = 6;
+  // Large isolated coeffs should be preserved.
+  tran_low_t qcoeff_scan[] = { 0, 0, 0, 0, 0, 0, 42, 0,    // should be kept
+                               0, 0, 0, 0, 0, 0, 0,  0,    //
+                               0, 0, 0, 0, 0, 0, 0,  -30,  // should be kept
+                               0, 0, 0, 0, 0, 0, 0,  0 };
+  Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan);
+  ExpectArrayEq(qcoeff_scan, { 0, 0, 0, 0, 0, 0, 42, 0,    //
+                               0, 0, 0, 0, 0, 0, 0,  0,    //
+                               0, 0, 0, 0, 0, 0, 0,  -30,  //
+                               0, 0, 0, 0, 0, 0, 0,  0 });
+}
+
+TEST(DropoutTest, RemovesSmallIsolatedCoeffs) {
+  const TX_SIZE tx_size = TX_8X4;
+  const uint32_t dropout_num_before = 4;
+  const uint32_t dropout_num_after = 6;
+  // Small isolated coeffs should be removed.
+  tran_low_t qcoeff_scan[] = { 0, 0, 0, 0, 1,  0, 0, 0,  // should be removed
+                               0, 0, 0, 0, 0,  0, 0, 0,  //
+                               0, 0, 0, 0, -2, 0, 0, 0,  // should be removed
+                               0, 0, 0, 0, 0,  0, 0, 0 };
+  Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan);
+  ExpectArrayEq(qcoeff_scan, { 0, 0, 0, 0, 0, 0, 0, 0,  //
+                               0, 0, 0, 0, 0, 0, 0, 0,  //
+                               0, 0, 0, 0, 0, 0, 0, 0,  //
+                               0, 0, 0, 0, 0, 0, 0, 0 });
+}
+
+TEST(DropoutTest, KeepsSmallCoeffsAmongLargeOnes) {
+  const TX_SIZE tx_size = TX_8X4;
+  const uint32_t dropout_num_before = 4;
+  const uint32_t dropout_num_after = 6;
+  // Small coeffs that are not isolated (not enough zeros before/after should be
+  // kept).
+  tran_low_t qcoeff_scan[] = {
+    1, 0,  0, 0,  -5, 0, 0, -1,  // should be kept
+    0, 0,  0, 10, 0,  0, 2, 0,   // should be kept
+    0, 0,  0, 0,  0,  0, 0, 0,   //
+    0, -2, 0, 0,  0,  0, 0, 0    // should be removed
+  };                             // should be removed
+  Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan);
+  ExpectArrayEq(qcoeff_scan, { 1, 0, 0, 0,  -5, 0, 0, -1,  //
+                               0, 0, 0, 10, 0,  0, 2, 0,   //
+                               0, 0, 0, 0,  0,  0, 0, 0,   //
+                               0, 0, 0, 0,  0,  0, 0, 0 });
+}
+
+TEST(DropoutTest, KeepsSmallCoeffsCloseToStartOrEnd) {
+  const TX_SIZE tx_size = TX_8X4;
+  const uint32_t dropout_num_before = 4;
+  const uint32_t dropout_num_after = 6;
+  // Small coeffs that are too close to the beginning or end of the block
+  // should also be kept (not enough zeroes before/after).
+  tran_low_t qcoeff_scan[] = { 0, 0, -1, 0,  0, 0, 0,  0,  // should be kept
+                               0, 0, 0,  10, 0, 0, 0,  0,  // should be kept
+                               0, 0, 0,  2,  0, 0, 0,  0,  // should be removed
+                               0, 0, 0,  0,  0, 0, -1, 0 };  // should be kept
+  Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan);
+  ExpectArrayEq(qcoeff_scan, { 0, 0, -1, 0,  0, 0, 0,  0,  //
+                               0, 0, 0,  10, 0, 0, 0,  0,  //
+                               0, 0, 0,  0,  0, 0, 0,  0,  //
+                               0, 0, 0,  0,  0, 0, -1, 0 });
+}
+
+TEST(DropoutTest, RemovesSmallClusterOfCoeffs) {
+  const TX_SIZE tx_size = TX_8X4;
+  const uint32_t dropout_num_before = 4;
+  const uint32_t dropout_num_after = 6;
+  // Small clusters (<= kDropoutContinuityMax) of small coeffs should be
+  // removed.
+  tran_low_t qcoeff_scan_two[] = {
+    0, 0, 0, 0, 1, 0, 0, -1,  // should be removed
+    0, 0, 0, 0, 0, 0, 0, 0,   //
+    0, 0, 0, 0, 0, 0, 1, 0,   // should be removed
+    0, 0, 0, 0, 0, 0, 0, 0
+  };
+  Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after,
+          qcoeff_scan_two);
+  ExpectArrayEq(qcoeff_scan_two, { 0, 0, 0, 0, 0, 0, 0, 0,  //
+                                   0, 0, 0, 0, 0, 0, 0, 0,  //
+                                   0, 0, 0, 0, 0, 0, 0, 0,  //
+                                   0, 0, 0, 0, 0, 0, 0, 0 });
+}
+
+TEST(DropoutTest, KeepsLargeClusterOfCoeffs) {
+  const TX_SIZE tx_size = TX_8X4;
+  const uint32_t dropout_num_before = 4;
+  const uint32_t dropout_num_after = 6;
+  // Large clusters (> kDropoutContinuityMax) of small coeffs should be kept.
+  tran_low_t qcoeff_scan[] = { 0, 0, 0, 0, 1, 0,  1, -1,  // should be kept
+                               0, 0, 0, 0, 0, 0,  0, 0,   //
+                               0, 0, 0, 0, 0, -2, 0, 0,   // should be removed
+                               0, 0, 0, 0, 0, 0,  0, 0 };
+  Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan);
+  ExpectArrayEq(qcoeff_scan, { 0, 0, 0, 0, 1, 0, 1, -1,  //
+                               0, 0, 0, 0, 0, 0, 0, 0,   //
+                               0, 0, 0, 0, 0, 0, 0, 0,   //
+                               0, 0, 0, 0, 0, 0, 0, 0 });
+}
+
+TEST(DropoutTest, NumBeforeLargerThanNumAfter) {
+  const TX_SIZE tx_size = TX_8X4;
+  const uint32_t dropout_num_before = 4;
+  const uint32_t dropout_num_after = 2;
+  // The second coeff (-2) doesn't seem to meet the dropout_num_before
+  // criteria. But since the first coeff (1) will be dropped, it will meet
+  // the criteria and should be dropped too.
+  tran_low_t qcoeff_scan[] = { 0,  0, 0, 0, 1, 0, 0, 0,  // should be removed
+                               -2, 0, 0, 0, 0, 0, 0, 0,  // should be removed
+                               0,  0, 0, 0, 0, 0, 0, 0,  //
+                               0,  0, 0, 0, 0, 0, 0, 0 };
+  Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan);
+  ExpectArrayEq(qcoeff_scan, { 0, 0, 0, 0, 0, 0, 0, 0,  //
+                               0, 0, 0, 0, 0, 0, 0, 0,  //
+                               0, 0, 0, 0, 0, 0, 0, 0,  //
+                               0, 0, 0, 0, 0, 0, 0, 0 });
+}
+
+// More complex test combining other test cases.
+TEST(DropoutTest, ComplexTest) {
+  const TX_SIZE tx_size = TX_8X8;
+  const uint32_t dropout_num_before = 4;
+  const uint32_t dropout_num_after = 2;
+  tran_low_t qcoeff_scan[] = { 1, 12, 0,  0,   0, 0, 1,  0,   //
+                               0, 0,  0,  -12, 0, 0, 0,  1,   //
+                               0, 0,  -2, 0,   1, 0, 0,  1,   //
+                               0, 0,  0,  0,   5, 0, -1, 0,   //
+                               0, 0,  0,  1,   0, 0, 0,  -1,  //
+                               0, 0,  0,  0,   2, 0, 0,  0,   //
+                               0, 1,  0,  0,   0, 5, 0,  0,   //
+                               0, 0,  1,  1,   0, 0, 0,  -2 };
+  Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan);
+  ExpectArrayEq(qcoeff_scan, { 1, 12, 0,  0,   0, 0, 0,  0,  //
+                               0, 0,  0,  -12, 0, 0, 0,  1,  //
+                               0, 0,  -2, 0,   1, 0, 0,  1,  //
+                               0, 0,  0,  0,   5, 0, -1, 0,  //
+                               0, 0,  0,  0,   0, 0, 0,  0,  //
+                               0, 0,  0,  0,   0, 0, 0,  0,  //
+                               0, 0,  0,  0,   0, 5, 0,  0,  //
+                               0, 0,  0,  0,   0, 0, 0,  -2 });
+}
+
+}  // namespace
diff --git a/third_party/aom/test/encodetxb_test.cc b/third_party/aom/test/encodetxb_test.cc
new file mode 100644
index 0000000000..49b0fba94a
--- /dev/null
+++ b/third_party/aom/test/encodetxb_test.cc
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
+#include "av1/common/av1_common_int.h"
+#include "av1/common/idct.h"
+#include "av1/common/scan.h"
+#include "av1/common/txb_common.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+using libaom_test::ACMRandom;
+
+typedef void (*GetNzMapContextsFunc)(const uint8_t *const levels,
+                                     const int16_t *const scan,
+                                     const uint16_t eob, const TX_SIZE tx_size,
+                                     const TX_CLASS tx_class,
+                                     int8_t *const coeff_contexts);
+
+class EncodeTxbTest : public ::testing::TestWithParam<GetNzMapContextsFunc> {
+ public:
+  EncodeTxbTest() : get_nz_map_contexts_func_(GetParam()) {}
+
+  ~EncodeTxbTest() override = default;
+
+  void SetUp() override {
+    coeff_contexts_ref_ = reinterpret_cast<int8_t *>(
+        aom_memalign(16, sizeof(*coeff_contexts_ref_) * MAX_TX_SQUARE));
+    ASSERT_NE(coeff_contexts_ref_, nullptr);
+    coeff_contexts_ = reinterpret_cast<int8_t *>(
+        aom_memalign(16, sizeof(*coeff_contexts_) * MAX_TX_SQUARE));
+    ASSERT_NE(coeff_contexts_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(coeff_contexts_ref_);
+    aom_free(coeff_contexts_);
+  }
+
+  void GetNzMapContextsRun() {
+    const int kNumTests = 10;
+    int result = 0;
+
+    for (int is_inter = 0; is_inter < 2; ++is_inter) {
+      for (int tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+        const TX_CLASS tx_class = tx_type_to_class[tx_type];
+        for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) {
+          const int bhl = get_txb_bhl((TX_SIZE)tx_size);
+          const int width = get_txb_wide((TX_SIZE)tx_size);
+          const int height = get_txb_high((TX_SIZE)tx_size);
+          const int real_width = tx_size_wide[tx_size];
+          const int real_height = tx_size_high[tx_size];
+          const int16_t *const scan = av1_scan_orders[tx_size][tx_type].scan;
+
+          levels_ = set_levels(levels_buf_, height);
+          for (int i = 0; i < kNumTests && !result; ++i) {
+            for (int eob = 1; eob <= width * height && !result; ++eob) {
+              InitDataWithEob(scan, bhl, eob);
+
+              av1_get_nz_map_contexts_c(levels_, scan, eob, (TX_SIZE)tx_size,
+                                        tx_class, coeff_contexts_ref_);
+              get_nz_map_contexts_func_(levels_, scan, eob, (TX_SIZE)tx_size,
+                                        tx_class, coeff_contexts_);
+
+              result = Compare(scan, eob);
+
+              EXPECT_EQ(result, 0)
+                  << " tx_class " << (int)tx_class << " width " << real_width
+                  << " height " << real_height << " eob " << eob;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  void SpeedTestGetNzMapContextsRun() {
+    const int kNumTests = 2000000000;
+    aom_usec_timer timer;
+    aom_usec_timer timer_ref;
+
+    printf("Note: Only test the largest possible eob case!\n");
+    for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) {
+      const int bhl = get_txb_bhl((TX_SIZE)tx_size);
+      const int width = get_txb_wide((TX_SIZE)tx_size);
+      const int height = get_txb_high((TX_SIZE)tx_size);
+      const int real_width = tx_size_wide[tx_size];
+      const int real_height = tx_size_high[tx_size];
+      const TX_TYPE tx_type = DCT_DCT;
+      const TX_CLASS tx_class = tx_type_to_class[tx_type];
+      const int16_t *const scan = av1_scan_orders[tx_size][tx_type].scan;
+      const int eob = width * height;
+      const int numTests = kNumTests / (width * height);
+
+      levels_ = set_levels(levels_buf_, height);
+      InitDataWithEob(scan, bhl, eob);
+
+      aom_usec_timer_start(&timer_ref);
+      for (int i = 0; i < numTests; ++i) {
+        av1_get_nz_map_contexts_c(levels_, scan, eob, (TX_SIZE)tx_size,
+                                  tx_class, coeff_contexts_ref_);
+      }
+      aom_usec_timer_mark(&timer_ref);
+
+      levels_ = set_levels(levels_buf_, height);
+      InitDataWithEob(scan, bhl, eob);
+
+      aom_usec_timer_start(&timer);
+      for (int i = 0; i < numTests; ++i) {
+        get_nz_map_contexts_func_(levels_, scan, eob, (TX_SIZE)tx_size,
+                                  tx_class, coeff_contexts_);
+      }
+      aom_usec_timer_mark(&timer);
+
+      const int elapsed_time_ref =
+          static_cast<int>(aom_usec_timer_elapsed(&timer_ref));
+      const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+      printf("get_nz_map_contexts_%2dx%2d: %7.1f ms ref %7.1f ms gain %4.2f\n",
+             real_width, real_height, elapsed_time / 1000.0,
+             elapsed_time_ref / 1000.0,
+             (elapsed_time_ref * 1.0) / (elapsed_time * 1.0));
+    }
+  }
+
+ private:
+  void InitDataWithEob(const int16_t *const scan, const int bhl,
+                       const int eob) {
+    memset(levels_buf_, 0, sizeof(levels_buf_));
+    memset(coeff_contexts_, 0, sizeof(*coeff_contexts_) * MAX_TX_SQUARE);
+
+    for (int c = 0; c < eob; ++c) {
+      levels_[get_padded_idx(scan[c], bhl)] =
+          static_cast<uint8_t>(clamp(rnd_.Rand8(), 0, INT8_MAX));
+      coeff_contexts_[scan[c]] = static_cast<int8_t>(rnd_.Rand16() >> 1);
+    }
+
+    memcpy(coeff_contexts_ref_, coeff_contexts_,
+           sizeof(*coeff_contexts_) * MAX_TX_SQUARE);
+  }
+
+  bool Compare(const int16_t *const scan, const int eob) const {
+    bool result = false;
+    if (memcmp(coeff_contexts_, coeff_contexts_ref_,
+               sizeof(*coeff_contexts_ref_) * MAX_TX_SQUARE)) {
+      for (int i = 0; i < eob; i++) {
+        const int pos = scan[i];
+        if (coeff_contexts_ref_[pos] != coeff_contexts_[pos]) {
+          printf("coeff_contexts_[%d] diff:%6d (ref),%6d (opt)\n", pos,
+                 coeff_contexts_ref_[pos], coeff_contexts_[pos]);
+          result = true;
+          break;
+        }
+      }
+    }
+    return result;
+  }
+
+  GetNzMapContextsFunc get_nz_map_contexts_func_;
+  ACMRandom rnd_;
+  uint8_t levels_buf_[TX_PAD_2D];
+  uint8_t *levels_;
+  int8_t *coeff_contexts_ref_;
+  int8_t *coeff_contexts_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(EncodeTxbTest);
+
+TEST_P(EncodeTxbTest, GetNzMapContexts) { GetNzMapContextsRun(); }
+
+TEST_P(EncodeTxbTest, DISABLED_SpeedTestGetNzMapContexts) {
+  SpeedTestGetNzMapContextsRun();
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, EncodeTxbTest,
+                         ::testing::Values(av1_get_nz_map_contexts_sse2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, EncodeTxbTest,
+                         ::testing::Values(av1_get_nz_map_contexts_neon));
+#endif
+
+typedef void (*av1_txb_init_levels_func)(const tran_low_t *const coeff,
+                                         const int width, const int height,
+                                         uint8_t *const levels);
+
+typedef std::tuple<av1_txb_init_levels_func, int> TxbInitLevelParam;
+
+class EncodeTxbInitLevelTest
+    : public ::testing::TestWithParam<TxbInitLevelParam> {
+ public:
+  ~EncodeTxbInitLevelTest() override = default;
+  void RunTest(av1_txb_init_levels_func test_func, int tx_size, int is_speed);
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(EncodeTxbInitLevelTest);
+
+void EncodeTxbInitLevelTest::RunTest(av1_txb_init_levels_func test_func,
+                                     int tx_size, int is_speed) {
+  const int width = get_txb_wide((TX_SIZE)tx_size);
+  const int height = get_txb_high((TX_SIZE)tx_size);
+  tran_low_t coeff[MAX_TX_SQUARE];
+
+  uint8_t levels_buf[2][TX_PAD_2D];
+  uint8_t *const levels0 = set_levels(levels_buf[0], height);
+  uint8_t *const levels1 = set_levels(levels_buf[1], height);
+
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int i = 0; i < width * height; i++) {
+    coeff[i] = rnd.Rand16Signed();
+  }
+  for (int i = 0; i < TX_PAD_2D; i++) {
+    levels_buf[0][i] = rnd.Rand8();
+    levels_buf[1][i] = rnd.Rand8();
+  }
+  const int run_times = is_speed ? (width * height) * 10000 : 1;
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_times; ++i) {
+    av1_txb_init_levels_c(coeff, width, height, levels0);
+  }
+  const double t1 = get_time_mark(&timer);
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_times; ++i) {
+    test_func(coeff, width, height, levels1);
+  }
+  const double t2 = get_time_mark(&timer);
+  if (is_speed) {
+    printf("init %3dx%-3d:%7.2f/%7.2fns", width, height, t1, t2);
+    printf("(%3.2f)\n", t1 / t2);
+  }
+  const int stride = width + TX_PAD_HOR;
+  for (int r = 0; r < height + TX_PAD_VER; ++r) {
+    for (int c = 0; c < stride; ++c) {
+      ASSERT_EQ(levels_buf[0][c + r * stride], levels_buf[1][c + r * stride])
+          << "[" << r << "," << c << "] " << run_times << width << "x"
+          << height;
+    }
+  }
+}
+
+TEST_P(EncodeTxbInitLevelTest, match) {
+  RunTest(GET_PARAM(0), GET_PARAM(1), 0);
+}
+
+TEST_P(EncodeTxbInitLevelTest, DISABLED_Speed) {
+  RunTest(GET_PARAM(0), GET_PARAM(1), 1);
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, EncodeTxbInitLevelTest,
+    ::testing::Combine(::testing::Values(&av1_txb_init_levels_sse4_1),
+                       ::testing::Range(0, static_cast<int>(TX_SIZES_ALL), 1)));
+#endif
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, EncodeTxbInitLevelTest,
+    ::testing::Combine(::testing::Values(&av1_txb_init_levels_avx2),
+                       ::testing::Range(0, static_cast<int>(TX_SIZES_ALL), 1)));
+#endif
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, EncodeTxbInitLevelTest,
+    ::testing::Combine(::testing::Values(&av1_txb_init_levels_neon),
+                       ::testing::Range(0, static_cast<int>(TX_SIZES_ALL), 1)));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/end_to_end_psnr_test.cc b/third_party/aom/test/end_to_end_psnr_test.cc
new file mode 100644
index 0000000000..687308da8c
--- /dev/null
+++ b/third_party/aom/test/end_to_end_psnr_test.cc
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+#include <ostream>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+const unsigned int kWidth = 160;
+const unsigned int kHeight = 90;
+const unsigned int kFramerate = 50;
+const unsigned int kFrames = 10;
+const int kBitrate = 500;
+const unsigned int kCqLevel = 18;
+// List of psnr thresholds for speed settings 0-8 and 4 encoding modes
+const double kPsnrThreshold[][4] = {
+  { 34.9, 44.4, 39.5, 41.9 }, { 34.9, 44.4, 39.5, 41.9 },
+  { 34.9, 44.4, 39.4, 41.9 }, { 34.9, 44.4, 39.1, 41.8 },
+  { 34.9, 44.4, 39.1, 41.8 }, { 34.9, 44.29, 38.5, 41.8 },
+  { 34.9, 44.3, 38.5, 41.3 }, { 34.9, 44.3, 38.5, 40.8 },
+  { 34.9, 44.3, 38.5, 40.8 }
+};
+
+typedef struct {
+  const char *filename;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) {
+  return os << "TestVideoParam { filename:" << test_arg.filename
+            << " input_bit_depth:" << test_arg.input_bit_depth
+            << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth
+            << " profile:" << test_arg.profile << " }";
+}
+
+const TestVideoParam kTestVectors[] = {
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, AOM_BITS_8, 2 },
+  { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
+#if CONFIG_AV1_HIGHBITDEPTH
+  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, AOM_BITS_10, 0 },
+  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, AOM_BITS_10, 2 },
+  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, AOM_BITS_10, 1 },
+  { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, AOM_BITS_12, 2 },
+#endif
+};
+
+// Encoding modes tested
+const libaom_test::TestMode kEncodingModeVectors[] = {
+  ::libaom_test::kTwoPassGood,
+  ::libaom_test::kOnePassGood,
+  ::libaom_test::kRealTime,
+};
+
+// Speed settings tested
+const int kCpuUsedVectors[] = { 1, 2, 3, 5, 6 };
+
+class EndToEndTest
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
+                                                 TestVideoParam, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  EndToEndTest()
+      : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(2)),
+        cpu_used_(GET_PARAM(3)), psnr_(0.0), nframes_(0),
+        encoding_mode_(GET_PARAM(1)) {}
+
+  ~EndToEndTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    if (encoding_mode_ == ::libaom_test::kOnePassGood ||
+        encoding_mode_ == ::libaom_test::kTwoPassGood) {
+      cfg_.g_lag_in_frames = 5;
+    } else if (encoding_mode_ == ::libaom_test::kRealTime) {
+      cfg_.rc_buf_sz = 1000;
+      cfg_.rc_buf_initial_sz = 500;
+      cfg_.rc_buf_optimal_sz = 600;
+    }
+  }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      // Test screen coding tools at cpu_used = 1 && encoding mode is two-pass.
+      if (cpu_used_ == 1 && encoding_mode_ == ::libaom_test::kTwoPassGood)
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
+      else
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+      if (encoding_mode_ == ::libaom_test::kOnePassGood ||
+          encoding_mode_ == ::libaom_test::kTwoPassGood) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      } else if (encoding_mode_ == ::libaom_test::kAllIntra) {
+        encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel);
+      }
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  double GetPsnrThreshold() {
+    return kPsnrThreshold[cpu_used_][encoding_mode_];
+  }
+
+  void DoTest() {
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    std::unique_ptr<libaom_test::VideoSource> video;
+    if (is_extension_y4m(test_video_param_.filename)) {
+      video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                  kFrames));
+    } else {
+      video.reset(new libaom_test::YUVVideoSource(
+          test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight,
+          kFramerate, 1, 0, kFrames));
+    }
+    ASSERT_NE(video, nullptr);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, GetPsnrThreshold() * 0.98)
+        << "cpu used = " << cpu_used_ << ", encoding mode = " << encoding_mode_;
+  }
+
+  TestVideoParam test_video_param_;
+  int cpu_used_;
+
+ private:
+  double psnr_;
+  unsigned int nframes_;
+  libaom_test::TestMode encoding_mode_;
+};
+
+class EndToEndTestLarge : public EndToEndTest {};
+
+class EndToEndAllIntraTestLarge : public EndToEndTest {};
+
+class EndToEndAllIntraTest : public EndToEndTest {};
+
+TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) { DoTest(); }
+
+TEST_P(EndToEndTest, EndtoEndPSNRTest) { DoTest(); }
+
+TEST_P(EndToEndAllIntraTestLarge, EndtoEndPSNRTest) { DoTest(); }
+
+TEST_P(EndToEndAllIntraTest, EndtoEndPSNRTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndTestLarge,
+                           ::testing::ValuesIn(kEncodingModeVectors),
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::ValuesIn(kCpuUsedVectors));
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndTest,
+                           ::testing::Values(::libaom_test::kTwoPassGood),
+                           ::testing::Values(kTestVectors[2]),  // 444
+                           ::testing::Values(3));               // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndAllIntraTestLarge,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::Values(2, 4, 6, 8));  // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndAllIntraTest,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(kTestVectors[0]),  // 420
+                           ::testing::Values(6));               // cpu_used
+}  // namespace
diff --git a/third_party/aom/test/end_to_end_qmpsnr_test.cc b/third_party/aom/test/end_to_end_qmpsnr_test.cc
new file mode 100644
index 0000000000..7a755a7a51
--- /dev/null
+++ b/third_party/aom/test/end_to_end_qmpsnr_test.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+
+#include "aom_ports/mem.h"
+#include "aom_dsp/ssim.h"
+#include "av1/common/blockd.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+const unsigned int kFrames = 10;
+const unsigned int kCqLevel = 18;
+// List of ssim thresholds for speed settings 0-8 with all intra encoding mode.
+const double kSsimThreshold[] = { 83.4, 83.4, 83.4, 83.3, 83.3,
+                                  83.0, 82.3, 81.1, 81.1 };
+
+typedef struct {
+  const char *filename;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) {
+  return os << "TestVideoParam { filename:" << test_arg.filename
+            << " input_bit_depth:" << test_arg.input_bit_depth
+            << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth
+            << " profile:" << test_arg.profile << " }";
+}
+
+const TestVideoParam kTestVectors[] = {
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, AOM_BITS_8, 2 },
+  { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
+#if CONFIG_AV1_HIGHBITDEPTH
+  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, AOM_BITS_10, 0 },
+  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, AOM_BITS_10, 2 },
+  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, AOM_BITS_10, 1 },
+  { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, AOM_BITS_12, 2 },
+#endif
+};
+
+// This class is used to check adherence to given ssim value, while using the
+// "dist-metric=qm-psnr" option.
+class EndToEndQMPSNRTest
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
+                                                 TestVideoParam, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  EndToEndQMPSNRTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        test_video_param_(GET_PARAM(2)), cpu_used_(GET_PARAM(3)), nframes_(0),
+        ssim_(0.0) {}
+
+  ~EndToEndQMPSNRTest() override = default;
+
+  void SetUp() override { InitializeConfig(encoding_mode_); }
+
+  void BeginPassHook(unsigned int) override {
+    nframes_ = 0;
+    ssim_ = 0.0;
+  }
+
+  void CalculateFrameLevelSSIM(const aom_image_t *img_src,
+                               const aom_image_t *img_enc,
+                               aom_bit_depth_t bit_depth,
+                               unsigned int input_bit_depth) override {
+    double frame_ssim;
+    double plane_ssim[MAX_MB_PLANE] = { 0.0, 0.0, 0.0 };
+    int crop_widths[PLANE_TYPES];
+    int crop_heights[PLANE_TYPES];
+    crop_widths[PLANE_TYPE_Y] = img_src->d_w;
+    crop_heights[PLANE_TYPE_Y] = img_src->d_h;
+    // Width of UV planes calculated based on chroma_shift values.
+    crop_widths[PLANE_TYPE_UV] =
+        img_src->x_chroma_shift == 1 ? (img_src->w + 1) >> 1 : img_src->w;
+    crop_heights[PLANE_TYPE_UV] =
+        img_src->y_chroma_shift == 1 ? (img_src->h + 1) >> 1 : img_src->h;
+    nframes_++;
+
+#if CONFIG_AV1_HIGHBITDEPTH
+    uint8_t is_hbd = bit_depth > AOM_BITS_8;
+    if (is_hbd) {
+      // HBD ssim calculation.
+      uint8_t shift = bit_depth - input_bit_depth;
+      for (int i = AOM_PLANE_Y; i < MAX_MB_PLANE; ++i) {
+        const int is_uv = i > AOM_PLANE_Y;
+        plane_ssim[i] = aom_highbd_ssim2(
+            CONVERT_TO_BYTEPTR(img_src->planes[i]),
+            CONVERT_TO_BYTEPTR(img_enc->planes[i]),
+            img_src->stride[is_uv] >> is_hbd, img_enc->stride[is_uv] >> is_hbd,
+            crop_widths[is_uv], crop_heights[is_uv], input_bit_depth, shift);
+      }
+      frame_ssim = plane_ssim[AOM_PLANE_Y] * .8 +
+                   .1 * (plane_ssim[AOM_PLANE_U] + plane_ssim[AOM_PLANE_V]);
+      // Accumulate to find sequence level ssim value.
+      ssim_ += frame_ssim;
+      return;
+    }
+#else
+    (void)bit_depth;
+    (void)input_bit_depth;
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+    // LBD ssim calculation.
+    for (int i = AOM_PLANE_Y; i < MAX_MB_PLANE; ++i) {
+      const int is_uv = i > AOM_PLANE_Y;
+      plane_ssim[i] = aom_ssim2(img_src->planes[i], img_enc->planes[i],
+                                img_src->stride[is_uv], img_enc->stride[is_uv],
+                                crop_widths[is_uv], crop_heights[is_uv]);
+    }
+    frame_ssim = plane_ssim[AOM_PLANE_Y] * .8 +
+                 .1 * (plane_ssim[AOM_PLANE_U] + plane_ssim[AOM_PLANE_V]);
+    // Accumulate to find sequence level ssim value.
+    ssim_ += frame_ssim;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AOME_SET_TUNING, AOM_TUNE_SSIM);
+      encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel);
+      encoder->SetOption("dist-metric", "qm-psnr");
+    }
+  }
+
+  double GetAverageSsim() const {
+    if (nframes_) return 100 * pow(ssim_ / nframes_, 8.0);
+    return 0.0;
+  }
+
+  double GetSsimThreshold() { return kSsimThreshold[cpu_used_]; }
+
+  void DoTest() {
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    std::unique_ptr<libaom_test::VideoSource> video(
+        new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                        kFrames));
+    ASSERT_NE(video, nullptr);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double ssim = GetAverageSsim();
+    EXPECT_GT(ssim, GetSsimThreshold())
+        << "encoding mode = " << encoding_mode_ << ", cpu used = " << cpu_used_;
+  }
+
+ private:
+  const libaom_test::TestMode encoding_mode_;
+  const TestVideoParam test_video_param_;
+  const int cpu_used_;
+  unsigned int nframes_;
+  double ssim_;
+};
+
+class EndToEndQMPSNRTestLarge : public EndToEndQMPSNRTest {};
+
+TEST_P(EndToEndQMPSNRTestLarge, EndtoEndQMPSNRTest) { DoTest(); }
+
+TEST_P(EndToEndQMPSNRTest, EndtoEndQMPSNRTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndQMPSNRTestLarge,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::Values(2, 4, 6, 8));  // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndQMPSNRTest,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(kTestVectors[0]),  // 420
+                           ::testing::Values(6));               // cpu_used
+}  // namespace
diff --git a/third_party/aom/test/end_to_end_ssim_test.cc b/third_party/aom/test/end_to_end_ssim_test.cc
new file mode 100644
index 0000000000..f1b0cae75f
--- /dev/null
+++ b/third_party/aom/test/end_to_end_ssim_test.cc
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "aom_ports/mem.h"
+#include "aom_dsp/ssim.h"
+#include "av1/common/blockd.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+const unsigned int kFrames = 10;
+const unsigned int kCqLevel = 18;
+// List of ssim thresholds for speed settings 0-8 with all intra encoding mode.
+const double kSsimThreshold[] = { 83.4, 83.4, 83.4, 83.3, 83.3,
+                                  83.0, 82.3, 81.1, 81.1 };
+
+typedef struct {
+  const char *filename;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) {
+  return os << "TestVideoParam { filename:" << test_arg.filename
+            << " input_bit_depth:" << test_arg.input_bit_depth
+            << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth
+            << " profile:" << test_arg.profile << " }";
+}
+
+const TestVideoParam kTestVectors[] = {
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, AOM_BITS_8, 2 },
+  { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
+#if CONFIG_AV1_HIGHBITDEPTH
+  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, AOM_BITS_10, 0 },
+  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, AOM_BITS_10, 2 },
+  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, AOM_BITS_10, 1 },
+  { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, AOM_BITS_12, 2 },
+#endif
+};
+
+// This class is used to check adherence to given ssim value.
+class EndToEndSSIMTest
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
+                                                 TestVideoParam, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  EndToEndSSIMTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        test_video_param_(GET_PARAM(2)), cpu_used_(GET_PARAM(3)), nframes_(0),
+        ssim_(0.0) {}
+
+  ~EndToEndSSIMTest() override = default;
+
+  void SetUp() override { InitializeConfig(encoding_mode_); }
+
+  void BeginPassHook(unsigned int) override {
+    nframes_ = 0;
+    ssim_ = 0.0;
+  }
+
+  void CalculateFrameLevelSSIM(const aom_image_t *img_src,
+                               const aom_image_t *img_enc,
+                               aom_bit_depth_t bit_depth,
+                               unsigned int input_bit_depth) override {
+    double frame_ssim;
+    double plane_ssim[MAX_MB_PLANE] = { 0.0, 0.0, 0.0 };
+    int crop_widths[PLANE_TYPES];
+    int crop_heights[PLANE_TYPES];
+    crop_widths[PLANE_TYPE_Y] = img_src->d_w;
+    crop_heights[PLANE_TYPE_Y] = img_src->d_h;
+    // Width of UV planes calculated based on chroma_shift values.
+    crop_widths[PLANE_TYPE_UV] =
+        img_src->x_chroma_shift == 1 ? (img_src->w + 1) >> 1 : img_src->w;
+    crop_heights[PLANE_TYPE_UV] =
+        img_src->y_chroma_shift == 1 ? (img_src->h + 1) >> 1 : img_src->h;
+    nframes_++;
+
+#if CONFIG_AV1_HIGHBITDEPTH
+    uint8_t is_hbd = bit_depth > AOM_BITS_8;
+    if (is_hbd) {
+      // HBD ssim calculation.
+      uint8_t shift = bit_depth - input_bit_depth;
+      for (int i = AOM_PLANE_Y; i < MAX_MB_PLANE; ++i) {
+        const int is_uv = i > AOM_PLANE_Y;
+        plane_ssim[i] = aom_highbd_ssim2(
+            CONVERT_TO_BYTEPTR(img_src->planes[i]),
+            CONVERT_TO_BYTEPTR(img_enc->planes[i]),
+            img_src->stride[is_uv] >> is_hbd, img_enc->stride[is_uv] >> is_hbd,
+            crop_widths[is_uv], crop_heights[is_uv], input_bit_depth, shift);
+      }
+      frame_ssim = plane_ssim[AOM_PLANE_Y] * .8 +
+                   .1 * (plane_ssim[AOM_PLANE_U] + plane_ssim[AOM_PLANE_V]);
+      // Accumulate to find sequence level ssim value.
+      ssim_ += frame_ssim;
+      return;
+    }
+#else
+    (void)bit_depth;
+    (void)input_bit_depth;
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+    // LBD ssim calculation.
+    for (int i = AOM_PLANE_Y; i < MAX_MB_PLANE; ++i) {
+      const int is_uv = i > AOM_PLANE_Y;
+      plane_ssim[i] = aom_ssim2(img_src->planes[i], img_enc->planes[i],
+                                img_src->stride[is_uv], img_enc->stride[is_uv],
+                                crop_widths[is_uv], crop_heights[is_uv]);
+    }
+    frame_ssim = plane_ssim[AOM_PLANE_Y] * .8 +
+                 .1 * (plane_ssim[AOM_PLANE_U] + plane_ssim[AOM_PLANE_V]);
+    // Accumulate to find sequence level ssim value.
+    ssim_ += frame_ssim;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AOME_SET_TUNING, AOM_TUNE_SSIM);
+      encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel);
+    }
+  }
+
+  double GetAverageSsim() const {
+    if (nframes_) return 100 * pow(ssim_ / nframes_, 8.0);
+    return 0.0;
+  }
+
+  double GetSsimThreshold() { return kSsimThreshold[cpu_used_]; }
+
+  void DoTest() {
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    std::unique_ptr<libaom_test::VideoSource> video(
+        new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                        kFrames));
+    ASSERT_NE(video, nullptr);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double ssim = GetAverageSsim();
+    EXPECT_GT(ssim, GetSsimThreshold())
+        << "encoding mode = " << encoding_mode_ << ", cpu used = " << cpu_used_;
+  }
+
+ private:
+  const libaom_test::TestMode encoding_mode_;
+  const TestVideoParam test_video_param_;
+  const int cpu_used_;
+  unsigned int nframes_;
+  double ssim_;
+};
+
+class EndToEndSSIMTestLarge : public EndToEndSSIMTest {};
+
+TEST_P(EndToEndSSIMTestLarge, EndtoEndSSIMTest) { DoTest(); }
+
+TEST_P(EndToEndSSIMTest, EndtoEndSSIMTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndSSIMTestLarge,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::Values(2, 4, 6, 8));  // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndSSIMTest,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(kTestVectors[0]),  // 420
+                           ::testing::Values(6));               // cpu_used
+}  // namespace
diff --git a/third_party/aom/test/error_block_test.cc b/third_party/aom/test/error_block_test.cc
new file mode 100644
index 0000000000..e7cd870a98
--- /dev/null
+++ b/third_party/aom/test/error_block_test.cc
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/entropy.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+const int kNumIterations = 1000;
+
+using ErrorBlockFunc = int64_t (*)(const tran_low_t *coeff,
+                                   const tran_low_t *dqcoeff,
+                                   intptr_t block_size, int64_t *ssz, int bps);
+
+using ErrorBlockFunc8Bits = int64_t (*)(const tran_low_t *coeff,
+                                        const tran_low_t *dqcoeff,
+                                        intptr_t block_size, int64_t *ssz);
+
+using ErrorBlockLpFunc = int64_t (*)(const int16_t *coeff,
+                                     const int16_t *dqcoeff,
+                                     intptr_t block_size);
+
+using ErrorBlockParam =
+    std::tuple<ErrorBlockFunc, ErrorBlockFunc, aom_bit_depth_t>;
+
+template <ErrorBlockFunc8Bits fn>
+int64_t BlockError8BitWrapper(const tran_low_t *coeff,
+                              const tran_low_t *dqcoeff, intptr_t block_size,
+                              int64_t *ssz, int bps) {
+  EXPECT_EQ(bps, 8);
+  return fn(coeff, dqcoeff, block_size, ssz);
+}
+
+template <ErrorBlockLpFunc fn>
+int64_t BlockErrorLpWrapper(const tran_low_t *coeff, const tran_low_t *dqcoeff,
+                            intptr_t block_size, int64_t *ssz, int bps) {
+  EXPECT_EQ(bps, 8);
+  *ssz = -1;
+  return fn(reinterpret_cast<const int16_t *>(coeff),
+            reinterpret_cast<const int16_t *>(dqcoeff), block_size);
+}
+
+class ErrorBlockTest : public ::testing::TestWithParam<ErrorBlockParam> {
+ public:
+  ~ErrorBlockTest() override = default;
+  void SetUp() override {
+    error_block_op_ = GET_PARAM(0);
+    ref_error_block_op_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+  }
+
+ protected:
+  aom_bit_depth_t bit_depth_;
+  ErrorBlockFunc error_block_op_;
+  ErrorBlockFunc ref_error_block_op_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ErrorBlockTest);
+
+TEST_P(ErrorBlockTest, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  intptr_t block_size;
+  int64_t ssz;
+  int64_t ret;
+  int64_t ref_ssz;
+  int64_t ref_ret;
+  const int msb = bit_depth_ + 8 - 1;
+  for (int i = 0; i < kNumIterations; ++i) {
+    int err_count = 0;
+    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
+    for (int j = 0; j < block_size; j++) {
+      // coeff and dqcoeff will always have at least the same sign, and this
+      // can be used for optimization, so generate test input precisely.
+      if (rnd(2)) {
+        // Positive number
+        coeff[j] = rnd(1 << msb);
+        dqcoeff[j] = rnd(1 << msb);
+      } else {
+        // Negative number
+        coeff[j] = -rnd(1 << msb);
+        dqcoeff[j] = -rnd(1 << msb);
+      }
+    }
+    ref_ret =
+        ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
+    API_REGISTER_STATE_CHECK(
+        ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_));
+    err_count += (ref_ret != ret) | (ref_ssz != ssz);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Error Block Test, C output doesn't match optimized output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(ErrorBlockTest, ExtremeValues) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  intptr_t block_size;
+  int64_t ssz;
+  int64_t ret;
+  int64_t ref_ssz;
+  int64_t ref_ret;
+  const int msb = bit_depth_ + 8 - 1;
+  int max_val = ((1 << msb) - 1);
+  for (int i = 0; i < kNumIterations; ++i) {
+    int err_count = 0;
+    int k = (i / 9) % 9;
+
+    // Change the maximum coeff value, to test different bit boundaries
+    if (k == 8 && (i % 9) == 0) {
+      max_val >>= 1;
+    }
+    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
+    for (int j = 0; j < block_size; j++) {
+      if (k < 4) {
+        // Test at positive maximum values
+        coeff[j] = k % 2 ? max_val : 0;
+        dqcoeff[j] = (k >> 1) % 2 ? max_val : 0;
+      } else if (k < 8) {
+        // Test at negative maximum values
+        coeff[j] = k % 2 ? -max_val : 0;
+        dqcoeff[j] = (k >> 1) % 2 ? -max_val : 0;
+      } else {
+        if (rnd(2)) {
+          // Positive number
+          coeff[j] = rnd(1 << 14);
+          dqcoeff[j] = rnd(1 << 14);
+        } else {
+          // Negative number
+          coeff[j] = -rnd(1 << 14);
+          dqcoeff[j] = -rnd(1 << 14);
+        }
+      }
+    }
+    ref_ret =
+        ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
+    API_REGISTER_STATE_CHECK(
+        ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_));
+    err_count += (ref_ret != ret) | (ref_ssz != ssz);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Error Block Test, C output doesn't match optimized output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(ErrorBlockTest, DISABLED_Speed) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
+  intptr_t block_size;
+  int64_t ssz;
+  int num_iters = 100000;
+  int64_t ref_ssz;
+  const int msb = bit_depth_ + 8 - 1;
+  for (int i = 0; i < 9; ++i) {
+    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
+    for (int k = 0; k < 9; k++) {
+      for (int j = 0; j < block_size; j++) {
+        if (k < 5) {
+          if (rnd(2)) {
+            // Positive number
+            coeff[j] = rnd(1 << msb);
+            dqcoeff[j] = rnd(1 << msb);
+          } else {
+            // Negative number
+            coeff[j] = -rnd(1 << msb);
+            dqcoeff[j] = -rnd(1 << msb);
+          }
+        } else {
+          if (rnd(2)) {
+            // Positive number
+            coeff[j] = rnd(1 << 14);
+            dqcoeff[j] = rnd(1 << 14);
+          } else {
+            // Negative number
+            coeff[j] = -rnd(1 << 14);
+            dqcoeff[j] = -rnd(1 << 14);
+          }
+        }
+      }
+      aom_usec_timer ref_timer, test_timer;
+
+      aom_usec_timer_start(&ref_timer);
+      for (int iter = 0; iter < num_iters; ++iter) {
+        ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
+      }
+      aom_usec_timer_mark(&ref_timer);
+      const int elapsed_time_c =
+          static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+      aom_usec_timer_start(&test_timer);
+      for (int iter = 0; iter < num_iters; ++iter) {
+        error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_);
+      }
+      aom_usec_timer_mark(&test_timer);
+
+      const int elapsed_time_simd =
+          static_cast<int>(aom_usec_timer_elapsed(&test_timer));
+
+      printf(
+          " c_time=%d \t simd_time=%d \t "
+          "gain=%d \n",
+          elapsed_time_c, elapsed_time_simd,
+          (elapsed_time_c / elapsed_time_simd));
+    }
+  }
+}
+
+using std::make_tuple;
+
+#if HAVE_SSE2
+const ErrorBlockParam kErrorBlockTestParamsSse2[] = {
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(&av1_highbd_block_error_sse2, &av1_highbd_block_error_c,
+             AOM_BITS_10),
+  make_tuple(&av1_highbd_block_error_sse2, &av1_highbd_block_error_c,
+             AOM_BITS_12),
+  make_tuple(&av1_highbd_block_error_sse2, &av1_highbd_block_error_c,
+             AOM_BITS_8),
+#endif
+  make_tuple(&BlockError8BitWrapper<av1_block_error_sse2>,
+             &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8),
+  make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_sse2>,
+             &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, ErrorBlockTest,
+                         ::testing::ValuesIn(kErrorBlockTestParamsSse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2
+const ErrorBlockParam kErrorBlockTestParamsAvx2[] = {
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(&av1_highbd_block_error_avx2, &av1_highbd_block_error_c,
+             AOM_BITS_10),
+  make_tuple(&av1_highbd_block_error_avx2, &av1_highbd_block_error_c,
+             AOM_BITS_12),
+  make_tuple(&av1_highbd_block_error_avx2, &av1_highbd_block_error_c,
+             AOM_BITS_8),
+#endif
+  make_tuple(&BlockError8BitWrapper<av1_block_error_avx2>,
+             &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8),
+  make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_avx2>,
+             &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, ErrorBlockTest,
+                         ::testing::ValuesIn(kErrorBlockTestParamsAvx2));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+const ErrorBlockParam kErrorBlockTestParamsNeon[] = {
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(&av1_highbd_block_error_neon, &av1_highbd_block_error_c,
+             AOM_BITS_10),
+  make_tuple(&av1_highbd_block_error_neon, &av1_highbd_block_error_c,
+             AOM_BITS_12),
+  make_tuple(&av1_highbd_block_error_neon, &av1_highbd_block_error_c,
+             AOM_BITS_8),
+#endif
+  make_tuple(&BlockError8BitWrapper<av1_block_error_neon>,
+             &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8),
+  make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_neon>,
+             &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, ErrorBlockTest,
+                         ::testing::ValuesIn(kErrorBlockTestParamsNeon));
+#endif  // HAVE_NEON
+
+#if HAVE_SVE
+const ErrorBlockParam kErrorBlockTestParamsSVE[] = {
+  make_tuple(&BlockError8BitWrapper<av1_block_error_sve>,
+             &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8),
+  make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_sve>,
+             &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_SUITE_P(SVE, ErrorBlockTest,
+                         ::testing::ValuesIn(kErrorBlockTestParamsSVE));
+#endif  // HAVE_SVE
+}  // namespace
diff --git a/third_party/aom/test/error_resilience_test.cc b/third_party/aom/test/error_resilience_test.cc
new file mode 100644
index 0000000000..d41884df2b
--- /dev/null
+++ b/third_party/aom/test/error_resilience_test.cc
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+const int kMaxErrorFrames = 12;
+const int kMaxInvisibleErrorFrames = 12;
+const int kMaxDroppableFrames = 12;
+const int kMaxErrorResilientFrames = 12;
+const int kMaxNoMFMVFrames = 12;
+const int kMaxPrimRefNoneFrames = 12;
+const int kMaxSFrames = 12;
+const int kCpuUsed = 1;
+
+class ErrorResilienceTestLarge
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  ErrorResilienceTestLarge()
+      : EncoderTest(GET_PARAM(0)), psnr_(0.0), nframes_(0), mismatch_psnr_(0.0),
+        mismatch_nframes_(0), encoding_mode_(GET_PARAM(1)), allow_mismatch_(0),
+        enable_altref_(GET_PARAM(2)) {
+    Reset();
+  }
+
+  ~ErrorResilienceTestLarge() override = default;
+
+  void Reset() {
+    error_nframes_ = 0;
+    invisible_error_nframes_ = 0;
+    droppable_nframes_ = 0;
+    error_resilient_nframes_ = 0;
+    nomfmv_nframes_ = 0;
+    prim_ref_none_nframes_ = 0;
+    s_nframes_ = 0;
+  }
+
+  void SetupEncoder(int bitrate, int lag) {
+    const aom_rational timebase = { 33333333, 1000000000 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_target_bitrate = bitrate;
+    cfg_.kf_mode = AOM_KF_DISABLED;
+    cfg_.g_lag_in_frames = lag;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+  }
+
+  void SetUp() override { InitializeConfig(encoding_mode_); }
+
+  void BeginPassHook(unsigned int /*pass*/) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+    decoded_nframes_ = 0;
+    mismatch_psnr_ = 0.0;
+    mismatch_nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                          libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, enable_altref_);
+    }
+    frame_flags_ &=
+        ~(AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF |
+          AOM_EFLAG_NO_REF_FRAME_MVS | AOM_EFLAG_ERROR_RESILIENT |
+          AOM_EFLAG_SET_S_FRAME | AOM_EFLAG_SET_PRIMARY_REF_NONE);
+    if (droppable_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < droppable_nframes_; ++i) {
+        if (droppable_frames_[i] == video->frame()) {
+          std::cout << "             Encoding droppable frame: "
+                    << droppable_frames_[i] << "\n";
+          frame_flags_ |= (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
+                           AOM_EFLAG_NO_UPD_ARF);
+          break;
+        }
+      }
+    }
+
+    if (error_resilient_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < error_resilient_nframes_; ++i) {
+        if (error_resilient_frames_[i] == video->frame()) {
+          std::cout << "             Encoding error_resilient frame: "
+                    << error_resilient_frames_[i] << "\n";
+          frame_flags_ |= AOM_EFLAG_ERROR_RESILIENT;
+          break;
+        }
+      }
+    }
+
+    if (nomfmv_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < nomfmv_nframes_; ++i) {
+        if (nomfmv_frames_[i] == video->frame()) {
+          std::cout << "             Encoding no mfmv frame: "
+                    << nomfmv_frames_[i] << "\n";
+          frame_flags_ |= AOM_EFLAG_NO_REF_FRAME_MVS;
+          break;
+        }
+      }
+    }
+
+    if (prim_ref_none_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < prim_ref_none_nframes_; ++i) {
+        if (prim_ref_none_frames_[i] == video->frame()) {
+          std::cout << "             Encoding no PRIMARY_REF_NONE frame: "
+                    << prim_ref_none_frames_[i] << "\n";
+          frame_flags_ |= AOM_EFLAG_SET_PRIMARY_REF_NONE;
+          break;
+        }
+      }
+    }
+
+    encoder->Control(AV1E_SET_S_FRAME_MODE, 0);
+    if (s_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < s_nframes_; ++i) {
+        if (s_frames_[i] == video->frame()) {
+          std::cout << "             Encoding S frame: " << s_frames_[i]
+                    << "\n";
+          frame_flags_ |= AOM_EFLAG_SET_S_FRAME;
+          break;
+        }
+      }
+    }
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    // Check that the encode frame flags are correctly reflected
+    // in the output frame flags.
+    const int encode_flags = pkt->data.frame.flags >> 16;
+    if ((encode_flags & (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
+                         AOM_EFLAG_NO_UPD_ARF)) ==
+        (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF)) {
+      ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_DROPPABLE,
+                AOM_FRAME_IS_DROPPABLE);
+    }
+    if (encode_flags & AOM_EFLAG_SET_S_FRAME) {
+      ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_SWITCH,
+                AOM_FRAME_IS_SWITCH);
+    }
+    if (encode_flags & AOM_EFLAG_ERROR_RESILIENT) {
+      ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_ERROR_RESILIENT,
+                AOM_FRAME_IS_ERROR_RESILIENT);
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  double GetAverageMismatchPsnr() const {
+    if (mismatch_nframes_) return mismatch_psnr_ / mismatch_nframes_;
+    return 0.0;
+  }
+
+  bool DoDecode() const override {
+    if (error_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < error_nframes_; ++i) {
+        if (error_frames_[i] == nframes_ - 1) {
+          std::cout << "             Skipping decoding frame: "
+                    << error_frames_[i] << "\n";
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  bool DoDecodeInvisible() const override {
+    if (invisible_error_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < invisible_error_nframes_; ++i) {
+        if (invisible_error_frames_[i] == nframes_ - 1) {
+          std::cout << "             Skipping decoding all invisible frames in "
+                       "frame pkt: "
+                    << invisible_error_frames_[i] << "\n";
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) override {
+    if (allow_mismatch_) {
+      double mismatch_psnr = compute_psnr(img1, img2);
+      mismatch_psnr_ += mismatch_psnr;
+      ++mismatch_nframes_;
+      // std::cout << "Mismatch frame psnr: " << mismatch_psnr << "\n";
+    } else {
+      ::libaom_test::EncoderTest::MismatchHook(img1, img2);
+    }
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             aom_codec_pts_t pts) override {
+    (void)img;
+    (void)pts;
+    ++decoded_nframes_;
+  }
+
+  void SetErrorFrames(int num, unsigned int *list) {
+    if (num > kMaxErrorFrames)
+      num = kMaxErrorFrames;
+    else if (num < 0)
+      num = 0;
+    error_nframes_ = num;
+    for (unsigned int i = 0; i < error_nframes_; ++i)
+      error_frames_[i] = list[i];
+  }
+
+  void SetInvisibleErrorFrames(int num, unsigned int *list) {
+    if (num > kMaxInvisibleErrorFrames)
+      num = kMaxInvisibleErrorFrames;
+    else if (num < 0)
+      num = 0;
+    invisible_error_nframes_ = num;
+    for (unsigned int i = 0; i < invisible_error_nframes_; ++i)
+      invisible_error_frames_[i] = list[i];
+  }
+
+  void SetDroppableFrames(int num, unsigned int *list) {
+    if (num > kMaxDroppableFrames)
+      num = kMaxDroppableFrames;
+    else if (num < 0)
+      num = 0;
+    droppable_nframes_ = num;
+    for (unsigned int i = 0; i < droppable_nframes_; ++i)
+      droppable_frames_[i] = list[i];
+  }
+
+  void SetErrorResilientFrames(int num, unsigned int *list) {
+    if (num > kMaxErrorResilientFrames)
+      num = kMaxErrorResilientFrames;
+    else if (num < 0)
+      num = 0;
+    error_resilient_nframes_ = num;
+    for (unsigned int i = 0; i < error_resilient_nframes_; ++i)
+      error_resilient_frames_[i] = list[i];
+  }
+
+  void SetNoMFMVFrames(int num, unsigned int *list) {
+    if (num > kMaxNoMFMVFrames)
+      num = kMaxNoMFMVFrames;
+    else if (num < 0)
+      num = 0;
+    nomfmv_nframes_ = num;
+    for (unsigned int i = 0; i < nomfmv_nframes_; ++i)
+      nomfmv_frames_[i] = list[i];
+  }
+
+  void SetPrimaryRefNoneFrames(int num, unsigned int *list) {
+    if (num > kMaxPrimRefNoneFrames)
+      num = kMaxPrimRefNoneFrames;
+    else if (num < 0)
+      num = 0;
+    prim_ref_none_nframes_ = num;
+    for (unsigned int i = 0; i < prim_ref_none_nframes_; ++i)
+      prim_ref_none_frames_[i] = list[i];
+  }
+
+  void SetSFrames(int num, unsigned int *list) {
+    if (num > kMaxSFrames)
+      num = kMaxSFrames;
+    else if (num < 0)
+      num = 0;
+    s_nframes_ = num;
+    for (unsigned int i = 0; i < s_nframes_; ++i) s_frames_[i] = list[i];
+  }
+
+  unsigned int GetMismatchFrames() { return mismatch_nframes_; }
+  unsigned int GetEncodedFrames() { return nframes_; }
+  unsigned int GetDecodedFrames() { return decoded_nframes_; }
+
+  void SetAllowMismatch(int allow) { allow_mismatch_ = allow; }
+
+ private:
+  double psnr_;
+  unsigned int nframes_;
+  unsigned int decoded_nframes_;
+  unsigned int error_nframes_;
+  unsigned int invisible_error_nframes_;
+  unsigned int droppable_nframes_;
+  unsigned int error_resilient_nframes_;
+  unsigned int nomfmv_nframes_;
+  unsigned int prim_ref_none_nframes_;
+  unsigned int s_nframes_;
+  double mismatch_psnr_;
+  unsigned int mismatch_nframes_;
+  unsigned int error_frames_[kMaxErrorFrames];
+  unsigned int invisible_error_frames_[kMaxInvisibleErrorFrames];
+  unsigned int droppable_frames_[kMaxDroppableFrames];
+  unsigned int error_resilient_frames_[kMaxErrorResilientFrames];
+  unsigned int nomfmv_frames_[kMaxNoMFMVFrames];
+  unsigned int prim_ref_none_frames_[kMaxPrimRefNoneFrames];
+  unsigned int s_frames_[kMaxSFrames];
+  libaom_test::TestMode encoding_mode_;
+  int allow_mismatch_;
+  int enable_altref_;
+};
+
+TEST_P(ErrorResilienceTestLarge, OnVersusOff) {
+  SetupEncoder(2000, 10);
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 12);
+
+  // Global error resilient mode OFF.
+  cfg_.g_error_resilient = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_resilience_off = GetAveragePsnr();
+  EXPECT_GT(psnr_resilience_off, 25.0);
+
+  Reset();
+  // Error resilient mode ON for certain frames
+  unsigned int num_error_resilient_frames = 5;
+  unsigned int error_resilient_frame_list[] = { 3, 5, 6, 9, 11 };
+  SetErrorResilientFrames(num_error_resilient_frames,
+                          error_resilient_frame_list);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_resilience_on = GetAveragePsnr();
+  EXPECT_GT(psnr_resilience_on, 25.0);
+
+  // Test that turning on error resilient mode hurts by 10% at most.
+  if (psnr_resilience_off > 0.0) {
+    const double psnr_ratio = psnr_resilience_on / psnr_resilience_off;
+    EXPECT_GE(psnr_ratio, 0.9);
+    EXPECT_LE(psnr_ratio, 1.1);
+  }
+}
+
+// Check for successful decoding and no encoder/decoder mismatch
+// if we lose (i.e., drop before decoding) a set of droppable
+// frames (i.e., frames that don't update any reference buffers).
+TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
+  if (GET_PARAM(1) == ::libaom_test::kOnePassGood && GET_PARAM(2) == 1) {
+    fprintf(stderr, "Skipping test case #1 because of bug aomedia:3002\n");
+    return;
+  }
+  SetupEncoder(500, 10);
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 20);
+
+  // Set an arbitrary set of error frames same as droppable frames.
+  unsigned int num_droppable_frames = 3;
+  unsigned int droppable_frame_list[] = { 5, 11, 13 };
+  SetDroppableFrames(num_droppable_frames, droppable_frame_list);
+  SetErrorFrames(num_droppable_frames, droppable_frame_list);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Test that no mismatches have been found
+  std::cout << "             Encoded frames: " << GetEncodedFrames() << "\n";
+  std::cout << "             Decoded frames: " << GetDecodedFrames() << "\n";
+  std::cout << "             Mismatch frames: " << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_droppable_frames);
+}
+
+// Check for ParseAbility property of an error-resilient frame.
+// Encode a frame in error-resilient mode (E-frame), and disallow all
+// subsequent frames from using MFMV. If frames are dropped before the
+// E frame, all frames starting from the E frame should be parse-able.
+TEST_P(ErrorResilienceTestLarge, ParseAbilityTest) {
+  SetupEncoder(500, 10);
+
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 15);
+
+  SetAllowMismatch(1);
+
+  // Note that an E-frame cannot be forced on a frame that is a
+  // show_existing_frame, or a frame that comes directly after an invisible
+  // frame. Currently, this will cause an assertion failure.
+  // Set an arbitrary error resilient (E) frame
+  unsigned int num_error_resilient_frames = 1;
+  unsigned int error_resilient_frame_list[] = { 8 };
+  SetErrorResilientFrames(num_error_resilient_frames,
+                          error_resilient_frame_list);
+  // Ensure that any invisible frames before the E frame are dropped
+  SetInvisibleErrorFrames(num_error_resilient_frames,
+                          error_resilient_frame_list);
+  // Set all frames after the error resilient frame to not allow MFMV
+  unsigned int num_post_error_resilient_frames = 6;
+  unsigned int post_error_resilient_frame_list[] = { 9, 10, 11, 12, 13, 14 };
+  SetNoMFMVFrames(num_post_error_resilient_frames,
+                  post_error_resilient_frame_list);
+
+  // Set a few frames before the E frame that are lost (not decoded)
+  unsigned int num_error_frames = 5;
+  unsigned int error_frame_list[] = { 3, 4, 5, 6, 7 };
+  SetErrorFrames(num_error_frames, error_frame_list);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  std::cout << "             Encoded frames: " << GetEncodedFrames() << "\n";
+  std::cout << "             Decoded frames: " << GetDecodedFrames() << "\n";
+  std::cout << "             Mismatch frames: " << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_error_frames);
+  // All frames following the E-frame and the E-frame are expected to have
+  // mismatches, but still be parse-able.
+  EXPECT_LE(GetMismatchFrames(), num_post_error_resilient_frames + 1);
+}
+
+// Check for ParseAbility property of an S frame.
+// Encode an S-frame. If frames are dropped before the S-frame, all frames
+// starting from the S frame should be parse-able.
+TEST_P(ErrorResilienceTestLarge, SFrameTest) {
+  SetupEncoder(500, 10);
+
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 15);
+
+  SetAllowMismatch(1);
+
+  // Note that an S-frame cannot be forced on a frame that is a
+  // show_existing_frame. This issue still needs to be addressed.
+  // Set an arbitrary S-frame
+  unsigned int num_s_frames = 1;
+  unsigned int s_frame_list[] = { 6 };
+  SetSFrames(num_s_frames, s_frame_list);
+  // Ensure that any invisible frames before the S frame are dropped
+  SetInvisibleErrorFrames(num_s_frames, s_frame_list);
+
+  // Set a few frames before the S frame that are lost (not decoded)
+  unsigned int num_error_frames = 4;
+  unsigned int error_frame_list[] = { 2, 3, 4, 5 };
+  SetErrorFrames(num_error_frames, error_frame_list);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  std::cout << "             Encoded frames: " << GetEncodedFrames() << "\n";
+  std::cout << "             Decoded frames: " << GetDecodedFrames() << "\n";
+  std::cout << "             Mismatch frames: " << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_error_frames);
+  // All frames following the S-frame and the S-frame are expected to have
+  // mismatches, but still be parse-able.
+  EXPECT_LE(GetMismatchFrames(), GetEncodedFrames() - s_frame_list[0]);
+}
+
+AV1_INSTANTIATE_TEST_SUITE(ErrorResilienceTestLarge, NONREALTIME_TEST_MODES,
+                           ::testing::Values(0, 1));
+}  // namespace
diff --git a/third_party/aom/test/ethread_test.cc b/third_party/aom/test/ethread_test.cc
new file mode 100644
index 0000000000..ce45394eb8
--- /dev/null
+++ b/third_party/aom/test/ethread_test.cc
@@ -0,0 +1,577 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <string>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+#include "av1/encoder/firstpass.h"
+
+namespace {
+const unsigned int kCqLevel = 18;
+
+#if !CONFIG_REALTIME_ONLY
+const size_t kFirstPassStatsSz = sizeof(FIRSTPASS_STATS);
+class AVxFirstPassEncoderThreadTest
+    : public ::libaom_test::CodecTestWith4Params<libaom_test::TestMode, int,
+                                                 int, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AVxFirstPassEncoderThreadTest()
+      : EncoderTest(GET_PARAM(0)), encoder_initialized_(false),
+        encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)),
+        tile_rows_(GET_PARAM(3)), tile_cols_(GET_PARAM(4)) {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+
+    row_mt_ = 1;
+    firstpass_stats_.buf = nullptr;
+    firstpass_stats_.sz = 0;
+  }
+  ~AVxFirstPassEncoderThreadTest() override { free(firstpass_stats_.buf); }
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+
+    cfg_.g_lag_in_frames = 35;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.rc_2pass_vbr_minsection_pct = 5;
+    cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_min_quantizer = 0;
+  }
+
+  void BeginPassHook(unsigned int /*pass*/) override {
+    encoder_initialized_ = false;
+    abort_ = false;
+  }
+
+  void EndPassHook() override {
+    // For first pass stats test, only run first pass encoder.
+    if (cfg_.g_pass == AOM_RC_FIRST_PASS) abort_ = true;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource * /*video*/,
+                          ::libaom_test::Encoder *encoder) override {
+    if (!encoder_initialized_) {
+      // Encode in 2-pass mode.
+      SetTileSize(encoder);
+      encoder->Control(AV1E_SET_ROW_MT, row_mt_);
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 0);
+
+      encoder_initialized_ = true;
+    }
+  }
+
+  virtual void SetTileSize(libaom_test::Encoder *encoder) {
+    encoder->Control(AV1E_SET_TILE_COLUMNS, tile_cols_);
+    encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_);
+  }
+
+  void StatsPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    const uint8_t *const pkt_buf =
+        reinterpret_cast<uint8_t *>(pkt->data.twopass_stats.buf);
+    const size_t pkt_size = pkt->data.twopass_stats.sz;
+
+    // First pass stats size equals sizeof(FIRSTPASS_STATS)
+    EXPECT_EQ(pkt_size, kFirstPassStatsSz)
+        << "Error: First pass stats size doesn't equal kFirstPassStatsSz";
+
+    firstpass_stats_.buf =
+        realloc(firstpass_stats_.buf, firstpass_stats_.sz + pkt_size);
+    ASSERT_NE(firstpass_stats_.buf, nullptr);
+    memcpy((uint8_t *)firstpass_stats_.buf + firstpass_stats_.sz, pkt_buf,
+           pkt_size);
+    firstpass_stats_.sz += pkt_size;
+  }
+
+  bool encoder_initialized_;
+  ::libaom_test::TestMode encoding_mode_;
+  int set_cpu_used_;
+  int tile_rows_;
+  int tile_cols_;
+  int row_mt_;
+  aom_fixed_buf_t firstpass_stats_;
+};
+
+static void compare_fp_stats_md5(aom_fixed_buf_t *fp_stats) {
+  // fp_stats consists of 2 set of first pass encoding stats. These 2 set of
+  // stats are compared to check if the stats match.
+  uint8_t *stats1 = reinterpret_cast<uint8_t *>(fp_stats->buf);
+  uint8_t *stats2 = stats1 + fp_stats->sz / 2;
+  ::libaom_test::MD5 md5_row_mt_0, md5_row_mt_1;
+
+  md5_row_mt_0.Add(stats1, fp_stats->sz / 2);
+  const char *md5_row_mt_0_str = md5_row_mt_0.Get();
+
+  md5_row_mt_1.Add(stats2, fp_stats->sz / 2);
+  const char *md5_row_mt_1_str = md5_row_mt_1.Get();
+
+  // Check md5 match.
+  ASSERT_STREQ(md5_row_mt_0_str, md5_row_mt_1_str)
+      << "MD5 checksums don't match";
+}
+
+TEST_P(AVxFirstPassEncoderThreadTest, FirstPassStatsTest) {
+  ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+  aom_fixed_buf_t firstpass_stats;
+  size_t single_run_sz;
+
+  cfg_.rc_target_bitrate = 1000;
+
+  // 5 encodes will be run:
+  // 1. row_mt_=0 and threads=1
+  // 2. row_mt_=1 and threads=1
+  // 3. row_mt_=1 and threads=2
+  // 4. row_mt_=1 and threads=4
+  // 5. row_mt_=1 and threads=8
+
+  // 4 comparisons will be made:
+  // 1. Between run 1 and run 2.
+  // 2. Between run 2 and run 3.
+  // 3. Between run 3 and run 4.
+  // 4. Between run 4 and run 5.
+
+  // Test row_mt_: 0 vs 1 at single thread case(threads = 1)
+  cfg_.g_threads = 1;
+
+  row_mt_ = 0;
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  row_mt_ = 1;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  firstpass_stats.buf = firstpass_stats_.buf;
+  firstpass_stats.sz = firstpass_stats_.sz;
+  single_run_sz = firstpass_stats_.sz / 2;
+
+  // Compare to check if using or not using row-mt are bit exact.
+  // Comparison 1 (between row_mt_=0 and row_mt_=1).
+  ASSERT_NO_FATAL_FAILURE(compare_fp_stats_md5(&firstpass_stats));
+
+  // Test single thread vs multiple threads
+  row_mt_ = 1;
+
+  cfg_.g_threads = 2;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // offset to the 2nd and 3rd run.
+  firstpass_stats.buf = reinterpret_cast<void *>(
+      reinterpret_cast<uint8_t *>(firstpass_stats_.buf) + single_run_sz);
+
+  // Compare to check if single-thread and multi-thread stats are bit exact.
+  // Comparison 2 (between threads=1 and threads=2).
+  ASSERT_NO_FATAL_FAILURE(compare_fp_stats_md5(&firstpass_stats));
+
+  cfg_.g_threads = 4;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // offset to the 3rd and 4th run
+  firstpass_stats.buf = reinterpret_cast<void *>(
+      reinterpret_cast<uint8_t *>(firstpass_stats_.buf) + single_run_sz * 2);
+
+  // Comparison 3 (between threads=2 and threads=4).
+  ASSERT_NO_FATAL_FAILURE(compare_fp_stats_md5(&firstpass_stats));
+
+  cfg_.g_threads = 8;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // offset to the 4th and 5th run.
+  firstpass_stats.buf = reinterpret_cast<void *>(
+      reinterpret_cast<uint8_t *>(firstpass_stats_.buf) + single_run_sz * 3);
+
+  // Comparison 4 (between threads=4 and threads=8).
+  compare_fp_stats_md5(&firstpass_stats);
+}
+#endif  // !CONFIG_REALTIME_ONLY
+
+class AVxEncoderThreadTest
+    : public ::libaom_test::CodecTestWith5Params<libaom_test::TestMode, int,
+                                                 int, int, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AVxEncoderThreadTest()
+      : EncoderTest(GET_PARAM(0)), encoder_initialized_(false),
+        encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)),
+        tile_cols_(GET_PARAM(3)), tile_rows_(GET_PARAM(4)),
+        row_mt_(GET_PARAM(5)) {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.w = 1280;
+    cfg.h = 720;
+    cfg.allow_lowbitdepth = 1;
+    decoder_ = codec_->CreateDecoder(cfg, 0);
+    if (decoder_->IsAV1()) {
+      decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
+    }
+
+    size_enc_.clear();
+    md5_dec_.clear();
+    md5_enc_.clear();
+  }
+  ~AVxEncoderThreadTest() override { delete decoder_; }
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+
+    if (encoding_mode_ == ::libaom_test::kOnePassGood ||
+        encoding_mode_ == ::libaom_test::kTwoPassGood) {
+      cfg_.g_lag_in_frames = 6;
+      cfg_.rc_2pass_vbr_minsection_pct = 5;
+      cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+    } else if (encoding_mode_ == ::libaom_test::kRealTime) {
+      cfg_.g_error_resilient = 1;
+    }
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_min_quantizer = 0;
+  }
+
+  void BeginPassHook(unsigned int /*pass*/) override {
+    encoder_initialized_ = false;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource * /*video*/,
+                          ::libaom_test::Encoder *encoder) override {
+    if (!encoder_initialized_) {
+      SetTileSize(encoder);
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_SET_ROW_MT, row_mt_);
+      if (encoding_mode_ == ::libaom_test::kOnePassGood ||
+          encoding_mode_ == ::libaom_test::kTwoPassGood) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 5);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+        encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 0);
+        encoder->Control(AV1E_SET_MAX_GF_INTERVAL, 4);
+        // In row_mt_=0 case, the output of single thread (1 thread) will be
+        // compared with multi thread (4 thread) output (as per line no:340).
+        // Currently, Loop restoration stage is conditionally disabled for speed
+        // 5, 6 when num_workers > 1. Due to this, the match between single
+        // thread and multi thread output can not be achieved. Hence, testing
+        // this case alone with LR disabled.
+        // TODO(aomedia:3446): Remove the constraint on this test case once Loop
+        // restoration state is same in both single and multi thread path.
+        if (set_cpu_used_ >= 5 && row_mt_ == 0)
+          encoder->Control(AV1E_SET_ENABLE_RESTORATION, 0);
+      } else if (encoding_mode_ == ::libaom_test::kRealTime) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
+        encoder->Control(AV1E_SET_AQ_MODE, 3);
+        encoder->Control(AV1E_SET_COEFF_COST_UPD_FREQ, 2);
+        encoder->Control(AV1E_SET_MODE_COST_UPD_FREQ, 2);
+        encoder->Control(AV1E_SET_MV_COST_UPD_FREQ, 3);
+        encoder->Control(AV1E_SET_DV_COST_UPD_FREQ, 3);
+      } else {
+        encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel);
+      }
+      encoder_initialized_ = true;
+    }
+  }
+
+  virtual void SetTileSize(libaom_test::Encoder *encoder) {
+    encoder->Control(AV1E_SET_TILE_COLUMNS, tile_cols_);
+    encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_);
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    size_enc_.push_back(pkt->data.frame.sz);
+
+    ::libaom_test::MD5 md5_enc;
+    md5_enc.Add(reinterpret_cast<uint8_t *>(pkt->data.frame.buf),
+                pkt->data.frame.sz);
+    md5_enc_.push_back(md5_enc.Get());
+
+    const aom_codec_err_t res = decoder_->DecodeFrame(
+        reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz);
+    if (res != AOM_CODEC_OK) {
+      abort_ = true;
+      ASSERT_EQ(AOM_CODEC_OK, res);
+    }
+    const aom_image_t *img = decoder_->GetDxData().Next();
+
+    if (img) {
+      ::libaom_test::MD5 md5_res;
+      md5_res.Add(img);
+      md5_dec_.push_back(md5_res.Get());
+    }
+  }
+
+  void DoTest() {
+    ::libaom_test::YUVVideoSource video(
+        "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, 640, 480, 30, 1, 15, 26);
+    cfg_.rc_target_bitrate = 1000;
+
+    if (row_mt_ == 0) {
+      // Encode using single thread.
+      cfg_.g_threads = 1;
+      init_flags_ = AOM_CODEC_USE_PSNR;
+      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+      std::vector<size_t> single_thr_size_enc;
+      std::vector<std::string> single_thr_md5_enc;
+      std::vector<std::string> single_thr_md5_dec;
+      single_thr_size_enc = size_enc_;
+      single_thr_md5_enc = md5_enc_;
+      single_thr_md5_dec = md5_dec_;
+      size_enc_.clear();
+      md5_enc_.clear();
+      md5_dec_.clear();
+
+      // Encode using multiple threads.
+      cfg_.g_threads = 4;
+      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+      std::vector<size_t> multi_thr_size_enc;
+      std::vector<std::string> multi_thr_md5_enc;
+      std::vector<std::string> multi_thr_md5_dec;
+      multi_thr_size_enc = size_enc_;
+      multi_thr_md5_enc = md5_enc_;
+      multi_thr_md5_dec = md5_dec_;
+      size_enc_.clear();
+      md5_enc_.clear();
+      md5_dec_.clear();
+
+      // Check that the vectors are equal.
+      ASSERT_EQ(single_thr_size_enc, multi_thr_size_enc);
+      ASSERT_EQ(single_thr_md5_enc, multi_thr_md5_enc);
+      ASSERT_EQ(single_thr_md5_dec, multi_thr_md5_dec);
+
+      DoTestMaxThreads(&video, single_thr_size_enc, single_thr_md5_enc,
+                       single_thr_md5_dec);
+    } else if (row_mt_ == 1) {
+      // Encode using multiple threads row-mt enabled.
+      cfg_.g_threads = 2;
+      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+      std::vector<size_t> multi_thr2_row_mt_size_enc;
+      std::vector<std::string> multi_thr2_row_mt_md5_enc;
+      std::vector<std::string> multi_thr2_row_mt_md5_dec;
+      multi_thr2_row_mt_size_enc = size_enc_;
+      multi_thr2_row_mt_md5_enc = md5_enc_;
+      multi_thr2_row_mt_md5_dec = md5_dec_;
+      size_enc_.clear();
+      md5_enc_.clear();
+      md5_dec_.clear();
+
+      // Disable threads=3 test for now to reduce the time so that the nightly
+      // test would not time out.
+      // cfg_.g_threads = 3;
+      // ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+      // std::vector<size_t> multi_thr3_row_mt_size_enc;
+      // std::vector<std::string> multi_thr3_row_mt_md5_enc;
+      // std::vector<std::string> multi_thr3_row_mt_md5_dec;
+      // multi_thr3_row_mt_size_enc = size_enc_;
+      // multi_thr3_row_mt_md5_enc = md5_enc_;
+      // multi_thr3_row_mt_md5_dec = md5_dec_;
+      // size_enc_.clear();
+      // md5_enc_.clear();
+      // md5_dec_.clear();
+      // Check that the vectors are equal.
+      // ASSERT_EQ(multi_thr3_row_mt_size_enc, multi_thr2_row_mt_size_enc);
+      // ASSERT_EQ(multi_thr3_row_mt_md5_enc, multi_thr2_row_mt_md5_enc);
+      // ASSERT_EQ(multi_thr3_row_mt_md5_dec, multi_thr2_row_mt_md5_dec);
+
+      cfg_.g_threads = 4;
+      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+      std::vector<size_t> multi_thr4_row_mt_size_enc;
+      std::vector<std::string> multi_thr4_row_mt_md5_enc;
+      std::vector<std::string> multi_thr4_row_mt_md5_dec;
+      multi_thr4_row_mt_size_enc = size_enc_;
+      multi_thr4_row_mt_md5_enc = md5_enc_;
+      multi_thr4_row_mt_md5_dec = md5_dec_;
+      size_enc_.clear();
+      md5_enc_.clear();
+      md5_dec_.clear();
+
+      // Check that the vectors are equal.
+      ASSERT_EQ(multi_thr4_row_mt_size_enc, multi_thr2_row_mt_size_enc);
+      ASSERT_EQ(multi_thr4_row_mt_md5_enc, multi_thr2_row_mt_md5_enc);
+      ASSERT_EQ(multi_thr4_row_mt_md5_dec, multi_thr2_row_mt_md5_dec);
+
+      DoTestMaxThreads(&video, multi_thr2_row_mt_size_enc,
+                       multi_thr2_row_mt_md5_enc, multi_thr2_row_mt_md5_dec);
+    }
+  }
+
+  virtual void DoTestMaxThreads(::libaom_test::YUVVideoSource *video,
+                                const std::vector<size_t> ref_size_enc,
+                                const std::vector<std::string> ref_md5_enc,
+                                const std::vector<std::string> ref_md5_dec) {
+    // This value should be kept the same as MAX_NUM_THREADS
+    // in aom_thread.h
+    cfg_.g_threads = 64;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video));
+    std::vector<size_t> multi_thr_max_row_mt_size_enc;
+    std::vector<std::string> multi_thr_max_row_mt_md5_enc;
+    std::vector<std::string> multi_thr_max_row_mt_md5_dec;
+    multi_thr_max_row_mt_size_enc = size_enc_;
+    multi_thr_max_row_mt_md5_enc = md5_enc_;
+    multi_thr_max_row_mt_md5_dec = md5_dec_;
+    size_enc_.clear();
+    md5_enc_.clear();
+    md5_dec_.clear();
+
+    // Check that the vectors are equal.
+    ASSERT_EQ(ref_size_enc, multi_thr_max_row_mt_size_enc);
+    ASSERT_EQ(ref_md5_enc, multi_thr_max_row_mt_md5_enc);
+    ASSERT_EQ(ref_md5_dec, multi_thr_max_row_mt_md5_dec);
+  }
+
+  bool encoder_initialized_;
+  ::libaom_test::TestMode encoding_mode_;
+  int set_cpu_used_;
+  int tile_cols_;
+  int tile_rows_;
+  int row_mt_;
+  ::libaom_test::Decoder *decoder_;
+  std::vector<size_t> size_enc_;
+  std::vector<std::string> md5_enc_;
+  std::vector<std::string> md5_dec_;
+};
+
+class AVxEncoderThreadRTTest : public AVxEncoderThreadTest {};
+
+TEST_P(AVxEncoderThreadRTTest, EncoderResultTest) {
+  cfg_.large_scale_tile = 0;
+  decoder_->Control(AV1_SET_TILE_MODE, 0);
+  DoTest();
+}
+
+// For real time mode, test speed 5, 6, 7, 8, 9, 10.
+AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadRTTest,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Values(5, 6, 7, 8, 9, 10),
+                           ::testing::Values(0, 2), ::testing::Values(0, 2),
+                           ::testing::Values(0, 1));
+
+#if !CONFIG_REALTIME_ONLY
+
+// The AVxEncoderThreadTestLarge takes up ~14% of total run-time of the
+// Valgrind long tests. Exclude it; the smaller tests are still run.
+#if !AOM_VALGRIND_BUILD
+class AVxEncoderThreadTestLarge : public AVxEncoderThreadTest {};
+
+TEST_P(AVxEncoderThreadTestLarge, EncoderResultTest) {
+  cfg_.large_scale_tile = 0;
+  decoder_->Control(AV1_SET_TILE_MODE, 0);
+  DoTest();
+}
+
+// Test cpu_used 0, 1, 3 and 5.
+AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadTestLarge,
+                           ::testing::Values(::libaom_test::kTwoPassGood,
+                                             ::libaom_test::kOnePassGood),
+                           ::testing::Values(0, 1, 3, 5),
+                           ::testing::Values(1, 6), ::testing::Values(1, 6),
+                           ::testing::Values(0, 1));
+#endif  // !AOM_VALGRIND_BUILD
+
+TEST_P(AVxEncoderThreadTest, EncoderResultTest) {
+  cfg_.large_scale_tile = 0;
+  decoder_->Control(AV1_SET_TILE_MODE, 0);
+  DoTest();
+}
+
+class AVxEncoderThreadAllIntraTest : public AVxEncoderThreadTest {};
+
+TEST_P(AVxEncoderThreadAllIntraTest, EncoderResultTest) {
+  cfg_.large_scale_tile = 0;
+  decoder_->Control(AV1_SET_TILE_MODE, 0);
+  DoTest();
+}
+
+class AVxEncoderThreadAllIntraTestLarge : public AVxEncoderThreadTest {};
+
+TEST_P(AVxEncoderThreadAllIntraTestLarge, EncoderResultTest) {
+  cfg_.large_scale_tile = 0;
+  decoder_->Control(AV1_SET_TILE_MODE, 0);
+  DoTest();
+}
+
+// first pass stats test
+AV1_INSTANTIATE_TEST_SUITE(AVxFirstPassEncoderThreadTest,
+                           ::testing::Values(::libaom_test::kTwoPassGood),
+                           ::testing::Range(0, 6, 2), ::testing::Range(0, 2),
+                           ::testing::Range(1, 3));
+
+// For AV1, test speed 0, 1, 2, 3, 5.
+// Only test cpu_used 2 here.
+AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadTest,
+                           ::testing::Values(::libaom_test::kTwoPassGood),
+                           ::testing::Values(2), ::testing::Values(0, 2),
+                           ::testing::Values(0, 2), ::testing::Values(0, 1));
+
+// For all intra mode, test speed 0, 2, 4, 6, 8.
+// Only test cpu_used 6 here.
+AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadAllIntraTest,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(6), ::testing::Values(0, 2),
+                           ::testing::Values(0, 2), ::testing::Values(0, 1));
+
+// Test cpu_used 0, 2, 4 and 8.
+AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadAllIntraTestLarge,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(0, 2, 4, 8),
+                           ::testing::Values(1, 6), ::testing::Values(1, 6),
+                           ::testing::Values(0, 1));
+#endif  // !CONFIG_REALTIME_ONLY
+
+class AVxEncoderThreadLSTest : public AVxEncoderThreadTest {
+  void SetTileSize(libaom_test::Encoder *encoder) override {
+    encoder->Control(AV1E_SET_TILE_COLUMNS, tile_cols_);
+    encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_);
+  }
+
+  void DoTestMaxThreads(::libaom_test::YUVVideoSource *video,
+                        const std::vector<size_t> ref_size_enc,
+                        const std::vector<std::string> ref_md5_enc,
+                        const std::vector<std::string> ref_md5_dec) override {
+    (void)video;
+    (void)ref_size_enc;
+    (void)ref_md5_enc;
+    (void)ref_md5_dec;
+  }
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AVxEncoderThreadLSTest);
+
+TEST_P(AVxEncoderThreadLSTest, EncoderResultTest) {
+  cfg_.large_scale_tile = 1;
+  decoder_->Control(AV1_SET_TILE_MODE, 1);
+  decoder_->Control(AV1D_EXT_TILE_DEBUG, 1);
+  DoTest();
+}
+
+// AVxEncoderThreadLSTestLarge takes up about 2% of total run-time of
+// the Valgrind long tests. Since we already run AVxEncoderThreadLSTest,
+// skip this one for Valgrind.
+#if !CONFIG_REALTIME_ONLY && !AOM_VALGRIND_BUILD
+class AVxEncoderThreadLSTestLarge : public AVxEncoderThreadLSTest {};
+
+TEST_P(AVxEncoderThreadLSTestLarge, EncoderResultTest) {
+  cfg_.large_scale_tile = 1;
+  decoder_->Control(AV1_SET_TILE_MODE, 1);
+  decoder_->Control(AV1D_EXT_TILE_DEBUG, 1);
+  DoTest();
+}
+
+AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadLSTestLarge,
+                           ::testing::Values(::libaom_test::kTwoPassGood,
+                                             ::libaom_test::kOnePassGood),
+                           ::testing::Values(1, 3), ::testing::Values(0, 6),
+                           ::testing::Values(0, 6), ::testing::Values(1));
+#endif  // !CONFIG_REALTIME_ONLY && !AOM_VALGRIND_BUILD
+}  // namespace
diff --git a/third_party/aom/test/examples.sh b/third_party/aom/test/examples.sh
new file mode 100755
index 0000000000..3e1612303c
--- /dev/null
+++ b/third_party/aom/test/examples.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file runs all of the tests for the libaom examples.
+##
+readonly EXEC_DIR="$(pwd)"
+. $(dirname $0)/tools_common.sh
+
+example_tests=$(ls -r $(dirname $0)/*.sh)
+
+# List of script names to exclude.
+exclude_list="best_encode examples run_encodes tools_common"
+
+if [ "$(realtime_only_build)" = "yes" ]; then
+  exclude_list="${exclude_list} twopass_encoder simple_decoder lightfield_test"
+fi
+
+# Filter out the scripts in $exclude_list.
+for word in ${exclude_list}; do
+  example_tests=$(filter_strings "${example_tests}" "${word}" exclude)
+done
+
+for test in ${example_tests}; do
+  # Source each test script so that exporting variables can be avoided.
+  AOM_TEST_NAME="$(basename ${test%.*})"
+  . "${test}"
+  # Restore the working directory to the one at the beginning of execution.
+  # This avoids side-effects from tests that change the directory.
+  cd "${EXEC_DIR}"
+done
diff --git a/third_party/aom/test/external_frame_buffer_test.cc b/third_party/aom/test/external_frame_buffer_test.cc
new file mode 100644
index 0000000000..8f16c4e2d5
--- /dev/null
+++ b/third_party/aom/test/external_frame_buffer_test.cc
@@ -0,0 +1,547 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <memory>
+#include <string>
+#include "common/tools_common.h"
+#include "config/aom_config.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/md5_helper.h"
+#include "test/test_vectors.h"
+#include "test/util.h"
+#if CONFIG_WEBM_IO
+#include "test/webm_video_source.h"
+#endif
+
+namespace {
+
+const int kVideoNameParam = 1;
+
+struct ExternalFrameBuffer {
+  uint8_t *data;
+  size_t size;
+  int in_use;
+};
+
+// Class to manipulate a list of external frame buffers.
+class ExternalFrameBufferList {
+ public:
+  ExternalFrameBufferList()
+      : num_buffers_(0), num_used_buffers_(0), ext_fb_list_(nullptr) {}
+
+  virtual ~ExternalFrameBufferList() {
+    for (int i = 0; i < num_buffers_; ++i) {
+      delete[] ext_fb_list_[i].data;
+    }
+    delete[] ext_fb_list_;
+  }
+
+  // Creates the list to hold the external buffers. Returns true on success.
+  bool CreateBufferList(int num_buffers) {
+    if (num_buffers < 0) return false;
+
+    num_buffers_ = num_buffers;
+    ext_fb_list_ = new ExternalFrameBuffer[num_buffers_];
+    if (ext_fb_list_ == nullptr) {
+      EXPECT_NE(ext_fb_list_, nullptr);
+      return false;
+    }
+    memset(ext_fb_list_, 0, sizeof(ext_fb_list_[0]) * num_buffers_);
+    return true;
+  }
+
+  // Searches the frame buffer list for a free frame buffer. Makes sure
+  // that the frame buffer is at least |min_size| in bytes. Marks that the
+  // frame buffer is in use by libaom. Finally sets |fb| to point to the
+  // external frame buffer. Returns < 0 on an error.
+  int GetFreeFrameBuffer(size_t min_size, aom_codec_frame_buffer_t *fb) {
+    EXPECT_NE(fb, nullptr);
+    const int idx = FindFreeBufferIndex();
+    if (idx == num_buffers_) return -1;
+
+    if (ext_fb_list_[idx].size < min_size) {
+      delete[] ext_fb_list_[idx].data;
+      ext_fb_list_[idx].data = new uint8_t[min_size];
+      if (ext_fb_list_[idx].data == nullptr) {
+        EXPECT_NE(ext_fb_list_[idx].data, nullptr);
+      }
+      memset(ext_fb_list_[idx].data, 0, min_size);
+      ext_fb_list_[idx].size = min_size;
+    }
+
+    SetFrameBuffer(idx, fb);
+
+    num_used_buffers_++;
+    return 0;
+  }
+
+  // Test function that will not allocate any data for the frame buffer.
+  // Returns < 0 on an error.
+  int GetZeroFrameBuffer(size_t min_size, aom_codec_frame_buffer_t *fb) {
+    EXPECT_NE(fb, nullptr);
+    const int idx = FindFreeBufferIndex();
+    if (idx == num_buffers_) return -1;
+
+    if (ext_fb_list_[idx].size < min_size) {
+      delete[] ext_fb_list_[idx].data;
+      ext_fb_list_[idx].data = nullptr;
+      ext_fb_list_[idx].size = min_size;
+    }
+
+    SetFrameBuffer(idx, fb);
+    return 0;
+  }
+
+  // Marks the external frame buffer that |fb| is pointing to as free.
+  // Returns < 0 on an error.
+  int ReturnFrameBuffer(aom_codec_frame_buffer_t *fb) {
+    if (fb == nullptr) {
+      EXPECT_NE(fb, nullptr);
+      return -1;
+    }
+    ExternalFrameBuffer *const ext_fb =
+        reinterpret_cast<ExternalFrameBuffer *>(fb->priv);
+    if (ext_fb == nullptr) {
+      EXPECT_NE(ext_fb, nullptr);
+      return -1;
+    }
+    EXPECT_EQ(1, ext_fb->in_use);
+    ext_fb->in_use = 0;
+    num_used_buffers_--;
+    return 0;
+  }
+
+  // Checks that the aom_image_t data is contained within the external frame
+  // buffer private data passed back in the aom_image_t.
+  void CheckImageFrameBuffer(const aom_image_t *img) {
+    const struct ExternalFrameBuffer *const ext_fb =
+        reinterpret_cast<ExternalFrameBuffer *>(img->fb_priv);
+
+    ASSERT_TRUE(img->planes[0] >= ext_fb->data &&
+                img->planes[0] < (ext_fb->data + ext_fb->size));
+  }
+
+  int num_used_buffers() const { return num_used_buffers_; }
+
+ private:
+  // Returns the index of the first free frame buffer. Returns |num_buffers_|
+  // if there are no free frame buffers.
+  int FindFreeBufferIndex() {
+    int i;
+    // Find a free frame buffer.
+    for (i = 0; i < num_buffers_; ++i) {
+      if (!ext_fb_list_[i].in_use) break;
+    }
+    return i;
+  }
+
+  // Sets |fb| to an external frame buffer. idx is the index into the frame
+  // buffer list.
+  void SetFrameBuffer(int idx, aom_codec_frame_buffer_t *fb) {
+    ASSERT_NE(fb, nullptr);
+    fb->data = ext_fb_list_[idx].data;
+    fb->size = ext_fb_list_[idx].size;
+    ASSERT_EQ(0, ext_fb_list_[idx].in_use);
+    ext_fb_list_[idx].in_use = 1;
+    fb->priv = &ext_fb_list_[idx];
+  }
+
+  int num_buffers_;
+  int num_used_buffers_;
+  ExternalFrameBuffer *ext_fb_list_;
+};
+
+#if CONFIG_WEBM_IO
+
+// Callback used by libaom to request the application to return a frame
+// buffer of at least |min_size| in bytes.
+int get_aom_frame_buffer(void *user_priv, size_t min_size,
+                         aom_codec_frame_buffer_t *fb) {
+  ExternalFrameBufferList *const fb_list =
+      reinterpret_cast<ExternalFrameBufferList *>(user_priv);
+  return fb_list->GetFreeFrameBuffer(min_size, fb);
+}
+
+// Callback used by libaom to tell the application that |fb| is not needed
+// anymore.
+int release_aom_frame_buffer(void *user_priv, aom_codec_frame_buffer_t *fb) {
+  ExternalFrameBufferList *const fb_list =
+      reinterpret_cast<ExternalFrameBufferList *>(user_priv);
+  return fb_list->ReturnFrameBuffer(fb);
+}
+
+// Callback will not allocate data for frame buffer.
+int get_aom_zero_frame_buffer(void *user_priv, size_t min_size,
+                              aom_codec_frame_buffer_t *fb) {
+  ExternalFrameBufferList *const fb_list =
+      reinterpret_cast<ExternalFrameBufferList *>(user_priv);
+  return fb_list->GetZeroFrameBuffer(min_size, fb);
+}
+
+// Callback will allocate one less byte than |min_size|.
+int get_aom_one_less_byte_frame_buffer(void *user_priv, size_t min_size,
+                                       aom_codec_frame_buffer_t *fb) {
+  ExternalFrameBufferList *const fb_list =
+      reinterpret_cast<ExternalFrameBufferList *>(user_priv);
+  return fb_list->GetFreeFrameBuffer(min_size - 1, fb);
+}
+
+// Callback will not release the external frame buffer.
+int do_not_release_aom_frame_buffer(void *user_priv,
+                                    aom_codec_frame_buffer_t *fb) {
+  (void)user_priv;
+  (void)fb;
+  return 0;
+}
+
+#endif  // CONFIG_WEBM_IO
+
+// Class for testing passing in external frame buffers to libaom.
+class ExternalFrameBufferMD5Test
+    : public ::libaom_test::DecoderTest,
+      public ::libaom_test::CodecTestWithParam<const char *> {
+ protected:
+  ExternalFrameBufferMD5Test()
+      : DecoderTest(GET_PARAM(::libaom_test::kCodecFactoryParam)),
+        md5_file_(nullptr), num_buffers_(0) {}
+
+  ~ExternalFrameBufferMD5Test() override {
+    if (md5_file_ != nullptr) fclose(md5_file_);
+  }
+
+  void PreDecodeFrameHook(const libaom_test::CompressedVideoSource &video,
+                          libaom_test::Decoder *decoder) override {
+    if (num_buffers_ > 0 && video.frame_number() == 0) {
+      // Have libaom use frame buffers we create.
+      ASSERT_TRUE(fb_list_.CreateBufferList(num_buffers_));
+      ASSERT_EQ(AOM_CODEC_OK,
+                decoder->SetFrameBufferFunctions(GetAV1FrameBuffer,
+                                                 ReleaseAV1FrameBuffer, this));
+    }
+  }
+
+  void OpenMD5File(const std::string &md5_file_name_) {
+    md5_file_ = libaom_test::OpenTestDataFile(md5_file_name_);
+    ASSERT_NE(md5_file_, nullptr)
+        << "Md5 file open failed. Filename: " << md5_file_name_;
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             const unsigned int frame_number) override {
+    ASSERT_NE(md5_file_, nullptr);
+    char expected_md5[33];
+    char junk[128];
+
+    // Read correct md5 checksums.
+    const int res = fscanf(md5_file_, "%s  %s", expected_md5, junk);
+    ASSERT_NE(EOF, res) << "Read md5 data failed";
+    expected_md5[32] = '\0';
+
+    ::libaom_test::MD5 md5_res;
+#if FORCE_HIGHBITDEPTH_DECODING
+    const aom_img_fmt_t shifted_fmt =
+        (aom_img_fmt)(img.fmt & ~AOM_IMG_FMT_HIGHBITDEPTH);
+    if (img.bit_depth == 8 && shifted_fmt != img.fmt) {
+      aom_image_t *img_shifted =
+          aom_img_alloc(nullptr, shifted_fmt, img.d_w, img.d_h, 16);
+      img_shifted->bit_depth = img.bit_depth;
+      img_shifted->monochrome = img.monochrome;
+      aom_img_downshift(img_shifted, &img, 0);
+      md5_res.Add(img_shifted);
+      aom_img_free(img_shifted);
+    } else {
+#endif
+      md5_res.Add(&img);
+#if FORCE_HIGHBITDEPTH_DECODING
+    }
+#endif
+    const char *const actual_md5 = md5_res.Get();
+
+    // Check md5 match.
+    ASSERT_STREQ(expected_md5, actual_md5)
+        << "Md5 checksums don't match: frame number = " << frame_number;
+
+    const struct ExternalFrameBuffer *const ext_fb =
+        reinterpret_cast<ExternalFrameBuffer *>(img.fb_priv);
+
+    ASSERT_TRUE(img.planes[0] >= ext_fb->data &&
+                img.planes[0] < (ext_fb->data + ext_fb->size));
+  }
+
+  // Callback to get a free external frame buffer. Return value < 0 is an
+  // error.
+  static int GetAV1FrameBuffer(void *user_priv, size_t min_size,
+                               aom_codec_frame_buffer_t *fb) {
+    ExternalFrameBufferMD5Test *const md5Test =
+        reinterpret_cast<ExternalFrameBufferMD5Test *>(user_priv);
+    return md5Test->fb_list_.GetFreeFrameBuffer(min_size, fb);
+  }
+
+  // Callback to release an external frame buffer. Return value < 0 is an
+  // error.
+  static int ReleaseAV1FrameBuffer(void *user_priv,
+                                   aom_codec_frame_buffer_t *fb) {
+    ExternalFrameBufferMD5Test *const md5Test =
+        reinterpret_cast<ExternalFrameBufferMD5Test *>(user_priv);
+    return md5Test->fb_list_.ReturnFrameBuffer(fb);
+  }
+
+  void set_num_buffers(int num_buffers) { num_buffers_ = num_buffers; }
+  int num_buffers() const { return num_buffers_; }
+
+ private:
+  FILE *md5_file_;
+  int num_buffers_;
+  ExternalFrameBufferList fb_list_;
+};
+
+#if CONFIG_WEBM_IO
+const char kAV1TestFile[] = "av1-1-b8-03-sizeup.mkv";
+const char kAV1NonRefTestFile[] = "av1-1-b8-01-size-226x226.ivf";
+
+// Class for testing passing in external frame buffers to libaom.
+class ExternalFrameBufferTest : public ::testing::Test {
+ protected:
+  ExternalFrameBufferTest()
+      : video_(nullptr), decoder_(nullptr), num_buffers_(0) {}
+
+  void SetUp() override {
+    video_ = new libaom_test::WebMVideoSource(kAV1TestFile);
+    ASSERT_NE(video_, nullptr);
+    video_->Init();
+    video_->Begin();
+
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.allow_lowbitdepth = !FORCE_HIGHBITDEPTH_DECODING;
+    decoder_ = new libaom_test::AV1Decoder(cfg, 0);
+    ASSERT_NE(decoder_, nullptr);
+  }
+
+  void TearDown() override {
+    delete decoder_;
+    decoder_ = nullptr;
+    delete video_;
+    video_ = nullptr;
+  }
+
+  // Passes the external frame buffer information to libaom.
+  aom_codec_err_t SetFrameBufferFunctions(
+      int num_buffers, aom_get_frame_buffer_cb_fn_t cb_get,
+      aom_release_frame_buffer_cb_fn_t cb_release) {
+    if (num_buffers > 0) {
+      num_buffers_ = num_buffers;
+      EXPECT_TRUE(fb_list_.CreateBufferList(num_buffers_));
+    }
+
+    return decoder_->SetFrameBufferFunctions(cb_get, cb_release, &fb_list_);
+  }
+
+  aom_codec_err_t DecodeOneFrame() {
+    const aom_codec_err_t res =
+        decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+    CheckDecodedFrames();
+    if (res == AOM_CODEC_OK) video_->Next();
+    return res;
+  }
+
+  aom_codec_err_t DecodeRemainingFrames() {
+    for (; video_->cxdata() != nullptr; video_->Next()) {
+      const aom_codec_err_t res =
+          decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+      if (res != AOM_CODEC_OK) return res;
+      CheckDecodedFrames();
+    }
+    return AOM_CODEC_OK;
+  }
+
+ protected:
+  void CheckDecodedFrames() {
+    libaom_test::DxDataIterator dec_iter = decoder_->GetDxData();
+    const aom_image_t *img = nullptr;
+
+    // Get decompressed data
+    while ((img = dec_iter.Next()) != nullptr) {
+      fb_list_.CheckImageFrameBuffer(img);
+    }
+  }
+
+  libaom_test::CompressedVideoSource *video_;
+  libaom_test::AV1Decoder *decoder_;
+  int num_buffers_;
+  ExternalFrameBufferList fb_list_;
+};
+
+class ExternalFrameBufferNonRefTest : public ExternalFrameBufferTest {
+ protected:
+  void SetUp() override {
+    video_ = new libaom_test::IVFVideoSource(kAV1NonRefTestFile);
+    ASSERT_NE(video_, nullptr);
+    video_->Init();
+    video_->Begin();
+
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.allow_lowbitdepth = !FORCE_HIGHBITDEPTH_DECODING;
+    decoder_ = new libaom_test::AV1Decoder(cfg, 0);
+    ASSERT_NE(decoder_, nullptr);
+  }
+
+  virtual void CheckFrameBufferRelease() {
+    TearDown();
+    ASSERT_EQ(0, fb_list_.num_used_buffers());
+  }
+};
+#endif  // CONFIG_WEBM_IO
+
+// This test runs through the set of test vectors, and decodes them.
+// Libaom will call into the application to allocate a frame buffer when
+// needed. The md5 checksums are computed for each frame in the video file.
+// If md5 checksums match the correct md5 data, then the test is passed.
+// Otherwise, the test failed.
+TEST_P(ExternalFrameBufferMD5Test, ExtFBMD5Match) {
+  const std::string filename = GET_PARAM(kVideoNameParam);
+  aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+
+  // Number of buffers equals #AOM_MAXIMUM_REF_BUFFERS +
+  // #AOM_MAXIMUM_WORK_BUFFERS + four jitter buffers.
+  const int jitter_buffers = 4;
+  const int num_buffers =
+      AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS + jitter_buffers;
+  set_num_buffers(num_buffers);
+
+  // Open compressed video file.
+  std::unique_ptr<libaom_test::CompressedVideoSource> video;
+  if (filename.substr(filename.length() - 3, 3) == "ivf") {
+    video.reset(new libaom_test::IVFVideoSource(filename));
+  } else {
+#if CONFIG_WEBM_IO
+    video.reset(new libaom_test::WebMVideoSource(filename));
+#else
+    fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n",
+            filename.c_str());
+    return;
+#endif
+  }
+  ASSERT_NE(video, nullptr);
+  video->Init();
+
+  // Construct md5 file name.
+  const std::string md5_filename = filename + ".md5";
+  OpenMD5File(md5_filename);
+
+  // Set decode config.
+  cfg.allow_lowbitdepth = !FORCE_HIGHBITDEPTH_DECODING;
+  set_cfg(cfg);
+
+  // Decode frame, and check the md5 matching.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get(), cfg));
+}
+
+#if CONFIG_WEBM_IO
+TEST_F(ExternalFrameBufferTest, MinFrameBuffers) {
+  // Minimum number of external frame buffers for AV1 is
+  // #AOM_MAXIMUM_REF_BUFFERS + #AOM_MAXIMUM_WORK_BUFFERS.
+  const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(AOM_CODEC_OK,
+            SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
+                                    release_aom_frame_buffer));
+  ASSERT_EQ(AOM_CODEC_OK, DecodeRemainingFrames());
+}
+
+TEST_F(ExternalFrameBufferTest, EightJitterBuffers) {
+  // Number of buffers equals #AOM_MAXIMUM_REF_BUFFERS +
+  // #AOM_MAXIMUM_WORK_BUFFERS + eight jitter buffers.
+  const int jitter_buffers = 8;
+  const int num_buffers =
+      AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS + jitter_buffers;
+  ASSERT_EQ(AOM_CODEC_OK,
+            SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
+                                    release_aom_frame_buffer));
+  ASSERT_EQ(AOM_CODEC_OK, DecodeRemainingFrames());
+}
+
+TEST_F(ExternalFrameBufferTest, NotEnoughBuffers) {
+  // Minimum number of external frame buffers for AV1 is
+  // #AOM_MAXIMUM_REF_BUFFERS + #AOM_MAXIMUM_WORK_BUFFERS. Most files will
+  // only use 5 frame buffers at one time.
+  const int num_buffers = 2;
+  ASSERT_EQ(AOM_CODEC_OK,
+            SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
+                                    release_aom_frame_buffer));
+  ASSERT_EQ(AOM_CODEC_OK, DecodeOneFrame());
+  // Only run this on long clips. Decoding a very short clip will return
+  // AOM_CODEC_OK even with only 2 buffers.
+  ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeRemainingFrames());
+}
+
+TEST_F(ExternalFrameBufferTest, NoRelease) {
+  const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(AOM_CODEC_OK,
+            SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
+                                    do_not_release_aom_frame_buffer));
+  ASSERT_EQ(AOM_CODEC_OK, DecodeOneFrame());
+  ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeRemainingFrames());
+}
+
+TEST_F(ExternalFrameBufferTest, NullRealloc) {
+  const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(AOM_CODEC_OK,
+            SetFrameBufferFunctions(num_buffers, get_aom_zero_frame_buffer,
+                                    release_aom_frame_buffer));
+  ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeOneFrame());
+}
+
+TEST_F(ExternalFrameBufferTest, ReallocOneLessByte) {
+  const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(AOM_CODEC_OK, SetFrameBufferFunctions(
+                              num_buffers, get_aom_one_less_byte_frame_buffer,
+                              release_aom_frame_buffer));
+  ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeOneFrame());
+}
+
+TEST_F(ExternalFrameBufferTest, NullGetFunction) {
+  const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(
+      AOM_CODEC_INVALID_PARAM,
+      SetFrameBufferFunctions(num_buffers, nullptr, release_aom_frame_buffer));
+}
+
+TEST_F(ExternalFrameBufferTest, NullReleaseFunction) {
+  const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(
+      AOM_CODEC_INVALID_PARAM,
+      SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer, nullptr));
+}
+
+TEST_F(ExternalFrameBufferTest, SetAfterDecode) {
+  const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(AOM_CODEC_OK, DecodeOneFrame());
+  ASSERT_EQ(AOM_CODEC_ERROR,
+            SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
+                                    release_aom_frame_buffer));
+}
+
+TEST_F(ExternalFrameBufferNonRefTest, ReleaseNonRefFrameBuffer) {
+  const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
+  ASSERT_EQ(AOM_CODEC_OK,
+            SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
+                                    release_aom_frame_buffer));
+  ASSERT_EQ(AOM_CODEC_OK, DecodeRemainingFrames());
+  CheckFrameBufferRelease();
+}
+#endif  // CONFIG_WEBM_IO
+
+AV1_INSTANTIATE_TEST_SUITE(
+    ExternalFrameBufferMD5Test,
+    ::testing::ValuesIn(libaom_test::kAV1TestVectors,
+                        libaom_test::kAV1TestVectors +
+                            libaom_test::kNumAV1TestVectors));
+}  // namespace
diff --git a/third_party/aom/test/fdct4x4_test.cc b/third_party/aom/test/fdct4x4_test.cc
new file mode 100644
index 0000000000..9cbf208adb
--- /dev/null
+++ b/third_party/aom/test/fdct4x4_test.cc
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <tuple>
+
+#include "aom_dsp/aom_dsp_common.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+#include "av1/common/entropy.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+template <typename OutputType>
+using FdctFunc = void (*)(const int16_t *in, OutputType *out, int stride);
+
+template <typename OutputType>
+using FhtFunc = void (*)(const int16_t *in, OutputType *out, int stride,
+                         TxfmParam *txfm_param);
+
+template <typename OutputType>
+using Fdct4x4Param =
+    std::tuple<FdctFunc<OutputType>, FhtFunc<OutputType>, aom_bit_depth_t, int>;
+
+#if HAVE_NEON || HAVE_SSE2
+void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+                 TxfmParam * /*txfm_param*/) {
+  aom_fdct4x4_c(in, out, stride);
+}
+
+void fdct4x4_lp_ref(const int16_t *in, int16_t *out, int stride,
+                    TxfmParam * /*txfm_param*/) {
+  aom_fdct4x4_lp_c(in, out, stride);
+}
+#endif
+
+template <typename OutputType>
+class Trans4x4FDCT : public libaom_test::TransformTestBase<OutputType>,
+                     public ::testing::TestWithParam<Fdct4x4Param<OutputType>> {
+ public:
+  ~Trans4x4FDCT() override = default;
+
+  using TxfmBaseOutType = libaom_test::TransformTestBase<OutputType>;
+  void SetUp() override {
+    fwd_txfm_ = std::get<0>(this->GetParam());
+    TxfmBaseOutType::pitch_ = 4;
+    TxfmBaseOutType::height_ = 4;
+    TxfmBaseOutType::fwd_txfm_ref = std::get<1>(this->GetParam());
+    TxfmBaseOutType::bit_depth_ = std::get<2>(this->GetParam());
+    TxfmBaseOutType::mask_ = (1 << TxfmBaseOutType::bit_depth_) - 1;
+    TxfmBaseOutType::num_coeffs_ = std::get<3>(this->GetParam());
+  }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, OutputType *out, int stride) override {
+    fwd_txfm_(in, out, stride);
+  }
+
+  void RunInvTxfm(const OutputType *out, uint8_t *dst, int stride) override {
+    (void)out;
+    (void)dst;
+    (void)stride;
+  }
+
+  FdctFunc<OutputType> fwd_txfm_;
+};
+
+using Trans4x4FDCTTranLow = Trans4x4FDCT<tran_low_t>;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Trans4x4FDCTTranLow);
+TEST_P(Trans4x4FDCTTranLow, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(Trans4x4FDCTTranLow, MemCheck) { RunMemCheck(); }
+
+using Trans4x4FDCTInt16 = Trans4x4FDCT<int16_t>;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Trans4x4FDCTInt16);
+TEST_P(Trans4x4FDCTInt16, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(Trans4x4FDCTInt16, MemCheck) { RunMemCheck(); }
+
+using std::make_tuple;
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, Trans4x4FDCTTranLow,
+                         ::testing::Values(make_tuple(&aom_fdct4x4_neon,
+                                                      &fdct4x4_ref, AOM_BITS_8,
+                                                      16)));
+
+INSTANTIATE_TEST_SUITE_P(NEON, Trans4x4FDCTInt16,
+                         ::testing::Values(make_tuple(&aom_fdct4x4_lp_neon,
+                                                      &fdct4x4_lp_ref,
+                                                      AOM_BITS_8, 16)));
+#endif
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, Trans4x4FDCTTranLow,
+                         ::testing::Values(make_tuple(&aom_fdct4x4_sse2,
+                                                      &fdct4x4_ref, AOM_BITS_8,
+                                                      16)));
+
+INSTANTIATE_TEST_SUITE_P(SSE2, Trans4x4FDCTInt16,
+                         ::testing::Values(make_tuple(&aom_fdct4x4_lp_sse2,
+                                                      &fdct4x4_lp_ref,
+                                                      AOM_BITS_8, 16)));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/fft_test.cc b/third_party/aom/test/fft_test.cc
new file mode 100644
index 0000000000..06a17a3f8f
--- /dev/null
+++ b/third_party/aom/test/fft_test.cc
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+
+#include <algorithm>
+#include <complex>
+#include <ostream>
+#include <vector>
+
+#include "aom_dsp/fft_common.h"
+#include "aom_mem/aom_mem.h"
+#include "av1/common/common.h"
+#include "config/aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+typedef void (*tform_fun_t)(const float *input, float *temp, float *output);
+
+// Simple 1D FFT implementation
+template <typename InputType>
+void fft(const InputType *data, std::complex<float> *result, int n) {
+  if (n == 1) {
+    result[0] = data[0];
+    return;
+  }
+  std::vector<InputType> temp(n);
+  for (int k = 0; k < n / 2; ++k) {
+    temp[k] = data[2 * k];
+    temp[n / 2 + k] = data[2 * k + 1];
+  }
+  fft(&temp[0], result, n / 2);
+  fft(&temp[n / 2], result + n / 2, n / 2);
+  for (int k = 0; k < n / 2; ++k) {
+    std::complex<float> w = std::complex<float>((float)cos(2. * PI * k / n),
+                                                (float)-sin(2. * PI * k / n));
+    std::complex<float> a = result[k];
+    std::complex<float> b = result[n / 2 + k];
+    result[k] = a + w * b;
+    result[n / 2 + k] = a - w * b;
+  }
+}
+
+void transpose(std::vector<std::complex<float> > *data, int n) {
+  for (int y = 0; y < n; ++y) {
+    for (int x = y + 1; x < n; ++x) {
+      std::swap((*data)[y * n + x], (*data)[x * n + y]);
+    }
+  }
+}
+
+// Simple 2D FFT implementation
+template <class InputType>
+std::vector<std::complex<float> > fft2d(const InputType *input, int n) {
+  std::vector<std::complex<float> > rowfft(n * n);
+  std::vector<std::complex<float> > result(n * n);
+  for (int y = 0; y < n; ++y) {
+    fft(input + y * n, &rowfft[y * n], n);
+  }
+  transpose(&rowfft, n);
+  for (int y = 0; y < n; ++y) {
+    fft(&rowfft[y * n], &result[y * n], n);
+  }
+  transpose(&result, n);
+  return result;
+}
+
+struct FFTTestArg {
+  int n;
+  void (*fft)(const float *input, float *temp, float *output);
+  FFTTestArg(int n_in, tform_fun_t fft_in) : n(n_in), fft(fft_in) {}
+};
+
+std::ostream &operator<<(std::ostream &os, const FFTTestArg &test_arg) {
+  return os << "fft_arg { n:" << test_arg.n
+            << " fft:" << reinterpret_cast<const void *>(test_arg.fft) << " }";
+}
+
+class FFT2DTest : public ::testing::TestWithParam<FFTTestArg> {
+ protected:
+  void SetUp() override {
+    int n = GetParam().n;
+    input_ = (float *)aom_memalign(32, sizeof(*input_) * n * n);
+    temp_ = (float *)aom_memalign(32, sizeof(*temp_) * n * n);
+    output_ = (float *)aom_memalign(32, sizeof(*output_) * n * n * 2);
+    ASSERT_NE(input_, nullptr);
+    ASSERT_NE(temp_, nullptr);
+    ASSERT_NE(output_, nullptr);
+    memset(input_, 0, sizeof(*input_) * n * n);
+    memset(temp_, 0, sizeof(*temp_) * n * n);
+    memset(output_, 0, sizeof(*output_) * n * n * 2);
+  }
+  void TearDown() override {
+    aom_free(input_);
+    aom_free(temp_);
+    aom_free(output_);
+  }
+  float *input_;
+  float *temp_;
+  float *output_;
+};
+
+TEST_P(FFT2DTest, Correct) {
+  int n = GetParam().n;
+  for (int i = 0; i < n * n; ++i) {
+    input_[i] = 1;
+    std::vector<std::complex<float> > expected = fft2d<float>(&input_[0], n);
+    GetParam().fft(&input_[0], &temp_[0], &output_[0]);
+    for (int y = 0; y < n; ++y) {
+      for (int x = 0; x < (n / 2) + 1; ++x) {
+        EXPECT_NEAR(expected[y * n + x].real(), output_[2 * (y * n + x)], 1e-5);
+        EXPECT_NEAR(expected[y * n + x].imag(), output_[2 * (y * n + x) + 1],
+                    1e-5);
+      }
+    }
+    input_[i] = 0;
+  }
+}
+
+TEST_P(FFT2DTest, Benchmark) {
+  int n = GetParam().n;
+  float sum = 0;
+  const int num_trials = 1000 * (64 - n);
+  for (int i = 0; i < num_trials; ++i) {
+    input_[i % (n * n)] = 1;
+    GetParam().fft(&input_[0], &temp_[0], &output_[0]);
+    sum += output_[0];
+    input_[i % (n * n)] = 0;
+  }
+  EXPECT_NEAR(sum, num_trials, 1e-3);
+}
+
+INSTANTIATE_TEST_SUITE_P(C, FFT2DTest,
+                         ::testing::Values(FFTTestArg(2, aom_fft2x2_float_c),
+                                           FFTTestArg(4, aom_fft4x4_float_c),
+                                           FFTTestArg(8, aom_fft8x8_float_c),
+                                           FFTTestArg(16, aom_fft16x16_float_c),
+                                           FFTTestArg(32,
+                                                      aom_fft32x32_float_c)));
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, FFT2DTest,
+    ::testing::Values(FFTTestArg(4, aom_fft4x4_float_sse2),
+                      FFTTestArg(8, aom_fft8x8_float_sse2),
+                      FFTTestArg(16, aom_fft16x16_float_sse2),
+                      FFTTestArg(32, aom_fft32x32_float_sse2)));
+#endif  // HAVE_SSE2
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, FFT2DTest,
+    ::testing::Values(FFTTestArg(8, aom_fft8x8_float_avx2),
+                      FFTTestArg(16, aom_fft16x16_float_avx2),
+                      FFTTestArg(32, aom_fft32x32_float_avx2)));
+#endif  // HAVE_AVX2
+#endif  // AOM_ARCH_X86 || AOM_ARCH_X86_64
+
+struct IFFTTestArg {
+  int n;
+  tform_fun_t ifft;
+  IFFTTestArg(int n_in, tform_fun_t ifft_in) : n(n_in), ifft(ifft_in) {}
+};
+
+std::ostream &operator<<(std::ostream &os, const IFFTTestArg &test_arg) {
+  return os << "ifft_arg { n:" << test_arg.n
+            << " fft:" << reinterpret_cast<const void *>(test_arg.ifft) << " }";
+}
+
+class IFFT2DTest : public ::testing::TestWithParam<IFFTTestArg> {
+ protected:
+  void SetUp() override {
+    int n = GetParam().n;
+    input_ = (float *)aom_memalign(32, sizeof(*input_) * n * n * 2);
+    temp_ = (float *)aom_memalign(32, sizeof(*temp_) * n * n * 2);
+    output_ = (float *)aom_memalign(32, sizeof(*output_) * n * n);
+    ASSERT_NE(input_, nullptr);
+    ASSERT_NE(temp_, nullptr);
+    ASSERT_NE(output_, nullptr);
+    memset(input_, 0, sizeof(*input_) * n * n * 2);
+    memset(temp_, 0, sizeof(*temp_) * n * n * 2);
+    memset(output_, 0, sizeof(*output_) * n * n);
+  }
+  void TearDown() override {
+    aom_free(input_);
+    aom_free(temp_);
+    aom_free(output_);
+  }
+  float *input_;
+  float *temp_;
+  float *output_;
+};
+
+TEST_P(IFFT2DTest, Correctness) {
+  int n = GetParam().n;
+  ASSERT_GE(n, 2);
+  std::vector<float> expected(n * n);
+  std::vector<float> actual(n * n);
+  // Do forward transform then invert to make sure we get back expected
+  for (int y = 0; y < n; ++y) {
+    for (int x = 0; x < n; ++x) {
+      expected[y * n + x] = 1;
+      std::vector<std::complex<float> > input_c = fft2d(&expected[0], n);
+      for (int i = 0; i < n * n; ++i) {
+        input_[2 * i + 0] = input_c[i].real();
+        input_[2 * i + 1] = input_c[i].imag();
+      }
+      GetParam().ifft(&input_[0], &temp_[0], &output_[0]);
+
+      for (int yy = 0; yy < n; ++yy) {
+        for (int xx = 0; xx < n; ++xx) {
+          EXPECT_NEAR(expected[yy * n + xx], output_[yy * n + xx] / (n * n),
+                      1e-5);
+        }
+      }
+      expected[y * n + x] = 0;
+    }
+  }
+}
+
+TEST_P(IFFT2DTest, Benchmark) {
+  int n = GetParam().n;
+  float sum = 0;
+  const int num_trials = 1000 * (64 - n);
+  for (int i = 0; i < num_trials; ++i) {
+    input_[i % (n * n)] = 1;
+    GetParam().ifft(&input_[0], &temp_[0], &output_[0]);
+    sum += output_[0];
+    input_[i % (n * n)] = 0;
+  }
+  EXPECT_GE(sum, num_trials / 2);
+}
+INSTANTIATE_TEST_SUITE_P(
+    C, IFFT2DTest,
+    ::testing::Values(IFFTTestArg(2, aom_ifft2x2_float_c),
+                      IFFTTestArg(4, aom_ifft4x4_float_c),
+                      IFFTTestArg(8, aom_ifft8x8_float_c),
+                      IFFTTestArg(16, aom_ifft16x16_float_c),
+                      IFFTTestArg(32, aom_ifft32x32_float_c)));
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, IFFT2DTest,
+    ::testing::Values(IFFTTestArg(4, aom_ifft4x4_float_sse2),
+                      IFFTTestArg(8, aom_ifft8x8_float_sse2),
+                      IFFTTestArg(16, aom_ifft16x16_float_sse2),
+                      IFFTTestArg(32, aom_ifft32x32_float_sse2)));
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, IFFT2DTest,
+    ::testing::Values(IFFTTestArg(8, aom_ifft8x8_float_avx2),
+                      IFFTTestArg(16, aom_ifft16x16_float_avx2),
+                      IFFTTestArg(32, aom_ifft32x32_float_avx2)));
+#endif  // HAVE_AVX2
+#endif  // AOM_ARCH_X86 || AOM_ARCH_X86_64
+
+}  // namespace
diff --git a/third_party/aom/test/film_grain_table_test.cc b/third_party/aom/test/film_grain_table_test.cc
new file mode 100644
index 0000000000..808d966feb
--- /dev/null
+++ b/third_party/aom/test/film_grain_table_test.cc
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <string>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "aom_dsp/grain_table.h"
+#include "aom/internal/aom_codec_internal.h"
+#include "av1/encoder/grain_test_vectors.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/video_source.h"
+
+void grain_equal(const aom_film_grain_t *expected,
+                 const aom_film_grain_t *actual) {
+  EXPECT_EQ(expected->apply_grain, actual->apply_grain);
+  EXPECT_EQ(expected->update_parameters, actual->update_parameters);
+  if (!expected->update_parameters) return;
+  EXPECT_EQ(expected->num_y_points, actual->num_y_points);
+  EXPECT_EQ(expected->num_cb_points, actual->num_cb_points);
+  EXPECT_EQ(expected->num_cr_points, actual->num_cr_points);
+  EXPECT_EQ(0, memcmp(expected->scaling_points_y, actual->scaling_points_y,
+                      expected->num_y_points *
+                          sizeof(expected->scaling_points_y[0])));
+  EXPECT_EQ(0, memcmp(expected->scaling_points_cb, actual->scaling_points_cb,
+                      expected->num_cb_points *
+                          sizeof(expected->scaling_points_cb[0])));
+  EXPECT_EQ(0, memcmp(expected->scaling_points_cr, actual->scaling_points_cr,
+                      expected->num_cr_points *
+                          sizeof(expected->scaling_points_cr[0])));
+  EXPECT_EQ(expected->scaling_shift, actual->scaling_shift);
+  EXPECT_EQ(expected->ar_coeff_lag, actual->ar_coeff_lag);
+  EXPECT_EQ(expected->ar_coeff_shift, actual->ar_coeff_shift);
+
+  const int num_pos_luma =
+      2 * expected->ar_coeff_lag * (expected->ar_coeff_lag + 1);
+  const int num_pos_chroma = num_pos_luma;
+  EXPECT_EQ(0, memcmp(expected->ar_coeffs_y, actual->ar_coeffs_y,
+                      sizeof(expected->ar_coeffs_y[0]) * num_pos_luma));
+  if (actual->num_cb_points || actual->chroma_scaling_from_luma) {
+    EXPECT_EQ(0, memcmp(expected->ar_coeffs_cb, actual->ar_coeffs_cb,
+                        sizeof(expected->ar_coeffs_cb[0]) * num_pos_chroma));
+  }
+  if (actual->num_cr_points || actual->chroma_scaling_from_luma) {
+    EXPECT_EQ(0, memcmp(expected->ar_coeffs_cr, actual->ar_coeffs_cr,
+                        sizeof(expected->ar_coeffs_cr[0]) * num_pos_chroma));
+  }
+  EXPECT_EQ(expected->overlap_flag, actual->overlap_flag);
+  EXPECT_EQ(expected->chroma_scaling_from_luma,
+            actual->chroma_scaling_from_luma);
+  EXPECT_EQ(expected->grain_scale_shift, actual->grain_scale_shift);
+  // EXPECT_EQ(expected->random_seed, actual->random_seed);
+
+  // clip_to_restricted and bit_depth aren't written
+  if (expected->num_cb_points) {
+    EXPECT_EQ(expected->cb_mult, actual->cb_mult);
+    EXPECT_EQ(expected->cb_luma_mult, actual->cb_luma_mult);
+    EXPECT_EQ(expected->cb_offset, actual->cb_offset);
+  }
+  if (expected->num_cr_points) {
+    EXPECT_EQ(expected->cr_mult, actual->cr_mult);
+    EXPECT_EQ(expected->cr_luma_mult, actual->cr_luma_mult);
+    EXPECT_EQ(expected->cr_offset, actual->cr_offset);
+  }
+}
+
+TEST(FilmGrainTableTest, AddAndLookupSingleSegment) {
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+
+  aom_film_grain_t grain;
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
+
+  aom_film_grain_table_append(&table, 1000, 2000, film_grain_test_vectors + 0);
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
+
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
+
+  grain.bit_depth = film_grain_test_vectors[0].bit_depth;
+  EXPECT_EQ(0, memcmp(&grain, film_grain_test_vectors + 0, sizeof(table)));
+
+  // Extend the existing segment
+  aom_film_grain_table_append(&table, 2000, 3000, film_grain_test_vectors + 0);
+  EXPECT_EQ(nullptr, table.head->next);
+
+  // Lookup and remove and check that the entry is no longer there
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, true, &grain));
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
+
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, true, &grain));
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
+
+  EXPECT_EQ(nullptr, table.head);
+  EXPECT_EQ(nullptr, table.tail);
+  aom_film_grain_table_free(&table);
+}
+
+TEST(FilmGrainTableTest, AddSingleSegmentRemoveBiggerSegment) {
+  aom_film_grain_table_t table;
+  aom_film_grain_t grain;
+
+  memset(&table, 0, sizeof(table));
+
+  aom_film_grain_table_append(&table, 0, 1000, film_grain_test_vectors + 0);
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 0, 1100, true, &grain));
+
+  EXPECT_EQ(nullptr, table.head);
+  EXPECT_EQ(nullptr, table.tail);
+  aom_film_grain_table_free(&table);
+}
+
+TEST(FilmGrainTableTest, SplitSingleSegment) {
+  aom_film_grain_table_t table;
+  aom_film_grain_t grain;
+  memset(&table, 0, sizeof(table));
+
+  aom_film_grain_table_append(&table, 0, 1000, film_grain_test_vectors + 0);
+
+  // Test lookup and remove that adjusts start time
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 0, 100, true, &grain));
+  EXPECT_EQ(nullptr, table.head->next);
+  EXPECT_EQ(100, table.head->start_time);
+
+  // Test lookup and remove that adjusts end time
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 900, 1000, true, &grain));
+  EXPECT_EQ(nullptr, table.head->next);
+  EXPECT_EQ(100, table.head->start_time);
+  EXPECT_EQ(900, table.head->end_time);
+
+  // Test lookup and remove that splits the first entry
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 400, 600, true, &grain));
+  EXPECT_EQ(100, table.head->start_time);
+  EXPECT_EQ(400, table.head->end_time);
+
+  ASSERT_NE(nullptr, table.head->next);
+  EXPECT_EQ(table.tail, table.head->next);
+  EXPECT_EQ(600, table.head->next->start_time);
+  EXPECT_EQ(900, table.head->next->end_time);
+
+  aom_film_grain_table_free(&table);
+}
+
+TEST(FilmGrainTableTest, AddAndLookupMultipleSegments) {
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+
+  aom_film_grain_t grain;
+  const int kNumTestVectors =
+      sizeof(film_grain_test_vectors) / sizeof(film_grain_test_vectors[0]);
+  for (int i = 0; i < kNumTestVectors; ++i) {
+    aom_film_grain_table_append(&table, i * 1000, (i + 1) * 1000,
+                                film_grain_test_vectors + i);
+  }
+
+  for (int i = kNumTestVectors - 1; i >= 0; --i) {
+    EXPECT_TRUE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
+                                            true, &grain));
+    grain_equal(film_grain_test_vectors + i, &grain);
+    EXPECT_FALSE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
+                                             true, &grain));
+  }
+
+  // Verify that all the data has been removed
+  for (int i = 0; i < kNumTestVectors; ++i) {
+    EXPECT_FALSE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
+                                             true, &grain));
+  }
+  aom_film_grain_table_free(&table);
+}
+
+class FilmGrainTableIOTest : public ::testing::Test {
+ protected:
+  void SetUp() override { memset(&error_, 0, sizeof(error_)); }
+  struct aom_internal_error_info error_;
+};
+
+TEST_F(FilmGrainTableIOTest, ReadMissingFile) {
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+  ASSERT_EQ(AOM_CODEC_ERROR, aom_film_grain_table_read(
+                                 &table, "/path/to/missing/file", &error_));
+}
+
+TEST_F(FilmGrainTableIOTest, ReadTruncatedFile) {
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+
+  std::string grain_file;
+  FILE *file = libaom_test::GetTempOutFile(&grain_file);
+  ASSERT_NE(file, nullptr);
+  fwrite("deadbeef", 8, 1, file);
+  fclose(file);
+  ASSERT_EQ(AOM_CODEC_ERROR,
+            aom_film_grain_table_read(&table, grain_file.c_str(), &error_));
+  EXPECT_EQ(0, remove(grain_file.c_str()));
+}
+
+TEST_F(FilmGrainTableIOTest, RoundTripReadWrite) {
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+
+  aom_film_grain_t expected_grain[16];
+  const int kNumTestVectors =
+      sizeof(film_grain_test_vectors) / sizeof(film_grain_test_vectors[0]);
+  for (int i = 0; i < kNumTestVectors; ++i) {
+    expected_grain[i] = film_grain_test_vectors[i];
+    expected_grain[i].random_seed = i;
+    expected_grain[i].update_parameters = i % 2;
+    expected_grain[i].apply_grain = (i + 1) % 2;
+    expected_grain[i].bit_depth = 0;
+    aom_film_grain_table_append(&table, i * 1000, (i + 1) * 1000,
+                                expected_grain + i);
+  }
+  std::string grain_file;
+  FILE *tmpfile = libaom_test::GetTempOutFile(&grain_file);
+  ASSERT_NE(tmpfile, nullptr);
+  fclose(tmpfile);
+  ASSERT_EQ(AOM_CODEC_OK,
+            aom_film_grain_table_write(&table, grain_file.c_str(), &error_));
+  aom_film_grain_table_free(&table);
+
+  memset(&table, 0, sizeof(table));
+  ASSERT_EQ(AOM_CODEC_OK,
+            aom_film_grain_table_read(&table, grain_file.c_str(), &error_));
+  for (int i = 0; i < kNumTestVectors; ++i) {
+    aom_film_grain_t grain;
+    EXPECT_TRUE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
+                                            true, &grain));
+    grain_equal(expected_grain + i, &grain);
+  }
+  aom_film_grain_table_free(&table);
+  EXPECT_EQ(0, remove(grain_file.c_str()));
+}
+
+TEST_F(FilmGrainTableIOTest, RoundTripSplit) {
+  std::string grain_file;
+  FILE *tmpfile = libaom_test::GetTempOutFile(&grain_file);
+  ASSERT_NE(tmpfile, nullptr);
+  fclose(tmpfile);
+
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+
+  aom_film_grain_t grain = film_grain_test_vectors[0];
+  aom_film_grain_table_append(&table, 0, 3000, &grain);
+  ASSERT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, true, &grain));
+  ASSERT_TRUE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
+  ASSERT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
+  ASSERT_EQ(AOM_CODEC_OK,
+            aom_film_grain_table_write(&table, grain_file.c_str(), &error_));
+  aom_film_grain_table_free(&table);
+
+  memset(&table, 0, sizeof(table));
+  ASSERT_EQ(AOM_CODEC_OK,
+            aom_film_grain_table_read(&table, grain_file.c_str(), &error_));
+  ASSERT_TRUE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
+  ASSERT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
+  ASSERT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
+  aom_film_grain_table_free(&table);
+
+  EXPECT_EQ(0, remove(grain_file.c_str()));
+}
+
+const ::libaom_test::TestMode kFilmGrainEncodeTestModes[] = {
+  ::libaom_test::kRealTime,
+#if !CONFIG_REALTIME_ONLY
+  ::libaom_test::kOnePassGood
+#endif
+};
+
+class FilmGrainEncodeTest
+    : public ::libaom_test::CodecTestWith3Params<int, int,
+                                                 ::libaom_test::TestMode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  FilmGrainEncodeTest()
+      : EncoderTest(GET_PARAM(0)), test_monochrome_(GET_PARAM(1)),
+        key_frame_dist_(GET_PARAM(2)), test_mode_(GET_PARAM(3)) {}
+  ~FilmGrainEncodeTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(test_mode_);
+    cfg_.monochrome = test_monochrome_ == 1;
+    cfg_.rc_target_bitrate = 300;
+    cfg_.kf_max_dist = key_frame_dist_;
+    cfg_.g_lag_in_frames = 0;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED,
+                       test_mode_ == ::libaom_test::kRealTime ? 7 : 5);
+      encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_FILM);
+      encoder->Control(AV1E_SET_DENOISE_NOISE_LEVEL, 1);
+    } else if (video->frame() == 1) {
+      cfg_.monochrome = (test_monochrome_ == 1 || test_monochrome_ == 2);
+      encoder->Config(&cfg_);
+    } else {
+      cfg_.monochrome = test_monochrome_ == 1;
+      encoder->Config(&cfg_);
+    }
+  }
+
+  bool DoDecode() const override { return false; }
+
+  void DoTest() {
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 3);
+    cfg_.g_w = video.img()->d_w;
+    cfg_.g_h = video.img()->d_h;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+ private:
+  // 0: monochroome always off.
+  // 1: monochrome always on.
+  // 2: monochrome changes from 0, 1, 0, for encoded frames 0, 1, 2.
+  // The case where monochrome changes from 1 to 0 (i.e., encoder initialized
+  // with monochrome = 1 and then subsequently encoded with monochrome = 0)
+  // will fail. The test InitMonochrome1_EncodeMonochrome0 below verifies this.
+  int test_monochrome_;
+  int key_frame_dist_;
+  ::libaom_test::TestMode test_mode_;
+};
+
+TEST_P(FilmGrainEncodeTest, Test) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(FilmGrainEncodeTest, ::testing::Range(0, 3),
+                           ::testing::Values(0, 10),
+                           ::testing::ValuesIn(kFilmGrainEncodeTestModes));
+
+// Initialize encoder with monochrome = 1, and then encode frame with
+// monochrome = 0. This will result in an error: see the following check
+// in encoder_set_config() in av1/av1_cx_iface.c.
+// TODO(marpan): Consider moving this test to another file, as the failure
+// has nothing to do with film grain mode.
+TEST(FilmGrainEncodeTest, InitMonochrome1EncodeMonochrome0) {
+  const int kWidth = 352;
+  const int kHeight = 288;
+  const int usage = AOM_USAGE_REALTIME;
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage), AOM_CODEC_OK);
+  aom_codec_ctx_t enc;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  // Initialize encoder, with monochrome = 0.
+  cfg.monochrome = 1;
+  aom_codec_err_t init_status = aom_codec_enc_init(&enc, iface, &cfg, 0);
+  ASSERT_EQ(init_status, AOM_CODEC_OK);
+  ASSERT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 7), AOM_CODEC_OK);
+  ASSERT_EQ(aom_codec_control(&enc, AV1E_SET_TUNE_CONTENT, AOM_CONTENT_FILM),
+            AOM_CODEC_OK);
+  ASSERT_EQ(aom_codec_control(&enc, AV1E_SET_DENOISE_NOISE_LEVEL, 1),
+            AOM_CODEC_OK);
+  // Set image with zero values.
+  constexpr size_t kBufferSize =
+      kWidth * kHeight + 2 * (kWidth + 1) / 2 * (kHeight + 1) / 2;
+  std::vector<unsigned char> buffer(kBufferSize);
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1,
+                               buffer.data()));
+  // Encode first frame.
+  ASSERT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK);
+  // Second frame: update config with monochrome = 1.
+  cfg.monochrome = 0;
+  ASSERT_EQ(aom_codec_enc_config_set(&enc, &cfg), AOM_CODEC_INVALID_PARAM);
+  ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
diff --git a/third_party/aom/test/filterintra_test.cc b/third_party/aom/test/filterintra_test.cc
new file mode 100644
index 0000000000..0a0ab11dc3
--- /dev/null
+++ b/third_party/aom/test/filterintra_test.cc
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/enums.h"
+
+namespace {
+
+using libaom_test::ACMRandom;
+using std::tuple;
+
+typedef void (*Predictor)(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
+                          const uint8_t *above, const uint8_t *left, int mode);
+
+// Note:
+//  Test parameter list:
+//  Reference predictor, optimized predictor, prediction mode, tx size
+//
+typedef tuple<Predictor, Predictor, int> PredFuncMode;
+typedef tuple<PredFuncMode, TX_SIZE> PredParams;
+
+const int MaxTxSize = 32;
+
+const int MaxTestNum = 100;
+
+class AV1FilterIntraPredTest : public ::testing::TestWithParam<PredParams> {
+ public:
+  ~AV1FilterIntraPredTest() override = default;
+  void SetUp() override {
+    PredFuncMode funcMode = GET_PARAM(0);
+    predFuncRef_ = std::get<0>(funcMode);
+    predFunc_ = std::get<1>(funcMode);
+    mode_ = std::get<2>(funcMode);
+    txSize_ = GET_PARAM(1);
+
+    alloc_ = new uint8_t[2 * MaxTxSize + 1];
+    predRef_ = new uint8_t[MaxTxSize * MaxTxSize];
+    pred_ = new uint8_t[MaxTxSize * MaxTxSize];
+    ASSERT_NE(alloc_, nullptr);
+    ASSERT_NE(predRef_, nullptr);
+    ASSERT_NE(pred_, nullptr);
+  }
+
+  void TearDown() override {
+    delete[] alloc_;
+    delete[] predRef_;
+    delete[] pred_;
+  }
+
+ protected:
+  void RunTest() const {
+    int tstIndex = 0;
+    int stride = tx_size_wide[txSize_];
+    uint8_t *left = alloc_;
+    uint8_t *above = alloc_ + MaxTxSize;
+    while (tstIndex < MaxTestNum) {
+      PrepareBuffer();
+      predFuncRef_(predRef_, stride, txSize_, &above[1], left, mode_);
+      API_REGISTER_STATE_CHECK(
+          predFunc_(pred_, stride, txSize_, &above[1], left, mode_));
+      DiffPred(tstIndex);
+      tstIndex += 1;
+    }
+  }
+  void RunSpeedTest() const {
+    int stride = tx_size_wide[txSize_];
+    uint8_t *left = alloc_;
+    uint8_t *above = alloc_ + MaxTxSize;
+    const int numIter = 5000;
+
+    PrepareBuffer();
+    aom_usec_timer ref_timer;
+    aom_usec_timer_start(&ref_timer);
+    for (int i = 0; i < numIter; i++) {
+      predFuncRef_(predRef_, stride, txSize_, &above[1], left, mode_);
+    }
+    aom_usec_timer_mark(&ref_timer);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < numIter; i++) {
+      predFunc_(pred_, stride, txSize_, &above[1], left, mode_);
+    }
+    aom_usec_timer_mark(&timer);
+
+    const int ref_sum_time =
+        static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+    const int sum_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+    printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \t mode =  %d \n",
+           ref_sum_time, sum_time,
+           (static_cast<float>(ref_sum_time) / static_cast<float>(sum_time)),
+           static_cast<int>(mode_));
+
+    DiffPred(0);
+  }
+
+ private:
+  void PrepareBuffer() const {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    int i = 0;
+    while (i < (2 * MaxTxSize + 1)) {
+      alloc_[i] = rnd.Rand8();
+      i++;
+    }
+  }
+
+  void DiffPred(int testNum) const {
+    int i = 0;
+    while (i < tx_size_wide[txSize_] * tx_size_high[txSize_]) {
+      EXPECT_EQ(predRef_[i], pred_[i]) << "Error at position: " << i << " "
+                                       << "Tx size: " << tx_size_wide[txSize_]
+                                       << "x" << tx_size_high[txSize_] << " "
+                                       << "Test number: " << testNum;
+      i++;
+    }
+  }
+
+  Predictor predFunc_;
+  Predictor predFuncRef_;
+  int mode_;
+  TX_SIZE txSize_;
+  uint8_t *alloc_;
+  uint8_t *pred_;
+  uint8_t *predRef_;
+};
+
+TEST_P(AV1FilterIntraPredTest, BitExactCheck) { RunTest(); }
+
+TEST_P(AV1FilterIntraPredTest, DISABLED_Speed) { RunSpeedTest(); }
+
+using ::testing::make_tuple;
+#if HAVE_SSE4_1
+const PredFuncMode kPredFuncMdArray[] = {
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
+             FILTER_DC_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
+             FILTER_V_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
+             FILTER_H_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
+             FILTER_D157_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
+             FILTER_PAETH_PRED),
+};
+
+const TX_SIZE kTxSize[] = { TX_4X4,  TX_8X8,  TX_16X16, TX_32X32, TX_4X8,
+                            TX_8X4,  TX_8X16, TX_16X8,  TX_16X32, TX_32X16,
+                            TX_4X16, TX_16X4, TX_8X32,  TX_32X8 };
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AV1FilterIntraPredTest,
+    ::testing::Combine(::testing::ValuesIn(kPredFuncMdArray),
+                       ::testing::ValuesIn(kTxSize)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+const PredFuncMode kPredFuncMdArrayNEON[] = {
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_neon,
+             FILTER_DC_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_neon,
+             FILTER_V_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_neon,
+             FILTER_H_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_neon,
+             FILTER_D157_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_neon,
+             FILTER_PAETH_PRED),
+};
+
+const TX_SIZE kTxSizeNEON[] = { TX_4X4,  TX_8X8,  TX_16X16, TX_32X32, TX_4X8,
+                                TX_8X4,  TX_8X16, TX_16X8,  TX_16X32, TX_32X16,
+                                TX_4X16, TX_16X4, TX_8X32,  TX_32X8 };
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1FilterIntraPredTest,
+    ::testing::Combine(::testing::ValuesIn(kPredFuncMdArrayNEON),
+                       ::testing::ValuesIn(kTxSizeNEON)));
+#endif  // HAVE_NEON
+
+}  // namespace
diff --git a/third_party/aom/test/firstpass_test.cc b/third_party/aom/test/firstpass_test.cc
new file mode 100644
index 0000000000..1f4f3b7853
--- /dev/null
+++ b/third_party/aom/test/firstpass_test.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stddef.h>
+
+#include "av1/common/common.h"
+#include "av1/encoder/firstpass.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+TEST(FirstpassTest, FirstpassInfoInitWithExtBuf) {
+  FIRSTPASS_INFO firstpass_info;
+  FIRSTPASS_STATS ext_stats_buf[10];
+  const int ref_stats_size = 10;
+  for (int i = 0; i < ref_stats_size; ++i) {
+    av1_zero(ext_stats_buf[i]);
+    ext_stats_buf[i].frame = i;
+  }
+  aom_codec_err_t ret =
+      av1_firstpass_info_init(&firstpass_info, ext_stats_buf, 10);
+  EXPECT_EQ(firstpass_info.stats_count, ref_stats_size);
+  EXPECT_EQ(firstpass_info.future_stats_count + firstpass_info.past_stats_count,
+            firstpass_info.stats_count);
+  EXPECT_EQ(firstpass_info.cur_index, 0);
+  EXPECT_EQ(ret, AOM_CODEC_OK);
+}
+
+TEST(FirstpassTest, FirstpassInfoInitWithStaticBuf) {
+  FIRSTPASS_INFO firstpass_info;
+  aom_codec_err_t ret = av1_firstpass_info_init(&firstpass_info, nullptr, 0);
+  EXPECT_EQ(firstpass_info.stats_count, 0);
+  EXPECT_EQ(firstpass_info.cur_index, 0);
+  EXPECT_EQ(ret, AOM_CODEC_OK);
+}
+
+TEST(FirstpassTest, FirstpassInfoPushPop) {
+  FIRSTPASS_INFO firstpass_info;
+  av1_firstpass_info_init(&firstpass_info, nullptr, 0);
+  EXPECT_EQ(firstpass_info.stats_buf_size, FIRSTPASS_INFO_STATIC_BUF_SIZE);
+  for (int i = 0; i < FIRSTPASS_INFO_STATIC_BUF_SIZE; ++i) {
+    FIRSTPASS_STATS stats;
+    av1_zero(stats);
+    stats.frame = i;
+    aom_codec_err_t ret = av1_firstpass_info_push(&firstpass_info, &stats);
+    EXPECT_EQ(ret, AOM_CODEC_OK);
+  }
+  EXPECT_EQ(firstpass_info.stats_count, FIRSTPASS_INFO_STATIC_BUF_SIZE);
+  const int pop_count = FIRSTPASS_INFO_STATIC_BUF_SIZE / 2;
+  for (int i = 0; i < pop_count; ++i) {
+    const FIRSTPASS_STATS *stats = av1_firstpass_info_peek(&firstpass_info, 0);
+    aom_codec_err_t ret =
+        av1_firstpass_info_move_cur_index_and_pop(&firstpass_info);
+    EXPECT_NE(stats, nullptr);
+    EXPECT_EQ(stats->frame, i);
+    EXPECT_EQ(ret, AOM_CODEC_OK);
+  }
+  EXPECT_EQ(firstpass_info.stats_count,
+            FIRSTPASS_INFO_STATIC_BUF_SIZE - pop_count);
+
+  const int push_count = FIRSTPASS_INFO_STATIC_BUF_SIZE / 2;
+  for (int i = 0; i < push_count; ++i) {
+    FIRSTPASS_STATS stats;
+    av1_zero(stats);
+    aom_codec_err_t ret = av1_firstpass_info_push(&firstpass_info, &stats);
+    EXPECT_EQ(ret, AOM_CODEC_OK);
+  }
+  EXPECT_EQ(firstpass_info.stats_count, FIRSTPASS_INFO_STATIC_BUF_SIZE);
+
+  EXPECT_EQ(firstpass_info.stats_count, firstpass_info.stats_buf_size);
+  {
+    // Push the stats when the queue is full.
+    FIRSTPASS_STATS stats;
+    av1_zero(stats);
+    aom_codec_err_t ret = av1_firstpass_info_push(&firstpass_info, &stats);
+    EXPECT_EQ(ret, AOM_CODEC_ERROR);
+  }
+}
+
+TEST(FirstpassTest, FirstpassInfoTotalStats) {
+  FIRSTPASS_INFO firstpass_info;
+  av1_firstpass_info_init(&firstpass_info, nullptr, 0);
+  EXPECT_EQ(firstpass_info.total_stats.frame, 0);
+  for (int i = 0; i < 10; ++i) {
+    FIRSTPASS_STATS stats;
+    av1_zero(stats);
+    stats.count = 1;
+    av1_firstpass_info_push(&firstpass_info, &stats);
+  }
+  EXPECT_EQ(firstpass_info.total_stats.count, 10);
+}
+
+TEST(FirstpassTest, FirstpassInfoMoveCurr) {
+  FIRSTPASS_INFO firstpass_info;
+  av1_firstpass_info_init(&firstpass_info, nullptr, 0);
+  int frame_cnt = 0;
+  EXPECT_EQ(firstpass_info.stats_buf_size, FIRSTPASS_INFO_STATIC_BUF_SIZE);
+  for (int i = 0; i < FIRSTPASS_INFO_STATIC_BUF_SIZE; ++i) {
+    FIRSTPASS_STATS stats;
+    av1_zero(stats);
+    stats.frame = frame_cnt;
+    ++frame_cnt;
+    aom_codec_err_t ret = av1_firstpass_info_push(&firstpass_info, &stats);
+    EXPECT_EQ(ret, AOM_CODEC_OK);
+  }
+  EXPECT_EQ(firstpass_info.cur_index, firstpass_info.start_index);
+  {
+    aom_codec_err_t ret = av1_firstpass_info_pop(&firstpass_info);
+    // We cannot pop when cur_index == start_index
+    EXPECT_EQ(ret, AOM_CODEC_ERROR);
+  }
+  int ref_frame_cnt = 0;
+  const int move_count = FIRSTPASS_INFO_STATIC_BUF_SIZE * 2 / 3;
+  for (int i = 0; i < move_count; ++i) {
+    const FIRSTPASS_STATS *this_stats =
+        av1_firstpass_info_peek(&firstpass_info, 0);
+    EXPECT_EQ(this_stats->frame, ref_frame_cnt);
+    ++ref_frame_cnt;
+    av1_firstpass_info_move_cur_index(&firstpass_info);
+  }
+  EXPECT_EQ(firstpass_info.future_stats_count,
+            FIRSTPASS_INFO_STATIC_BUF_SIZE - move_count);
+  EXPECT_EQ(firstpass_info.past_stats_count, move_count);
+  EXPECT_EQ(firstpass_info.stats_count, FIRSTPASS_INFO_STATIC_BUF_SIZE);
+
+  const int test_count = FIRSTPASS_INFO_STATIC_BUF_SIZE / 2;
+  for (int i = 0; i < test_count; ++i) {
+    aom_codec_err_t ret = av1_firstpass_info_pop(&firstpass_info);
+    EXPECT_EQ(ret, AOM_CODEC_OK);
+  }
+
+  // Pop #test_count stats
+  for (int i = 0; i < test_count; ++i) {
+    FIRSTPASS_STATS stats;
+    av1_zero(stats);
+    stats.frame = frame_cnt;
+    ++frame_cnt;
+    aom_codec_err_t ret = av1_firstpass_info_push(&firstpass_info, &stats);
+    EXPECT_EQ(ret, AOM_CODEC_OK);
+  }
+
+  // peek and move #test_count stats
+  for (int i = 0; i < test_count; ++i) {
+    const FIRSTPASS_STATS *this_stats =
+        av1_firstpass_info_peek(&firstpass_info, 0);
+    EXPECT_EQ(this_stats->frame, ref_frame_cnt);
+    ++ref_frame_cnt;
+    av1_firstpass_info_move_cur_index(&firstpass_info);
+  }
+
+  // pop #test_count stats
+  for (int i = 0; i < test_count; ++i) {
+    aom_codec_err_t ret = av1_firstpass_info_pop(&firstpass_info);
+    EXPECT_EQ(ret, AOM_CODEC_OK);
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/force_key_frame_test.cc b/third_party/aom/test/force_key_frame_test.cc
new file mode 100644
index 0000000000..2b85d26530
--- /dev/null
+++ b/third_party/aom/test/force_key_frame_test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+// Tests for https://crbug.com/aomedia/3327.
+//
+// In good-quality mode, set cfg.g_lag_in_frames to 1 or 0 and encode two
+// frames in one-pass mode. Pass AOM_EFLAG_FORCE_KF to the second
+// aom_codec_encode() call. Both frames should be encoded as key frames.
+
+#include <memory>
+
+#include "aom/aomcx.h"
+#include "aom/aom_encoder.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+void TestOnePassMode(unsigned int lag_in_frames) {
+  // A buffer of gray samples of size 128x128, YUV 4:2:0.
+  constexpr size_t kImageDataSize = 128 * 128 + 2 * 64 * 64;
+  std::unique_ptr<unsigned char[]> img_data(new unsigned char[kImageDataSize]);
+  ASSERT_NE(img_data, nullptr);
+  memset(img_data.get(), 128, kImageDataSize);
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY));
+  cfg.g_w = 128;
+  cfg.g_h = 128;
+  cfg.g_pass = AOM_RC_ONE_PASS;
+  cfg.g_lag_in_frames = lag_in_frames;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+
+  aom_image_t img;
+  EXPECT_EQ(&img,
+            aom_img_wrap(&img, AOM_IMG_FMT_I420, 128, 128, 1, img_data.get()));
+
+  aom_codec_iter_t iter;
+  const aom_codec_cx_pkt_t *pkt;
+  int frame_count = 0;
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+
+  iter = nullptr;
+  while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) {
+    ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+    EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u)
+        << "frame " << frame_count;
+    frame_count++;
+  }
+
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_encode(&enc, &img, 1, 1, AOM_EFLAG_FORCE_KF));
+
+  iter = nullptr;
+  while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) {
+    ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+    EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u)
+        << "frame " << frame_count;
+    frame_count++;
+  }
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0));
+
+  iter = nullptr;
+  while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) {
+    ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+    EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u)
+        << "frame " << frame_count;
+    frame_count++;
+  }
+
+  EXPECT_EQ(frame_count, 2);
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+TEST(ForceKeyFrameTest, OnePassModeLag0) { TestOnePassMode(0); }
+
+TEST(ForceKeyFrameTest, OnePassModeLag1) { TestOnePassMode(1); }
+
+TEST(ForceKeyFrameTest, OnePassModeLag2) { TestOnePassMode(2); }
+
+}  // namespace
diff --git a/third_party/aom/test/forced_max_frame_width_height_test.cc b/third_party/aom/test/forced_max_frame_width_height_test.cc
new file mode 100644
index 0000000000..3347713c5b
--- /dev/null
+++ b/third_party/aom/test/forced_max_frame_width_height_test.cc
@@ -0,0 +1,280 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+// Tests for https://crbug.com/aomedia/3326.
+//
+// Set cfg.g_forced_max_frame_width and cfg.g_forced_max_frame_height and
+// encode two frames of increasing sizes. The second aom_codec_encode() should
+// not crash or have memory errors.
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "aom/aomcx.h"
+#include "aom/aom_encoder.h"
+#include "config/aom_config.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+// cfg.g_lag_in_frames must be set to 0 or 1 to allow the frame size to change,
+// as required by the following check in encoder_set_config() in
+// av1/av1_cx_iface.c:
+//
+//   if (cfg->g_w != ctx->cfg.g_w || cfg->g_h != ctx->cfg.g_h) {
+//     if (cfg->g_lag_in_frames > 1 || cfg->g_pass != AOM_RC_ONE_PASS)
+//       ERROR("Cannot change width or height after initialization");
+//     ...
+//   }
+
+void RunTest(unsigned int usage, unsigned int lag_in_frames,
+             const char *tune_metric) {
+  // A buffer of gray samples. Large enough for 128x128 and 256x256, YUV 4:2:0.
+  constexpr size_t kImageDataSize = 256 * 256 + 2 * 128 * 128;
+  std::unique_ptr<unsigned char[]> img_data(new unsigned char[kImageDataSize]);
+  ASSERT_NE(img_data, nullptr);
+  memset(img_data.get(), 128, kImageDataSize);
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, usage));
+  cfg.g_w = 128;
+  cfg.g_h = 128;
+  cfg.g_forced_max_frame_width = 256;
+  cfg.g_forced_max_frame_height = 256;
+  cfg.g_lag_in_frames = lag_in_frames;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_set_option(&enc, "tune", tune_metric));
+
+  aom_image_t img;
+  EXPECT_EQ(&img,
+            aom_img_wrap(&img, AOM_IMG_FMT_I420, 128, 128, 1, img_data.get()));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+
+  cfg.g_w = 256;
+  cfg.g_h = 256;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg));
+
+  EXPECT_EQ(&img,
+            aom_img_wrap(&img, AOM_IMG_FMT_I420, 256, 256, 1, img_data.get()));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+#if !CONFIG_REALTIME_ONLY
+
+TEST(EncodeForcedMaxFrameWidthHeight, GoodQualityLag0TunePSNR) {
+  RunTest(AOM_USAGE_GOOD_QUALITY, /*lag_in_frames=*/0, "psnr");
+}
+
+TEST(EncodeForcedMaxFrameWidthHeight, GoodQualityLag0TuneSSIM) {
+  RunTest(AOM_USAGE_GOOD_QUALITY, /*lag_in_frames=*/0, "ssim");
+}
+
+TEST(EncodeForcedMaxFrameWidthHeight, GoodQualityLag1TunePSNR) {
+  RunTest(AOM_USAGE_GOOD_QUALITY, /*lag_in_frames=*/1, "psnr");
+}
+
+TEST(EncodeForcedMaxFrameWidthHeight, GoodQualityLag1TuneSSIM) {
+  RunTest(AOM_USAGE_GOOD_QUALITY, /*lag_in_frames=*/1, "ssim");
+}
+
+void FillImageGradient(aom_image_t *image, int bit_depth) {
+  assert(image->range == AOM_CR_FULL_RANGE);
+  for (int plane = 0; plane < 3; plane++) {
+    const int plane_width = aom_img_plane_width(image, plane);
+    const int plane_height = aom_img_plane_height(image, plane);
+    unsigned char *row = image->planes[plane];
+    const int stride = image->stride[plane];
+    for (int y = 0; y < plane_height; ++y) {
+      for (int x = 0; x < plane_width; ++x) {
+        const int value = (x + y) * ((1 << bit_depth) - 1) /
+                          std::max(1, plane_width + plane_height - 2);
+        assert(value >= 0 && value <= (1 << bit_depth) - 1);
+        if (bit_depth > 8) {
+          reinterpret_cast<uint16_t *>(row)[x] = static_cast<uint16_t>(value);
+        } else {
+          row[x] = static_cast<unsigned char>(value);
+        }
+      }
+      row += stride;
+    }
+  }
+}
+
+TEST(EncodeForcedMaxFrameWidthHeight, DimensionDecreasing) {
+  constexpr int kWidth = 128;
+  constexpr int kHeight = 128;
+  constexpr size_t kBufferSize = 3 * kWidth * kHeight;
+  std::vector<unsigned char> buffer(kBufferSize);
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1,
+                               buffer.data()));
+  img.cp = AOM_CICP_CP_UNSPECIFIED;
+  img.tc = AOM_CICP_TC_UNSPECIFIED;
+  img.mc = AOM_CICP_MC_UNSPECIFIED;
+  img.range = AOM_CR_FULL_RANGE;
+  FillImageGradient(&img, 8);
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY));
+  cfg.rc_end_usage = AOM_Q;
+  cfg.g_profile = 0;
+  cfg.g_bit_depth = AOM_BITS_8;
+  cfg.g_input_bit_depth = 8;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_forced_max_frame_width = kWidth;
+  cfg.g_forced_max_frame_height = kHeight;
+  cfg.g_lag_in_frames = 1;
+  cfg.rc_min_quantizer = 20;
+  cfg.rc_max_quantizer = 40;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 30));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 6));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM));
+
+  // First frame
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Second frame
+  constexpr int kWidthSmall = 64;
+  constexpr int kHeightSmall = 64;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidthSmall,
+                               kHeightSmall, 1, buffer.data()));
+  img.cp = AOM_CICP_CP_UNSPECIFIED;
+  img.tc = AOM_CICP_TC_UNSPECIFIED;
+  img.mc = AOM_CICP_MC_UNSPECIFIED;
+  img.range = AOM_CR_FULL_RANGE;
+  FillImageGradient(&img, 8);
+  cfg.g_w = kWidthSmall;
+  cfg.g_h = kHeightSmall;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+#endif  // !CONFIG_REALTIME_ONLY
+
+TEST(EncodeForcedMaxFrameWidthHeight, RealtimeLag0TunePSNR) {
+  RunTest(AOM_USAGE_REALTIME, /*lag_in_frames=*/0, "psnr");
+}
+
+TEST(EncodeForcedMaxFrameWidthHeight, RealtimeLag0TuneSSIM) {
+  RunTest(AOM_USAGE_REALTIME, /*lag_in_frames=*/0, "ssim");
+}
+
+TEST(EncodeForcedMaxFrameWidthHeight, RealtimeLag1TunePSNR) {
+  RunTest(AOM_USAGE_REALTIME, /*lag_in_frames=*/1, "psnr");
+}
+
+TEST(EncodeForcedMaxFrameWidthHeight, RealtimeLag1TuneSSIM) {
+  RunTest(AOM_USAGE_REALTIME, /*lag_in_frames=*/1, "ssim");
+}
+
+TEST(EncodeForcedMaxFrameWidthHeight, MaxFrameSizeTooBig) {
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME));
+  cfg.g_w = 256;
+  cfg.g_h = 256;
+  cfg.g_forced_max_frame_width = 131072;
+  cfg.g_forced_max_frame_height = 131072;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0));
+}
+
+TEST(EncodeForcedMaxFrameWidthHeight, FirstFrameTooBig) {
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME));
+  cfg.g_w = 258;
+  cfg.g_h = 256;
+  cfg.g_forced_max_frame_width = 256;
+  cfg.g_forced_max_frame_height = 256;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  cfg.g_w = 256;
+  cfg.g_h = 258;
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  cfg.g_w = 256;
+  cfg.g_h = 256;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+TEST(EncodeForcedMaxFrameWidthHeight, SecondFrameTooBig) {
+  // A buffer of gray samples. Large enough for 128x128 and 256x256, YUV 4:2:0.
+  constexpr size_t kImageDataSize = 256 * 256 + 2 * 128 * 128;
+  std::unique_ptr<unsigned char[]> img_data(new unsigned char[kImageDataSize]);
+  ASSERT_NE(img_data, nullptr);
+  memset(img_data.get(), 128, kImageDataSize);
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME));
+  cfg.g_w = 128;
+  cfg.g_h = 128;
+  cfg.g_forced_max_frame_width = 255;
+  cfg.g_forced_max_frame_height = 256;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+
+  aom_image_t img;
+  EXPECT_EQ(&img,
+            aom_img_wrap(&img, AOM_IMG_FMT_I420, 128, 128, 1, img_data.get()));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+
+  cfg.g_w = 256;
+  cfg.g_h = 256;
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_config_set(&enc, &cfg));
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+}  // namespace
diff --git a/third_party/aom/test/frame_parallel_enc_test.cc b/third_party/aom/test/frame_parallel_enc_test.cc
new file mode 100644
index 0000000000..86d5ddb7d4
--- /dev/null
+++ b/third_party/aom/test/frame_parallel_enc_test.cc
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <string>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+#if CONFIG_FPMT_TEST && !CONFIG_REALTIME_ONLY
+class AVxFrameParallelThreadEncodeTest
+    : public ::libaom_test::CodecTestWith3Params<int, int, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AVxFrameParallelThreadEncodeTest()
+      : EncoderTest(GET_PARAM(0)), encoder_initialized_(false),
+        set_cpu_used_(GET_PARAM(1)), tile_cols_(GET_PARAM(2)),
+        tile_rows_(GET_PARAM(3)) {
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.w = 1280;
+    cfg.h = 720;
+    cfg.allow_lowbitdepth = 1;
+    decoder_ = codec_->CreateDecoder(cfg, 0);
+  }
+  ~AVxFrameParallelThreadEncodeTest() override { delete decoder_; }
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kTwoPassGood);
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.g_lag_in_frames = 35;
+    cfg_.rc_2pass_vbr_minsection_pct = 5;
+    cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.g_threads = 16;
+  }
+
+  void BeginPassHook(unsigned int /*pass*/) override {
+    encoder_initialized_ = false;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource * /*video*/,
+                          ::libaom_test::Encoder *encoder) override {
+    if (encoder_initialized_) return;
+    SetTileSize(encoder);
+    encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+    encoder->Control(AV1E_SET_FP_MT, 1);
+    encoder->Control(AV1E_SET_FP_MT_UNIT_TEST, enable_actual_parallel_encode_);
+    encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+    encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+    encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 0);
+
+    encoder_initialized_ = true;
+  }
+
+  virtual void SetTileSize(libaom_test::Encoder *encoder) {
+    encoder->Control(AV1E_SET_TILE_COLUMNS, tile_cols_);
+    encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_);
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    size_enc_.push_back(pkt->data.frame.sz);
+
+    ::libaom_test::MD5 md5_enc;
+    md5_enc.Add(reinterpret_cast<uint8_t *>(pkt->data.frame.buf),
+                pkt->data.frame.sz);
+    md5_enc_.push_back(md5_enc.Get());
+
+    const aom_codec_err_t res = decoder_->DecodeFrame(
+        reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz);
+    if (res != AOM_CODEC_OK) {
+      abort_ = true;
+      ASSERT_EQ(AOM_CODEC_OK, res);
+    }
+    const aom_image_t *img = decoder_->GetDxData().Next();
+
+    if (img) {
+      ::libaom_test::MD5 md5_res;
+      md5_res.Add(img);
+      md5_dec_.push_back(md5_res.Get());
+    }
+  }
+
+  void DoTest(::libaom_test::VideoSource *input_video) {
+    /* This is the actual parallel encode of frames using multiple cpis.
+     * The parallel frames are independently encoded.
+     * Threads are distributed among the parallel frames whereas non-parallel
+     * frames use all the threads. Example: for 8 threads, in case of 4 frames
+     * in a parallel encode set, each frame gets 2 threads. In case of 3 frames
+     * in a parallel encode set, threads are distributed as 2, 3 ,3.
+     */
+    enable_actual_parallel_encode_ = 1;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(input_video));
+    std::vector<size_t> enc_stream_fpmt_size;
+    std::vector<std::string> enc_stream_fpmt;
+    std::vector<std::string> dec_stream_fpmt;
+    enc_stream_fpmt_size = size_enc_;
+    enc_stream_fpmt = md5_enc_;
+    dec_stream_fpmt = md5_dec_;
+    size_enc_.clear();
+    md5_enc_.clear();
+    md5_dec_.clear();
+
+    /* This is the simulation of parallel encode of frames using single cpi.
+     * In simulation, it should be ensured to have no dependency across frames
+     * (similar to parallel encode).
+     * Each frame uses all the threads configured.
+     */
+    enable_actual_parallel_encode_ = 0;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(input_video));
+    std::vector<size_t> enc_stream_sim_size;
+    std::vector<std::string> enc_stream_sim;
+    std::vector<std::string> dec_stream_sim;
+    enc_stream_sim_size = size_enc_;
+    enc_stream_sim = md5_enc_;
+    dec_stream_sim = md5_dec_;
+    size_enc_.clear();
+    md5_enc_.clear();
+    md5_dec_.clear();
+
+    // Check that the vectors are equal.
+    ASSERT_EQ(enc_stream_sim_size, enc_stream_fpmt_size);
+    ASSERT_EQ(enc_stream_sim, enc_stream_fpmt);
+    ASSERT_EQ(dec_stream_sim, dec_stream_fpmt);
+  }
+
+  bool encoder_initialized_;
+  int set_cpu_used_;
+  int tile_cols_;
+  int tile_rows_;
+  int enable_actual_parallel_encode_;
+  ::libaom_test::Decoder *decoder_;
+  std::vector<size_t> size_enc_;
+  std::vector<std::string> md5_enc_;
+  std::vector<std::string> md5_dec_;
+};
+
+class AVxFrameParallelThreadEncodeHDResTestLarge
+    : public AVxFrameParallelThreadEncodeTest {};
+
+TEST_P(AVxFrameParallelThreadEncodeHDResTestLarge,
+       FrameParallelThreadEncodeTest) {
+  ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+  cfg_.rc_target_bitrate = 500;
+  DoTest(&video);
+}
+
+class AVxFrameParallelThreadEncodeLowResTestLarge
+    : public AVxFrameParallelThreadEncodeTest {};
+
+TEST_P(AVxFrameParallelThreadEncodeLowResTestLarge,
+       FrameParallelThreadEncodeTest) {
+  ::libaom_test::YUVVideoSource video("hantro_collage_w352h288.yuv",
+                                      AOM_IMG_FMT_I420, 352, 288, 30, 1, 0, 60);
+  cfg_.rc_target_bitrate = 200;
+  DoTest(&video);
+}
+
+class AVxFrameParallelThreadEncodeLowResTest
+    : public AVxFrameParallelThreadEncodeTest {};
+
+TEST_P(AVxFrameParallelThreadEncodeLowResTest, FrameParallelThreadEncodeTest) {
+  ::libaom_test::YUVVideoSource video("hantro_collage_w352h288.yuv",
+                                      AOM_IMG_FMT_I420, 352, 288, 30, 1, 0, 60);
+  cfg_.rc_target_bitrate = 200;
+  DoTest(&video);
+}
+
+AV1_INSTANTIATE_TEST_SUITE(AVxFrameParallelThreadEncodeHDResTestLarge,
+                           ::testing::Values(2, 3, 4, 5, 6),
+                           ::testing::Values(0, 1, 2), ::testing::Values(0, 1));
+
+AV1_INSTANTIATE_TEST_SUITE(AVxFrameParallelThreadEncodeLowResTestLarge,
+                           ::testing::Values(2, 3), ::testing::Values(0, 1, 2),
+                           ::testing::Values(0, 1));
+
+AV1_INSTANTIATE_TEST_SUITE(AVxFrameParallelThreadEncodeLowResTest,
+                           ::testing::Values(4, 5, 6), ::testing::Values(1),
+                           ::testing::Values(0));
+#endif  // CONFIG_FPMT_TEST && !CONFIG_REALTIME_ONLY
+
+}  // namespace
diff --git a/third_party/aom/test/frame_size_tests.cc b/third_party/aom/test/frame_size_tests.cc
new file mode 100644
index 0000000000..ea8cf47ab8
--- /dev/null
+++ b/third_party/aom/test/frame_size_tests.cc
@@ -0,0 +1,388 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <array>
+#include <memory>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class AV1FrameSizeTests : public ::testing::Test,
+                          public ::libaom_test::EncoderTest {
+ protected:
+  AV1FrameSizeTests()
+      : EncoderTest(&::libaom_test::kAV1), expected_res_(AOM_CODEC_OK) {}
+  ~AV1FrameSizeTests() override = default;
+
+  void SetUp() override { InitializeConfig(::libaom_test::kRealTime); }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(expected_res_, res_dec) << decoder->DecodeError();
+    return !::testing::Test::HasFailure();
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 7);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  int expected_res_;
+};
+
+#if CONFIG_SIZE_LIMIT
+// TODO(Casey.Smalley@arm.com) fails due to newer bounds checks that get caught
+// before the assert below added in ebc2714d71a834fc32a19eef0a81f51fbc47db01
+TEST_F(AV1FrameSizeTests, DISABLED_TestInvalidSizes) {
+  ::libaom_test::RandomVideoSource video;
+
+  video.SetSize(DECODE_WIDTH_LIMIT + 16, DECODE_HEIGHT_LIMIT + 16);
+  video.set_limit(2);
+  expected_res_ = AOM_CODEC_CORRUPT_FRAME;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+// TODO(Casey.Smalley@arm.com) similar to the above test, needs to be
+// updated for the new rejection case
+TEST_F(AV1FrameSizeTests, DISABLED_LargeValidSizes) {
+  ::libaom_test::RandomVideoSource video;
+
+  video.SetSize(DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT);
+  video.set_limit(2);
+  expected_res_ = AOM_CODEC_OK;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+#endif
+
+TEST_F(AV1FrameSizeTests, OneByOneVideo) {
+  ::libaom_test::RandomVideoSource video;
+
+  video.SetSize(1, 1);
+  video.set_limit(2);
+  expected_res_ = AOM_CODEC_OK;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+// Parameters: AOM_USAGE_*, aom_rc_mode, cpu-used.
+class AV1ResolutionChange
+    : public testing::TestWithParam<std::tuple<int, aom_rc_mode, int>> {
+ public:
+  AV1ResolutionChange()
+      : usage_(std::get<0>(GetParam())), rc_mode_(std::get<1>(GetParam())),
+        cpu_used_(std::get<2>(GetParam())) {}
+  AV1ResolutionChange(const AV1ResolutionChange &) = delete;
+  AV1ResolutionChange &operator=(const AV1ResolutionChange &) = delete;
+  ~AV1ResolutionChange() override = default;
+
+ protected:
+  int usage_;
+  aom_rc_mode rc_mode_;
+  int cpu_used_;
+};
+
+TEST_P(AV1ResolutionChange, InvalidRefSize) {
+  struct FrameSize {
+    unsigned int width;
+    unsigned int height;
+  };
+  static constexpr std::array<FrameSize, 3> kFrameSizes = { {
+      { 1768, 200 },
+      { 50, 200 },
+      { 850, 200 },
+  } };
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage_), AOM_CODEC_OK);
+
+  // Resolution changes are only permitted with one pass encoding with no lag.
+  cfg.g_pass = AOM_RC_ONE_PASS;
+  cfg.g_lag_in_frames = 0;
+  cfg.rc_end_usage = rc_mode_;
+
+  aom_codec_ctx_t ctx;
+  EXPECT_EQ(aom_codec_enc_init(&ctx, iface, &cfg, 0), AOM_CODEC_OK);
+  std::unique_ptr<aom_codec_ctx_t, decltype(&aom_codec_destroy)> enc(
+      &ctx, &aom_codec_destroy);
+  EXPECT_EQ(aom_codec_control(enc.get(), AOME_SET_CPUUSED, cpu_used_),
+            AOM_CODEC_OK);
+
+  size_t frame_count = 0;
+  ::libaom_test::RandomVideoSource video;
+  video.Begin();
+  constexpr int kNumFramesPerResolution = 2;
+  for (const auto &frame_size : kFrameSizes) {
+    cfg.g_w = frame_size.width;
+    cfg.g_h = frame_size.height;
+    EXPECT_EQ(aom_codec_enc_config_set(enc.get(), &cfg), AOM_CODEC_OK);
+    video.SetSize(cfg.g_w, cfg.g_h);
+
+    aom_codec_iter_t iter;
+    const aom_codec_cx_pkt_t *pkt;
+
+    for (int i = 0; i < kNumFramesPerResolution; ++i) {
+      video.Next();  // SetSize() does not call FillFrame().
+      EXPECT_EQ(aom_codec_encode(enc.get(), video.img(), video.pts(),
+                                 video.duration(), /*flags=*/0),
+                AOM_CODEC_OK);
+
+      iter = nullptr;
+      while ((pkt = aom_codec_get_cx_data(enc.get(), &iter)) != nullptr) {
+        ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+        // The frame following a resolution change should be a keyframe as the
+        // change is too extreme to allow previous references to be used.
+        if (i == 0 || usage_ == AOM_USAGE_ALL_INTRA) {
+          EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u)
+              << "frame " << frame_count;
+        }
+        frame_count++;
+      }
+    }
+  }
+
+  EXPECT_EQ(frame_count, kNumFramesPerResolution * kFrameSizes.size());
+}
+
+TEST_P(AV1ResolutionChange, RandomInput) {
+  struct FrameSize {
+    unsigned int width;
+    unsigned int height;
+  };
+  static constexpr std::array<FrameSize, 4> kFrameSizes = { {
+      { 50, 200 },
+      { 100, 200 },
+      { 100, 300 },
+      { 200, 400 },
+  } };
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage_), AOM_CODEC_OK);
+
+  // Resolution changes are only permitted with one pass encoding with no lag.
+  cfg.g_pass = AOM_RC_ONE_PASS;
+  cfg.g_lag_in_frames = 0;
+  cfg.rc_end_usage = rc_mode_;
+  // For random input source, if max frame sizes are not set, the first encoded
+  // frame size will be locked as the max frame size, and the encoder will
+  // identify it as unsupported bitstream.
+  unsigned int max_width = cfg.g_w;   // default frame width
+  unsigned int max_height = cfg.g_h;  // default frame height
+  for (const auto &frame_size : kFrameSizes) {
+    max_width = frame_size.width > max_width ? frame_size.width : max_width;
+    max_height =
+        frame_size.height > max_height ? frame_size.height : max_height;
+  }
+  cfg.g_forced_max_frame_width = max_width;
+  cfg.g_forced_max_frame_height = max_height;
+
+  aom_codec_ctx_t ctx;
+  EXPECT_EQ(aom_codec_enc_init(&ctx, iface, &cfg, 0), AOM_CODEC_OK);
+  std::unique_ptr<aom_codec_ctx_t, decltype(&aom_codec_destroy)> enc(
+      &ctx, &aom_codec_destroy);
+  EXPECT_EQ(aom_codec_control(enc.get(), AOME_SET_CPUUSED, cpu_used_),
+            AOM_CODEC_OK);
+
+  size_t frame_count = 0;
+  ::libaom_test::RandomVideoSource video;
+  video.Begin();
+  constexpr int kNumFramesPerResolution = 2;
+  for (const auto &frame_size : kFrameSizes) {
+    cfg.g_w = frame_size.width;
+    cfg.g_h = frame_size.height;
+    EXPECT_EQ(aom_codec_enc_config_set(enc.get(), &cfg), AOM_CODEC_OK);
+    video.SetSize(cfg.g_w, cfg.g_h);
+
+    aom_codec_iter_t iter;
+    const aom_codec_cx_pkt_t *pkt;
+
+    for (int i = 0; i < kNumFramesPerResolution; ++i) {
+      video.Next();  // SetSize() does not call FillFrame().
+      EXPECT_EQ(aom_codec_encode(enc.get(), video.img(), video.pts(),
+                                 video.duration(), /*flags=*/0),
+                AOM_CODEC_OK);
+
+      iter = nullptr;
+      while ((pkt = aom_codec_get_cx_data(enc.get(), &iter)) != nullptr) {
+        ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+        // The frame following a resolution change should be a keyframe as the
+        // change is too extreme to allow previous references to be used.
+        if (i == 0 || usage_ == AOM_USAGE_ALL_INTRA) {
+          EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u)
+              << "frame " << frame_count;
+        }
+        frame_count++;
+      }
+    }
+  }
+
+  EXPECT_EQ(frame_count, kNumFramesPerResolution * kFrameSizes.size());
+}
+
+TEST_P(AV1ResolutionChange, InvalidInputSize) {
+  struct FrameSize {
+    unsigned int width;
+    unsigned int height;
+  };
+  static constexpr std::array<FrameSize, 3> kFrameSizes = { {
+      { 1768, 0 },
+      { 0, 200 },
+      { 850, 200 },
+  } };
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage_), AOM_CODEC_OK);
+
+  // Resolution changes are only permitted with one pass encoding with no lag.
+  cfg.g_pass = AOM_RC_ONE_PASS;
+  cfg.g_lag_in_frames = 0;
+  cfg.rc_end_usage = rc_mode_;
+
+  aom_codec_ctx_t ctx;
+  EXPECT_EQ(aom_codec_enc_init(&ctx, iface, &cfg, 0), AOM_CODEC_OK);
+  std::unique_ptr<aom_codec_ctx_t, decltype(&aom_codec_destroy)> enc(
+      &ctx, &aom_codec_destroy);
+  EXPECT_EQ(aom_codec_control(enc.get(), AOME_SET_CPUUSED, cpu_used_),
+            AOM_CODEC_OK);
+
+  int frame_count = 0;
+  ::libaom_test::RandomVideoSource video;
+  video.Begin();
+  constexpr int kNumFramesPerResolution = 2;
+  for (const auto &frame_size : kFrameSizes) {
+    cfg.g_w = frame_size.width;
+    cfg.g_h = frame_size.height;
+    if (cfg.g_w < 1 || cfg.g_w > 65536 || cfg.g_h < 1 || cfg.g_h > 65536) {
+      EXPECT_EQ(aom_codec_enc_config_set(enc.get(), &cfg),
+                AOM_CODEC_INVALID_PARAM);
+      continue;
+    }
+
+    EXPECT_EQ(aom_codec_enc_config_set(enc.get(), &cfg), AOM_CODEC_OK);
+    video.SetSize(cfg.g_w, cfg.g_h);
+
+    aom_codec_iter_t iter;
+    const aom_codec_cx_pkt_t *pkt;
+
+    for (int i = 0; i < kNumFramesPerResolution; ++i) {
+      video.Next();  // SetSize() does not call FillFrame().
+      EXPECT_EQ(aom_codec_encode(enc.get(), video.img(), video.pts(),
+                                 video.duration(), /*flags=*/0),
+                AOM_CODEC_OK);
+
+      iter = nullptr;
+      while ((pkt = aom_codec_get_cx_data(enc.get(), &iter)) != nullptr) {
+        ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+        // The frame following a resolution change should be a keyframe as the
+        // change is too extreme to allow previous references to be used.
+        if (i == 0 || usage_ == AOM_USAGE_ALL_INTRA) {
+          EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u)
+              << "frame " << frame_count;
+        }
+        frame_count++;
+      }
+    }
+  }
+
+  EXPECT_EQ(frame_count, 2);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    Realtime, AV1ResolutionChange,
+    ::testing::Combine(::testing::Values(AOM_USAGE_REALTIME),
+                       ::testing::Values(AOM_VBR, AOM_CBR),
+                       ::testing::Range(6, 11)));
+
+#if !CONFIG_REALTIME_ONLY
+INSTANTIATE_TEST_SUITE_P(
+    GoodQuality, AV1ResolutionChange,
+    ::testing::Combine(::testing::Values(AOM_USAGE_GOOD_QUALITY),
+                       ::testing::Values(AOM_VBR, AOM_CBR, AOM_CQ, AOM_Q),
+                       ::testing::Range(2, 6)));
+INSTANTIATE_TEST_SUITE_P(
+    GoodQualityLarge, AV1ResolutionChange,
+    ::testing::Combine(::testing::Values(AOM_USAGE_GOOD_QUALITY),
+                       ::testing::Values(AOM_VBR, AOM_CBR, AOM_CQ, AOM_Q),
+                       ::testing::Range(0, 2)));
+INSTANTIATE_TEST_SUITE_P(
+    AllIntra, AV1ResolutionChange,
+    ::testing::Combine(::testing::Values(AOM_USAGE_ALL_INTRA),
+                       ::testing::Values(AOM_Q), ::testing::Range(6, 10)));
+
+typedef struct {
+  unsigned int width;
+  unsigned int height;
+} FrameSizeParam;
+
+const FrameSizeParam FrameSizeTestParams[] = { { 96, 96 }, { 176, 144 } };
+
+// This unit test is used to validate the allocated size of compressed data
+// (ctx->cx_data) buffer, by feeding pseudo random input to the encoder in
+// lossless encoding mode.
+//
+// If compressed data buffer is not large enough, the av1_get_compressed_data()
+// call in av1/av1_cx_iface.c will overflow the buffer.
+class AV1LosslessFrameSizeTests
+    : public ::libaom_test::CodecTestWith2Params<FrameSizeParam,
+                                                 ::libaom_test::TestMode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AV1LosslessFrameSizeTests()
+      : EncoderTest(GET_PARAM(0)), frame_size_param_(GET_PARAM(1)),
+        encoding_mode_(GET_PARAM(2)) {}
+  ~AV1LosslessFrameSizeTests() override = default;
+
+  void SetUp() override { InitializeConfig(encoding_mode_); }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(expected_res_, res_dec) << decoder->DecodeError();
+    return !::testing::Test::HasFailure();
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 6);
+      encoder->Control(AV1E_SET_LOSSLESS, 1);
+    }
+  }
+
+  const FrameSizeParam frame_size_param_;
+  const ::libaom_test::TestMode encoding_mode_;
+  int expected_res_;
+};
+
+TEST_P(AV1LosslessFrameSizeTests, LosslessEncode) {
+  ::libaom_test::RandomVideoSource video;
+
+  video.SetSize(frame_size_param_.width, frame_size_param_.height);
+  video.set_limit(10);
+  expected_res_ = AOM_CODEC_OK;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+AV1_INSTANTIATE_TEST_SUITE(AV1LosslessFrameSizeTests,
+                           ::testing::ValuesIn(FrameSizeTestParams),
+                           testing::Values(::libaom_test::kAllIntra));
+#endif  // !CONFIG_REALTIME_ONLY
+
+}  // namespace
diff --git a/third_party/aom/test/function_equivalence_test.h b/third_party/aom/test/function_equivalence_test.h
new file mode 100644
index 0000000000..2268b9f2ad
--- /dev/null
+++ b/third_party/aom/test/function_equivalence_test.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_FUNCTION_EQUIVALENCE_TEST_H_
+#define AOM_TEST_FUNCTION_EQUIVALENCE_TEST_H_
+
+#include <ostream>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace libaom_test {
+// Base class for tests that compare 2 implementations of the same function
+// for equivalence. The template parameter should be pointer to a function
+// that is being tested.
+//
+// The test takes a 3-parameters encapsulating struct 'FuncParam', containing:
+//   - Pointer to reference function
+//   - Pointer to tested function
+//   - Integer bit depth (default to 0).
+//
+// These values are then accessible in the tests as member of params_:
+// params_.ref_func, params_.tst_func, and params_.bit_depth.
+//
+
+template <typename T>
+struct FuncParam {
+  FuncParam(T ref = nullptr, T tst = nullptr, int depth = 0)
+      : ref_func(ref), tst_func(tst), bit_depth(depth) {}
+  T ref_func;
+  T tst_func;
+  int bit_depth;
+};
+
+template <typename T>
+std::ostream &operator<<(std::ostream &os, const FuncParam<T> &p) {
+  return os << "bit_depth:" << p.bit_depth
+            << " function:" << reinterpret_cast<const void *>(p.ref_func)
+            << " function:" << reinterpret_cast<const void *>(p.tst_func);
+}
+
+template <typename T>
+class FunctionEquivalenceTest : public ::testing::TestWithParam<FuncParam<T> > {
+ public:
+  FunctionEquivalenceTest() : rng_(ACMRandom::DeterministicSeed()) {}
+
+  ~FunctionEquivalenceTest() override = default;
+
+  void SetUp() override { params_ = this->GetParam(); }
+
+ protected:
+  ACMRandom rng_;
+  FuncParam<T> params_;
+};
+
+}  // namespace libaom_test
+#endif  // AOM_TEST_FUNCTION_EQUIVALENCE_TEST_H_
diff --git a/third_party/aom/test/fwht4x4_test.cc b/third_party/aom/test/fwht4x4_test.cc
new file mode 100644
index 0000000000..bb9e218f6f
--- /dev/null
+++ b/third_party/aom/test/fwht4x4_test.cc
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <tuple>
+
+#include "aom_dsp/aom_dsp_common.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+#include "av1/common/entropy.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
+
+using libaom_test::FhtFunc;
+
+typedef std::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t, int, FdctFunc>
+    Dct4x4Param;
+
+void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+                 TxfmParam * /*txfm_param*/) {
+  av1_fwht4x4_c(in, out, stride);
+}
+
+void iwht4x4_10_c(const tran_low_t *in, uint8_t *out, int stride) {
+  av1_highbd_iwht4x4_16_add_c(in, out, stride, 10);
+}
+
+void iwht4x4_12_c(const tran_low_t *in, uint8_t *out, int stride) {
+  av1_highbd_iwht4x4_16_add_c(in, out, stride, 12);
+}
+
+#if HAVE_SSE4_1
+
+void iwht4x4_10_sse4_1(const tran_low_t *in, uint8_t *out, int stride) {
+  av1_highbd_iwht4x4_16_add_sse4_1(in, out, stride, 10);
+}
+
+void iwht4x4_12_sse4_1(const tran_low_t *in, uint8_t *out, int stride) {
+  av1_highbd_iwht4x4_16_add_sse4_1(in, out, stride, 12);
+}
+
+#endif
+
+class Trans4x4WHT : public libaom_test::TransformTestBase<tran_low_t>,
+                    public ::testing::TestWithParam<Dct4x4Param> {
+ public:
+  ~Trans4x4WHT() override = default;
+
+  void SetUp() override {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    pitch_ = 4;
+    height_ = 4;
+    fwd_txfm_ref = fwht4x4_ref;
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;
+    num_coeffs_ = GET_PARAM(4);
+    fwd_txfm_c_ = GET_PARAM(5);
+  }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) override {
+    fwd_txfm_(in, out, stride);
+  }
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) override {
+    inv_txfm_(out, dst, stride);
+  }
+  void RunSpeedTest() {
+    if (!fwd_txfm_c_) {
+      GTEST_SKIP();
+    } else {
+      ACMRandom rnd(ACMRandom::DeterministicSeed());
+      const int count_test_block = 10;
+      const int numIter = 5000;
+
+      int c_sum_time = 0;
+      int simd_sum_time = 0;
+
+      int stride = 96;
+
+      int16_t *input_block = reinterpret_cast<int16_t *>(
+          aom_memalign(16, sizeof(int16_t) * stride * height_));
+      ASSERT_NE(input_block, nullptr);
+      tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>(
+          aom_memalign(16, sizeof(output_ref_block[0]) * num_coeffs_));
+      ASSERT_NE(output_ref_block, nullptr);
+      tran_low_t *output_block = reinterpret_cast<tran_low_t *>(
+          aom_memalign(16, sizeof(output_block[0]) * num_coeffs_));
+      ASSERT_NE(output_block, nullptr);
+
+      for (int i = 0; i < count_test_block; ++i) {
+        for (int j = 0; j < height_; ++j) {
+          for (int k = 0; k < pitch_; ++k) {
+            int in_idx = j * stride + k;
+            int out_idx = j * pitch_ + k;
+            input_block[in_idx] =
+                (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+            if (bit_depth_ == AOM_BITS_8) {
+              output_block[out_idx] = output_ref_block[out_idx] = rnd.Rand8();
+            } else {
+              output_block[out_idx] = output_ref_block[out_idx] =
+                  rnd.Rand16() & mask_;
+            }
+          }
+        }
+
+        aom_usec_timer c_timer_;
+        aom_usec_timer_start(&c_timer_);
+        for (int iter = 0; iter < numIter; iter++) {
+          API_REGISTER_STATE_CHECK(
+              fwd_txfm_c_(input_block, output_ref_block, stride));
+        }
+        aom_usec_timer_mark(&c_timer_);
+
+        aom_usec_timer simd_timer_;
+        aom_usec_timer_start(&simd_timer_);
+
+        for (int iter = 0; iter < numIter; iter++) {
+          API_REGISTER_STATE_CHECK(
+              fwd_txfm_(input_block, output_block, stride));
+        }
+        aom_usec_timer_mark(&simd_timer_);
+
+        c_sum_time += static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
+        simd_sum_time += static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
+
+        // The minimum quant value is 4.
+        for (int j = 0; j < height_; ++j) {
+          for (int k = 0; k < pitch_; ++k) {
+            int out_idx = j * pitch_ + k;
+            ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx])
+                << "Error: not bit-exact result at index: " << out_idx
+                << " at test block: " << i;
+          }
+        }
+      }
+
+      printf(
+          "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
+          simd_sum_time,
+          (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
+
+      aom_free(input_block);
+      aom_free(output_ref_block);
+      aom_free(output_block);
+    }
+  }
+
+  FdctFunc fwd_txfm_;
+  IdctFunc inv_txfm_;
+
+  FdctFunc fwd_txfm_c_;  // C version of forward transform for speed test.
+};
+
+TEST_P(Trans4x4WHT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
+
+TEST_P(Trans4x4WHT, CoeffCheck) { RunCoeffCheck(); }
+
+TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); }
+
+TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
+
+TEST_P(Trans4x4WHT, DISABLED_Speed) { RunSpeedTest(); }
+
+using std::make_tuple;
+
+INSTANTIATE_TEST_SUITE_P(
+    C, Trans4x4WHT,
+    ::testing::Values(make_tuple(&av1_fwht4x4_c, &iwht4x4_10_c, DCT_DCT,
+                                 AOM_BITS_10, 16,
+                                 static_cast<FdctFunc>(nullptr)),
+                      make_tuple(&av1_fwht4x4_c, &iwht4x4_12_c, DCT_DCT,
+                                 AOM_BITS_12, 16,
+                                 static_cast<FdctFunc>(nullptr))));
+
+#if HAVE_SSE4_1
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, Trans4x4WHT,
+    ::testing::Values(make_tuple(&av1_fwht4x4_sse4_1, &iwht4x4_10_sse4_1,
+                                 DCT_DCT, AOM_BITS_10, 16,
+                                 static_cast<FdctFunc>(nullptr)),
+                      make_tuple(&av1_fwht4x4_sse4_1, &iwht4x4_12_sse4_1,
+                                 DCT_DCT, AOM_BITS_12, 16,
+                                 static_cast<FdctFunc>(nullptr))));
+
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, Trans4x4WHT,
+    ::testing::Values(make_tuple(&av1_fwht4x4_neon, &iwht4x4_10_c, DCT_DCT,
+                                 AOM_BITS_10, 16, &av1_fwht4x4_c),
+                      make_tuple(&av1_fwht4x4_neon, &iwht4x4_12_c, DCT_DCT,
+                                 AOM_BITS_12, 16, &av1_fwht4x4_c)));
+
+#endif  // HAVE_NEON
+
+}  // namespace
diff --git a/third_party/aom/test/gf_pyr_height_test.cc b/third_party/aom/test/gf_pyr_height_test.cc
new file mode 100644
index 0000000000..0996d80c25
--- /dev/null
+++ b/third_party/aom/test/gf_pyr_height_test.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <ostream>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+static const struct GFPyrHeightTestParam {
+  int gf_min_pyr_height;
+  int gf_max_pyr_height;
+  double psnr_thresh;
+} kTestParams[] = {
+  // gf_min_pyr_height = 0
+  { 0, 0, 32.30 },
+  { 0, 1, 33.90 },
+  { 0, 2, 34.00 },
+  { 0, 3, 34.20 },
+  { 0, 4, 34.30 },
+  { 0, 5, 34.35 },
+  // gf_min_pyr_height = 1
+  { 1, 1, 33.90 },
+  { 1, 2, 34.00 },
+  { 1, 3, 34.20 },
+  { 1, 4, 34.30 },
+  { 1, 5, 34.35 },
+  // gf_min_pyr_height = 2
+  { 2, 2, 34.00 },
+  { 2, 3, 34.20 },
+  { 2, 4, 34.30 },
+  { 2, 5, 34.35 },
+  // gf_min_pyr_height = 3
+  { 3, 3, 34.20 },
+  { 3, 4, 34.30 },
+  { 3, 5, 34.35 },
+  // gf_min_pyr_height = 4
+  { 4, 4, 34.30 },
+  { 4, 5, 34.35 },
+  // gf_min_pyr_height = 5
+  { 5, 5, 34.35 },
+};
+
+// Compiler may decide to add some padding to the struct above for alignment,
+// which the gtest may try to print (on error for example). This would cause
+// valgrind to complain that the padding is uninitialized. To avoid that, we
+// provide our own function to print the struct.
+// This also makes '--gtest_list_tests' output more understandable.
+std::ostream &operator<<(std::ostream &os, const GFPyrHeightTestParam &p) {
+  os << "GFPyrHeightTestParam { "
+     << "gf_min_pyr_height = " << p.gf_min_pyr_height << ", "
+     << "gf_max_pyr_height = " << p.gf_max_pyr_height << ", "
+     << "psnr_thresh = " << p.psnr_thresh << " }";
+  return os;
+}
+
+// Params: encoding mode, rate control mode and GFPyrHeightTestParam object.
+class GFPyrHeightTest
+    : public ::libaom_test::CodecTestWith3Params<
+          libaom_test::TestMode, aom_rc_mode, GFPyrHeightTestParam>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  GFPyrHeightTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        rc_mode_(GET_PARAM(2)) {
+    gf_min_pyr_height_ = GET_PARAM(3).gf_min_pyr_height;
+    gf_max_pyr_height_ = GET_PARAM(3).gf_max_pyr_height;
+    psnr_threshold_ = GET_PARAM(3).psnr_thresh;
+  }
+  ~GFPyrHeightTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cpu_used_ = 4;
+    cfg_.rc_end_usage = rc_mode_;
+    if (rc_mode_ == AOM_VBR) {
+      cfg_.rc_target_bitrate = 200;
+    }
+    cfg_.g_lag_in_frames = 19;
+    cfg_.g_threads = 0;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+  }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      if (rc_mode_ == AOM_Q) {
+        encoder->Control(AOME_SET_CQ_LEVEL, 32);
+      }
+      if (encoding_mode_ != ::libaom_test::kRealTime) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      }
+      encoder->Control(AV1E_SET_GF_MIN_PYRAMID_HEIGHT, gf_min_pyr_height_);
+      encoder->Control(AV1E_SET_GF_MAX_PYRAMID_HEIGHT, gf_max_pyr_height_);
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  double GetPsnrThreshold() { return psnr_threshold_; }
+
+  ::libaom_test::TestMode encoding_mode_;
+  aom_rc_mode rc_mode_;
+  double psnr_threshold_;
+  int gf_min_pyr_height_;
+  int gf_max_pyr_height_;
+  int cpu_used_;
+  int nframes_;
+  double psnr_;
+};
+
+TEST_P(GFPyrHeightTest, EncodeAndVerifyPSNR) {
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 32);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  EXPECT_GT(GetAveragePsnr(), GetPsnrThreshold())
+      << "GF Min Pyramid Height = " << gf_min_pyr_height_ << ", "
+      << "GF Max Pyramid Height = " << gf_max_pyr_height_;
+}
+
+AV1_INSTANTIATE_TEST_SUITE(GFPyrHeightTest, NONREALTIME_TEST_MODES,
+                           ::testing::Values(AOM_Q, AOM_VBR),
+                           ::testing::ValuesIn(kTestParams));
+}  // namespace
diff --git a/third_party/aom/test/gviz_api.py b/third_party/aom/test/gviz_api.py
new file mode 100755
index 0000000000..d3a443dabf
--- /dev/null
+++ b/third_party/aom/test/gviz_api.py
@@ -0,0 +1,1087 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2016, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and
+# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+# was not distributed with this source code in the LICENSE file, you can
+# obtain it at www.aomedia.org/license/software. If the Alliance for Open
+# Media Patent License 1.0 was not distributed with this source code in the
+# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+#
+
+"""Converts Python data into data for Google Visualization API clients.
+
+This library can be used to create a google.visualization.DataTable usable by
+visualizations built on the Google Visualization API. Output formats are raw
+JSON, JSON response, JavaScript, CSV, and HTML table.
+
+See http://code.google.com/apis/visualization/ for documentation on the
+Google Visualization API.
+"""
+
+__author__ = "Amit Weinstein, Misha Seltzer, Jacob Baskin"
+
+import cgi
+import cStringIO
+import csv
+import datetime
+try:
+  import json
+except ImportError:
+  import simplejson as json
+import types
+
+
+class DataTableException(Exception):
+  """The general exception object thrown by DataTable."""
+  pass
+
+
+class DataTableJSONEncoder(json.JSONEncoder):
+  """JSON encoder that handles date/time/datetime objects correctly."""
+
+  def __init__(self):
+    json.JSONEncoder.__init__(self,
+                              separators=(",", ":"),
+                              ensure_ascii=False)
+
+  def default(self, o):
+    if isinstance(o, datetime.datetime):
+      if o.microsecond == 0:
+        # If the time doesn't have ms-resolution, leave it out to keep
+        # things smaller.
+        return "Date(%d,%d,%d,%d,%d,%d)" % (
+            o.year, o.month - 1, o.day, o.hour, o.minute, o.second)
+      else:
+        return "Date(%d,%d,%d,%d,%d,%d,%d)" % (
+            o.year, o.month - 1, o.day, o.hour, o.minute, o.second,
+            o.microsecond / 1000)
+    elif isinstance(o, datetime.date):
+      return "Date(%d,%d,%d)" % (o.year, o.month - 1, o.day)
+    elif isinstance(o, datetime.time):
+      return [o.hour, o.minute, o.second]
+    else:
+      return super(DataTableJSONEncoder, self).default(o)
+
+
+class DataTable(object):
+  """Wraps the data to convert to a Google Visualization API DataTable.
+
+  Create this object, populate it with data, then call one of the ToJS...
+  methods to return a string representation of the data in the format described.
+
+  You can clear all data from the object to reuse it, but you cannot clear
+  individual cells, rows, or columns. You also cannot modify the table schema
+  specified in the class constructor.
+
+  You can add new data one or more rows at a time. All data added to an
+  instantiated DataTable must conform to the schema passed in to __init__().
+
+  You can reorder the columns in the output table, and also specify row sorting
+  order by column. The default column order is according to the original
+  table_description parameter. Default row sort order is ascending, by column
+  1 values. For a dictionary, we sort the keys for order.
+
+  The data and the table_description are closely tied, as described here:
+
+  The table schema is defined in the class constructor's table_description
+  parameter. The user defines each column using a tuple of
+  (id[, type[, label[, custom_properties]]]). The default value for type is
+  string, label is the same as ID if not specified, and custom properties is
+  an empty dictionary if not specified.
+
+  table_description is a dictionary or list, containing one or more column
+  descriptor tuples, nested dictionaries, and lists. Each dictionary key, list
+  element, or dictionary element must eventually be defined as
+  a column description tuple. Here's an example of a dictionary where the key
+  is a tuple, and the value is a list of two tuples:
+    {('a', 'number'): [('b', 'number'), ('c', 'string')]}
+
+  This flexibility in data entry enables you to build and manipulate your data
+  in a Python structure that makes sense for your program.
+
+  Add data to the table using the same nested design as the table's
+  table_description, replacing column descriptor tuples with cell data, and
+  each row is an element in the top level collection. This will be a bit
+  clearer after you look at the following examples showing the
+  table_description, matching data, and the resulting table:
+
+  Columns as list of tuples [col1, col2, col3]
+    table_description: [('a', 'number'), ('b', 'string')]
+    AppendData( [[1, 'z'], [2, 'w'], [4, 'o'], [5, 'k']] )
+    Table:
+    a  b   <--- these are column ids/labels
+    1  z
+    2  w
+    4  o
+    5  k
+
+  Dictionary of columns, where key is a column, and value is a list of
+  columns  {col1: [col2, col3]}
+    table_description: {('a', 'number'): [('b', 'number'), ('c', 'string')]}
+    AppendData( data: {1: [2, 'z'], 3: [4, 'w']}
+    Table:
+    a  b  c
+    1  2  z
+    3  4  w
+
+  Dictionary where key is a column, and the value is itself a dictionary of
+  columns {col1: {col2, col3}}
+    table_description: {('a', 'number'): {'b': 'number', 'c': 'string'}}
+    AppendData( data: {1: {'b': 2, 'c': 'z'}, 3: {'b': 4, 'c': 'w'}}
+    Table:
+    a  b  c
+    1  2  z
+    3  4  w
+  """
+
+  def __init__(self, table_description, data=None, custom_properties=None):
+    """Initialize the data table from a table schema and (optionally) data.
+
+    See the class documentation for more information on table schema and data
+    values.
+
+    Args:
+      table_description: A table schema, following one of the formats described
+                         in TableDescriptionParser(). Schemas describe the
+                         column names, data types, and labels. See
+                         TableDescriptionParser() for acceptable formats.
+      data: Optional. If given, fills the table with the given data. The data
+            structure must be consistent with schema in table_description. See
+            the class documentation for more information on acceptable data. You
+            can add data later by calling AppendData().
+      custom_properties: Optional. A dictionary from string to string that
+                         goes into the table's custom properties. This can be
+                         later changed by changing self.custom_properties.
+
+    Raises:
+      DataTableException: Raised if the data and the description did not match,
+                          or did not use the supported formats.
+    """
+    self.__columns = self.TableDescriptionParser(table_description)
+    self.__data = []
+    self.custom_properties = {}
+    if custom_properties is not None:
+      self.custom_properties = custom_properties
+    if data:
+      self.LoadData(data)
+
+  @staticmethod
+  def CoerceValue(value, value_type):
+    """Coerces a single value into the type expected for its column.
+
+    Internal helper method.
+
+    Args:
+      value: The value which should be converted
+      value_type: One of "string", "number", "boolean", "date", "datetime" or
+                  "timeofday".
+
+    Returns:
+      An item of the Python type appropriate to the given value_type. Strings
+      are also converted to Unicode using UTF-8 encoding if necessary.
+      If a tuple is given, it should be in one of the following forms:
+        - (value, formatted value)
+        - (value, formatted value, custom properties)
+      where the formatted value is a string, and custom properties is a
+      dictionary of the custom properties for this cell.
+      To specify custom properties without specifying formatted value, one can
+      pass None as the formatted value.
+      One can also have a null-valued cell with formatted value and/or custom
+      properties by specifying None for the value.
+      This method ignores the custom properties except for checking that it is a
+      dictionary. The custom properties are handled in the ToJSon and ToJSCode
+      methods.
+      The real type of the given value is not strictly checked. For example,
+      any type can be used for string - as we simply take its str( ) and for
+      boolean value we just check "if value".
+      Examples:
+        CoerceValue(None, "string") returns None
+        CoerceValue((5, "5$"), "number") returns (5, "5$")
+        CoerceValue(100, "string") returns "100"
+        CoerceValue(0, "boolean") returns False
+
+    Raises:
+      DataTableException: The value and type did not match in a not-recoverable
+                          way, for example given value 'abc' for type 'number'.
+    """
+    if isinstance(value, tuple):
+      # In case of a tuple, we run the same function on the value itself and
+      # add the formatted value.
+      if (len(value) not in [2, 3] or
+          (len(value) == 3 and not isinstance(value[2], dict))):
+        raise DataTableException("Wrong format for value and formatting - %s." %
+                                 str(value))
+      if not isinstance(value[1], types.StringTypes + (types.NoneType,)):
+        raise DataTableException("Formatted value is not string, given %s." %
+                                 type(value[1]))
+      js_value = DataTable.CoerceValue(value[0], value_type)
+      return (js_value,) + value[1:]
+
+    t_value = type(value)
+    if value is None:
+      return value
+    if value_type == "boolean":
+      return bool(value)
+
+    elif value_type == "number":
+      if isinstance(value, (int, long, float)):
+        return value
+      raise DataTableException("Wrong type %s when expected number" % t_value)
+
+    elif value_type == "string":
+      if isinstance(value, unicode):
+        return value
+      else:
+        return str(value).decode("utf-8")
+
+    elif value_type == "date":
+      if isinstance(value, datetime.datetime):
+        return datetime.date(value.year, value.month, value.day)
+      elif isinstance(value, datetime.date):
+        return value
+      else:
+        raise DataTableException("Wrong type %s when expected date" % t_value)
+
+    elif value_type == "timeofday":
+      if isinstance(value, datetime.datetime):
+        return datetime.time(value.hour, value.minute, value.second)
+      elif isinstance(value, datetime.time):
+        return value
+      else:
+        raise DataTableException("Wrong type %s when expected time" % t_value)
+
+    elif value_type == "datetime":
+      if isinstance(value, datetime.datetime):
+        return value
+      else:
+        raise DataTableException("Wrong type %s when expected datetime" %
+                                 t_value)
+    # If we got here, it means the given value_type was not one of the
+    # supported types.
+    raise DataTableException("Unsupported type %s" % value_type)
+
+  @staticmethod
+  def EscapeForJSCode(encoder, value):
+    if value is None:
+      return "null"
+    elif isinstance(value, datetime.datetime):
+      if value.microsecond == 0:
+        # If it's not ms-resolution, leave that out to save space.
+        return "new Date(%d,%d,%d,%d,%d,%d)" % (value.year,
+                                                value.month - 1,  # To match JS
+                                                value.day,
+                                                value.hour,
+                                                value.minute,
+                                                value.second)
+      else:
+        return "new Date(%d,%d,%d,%d,%d,%d,%d)" % (value.year,
+                                                   value.month - 1,  # match JS
+                                                   value.day,
+                                                   value.hour,
+                                                   value.minute,
+                                                   value.second,
+                                                   value.microsecond / 1000)
+    elif isinstance(value, datetime.date):
+      return "new Date(%d,%d,%d)" % (value.year, value.month - 1, value.day)
+    else:
+      return encoder.encode(value)
+
+  @staticmethod
+  def ToString(value):
+    if value is None:
+      return "(empty)"
+    elif isinstance(value, (datetime.datetime,
+                            datetime.date,
+                            datetime.time)):
+      return str(value)
+    elif isinstance(value, unicode):
+      return value
+    elif isinstance(value, bool):
+      return str(value).lower()
+    else:
+      return str(value).decode("utf-8")
+
+  @staticmethod
+  def ColumnTypeParser(description):
+    """Parses a single column description. Internal helper method.
+
+    Args:
+      description: a column description in the possible formats:
+       'id'
+       ('id',)
+       ('id', 'type')
+       ('id', 'type', 'label')
+       ('id', 'type', 'label', {'custom_prop1': 'custom_val1'})
+    Returns:
+      Dictionary with the following keys: id, label, type, and
+      custom_properties where:
+        - If label not given, it equals the id.
+        - If type not given, string is used by default.
+        - If custom properties are not given, an empty dictionary is used by
+          default.
+
+    Raises:
+      DataTableException: The column description did not match the RE, or
+          unsupported type was passed.
+    """
+    if not description:
+      raise DataTableException("Description error: empty description given")
+
+    if not isinstance(description, (types.StringTypes, tuple)):
+      raise DataTableException("Description error: expected either string or "
+                               "tuple, got %s." % type(description))
+
+    if isinstance(description, types.StringTypes):
+      description = (description,)
+
+    # According to the tuple's length, we fill the keys
+    # We verify everything is of type string
+    for elem in description[:3]:
+      if not isinstance(elem, types.StringTypes):
+        raise DataTableException("Description error: expected tuple of "
+                                 "strings, current element of type %s." %
+                                 type(elem))
+    desc_dict = {"id": description[0],
+                 "label": description[0],
+                 "type": "string",
+                 "custom_properties": {}}
+    if len(description) > 1:
+      desc_dict["type"] = description[1].lower()
+      if len(description) > 2:
+        desc_dict["label"] = description[2]
+        if len(description) > 3:
+          if not isinstance(description[3], dict):
+            raise DataTableException("Description error: expected custom "
+                                     "properties of type dict, current element "
+                                     "of type %s." % type(description[3]))
+          desc_dict["custom_properties"] = description[3]
+          if len(description) > 4:
+            raise DataTableException("Description error: tuple of length > 4")
+    if desc_dict["type"] not in ["string", "number", "boolean",
+                                 "date", "datetime", "timeofday"]:
+      raise DataTableException(
+          "Description error: unsupported type '%s'" % desc_dict["type"])
+    return desc_dict
+
+  @staticmethod
+  def TableDescriptionParser(table_description, depth=0):
+    """Parses the table_description object for internal use.
+
+    Parses the user-submitted table description into an internal format used
+    by the Python DataTable class. Returns the flat list of parsed columns.
+
+    Args:
+      table_description: A description of the table which should comply
+                         with one of the formats described below.
+      depth: Optional. The depth of the first level in the current description.
+             Used by recursive calls to this function.
+
+    Returns:
+      List of columns, where each column represented by a dictionary with the
+      keys: id, label, type, depth, container which means the following:
+      - id: the id of the column
+      - name: The name of the column
+      - type: The datatype of the elements in this column. Allowed types are
+              described in ColumnTypeParser().
+      - depth: The depth of this column in the table description
+      - container: 'dict', 'iter' or 'scalar' for parsing the format easily.
+      - custom_properties: The custom properties for this column.
+      The returned description is flattened regardless of how it was given.
+
+    Raises:
+      DataTableException: Error in a column description or in the description
+                          structure.
+
+    Examples:
+      A column description can be of the following forms:
+       'id'
+       ('id',)
+       ('id', 'type')
+       ('id', 'type', 'label')
+       ('id', 'type', 'label', {'custom_prop1': 'custom_val1'})
+       or as a dictionary:
+       'id': 'type'
+       'id': ('type',)
+       'id': ('type', 'label')
+       'id': ('type', 'label', {'custom_prop1': 'custom_val1'})
+      If the type is not specified, we treat it as string.
+      If no specific label is given, the label is simply the id.
+      If no custom properties are given, we use an empty dictionary.
+
+      input: [('a', 'date'), ('b', 'timeofday', 'b', {'foo': 'bar'})]
+      output: [{'id': 'a', 'label': 'a', 'type': 'date',
+                'depth': 0, 'container': 'iter', 'custom_properties': {}},
+               {'id': 'b', 'label': 'b', 'type': 'timeofday',
+                'depth': 0, 'container': 'iter',
+                'custom_properties': {'foo': 'bar'}}]
+
+      input: {'a': [('b', 'number'), ('c', 'string', 'column c')]}
+      output: [{'id': 'a', 'label': 'a', 'type': 'string',
+                'depth': 0, 'container': 'dict', 'custom_properties': {}},
+               {'id': 'b', 'label': 'b', 'type': 'number',
+                'depth': 1, 'container': 'iter', 'custom_properties': {}},
+               {'id': 'c', 'label': 'column c', 'type': 'string',
+                'depth': 1, 'container': 'iter', 'custom_properties': {}}]
+
+      input:  {('a', 'number', 'column a'): { 'b': 'number', 'c': 'string'}}
+      output: [{'id': 'a', 'label': 'column a', 'type': 'number',
+                'depth': 0, 'container': 'dict', 'custom_properties': {}},
+               {'id': 'b', 'label': 'b', 'type': 'number',
+                'depth': 1, 'container': 'dict', 'custom_properties': {}},
+               {'id': 'c', 'label': 'c', 'type': 'string',
+                'depth': 1, 'container': 'dict', 'custom_properties': {}}]
+
+      input: { ('w', 'string', 'word'): ('c', 'number', 'count') }
+      output: [{'id': 'w', 'label': 'word', 'type': 'string',
+                'depth': 0, 'container': 'dict', 'custom_properties': {}},
+               {'id': 'c', 'label': 'count', 'type': 'number',
+                'depth': 1, 'container': 'scalar', 'custom_properties': {}}]
+
+      input: {'a': ('number', 'column a'), 'b': ('string', 'column b')}
+      output: [{'id': 'a', 'label': 'column a', 'type': 'number', 'depth': 0,
+               'container': 'dict', 'custom_properties': {}},
+               {'id': 'b', 'label': 'column b', 'type': 'string', 'depth': 0,
+               'container': 'dict', 'custom_properties': {}}
+
+      NOTE: there might be ambiguity in the case of a dictionary representation
+      of a single column. For example, the following description can be parsed
+      in 2 different ways: {'a': ('b', 'c')} can be thought of a single column
+      with the id 'a', of type 'b' and the label 'c', or as 2 columns: one named
+      'a', and the other named 'b' of type 'c'. We choose the first option by
+      default, and in case the second option is the right one, it is possible to
+      make the key into a tuple (i.e. {('a',): ('b', 'c')}) or add more info
+      into the tuple, thus making it look like this: {'a': ('b', 'c', 'b', {})}
+      -- second 'b' is the label, and {} is the custom properties field.
+    """
+    # For the recursion step, we check for a scalar object (string or tuple)
+    if isinstance(table_description, (types.StringTypes, tuple)):
+      parsed_col = DataTable.ColumnTypeParser(table_description)
+      parsed_col["depth"] = depth
+      parsed_col["container"] = "scalar"
+      return [parsed_col]
+
+    # Since it is not scalar, table_description must be iterable.
+    if not hasattr(table_description, "__iter__"):
+      raise DataTableException("Expected an iterable object, got %s" %
+                               type(table_description))
+    if not isinstance(table_description, dict):
+      # We expects a non-dictionary iterable item.
+      columns = []
+      for desc in table_description:
+        parsed_col = DataTable.ColumnTypeParser(desc)
+        parsed_col["depth"] = depth
+        parsed_col["container"] = "iter"
+        columns.append(parsed_col)
+      if not columns:
+        raise DataTableException("Description iterable objects should not"
+                                 " be empty.")
+      return columns
+    # The other case is a dictionary
+    if not table_description:
+      raise DataTableException("Empty dictionaries are not allowed inside"
+                               " description")
+
+    # To differentiate between the two cases of more levels below or this is
+    # the most inner dictionary, we consider the number of keys (more then one
+    # key is indication for most inner dictionary) and the type of the key and
+    # value in case of only 1 key (if the type of key is string and the type of
+    # the value is a tuple of 0-3 items, we assume this is the most inner
+    # dictionary).
+    # NOTE: this way of differentiating might create ambiguity. See docs.
+    if (len(table_description) != 1 or
+        (isinstance(table_description.keys()[0], types.StringTypes) and
+         isinstance(table_description.values()[0], tuple) and
+         len(table_description.values()[0]) < 4)):
+      # This is the most inner dictionary. Parsing types.
+      columns = []
+      # We sort the items, equivalent to sort the keys since they are unique
+      for key, value in sorted(table_description.items()):
+        # We parse the column type as (key, type) or (key, type, label) using
+        # ColumnTypeParser.
+        if isinstance(value, tuple):
+          parsed_col = DataTable.ColumnTypeParser((key,) + value)
+        else:
+          parsed_col = DataTable.ColumnTypeParser((key, value))
+        parsed_col["depth"] = depth
+        parsed_col["container"] = "dict"
+        columns.append(parsed_col)
+      return columns
+    # This is an outer dictionary, must have at most one key.
+    parsed_col = DataTable.ColumnTypeParser(table_description.keys()[0])
+    parsed_col["depth"] = depth
+    parsed_col["container"] = "dict"
+    return ([parsed_col] +
+            DataTable.TableDescriptionParser(table_description.values()[0],
+                                             depth=depth + 1))
+
+  @property
+  def columns(self):
+    """Returns the parsed table description."""
+    return self.__columns
+
+  def NumberOfRows(self):
+    """Returns the number of rows in the current data stored in the table."""
+    return len(self.__data)
+
+  def SetRowsCustomProperties(self, rows, custom_properties):
+    """Sets the custom properties for given row(s).
+
+    Can accept a single row or an iterable of rows.
+    Sets the given custom properties for all specified rows.
+
+    Args:
+      rows: The row, or rows, to set the custom properties for.
+      custom_properties: A string to string dictionary of custom properties to
+      set for all rows.
+    """
+    if not hasattr(rows, "__iter__"):
+      rows = [rows]
+    for row in rows:
+      self.__data[row] = (self.__data[row][0], custom_properties)
+
+  def LoadData(self, data, custom_properties=None):
+    """Loads new rows to the data table, clearing existing rows.
+
+    May also set the custom_properties for the added rows. The given custom
+    properties dictionary specifies the dictionary that will be used for *all*
+    given rows.
+
+    Args:
+      data: The rows that the table will contain.
+      custom_properties: A dictionary of string to string to set as the custom
+                         properties for all rows.
+    """
+    self.__data = []
+    self.AppendData(data, custom_properties)
+
+  def AppendData(self, data, custom_properties=None):
+    """Appends new data to the table.
+
+    Data is appended in rows. Data must comply with
+    the table schema passed in to __init__(). See CoerceValue() for a list
+    of acceptable data types. See the class documentation for more information
+    and examples of schema and data values.
+
+    Args:
+      data: The row to add to the table. The data must conform to the table
+            description format.
+      custom_properties: A dictionary of string to string, representing the
+                         custom properties to add to all the rows.
+
+    Raises:
+      DataTableException: The data structure does not match the description.
+    """
+    # If the maximal depth is 0, we simply iterate over the data table
+    # lines and insert them using _InnerAppendData. Otherwise, we simply
+    # let the _InnerAppendData handle all the levels.
+    if not self.__columns[-1]["depth"]:
+      for row in data:
+        self._InnerAppendData(({}, custom_properties), row, 0)
+    else:
+      self._InnerAppendData(({}, custom_properties), data, 0)
+
+  def _InnerAppendData(self, prev_col_values, data, col_index):
+    """Inner function to assist LoadData."""
+    # We first check that col_index has not exceeded the columns size
+    if col_index >= len(self.__columns):
+      raise DataTableException("The data does not match description, too deep")
+
+    # Dealing with the scalar case, the data is the last value.
+    if self.__columns[col_index]["container"] == "scalar":
+      prev_col_values[0][self.__columns[col_index]["id"]] = data
+      self.__data.append(prev_col_values)
+      return
+
+    if self.__columns[col_index]["container"] == "iter":
+      if not hasattr(data, "__iter__") or isinstance(data, dict):
+        raise DataTableException("Expected iterable object, got %s" %
+                                 type(data))
+      # We only need to insert the rest of the columns
+      # If there are less items than expected, we only add what there is.
+      for value in data:
+        if col_index >= len(self.__columns):
+          raise DataTableException("Too many elements given in data")
+        prev_col_values[0][self.__columns[col_index]["id"]] = value
+        col_index += 1
+      self.__data.append(prev_col_values)
+      return
+
+    # We know the current level is a dictionary, we verify the type.
+    if not isinstance(data, dict):
+      raise DataTableException("Expected dictionary at current level, got %s" %
+                               type(data))
+    # We check if this is the last level
+    if self.__columns[col_index]["depth"] == self.__columns[-1]["depth"]:
+      # We need to add the keys in the dictionary as they are
+      for col in self.__columns[col_index:]:
+        if col["id"] in data:
+          prev_col_values[0][col["id"]] = data[col["id"]]
+      self.__data.append(prev_col_values)
+      return
+
+    # We have a dictionary in an inner depth level.
+    if not data.keys():
+      # In case this is an empty dictionary, we add a record with the columns
+      # filled only until this point.
+      self.__data.append(prev_col_values)
+    else:
+      for key in sorted(data):
+        col_values = dict(prev_col_values[0])
+        col_values[self.__columns[col_index]["id"]] = key
+        self._InnerAppendData((col_values, prev_col_values[1]),
+                              data[key], col_index + 1)
+
+  def _PreparedData(self, order_by=()):
+    """Prepares the data for enumeration - sorting it by order_by.
+
+    Args:
+      order_by: Optional. Specifies the name of the column(s) to sort by, and
+                (optionally) which direction to sort in. Default sort direction
+                is asc. Following formats are accepted:
+                "string_col_name"  -- For a single key in default (asc) order.
+                ("string_col_name", "asc|desc") -- For a single key.
+                [("col_1","asc|desc"), ("col_2","asc|desc")] -- For more than
+                    one column, an array of tuples of (col_name, "asc|desc").
+
+    Returns:
+      The data sorted by the keys given.
+
+    Raises:
+      DataTableException: Sort direction not in 'asc' or 'desc'
+    """
+    if not order_by:
+      return self.__data
+
+    proper_sort_keys = []
+    if isinstance(order_by, types.StringTypes) or (
+        isinstance(order_by, tuple) and len(order_by) == 2 and
+        order_by[1].lower() in ["asc", "desc"]):
+      order_by = (order_by,)
+    for key in order_by:
+      if isinstance(key, types.StringTypes):
+        proper_sort_keys.append((key, 1))
+      elif (isinstance(key, (list, tuple)) and len(key) == 2 and
+            key[1].lower() in ("asc", "desc")):
+        proper_sort_keys.append((key[0], key[1].lower() == "asc" and 1 or -1))
+      else:
+        raise DataTableException("Expected tuple with second value: "
+                                 "'asc' or 'desc'")
+
+    def SortCmpFunc(row1, row2):
+      """cmp function for sorted. Compares by keys and 'asc'/'desc' keywords."""
+      for key, asc_mult in proper_sort_keys:
+        cmp_result = asc_mult * cmp(row1[0].get(key), row2[0].get(key))
+        if cmp_result:
+          return cmp_result
+      return 0
+
+    return sorted(self.__data, cmp=SortCmpFunc)
+
+  def ToJSCode(self, name, columns_order=None, order_by=()):
+    """Writes the data table as a JS code string.
+
+    This method writes a string of JS code that can be run to
+    generate a DataTable with the specified data. Typically used for debugging
+    only.
+
+    Args:
+      name: The name of the table. The name would be used as the DataTable's
+            variable name in the created JS code.
+      columns_order: Optional. Specifies the order of columns in the
+                     output table. Specify a list of all column IDs in the order
+                     in which you want the table created.
+                     Note that you must list all column IDs in this parameter,
+                     if you use it.
+      order_by: Optional. Specifies the name of the column(s) to sort by.
+                Passed as is to _PreparedData.
+
+    Returns:
+      A string of JS code that, when run, generates a DataTable with the given
+      name and the data stored in the DataTable object.
+      Example result:
+        "var tab1 = new google.visualization.DataTable();
+         tab1.addColumn("string", "a", "a");
+         tab1.addColumn("number", "b", "b");
+         tab1.addColumn("boolean", "c", "c");
+         tab1.addRows(10);
+         tab1.setCell(0, 0, "a");
+         tab1.setCell(0, 1, 1, null, {"foo": "bar"});
+         tab1.setCell(0, 2, true);
+         ...
+         tab1.setCell(9, 0, "c");
+         tab1.setCell(9, 1, 3, "3$");
+         tab1.setCell(9, 2, false);"
+
+    Raises:
+      DataTableException: The data does not match the type.
+    """
+
+    encoder = DataTableJSONEncoder()
+
+    if columns_order is None:
+      columns_order = [col["id"] for col in self.__columns]
+    col_dict = dict([(col["id"], col) for col in self.__columns])
+
+    # We first create the table with the given name
+    jscode = "var %s = new google.visualization.DataTable();\n" % name
+    if self.custom_properties:
+      jscode += "%s.setTableProperties(%s);\n" % (
+          name, encoder.encode(self.custom_properties))
+
+    # We add the columns to the table
+    for i, col in enumerate(columns_order):
+      jscode += "%s.addColumn(%s, %s, %s);\n" % (
+          name,
+          encoder.encode(col_dict[col]["type"]),
+          encoder.encode(col_dict[col]["label"]),
+          encoder.encode(col_dict[col]["id"]))
+      if col_dict[col]["custom_properties"]:
+        jscode += "%s.setColumnProperties(%d, %s);\n" % (
+            name, i, encoder.encode(col_dict[col]["custom_properties"]))
+    jscode += "%s.addRows(%d);\n" % (name, len(self.__data))
+
+    # We now go over the data and add each row
+    for (i, (row, cp)) in enumerate(self._PreparedData(order_by)):
+      # We add all the elements of this row by their order
+      for (j, col) in enumerate(columns_order):
+        if col not in row or row[col] is None:
+          continue
+        value = self.CoerceValue(row[col], col_dict[col]["type"])
+        if isinstance(value, tuple):
+          cell_cp = ""
+          if len(value) == 3:
+            cell_cp = ", %s" % encoder.encode(row[col][2])
+          # We have a formatted value or custom property as well
+          jscode += ("%s.setCell(%d, %d, %s, %s%s);\n" %
+                     (name, i, j,
+                      self.EscapeForJSCode(encoder, value[0]),
+                      self.EscapeForJSCode(encoder, value[1]), cell_cp))
+        else:
+          jscode += "%s.setCell(%d, %d, %s);\n" % (
+              name, i, j, self.EscapeForJSCode(encoder, value))
+      if cp:
+        jscode += "%s.setRowProperties(%d, %s);\n" % (
+            name, i, encoder.encode(cp))
+    return jscode
+
+  def ToHtml(self, columns_order=None, order_by=()):
+    """Writes the data table as an HTML table code string.
+
+    Args:
+      columns_order: Optional. Specifies the order of columns in the
+                     output table. Specify a list of all column IDs in the order
+                     in which you want the table created.
+                     Note that you must list all column IDs in this parameter,
+                     if you use it.
+      order_by: Optional. Specifies the name of the column(s) to sort by.
+                Passed as is to _PreparedData.
+
+    Returns:
+      An HTML table code string.
+      Example result (the result is without the newlines):
+       <html><body><table border="1">
+        <thead><tr><th>a</th><th>b</th><th>c</th></tr></thead>
+        <tbody>
+         <tr><td>1</td><td>"z"</td><td>2</td></tr>
+         <tr><td>"3$"</td><td>"w"</td><td></td></tr>
+        </tbody>
+       </table></body></html>
+
+    Raises:
+      DataTableException: The data does not match the type.
+    """
+    table_template = "<html><body><table border=\"1\">%s</table></body></html>"
+    columns_template = "<thead><tr>%s</tr></thead>"
+    rows_template = "<tbody>%s</tbody>"
+    row_template = "<tr>%s</tr>"
+    header_cell_template = "<th>%s</th>"
+    cell_template = "<td>%s</td>"
+
+    if columns_order is None:
+      columns_order = [col["id"] for col in self.__columns]
+    col_dict = dict([(col["id"], col) for col in self.__columns])
+
+    columns_list = []
+    for col in columns_order:
+      columns_list.append(header_cell_template %
+                          cgi.escape(col_dict[col]["label"]))
+    columns_html = columns_template % "".join(columns_list)
+
+    rows_list = []
+    # We now go over the data and add each row
+    for row, unused_cp in self._PreparedData(order_by):
+      cells_list = []
+      # We add all the elements of this row by their order
+      for col in columns_order:
+        # For empty string we want empty quotes ("").
+        value = ""
+        if col in row and row[col] is not None:
+          value = self.CoerceValue(row[col], col_dict[col]["type"])
+        if isinstance(value, tuple):
+          # We have a formatted value and we're going to use it
+          cells_list.append(cell_template % cgi.escape(self.ToString(value[1])))
+        else:
+          cells_list.append(cell_template % cgi.escape(self.ToString(value)))
+      rows_list.append(row_template % "".join(cells_list))
+    rows_html = rows_template % "".join(rows_list)
+
+    return table_template % (columns_html + rows_html)
+
+  def ToCsv(self, columns_order=None, order_by=(), separator=","):
+    """Writes the data table as a CSV string.
+
+    Output is encoded in UTF-8 because the Python "csv" module can't handle
+    Unicode properly according to its documentation.
+
+    Args:
+      columns_order: Optional. Specifies the order of columns in the
+                     output table. Specify a list of all column IDs in the order
+                     in which you want the table created.
+                     Note that you must list all column IDs in this parameter,
+                     if you use it.
+      order_by: Optional. Specifies the name of the column(s) to sort by.
+                Passed as is to _PreparedData.
+      separator: Optional. The separator to use between the values.
+
+    Returns:
+      A CSV string representing the table.
+      Example result:
+       'a','b','c'
+       1,'z',2
+       3,'w',''
+
+    Raises:
+      DataTableException: The data does not match the type.
+    """
+
+    csv_buffer = cStringIO.StringIO()
+    writer = csv.writer(csv_buffer, delimiter=separator)
+
+    if columns_order is None:
+      columns_order = [col["id"] for col in self.__columns]
+    col_dict = dict([(col["id"], col) for col in self.__columns])
+
+    writer.writerow([col_dict[col]["label"].encode("utf-8")
+                     for col in columns_order])
+
+    # We now go over the data and add each row
+    for row, unused_cp in self._PreparedData(order_by):
+      cells_list = []
+      # We add all the elements of this row by their order
+      for col in columns_order:
+        value = ""
+        if col in row and row[col] is not None:
+          value = self.CoerceValue(row[col], col_dict[col]["type"])
+        if isinstance(value, tuple):
+          # We have a formatted value. Using it only for date/time types.
+          if col_dict[col]["type"] in ["date", "datetime", "timeofday"]:
+            cells_list.append(self.ToString(value[1]).encode("utf-8"))
+          else:
+            cells_list.append(self.ToString(value[0]).encode("utf-8"))
+        else:
+          cells_list.append(self.ToString(value).encode("utf-8"))
+      writer.writerow(cells_list)
+    return csv_buffer.getvalue()
+
+  def ToTsvExcel(self, columns_order=None, order_by=()):
+    """Returns a file in tab-separated-format readable by MS Excel.
+
+    Returns a file in UTF-16 little endian encoding, with tabs separating the
+    values.
+
+    Args:
+      columns_order: Delegated to ToCsv.
+      order_by: Delegated to ToCsv.
+
+    Returns:
+      A tab-separated little endian UTF16 file representing the table.
+    """
+    return (self.ToCsv(columns_order, order_by, separator="\t")
+            .decode("utf-8").encode("UTF-16LE"))
+
+  def _ToJSonObj(self, columns_order=None, order_by=()):
+    """Returns an object suitable to be converted to JSON.
+
+    Args:
+      columns_order: Optional. A list of all column IDs in the order in which
+                     you want them created in the output table. If specified,
+                     all column IDs must be present.
+      order_by: Optional. Specifies the name of the column(s) to sort by.
+                Passed as is to _PreparedData().
+
+    Returns:
+      A dictionary object for use by ToJSon or ToJSonResponse.
+    """
+    if columns_order is None:
+      columns_order = [col["id"] for col in self.__columns]
+    col_dict = dict([(col["id"], col) for col in self.__columns])
+
+    # Creating the column JSON objects
+    col_objs = []
+    for col_id in columns_order:
+      col_obj = {"id": col_dict[col_id]["id"],
+                 "label": col_dict[col_id]["label"],
+                 "type": col_dict[col_id]["type"]}
+      if col_dict[col_id]["custom_properties"]:
+        col_obj["p"] = col_dict[col_id]["custom_properties"]
+      col_objs.append(col_obj)
+
+    # Creating the rows jsons
+    row_objs = []
+    for row, cp in self._PreparedData(order_by):
+      cell_objs = []
+      for col in columns_order:
+        value = self.CoerceValue(row.get(col, None), col_dict[col]["type"])
+        if value is None:
+          cell_obj = None
+        elif isinstance(value, tuple):
+          cell_obj = {"v": value[0]}
+          if len(value) > 1 and value[1] is not None:
+            cell_obj["f"] = value[1]
+          if len(value) == 3:
+            cell_obj["p"] = value[2]
+        else:
+          cell_obj = {"v": value}
+        cell_objs.append(cell_obj)
+      row_obj = {"c": cell_objs}
+      if cp:
+        row_obj["p"] = cp
+      row_objs.append(row_obj)
+
+    json_obj = {"cols": col_objs, "rows": row_objs}
+    if self.custom_properties:
+      json_obj["p"] = self.custom_properties
+
+    return json_obj
+
+  def ToJSon(self, columns_order=None, order_by=()):
+    """Returns a string that can be used in a JS DataTable constructor.
+
+    This method writes a JSON string that can be passed directly into a Google
+    Visualization API DataTable constructor. Use this output if you are
+    hosting the visualization HTML on your site, and want to code the data
+    table in Python. Pass this string into the
+    google.visualization.DataTable constructor, e.g,:
+      ... on my page that hosts my visualization ...
+      google.setOnLoadCallback(drawTable);
+      function drawTable() {
+        var data = new google.visualization.DataTable(_my_JSon_string, 0.6);
+        myTable.draw(data);
+      }
+
+    Args:
+      columns_order: Optional. Specifies the order of columns in the
+                     output table. Specify a list of all column IDs in the order
+                     in which you want the table created.
+                     Note that you must list all column IDs in this parameter,
+                     if you use it.
+      order_by: Optional. Specifies the name of the column(s) to sort by.
+                Passed as is to _PreparedData().
+
+    Returns:
+      A JSon constructor string to generate a JS DataTable with the data
+      stored in the DataTable object.
+      Example result (the result is without the newlines):
+       {cols: [{id:"a",label:"a",type:"number"},
+               {id:"b",label:"b",type:"string"},
+              {id:"c",label:"c",type:"number"}],
+        rows: [{c:[{v:1},{v:"z"},{v:2}]}, c:{[{v:3,f:"3$"},{v:"w"},{v:null}]}],
+        p:    {'foo': 'bar'}}
+
+    Raises:
+      DataTableException: The data does not match the type.
+    """
+
+    encoder = DataTableJSONEncoder()
+    return encoder.encode(
+        self._ToJSonObj(columns_order, order_by)).encode("utf-8")
+
+  def ToJSonResponse(self, columns_order=None, order_by=(), req_id=0,
+                     response_handler="google.visualization.Query.setResponse"):
+    """Writes a table as a JSON response that can be returned as-is to a client.
+
+    This method writes a JSON response to return to a client in response to a
+    Google Visualization API query. This string can be processed by the calling
+    page, and is used to deliver a data table to a visualization hosted on
+    a different page.
+
+    Args:
+      columns_order: Optional. Passed straight to self.ToJSon().
+      order_by: Optional. Passed straight to self.ToJSon().
+      req_id: Optional. The response id, as retrieved by the request.
+      response_handler: Optional. The response handler, as retrieved by the
+          request.
+
+    Returns:
+      A JSON response string to be received by JS the visualization Query
+      object. This response would be translated into a DataTable on the
+      client side.
+      Example result (newlines added for readability):
+       google.visualization.Query.setResponse({
+          'version':'0.6', 'reqId':'0', 'status':'OK',
+          'table': {cols: [...], rows: [...]}});
+
+    Note: The URL returning this string can be used as a data source by Google
+          Visualization Gadgets or from JS code.
+    """
+
+    response_obj = {
+        "version": "0.6",
+        "reqId": str(req_id),
+        "table": self._ToJSonObj(columns_order, order_by),
+        "status": "ok"
+    }
+    encoder = DataTableJSONEncoder()
+    return "%s(%s);" % (response_handler,
+                        encoder.encode(response_obj).encode("utf-8"))
+
+  def ToResponse(self, columns_order=None, order_by=(), tqx=""):
+    """Writes the right response according to the request string passed in tqx.
+
+    This method parses the tqx request string (format of which is defined in
+    the documentation for implementing a data source of Google Visualization),
+    and returns the right response according to the request.
+    It parses out the "out" parameter of tqx, calls the relevant response
+    (ToJSonResponse() for "json", ToCsv() for "csv", ToHtml() for "html",
+    ToTsvExcel() for "tsv-excel") and passes the response function the rest of
+    the relevant request keys.
+
+    Args:
+      columns_order: Optional. Passed as is to the relevant response function.
+      order_by: Optional. Passed as is to the relevant response function.
+      tqx: Optional. The request string as received by HTTP GET. Should be in
+           the format "key1:value1;key2:value2...". All keys have a default
+           value, so an empty string will just do the default (which is calling
+           ToJSonResponse() with no extra parameters).
+
+    Returns:
+      A response string, as returned by the relevant response function.
+
+    Raises:
+      DataTableException: One of the parameters passed in tqx is not supported.
+    """
+    tqx_dict = {}
+    if tqx:
+      tqx_dict = dict(opt.split(":") for opt in tqx.split(";"))
+    if tqx_dict.get("version", "0.6") != "0.6":
+      raise DataTableException(
+          "Version (%s) passed by request is not supported."
+          % tqx_dict["version"])
+
+    if tqx_dict.get("out", "json") == "json":
+      response_handler = tqx_dict.get("responseHandler",
+                                      "google.visualization.Query.setResponse")
+      return self.ToJSonResponse(columns_order, order_by,
+                                 req_id=tqx_dict.get("reqId", 0),
+                                 response_handler=response_handler)
+    elif tqx_dict["out"] == "html":
+      return self.ToHtml(columns_order, order_by)
+    elif tqx_dict["out"] == "csv":
+      return self.ToCsv(columns_order, order_by)
+    elif tqx_dict["out"] == "tsv-excel":
+      return self.ToTsvExcel(columns_order, order_by)
+    else:
+      raise DataTableException(
+          "'out' parameter: '%s' is not supported" % tqx_dict["out"])
diff --git a/third_party/aom/test/hadamard_test.cc b/third_party/aom/test/hadamard_test.cc
new file mode 100644
index 0000000000..b01e78faaa
--- /dev/null
+++ b/third_party/aom/test/hadamard_test.cc
@@ -0,0 +1,547 @@
+/*
+ *  Copyright (c) 2019, Alliance for Open Media. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <algorithm>
+#include <ostream>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+
+using libaom_test::ACMRandom;
+
+using HadamardFunc = void (*)(const int16_t *a, ptrdiff_t a_stride,
+                              tran_low_t *b);
+// Low precision version of Hadamard Transform
+using HadamardLPFunc = void (*)(const int16_t *a, ptrdiff_t a_stride,
+                                int16_t *b);
+// Low precision version of Hadamard Transform 8x8 - Dual
+using HadamardLP8x8DualFunc = void (*)(const int16_t *a, ptrdiff_t a_stride,
+                                       int16_t *b);
+
+template <typename OutputType>
+void Hadamard4x4(const OutputType *a, OutputType *out) {
+  OutputType b[8];
+  for (int i = 0; i < 4; i += 2) {
+    b[i + 0] = (a[i * 4] + a[(i + 1) * 4]) >> 1;
+    b[i + 1] = (a[i * 4] - a[(i + 1) * 4]) >> 1;
+  }
+
+  out[0] = b[0] + b[2];
+  out[1] = b[1] + b[3];
+  out[2] = b[0] - b[2];
+  out[3] = b[1] - b[3];
+}
+
+template <typename OutputType>
+void ReferenceHadamard4x4(const int16_t *a, int a_stride, OutputType *b) {
+  OutputType input[16];
+  OutputType buf[16];
+  for (int i = 0; i < 4; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      input[i * 4 + j] = static_cast<OutputType>(a[i * a_stride + j]);
+    }
+  }
+  for (int i = 0; i < 4; ++i) Hadamard4x4(input + i, buf + i * 4);
+  for (int i = 0; i < 4; ++i) Hadamard4x4(buf + i, b + i * 4);
+
+  // Extra transpose to match C and SSE2 behavior(i.e., aom_hadamard_4x4).
+  for (int i = 0; i < 4; i++) {
+    for (int j = i + 1; j < 4; j++) {
+      OutputType temp = b[j * 4 + i];
+      b[j * 4 + i] = b[i * 4 + j];
+      b[i * 4 + j] = temp;
+    }
+  }
+}
+
+template <typename OutputType>
+void HadamardLoop(const OutputType *a, OutputType *out) {
+  OutputType b[8];
+  for (int i = 0; i < 8; i += 2) {
+    b[i + 0] = a[i * 8] + a[(i + 1) * 8];
+    b[i + 1] = a[i * 8] - a[(i + 1) * 8];
+  }
+  OutputType c[8];
+  for (int i = 0; i < 8; i += 4) {
+    c[i + 0] = b[i + 0] + b[i + 2];
+    c[i + 1] = b[i + 1] + b[i + 3];
+    c[i + 2] = b[i + 0] - b[i + 2];
+    c[i + 3] = b[i + 1] - b[i + 3];
+  }
+  out[0] = c[0] + c[4];
+  out[7] = c[1] + c[5];
+  out[3] = c[2] + c[6];
+  out[4] = c[3] + c[7];
+  out[2] = c[0] - c[4];
+  out[6] = c[1] - c[5];
+  out[1] = c[2] - c[6];
+  out[5] = c[3] - c[7];
+}
+
+template <typename OutputType>
+void ReferenceHadamard8x8(const int16_t *a, int a_stride, OutputType *b) {
+  OutputType input[64];
+  OutputType buf[64];
+  for (int i = 0; i < 8; ++i) {
+    for (int j = 0; j < 8; ++j) {
+      input[i * 8 + j] = static_cast<OutputType>(a[i * a_stride + j]);
+    }
+  }
+  for (int i = 0; i < 8; ++i) HadamardLoop(input + i, buf + i * 8);
+  for (int i = 0; i < 8; ++i) HadamardLoop(buf + i, b + i * 8);
+
+  // Extra transpose to match SSE2 behavior (i.e., aom_hadamard_8x8 and
+  // aom_hadamard_lp_8x8).
+  for (int i = 0; i < 8; i++) {
+    for (int j = i + 1; j < 8; j++) {
+      OutputType temp = b[j * 8 + i];
+      b[j * 8 + i] = b[i * 8 + j];
+      b[i * 8 + j] = temp;
+    }
+  }
+}
+
+template <typename OutputType>
+void ReferenceHadamard8x8Dual(const int16_t *a, int a_stride, OutputType *b) {
+  /* The source is a 8x16 block. The destination is rearranged to 8x16.
+   * Input is 9 bit. */
+  ReferenceHadamard8x8(a, a_stride, b);
+  ReferenceHadamard8x8(a + 8, a_stride, b + 64);
+}
+
+template <typename OutputType>
+void ReferenceHadamard16x16(const int16_t *a, int a_stride, OutputType *b,
+                            bool shift) {
+  /* The source is a 16x16 block. The destination is rearranged to 8x32.
+   * Input is 9 bit. */
+  ReferenceHadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
+  ReferenceHadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
+  ReferenceHadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
+  ReferenceHadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
+
+  /* Overlay the 8x8 blocks and combine. */
+  for (int i = 0; i < 64; ++i) {
+    /* 8x8 steps the range up to 15 bits. */
+    const OutputType a0 = b[0];
+    const OutputType a1 = b[64];
+    const OutputType a2 = b[128];
+    const OutputType a3 = b[192];
+
+    /* Prevent the result from escaping int16_t. */
+    const OutputType b0 = (a0 + a1) >> 1;
+    const OutputType b1 = (a0 - a1) >> 1;
+    const OutputType b2 = (a2 + a3) >> 1;
+    const OutputType b3 = (a2 - a3) >> 1;
+
+    /* Store a 16 bit value. */
+    b[0] = b0 + b2;
+    b[64] = b1 + b3;
+    b[128] = b0 - b2;
+    b[192] = b1 - b3;
+
+    ++b;
+  }
+
+  if (shift) {
+    b -= 64;
+    // Extra shift to match aom_hadamard_16x16_c and aom_hadamard_16x16_avx2.
+    for (int i = 0; i < 16; i++) {
+      for (int j = 0; j < 4; j++) {
+        OutputType temp = b[i * 16 + 4 + j];
+        b[i * 16 + 4 + j] = b[i * 16 + 8 + j];
+        b[i * 16 + 8 + j] = temp;
+      }
+    }
+  }
+}
+
+template <typename OutputType>
+void ReferenceHadamard32x32(const int16_t *a, int a_stride, OutputType *b,
+                            bool shift) {
+  ReferenceHadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0, shift);
+  ReferenceHadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256, shift);
+  ReferenceHadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512, shift);
+  ReferenceHadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768, shift);
+
+  for (int i = 0; i < 256; ++i) {
+    const OutputType a0 = b[0];
+    const OutputType a1 = b[256];
+    const OutputType a2 = b[512];
+    const OutputType a3 = b[768];
+
+    const OutputType b0 = (a0 + a1) >> 2;
+    const OutputType b1 = (a0 - a1) >> 2;
+    const OutputType b2 = (a2 + a3) >> 2;
+    const OutputType b3 = (a2 - a3) >> 2;
+
+    b[0] = b0 + b2;
+    b[256] = b1 + b3;
+    b[512] = b0 - b2;
+    b[768] = b1 - b3;
+
+    ++b;
+  }
+}
+
+template <typename OutputType>
+void ReferenceHadamard(const int16_t *a, int a_stride, OutputType *b, int bw,
+                       int bh, bool shift) {
+  if (bw == 32 && bh == 32) {
+    ReferenceHadamard32x32(a, a_stride, b, shift);
+  } else if (bw == 16 && bh == 16) {
+    ReferenceHadamard16x16(a, a_stride, b, shift);
+  } else if (bw == 8 && bh == 8) {
+    ReferenceHadamard8x8(a, a_stride, b);
+  } else if (bw == 4 && bh == 4) {
+    ReferenceHadamard4x4(a, a_stride, b);
+  } else if (bw == 8 && bh == 16) {
+    ReferenceHadamard8x8Dual(a, a_stride, b);
+  } else {
+    GTEST_FAIL() << "Invalid Hadamard transform size " << bw << bh << std::endl;
+  }
+}
+
+template <typename HadamardFuncType>
+struct FuncWithSize {
+  FuncWithSize(HadamardFuncType f, int bw, int bh)
+      : func(f), block_width(bw), block_height(bh) {}
+  HadamardFuncType func;
+  int block_width;
+  int block_height;
+};
+
+using HadamardFuncWithSize = FuncWithSize<HadamardFunc>;
+using HadamardLPFuncWithSize = FuncWithSize<HadamardLPFunc>;
+using HadamardLP8x8DualFuncWithSize = FuncWithSize<HadamardLP8x8DualFunc>;
+
+template <typename OutputType, typename HadamardFuncType>
+class HadamardTestBase
+    : public ::testing::TestWithParam<FuncWithSize<HadamardFuncType>> {
+ public:
+  HadamardTestBase(const FuncWithSize<HadamardFuncType> &func_param,
+                   bool do_shift) {
+    h_func_ = func_param.func;
+    bw_ = func_param.block_width;
+    bh_ = func_param.block_height;
+    shift_ = do_shift;
+  }
+
+  void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+  // The Rand() function generates values in the range [-((1 << BitDepth) - 1),
+  // (1 << BitDepth) - 1]. This is because the input to the Hadamard transform
+  // is the residual pixel, which is defined as 'source pixel - predicted
+  // pixel'. Source pixel and predicted pixel take values in the range
+  // [0, (1 << BitDepth) - 1] and thus the residual pixel ranges from
+  // -((1 << BitDepth) - 1) to ((1 << BitDepth) - 1).
+  virtual int16_t Rand() = 0;
+
+  void CompareReferenceRandom() {
+    const int kMaxBlockSize = 32 * 32;
+    const int block_size = bw_ * bh_;
+
+    DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]);
+    DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]);
+    memset(a, 0, sizeof(a));
+    memset(b, 0, sizeof(b));
+
+    OutputType b_ref[kMaxBlockSize];
+    memset(b_ref, 0, sizeof(b_ref));
+
+    for (int i = 0; i < block_size; ++i) a[i] = Rand();
+    ReferenceHadamard(a, bw_, b_ref, bw_, bh_, shift_);
+    API_REGISTER_STATE_CHECK(h_func_(a, bw_, b));
+
+    // The order of the output is not important. Sort before checking.
+    std::sort(b, b + block_size);
+    std::sort(b_ref, b_ref + block_size);
+    EXPECT_EQ(memcmp(b, b_ref, sizeof(b)), 0);
+  }
+
+  void CompareReferenceExtreme() {
+    const int kMaxBlockSize = 32 * 32;
+    const int block_size = bw_ * bh_;
+    const int kBitDepth = 8;
+    DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]);
+    DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]);
+    memset(b, 0, sizeof(b));
+
+    OutputType b_ref[kMaxBlockSize];
+    memset(b_ref, 0, sizeof(b_ref));
+    for (int i = 0; i < 2; ++i) {
+      const int sign = (i == 0) ? 1 : -1;
+      for (int j = 0; j < block_size; ++j) a[j] = sign * ((1 << kBitDepth) - 1);
+
+      ReferenceHadamard(a, bw_, b_ref, bw_, bh_, shift_);
+      API_REGISTER_STATE_CHECK(h_func_(a, bw_, b));
+
+      // The order of the output is not important. Sort before checking.
+      std::sort(b, b + block_size);
+      std::sort(b_ref, b_ref + block_size);
+      EXPECT_EQ(memcmp(b, b_ref, sizeof(b)), 0);
+    }
+  }
+
+  void VaryStride() {
+    const int kMaxBlockSize = 32 * 32;
+    const int block_size = bw_ * bh_;
+
+    DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]);
+    DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]);
+    memset(a, 0, sizeof(a));
+    for (int i = 0; i < block_size * 8; ++i) a[i] = Rand();
+
+    OutputType b_ref[kMaxBlockSize];
+    for (int i = 8; i < 64; i += 8) {
+      memset(b, 0, sizeof(b));
+      memset(b_ref, 0, sizeof(b_ref));
+
+      ReferenceHadamard(a, i, b_ref, bw_, bh_, shift_);
+      API_REGISTER_STATE_CHECK(h_func_(a, i, b));
+
+      // The order of the output is not important. Sort before checking.
+      std::sort(b, b + block_size);
+      std::sort(b_ref, b_ref + block_size);
+      EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+    }
+  }
+
+  void SpeedTest(int times) {
+    const int kMaxBlockSize = 32 * 32;
+    DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]);
+    DECLARE_ALIGNED(16, OutputType, output[kMaxBlockSize]);
+    memset(input, 1, sizeof(input));
+    memset(output, 0, sizeof(output));
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < times; ++i) {
+      h_func_(input, bw_, output);
+    }
+    aom_usec_timer_mark(&timer);
+
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("Hadamard%dx%d[%12d runs]: %d us\n", bw_, bh_, times, elapsed_time);
+  }
+
+ protected:
+  ACMRandom rnd_;
+
+ private:
+  HadamardFuncType h_func_;
+  int bw_;
+  int bh_;
+  bool shift_;
+};
+
+class HadamardLowbdTest : public HadamardTestBase<tran_low_t, HadamardFunc> {
+ public:
+  HadamardLowbdTest() : HadamardTestBase(GetParam(), /*do_shift=*/true) {}
+  // Use values between -255 (0xFF01) and 255 (0x00FF)
+  int16_t Rand() override {
+    int16_t src = rnd_.Rand8();
+    int16_t pred = rnd_.Rand8();
+    return src - pred;
+  }
+};
+
+TEST_P(HadamardLowbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }
+
+TEST_P(HadamardLowbdTest, CompareReferenceExtreme) {
+  CompareReferenceExtreme();
+}
+
+TEST_P(HadamardLowbdTest, VaryStride) { VaryStride(); }
+
+TEST_P(HadamardLowbdTest, DISABLED_SpeedTest) { SpeedTest(1000000); }
+
+INSTANTIATE_TEST_SUITE_P(
+    C, HadamardLowbdTest,
+    ::testing::Values(HadamardFuncWithSize(&aom_hadamard_4x4_c, 4, 4),
+                      HadamardFuncWithSize(&aom_hadamard_8x8_c, 8, 8),
+                      HadamardFuncWithSize(&aom_hadamard_16x16_c, 16, 16),
+                      HadamardFuncWithSize(&aom_hadamard_32x32_c, 32, 32)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, HadamardLowbdTest,
+    ::testing::Values(HadamardFuncWithSize(&aom_hadamard_4x4_sse2, 4, 4),
+                      HadamardFuncWithSize(&aom_hadamard_8x8_sse2, 8, 8),
+                      HadamardFuncWithSize(&aom_hadamard_16x16_sse2, 16, 16),
+                      HadamardFuncWithSize(&aom_hadamard_32x32_sse2, 32, 32)));
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, HadamardLowbdTest,
+    ::testing::Values(HadamardFuncWithSize(&aom_hadamard_16x16_avx2, 16, 16),
+                      HadamardFuncWithSize(&aom_hadamard_32x32_avx2, 32, 32)));
+#endif  // HAVE_AVX2
+
+// TODO(aomedia:3314): Disable NEON unit test for now, since hadamard 16x16 NEON
+// need modifications to match C/AVX2 behavior.
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, HadamardLowbdTest,
+    ::testing::Values(HadamardFuncWithSize(&aom_hadamard_4x4_neon, 4, 4),
+                      HadamardFuncWithSize(&aom_hadamard_8x8_neon, 8, 8),
+                      HadamardFuncWithSize(&aom_hadamard_16x16_neon, 16, 16),
+                      HadamardFuncWithSize(&aom_hadamard_32x32_neon, 32, 32)));
+#endif  // HAVE_NEON
+
+#if CONFIG_AV1_HIGHBITDEPTH
+class HadamardHighbdTest : public HadamardTestBase<tran_low_t, HadamardFunc> {
+ protected:
+  HadamardHighbdTest() : HadamardTestBase(GetParam(), /*do_shift=*/true) {}
+  // Use values between -4095 (0xF001) and 4095 (0x0FFF)
+  int16_t Rand() override {
+    int16_t src = rnd_.Rand12();
+    int16_t pred = rnd_.Rand12();
+    return src - pred;
+  }
+};
+
+TEST_P(HadamardHighbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }
+
+TEST_P(HadamardHighbdTest, VaryStride) { VaryStride(); }
+
+TEST_P(HadamardHighbdTest, DISABLED_Speed) {
+  SpeedTest(10);
+  SpeedTest(10000);
+  SpeedTest(10000000);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    C, HadamardHighbdTest,
+    ::testing::Values(
+        HadamardFuncWithSize(&aom_highbd_hadamard_8x8_c, 8, 8),
+        HadamardFuncWithSize(&aom_highbd_hadamard_16x16_c, 16, 16),
+        HadamardFuncWithSize(&aom_highbd_hadamard_32x32_c, 32, 32)));
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, HadamardHighbdTest,
+    ::testing::Values(
+        HadamardFuncWithSize(&aom_highbd_hadamard_8x8_avx2, 8, 8),
+        HadamardFuncWithSize(&aom_highbd_hadamard_16x16_avx2, 16, 16),
+        HadamardFuncWithSize(&aom_highbd_hadamard_32x32_avx2, 32, 32)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, HadamardHighbdTest,
+    ::testing::Values(
+        HadamardFuncWithSize(&aom_highbd_hadamard_8x8_neon, 8, 8),
+        HadamardFuncWithSize(&aom_highbd_hadamard_16x16_neon, 16, 16),
+        HadamardFuncWithSize(&aom_highbd_hadamard_32x32_neon, 32, 32)));
+#endif  // HAVE_NEON
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+// Tests for low precision
+class HadamardLowbdLPTest : public HadamardTestBase<int16_t, HadamardLPFunc> {
+ public:
+  HadamardLowbdLPTest() : HadamardTestBase(GetParam(), /*do_shift=*/false) {}
+  // Use values between -255 (0xFF01) and 255 (0x00FF)
+  int16_t Rand() override {
+    int16_t src = rnd_.Rand8();
+    int16_t pred = rnd_.Rand8();
+    return src - pred;
+  }
+};
+
+TEST_P(HadamardLowbdLPTest, CompareReferenceRandom) {
+  CompareReferenceRandom();
+}
+
+TEST_P(HadamardLowbdLPTest, VaryStride) { VaryStride(); }
+
+TEST_P(HadamardLowbdLPTest, DISABLED_SpeedTest) { SpeedTest(1000000); }
+
+INSTANTIATE_TEST_SUITE_P(
+    C, HadamardLowbdLPTest,
+    ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_c, 8, 8),
+                      HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_c, 16,
+                                             16)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, HadamardLowbdLPTest,
+    ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_sse2, 8, 8),
+                      HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_sse2, 16,
+                                             16)));
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, HadamardLowbdLPTest,
+                         ::testing::Values(HadamardLPFuncWithSize(
+                             &aom_hadamard_lp_16x16_avx2, 16, 16)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, HadamardLowbdLPTest,
+    ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_neon, 8, 8),
+                      HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_neon, 16,
+                                             16)));
+#endif  // HAVE_NEON
+
+// Tests for 8x8 dual low precision
+class HadamardLowbdLP8x8DualTest
+    : public HadamardTestBase<int16_t, HadamardLP8x8DualFunc> {
+ public:
+  HadamardLowbdLP8x8DualTest()
+      : HadamardTestBase(GetParam(), /*do_shift=*/false) {}
+  // Use values between -255 (0xFF01) and 255 (0x00FF)
+  int16_t Rand() override {
+    int16_t src = rnd_.Rand8();
+    int16_t pred = rnd_.Rand8();
+    return src - pred;
+  }
+};
+
+TEST_P(HadamardLowbdLP8x8DualTest, CompareReferenceRandom) {
+  CompareReferenceRandom();
+}
+
+TEST_P(HadamardLowbdLP8x8DualTest, VaryStride) { VaryStride(); }
+
+TEST_P(HadamardLowbdLP8x8DualTest, DISABLED_SpeedTest) { SpeedTest(1000000); }
+
+INSTANTIATE_TEST_SUITE_P(C, HadamardLowbdLP8x8DualTest,
+                         ::testing::Values(HadamardLP8x8DualFuncWithSize(
+                             &aom_hadamard_lp_8x8_dual_c, 8, 16)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, HadamardLowbdLP8x8DualTest,
+                         ::testing::Values(HadamardLP8x8DualFuncWithSize(
+                             &aom_hadamard_lp_8x8_dual_sse2, 8, 16)));
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, HadamardLowbdLP8x8DualTest,
+                         ::testing::Values(HadamardLP8x8DualFuncWithSize(
+                             &aom_hadamard_lp_8x8_dual_avx2, 8, 16)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, HadamardLowbdLP8x8DualTest,
+                         ::testing::Values(HadamardLP8x8DualFuncWithSize(
+                             &aom_hadamard_lp_8x8_dual_neon, 8, 16)));
+#endif  // HAVE_NEON
+
+}  // namespace
diff --git a/third_party/aom/test/hash_test.cc b/third_party/aom/test/hash_test.cc
new file mode 100644
index 0000000000..a1de9323db
--- /dev/null
+++ b/third_party/aom/test/hash_test.cc
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdlib>
+#include <new>
+#include <tuple>
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "av1/encoder/hash.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+typedef uint32_t (*get_crc32c_value_func)(void *calculator, uint8_t *p,
+                                          size_t length);
+
+typedef std::tuple<get_crc32c_value_func, int> HashParam;
+
+class AV1Crc32cHashTest : public ::testing::TestWithParam<HashParam> {
+ public:
+  ~AV1Crc32cHashTest() override;
+  void SetUp() override;
+
+  void TearDown() override;
+
+ protected:
+  void RunCheckOutput(get_crc32c_value_func test_impl);
+  void RunSpeedTest(get_crc32c_value_func test_impl);
+
+  void RunZeroTest(get_crc32c_value_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+  CRC32C calc_;
+  uint8_t *buffer_;
+  int bsize_;
+  size_t length_;
+};
+
+AV1Crc32cHashTest::~AV1Crc32cHashTest() = default;
+
+void AV1Crc32cHashTest::SetUp() {
+  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
+  av1_crc32c_calculator_init(&calc_);
+
+  bsize_ = GET_PARAM(1);
+  length_ = bsize_ * bsize_ * sizeof(uint16_t);
+  buffer_ = new uint8_t[length_];
+  ASSERT_NE(buffer_, nullptr);
+  for (size_t i = 0; i < length_; ++i) {
+    buffer_[i] = rnd_.Rand8();
+  }
+}
+
+void AV1Crc32cHashTest::TearDown() { delete[] buffer_; }
+
+void AV1Crc32cHashTest::RunCheckOutput(get_crc32c_value_func test_impl) {
+  get_crc32c_value_func ref_impl = av1_get_crc32c_value_c;
+  // for the same buffer crc should be the same
+  uint32_t crc0 = test_impl(&calc_, buffer_, length_);
+  uint32_t crc1 = test_impl(&calc_, buffer_, length_);
+  uint32_t crc2 = ref_impl(&calc_, buffer_, length_);
+  ASSERT_EQ(crc0, crc1);
+  ASSERT_EQ(crc0, crc2);  // should equal to software version
+  // modify buffer
+  buffer_[0] += 1;
+  uint32_t crc3 = test_impl(&calc_, buffer_, length_);
+  uint32_t crc4 = ref_impl(&calc_, buffer_, length_);
+  ASSERT_NE(crc0, crc3);  // crc shoud not equal to previous one
+  ASSERT_EQ(crc3, crc4);
+}
+
+void AV1Crc32cHashTest::RunSpeedTest(get_crc32c_value_func test_impl) {
+  get_crc32c_value_func impls[] = { av1_get_crc32c_value_c, test_impl };
+  const int repeat = 10000000 / (bsize_ + bsize_);
+
+  aom_usec_timer timer;
+  double time[2];
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer_start(&timer);
+    for (int j = 0; j < repeat; ++j) {
+      impls[i](&calc_, buffer_, length_);
+    }
+    aom_usec_timer_mark(&timer);
+    time[i] = static_cast<double>(aom_usec_timer_elapsed(&timer));
+  }
+  printf("hash %3dx%-3d:%7.2f/%7.2fus", bsize_, bsize_, time[0], time[1]);
+  printf("(%3.2f)\n", time[0] / time[1]);
+}
+
+void AV1Crc32cHashTest::RunZeroTest(get_crc32c_value_func test_impl) {
+  uint8_t buffer0[1024] = { 0 };
+  // for buffer with different size the crc should not be the same
+  const uint32_t crc0 = test_impl(&calc_, buffer0, 32);
+  const uint32_t crc1 = test_impl(&calc_, buffer0, 128);
+  const uint32_t crc2 = test_impl(&calc_, buffer0, 1024);
+  ASSERT_NE(crc0, crc1);
+  ASSERT_NE(crc0, crc2);
+  ASSERT_NE(crc1, crc2);
+}
+
+TEST_P(AV1Crc32cHashTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
+
+TEST_P(AV1Crc32cHashTest, CheckZero) { RunZeroTest(GET_PARAM(0)); }
+
+TEST_P(AV1Crc32cHashTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
+
+const int kValidBlockSize[] = { 64, 32, 8, 4 };
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AV1Crc32cHashTest,
+    ::testing::Combine(::testing::Values(&av1_get_crc32c_value_c),
+                       ::testing::ValuesIn(kValidBlockSize)));
+
+#if HAVE_SSE4_2
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_2, AV1Crc32cHashTest,
+    ::testing::Combine(::testing::Values(&av1_get_crc32c_value_sse4_2),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+#if HAVE_ARM_CRC32
+INSTANTIATE_TEST_SUITE_P(
+    ARM_CRC32, AV1Crc32cHashTest,
+    ::testing::Combine(::testing::Values(&av1_get_crc32c_value_arm_crc32),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/hbd_metrics_test.cc b/third_party/aom/test/hbd_metrics_test.cc
new file mode 100644
index 0000000000..303d580c4a
--- /dev/null
+++ b/third_party/aom/test/hbd_metrics_test.cc
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <new>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+
+#include "config/aom_config.h"
+
+#include "aom_dsp/psnr.h"
+#include "aom_dsp/ssim.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/msvc.h"
+#include "aom_scale/yv12config.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+typedef double (*LBDMetricFunc)(const YV12_BUFFER_CONFIG *source,
+                                const YV12_BUFFER_CONFIG *dest);
+typedef double (*HBDMetricFunc)(const YV12_BUFFER_CONFIG *source,
+                                const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
+                                uint32_t bd);
+
+double compute_hbd_psnr(const YV12_BUFFER_CONFIG *source,
+                        const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
+                        uint32_t bd) {
+  PSNR_STATS psnr;
+  aom_calc_highbd_psnr(source, dest, &psnr, bd, in_bd);
+  return psnr.psnr[0];
+}
+
+double compute_psnr(const YV12_BUFFER_CONFIG *source,
+                    const YV12_BUFFER_CONFIG *dest) {
+  PSNR_STATS psnr;
+  aom_calc_psnr(source, dest, &psnr);
+  return psnr.psnr[0];
+}
+
+double compute_hbd_psnrhvs(const YV12_BUFFER_CONFIG *source,
+                           const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
+                           uint32_t bd) {
+  double tempy, tempu, tempv;
+  return aom_psnrhvs(source, dest, &tempy, &tempu, &tempv, bd, in_bd);
+}
+
+double compute_psnrhvs(const YV12_BUFFER_CONFIG *source,
+                       const YV12_BUFFER_CONFIG *dest) {
+  double tempy, tempu, tempv;
+  return aom_psnrhvs(source, dest, &tempy, &tempu, &tempv, 8, 8);
+}
+
+double compute_hbd_fastssim(const YV12_BUFFER_CONFIG *source,
+                            const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
+                            uint32_t bd) {
+  double tempy, tempu, tempv;
+  return aom_calc_fastssim(source, dest, &tempy, &tempu, &tempv, bd, in_bd);
+}
+
+double compute_fastssim(const YV12_BUFFER_CONFIG *source,
+                        const YV12_BUFFER_CONFIG *dest) {
+  double tempy, tempu, tempv;
+  return aom_calc_fastssim(source, dest, &tempy, &tempu, &tempv, 8, 8);
+}
+
+double compute_hbd_aomssim(const YV12_BUFFER_CONFIG *source,
+                           const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
+                           uint32_t bd) {
+  double ssim[2], weight[2];
+  aom_highbd_calc_ssim(source, dest, weight, bd, in_bd, ssim);
+  return 100 * pow(ssim[0] / weight[0], 8.0);
+}
+
+double compute_aomssim(const YV12_BUFFER_CONFIG *source,
+                       const YV12_BUFFER_CONFIG *dest) {
+  double ssim, weight;
+  aom_lowbd_calc_ssim(source, dest, &weight, &ssim);
+  return 100 * pow(ssim / weight, 8.0);
+}
+
+class HBDMetricsTestBase {
+ public:
+  virtual ~HBDMetricsTestBase() = default;
+
+ protected:
+  void RunAccuracyCheck() {
+    const int width = 1920;
+    const int height = 1080;
+    size_t i = 0;
+    const uint8_t kPixFiller = 128;
+    YV12_BUFFER_CONFIG lbd_src, lbd_dst;
+    YV12_BUFFER_CONFIG hbd_src, hbd_dst;
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    double lbd_db, hbd_db;
+
+    memset(&lbd_src, 0, sizeof(lbd_src));
+    memset(&lbd_dst, 0, sizeof(lbd_dst));
+    memset(&hbd_src, 0, sizeof(hbd_src));
+    memset(&hbd_dst, 0, sizeof(hbd_dst));
+
+    aom_alloc_frame_buffer(&lbd_src, width, height, 1, 1, 0, 32, 16, 0, 0);
+    aom_alloc_frame_buffer(&lbd_dst, width, height, 1, 1, 0, 32, 16, 0, 0);
+    aom_alloc_frame_buffer(&hbd_src, width, height, 1, 1, 1, 32, 16, 0, 0);
+    aom_alloc_frame_buffer(&hbd_dst, width, height, 1, 1, 1, 32, 16, 0, 0);
+
+    memset(lbd_src.buffer_alloc, kPixFiller, lbd_src.buffer_alloc_sz);
+    while (i < lbd_src.buffer_alloc_sz) {
+      uint16_t spel, dpel;
+      spel = lbd_src.buffer_alloc[i];
+      // Create some distortion for dst buffer.
+      dpel = rnd.Rand8();
+      lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
+      ((uint16_t *)(hbd_src.buffer_alloc))[i] = spel << (bit_depth_ - 8);
+      ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
+      i++;
+    }
+
+    lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
+    hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
+    EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
+
+    i = 0;
+    while (i < lbd_src.buffer_alloc_sz) {
+      uint16_t dpel;
+      // Create some small distortion for dst buffer.
+      dpel = 120 + (rnd.Rand8() >> 4);
+      lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
+      ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
+      i++;
+    }
+
+    lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
+    hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
+    EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
+
+    i = 0;
+    while (i < lbd_src.buffer_alloc_sz) {
+      uint16_t dpel;
+      // Create some small distortion for dst buffer.
+      dpel = 126 + (rnd.Rand8() >> 6);
+      lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
+      ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
+      i++;
+    }
+
+    lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
+    hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
+    EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
+
+    aom_free_frame_buffer(&lbd_src);
+    aom_free_frame_buffer(&lbd_dst);
+    aom_free_frame_buffer(&hbd_src);
+    aom_free_frame_buffer(&hbd_dst);
+  }
+
+  int input_bit_depth_;
+  int bit_depth_;
+  double threshold_;
+  LBDMetricFunc lbd_metric_;
+  HBDMetricFunc hbd_metric_;
+};
+
+typedef std::tuple<LBDMetricFunc, HBDMetricFunc, int, int, double>
+    MetricTestTParam;
+class HBDMetricsTest : public HBDMetricsTestBase,
+                       public ::testing::TestWithParam<MetricTestTParam> {
+ public:
+  void SetUp() override {
+    lbd_metric_ = GET_PARAM(0);
+    hbd_metric_ = GET_PARAM(1);
+    input_bit_depth_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    threshold_ = GET_PARAM(4);
+  }
+};
+
+TEST_P(HBDMetricsTest, RunAccuracyCheck) { RunAccuracyCheck(); }
+
+// Allow small variation due to floating point operations.
+static const double kSsim_thresh = 0.001;
+// Allow some additional errors accumulated in floating point operations.
+static const double kFSsim_thresh = 0.03;
+// Allow some extra variation due to rounding error accumulated in dct.
+static const double kPhvs_thresh = 0.3;
+
+INSTANTIATE_TEST_SUITE_P(
+    AOMSSIM, HBDMetricsTest,
+    ::testing::Values(MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
+                                       8, 10, kSsim_thresh),
+                      MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
+                                       10, 10, kPhvs_thresh),
+                      MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
+                                       8, 12, kSsim_thresh),
+                      MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
+                                       12, 12, kPhvs_thresh)));
+INSTANTIATE_TEST_SUITE_P(
+    FASTSSIM, HBDMetricsTest,
+    ::testing::Values(MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
+                                       8, 10, kFSsim_thresh),
+                      MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
+                                       10, 10, kFSsim_thresh),
+                      MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
+                                       8, 12, kFSsim_thresh),
+                      MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
+                                       12, 12, kFSsim_thresh)));
+INSTANTIATE_TEST_SUITE_P(
+    PSNRHVS, HBDMetricsTest,
+    ::testing::Values(MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
+                                       8, 10, kPhvs_thresh),
+                      MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
+                                       10, 10, kPhvs_thresh),
+                      MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
+                                       8, 12, kPhvs_thresh),
+                      MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
+                                       12, 12, kPhvs_thresh)));
+INSTANTIATE_TEST_SUITE_P(
+    PSNR, HBDMetricsTest,
+    ::testing::Values(
+        MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 8, 10, kPhvs_thresh),
+        MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 10, 10,
+                         kPhvs_thresh),
+        MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 8, 12, kPhvs_thresh),
+        MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 12, 12,
+                         kPhvs_thresh)));
+}  // namespace
diff --git a/third_party/aom/test/hiprec_convolve_test.cc b/third_party/aom/test/hiprec_convolve_test.cc
new file mode 100644
index 0000000000..78883ccddf
--- /dev/null
+++ b/third_party/aom/test/hiprec_convolve_test.cc
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/hiprec_convolve_test_util.h"
+
+using libaom_test::ACMRandom;
+#if CONFIG_AV1_HIGHBITDEPTH
+using libaom_test::AV1HighbdHiprecConvolve::AV1HighbdHiprecConvolveTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdHiprecConvolveTest);
+#endif
+using libaom_test::AV1HiprecConvolve::AV1HiprecConvolveTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HiprecConvolveTest);
+using std::make_tuple;
+using std::tuple;
+
+namespace {
+
+TEST_P(AV1HiprecConvolveTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
+TEST_P(AV1HiprecConvolveTest, DISABLED_SpeedTest) {
+  RunSpeedTest(GET_PARAM(3));
+}
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, AV1HiprecConvolveTest,
+                         libaom_test::AV1HiprecConvolve::BuildParams(
+                             av1_wiener_convolve_add_src_sse2));
+#endif
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1HiprecConvolveTest,
+                         libaom_test::AV1HiprecConvolve::BuildParams(
+                             av1_wiener_convolve_add_src_avx2));
+#endif
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1HiprecConvolveTest,
+                         libaom_test::AV1HiprecConvolve::BuildParams(
+                             av1_wiener_convolve_add_src_neon));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+#if HAVE_SSSE3 || HAVE_AVX2 || HAVE_NEON
+TEST_P(AV1HighbdHiprecConvolveTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(4));
+}
+TEST_P(AV1HighbdHiprecConvolveTest, DISABLED_SpeedTest) {
+  RunSpeedTest(GET_PARAM(4));
+}
+#if HAVE_SSSE3
+INSTANTIATE_TEST_SUITE_P(SSSE3, AV1HighbdHiprecConvolveTest,
+                         libaom_test::AV1HighbdHiprecConvolve::BuildParams(
+                             av1_highbd_wiener_convolve_add_src_ssse3));
+#endif
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, AV1HighbdHiprecConvolveTest,
+                         libaom_test::AV1HighbdHiprecConvolve::BuildParams(
+                             av1_highbd_wiener_convolve_add_src_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdHiprecConvolveTest,
+                         libaom_test::AV1HighbdHiprecConvolve::BuildParams(
+                             av1_highbd_wiener_convolve_add_src_neon));
+#endif
+#endif
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc
new file mode 100644
index 0000000000..6d7902fd04
--- /dev/null
+++ b/third_party/aom/test/hiprec_convolve_test_util.cc
@@ -0,0 +1,380 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/hiprec_convolve_test_util.h"
+
+#include <memory>
+#include <new>
+
+#include "av1/common/restoration.h"
+
+using std::make_tuple;
+using std::tuple;
+
+namespace libaom_test {
+
+// Generate a random pair of filter kernels, using the ranges
+// of possible values from the loop-restoration experiment
+static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
+                             InterpKernel vkernel, int kernel_type = 2) {
+  if (kernel_type == 0) {
+    // Low possible values for filter coefficients, 7-tap kernel
+    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MINV;
+    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
+    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
+    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
+    hkernel[7] = vkernel[7] = 0;
+  } else if (kernel_type == 1) {
+    // Max possible values for filter coefficients, 7-tap kernel
+    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MAXV;
+    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
+    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
+    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
+    hkernel[7] = vkernel[7] = 0;
+  } else if (kernel_type == 2) {
+    // Randomly generated values for filter coefficients, 7-tap kernel
+    hkernel[0] = hkernel[6] =
+        WIENER_FILT_TAP0_MINV +
+        rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
+    hkernel[1] = hkernel[5] =
+        WIENER_FILT_TAP1_MINV +
+        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
+    hkernel[2] = hkernel[4] =
+        WIENER_FILT_TAP2_MINV +
+        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
+    hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
+    hkernel[7] = 0;
+
+    vkernel[0] = vkernel[6] =
+        WIENER_FILT_TAP0_MINV +
+        rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 2 - WIENER_FILT_TAP0_MINV);
+    vkernel[1] = vkernel[5] =
+        WIENER_FILT_TAP1_MINV +
+        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
+    vkernel[2] = vkernel[4] =
+        WIENER_FILT_TAP2_MINV +
+        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
+    vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
+    vkernel[7] = 0;
+  } else if (kernel_type == 3) {
+    // Low possible values for filter coefficients, 5-tap kernel
+    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0;
+    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
+    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
+    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
+    hkernel[7] = vkernel[7] = 0;
+  } else if (kernel_type == 4) {
+    // Max possible values for filter coefficients, 5-tap kernel
+    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0;
+    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
+    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
+    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
+    hkernel[7] = vkernel[7] = 0;
+  } else {
+    // Randomly generated values for filter coefficients, 5-tap kernel
+    hkernel[0] = hkernel[6] = 0;
+    hkernel[1] = hkernel[5] =
+        WIENER_FILT_TAP1_MINV +
+        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
+    hkernel[2] = hkernel[4] =
+        WIENER_FILT_TAP2_MINV +
+        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
+    hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
+    hkernel[7] = 0;
+
+    vkernel[0] = vkernel[6] = 0;
+    vkernel[1] = vkernel[5] =
+        WIENER_FILT_TAP1_MINV +
+        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
+    vkernel[2] = vkernel[4] =
+        WIENER_FILT_TAP2_MINV +
+        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
+    vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
+    vkernel[7] = 0;
+  }
+}
+
+namespace AV1HiprecConvolve {
+
+::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
+    hiprec_convolve_func filter) {
+  const HiprecConvolveParam params[] = {
+    make_tuple(8, 8, 50000, filter),   make_tuple(8, 4, 50000, filter),
+    make_tuple(64, 24, 1000, filter),  make_tuple(64, 64, 1000, filter),
+    make_tuple(64, 56, 1000, filter),  make_tuple(32, 8, 10000, filter),
+    make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
+    make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
+    make_tuple(64, 34, 1000, filter),  make_tuple(8, 17, 10000, filter),
+    make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
+  };
+  return ::testing::ValuesIn(params);
+}
+
+AV1HiprecConvolveTest::~AV1HiprecConvolveTest() = default;
+void AV1HiprecConvolveTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+
+void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  int i, j, k, m;
+  const WienerConvolveParams conv_params = get_conv_params_wiener(8);
+
+  std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
+  ASSERT_NE(input_, nullptr);
+  uint8_t *input = input_.get();
+
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
+  std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
+  ASSERT_NE(output, nullptr);
+  std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
+  ASSERT_NE(output2, nullptr);
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+  for (int kernel_type = 0; kernel_type < 6; kernel_type++) {
+    generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
+    for (i = 0; i < num_iters; ++i) {
+      for (k = 0; k < h; ++k)
+        for (m = 0; m < w; ++m) input[k * w + m] = rnd_.Rand8();
+      // Choose random locations within the source block
+      int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+      int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
+      av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w,
+                                    output.get(), out_w, hkernel, 16, vkernel,
+                                    16, out_w, out_h, &conv_params);
+      test_impl(input + offset_r * w + offset_c, w, output2.get(), out_w,
+                hkernel, 16, vkernel, 16, out_w, out_h, &conv_params);
+
+      for (j = 0; j < out_w * out_h; ++j)
+        ASSERT_EQ(output[j], output2[j])
+            << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
+            << (j / out_w) << ") on iteration " << i;
+    }
+  }
+}
+
+void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2) / 500;
+  int i, j, k;
+  const WienerConvolveParams conv_params = get_conv_params_wiener(8);
+
+  std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
+  ASSERT_NE(input_, nullptr);
+  uint8_t *input = input_.get();
+
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
+  std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
+  ASSERT_NE(output, nullptr);
+  std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
+  ASSERT_NE(output2, nullptr);
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+  generate_kernels(&rnd_, hkernel, vkernel);
+
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
+
+  aom_usec_timer ref_timer;
+  aom_usec_timer_start(&ref_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        av1_wiener_convolve_add_src_c(input + j * w + k, w, output.get(), out_w,
+                                      hkernel, 16, vkernel, 16, out_w, out_h,
+                                      &conv_params);
+      }
+    }
+  }
+  aom_usec_timer_mark(&ref_timer);
+  const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer tst_timer;
+  aom_usec_timer_start(&tst_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        test_impl(input + j * w + k, w, output2.get(), out_w, hkernel, 16,
+                  vkernel, 16, out_w, out_h, &conv_params);
+      }
+    }
+  }
+  aom_usec_timer_mark(&tst_timer);
+  const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+  std::cout << "[          ] C time = " << ref_time / 1000
+            << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+  EXPECT_GT(ref_time, tst_time)
+      << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
+      << "C time: " << ref_time << " us\n"
+      << "SIMD time: " << tst_time << " us\n";
+}
+}  // namespace AV1HiprecConvolve
+
+#if CONFIG_AV1_HIGHBITDEPTH
+namespace AV1HighbdHiprecConvolve {
+
+::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
+    highbd_hiprec_convolve_func filter) {
+  const HighbdHiprecConvolveParam params[] = {
+    make_tuple(8, 8, 50000, 8, filter),   make_tuple(64, 64, 1000, 8, filter),
+    make_tuple(32, 8, 10000, 8, filter),  make_tuple(8, 8, 50000, 10, filter),
+    make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter),
+    make_tuple(8, 8, 50000, 12, filter),  make_tuple(64, 64, 1000, 12, filter),
+    make_tuple(32, 8, 10000, 12, filter),
+  };
+  return ::testing::ValuesIn(params);
+}
+
+AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() = default;
+void AV1HighbdHiprecConvolveTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+
+void AV1HighbdHiprecConvolveTest::RunCheckOutput(
+    highbd_hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  const int bd = GET_PARAM(3);
+  int i, j;
+  const WienerConvolveParams conv_params = get_conv_params_wiener(bd);
+
+  std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
+  ASSERT_NE(input, nullptr);
+
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
+  std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
+  ASSERT_NE(output, nullptr);
+  std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
+  ASSERT_NE(output2, nullptr);
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+
+  uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
+  uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
+  uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
+  for (int kernel_type = 0; kernel_type < 6; kernel_type++) {
+    generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
+    for (i = 0; i < num_iters; ++i) {
+      // Choose random locations within the source block
+      int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+      int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
+      av1_highbd_wiener_convolve_add_src_c(
+          input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel,
+          16, vkernel, 16, out_w, out_h, &conv_params, bd);
+      test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
+                hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
+
+      for (j = 0; j < out_w * out_h; ++j)
+        ASSERT_EQ(output[j], output2[j])
+            << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
+            << (j / out_w) << ") on iteration " << i;
+    }
+  }
+}
+
+void AV1HighbdHiprecConvolveTest::RunSpeedTest(
+    highbd_hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2) / 500;
+  const int bd = GET_PARAM(3);
+  int i, j, k;
+  const WienerConvolveParams conv_params = get_conv_params_wiener(bd);
+
+  std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
+  ASSERT_NE(input, nullptr);
+
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
+  std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
+  ASSERT_NE(output, nullptr);
+  std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
+  ASSERT_NE(output2, nullptr);
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+  generate_kernels(&rnd_, hkernel, vkernel);
+
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+
+  uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
+  uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
+  uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
+
+  aom_usec_timer ref_timer;
+  aom_usec_timer_start(&ref_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        av1_highbd_wiener_convolve_add_src_c(
+            input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
+            16, out_w, out_h, &conv_params, bd);
+      }
+    }
+  }
+  aom_usec_timer_mark(&ref_timer);
+  const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer tst_timer;
+  aom_usec_timer_start(&tst_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
+                  vkernel, 16, out_w, out_h, &conv_params, bd);
+      }
+    }
+  }
+  aom_usec_timer_mark(&tst_timer);
+  const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+  std::cout << "[          ] C time = " << ref_time / 1000
+            << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+  EXPECT_GT(ref_time, tst_time)
+      << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
+      << "C time: " << ref_time << " us\n"
+      << "SIMD time: " << tst_time << " us\n";
+}
+}  // namespace AV1HighbdHiprecConvolve
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace libaom_test
diff --git a/third_party/aom/test/hiprec_convolve_test_util.h b/third_party/aom/test/hiprec_convolve_test_util.h
new file mode 100644
index 0000000000..beae5c729b
--- /dev/null
+++ b/third_party/aom/test/hiprec_convolve_test_util.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
+#define AOM_TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
+
+#include <tuple>
+
+#include "config/av1_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "test/register_state_check.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "aom_ports/aom_timer.h"
+#include "av1/common/convolve.h"
+#include "av1/common/mv.h"
+
+namespace libaom_test {
+
+namespace AV1HiprecConvolve {
+
+typedef void (*hiprec_convolve_func)(const uint8_t *src, ptrdiff_t src_stride,
+                                     uint8_t *dst, ptrdiff_t dst_stride,
+                                     const int16_t *filter_x, int x_step_q4,
+                                     const int16_t *filter_y, int y_step_q4,
+                                     int w, int h,
+                                     const WienerConvolveParams *conv_params);
+
+typedef std::tuple<int, int, int, hiprec_convolve_func> HiprecConvolveParam;
+
+::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
+    hiprec_convolve_func filter);
+
+class AV1HiprecConvolveTest
+    : public ::testing::TestWithParam<HiprecConvolveParam> {
+ public:
+  ~AV1HiprecConvolveTest() override;
+  void SetUp() override;
+
+ protected:
+  void RunCheckOutput(hiprec_convolve_func test_impl);
+  void RunSpeedTest(hiprec_convolve_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+};
+
+}  // namespace AV1HiprecConvolve
+
+#if CONFIG_AV1_HIGHBITDEPTH
+namespace AV1HighbdHiprecConvolve {
+typedef void (*highbd_hiprec_convolve_func)(
+    const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
+    ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,
+    const int16_t *filter_y, int y_step_q4, int w, int h,
+    const WienerConvolveParams *conv_params, int bps);
+
+typedef std::tuple<int, int, int, int, highbd_hiprec_convolve_func>
+    HighbdHiprecConvolveParam;
+
+::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
+    highbd_hiprec_convolve_func filter);
+
+class AV1HighbdHiprecConvolveTest
+    : public ::testing::TestWithParam<HighbdHiprecConvolveParam> {
+ public:
+  ~AV1HighbdHiprecConvolveTest() override;
+  void SetUp() override;
+
+ protected:
+  void RunCheckOutput(highbd_hiprec_convolve_func test_impl);
+  void RunSpeedTest(highbd_hiprec_convolve_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+};
+
+}  // namespace AV1HighbdHiprecConvolve
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
diff --git a/third_party/aom/test/horver_correlation_test.cc b/third_party/aom/test/horver_correlation_test.cc
new file mode 100644
index 0000000000..5e397ffdf7
--- /dev/null
+++ b/third_party/aom/test/horver_correlation_test.cc
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*HorverFunc)(const int16_t *diff, int stride, int w, int h,
+                           float *hcorr, float *vcorr);
+
+typedef std::tuple<const HorverFunc> HorverTestParam;
+
+class HorverTest : public ::testing::TestWithParam<HorverTestParam> {
+ public:
+  void SetUp() override {
+    data_buf_ = (int16_t *)aom_malloc(MAX_SB_SQUARE * sizeof(int16_t));
+    ASSERT_NE(data_buf_, nullptr);
+    target_func_ = GET_PARAM(0);
+  }
+  void TearDown() override { aom_free(data_buf_); }
+  void RunHorverTest();
+  void RunHorverTest_ExtremeValues();
+  void RunHorverSpeedTest(int run_times);
+
+ private:
+  HorverFunc target_func_;
+  ACMRandom rng_;
+  int16_t *data_buf_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HorverTest);
+
+void HorverTest::RunHorverTest() {
+  for (int block_size = 0; block_size < BLOCK_SIZES_ALL; block_size++) {
+    const int w = block_size_wide[block_size];
+    const int h = block_size_high[block_size];
+    for (int iter = 0; iter < 1000 && !HasFatalFailure(); ++iter) {
+      float hcorr_ref = 0.0, vcorr_ref = 0.0;
+      float hcorr_test = 0.0, vcorr_test = 0.0;
+
+      for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+        data_buf_[i] = (rng_.Rand16() % (1 << 12)) - (1 << 11);
+      }
+
+      av1_get_horver_correlation_full_c(data_buf_, MAX_SB_SIZE, w, h,
+                                        &hcorr_ref, &vcorr_ref);
+
+      target_func_(data_buf_, MAX_SB_SIZE, w, h, &hcorr_test, &vcorr_test);
+
+      ASSERT_LE(fabs(hcorr_ref - hcorr_test), 1e-6)
+          << "hcorr incorrect (" << w << "x" << h << ")";
+      ASSERT_LE(fabs(vcorr_ref - vcorr_test), 1e-6)
+          << "vcorr incorrect (" << w << "x" << h << ")";
+    }
+    //    printf("(%3dx%-3d) passed\n", w, h);
+  }
+}
+
+void HorverTest::RunHorverSpeedTest(int run_times) {
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    data_buf_[i] = rng_.Rand16() % (1 << 12);
+  }
+
+  for (int block_size = 0; block_size < BLOCK_SIZES_ALL; block_size++) {
+    const int w = block_size_wide[block_size];
+    const int h = block_size_high[block_size];
+    float hcorr_ref = 0.0, vcorr_ref = 0.0;
+    float hcorr_test = 0.0, vcorr_test = 0.0;
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      av1_get_horver_correlation_full_c(data_buf_, MAX_SB_SIZE, w, h,
+                                        &hcorr_ref, &vcorr_ref);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      target_func_(data_buf_, MAX_SB_SIZE, w, h, &hcorr_test, &vcorr_test);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    printf("%3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", w, h, time1, time2,
+           time1 / time2);
+  }
+}
+
+void HorverTest::RunHorverTest_ExtremeValues() {
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    // Most of get_horver_test is squaring and summing, so simply saturating
+    // the whole buffer is mostly likely to cause an overflow.
+    data_buf_[i] = (1 << 12) - 1;
+  }
+
+  for (int block_size = 0; block_size < BLOCK_SIZES_ALL; block_size++) {
+    const int w = block_size_wide[block_size];
+    const int h = block_size_high[block_size];
+    float hcorr_ref = 0.0, vcorr_ref = 0.0;
+    float hcorr_test = 0.0, vcorr_test = 0.0;
+
+    av1_get_horver_correlation_full_c(data_buf_, MAX_SB_SIZE, w, h, &hcorr_ref,
+                                      &vcorr_ref);
+    target_func_(data_buf_, MAX_SB_SIZE, w, h, &hcorr_test, &vcorr_test);
+
+    ASSERT_LE(fabs(hcorr_ref - hcorr_test), 1e-6) << "hcorr incorrect";
+    ASSERT_LE(fabs(vcorr_ref - vcorr_test), 1e-6) << "vcorr incorrect";
+  }
+}
+
+TEST_P(HorverTest, RandomValues) { RunHorverTest(); }
+
+TEST_P(HorverTest, ExtremeValues) { RunHorverTest_ExtremeValues(); }
+
+TEST_P(HorverTest, DISABLED_Speed) { RunHorverSpeedTest(100000); }
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, HorverTest,
+    ::testing::Values(av1_get_horver_correlation_full_sse4_1));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, HorverTest, ::testing::Values(av1_get_horver_correlation_full_neon));
+#endif  // HAVE_NEON
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, HorverTest, ::testing::Values(av1_get_horver_correlation_full_avx2));
+#endif  // HAVE_AVX2
+
+}  // namespace
diff --git a/third_party/aom/test/horz_superres_test.cc b/third_party/aom/test/horz_superres_test.cc
new file mode 100644
index 0000000000..595ed548c7
--- /dev/null
+++ b/third_party/aom/test/horz_superres_test.cc
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+#include <ostream>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "av1/encoder/encoder.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+using std::make_tuple;
+using std::tuple;
+
+/* TESTING PARAMETERS */
+
+const int kBitrate = 40;
+
+typedef struct {
+  const char *filename;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+  unsigned int limit;
+  unsigned int screen_content;
+  double psnr_threshold;   // used by modes other than AOM_SUPERRES_AUTO
+  double psnr_threshold2;  // used by AOM_SUPERRES_AUTO
+} TestVideoParam;
+
+std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) {
+  return os << "TestVideoParam { filename:" << test_arg.filename
+            << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth
+            << " profile:" << test_arg.profile << " limit:" << test_arg.limit
+            << " screen_content:" << test_arg.screen_content
+            << " psnr_threshold:" << test_arg.psnr_threshold << " }";
+}
+
+const TestVideoParam kTestVideoVectors[] = {
+  { "park_joy_90p_8_420.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 5, 0, 25.3,
+    44.7 },
+#if CONFIG_AV1_HIGHBITDEPTH
+  { "park_joy_90p_10_444.y4m", AOM_IMG_FMT_I44416, AOM_BITS_10, 1, 5, 0, 27.0,
+    46.8 },
+#endif
+  { "screendata.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 4, 1, 23.0, 52.5 },
+  // Image coding (single frame).
+  { "niklas_1280_720_30.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 1, 0, 32.0,
+    49.0 },
+};
+
+// Modes with extra params have their own tests.
+const aom_superres_mode kSuperresModesWithoutParams[] = { AOM_SUPERRES_RANDOM,
+                                                          AOM_SUPERRES_AUTO };
+
+// Superres denominators and superres kf denominators to be tested
+typedef tuple<int, int> SuperresDenominatorPair;
+const SuperresDenominatorPair kSuperresDenominators[] = {
+  make_tuple(16, 9),  make_tuple(13, 11), make_tuple(9, 9),
+  make_tuple(13, 13), make_tuple(11, 16), make_tuple(8, 16),
+  make_tuple(16, 8),  make_tuple(8, 8),   make_tuple(9, 14),
+};
+
+// Superres q thresholds and superres kf q thresholds to be tested
+typedef tuple<int, int> SuperresQThresholdPair;
+const SuperresQThresholdPair kSuperresQThresholds[] = {
+  make_tuple(63, 63), make_tuple(63, 41), make_tuple(17, 63),
+  make_tuple(41, 11), make_tuple(1, 37),  make_tuple(11, 11),
+  make_tuple(1, 1),   make_tuple(17, 29), make_tuple(29, 11),
+};
+
+/* END (TESTING PARAMETERS) */
+
+// Test parameter list:
+//  <[needed for EncoderTest], test_video_param_, superres_mode_>
+typedef tuple<const libaom_test::CodecFactory *, TestVideoParam,
+              aom_superres_mode>
+    HorzSuperresTestParam;
+
+class HorzSuperresEndToEndTest
+    : public ::testing::TestWithParam<HorzSuperresTestParam>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  HorzSuperresEndToEndTest()
+      : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)),
+        superres_mode_(GET_PARAM(2)), psnr_(0.0), frame_count_(0) {}
+
+  ~HorzSuperresEndToEndTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kTwoPassGood);
+    cfg_.g_lag_in_frames = 5;
+    cfg_.rc_end_usage = AOM_Q;
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = (unsigned int)test_video_param_.bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    // Set superres parameters
+    cfg_.rc_superres_mode = superres_mode_;
+  }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    frame_count_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    frame_count_++;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+
+      // Set cpu-used = 8 for speed
+      encoder->Control(AOME_SET_CPUUSED, 8);
+
+      // Test screen coding tools
+      if (test_video_param_.screen_content)
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
+      else
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (frame_count_) return psnr_ / frame_count_;
+    return 0.0;
+  }
+
+  void DoTest() {
+    std::unique_ptr<libaom_test::VideoSource> video;
+    video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                test_video_param_.limit));
+    ASSERT_NE(video, nullptr);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr_thresh = (superres_mode_ == AOM_SUPERRES_AUTO)
+                                   ? test_video_param_.psnr_threshold2
+                                   : test_video_param_.psnr_threshold;
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, psnr_thresh);
+
+    EXPECT_EQ(test_video_param_.limit, frame_count_);
+  }
+
+  TestVideoParam test_video_param_;
+  aom_superres_mode superres_mode_;
+
+ private:
+  double psnr_;
+  unsigned int frame_count_;
+};
+
+TEST_P(HorzSuperresEndToEndTest, HorzSuperresEndToEndPSNRTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(HorzSuperresEndToEndTest,
+                           ::testing::ValuesIn(kTestVideoVectors),
+                           ::testing::ValuesIn(kSuperresModesWithoutParams));
+
+// Test parameter list:
+//  <[needed for EncoderTest], test_video_param_, tuple(superres_denom_,
+//  superres_kf_denom_)>
+typedef tuple<const libaom_test::CodecFactory *, TestVideoParam,
+              SuperresDenominatorPair>
+    HorzSuperresFixedTestParam;
+
+class HorzSuperresFixedEndToEndTest
+    : public ::testing::TestWithParam<HorzSuperresFixedTestParam>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  HorzSuperresFixedEndToEndTest()
+      : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)),
+        superres_mode_(AOM_SUPERRES_FIXED), psnr_(0.0), frame_count_(0) {
+    SuperresDenominatorPair denoms = GET_PARAM(2);
+    superres_denom_ = std::get<0>(denoms);
+    superres_kf_denom_ = std::get<1>(denoms);
+  }
+
+  ~HorzSuperresFixedEndToEndTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kTwoPassGood);
+    cfg_.g_lag_in_frames = 5;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = (unsigned int)test_video_param_.bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    // Set superres parameters
+    cfg_.rc_superres_mode = superres_mode_;
+    cfg_.rc_superres_denominator = superres_denom_;
+    cfg_.rc_superres_kf_denominator = superres_kf_denom_;
+  }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    frame_count_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    frame_count_++;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+
+      // Set cpu-used = 8 for speed
+      encoder->Control(AOME_SET_CPUUSED, 8);
+
+      // Test screen coding tools
+      if (test_video_param_.screen_content)
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
+      else
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (frame_count_) return psnr_ / frame_count_;
+    return 0.0;
+  }
+
+  void DoTest() {
+    std::unique_ptr<libaom_test::VideoSource> video;
+    video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                test_video_param_.limit));
+    ASSERT_NE(video, nullptr);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, test_video_param_.psnr_threshold)
+        << "superres_mode_ = " << superres_mode_
+        << ", superres_denom_ = " << superres_denom_
+        << ", superres_kf_denom_ = " << superres_kf_denom_;
+
+    EXPECT_EQ(test_video_param_.limit, frame_count_)
+        << "superres_mode_ = " << superres_mode_
+        << ", superres_denom_ = " << superres_denom_
+        << ", superres_kf_denom_ = " << superres_kf_denom_;
+  }
+
+  TestVideoParam test_video_param_;
+  aom_superres_mode superres_mode_;
+  int superres_denom_;
+  int superres_kf_denom_;
+
+ private:
+  double psnr_;
+  unsigned int frame_count_;
+};
+
+TEST_P(HorzSuperresFixedEndToEndTest, HorzSuperresFixedTestParam) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(HorzSuperresFixedEndToEndTest,
+                           ::testing::ValuesIn(kTestVideoVectors),
+                           ::testing::ValuesIn(kSuperresDenominators));
+
+// Test parameter list:
+//  <[needed for EncoderTest], test_video_param_,
+//  tuple(superres_qthresh_,superres_kf_qthresh_)>
+typedef tuple<const libaom_test::CodecFactory *, TestVideoParam,
+              SuperresQThresholdPair>
+    HorzSuperresQThreshTestParam;
+
+class HorzSuperresQThreshEndToEndTest
+    : public ::testing::TestWithParam<HorzSuperresQThreshTestParam>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  HorzSuperresQThreshEndToEndTest()
+      : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)),
+        superres_mode_(AOM_SUPERRES_QTHRESH), psnr_(0.0), frame_count_(0) {
+    SuperresQThresholdPair qthresholds = GET_PARAM(2);
+    superres_qthresh_ = std::get<0>(qthresholds);
+    superres_kf_qthresh_ = std::get<1>(qthresholds);
+  }
+
+  ~HorzSuperresQThreshEndToEndTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kTwoPassGood);
+    cfg_.g_lag_in_frames = 5;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = (unsigned int)test_video_param_.bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    // Set superres parameters
+    cfg_.rc_superres_mode = superres_mode_;
+    cfg_.rc_superres_qthresh = superres_qthresh_;
+    cfg_.rc_superres_kf_qthresh = superres_kf_qthresh_;
+  }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    frame_count_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    frame_count_++;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 0);
+
+      // Set cpu-used = 8 for speed
+      encoder->Control(AOME_SET_CPUUSED, 8);
+
+      // Test screen coding tools
+      if (test_video_param_.screen_content)
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
+      else
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (frame_count_) return psnr_ / frame_count_;
+    return 0.0;
+  }
+
+  void DoTest() {
+    std::unique_ptr<libaom_test::VideoSource> video;
+    video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                test_video_param_.limit));
+    ASSERT_NE(video, nullptr);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, test_video_param_.psnr_threshold)
+        << "superres_mode_ = " << superres_mode_
+        << ", superres_qthresh_ = " << superres_qthresh_
+        << ", superres_kf_qthresh_ = " << superres_kf_qthresh_;
+
+    EXPECT_EQ(test_video_param_.limit, frame_count_)
+        << "superres_mode_ = " << superres_mode_
+        << ", superres_qthresh_ = " << superres_qthresh_
+        << ", superres_kf_qthresh_ = " << superres_kf_qthresh_;
+  }
+
+  TestVideoParam test_video_param_;
+  aom_superres_mode superres_mode_;
+  int superres_qthresh_;
+  int superres_kf_qthresh_;
+
+ private:
+  double psnr_;
+  unsigned int frame_count_;
+};
+
+TEST_P(HorzSuperresQThreshEndToEndTest, HorzSuperresQThreshEndToEndPSNRTest) {
+  DoTest();
+}
+
+AV1_INSTANTIATE_TEST_SUITE(HorzSuperresQThreshEndToEndTest,
+                           ::testing::ValuesIn(kTestVideoVectors),
+                           ::testing::ValuesIn(kSuperresQThresholds));
+
+}  // namespace
diff --git a/third_party/aom/test/i420_video_source.h b/third_party/aom/test/i420_video_source.h
new file mode 100644
index 0000000000..233e7152b9
--- /dev/null
+++ b/third_party/aom/test/i420_video_source.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AOM_TEST_I420_VIDEO_SOURCE_H_
+#define AOM_TEST_I420_VIDEO_SOURCE_H_
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include "test/yuv_video_source.h"
+
+namespace libaom_test {
+
+// This class extends VideoSource to allow parsing of raw yv12
+// so that we can do actual file encodes.
+class I420VideoSource : public YUVVideoSource {
+ public:
+  I420VideoSource(const std::string &file_name, unsigned int width,
+                  unsigned int height, int rate_numerator, int rate_denominator,
+                  unsigned int start, int limit)
+      : YUVVideoSource(file_name, AOM_IMG_FMT_I420, width, height,
+                       rate_numerator, rate_denominator, start, limit) {}
+};
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_I420_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/intra_edge_test.cc b/third_party/aom/test/intra_edge_test.cc
new file mode 100644
index 0000000000..96ee65466b
--- /dev/null
+++ b/third_party/aom/test/intra_edge_test.cc
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/register_state_check.h"
+#include "test/function_equivalence_test.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+#include "av1/common/enums.h"
+
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+template <typename F, typename T>
+class UpsampleTest : public FunctionEquivalenceTest<F> {
+ protected:
+  static const int kIterations = 1000000;
+  static const int kMinEdge = 4;
+  static const int kMaxEdge = 24;
+  static const int kBufSize = 2 * 64 + 32;
+  static const int kOffset = 16;
+
+  ~UpsampleTest() override = default;
+
+  virtual void Execute(T *edge_tst) = 0;
+
+  void Common() {
+    edge_ref_ = &edge_ref_data_[kOffset];
+    edge_tst_ = &edge_tst_data_[kOffset];
+
+    Execute(edge_tst_);
+
+    const int max_idx = (size_ - 1) * 2;
+    for (int r = -2; r <= max_idx; ++r) {
+      ASSERT_EQ(edge_ref_[r], edge_tst_[r]);
+    }
+  }
+
+  T edge_ref_data_[kBufSize];
+  T edge_tst_data_[kBufSize];
+
+  T *edge_ref_;
+  T *edge_tst_;
+
+  int size_;
+};
+
+typedef void (*UP8B)(uint8_t *p, int size);
+typedef libaom_test::FuncParam<UP8B> TestFuncs;
+
+class UpsampleTest8B : public UpsampleTest<UP8B, uint8_t> {
+ protected:
+  void Execute(uint8_t *edge_tst) override {
+    params_.ref_func(edge_ref_, size_);
+    API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_));
+  }
+};
+
+TEST_P(UpsampleTest8B, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    size_ = 4 * (this->rng_(4) + 1);
+
+    int i, pix = 0;
+    for (i = 0; i < kOffset + size_; ++i) {
+      pix = rng_.Rand8();
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = edge_ref_data_[i];
+    }
+
+    // Extend final sample
+    while (i < kBufSize) {
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = pix;
+      i++;
+    }
+
+    Common();
+  }
+}
+
+TEST_P(UpsampleTest8B, DISABLED_Speed) {
+  const int test_count = 10000000;
+  size_ = kMaxEdge;
+  for (int i = 0; i < kOffset + size_; ++i) {
+    edge_tst_data_[i] = rng_.Rand8();
+  }
+  edge_tst_ = &edge_tst_data_[kOffset];
+  for (int iter = 0; iter < test_count; ++iter) {
+    API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_));
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, UpsampleTest8B,
+    ::testing::Values(TestFuncs(av1_upsample_intra_edge_c,
+                                av1_upsample_intra_edge_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, UpsampleTest8B,
+    ::testing::Values(TestFuncs(av1_upsample_intra_edge_c,
+                                av1_upsample_intra_edge_neon)));
+#endif  // HAVE_NEON
+
+template <typename F, typename T>
+class FilterEdgeTest : public FunctionEquivalenceTest<F> {
+ protected:
+  static const int kIterations = 1000000;
+  static const int kMaxEdge = 2 * 64;
+  static const int kBufSize = kMaxEdge + 32;
+  static const int kOffset = 15;
+
+  ~FilterEdgeTest() override = default;
+
+  virtual void Execute(T *edge_tst) = 0;
+
+  void Common() {
+    edge_ref_ = &edge_ref_data_[kOffset];
+    edge_tst_ = &edge_tst_data_[kOffset];
+
+    Execute(edge_tst_);
+
+    for (int r = 0; r < size_; ++r) {
+      ASSERT_EQ(edge_ref_[r], edge_tst_[r]);
+    }
+  }
+
+  T edge_ref_data_[kBufSize];
+  T edge_tst_data_[kBufSize];
+
+  T *edge_ref_;
+  T *edge_tst_;
+
+  int size_;
+  int strength_;
+};
+
+typedef void (*FE8B)(uint8_t *p, int size, int strength);
+typedef libaom_test::FuncParam<FE8B> FilterEdgeTestFuncs;
+
+class FilterEdgeTest8B : public FilterEdgeTest<FE8B, uint8_t> {
+ protected:
+  void Execute(uint8_t *edge_tst) override {
+    params_.ref_func(edge_ref_, size_, strength_);
+    API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, strength_));
+  }
+};
+
+TEST_P(FilterEdgeTest8B, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    strength_ = this->rng_(4);
+    size_ = 4 * (this->rng_(128 / 4) + 1) + 1;
+
+    int i, pix = 0;
+    for (i = 0; i < kOffset + size_; ++i) {
+      pix = rng_.Rand8();
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = pix;
+    }
+
+    Common();
+  }
+}
+
+TEST_P(FilterEdgeTest8B, DISABLED_Speed) {
+  const int test_count = 10000000;
+  size_ = kMaxEdge;
+  strength_ = 1;
+  for (int i = 0; i < kOffset + size_; ++i) {
+    edge_tst_data_[i] = rng_.Rand8();
+  }
+  edge_tst_ = &edge_tst_data_[kOffset];
+  for (int iter = 0; iter < test_count; ++iter) {
+    API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, strength_));
+    // iterate over filter strengths (1,2,3)
+    strength_ = strength_ == 3 ? 1 : strength_ + 1;
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, FilterEdgeTest8B,
+    ::testing::Values(FilterEdgeTestFuncs(av1_filter_intra_edge_c,
+                                          av1_filter_intra_edge_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, FilterEdgeTest8B,
+    ::testing::Values(FilterEdgeTestFuncs(av1_filter_intra_edge_c,
+                                          av1_filter_intra_edge_neon)));
+#endif  // HAVE_NEON
+
+#if CONFIG_AV1_HIGHBITDEPTH
+
+typedef void (*UPHB)(uint16_t *p, int size, int bd);
+typedef libaom_test::FuncParam<UPHB> TestFuncsHBD;
+
+class UpsampleTestHB : public UpsampleTest<UPHB, uint16_t> {
+ protected:
+  void Execute(uint16_t *edge_tst) override {
+    params_.ref_func(edge_ref_, size_, bit_depth_);
+    API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, bit_depth_));
+  }
+  int bit_depth_;
+};
+
+TEST_P(UpsampleTestHB, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    switch (rng_(3)) {
+      case 0: bit_depth_ = 8; break;
+      case 1: bit_depth_ = 10; break;
+      default: bit_depth_ = 12; break;
+    }
+    const int hi = 1 << bit_depth_;
+
+    size_ = 4 * (this->rng_(4) + 1);
+
+    int i, pix = 0;
+    for (i = 0; i < kOffset + size_; ++i) {
+      pix = rng_(hi);
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = pix;
+    }
+
+    // Extend final sample
+    while (i < kBufSize) {
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = pix;
+      i++;
+    }
+
+    Common();
+  }
+}
+
+TEST_P(UpsampleTestHB, DISABLED_Speed) {
+  const int test_count = 10000000;
+  size_ = kMaxEdge;
+  bit_depth_ = 12;
+  const int hi = 1 << bit_depth_;
+  for (int i = 0; i < kOffset + size_; ++i) {
+    edge_tst_data_[i] = rng_(hi);
+  }
+  edge_tst_ = &edge_tst_data_[kOffset];
+  for (int iter = 0; iter < test_count; ++iter) {
+    API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, bit_depth_));
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, UpsampleTestHB,
+    ::testing::Values(TestFuncsHBD(av1_highbd_upsample_intra_edge_c,
+                                   av1_highbd_upsample_intra_edge_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, UpsampleTestHB,
+    ::testing::Values(TestFuncsHBD(av1_highbd_upsample_intra_edge_c,
+                                   av1_highbd_upsample_intra_edge_neon)));
+#endif  // HAVE_NEON
+
+typedef void (*FEHB)(uint16_t *p, int size, int strength);
+typedef libaom_test::FuncParam<FEHB> FilterEdgeTestFuncsHBD;
+
+class FilterEdgeTestHB : public FilterEdgeTest<FEHB, uint16_t> {
+ protected:
+  void Execute(uint16_t *edge_tst) override {
+    params_.ref_func(edge_ref_, size_, strength_);
+    API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, strength_));
+  }
+  int bit_depth_;
+};
+
+TEST_P(FilterEdgeTestHB, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    switch (rng_(3)) {
+      case 0: bit_depth_ = 8; break;
+      case 1: bit_depth_ = 10; break;
+      default: bit_depth_ = 12; break;
+    }
+    const int hi = 1 << bit_depth_;
+    strength_ = this->rng_(4);
+    size_ = 4 * (this->rng_(128 / 4) + 1) + 1;
+
+    int i, pix = 0;
+    for (i = 0; i < kOffset + size_; ++i) {
+      pix = rng_(hi);
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = pix;
+    }
+
+    Common();
+  }
+}
+
+TEST_P(FilterEdgeTestHB, DISABLED_Speed) {
+  const int test_count = 10000000;
+  size_ = kMaxEdge;
+  strength_ = 1;
+  bit_depth_ = 12;
+  const int hi = 1 << bit_depth_;
+  for (int i = 0; i < kOffset + size_; ++i) {
+    edge_tst_data_[i] = rng_(hi);
+  }
+  edge_tst_ = &edge_tst_data_[kOffset];
+  for (int iter = 0; iter < test_count; ++iter) {
+    API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, strength_));
+    // iterate over filter strengths (1,2,3)
+    strength_ = strength_ == 3 ? 1 : strength_ + 1;
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE4_1, FilterEdgeTestHB,
+                         ::testing::Values(FilterEdgeTestFuncsHBD(
+                             av1_highbd_filter_intra_edge_c,
+                             av1_highbd_filter_intra_edge_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, FilterEdgeTestHB,
+                         ::testing::Values(FilterEdgeTestFuncsHBD(
+                             av1_highbd_filter_intra_edge_c,
+                             av1_highbd_filter_intra_edge_neon)));
+#endif  // HAVE_NEON
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/intrabc_test.cc b/third_party/aom/test/intrabc_test.cc
new file mode 100644
index 0000000000..2c60596ab8
--- /dev/null
+++ b/third_party/aom/test/intrabc_test.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+
+#include "av1/common/av1_common_int.h"
+#include "av1/common/blockd.h"
+#include "av1/common/enums.h"
+#include "av1/common/mv.h"
+#include "av1/common/mvref_common.h"
+#include "av1/common/tile_common.h"
+
+namespace {
+TEST(IntrabcTest, DvValidation) {
+  struct DvTestCase {
+    MV dv;
+    int mi_row_offset;
+    int mi_col_offset;
+    BLOCK_SIZE bsize;
+    bool valid;
+  };
+  const int kSubPelScale = 8;
+  const int kTileMaxMibWidth = 8;
+  const DvTestCase kDvCases[] = {
+    { { 0, 0 }, 0, 0, BLOCK_128X128, false },
+    { { 0, 0 }, 0, 0, BLOCK_64X64, false },
+    { { 0, 0 }, 0, 0, BLOCK_32X32, false },
+    { { 0, 0 }, 0, 0, BLOCK_16X16, false },
+    { { 0, 0 }, 0, 0, BLOCK_8X8, false },
+    { { 0, 0 }, 0, 0, BLOCK_4X4, false },
+    { { -MAX_SB_SIZE * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_16X16,
+      true },
+    { { 0, -MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_16X16,
+      false },
+    { { -MAX_SB_SIZE * kSubPelScale, 0 },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_16X16,
+      true },
+    { { MAX_SB_SIZE * kSubPelScale, 0 },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_16X16,
+      false },
+    { { 0, MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_16X16,
+      false },
+    { { -32 * kSubPelScale, -32 * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_32X32,
+      true },
+    { { -32 * kSubPelScale, -32 * kSubPelScale },
+      32 / MI_SIZE,
+      32 / MI_SIZE,
+      BLOCK_32X32,
+      false },
+    { { -32 * kSubPelScale - kSubPelScale / 2, -32 * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_32X32,
+      false },
+    { { -33 * kSubPelScale, -32 * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_32X32,
+      true },
+    { { -32 * kSubPelScale, -32 * kSubPelScale - kSubPelScale / 2 },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_32X32,
+      false },
+    { { -32 * kSubPelScale, -33 * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_32X32,
+      true },
+    { { -MAX_SB_SIZE * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      true },
+    { { -(MAX_SB_SIZE + 1) * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      false },
+    { { -MAX_SB_SIZE * kSubPelScale, -(MAX_SB_SIZE + 1) * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      false },
+    { { -(MAX_SB_SIZE - 1) * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      false },
+    { { -MAX_SB_SIZE * kSubPelScale, -(MAX_SB_SIZE - 1) * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      true },
+    { { -(MAX_SB_SIZE - 1) * kSubPelScale, -(MAX_SB_SIZE - 1) * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      false },
+    { { -MAX_SB_SIZE * kSubPelScale, MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      false },
+    { { -MAX_SB_SIZE * kSubPelScale,
+        (kTileMaxMibWidth - 2) * MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      false },
+    { { -MAX_SB_SIZE * kSubPelScale,
+        ((kTileMaxMibWidth - 2) * MAX_SB_SIZE + 1) * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      false },
+  };
+
+  MACROBLOCKD xd;
+  memset(&xd, 0, sizeof(xd));
+  xd.tile.mi_row_start = 8 * MAX_MIB_SIZE;
+  xd.tile.mi_row_end = 16 * MAX_MIB_SIZE;
+  xd.tile.mi_col_start = 24 * MAX_MIB_SIZE;
+  xd.tile.mi_col_end = xd.tile.mi_col_start + kTileMaxMibWidth * MAX_MIB_SIZE;
+  xd.plane[1].subsampling_x = 1;
+  xd.plane[1].subsampling_y = 1;
+  xd.plane[2].subsampling_x = 1;
+  xd.plane[2].subsampling_y = 1;
+
+  SequenceHeader seq_params = {};
+  AV1_COMMON cm;
+  memset(&cm, 0, sizeof(cm));
+  cm.seq_params = &seq_params;
+
+  for (const DvTestCase &dv_case : kDvCases) {
+    const int mi_row = xd.tile.mi_row_start + dv_case.mi_row_offset;
+    const int mi_col = xd.tile.mi_col_start + dv_case.mi_col_offset;
+    xd.is_chroma_ref = is_chroma_reference(mi_row, mi_col, dv_case.bsize,
+                                           xd.plane[1].subsampling_x,
+                                           xd.plane[1].subsampling_y);
+    EXPECT_EQ(static_cast<int>(dv_case.valid),
+              av1_is_dv_valid(dv_case.dv, &cm, &xd, mi_row, mi_col,
+                              dv_case.bsize, MAX_MIB_SIZE_LOG2));
+  }
+}
+}  // namespace
diff --git a/third_party/aom/test/intrapred_test.cc b/third_party/aom/test/intrapred_test.cc
new file mode 100644
index 0000000000..8796e8ba69
--- /dev/null
+++ b/third_party/aom/test/intrapred_test.cc
@@ -0,0 +1,488 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/blockd.h"
+#include "av1/common/common.h"
+#include "av1/common/pred_common.h"
+#include "aom_mem/aom_mem.h"
+
+namespace {
+
+using libaom_test::ACMRandom;
+
+const int count_test_block = 100000;
+
+typedef void (*HighbdIntraPred)(uint16_t *dst, ptrdiff_t stride,
+                                const uint16_t *above, const uint16_t *left,
+                                int bps);
+typedef void (*IntraPred)(uint8_t *dst, ptrdiff_t stride, const uint8_t *above,
+                          const uint8_t *left);
+
+}  // namespace
+
+// NOTE: Under gcc version 7.3.0 (Debian 7.3.0-5), if this template is in the
+// anonymous namespace, then we get a strange compiler warning in
+// the begin() and end() methods of the ParamGenerator template class in
+// gtest/internal/gtest-param-util.h:
+//   warning: ‘<anonymous>’ is used uninitialized in this function
+// As a workaround, put this template outside the anonymous namespace.
+// See bug aomedia:2003.
+template <typename FuncType>
+struct IntraPredFunc {
+  IntraPredFunc(FuncType pred = nullptr, FuncType ref = nullptr,
+                int block_width_value = 0, int block_height_value = 0,
+                int bit_depth_value = 0)
+      : pred_fn(pred), ref_fn(ref), block_width(block_width_value),
+        block_height(block_height_value), bit_depth(bit_depth_value) {}
+
+  FuncType pred_fn;
+  FuncType ref_fn;
+  int block_width;
+  int block_height;
+  int bit_depth;
+};
+
+namespace {
+
+template <typename FuncType, typename Pixel>
+class AV1IntraPredTest
+    : public ::testing::TestWithParam<IntraPredFunc<FuncType> > {
+ public:
+  void RunTest(Pixel *left_col, Pixel *above_data, Pixel *dst, Pixel *ref_dst) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int block_width = params_.block_width;
+    const int block_height = params_.block_height;
+    above_row_ = above_data + 16;
+    left_col_ = left_col;
+    dst_ = dst;
+    ref_dst_ = ref_dst;
+    int error_count = 0;
+    for (int i = 0; i < count_test_block; ++i) {
+      // Fill edges with random data, try first with saturated values.
+      for (int x = -1; x <= block_width * 2; x++) {
+        if (i == 0) {
+          above_row_[x] = mask_;
+        } else {
+          above_row_[x] = rnd.Rand16() & mask_;
+        }
+      }
+      for (int y = 0; y < block_height; y++) {
+        if (i == 0) {
+          left_col_[y] = mask_;
+        } else {
+          left_col_[y] = rnd.Rand16() & mask_;
+        }
+      }
+      Predict();
+      CheckPrediction(i, &error_count);
+    }
+    ASSERT_EQ(0, error_count);
+  }
+  void RunSpeedTest(Pixel *left_col, Pixel *above_data, Pixel *dst,
+                    Pixel *ref_dst) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int block_width = params_.block_width;
+    const int block_height = params_.block_height;
+    above_row_ = above_data + 16;
+    left_col_ = left_col;
+    dst_ = dst;
+    ref_dst_ = ref_dst;
+    int error_count = 0;
+    const int numIter = 100;
+
+    int c_sum_time = 0;
+    int simd_sum_time = 0;
+    for (int i = 0; i < count_test_block; ++i) {
+      // Fill edges with random data, try first with saturated values.
+      for (int x = -1; x <= block_width * 2; x++) {
+        if (i == 0) {
+          above_row_[x] = mask_;
+        } else {
+          above_row_[x] = rnd.Rand16() & mask_;
+        }
+      }
+      for (int y = 0; y < block_height; y++) {
+        if (i == 0) {
+          left_col_[y] = mask_;
+        } else {
+          left_col_[y] = rnd.Rand16() & mask_;
+        }
+      }
+
+      aom_usec_timer c_timer_;
+      aom_usec_timer_start(&c_timer_);
+
+      PredictRefSpeedTest(numIter);
+
+      aom_usec_timer_mark(&c_timer_);
+
+      aom_usec_timer simd_timer_;
+      aom_usec_timer_start(&simd_timer_);
+
+      PredictFncSpeedTest(numIter);
+
+      aom_usec_timer_mark(&simd_timer_);
+
+      c_sum_time += static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
+      simd_sum_time += static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
+
+      CheckPrediction(i, &error_count);
+    }
+
+    printf(
+        "blockWxH = %d x %d c_time = %d \t simd_time = %d \t Gain = %4.2f \n",
+        block_width, block_height, c_sum_time, simd_sum_time,
+        (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
+    ASSERT_EQ(0, error_count);
+  }
+
+ protected:
+  void SetUp() override {
+    params_ = this->GetParam();
+    stride_ = params_.block_width * 3;
+    mask_ = (1 << params_.bit_depth) - 1;
+  }
+
+  virtual void Predict() = 0;
+
+  virtual void PredictRefSpeedTest(int num) = 0;
+  virtual void PredictFncSpeedTest(int num) = 0;
+
+  void CheckPrediction(int test_case_number, int *error_count) const {
+    // For each pixel ensure that the calculated value is the same as reference.
+    const int block_width = params_.block_width;
+    const int block_height = params_.block_height;
+    for (int y = 0; y < block_height; y++) {
+      for (int x = 0; x < block_width; x++) {
+        *error_count += ref_dst_[x + y * stride_] != dst_[x + y * stride_];
+        if (*error_count == 1) {
+          ASSERT_EQ(ref_dst_[x + y * stride_], dst_[x + y * stride_])
+              << " Failed on Test Case Number " << test_case_number
+              << " location: x = " << x << " y = " << y;
+        }
+      }
+    }
+  }
+
+  Pixel *above_row_;
+  Pixel *left_col_;
+  Pixel *dst_;
+  Pixel *ref_dst_;
+  ptrdiff_t stride_;
+  int mask_;
+
+  IntraPredFunc<FuncType> params_;
+};
+
+#if CONFIG_AV1_HIGHBITDEPTH
+class HighbdIntraPredTest : public AV1IntraPredTest<HighbdIntraPred, uint16_t> {
+ protected:
+  void Predict() override {
+    const int bit_depth = params_.bit_depth;
+    params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
+    API_REGISTER_STATE_CHECK(
+        params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth));
+  }
+  void PredictRefSpeedTest(int num) override {
+    const int bit_depth = params_.bit_depth;
+    for (int i = 0; i < num; i++) {
+      params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
+    }
+  }
+  void PredictFncSpeedTest(int num) override {
+    const int bit_depth = params_.bit_depth;
+    for (int i = 0; i < num; i++) {
+      params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth);
+    }
+  }
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HighbdIntraPredTest);
+
+#endif
+
+class LowbdIntraPredTest : public AV1IntraPredTest<IntraPred, uint8_t> {
+ protected:
+  void Predict() override {
+    params_.ref_fn(ref_dst_, stride_, above_row_, left_col_);
+    API_REGISTER_STATE_CHECK(
+        params_.pred_fn(dst_, stride_, above_row_, left_col_));
+  }
+  void PredictRefSpeedTest(int num) override {
+    for (int i = 0; i < num; i++) {
+      params_.ref_fn(ref_dst_, stride_, above_row_, left_col_);
+    }
+  }
+  void PredictFncSpeedTest(int num) override {
+    for (int i = 0; i < num; i++) {
+      params_.pred_fn(dst_, stride_, above_row_, left_col_);
+    }
+  }
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(LowbdIntraPredTest);
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST_P(HighbdIntraPredTest, Bitexact) {
+  // max block size is 64
+  DECLARE_ALIGNED(16, uint16_t, left_col[2 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, above_data[2 * 64 + 64]);
+  DECLARE_ALIGNED(16, uint16_t, dst[3 * 64 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 64 * 64]);
+  av1_zero(left_col);
+  av1_zero(above_data);
+  RunTest(left_col, above_data, dst, ref_dst);
+}
+
+TEST_P(HighbdIntraPredTest, DISABLED_Speed) {
+  // max block size is 64
+  DECLARE_ALIGNED(16, uint16_t, left_col[2 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, above_data[2 * 64 + 64]);
+  DECLARE_ALIGNED(16, uint16_t, dst[3 * 64 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 64 * 64]);
+  av1_zero(left_col);
+  av1_zero(above_data);
+  RunSpeedTest(left_col, above_data, dst, ref_dst);
+}
+#endif
+
+TEST_P(LowbdIntraPredTest, Bitexact) {
+  // max block size is 64
+  DECLARE_ALIGNED(16, uint8_t, left_col[2 * 64]);
+  DECLARE_ALIGNED(16, uint8_t, above_data[2 * 64 + 64]);
+  DECLARE_ALIGNED(16, uint8_t, dst[3 * 64 * 64]);
+  DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * 64 * 64]);
+  av1_zero(left_col);
+  av1_zero(above_data);
+  RunTest(left_col, above_data, dst, ref_dst);
+}
+TEST_P(LowbdIntraPredTest, DISABLED_Speed) {
+  // max block size is 64
+  DECLARE_ALIGNED(16, uint8_t, left_col[2 * 64]);
+  DECLARE_ALIGNED(16, uint8_t, above_data[2 * 64 + 64]);
+  DECLARE_ALIGNED(16, uint8_t, dst[3 * 64 * 64]);
+  DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * 64 * 64]);
+  av1_zero(left_col);
+  av1_zero(above_data);
+  RunSpeedTest(left_col, above_data, dst, ref_dst);
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+// -----------------------------------------------------------------------------
+// High Bit Depth Tests
+#define highbd_entry(type, width, height, opt, bd)                          \
+  IntraPredFunc<HighbdIntraPred>(                                           \
+      &aom_highbd_##type##_predictor_##width##x##height##_##opt,            \
+      &aom_highbd_##type##_predictor_##width##x##height##_c, width, height, \
+      bd)
+
+#define highbd_intrapred(type, opt, bd)                                        \
+  highbd_entry(type, 4, 4, opt, bd), highbd_entry(type, 4, 8, opt, bd),        \
+      highbd_entry(type, 4, 16, opt, bd), highbd_entry(type, 8, 4, opt, bd),   \
+      highbd_entry(type, 8, 8, opt, bd), highbd_entry(type, 8, 16, opt, bd),   \
+      highbd_entry(type, 8, 32, opt, bd), highbd_entry(type, 16, 4, opt, bd),  \
+      highbd_entry(type, 16, 8, opt, bd), highbd_entry(type, 16, 16, opt, bd), \
+      highbd_entry(type, 16, 32, opt, bd),                                     \
+      highbd_entry(type, 16, 64, opt, bd), highbd_entry(type, 32, 8, opt, bd), \
+      highbd_entry(type, 32, 16, opt, bd),                                     \
+      highbd_entry(type, 32, 32, opt, bd),                                     \
+      highbd_entry(type, 32, 64, opt, bd),                                     \
+      highbd_entry(type, 64, 16, opt, bd),                                     \
+      highbd_entry(type, 64, 32, opt, bd), highbd_entry(type, 64, 64, opt, bd)
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+// ---------------------------------------------------------------------------
+// Low Bit Depth Tests
+
+#define lowbd_entry(type, width, height, opt)                                  \
+  IntraPredFunc<IntraPred>(&aom_##type##_predictor_##width##x##height##_##opt, \
+                           &aom_##type##_predictor_##width##x##height##_c,     \
+                           width, height, 8)
+
+#define lowbd_intrapred(type, opt)                                    \
+  lowbd_entry(type, 4, 4, opt), lowbd_entry(type, 4, 8, opt),         \
+      lowbd_entry(type, 4, 16, opt), lowbd_entry(type, 8, 4, opt),    \
+      lowbd_entry(type, 8, 8, opt), lowbd_entry(type, 8, 16, opt),    \
+      lowbd_entry(type, 8, 32, opt), lowbd_entry(type, 16, 4, opt),   \
+      lowbd_entry(type, 16, 8, opt), lowbd_entry(type, 16, 16, opt),  \
+      lowbd_entry(type, 16, 32, opt), lowbd_entry(type, 16, 64, opt), \
+      lowbd_entry(type, 32, 8, opt), lowbd_entry(type, 32, 16, opt),  \
+      lowbd_entry(type, 32, 32, opt), lowbd_entry(type, 32, 64, opt), \
+      lowbd_entry(type, 64, 16, opt), lowbd_entry(type, 64, 32, opt), \
+      lowbd_entry(type, 64, 64, opt)
+
+#if HAVE_SSE2
+const IntraPredFunc<IntraPred> LowbdIntraPredTestVector[] = {
+  lowbd_intrapred(dc, sse2),      lowbd_intrapred(dc_top, sse2),
+  lowbd_intrapred(dc_left, sse2), lowbd_intrapred(dc_128, sse2),
+  lowbd_intrapred(v, sse2),       lowbd_intrapred(h, sse2),
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, LowbdIntraPredTest,
+                         ::testing::ValuesIn(LowbdIntraPredTestVector));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorNeon[] = {
+  lowbd_intrapred(dc, neon),       lowbd_intrapred(dc_top, neon),
+  lowbd_intrapred(dc_left, neon),  lowbd_intrapred(dc_128, neon),
+  lowbd_intrapred(v, neon),        lowbd_intrapred(h, neon),
+  lowbd_intrapred(smooth, neon),   lowbd_intrapred(smooth_v, neon),
+  lowbd_intrapred(smooth_h, neon), lowbd_intrapred(paeth, neon),
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, LowbdIntraPredTest,
+                         ::testing::ValuesIn(LowbdIntraPredTestVectorNeon));
+#endif  // HAVE_NEON
+
+#if HAVE_SSSE3
+const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorSsse3[] = {
+  lowbd_intrapred(paeth, ssse3),
+  lowbd_intrapred(smooth, ssse3),
+  lowbd_intrapred(smooth_v, ssse3),
+  lowbd_intrapred(smooth_h, ssse3),
+};
+
+INSTANTIATE_TEST_SUITE_P(SSSE3, LowbdIntraPredTest,
+                         ::testing::ValuesIn(LowbdIntraPredTestVectorSsse3));
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX2
+const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
+  lowbd_entry(dc, 32, 16, avx2),      lowbd_entry(dc, 32, 32, avx2),
+  lowbd_entry(dc, 32, 64, avx2),      lowbd_entry(dc, 64, 16, avx2),
+  lowbd_entry(dc, 64, 32, avx2),      lowbd_entry(dc, 64, 64, avx2),
+
+  lowbd_entry(dc_top, 32, 16, avx2),  lowbd_entry(dc_top, 32, 32, avx2),
+  lowbd_entry(dc_top, 32, 64, avx2),  lowbd_entry(dc_top, 64, 16, avx2),
+  lowbd_entry(dc_top, 64, 32, avx2),  lowbd_entry(dc_top, 64, 64, avx2),
+
+  lowbd_entry(dc_left, 32, 16, avx2), lowbd_entry(dc_left, 32, 32, avx2),
+  lowbd_entry(dc_left, 32, 64, avx2), lowbd_entry(dc_left, 64, 16, avx2),
+  lowbd_entry(dc_left, 64, 32, avx2), lowbd_entry(dc_left, 64, 64, avx2),
+
+  lowbd_entry(dc_128, 32, 16, avx2),  lowbd_entry(dc_128, 32, 32, avx2),
+  lowbd_entry(dc_128, 32, 64, avx2),  lowbd_entry(dc_128, 64, 16, avx2),
+  lowbd_entry(dc_128, 64, 32, avx2),  lowbd_entry(dc_128, 64, 64, avx2),
+
+  lowbd_entry(v, 32, 16, avx2),       lowbd_entry(v, 32, 32, avx2),
+  lowbd_entry(v, 32, 64, avx2),       lowbd_entry(v, 64, 16, avx2),
+  lowbd_entry(v, 64, 32, avx2),       lowbd_entry(v, 64, 64, avx2),
+
+  lowbd_entry(h, 32, 32, avx2),
+
+  lowbd_entry(paeth, 16, 8, avx2),    lowbd_entry(paeth, 16, 16, avx2),
+  lowbd_entry(paeth, 16, 32, avx2),   lowbd_entry(paeth, 16, 64, avx2),
+  lowbd_entry(paeth, 32, 16, avx2),   lowbd_entry(paeth, 32, 32, avx2),
+  lowbd_entry(paeth, 32, 64, avx2),   lowbd_entry(paeth, 64, 16, avx2),
+  lowbd_entry(paeth, 64, 32, avx2),   lowbd_entry(paeth, 64, 64, avx2),
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, LowbdIntraPredTest,
+                         ::testing::ValuesIn(LowbdIntraPredTestVectorAvx2));
+#endif  // HAVE_AVX2
+
+#if CONFIG_AV1_HIGHBITDEPTH
+#if HAVE_NEON
+const IntraPredFunc<HighbdIntraPred> HighbdIntraPredTestVectorNeon[] = {
+  highbd_intrapred(dc, neon, 12),       highbd_intrapred(dc_top, neon, 12),
+  highbd_intrapred(dc_left, neon, 12),  highbd_intrapred(dc_128, neon, 12),
+  highbd_intrapred(v, neon, 12),        highbd_intrapred(h, neon, 12),
+  highbd_intrapred(paeth, neon, 12),    highbd_intrapred(smooth, neon, 12),
+  highbd_intrapred(smooth_v, neon, 12), highbd_intrapred(smooth_h, neon, 12),
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, HighbdIntraPredTest,
+                         ::testing::ValuesIn(HighbdIntraPredTestVectorNeon));
+#endif  // HAVE_NEON
+
+#if HAVE_SSE2
+const IntraPredFunc<HighbdIntraPred> HighbdIntraPredTestVectorSse2[] = {
+  highbd_entry(dc, 4, 4, sse2, 12),
+  highbd_entry(dc, 4, 8, sse2, 12),
+  highbd_entry(dc, 8, 4, sse2, 12),
+  highbd_entry(dc, 8, 8, sse2, 12),
+  highbd_entry(dc, 8, 16, sse2, 12),
+  highbd_entry(dc, 16, 8, sse2, 12),
+  highbd_entry(dc, 16, 16, sse2, 12),
+  highbd_entry(dc, 16, 32, sse2, 12),
+  highbd_entry(dc, 32, 16, sse2, 12),
+  highbd_entry(dc, 32, 32, sse2, 12),
+
+  highbd_entry(dc_top, 4, 4, sse2, 12),
+  highbd_entry(dc_top, 4, 8, sse2, 12),
+  highbd_entry(dc_top, 8, 4, sse2, 12),
+  highbd_entry(dc_top, 8, 8, sse2, 12),
+  highbd_entry(dc_top, 8, 16, sse2, 12),
+  highbd_entry(dc_top, 16, 8, sse2, 12),
+  highbd_entry(dc_top, 16, 16, sse2, 12),
+  highbd_entry(dc_top, 16, 32, sse2, 12),
+  highbd_entry(dc_top, 32, 16, sse2, 12),
+  highbd_entry(dc_top, 32, 32, sse2, 12),
+
+  highbd_entry(dc_left, 4, 4, sse2, 12),
+  highbd_entry(dc_left, 4, 8, sse2, 12),
+  highbd_entry(dc_left, 8, 4, sse2, 12),
+  highbd_entry(dc_left, 8, 8, sse2, 12),
+  highbd_entry(dc_left, 8, 16, sse2, 12),
+  highbd_entry(dc_left, 16, 8, sse2, 12),
+  highbd_entry(dc_left, 16, 16, sse2, 12),
+  highbd_entry(dc_left, 16, 32, sse2, 12),
+  highbd_entry(dc_left, 32, 16, sse2, 12),
+  highbd_entry(dc_left, 32, 32, sse2, 12),
+
+  highbd_entry(dc_128, 4, 4, sse2, 12),
+  highbd_entry(dc_128, 4, 8, sse2, 12),
+  highbd_entry(dc_128, 8, 4, sse2, 12),
+  highbd_entry(dc_128, 8, 8, sse2, 12),
+  highbd_entry(dc_128, 8, 16, sse2, 12),
+  highbd_entry(dc_128, 16, 8, sse2, 12),
+  highbd_entry(dc_128, 16, 16, sse2, 12),
+  highbd_entry(dc_128, 16, 32, sse2, 12),
+  highbd_entry(dc_128, 32, 16, sse2, 12),
+  highbd_entry(dc_128, 32, 32, sse2, 12),
+
+  highbd_entry(v, 4, 4, sse2, 12),
+  highbd_entry(v, 4, 8, sse2, 12),
+  highbd_entry(v, 8, 4, sse2, 12),
+  highbd_entry(v, 8, 8, sse2, 12),
+  highbd_entry(v, 8, 16, sse2, 12),
+  highbd_entry(v, 16, 8, sse2, 12),
+  highbd_entry(v, 16, 16, sse2, 12),
+  highbd_entry(v, 16, 32, sse2, 12),
+  highbd_entry(v, 32, 16, sse2, 12),
+  highbd_entry(v, 32, 32, sse2, 12),
+
+  highbd_entry(h, 4, 4, sse2, 12),
+  highbd_entry(h, 4, 8, sse2, 12),
+  highbd_entry(h, 8, 4, sse2, 12),
+  highbd_entry(h, 8, 8, sse2, 12),
+  highbd_entry(h, 8, 16, sse2, 12),
+  highbd_entry(h, 16, 8, sse2, 12),
+  highbd_entry(h, 16, 16, sse2, 12),
+  highbd_entry(h, 16, 32, sse2, 12),
+  highbd_entry(h, 32, 16, sse2, 12),
+  highbd_entry(h, 32, 32, sse2, 12),
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, HighbdIntraPredTest,
+                         ::testing::ValuesIn(HighbdIntraPredTestVectorSse2));
+#endif  // HAVE_SSE2
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/invalid_file_test.cc b/third_party/aom/test/invalid_file_test.cc
new file mode 100644
index 0000000000..791cdb8928
--- /dev/null
+++ b/third_party/aom/test/invalid_file_test.cc
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdio>
+#include <ostream>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/ivf_video_source.h"
+#include "test/util.h"
+#include "test/video_source.h"
+
+namespace {
+
+struct DecodeParam {
+  int threads;
+  const char *filename;
+  const char *res_filename;  // If nullptr, the result filename is
+                             // filename + ".res".
+};
+
+// Constructs result file name.
+std::string GetResFilename(const DecodeParam &param) {
+  if (param.res_filename != nullptr) return param.res_filename;
+  const std::string filename = param.filename;
+  return filename + ".res";
+}
+
+std::ostream &operator<<(std::ostream &os, const DecodeParam &dp) {
+  return os << "threads: " << dp.threads << " file: " << dp.filename
+            << " result file: " << GetResFilename(dp);
+}
+
+class InvalidFileTest : public ::libaom_test::DecoderTest,
+                        public ::libaom_test::CodecTestWithParam<DecodeParam> {
+ protected:
+  InvalidFileTest() : DecoderTest(GET_PARAM(0)), res_file_(nullptr) {}
+
+  ~InvalidFileTest() override {
+    if (res_file_ != nullptr) fclose(res_file_);
+  }
+
+  void OpenResFile(const std::string &res_file_name) {
+    res_file_ = libaom_test::OpenTestDataFile(res_file_name);
+    ASSERT_NE(res_file_, nullptr)
+        << "Result file open failed. Filename: " << res_file_name;
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             const unsigned int /*frame_number*/) override {
+    EXPECT_NE(img.fb_priv, nullptr);
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          const libaom_test::CompressedVideoSource &video,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_NE(res_file_, nullptr);
+    int expected_res_dec = -1;
+
+    // Read integer result.
+    const int res = fscanf(res_file_, "%d", &expected_res_dec);
+    EXPECT_NE(res, EOF) << "Read result data failed";
+
+    if (expected_res_dec != -1) {
+      // Check results match.
+      const DecodeParam input = GET_PARAM(1);
+      if (input.threads > 1) {
+        // The serial decode check is too strict for tile-threaded decoding as
+        // there is no guarantee on the decode order nor which specific error
+        // will take precedence. Currently a tile-level error is not forwarded
+        // so the frame will simply be marked corrupt.
+        EXPECT_TRUE(res_dec == expected_res_dec ||
+                    res_dec == AOM_CODEC_CORRUPT_FRAME)
+            << "Results don't match: frame number = " << video.frame_number()
+            << ". (" << decoder->DecodeError()
+            << "). Expected: " << expected_res_dec << " or "
+            << AOM_CODEC_CORRUPT_FRAME;
+      } else {
+        EXPECT_EQ(expected_res_dec, res_dec)
+            << "Results don't match: frame number = " << video.frame_number()
+            << ". (" << decoder->DecodeError() << ")";
+      }
+    }
+
+    return !HasFailure();
+  }
+
+  void HandlePeekResult(libaom_test::Decoder *const /*decoder*/,
+                        libaom_test::CompressedVideoSource * /*video*/,
+                        const aom_codec_err_t /*res_peek*/) override {}
+
+  void RunTest() {
+    const DecodeParam input = GET_PARAM(1);
+    aom_codec_dec_cfg_t cfg = { 0, 0, 0, !FORCE_HIGHBITDEPTH_DECODING };
+    cfg.threads = input.threads;
+    libaom_test::IVFVideoSource decode_video(input.filename);
+    decode_video.Init();
+
+    // The result file holds a list of expected integer results, one for each
+    // decoded frame.  Any result that doesn't match the file's list will
+    // cause a test failure.
+    const std::string res_filename = GetResFilename(input);
+    OpenResFile(res_filename);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&decode_video, cfg));
+  }
+
+ private:
+  FILE *res_file_;
+};
+
+TEST_P(InvalidFileTest, ReturnCode) { RunTest(); }
+
+// If res_filename (the third field) is nullptr, then the result filename is
+// filename + ".res" by default. Set res_filename to a string if the result
+// filename differs from the default.
+const DecodeParam kAV1InvalidFileTests[] = {
+  // { threads, filename, res_filename }
+  { 1, "invalid-bug-1814.ivf", nullptr },
+  { 1, "invalid-chromium-906381.ivf", nullptr },
+  { 1, "invalid-google-142530197.ivf", nullptr },
+  { 1, "invalid-google-142530197-1.ivf", nullptr },
+  { 4, "invalid-oss-fuzz-9463.ivf", "invalid-oss-fuzz-9463.ivf.res.2" },
+  { 1, "invalid-oss-fuzz-9720.ivf", nullptr },
+  { 1, "invalid-oss-fuzz-10389.ivf", "invalid-oss-fuzz-10389.ivf.res.4" },
+#if !CHROMIUM && !CONFIG_SIZE_LIMIT ||                  \
+    (CONFIG_SIZE_LIMIT && DECODE_WIDTH_LIMIT >= 5120 && \
+     DECODE_HEIGHT_LIMIT >= 180)
+  { 1, "invalid-oss-fuzz-11523.ivf", "invalid-oss-fuzz-11523.ivf.res.2" },
+#endif
+  { 4, "invalid-oss-fuzz-15363.ivf", nullptr },
+  { 1, "invalid-oss-fuzz-16437.ivf", "invalid-oss-fuzz-16437.ivf.res.2" },
+#if CONFIG_MAX_DECODE_PROFILE >= 1
+  { 1, "invalid-oss-fuzz-24706.ivf", nullptr },
+#endif
+#if CONFIG_AV1_HIGHBITDEPTH
+  // These test vectors contain 10-bit or 12-bit video.
+  { 1, "invalid-oss-fuzz-9288.ivf", nullptr },
+  { 1, "invalid-oss-fuzz-9482.ivf", nullptr },
+  { 1, "invalid-oss-fuzz-10061.ivf", nullptr },
+  { 1, "invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf", nullptr },
+  { 1, "invalid-oss-fuzz-10227.ivf", nullptr },
+  { 4, "invalid-oss-fuzz-10555.ivf", nullptr },
+  { 1, "invalid-oss-fuzz-10705.ivf", nullptr },
+#if CONFIG_CWG_C013
+  { 1, "invalid-oss-fuzz-10723.ivf", "invalid-oss-fuzz-10723.ivf.res.3" },
+#else
+  { 1, "invalid-oss-fuzz-10723.ivf", "invalid-oss-fuzz-10723.ivf.res.2" },
+#endif
+  { 1, "invalid-oss-fuzz-10779.ivf", nullptr },
+  { 1, "invalid-oss-fuzz-11477.ivf", nullptr },
+  { 1, "invalid-oss-fuzz-11479.ivf", "invalid-oss-fuzz-11479.ivf.res.2" },
+  { 1, "invalid-oss-fuzz-33030.ivf", nullptr },
+#endif
+};
+
+AV1_INSTANTIATE_TEST_SUITE(InvalidFileTest,
+                           ::testing::ValuesIn(kAV1InvalidFileTests));
+
+}  // namespace
diff --git a/third_party/aom/test/ivf_video_source.h b/third_party/aom/test/ivf_video_source.h
new file mode 100644
index 0000000000..85731f5566
--- /dev/null
+++ b/third_party/aom/test/ivf_video_source.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AOM_TEST_IVF_VIDEO_SOURCE_H_
+#define AOM_TEST_IVF_VIDEO_SOURCE_H_
+
+#include <cstdio>
+#include <cstdlib>
+#include <new>
+#include <string>
+
+#include "aom_ports/sanitizer.h"
+#include "test/video_source.h"
+
+namespace libaom_test {
+const unsigned int kCodeBufferSize = 256 * 1024 * 1024;
+const unsigned int kIvfFileHdrSize = 32;
+const unsigned int kIvfFrameHdrSize = 12;
+
+static unsigned int MemGetLe32(const uint8_t *mem) {
+  return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]);
+}
+
+// This class extends VideoSource to allow parsing of ivf files,
+// so that we can do actual file decodes.
+class IVFVideoSource : public CompressedVideoSource {
+ public:
+  explicit IVFVideoSource(const std::string &file_name)
+      : file_name_(file_name), input_file_(nullptr),
+        compressed_frame_buf_(nullptr), frame_sz_(0), frame_(0),
+        end_of_file_(false) {}
+
+  ~IVFVideoSource() override {
+    delete[] compressed_frame_buf_;
+
+    if (input_file_) fclose(input_file_);
+  }
+
+  void Init() override {
+    // Allocate a buffer for read in the compressed video frame.
+    compressed_frame_buf_ = new uint8_t[kCodeBufferSize];
+    ASSERT_NE(compressed_frame_buf_, nullptr) << "Allocate frame buffer failed";
+    ASAN_POISON_MEMORY_REGION(compressed_frame_buf_, kCodeBufferSize);
+  }
+
+  void Begin() override {
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_NE(input_file_, nullptr)
+        << "Input file open failed. Filename: " << file_name_;
+
+    // Read file header
+    uint8_t file_hdr[kIvfFileHdrSize];
+    ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_))
+        << "File header read failed.";
+    // Check file header
+    ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' &&
+                file_hdr[2] == 'I' && file_hdr[3] == 'F')
+        << "Input is not an IVF file.";
+
+    FillFrame();
+  }
+
+  void Next() override {
+    ++frame_;
+    FillFrame();
+  }
+
+  void FillFrame() {
+    ASSERT_NE(input_file_, nullptr);
+    uint8_t frame_hdr[kIvfFrameHdrSize];
+    // Check frame header and read a frame from input_file.
+    if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_) !=
+        kIvfFrameHdrSize) {
+      end_of_file_ = true;
+    } else {
+      end_of_file_ = false;
+
+      frame_sz_ = MemGetLe32(frame_hdr);
+      ASSERT_LE(frame_sz_, kCodeBufferSize)
+          << "Frame is too big for allocated code buffer";
+      ASAN_UNPOISON_MEMORY_REGION(compressed_frame_buf_, kCodeBufferSize);
+      ASSERT_EQ(frame_sz_,
+                fread(compressed_frame_buf_, 1, frame_sz_, input_file_))
+          << "Failed to read complete frame";
+      ASAN_POISON_MEMORY_REGION(compressed_frame_buf_ + frame_sz_,
+                                kCodeBufferSize - frame_sz_);
+    }
+  }
+
+  const uint8_t *cxdata() const override {
+    return end_of_file_ ? nullptr : compressed_frame_buf_;
+  }
+  size_t frame_size() const override { return frame_sz_; }
+  unsigned int frame_number() const override { return frame_; }
+
+ protected:
+  std::string file_name_;
+  FILE *input_file_;
+  uint8_t *compressed_frame_buf_;
+  size_t frame_sz_;
+  unsigned int frame_;
+  bool end_of_file_;
+};
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_IVF_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/kf_test.cc b/third_party/aom/test/kf_test.cc
new file mode 100644
index 0000000000..7d8cbfe8c6
--- /dev/null
+++ b/third_party/aom/test/kf_test.cc
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <string.h>
+
+#include <ostream>
+
+#include "aom/aom_codec.h"
+#include "aom/aom_encoder.h"
+#include "aom/aom_image.h"
+#include "aom/aomcx.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+#define NUM_LAG_VALUES 3
+
+namespace {
+aom_image_t *CreateGrayImage(aom_img_fmt_t fmt, unsigned int w,
+                             unsigned int h) {
+  aom_image_t *const image = aom_img_alloc(nullptr, fmt, w, h, 1);
+  if (!image) return image;
+
+  for (unsigned int i = 0; i < image->d_h; ++i) {
+    memset(image->planes[0] + i * image->stride[0], 128, image->d_w);
+  }
+  const unsigned int uv_h = (image->d_h + 1) / 2;
+  const unsigned int uv_w = (image->d_w + 1) / 2;
+  for (unsigned int i = 0; i < uv_h; ++i) {
+    memset(image->planes[1] + i * image->stride[1], 128, uv_w);
+    memset(image->planes[2] + i * image->stride[2], 128, uv_w);
+  }
+  return image;
+}
+
+// Tests kf_max_dist in one-pass encoding with zero lag.
+void TestKeyFrameMaximumInterval(unsigned int usage, unsigned int kf_max_dist) {
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage), AOM_CODEC_OK);
+  cfg.g_w = 320;
+  cfg.g_h = 240;
+  cfg.g_pass = AOM_RC_ONE_PASS;
+  cfg.g_lag_in_frames = 0;
+  cfg.kf_mode = AOM_KF_AUTO;
+  cfg.kf_min_dist = 0;
+  cfg.kf_max_dist = kf_max_dist;
+
+  aom_codec_ctx_t enc;
+  ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK);
+
+  ASSERT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 6), AOM_CODEC_OK);
+
+  aom_image_t *image = CreateGrayImage(AOM_IMG_FMT_I420, cfg.g_w, cfg.g_h);
+  ASSERT_NE(image, nullptr);
+
+  // Encode frames.
+  const aom_codec_cx_pkt_t *pkt;
+  const unsigned int num_frames = kf_max_dist == 0 ? 4 : 3 * kf_max_dist + 1;
+  for (unsigned int i = 0; i < num_frames; ++i) {
+    ASSERT_EQ(aom_codec_encode(&enc, image, i, 1, 0), AOM_CODEC_OK);
+    aom_codec_iter_t iter = nullptr;
+    while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) {
+      ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+      if (kf_max_dist == 0 || i % kf_max_dist == 0) {
+        ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+      } else {
+        ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u);
+      }
+    }
+  }
+
+  // Flush the encoder.
+  bool got_data;
+  do {
+    ASSERT_EQ(aom_codec_encode(&enc, nullptr, 0, 1, 0), AOM_CODEC_OK);
+    got_data = false;
+    aom_codec_iter_t iter = nullptr;
+    while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) {
+      ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+      got_data = true;
+    }
+  } while (got_data);
+
+  aom_img_free(image);
+  ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
+
+TEST(KeyFrameIntervalTest, KeyFrameMaximumInterval) {
+  for (unsigned int usage : { AOM_USAGE_GOOD_QUALITY, AOM_USAGE_REALTIME }) {
+    // Test 0 and 1 (both mean all intra), some powers of 2, some multiples of
+    // 10, and some prime numbers.
+    for (unsigned int kf_max_dist :
+         { 0, 1, 2, 3, 4, 7, 10, 13, 16, 20, 23, 29, 32 }) {
+      TestKeyFrameMaximumInterval(usage, kf_max_dist);
+    }
+  }
+}
+
+typedef struct {
+  const unsigned int min_kf_dist;
+  const unsigned int max_kf_dist;
+} kfIntervalParam;
+
+const kfIntervalParam kfTestParams[] = {
+  { 1, 1 }, { 0, 10 }, { 10, 10 }, { 0, 30 }, { 30, 30 }
+};
+
+std::ostream &operator<<(std::ostream &os, const kfIntervalParam &test_arg) {
+  return os << "kfIntervalParam { min_kf_dist:" << test_arg.min_kf_dist
+            << " max_kf_dist:" << test_arg.max_kf_dist << " }";
+}
+
+// This class is used to test the presence of forward key frame.
+class KeyFrameIntervalTestLarge
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
+                                                 kfIntervalParam, aom_rc_mode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  KeyFrameIntervalTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        kf_dist_param_(GET_PARAM(2)), end_usage_check_(GET_PARAM(3)) {
+    kf_dist_ = -1;
+    is_kf_interval_violated_ = false;
+  }
+  ~KeyFrameIntervalTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = end_usage_check_;
+    cfg_.g_threads = 1;
+    cfg_.kf_min_dist = kf_dist_param_.min_kf_dist;
+    cfg_.kf_max_dist = kf_dist_param_.max_kf_dist;
+    cfg_.g_lag_in_frames = 19;
+  }
+
+  bool DoDecode() const override { return true; }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 5);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+    }
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (AOM_CODEC_OK == res_dec) {
+      aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
+      int frame_flags = 0;
+      AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_FRAME_FLAGS,
+                                    &frame_flags);
+      if (kf_dist_ != -1) {
+        kf_dist_++;
+        if (kf_dist_ > (int)kf_dist_param_.max_kf_dist) {
+          is_kf_interval_violated_ = true;
+        }
+      }
+      if ((frame_flags & AOM_FRAME_IS_KEY) == AOM_FRAME_IS_KEY) {
+        if (kf_dist_ != -1 && kf_dist_ < (int)kf_dist_param_.min_kf_dist) {
+          is_kf_interval_violated_ = true;
+        }
+        kf_dist_ = 0;
+      }
+    }
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  const kfIntervalParam kf_dist_param_;
+  int kf_dist_;
+  bool is_kf_interval_violated_;
+  aom_rc_mode end_usage_check_;
+};
+
+// Because valgrind builds take a very long time to run, use a lower
+// resolution video for valgrind runs.
+const char *TestFileName() {
+#if AOM_VALGRIND_BUILD
+  return "hantro_collage_w176h144.yuv";
+#else
+  return "hantro_collage_w352h288.yuv";
+#endif  // AOM_VALGRIND_BUILD
+}
+
+int TestFileWidth() {
+#if AOM_VALGRIND_BUILD
+  return 176;
+#else
+  return 352;
+#endif  // AOM_VALGRIND_BUILD
+}
+
+int TestFileHeight() {
+#if AOM_VALGRIND_BUILD
+  return 144;
+#else
+  return 288;
+#endif  // AOM_VALGRIND_BUILD
+}
+
+TEST_P(KeyFrameIntervalTestLarge, KeyFrameIntervalTest) {
+  libaom_test::I420VideoSource video(TestFileName(), TestFileWidth(),
+                                     TestFileHeight(), cfg_.g_timebase.den,
+                                     cfg_.g_timebase.num, 0, 75);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(is_kf_interval_violated_, false) << kf_dist_param_;
+}
+
+// This class tests for presence and placement of application forced key frames.
+class ForcedKeyTestLarge
+    : public ::libaom_test::CodecTestWith5Params<libaom_test::TestMode, int,
+                                                 int, int, aom_rc_mode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  ForcedKeyTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        auto_alt_ref_(GET_PARAM(2)), fwd_kf_enabled_(GET_PARAM(3)),
+        cpu_used_(GET_PARAM(4)), rc_end_usage_(GET_PARAM(5)) {
+    forced_kf_frame_num_ = 1;
+    frame_num_ = 0;
+    is_kf_placement_violated_ = false;
+  }
+  ~ForcedKeyTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    cfg_.rc_end_usage = rc_end_usage_;
+    cfg_.g_threads = 0;
+    cfg_.kf_max_dist = 30;
+    cfg_.kf_min_dist = 0;
+    cfg_.fwd_kf_enabled = fwd_kf_enabled_;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, auto_alt_ref_);
+#if CONFIG_AV1_ENCODER
+      // override test default for tile columns if necessary.
+      if (GET_PARAM(0) == &libaom_test::kAV1) {
+        encoder->Control(AV1E_SET_TILE_COLUMNS, 6);
+      }
+#endif
+    }
+    frame_flags_ =
+        ((int)video->frame() == forced_kf_frame_num_) ? AOM_EFLAG_FORCE_KF : 0;
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (AOM_CODEC_OK == res_dec) {
+      if ((int)frame_num_ == forced_kf_frame_num_) {
+        aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
+        int frame_flags = 0;
+        AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_FRAME_FLAGS,
+                                      &frame_flags);
+        if ((frame_flags & AOM_FRAME_IS_KEY) != AOM_FRAME_IS_KEY) {
+          is_kf_placement_violated_ = true;
+        }
+      }
+      ++frame_num_;
+    }
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  void Frame1IsKey();
+  void ForcedFrameIsKey();
+  void ForcedFrameIsKeyCornerCases();
+
+  ::libaom_test::TestMode encoding_mode_;
+  int auto_alt_ref_;
+  int fwd_kf_enabled_;
+  int cpu_used_;
+  aom_rc_mode rc_end_usage_;
+  int forced_kf_frame_num_;
+  unsigned int frame_num_;
+  bool is_kf_placement_violated_;
+};
+
+void ForcedKeyTestLarge::Frame1IsKey() {
+  const aom_rational timebase = { 1, 30 };
+  // 1st element of this 2D array is for good encoding mode and 2nd element
+  // is for RT encoding mode.
+  const int lag_values[2][NUM_LAG_VALUES] = { { 3, 15, 25 }, { 0, -1, -1 } };
+  int is_realtime = (encoding_mode_ == ::libaom_test::kRealTime);
+
+  forced_kf_frame_num_ = 1;
+  for (int i = 0; i < NUM_LAG_VALUES; ++i) {
+    if (lag_values[is_realtime][i] == -1) continue;
+    frame_num_ = 0;
+    cfg_.g_lag_in_frames = lag_values[is_realtime][i];
+    is_kf_placement_violated_ = false;
+    libaom_test::I420VideoSource video(
+        TestFileName(), TestFileWidth(), TestFileHeight(), timebase.den,
+        timebase.num, 0, fwd_kf_enabled_ ? 60 : 30);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_EQ(is_kf_placement_violated_, false)
+        << "Frame #" << frame_num_ << " isn't a keyframe!";
+  }
+}
+
+// This class checks the presence and placement of application
+// forced key frames.
+void ForcedKeyTestLarge::ForcedFrameIsKey() {
+  const aom_rational timebase = { 1, 30 };
+  const int lag_values[] = { 3, 15, 25, -1 };
+
+  for (int i = 0; lag_values[i] != -1; ++i) {
+    frame_num_ = 0;
+    forced_kf_frame_num_ = lag_values[i] - 1;
+    cfg_.g_lag_in_frames = lag_values[i];
+    is_kf_placement_violated_ = false;
+    libaom_test::I420VideoSource video(
+        TestFileName(), TestFileWidth(), TestFileHeight(), timebase.den,
+        timebase.num, 0, fwd_kf_enabled_ ? 60 : 30);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_EQ(is_kf_placement_violated_, false)
+        << "Frame #" << frame_num_ << " isn't a keyframe!";
+
+    // Two pass and single pass CBR are currently segfaulting for the case when
+    // forced kf is placed after lag in frames.
+    // TODO(anyone): Enable(uncomment) below test once above bug is fixed.
+    //    frame_num_ = 0;
+    //    forced_kf_frame_num_ = lag_values[i] + 1;
+    //    cfg_.g_lag_in_frames = lag_values[i];
+    //    is_kf_placement_violated_ = false;
+    //    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    //    ASSERT_EQ(is_kf_placement_violated_, false)
+    //    << "Frame #" << frame_num_ << " isn't a keyframe!";
+  }
+}
+
+void ForcedKeyTestLarge::ForcedFrameIsKeyCornerCases() {
+  const aom_rational timebase = { 1, 30 };
+  const int kf_offsets[] = { -2, -1, 1, 2, 0 };
+  cfg_.g_lag_in_frames = 35;
+  if (encoding_mode_ == ::libaom_test::kRealTime) cfg_.g_lag_in_frames = 0;
+
+  for (int i = 0; kf_offsets[i] != 0; ++i) {
+    frame_num_ = 0;
+    forced_kf_frame_num_ = (int)cfg_.kf_max_dist + kf_offsets[i];
+    forced_kf_frame_num_ = forced_kf_frame_num_ > 0 ? forced_kf_frame_num_ : 1;
+    is_kf_placement_violated_ = false;
+    libaom_test::I420VideoSource video(
+        TestFileName(), TestFileWidth(), TestFileHeight(), timebase.den,
+        timebase.num, 0, fwd_kf_enabled_ ? 60 : 30);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_EQ(is_kf_placement_violated_, false)
+        << "Frame #" << frame_num_ << " isn't a keyframe!";
+  }
+}
+
+AV1_INSTANTIATE_TEST_SUITE(KeyFrameIntervalTestLarge,
+                           testing::Values(::libaom_test::kOnePassGood,
+                                           ::libaom_test::kTwoPassGood),
+                           ::testing::ValuesIn(kfTestParams),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ));
+
+TEST_P(ForcedKeyTestLarge, Frame1IsKey) { Frame1IsKey(); }
+TEST_P(ForcedKeyTestLarge, ForcedFrameIsKey) { ForcedFrameIsKey(); }
+TEST_P(ForcedKeyTestLarge, ForcedFrameIsKeyCornerCases) {
+  ForcedFrameIsKeyCornerCases();
+}
+
+class ForcedKeyRTTestLarge : public ForcedKeyTestLarge {};
+
+TEST_P(ForcedKeyRTTestLarge, Frame1IsKey) { Frame1IsKey(); }
+TEST_P(ForcedKeyRTTestLarge, ForcedFrameIsKeyCornerCases) {
+  ForcedFrameIsKeyCornerCases();
+}
+// TODO(anyone): Add CBR to list of rc_modes once forced kf placement after
+// lag in frames bug is fixed.
+AV1_INSTANTIATE_TEST_SUITE(ForcedKeyTestLarge,
+                           ::testing::Values(::libaom_test::kOnePassGood,
+                                             ::libaom_test::kTwoPassGood),
+                           ::testing::Values(0, 1), ::testing::Values(0, 1),
+                           ::testing::Values(2, 5),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CQ));
+AV1_INSTANTIATE_TEST_SUITE(ForcedKeyRTTestLarge,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Values(0), ::testing::Values(0),
+                           ::testing::Values(7, 9),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR));
+}  // namespace
diff --git a/third_party/aom/test/level_test.cc b/third_party/aom/test/level_test.cc
new file mode 100644
index 0000000000..a7c26d2305
--- /dev/null
+++ b/third_party/aom/test/level_test.cc
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <memory>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+const int kLevelMin = 0;
+const int kLevelMax = 31;
+const int kLevelKeepStats = 32;
+// Speed settings tested
+static const int kCpuUsedVectors[] = {
+  1,
+  2,
+  3,
+  4,
+};
+
+class LevelTest
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  LevelTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), target_level_(31) {}
+
+  ~LevelTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    if (encoding_mode_ != ::libaom_test::kRealTime) {
+      cfg_.g_lag_in_frames = 5;
+    } else {
+      cfg_.rc_buf_sz = 1000;
+      cfg_.rc_buf_initial_sz = 500;
+      cfg_.rc_buf_optimal_sz = 600;
+    }
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AV1E_SET_TARGET_SEQ_LEVEL_IDX, target_level_);
+      if (encoding_mode_ != ::libaom_test::kRealTime) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      }
+    }
+
+    int num_operating_points;
+    encoder->Control(AV1E_GET_NUM_OPERATING_POINTS, &num_operating_points);
+    ASSERT_EQ(num_operating_points, 1);
+    encoder->Control(AV1E_GET_SEQ_LEVEL_IDX, level_);
+    ASSERT_LE(level_[0], kLevelMax);
+    ASSERT_GE(level_[0], kLevelMin);
+  }
+
+  libaom_test::TestMode encoding_mode_;
+  int cpu_used_;
+  int target_level_;
+  int level_[32];
+};
+
+TEST(LevelTest, TestTargetLevelApi) {
+  aom_codec_iface_t *codec = aom_codec_av1_cx();
+  aom_codec_ctx_t enc;
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(codec, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, codec, &cfg, 0));
+  for (int operating_point = 0; operating_point <= 32; ++operating_point) {
+    for (int level = 0; level <= 32; ++level) {
+      const int target_level = operating_point * 100 + level;
+      if (operating_point <= 31 &&
+          ((level < (CONFIG_CWG_C013 ? 28 : 20) && level != 2 && level != 3 &&
+            level != 6 && level != 7 && level != 10 && level != 11) ||
+           level == kLevelMax || level == kLevelKeepStats)) {
+        EXPECT_EQ(AOM_CODEC_OK,
+                  AOM_CODEC_CONTROL_TYPECHECKED(
+                      &enc, AV1E_SET_TARGET_SEQ_LEVEL_IDX, target_level));
+      } else {
+        EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+                  AOM_CODEC_CONTROL_TYPECHECKED(
+                      &enc, AV1E_SET_TARGET_SEQ_LEVEL_IDX, target_level));
+      }
+    }
+  }
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+TEST(LevelTest, InvalidOperatingPointIndexErrorDetail) {
+  aom_codec_iface_t *codec = aom_codec_av1_cx();
+  aom_codec_ctx_t enc;
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(aom_codec_enc_config_default(codec, &cfg, 0), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_enc_init(&enc, codec, &cfg, 0), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TARGET_SEQ_LEVEL_IDX, 3219),
+            AOM_CODEC_INVALID_PARAM);
+  EXPECT_EQ(aom_codec_error_detail(&enc),
+            std::string("Invalid operating point index: 32"));
+  EXPECT_EQ(aom_codec_set_option(&enc, "target-seq-level-idx", "3319"),
+            AOM_CODEC_INVALID_PARAM);
+  EXPECT_EQ(aom_codec_error_detail(&enc),
+            std::string("Invalid operating point index: 33"));
+  EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
+
+TEST_P(LevelTest, TestTargetLevel19) {
+  std::unique_ptr<libaom_test::VideoSource> video;
+  video.reset(new libaom_test::Y4mVideoSource("park_joy_90p_8_420.y4m", 0, 10));
+  ASSERT_NE(video, nullptr);
+  // Level index 19 corresponding to level 6.3.
+  target_level_ = 19;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+}
+
+TEST_P(LevelTest, TestLevelMonitoringLowBitrate) {
+  // To save run time, we only test speed 4.
+  if (cpu_used_ == 4) {
+    libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 40);
+    target_level_ = kLevelKeepStats;
+    cfg_.rc_target_bitrate = 1000;
+    cfg_.g_limit = 40;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_EQ(level_[0], 0);
+  }
+}
+
+TEST_P(LevelTest, TestLevelMonitoringHighBitrate) {
+  // To save run time, we only test speed 4.
+  if (cpu_used_ == 4) {
+    libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 40);
+    target_level_ = kLevelKeepStats;
+    cfg_.rc_target_bitrate = 4000;
+    cfg_.g_limit = 40;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_EQ(level_[0], 4);
+  }
+}
+
+TEST_P(LevelTest, TestTargetLevel0) {
+  // To save run time, we only test speed 4.
+  if (cpu_used_ == 4) {
+    libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 50);
+    const int target_level = 0;
+    target_level_ = target_level;
+    cfg_.rc_target_bitrate = 4000;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_EQ(level_[0], target_level);
+  }
+}
+
+TEST_P(LevelTest, TestTargetLevelRecode) {
+  if (cpu_used_ == 4 && encoding_mode_ == ::libaom_test::kTwoPassGood) {
+    libaom_test::I420VideoSource video("rand_noise_w1280h720.yuv", 1280, 720,
+                                       25, 1, 0, 10);
+    const int target_level = 0005;
+    target_level_ = target_level;
+    cfg_.rc_target_bitrate = 5000;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+}
+
+AV1_INSTANTIATE_TEST_SUITE(LevelTest,
+                           ::testing::Values(::libaom_test::kTwoPassGood,
+                                             ::libaom_test::kOnePassGood),
+                           ::testing::ValuesIn(kCpuUsedVectors));
+}  // namespace
diff --git a/third_party/aom/test/lightfield_test.sh b/third_party/aom/test/lightfield_test.sh
new file mode 100755
index 0000000000..cf1ea73a84
--- /dev/null
+++ b/third_party/aom/test/lightfield_test.sh
@@ -0,0 +1,115 @@
+#!/bin/sh
+## Copyright (c) 2018, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the lightfield example.
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $infile is required.
+lightfield_test_verify_environment() {
+  local infile="${LIBAOM_TEST_DATA_PATH}/vase10x10.yuv"
+  if [ ! -e "${infile}" ]; then
+    echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Run the lightfield example
+lightfield_test() {
+  local img_width=1024
+  local img_height=1024
+  local lf_width=10
+  local lf_height=10
+  local lf_blocksize=5
+  local num_references=4
+  local num_tile_lists=2
+
+  # Encode the lightfield.
+  local encoder="${LIBAOM_BIN_PATH}/lightfield_encoder${AOM_TEST_EXE_SUFFIX}"
+  local yuv_file="${LIBAOM_TEST_DATA_PATH}/vase10x10.yuv"
+  local lf_file="${AOM_TEST_OUTPUT_DIR}/vase10x10.ivf"
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${img_width}" "${img_height}" \
+      "${yuv_file}" "${lf_file}" "${lf_width}" \
+      "${lf_height}" "${lf_blocksize}" ${devnull} || return 1
+
+  [ -e "${lf_file}" ] || return 1
+
+  # Check to ensure all camera frames have the identical frame header. If not identical, this test fails.
+  for i in ./fh*; do
+    diff ./fh004 $i > /dev/null
+      if [ $? -eq 1 ]; then
+      return 1
+    fi
+  done
+
+  # Check to ensure all camera frames use the identical frame context. If not identical, this test fails.
+  for i in ./fc*; do
+    diff ./fc004 $i > /dev/null
+      if [ $? -eq 1 ]; then
+      return 1
+    fi
+  done
+
+  # Parse lightfield bitstream to construct and output a new bitstream that can
+  # be decoded by an AV1 decoder.
+  local bs_decoder="${LIBAOM_BIN_PATH}/lightfield_bitstream_parsing${AOM_TEST_EXE_SUFFIX}"
+  local tl_file="${AOM_TEST_OUTPUT_DIR}/vase_tile_list.ivf"
+  local tl_text_file="${LIBAOM_TEST_DATA_PATH}/vase10x10_tiles.txt"
+  if [ ! -x "${bs_decoder}" ]; then
+    elog "${bs_decoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${bs_decoder}" "${lf_file}" "${tl_file}" \
+      "${num_references}" "${tl_text_file}" ${devnull} || return 1
+
+  [ -e "${tl_file}" ] || return 1
+
+  # Run lightfield tile list decoder
+  local tl_decoder="${LIBAOM_BIN_PATH}/lightfield_tile_list_decoder${AOM_TEST_EXE_SUFFIX}"
+  local tl_outfile="${AOM_TEST_OUTPUT_DIR}/vase_tile_list.yuv"
+  if [ ! -x "${tl_decoder}" ]; then
+    elog "${tl_decoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${tl_decoder}" "${tl_file}" "${tl_outfile}" \
+      "${num_references}" "${num_tile_lists}" ${devnull} || return 1
+
+  [ -e "${tl_outfile}" ] || return 1
+
+  # Run reference lightfield decoder
+  local ref_decoder="${LIBAOM_BIN_PATH}/lightfield_decoder${AOM_TEST_EXE_SUFFIX}"
+  local tl_reffile="${AOM_TEST_OUTPUT_DIR}/vase_reference.yuv"
+  if [ ! -x "${ref_decoder}" ]; then
+    elog "${ref_decoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${ref_decoder}" "${lf_file}" "${tl_reffile}" \
+      "${num_references}" "${tl_text_file}" ${devnull} || return 1
+
+  [ -e "${tl_reffile}" ] || return 1
+
+  # Check if tl_outfile and tl_reffile are identical. If not identical, this test fails.
+  diff ${tl_outfile} ${tl_reffile} > /dev/null
+  if [ $? -eq 1 ]; then
+    return 1
+  fi
+}
+
+lightfield_test_tests="lightfield_test"
+
+run_tests lightfield_test_verify_environment "${lightfield_test_tests}"
diff --git a/third_party/aom/test/log2_test.cc b/third_party/aom/test/log2_test.cc
new file mode 100644
index 0000000000..71cf8b25d9
--- /dev/null
+++ b/third_party/aom/test/log2_test.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <limits.h>
+#include <math.h>
+
+#include "aom_ports/bitops.h"
+#include "av1/common/entropymode.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+TEST(Log2Test, GetMsb) {
+  // Test small numbers exhaustively.
+  for (unsigned int n = 1; n < 10000; n++) {
+    EXPECT_EQ(get_msb(n), static_cast<int>(floor(log2(n))));
+  }
+
+  // Test every power of 2 and the two adjacent numbers.
+  for (int exponent = 2; exponent < 32; exponent++) {
+    const unsigned int power_of_2 = 1U << exponent;
+    EXPECT_EQ(get_msb(power_of_2 - 1), exponent - 1);
+    EXPECT_EQ(get_msb(power_of_2), exponent);
+    EXPECT_EQ(get_msb(power_of_2 + 1), exponent);
+  }
+}
+
+TEST(Log2Test, Av1CeilLog2) {
+  // Test small numbers exhaustively.
+  EXPECT_EQ(av1_ceil_log2(0), 0);
+  for (int n = 1; n < 10000; n++) {
+    EXPECT_EQ(av1_ceil_log2(n), static_cast<int>(ceil(log2(n))));
+  }
+
+  // Test every power of 2 and the two adjacent numbers.
+  for (int exponent = 2; exponent < 31; exponent++) {
+    const int power_of_2 = 1 << exponent;
+    EXPECT_EQ(av1_ceil_log2(power_of_2 - 1), exponent);
+    EXPECT_EQ(av1_ceil_log2(power_of_2), exponent);
+    EXPECT_EQ(av1_ceil_log2(power_of_2 + 1), exponent + 1);
+  }
+
+  // INT_MAX = 2^31 - 1
+  EXPECT_EQ(av1_ceil_log2(INT_MAX), 31);
+}
diff --git a/third_party/aom/test/loopfilter_control_test.cc b/third_party/aom/test/loopfilter_control_test.cc
new file mode 100644
index 0000000000..9c00235e19
--- /dev/null
+++ b/third_party/aom/test/loopfilter_control_test.cc
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+const unsigned int kFrames = 10;
+const int kBitrate = 500;
+
+// List of psnr thresholds for LF settings 0-3
+// keys: video, LF control, aq mode.
+std::unordered_map<std::string,
+                   std::unordered_map<int, std::unordered_map<int, double>>>
+    kPsnrThreshold = { { "park_joy_90p_8_420.y4m",
+                         { { 0, { { 0, 35.0 }, { 3, 35.8 } } },
+                           { 1, { { 0, 35.1 }, { 3, 35.9 } } },
+                           { 2, { { 0, 35.1 }, { 3, 36.1 } } },
+                           { 3, { { 0, 35.1 }, { 3, 36.1 } } } } },
+                       { "paris_352_288_30.y4m",
+                         { { 0, { { 0, 35.40 }, { 3, 36.0 } } },
+                           { 1, { { 0, 35.50 }, { 3, 36.0 } } },
+                           { 2, { { 0, 35.50 }, { 3, 36.0 } } },
+                           { 3, { { 0, 35.50 }, { 3, 36.0 } } } } },
+                       { "niklas_1280_720_30.y4m",
+                         { { 0, { { 0, 33.20 }, { 3, 32.90 } } },
+                           { 1, { { 0, 33.57 }, { 3, 33.22 } } },
+                           { 2, { { 0, 33.57 }, { 3, 33.22 } } },
+                           { 3, { { 0, 33.45 }, { 3, 33.10 } } } } } };
+
+typedef struct {
+  const char *filename;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) {
+  return os << "TestVideoParam { filename:" << test_arg.filename
+            << " input_bit_depth:" << test_arg.input_bit_depth
+            << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth
+            << " profile:" << test_arg.profile << " }";
+}
+
+const TestVideoParam kTestVectors[] = {
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "paris_352_288_30.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "niklas_1280_720_30.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+};
+
+// Params: test video, lf_control, aq mode, threads, tile columns.
+class LFControlEndToEndTest
+    : public ::libaom_test::CodecTestWith5Params<TestVideoParam, int,
+                                                 unsigned int, int, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  LFControlEndToEndTest()
+      : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)),
+        lf_control_(GET_PARAM(2)), psnr_(0.0), nframes_(0),
+        aq_mode_(GET_PARAM(3)), threads_(GET_PARAM(4)),
+        tile_columns_(GET_PARAM(5)) {}
+
+  ~LFControlEndToEndTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kRealTime);
+
+    cfg_.g_threads = threads_;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.kf_max_dist = 9999;
+    cfg_.kf_min_dist = 9999;
+  }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_ENABLE_RESTORATION, 0);
+      encoder->Control(AV1E_SET_ENABLE_OBMC, 0);
+      encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
+      encoder->Control(AV1E_SET_ENABLE_WARPED_MOTION, 0);
+      encoder->Control(AV1E_SET_DELTAQ_MODE, 0);
+      encoder->Control(AV1E_SET_ENABLE_TPL_MODEL, 0);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, tile_columns_);
+      encoder->Control(AOME_SET_CPUUSED, 10);
+      encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+      encoder->Control(AV1E_SET_AQ_MODE, aq_mode_);
+      encoder->Control(AV1E_SET_ROW_MT, 1);
+      encoder->Control(AV1E_SET_ENABLE_CDEF, 1);
+      encoder->Control(AV1E_SET_COEFF_COST_UPD_FREQ, 2);
+      encoder->Control(AV1E_SET_MODE_COST_UPD_FREQ, 2);
+      encoder->Control(AV1E_SET_MV_COST_UPD_FREQ, 2);
+      encoder->Control(AV1E_SET_DV_COST_UPD_FREQ, 2);
+      encoder->Control(AV1E_SET_LOOPFILTER_CONTROL, lf_control_);
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  double GetPsnrThreshold() {
+    return kPsnrThreshold[test_video_param_.filename][lf_control_][aq_mode_];
+  }
+
+  void DoTest() {
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    std::unique_ptr<libaom_test::VideoSource> video;
+    video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                kFrames));
+    ASSERT_NE(video, nullptr);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, GetPsnrThreshold())
+        << "loopfilter control = " << lf_control_ << " aq mode = " << aq_mode_;
+  }
+
+  TestVideoParam test_video_param_;
+  int lf_control_;
+
+ private:
+  double psnr_;
+  unsigned int nframes_;
+  unsigned int aq_mode_;
+  int threads_;
+  int tile_columns_;
+};
+
+class LFControlEndToEndTestThreaded : public LFControlEndToEndTest {};
+
+TEST_P(LFControlEndToEndTest, EndtoEndPSNRTest) { DoTest(); }
+
+TEST_P(LFControlEndToEndTestThreaded, EndtoEndPSNRTest) { DoTest(); }
+
+TEST(LFControlGetterTest, NullptrInput) {
+  int *lf_level = nullptr;
+  aom_codec_ctx_t encoder;
+  aom_codec_enc_cfg_t cfg;
+  aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg, 1);
+  EXPECT_EQ(aom_codec_enc_init(&encoder, aom_codec_av1_cx(), &cfg, 0),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&encoder, AOME_GET_LOOPFILTER_LEVEL, lf_level),
+            AOM_CODEC_INVALID_PARAM);
+  EXPECT_EQ(aom_codec_destroy(&encoder), AOM_CODEC_OK);
+}
+
+AV1_INSTANTIATE_TEST_SUITE(LFControlEndToEndTest,
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::Range(0, 4),
+                           ::testing::Values<unsigned int>(0, 3),
+                           ::testing::Values(1), ::testing::Values(1));
+
+AV1_INSTANTIATE_TEST_SUITE(LFControlEndToEndTestThreaded,
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::Range(0, 4),
+                           ::testing::Values<unsigned int>(0, 3),
+                           ::testing::Range(2, 5), ::testing::Range(2, 5));
+}  // namespace
diff --git a/third_party/aom/test/lossless_test.cc b/third_party/aom/test/lossless_test.cc
new file mode 100644
index 0000000000..756ad05019
--- /dev/null
+++ b/third_party/aom/test/lossless_test.cc
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+
+const int kMaxPsnr = 100;
+
+class LosslessTestLarge
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
+                                                 aom_rc_mode, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  LosslessTestLarge()
+      : EncoderTest(GET_PARAM(0)), psnr_(kMaxPsnr), nframes_(0),
+        encoding_mode_(GET_PARAM(1)), rc_end_usage_(GET_PARAM(2)),
+        cpu_used_(GET_PARAM(3)) {}
+
+  ~LosslessTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    cfg_.rc_end_usage = rc_end_usage_;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      // Only call Control if quantizer > 0 to verify that using quantizer
+      // alone will activate lossless
+      if (cfg_.rc_max_quantizer > 0 || cfg_.rc_min_quantizer > 0) {
+        encoder->Control(AV1E_SET_LOSSLESS, 1);
+      }
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+    }
+  }
+
+  void BeginPassHook(unsigned int /*pass*/) override {
+    psnr_ = kMaxPsnr;
+    nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    if (pkt->data.psnr.psnr[0] < psnr_) psnr_ = pkt->data.psnr.psnr[0];
+  }
+
+  double GetMinPsnr() const { return psnr_; }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (AOM_CODEC_OK == res_dec) {
+      aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
+      AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_LAST_QUANTIZER,
+                                    &base_qindex_);
+      EXPECT_EQ(base_qindex_, 0)
+          << "Error: Base_qindex is non zero for lossless coding";
+    }
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  void TestLosslessEncoding();
+  void TestLosslessEncodingVGALag0();
+  void TestLosslessEncoding444();
+  void TestLosslessEncodingCtrl();
+
+ private:
+  double psnr_;
+  unsigned int nframes_;
+  libaom_test::TestMode encoding_mode_;
+  aom_rc_mode rc_end_usage_;
+  int cpu_used_;
+  int base_qindex_;
+};
+
+void LosslessTestLarge::TestLosslessEncoding() {
+  const aom_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 0;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  // intentionally changed the dimension for better testing coverage
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     timebase.den, timebase.num, 0, 5);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_lossless = GetMinPsnr();
+  EXPECT_GE(psnr_lossless, kMaxPsnr);
+}
+
+void LosslessTestLarge::TestLosslessEncodingVGALag0() {
+  const aom_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 0;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480,
+                                     timebase.den, timebase.num, 0, 30);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_lossless = GetMinPsnr();
+  EXPECT_GE(psnr_lossless, kMaxPsnr);
+}
+
+void LosslessTestLarge::TestLosslessEncoding444() {
+  libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 5);
+
+  cfg_.g_profile = 1;
+  cfg_.g_timebase = video.timebase();
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 0;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_lossless = GetMinPsnr();
+  EXPECT_GE(psnr_lossless, kMaxPsnr);
+}
+
+void LosslessTestLarge::TestLosslessEncodingCtrl() {
+  const aom_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.g_lag_in_frames = 25;
+  // Intentionally set Q > 0, to make sure control can be used to activate
+  // lossless
+  cfg_.rc_min_quantizer = 10;
+  cfg_.rc_max_quantizer = 20;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     timebase.den, timebase.num, 0, 5);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_lossless = GetMinPsnr();
+  EXPECT_GE(psnr_lossless, kMaxPsnr);
+}
+
+TEST_P(LosslessTestLarge, TestLosslessEncoding) { TestLosslessEncoding(); }
+
+TEST_P(LosslessTestLarge, TestLosslessEncodingVGALag0) {
+  TestLosslessEncodingVGALag0();
+}
+
+TEST_P(LosslessTestLarge, TestLosslessEncoding444) {
+  TestLosslessEncoding444();
+}
+
+TEST_P(LosslessTestLarge, TestLosslessEncodingCtrl) {
+  TestLosslessEncodingCtrl();
+}
+
+class LosslessAllIntraTestLarge : public LosslessTestLarge {};
+
+TEST_P(LosslessAllIntraTestLarge, TestLosslessEncodingCtrl) {
+  const aom_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  // Intentionally set Q > 0, to make sure control can be used to activate
+  // lossless
+  cfg_.rc_min_quantizer = 10;
+  cfg_.rc_max_quantizer = 20;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     timebase.den, timebase.num, 0, 5);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_lossless = GetMinPsnr();
+  EXPECT_GE(psnr_lossless, kMaxPsnr);
+}
+
+using LosslessRealtimeTestLarge = LosslessTestLarge;
+
+TEST_P(LosslessRealtimeTestLarge, TestLosslessEncoding) {
+  TestLosslessEncoding();
+}
+
+TEST_P(LosslessRealtimeTestLarge, TestLosslessEncodingVGALag0) {
+  TestLosslessEncodingVGALag0();
+}
+
+TEST_P(LosslessRealtimeTestLarge, TestLosslessEncoding444) {
+  TestLosslessEncoding444();
+}
+
+TEST_P(LosslessRealtimeTestLarge, TestLosslessEncodingCtrl) {
+  TestLosslessEncodingCtrl();
+}
+
+AV1_INSTANTIATE_TEST_SUITE(LosslessTestLarge,
+                           ::testing::Values(::libaom_test::kOnePassGood,
+                                             ::libaom_test::kTwoPassGood),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ),
+                           ::testing::Values(0));  // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(LosslessAllIntraTestLarge,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(AOM_Q),
+                           ::testing::Values(6, 9));  // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(LosslessRealtimeTestLarge,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ),
+                           ::testing::Range(6, 11));  // cpu_used
+}  // namespace
diff --git a/third_party/aom/test/lpf_test.cc b/third_party/aom/test/lpf_test.cc
new file mode 100644
index 0000000000..04b1c86d4d
--- /dev/null
+++ b/third_party/aom/test/lpf_test.cc
@@ -0,0 +1,824 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/av1_loopfilter.h"
+#include "av1/common/entropy.h"
+#include "aom/aom_integer.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+// Horizontally and Vertically need 32x32: 8  Coeffs preceeding filtered section
+//                                         16 Coefs within filtered section
+//                                         8  Coeffs following filtered section
+const int kNumCoeffs = 1024;
+
+const int number_of_iterations = 10000;
+
+const int kSpeedTestNum = 500000;
+
+#define LOOP_PARAM \
+  int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh
+#define DUAL_LOOP_PARAM                                                      \
+  int p, const uint8_t *blimit0, const uint8_t *limit0,                      \
+      const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, \
+      const uint8_t *thresh1
+
+typedef void (*loop_op_t)(uint8_t *s, LOOP_PARAM);
+typedef void (*dual_loop_op_t)(uint8_t *s, DUAL_LOOP_PARAM);
+typedef void (*hbdloop_op_t)(uint16_t *s, LOOP_PARAM, int bd);
+typedef void (*hbddual_loop_op_t)(uint16_t *s, DUAL_LOOP_PARAM, int bd);
+
+typedef std::tuple<hbdloop_op_t, hbdloop_op_t, int> hbdloop_param_t;
+typedef std::tuple<hbddual_loop_op_t, hbddual_loop_op_t, int>
+    hbddual_loop_param_t;
+typedef std::tuple<loop_op_t, loop_op_t, int> loop_param_t;
+typedef std::tuple<dual_loop_op_t, dual_loop_op_t, int> dual_loop_param_t;
+
+template <typename Pixel_t, int PIXEL_WIDTH_t>
+void InitInput(Pixel_t *s, Pixel_t *ref_s, ACMRandom *rnd, const uint8_t limit,
+               const int mask, const int32_t p, const int i) {
+  uint16_t tmp_s[kNumCoeffs];
+
+  for (int j = 0; j < kNumCoeffs;) {
+    const uint8_t val = rnd->Rand8();
+    if (val & 0x80) {  // 50% chance to choose a new value.
+      tmp_s[j] = rnd->Rand16();
+      j++;
+    } else {  // 50% chance to repeat previous value in row X times.
+      int k = 0;
+      while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
+        if (j < 1) {
+          tmp_s[j] = rnd->Rand16();
+        } else if (val & 0x20) {  // Increment by a value within the limit.
+          tmp_s[j] = static_cast<uint16_t>(tmp_s[j - 1] + (limit - 1));
+        } else {  // Decrement by a value within the limit.
+          tmp_s[j] = static_cast<uint16_t>(tmp_s[j - 1] - (limit - 1));
+        }
+        j++;
+      }
+    }
+  }
+
+  for (int j = 0; j < kNumCoeffs;) {
+    const uint8_t val = rnd->Rand8();
+    if (val & 0x80) {
+      j++;
+    } else {  // 50% chance to repeat previous value in column X times.
+      int k = 0;
+      while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
+        if (j < 1) {
+          tmp_s[j] = rnd->Rand16();
+        } else if (val & 0x20) {  // Increment by a value within the limit.
+          tmp_s[(j % 32) * 32 + j / 32] = static_cast<uint16_t>(
+              tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] + (limit - 1));
+        } else {  // Decrement by a value within the limit.
+          tmp_s[(j % 32) * 32 + j / 32] = static_cast<uint16_t>(
+              tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] - (limit - 1));
+        }
+        j++;
+      }
+    }
+  }
+
+  for (int j = 0; j < kNumCoeffs; j++) {
+    if (i % 2) {
+      s[j] = tmp_s[j] & mask;
+    } else {
+      s[j] = tmp_s[p * (j % p) + j / p] & mask;
+    }
+    ref_s[j] = s[j];
+  }
+}
+
+uint8_t GetOuterThresh(ACMRandom *rnd) {
+  return static_cast<uint8_t>(rnd->PseudoUniform(3 * MAX_LOOP_FILTER + 5));
+}
+
+uint8_t GetInnerThresh(ACMRandom *rnd) {
+  return static_cast<uint8_t>(rnd->PseudoUniform(MAX_LOOP_FILTER + 1));
+}
+
+uint8_t GetHevThresh(ACMRandom *rnd) {
+  return static_cast<uint8_t>(rnd->PseudoUniform(MAX_LOOP_FILTER + 1) >> 4);
+}
+
+template <typename func_type_t, typename params_t>
+class LoopTestParam : public ::testing::TestWithParam<params_t> {
+ public:
+  ~LoopTestParam() override = default;
+  void SetUp() override {
+    loopfilter_op_ = std::get<0>(this->GetParam());
+    ref_loopfilter_op_ = std::get<1>(this->GetParam());
+    bit_depth_ = std::get<2>(this->GetParam());
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+ protected:
+  int bit_depth_;
+  int mask_;
+  func_type_t loopfilter_op_;
+  func_type_t ref_loopfilter_op_;
+};
+
+#if CONFIG_AV1_HIGHBITDEPTH
+void call_filter(uint16_t *s, LOOP_PARAM, int bd, hbdloop_op_t op) {
+  op(s, p, blimit, limit, thresh, bd);
+}
+void call_dualfilter(uint16_t *s, DUAL_LOOP_PARAM, int bd,
+                     hbddual_loop_op_t op) {
+  op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bd);
+}
+#endif
+void call_filter(uint8_t *s, LOOP_PARAM, int bd, loop_op_t op) {
+  (void)bd;
+  op(s, p, blimit, limit, thresh);
+}
+void call_dualfilter(uint8_t *s, DUAL_LOOP_PARAM, int bd, dual_loop_op_t op) {
+  (void)bd;
+  op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef LoopTestParam<hbdloop_op_t, hbdloop_param_t> Loop8Test6Param_hbd;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Loop8Test6Param_hbd);
+typedef LoopTestParam<hbddual_loop_op_t, hbddual_loop_param_t>
+    Loop8Test9Param_hbd;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Loop8Test9Param_hbd);
+#endif
+typedef LoopTestParam<loop_op_t, loop_param_t> Loop8Test6Param_lbd;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Loop8Test6Param_lbd);
+typedef LoopTestParam<dual_loop_op_t, dual_loop_param_t> Loop8Test9Param_lbd;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Loop8Test9Param_lbd);
+
+#define OPCHECK(a, b)                                                          \
+  do {                                                                         \
+    ACMRandom rnd(ACMRandom::DeterministicSeed());                             \
+    const int count_test_block = number_of_iterations;                         \
+    const int32_t p = kNumCoeffs / 32;                                         \
+    DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                      \
+    DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                  \
+    int err_count_total = 0;                                                   \
+    int first_failure = -1;                                                    \
+    for (int i = 0; i < count_test_block; ++i) {                               \
+      int err_count = 0;                                                       \
+      uint8_t tmp = GetOuterThresh(&rnd);                                      \
+      DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = { tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp }; \
+      tmp = GetInnerThresh(&rnd);                                              \
+      DECLARE_ALIGNED(16, const uint8_t,                                       \
+                      limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+      tmp = GetHevThresh(&rnd);                                                \
+      DECLARE_ALIGNED(16, const uint8_t, thresh[16]) = { tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp }; \
+      InitInput<a, b>(s, ref_s, &rnd, *limit, mask_, p, i);                    \
+      call_filter(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_,     \
+                  ref_loopfilter_op_);                                         \
+      API_REGISTER_STATE_CHECK(call_filter(s + 8 + p * 8, p, blimit, limit,    \
+                                           thresh, bit_depth_,                 \
+                                           loopfilter_op_));                   \
+      for (int j = 0; j < kNumCoeffs; ++j) {                                   \
+        err_count += ref_s[j] != s[j];                                         \
+      }                                                                        \
+      if (err_count && !err_count_total) {                                     \
+        first_failure = i;                                                     \
+      }                                                                        \
+      err_count_total += err_count;                                            \
+    }                                                                          \
+    EXPECT_EQ(0, err_count_total)                                              \
+        << "Error: Loop8Test6Param, C output doesn't match SIMD "              \
+           "loopfilter output. "                                               \
+        << "First failed at test case " << first_failure;                      \
+  } while (false)
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST_P(Loop8Test6Param_hbd, OperationCheck) { OPCHECK(uint16_t, 16); }
+#endif
+TEST_P(Loop8Test6Param_lbd, OperationCheck) { OPCHECK(uint8_t, 8); }
+
+#define VALCHECK(a, b)                                                         \
+  do {                                                                         \
+    ACMRandom rnd(ACMRandom::DeterministicSeed());                             \
+    const int count_test_block = number_of_iterations;                         \
+    DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                      \
+    DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                  \
+    int err_count_total = 0;                                                   \
+    int first_failure = -1;                                                    \
+    for (int i = 0; i < count_test_block; ++i) {                               \
+      int err_count = 0;                                                       \
+      uint8_t tmp = GetOuterThresh(&rnd);                                      \
+      DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = { tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp }; \
+      tmp = GetInnerThresh(&rnd);                                              \
+      DECLARE_ALIGNED(16, const uint8_t,                                       \
+                      limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+      tmp = GetHevThresh(&rnd);                                                \
+      DECLARE_ALIGNED(16, const uint8_t, thresh[16]) = { tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp }; \
+      int32_t p = kNumCoeffs / 32;                                             \
+      for (int j = 0; j < kNumCoeffs; ++j) {                                   \
+        s[j] = rnd.Rand16() & mask_;                                           \
+        ref_s[j] = s[j];                                                       \
+      }                                                                        \
+      call_filter(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_,     \
+                  ref_loopfilter_op_);                                         \
+      API_REGISTER_STATE_CHECK(call_filter(s + 8 + p * 8, p, blimit, limit,    \
+                                           thresh, bit_depth_,                 \
+                                           loopfilter_op_));                   \
+      for (int j = 0; j < kNumCoeffs; ++j) {                                   \
+        err_count += ref_s[j] != s[j];                                         \
+      }                                                                        \
+      if (err_count && !err_count_total) {                                     \
+        first_failure = i;                                                     \
+      }                                                                        \
+      err_count_total += err_count;                                            \
+    }                                                                          \
+    EXPECT_EQ(0, err_count_total)                                              \
+        << "Error: Loop8Test6Param, C output doesn't match SIMD "              \
+           "loopfilter output. "                                               \
+        << "First failed at test case " << first_failure;                      \
+  } while (false)
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST_P(Loop8Test6Param_hbd, ValueCheck) { VALCHECK(uint16_t, 16); }
+#endif
+TEST_P(Loop8Test6Param_lbd, ValueCheck) { VALCHECK(uint8_t, 8); }
+
+#define SPEEDCHECK(a, b)                                                      \
+  do {                                                                        \
+    ACMRandom rnd(ACMRandom::DeterministicSeed());                            \
+    const int count_test_block = kSpeedTestNum;                               \
+    const int32_t bd = bit_depth_;                                            \
+    DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                     \
+    uint8_t tmp = GetOuterThresh(&rnd);                                       \
+    DECLARE_ALIGNED(16, const uint8_t,                                        \
+                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    tmp = GetInnerThresh(&rnd);                                               \
+    DECLARE_ALIGNED(16, const uint8_t,                                        \
+                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+    tmp = GetHevThresh(&rnd);                                                 \
+    DECLARE_ALIGNED(16, const uint8_t,                                        \
+                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    int32_t p = kNumCoeffs / 32;                                              \
+    for (int j = 0; j < kNumCoeffs; ++j) {                                    \
+      s[j] = rnd.Rand16() & mask_;                                            \
+    }                                                                         \
+    for (int i = 0; i < count_test_block; ++i) {                              \
+      call_filter(s + 8 + p * 8, p, blimit, limit, thresh, bd,                \
+                  loopfilter_op_);                                            \
+    }                                                                         \
+  } while (false)
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST_P(Loop8Test6Param_hbd, DISABLED_Speed) { SPEEDCHECK(uint16_t, 16); }
+#endif
+TEST_P(Loop8Test6Param_lbd, DISABLED_Speed) { SPEEDCHECK(uint8_t, 8); }
+
+#define OPCHECKd(a, b)                                                         \
+  do {                                                                         \
+    ACMRandom rnd(ACMRandom::DeterministicSeed());                             \
+    const int count_test_block = number_of_iterations;                         \
+    DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                      \
+    DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                  \
+    int err_count_total = 0;                                                   \
+    int first_failure = -1;                                                    \
+    for (int i = 0; i < count_test_block; ++i) {                               \
+      int err_count = 0;                                                       \
+      uint8_t tmp = GetOuterThresh(&rnd);                                      \
+      DECLARE_ALIGNED(                                                         \
+          16, const uint8_t, blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp };            \
+      tmp = GetInnerThresh(&rnd);                                              \
+      DECLARE_ALIGNED(16, const uint8_t, limit0[16]) = { tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp }; \
+      tmp = GetHevThresh(&rnd);                                                \
+      DECLARE_ALIGNED(                                                         \
+          16, const uint8_t, thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp };            \
+      tmp = GetOuterThresh(&rnd);                                              \
+      DECLARE_ALIGNED(                                                         \
+          16, const uint8_t, blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp };            \
+      tmp = GetInnerThresh(&rnd);                                              \
+      DECLARE_ALIGNED(16, const uint8_t, limit1[16]) = { tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp }; \
+      tmp = GetHevThresh(&rnd);                                                \
+      DECLARE_ALIGNED(                                                         \
+          16, const uint8_t, thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp };            \
+      int32_t p = kNumCoeffs / 32;                                             \
+      const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1;             \
+      InitInput<a, b>(s, ref_s, &rnd, limit, mask_, p, i);                     \
+      call_dualfilter(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \
+                      limit1, thresh1, bit_depth_, ref_loopfilter_op_);        \
+      API_REGISTER_STATE_CHECK(                                                \
+          call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \
+                          limit1, thresh1, bit_depth_, loopfilter_op_));       \
+      for (int j = 0; j < kNumCoeffs; ++j) {                                   \
+        err_count += ref_s[j] != s[j];                                         \
+      }                                                                        \
+      if (err_count && !err_count_total) {                                     \
+        first_failure = i;                                                     \
+      }                                                                        \
+      err_count_total += err_count;                                            \
+    }                                                                          \
+    EXPECT_EQ(0, err_count_total)                                              \
+        << "Error: Loop8Test9Param, C output doesn't match SIMD "              \
+           "loopfilter output. "                                               \
+        << "First failed at test case " << first_failure;                      \
+  } while (false)
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST_P(Loop8Test9Param_hbd, OperationCheck) { OPCHECKd(uint16_t, 16); }
+#endif
+TEST_P(Loop8Test9Param_lbd, OperationCheck) { OPCHECKd(uint8_t, 8); }
+
+#define VALCHECKd(a, b)                                                        \
+  do {                                                                         \
+    ACMRandom rnd(ACMRandom::DeterministicSeed());                             \
+    const int count_test_block = number_of_iterations;                         \
+    DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                      \
+    DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                  \
+    int err_count_total = 0;                                                   \
+    int first_failure = -1;                                                    \
+    for (int i = 0; i < count_test_block; ++i) {                               \
+      int err_count = 0;                                                       \
+      uint8_t tmp = GetOuterThresh(&rnd);                                      \
+      DECLARE_ALIGNED(                                                         \
+          16, const uint8_t, blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp };            \
+      tmp = GetInnerThresh(&rnd);                                              \
+      DECLARE_ALIGNED(16, const uint8_t, limit0[16]) = { tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp }; \
+      tmp = GetHevThresh(&rnd);                                                \
+      DECLARE_ALIGNED(                                                         \
+          16, const uint8_t, thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp };            \
+      tmp = GetOuterThresh(&rnd);                                              \
+      DECLARE_ALIGNED(                                                         \
+          16, const uint8_t, blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp };            \
+      tmp = GetInnerThresh(&rnd);                                              \
+      DECLARE_ALIGNED(16, const uint8_t, limit1[16]) = { tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp,   \
+                                                         tmp, tmp, tmp, tmp }; \
+      tmp = GetHevThresh(&rnd);                                                \
+      DECLARE_ALIGNED(                                                         \
+          16, const uint8_t, thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                              tmp, tmp, tmp, tmp };            \
+      int32_t p = kNumCoeffs / 32;                                             \
+      for (int j = 0; j < kNumCoeffs; ++j) {                                   \
+        s[j] = rnd.Rand16() & mask_;                                           \
+        ref_s[j] = s[j];                                                       \
+      }                                                                        \
+      call_dualfilter(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \
+                      limit1, thresh1, bit_depth_, ref_loopfilter_op_);        \
+      API_REGISTER_STATE_CHECK(                                                \
+          call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \
+                          limit1, thresh1, bit_depth_, loopfilter_op_));       \
+      for (int j = 0; j < kNumCoeffs; ++j) {                                   \
+        err_count += ref_s[j] != s[j];                                         \
+      }                                                                        \
+      if (err_count && !err_count_total) {                                     \
+        first_failure = i;                                                     \
+      }                                                                        \
+      err_count_total += err_count;                                            \
+    }                                                                          \
+    EXPECT_EQ(0, err_count_total)                                              \
+        << "Error: Loop8Test9Param, C output doesn't match SIMD "              \
+           "loopfilter output. "                                               \
+        << "First failed at test case " << first_failure;                      \
+  } while (false)
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST_P(Loop8Test9Param_hbd, ValueCheck) { VALCHECKd(uint16_t, 16); }
+#endif
+TEST_P(Loop8Test9Param_lbd, ValueCheck) { VALCHECKd(uint8_t, 8); }
+
+#define SPEEDCHECKd(a, b)                                                      \
+  do {                                                                         \
+    ACMRandom rnd(ACMRandom::DeterministicSeed());                             \
+    const int count_test_block = kSpeedTestNum;                                \
+    DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                      \
+    uint8_t tmp = GetOuterThresh(&rnd);                                        \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    tmp = GetInnerThresh(&rnd);                                                \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+    tmp = GetHevThresh(&rnd);                                                  \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    tmp = GetOuterThresh(&rnd);                                                \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    tmp = GetInnerThresh(&rnd);                                                \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+    tmp = GetHevThresh(&rnd);                                                  \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    int32_t p = kNumCoeffs / 32;                                               \
+    for (int j = 0; j < kNumCoeffs; ++j) {                                     \
+      s[j] = rnd.Rand16() & mask_;                                             \
+    }                                                                          \
+    for (int i = 0; i < count_test_block; ++i) {                               \
+      call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,     \
+                      limit1, thresh1, bit_depth_, loopfilter_op_);            \
+    }                                                                          \
+  } while (false)
+
+#if CONFIG_AV1_HIGHBITDEPTH
+TEST_P(Loop8Test9Param_hbd, DISABLED_Speed) { SPEEDCHECKd(uint16_t, 16); }
+#endif
+TEST_P(Loop8Test9Param_lbd, DISABLED_Speed) { SPEEDCHECKd(uint8_t, 8); }
+
+using std::make_tuple;
+
+#if HAVE_SSE2
+#if CONFIG_AV1_HIGHBITDEPTH
+const hbdloop_param_t kHbdLoop8Test6[] = {
+  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
+             8),
+  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
+             8),
+  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
+             8),
+  make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
+             &aom_highbd_lpf_horizontal_14_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 8),
+
+  make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
+             8),
+  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
+             10),
+  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
+             10),
+  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
+             10),
+  make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
+             &aom_highbd_lpf_horizontal_14_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
+             10),
+  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
+             12),
+  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
+             12),
+  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
+             12),
+  make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
+             &aom_highbd_lpf_horizontal_14_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
+             12),
+  make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 12)
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, Loop8Test6Param_hbd,
+                         ::testing::ValuesIn(kHbdLoop8Test6));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+const loop_param_t kLoop8Test6[] = {
+  make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8),
+  make_tuple(&aom_lpf_horizontal_8_sse2, &aom_lpf_horizontal_8_c, 8),
+  make_tuple(&aom_lpf_horizontal_6_sse2, &aom_lpf_horizontal_6_c, 8),
+  make_tuple(&aom_lpf_vertical_6_sse2, &aom_lpf_vertical_6_c, 8),
+  make_tuple(&aom_lpf_horizontal_14_sse2, &aom_lpf_horizontal_14_c, 8),
+  make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8),
+  make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8),
+  make_tuple(&aom_lpf_vertical_14_sse2, &aom_lpf_vertical_14_c, 8),
+  make_tuple(&aom_lpf_horizontal_4_quad_sse2, &aom_lpf_horizontal_4_quad_c, 8),
+  make_tuple(&aom_lpf_vertical_4_quad_sse2, &aom_lpf_vertical_4_quad_c, 8),
+  make_tuple(&aom_lpf_horizontal_6_quad_sse2, &aom_lpf_horizontal_6_quad_c, 8),
+  make_tuple(&aom_lpf_vertical_6_quad_sse2, &aom_lpf_vertical_6_quad_c, 8),
+  make_tuple(&aom_lpf_horizontal_8_quad_sse2, &aom_lpf_horizontal_8_quad_c, 8),
+  make_tuple(&aom_lpf_vertical_8_quad_sse2, &aom_lpf_vertical_8_quad_c, 8),
+  make_tuple(&aom_lpf_horizontal_14_quad_sse2, &aom_lpf_horizontal_14_quad_c,
+             8),
+  make_tuple(&aom_lpf_vertical_14_quad_sse2, &aom_lpf_vertical_14_quad_c, 8)
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, Loop8Test6Param_lbd,
+                         ::testing::ValuesIn(kLoop8Test6));
+
+const dual_loop_param_t kLoop8Test9[] = {
+  make_tuple(&aom_lpf_horizontal_4_dual_sse2, &aom_lpf_horizontal_4_dual_c, 8),
+  make_tuple(&aom_lpf_vertical_4_dual_sse2, &aom_lpf_vertical_4_dual_c, 8),
+  make_tuple(&aom_lpf_horizontal_6_dual_sse2, &aom_lpf_horizontal_6_dual_c, 8),
+  make_tuple(&aom_lpf_vertical_6_dual_sse2, &aom_lpf_vertical_6_dual_c, 8),
+  make_tuple(&aom_lpf_horizontal_8_dual_sse2, &aom_lpf_horizontal_8_dual_c, 8),
+  make_tuple(&aom_lpf_vertical_8_dual_sse2, &aom_lpf_vertical_8_dual_c, 8),
+  make_tuple(&aom_lpf_horizontal_14_dual_sse2, &aom_lpf_horizontal_14_dual_c,
+             8),
+  make_tuple(&aom_lpf_vertical_14_dual_sse2, &aom_lpf_vertical_14_dual_c, 8)
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, Loop8Test9Param_lbd,
+                         ::testing::ValuesIn(kLoop8Test9));
+
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2
+const loop_param_t kLoop8Test6Avx2[] = {
+  make_tuple(&aom_lpf_horizontal_6_quad_avx2, &aom_lpf_horizontal_6_quad_c, 8),
+  make_tuple(&aom_lpf_horizontal_8_quad_avx2, &aom_lpf_horizontal_8_quad_c, 8),
+  make_tuple(&aom_lpf_horizontal_14_quad_avx2, &aom_lpf_horizontal_14_quad_c,
+             8),
+  make_tuple(&aom_lpf_vertical_14_quad_avx2, &aom_lpf_vertical_14_quad_c, 8),
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, Loop8Test6Param_lbd,
+                         ::testing::ValuesIn(kLoop8Test6Avx2));
+#endif
+
+#if HAVE_SSE2 && CONFIG_AV1_HIGHBITDEPTH
+const hbddual_loop_param_t kHbdLoop8Test9[] = {
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
+             &aom_highbd_lpf_horizontal_6_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
+             &aom_highbd_lpf_horizontal_14_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
+             &aom_highbd_lpf_vertical_4_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
+             &aom_highbd_lpf_vertical_6_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
+             &aom_highbd_lpf_vertical_8_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
+             &aom_highbd_lpf_vertical_14_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
+             &aom_highbd_lpf_horizontal_6_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
+             &aom_highbd_lpf_horizontal_14_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
+             &aom_highbd_lpf_vertical_4_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
+             &aom_highbd_lpf_vertical_6_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
+             &aom_highbd_lpf_vertical_8_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
+             &aom_highbd_lpf_vertical_14_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
+             &aom_highbd_lpf_horizontal_6_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
+             &aom_highbd_lpf_horizontal_14_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
+             &aom_highbd_lpf_vertical_4_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
+             &aom_highbd_lpf_vertical_6_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
+             &aom_highbd_lpf_vertical_8_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
+             &aom_highbd_lpf_vertical_14_dual_c, 12),
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, Loop8Test9Param_hbd,
+                         ::testing::ValuesIn(kHbdLoop8Test9));
+
+#endif  // HAVE_SSE2 && CONFIG_AV1_HIGHBITDEPTH
+
+#if HAVE_NEON
+const loop_param_t kLoop8Test6[] = {
+  make_tuple(&aom_lpf_vertical_14_neon, &aom_lpf_vertical_14_c, 8),
+  make_tuple(&aom_lpf_vertical_8_neon, &aom_lpf_vertical_8_c, 8),
+  make_tuple(&aom_lpf_vertical_6_neon, &aom_lpf_vertical_6_c, 8),
+  make_tuple(&aom_lpf_vertical_4_neon, &aom_lpf_vertical_4_c, 8),
+  make_tuple(&aom_lpf_horizontal_14_neon, &aom_lpf_horizontal_14_c, 8),
+  make_tuple(&aom_lpf_horizontal_8_neon, &aom_lpf_horizontal_8_c, 8),
+  make_tuple(&aom_lpf_horizontal_6_neon, &aom_lpf_horizontal_6_c, 8),
+  make_tuple(&aom_lpf_horizontal_4_neon, &aom_lpf_horizontal_4_c, 8),
+  make_tuple(&aom_lpf_horizontal_4_quad_neon, &aom_lpf_horizontal_4_quad_c, 8),
+  make_tuple(&aom_lpf_vertical_4_quad_neon, &aom_lpf_vertical_4_quad_c, 8),
+  make_tuple(&aom_lpf_horizontal_6_quad_neon, &aom_lpf_horizontal_6_quad_c, 8),
+  make_tuple(&aom_lpf_vertical_6_quad_neon, &aom_lpf_vertical_6_quad_c, 8),
+  make_tuple(&aom_lpf_horizontal_8_quad_neon, &aom_lpf_horizontal_8_quad_c, 8),
+  make_tuple(&aom_lpf_vertical_8_quad_neon, &aom_lpf_vertical_8_quad_c, 8),
+  make_tuple(&aom_lpf_horizontal_14_quad_neon, &aom_lpf_horizontal_14_quad_c,
+             8),
+  make_tuple(&aom_lpf_vertical_14_quad_neon, &aom_lpf_vertical_14_quad_c, 8)
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, Loop8Test6Param_lbd,
+                         ::testing::ValuesIn(kLoop8Test6));
+
+const dual_loop_param_t kLoop8Test9[] = {
+  make_tuple(&aom_lpf_horizontal_4_dual_neon, &aom_lpf_horizontal_4_dual_c, 8),
+  make_tuple(&aom_lpf_horizontal_6_dual_neon, &aom_lpf_horizontal_6_dual_c, 8),
+  make_tuple(&aom_lpf_horizontal_8_dual_neon, &aom_lpf_horizontal_8_dual_c, 8),
+  make_tuple(&aom_lpf_horizontal_14_dual_neon, &aom_lpf_horizontal_14_dual_c,
+             8),
+  make_tuple(&aom_lpf_vertical_4_dual_neon, &aom_lpf_vertical_4_dual_c, 8),
+  make_tuple(&aom_lpf_vertical_6_dual_neon, &aom_lpf_vertical_6_dual_c, 8),
+  make_tuple(&aom_lpf_vertical_8_dual_neon, &aom_lpf_vertical_8_dual_c, 8),
+  make_tuple(&aom_lpf_vertical_14_dual_neon, &aom_lpf_vertical_14_dual_c, 8)
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, Loop8Test9Param_lbd,
+                         ::testing::ValuesIn(kLoop8Test9));
+#if CONFIG_AV1_HIGHBITDEPTH
+const hbdloop_param_t kHbdLoop8Test6[] = {
+  make_tuple(&aom_highbd_lpf_horizontal_4_neon, &aom_highbd_lpf_horizontal_4_c,
+             8),
+  make_tuple(&aom_highbd_lpf_horizontal_4_neon, &aom_highbd_lpf_horizontal_4_c,
+             10),
+  make_tuple(&aom_highbd_lpf_horizontal_4_neon, &aom_highbd_lpf_horizontal_4_c,
+             12),
+  make_tuple(&aom_highbd_lpf_horizontal_6_neon, &aom_highbd_lpf_horizontal_6_c,
+             8),
+  make_tuple(&aom_highbd_lpf_horizontal_6_neon, &aom_highbd_lpf_horizontal_6_c,
+             10),
+  make_tuple(&aom_highbd_lpf_horizontal_6_neon, &aom_highbd_lpf_horizontal_6_c,
+             12),
+  make_tuple(&aom_highbd_lpf_horizontal_8_neon, &aom_highbd_lpf_horizontal_8_c,
+             8),
+  make_tuple(&aom_highbd_lpf_horizontal_8_neon, &aom_highbd_lpf_horizontal_8_c,
+             10),
+  make_tuple(&aom_highbd_lpf_horizontal_8_neon, &aom_highbd_lpf_horizontal_8_c,
+             12),
+  make_tuple(&aom_highbd_lpf_horizontal_14_neon,
+             &aom_highbd_lpf_horizontal_14_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_14_neon,
+             &aom_highbd_lpf_horizontal_14_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_14_neon,
+             &aom_highbd_lpf_horizontal_14_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_4_neon, &aom_highbd_lpf_vertical_4_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_4_neon, &aom_highbd_lpf_vertical_4_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_4_neon, &aom_highbd_lpf_vertical_4_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_6_neon, &aom_highbd_lpf_vertical_6_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_6_neon, &aom_highbd_lpf_vertical_6_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_6_neon, &aom_highbd_lpf_vertical_6_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_8_neon, &aom_highbd_lpf_vertical_8_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_8_neon, &aom_highbd_lpf_vertical_8_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_8_neon, &aom_highbd_lpf_vertical_8_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_14_neon, &aom_highbd_lpf_vertical_14_c,
+             8),
+  make_tuple(&aom_highbd_lpf_vertical_14_neon, &aom_highbd_lpf_vertical_14_c,
+             10),
+  make_tuple(&aom_highbd_lpf_vertical_14_neon, &aom_highbd_lpf_vertical_14_c,
+             12),
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, Loop8Test6Param_hbd,
+                         ::testing::ValuesIn(kHbdLoop8Test6));
+
+const hbddual_loop_param_t kHbdLoop8Test9[] = {
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_neon,
+             &aom_highbd_lpf_horizontal_4_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_6_dual_neon,
+             &aom_highbd_lpf_horizontal_6_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_neon,
+             &aom_highbd_lpf_horizontal_8_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_14_dual_neon,
+             &aom_highbd_lpf_horizontal_14_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_neon,
+             &aom_highbd_lpf_vertical_4_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_6_dual_neon,
+             &aom_highbd_lpf_vertical_6_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_neon,
+             &aom_highbd_lpf_vertical_8_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_14_dual_neon,
+             &aom_highbd_lpf_vertical_14_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_neon,
+             &aom_highbd_lpf_horizontal_4_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_6_dual_neon,
+             &aom_highbd_lpf_horizontal_6_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_neon,
+             &aom_highbd_lpf_horizontal_8_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_14_dual_neon,
+             &aom_highbd_lpf_horizontal_14_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_neon,
+             &aom_highbd_lpf_vertical_4_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_6_dual_neon,
+             &aom_highbd_lpf_vertical_6_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_neon,
+             &aom_highbd_lpf_vertical_8_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_14_dual_neon,
+             &aom_highbd_lpf_vertical_14_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_neon,
+             &aom_highbd_lpf_horizontal_4_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_6_dual_neon,
+             &aom_highbd_lpf_horizontal_6_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_neon,
+             &aom_highbd_lpf_horizontal_8_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_14_dual_neon,
+             &aom_highbd_lpf_horizontal_14_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_neon,
+             &aom_highbd_lpf_vertical_4_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_6_dual_neon,
+             &aom_highbd_lpf_vertical_6_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_neon,
+             &aom_highbd_lpf_vertical_8_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_14_dual_neon,
+             &aom_highbd_lpf_vertical_14_dual_c, 12),
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, Loop8Test9Param_hbd,
+                         ::testing::ValuesIn(kHbdLoop8Test9));
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_NEON
+
+#if HAVE_AVX2 && CONFIG_AV1_HIGHBITDEPTH
+const hbddual_loop_param_t kHbdLoop8Test9Avx2[] = {
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
+             &aom_highbd_lpf_vertical_4_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
+             &aom_highbd_lpf_vertical_4_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
+             &aom_highbd_lpf_vertical_4_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
+             &aom_highbd_lpf_vertical_8_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
+             &aom_highbd_lpf_vertical_8_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
+             &aom_highbd_lpf_vertical_8_dual_c, 12),
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, Loop8Test9Param_hbd,
+                         ::testing::ValuesIn(kHbdLoop8Test9Avx2));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/masked_sad_test.cc b/third_party/aom/test/masked_sad_test.cc
new file mode 100644
index 0000000000..bb037460d1
--- /dev/null
+++ b/third_party/aom/test/masked_sad_test.cc
@@ -0,0 +1,617 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom/aom_integer.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+const int number_of_iterations = 200;
+
+typedef unsigned int (*MaskedSADFunc)(const uint8_t *src, int src_stride,
+                                      const uint8_t *ref, int ref_stride,
+                                      const uint8_t *second_pred,
+                                      const uint8_t *msk, int msk_stride,
+                                      int invert_mask);
+typedef std::tuple<MaskedSADFunc, MaskedSADFunc> MaskedSADParam;
+
+typedef void (*MaskedSADx4Func)(const uint8_t *src, int src_stride,
+                                const uint8_t *ref[], int ref_stride,
+                                const uint8_t *second_pred, const uint8_t *msk,
+                                int msk_stride, int invert_mask,
+                                unsigned sads[]);
+
+typedef std::tuple<MaskedSADx4Func, MaskedSADx4Func> MaskedSADx4Param;
+
+class MaskedSADTestBase : public ::testing::Test {
+ public:
+  ~MaskedSADTestBase() override = default;
+  void SetUp() override = 0;
+  virtual void runRef(const uint8_t *src_ptr, int src_stride,
+                      const uint8_t *ref_ptr[], int ref_stride,
+                      const uint8_t *second_pred, const uint8_t *msk,
+                      int msk_stride, int inv_mask, unsigned sads[],
+                      int times) = 0;
+  virtual void runTest(const uint8_t *src_ptr, int src_stride,
+                       const uint8_t *ref_ptr[], int ref_stride,
+                       const uint8_t *second_pred, const uint8_t *msk,
+                       int msk_stride, int inv_mask, unsigned sads[],
+                       int times) = 0;
+
+  void runMaskedSADTest(int run_times);
+};
+
+class MaskedSADTest : public MaskedSADTestBase,
+                      public ::testing::WithParamInterface<MaskedSADParam> {
+ public:
+  ~MaskedSADTest() override = default;
+  void SetUp() override {
+    maskedSAD_op_ = GET_PARAM(0);
+    ref_maskedSAD_op_ = GET_PARAM(1);
+  }
+
+  void runRef(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr[],
+              int ref_stride, const uint8_t *second_pred, const uint8_t *msk,
+              int msk_stride, int inv_mask, unsigned sads[],
+              int times) override;
+  void runTest(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr[],
+               int ref_stride, const uint8_t *second_pred, const uint8_t *msk,
+               int msk_stride, int inv_mask, unsigned sads[],
+               int times) override;
+
+ protected:
+  MaskedSADFunc maskedSAD_op_;
+  MaskedSADFunc ref_maskedSAD_op_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MaskedSADTest);
+
+class MaskedSADx4Test : public MaskedSADTestBase,
+                        public ::testing::WithParamInterface<MaskedSADx4Param> {
+ public:
+  ~MaskedSADx4Test() override = default;
+  void SetUp() override {
+    maskedSAD_op_ = GET_PARAM(0);
+    ref_maskedSAD_op_ = GET_PARAM(1);
+  }
+  void runRef(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr[],
+              int ref_stride, const uint8_t *second_pred, const uint8_t *msk,
+              int msk_stride, int inv_mask, unsigned sads[],
+              int times) override;
+  void runTest(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr[],
+               int ref_stride, const uint8_t *second_pred, const uint8_t *msk,
+               int msk_stride, int inv_mask, unsigned sads[],
+               int times) override;
+
+ protected:
+  MaskedSADx4Func maskedSAD_op_;
+  MaskedSADx4Func ref_maskedSAD_op_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MaskedSADx4Test);
+
+void MaskedSADTest::runRef(const uint8_t *src_ptr, int src_stride,
+                           const uint8_t *ref_ptr[], int ref_stride,
+                           const uint8_t *second_pred, const uint8_t *msk,
+                           int msk_stride, int invert_mask, unsigned sads[],
+                           int times) {
+  for (int repeat = 0; repeat < times; ++repeat) {
+    sads[0] = ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr[0], ref_stride,
+                                second_pred, msk, msk_stride, invert_mask);
+  }
+}
+
+void MaskedSADTest::runTest(const uint8_t *src_ptr, int src_stride,
+                            const uint8_t *ref_ptr[], int ref_stride,
+                            const uint8_t *second_pred, const uint8_t *msk,
+                            int msk_stride, int invert_mask, unsigned sads[],
+                            int times) {
+  if (times == 1) {
+    sads[0] = maskedSAD_op_(src_ptr, src_stride, ref_ptr[0], ref_stride,
+                            second_pred, msk, msk_stride, invert_mask);
+  } else {
+    for (int repeat = 0; repeat < times; ++repeat) {
+      API_REGISTER_STATE_CHECK(
+          sads[0] = maskedSAD_op_(src_ptr, src_stride, ref_ptr[0], ref_stride,
+                                  second_pred, msk, msk_stride, invert_mask));
+    }
+  }
+}
+
+void MaskedSADx4Test::runRef(const uint8_t *src_ptr, int src_stride,
+                             const uint8_t *ref_ptr[], int ref_stride,
+                             const uint8_t *second_pred, const uint8_t *msk,
+                             int msk_stride, int invert_mask, unsigned sads[],
+                             int times) {
+  for (int repeat = 0; repeat < times; ++repeat) {
+    ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride, second_pred,
+                      msk, msk_stride, invert_mask, sads);
+  }
+}
+
+void MaskedSADx4Test::runTest(const uint8_t *src_ptr, int src_stride,
+                              const uint8_t *ref_ptr[], int ref_stride,
+                              const uint8_t *second_pred, const uint8_t *msk,
+                              int msk_stride, int invert_mask, unsigned sads[],
+                              int times) {
+  if (times == 1) {
+    API_REGISTER_STATE_CHECK(maskedSAD_op_(src_ptr, src_stride, ref_ptr,
+                                           ref_stride, second_pred, msk,
+                                           msk_stride, invert_mask, sads));
+  } else {
+    for (int repeat = 0; repeat < times; ++repeat) {
+      maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, msk,
+                    msk_stride, invert_mask, sads);
+    }
+  }
+}
+
+void MaskedSADTestBase::runMaskedSADTest(int run_times) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const unsigned kBlockSize = MAX_SB_SIZE * MAX_SB_SIZE;
+  DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE * 4]);
+  DECLARE_ALIGNED(16, uint8_t, second_pred_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+
+  const uint8_t *refs[] = { ref_ptr, ref_ptr + kBlockSize,
+                            ref_ptr + 2 * kBlockSize,
+                            ref_ptr + 3 * kBlockSize };
+  unsigned sads[] = { 0, 0, 0, 0 };
+  unsigned sads_ref[] = { 0, 0, 0, 0 };
+  int err_count = 0;
+  int first_failure = -1;
+  int src_stride = MAX_SB_SIZE;
+  int ref_stride = MAX_SB_SIZE;
+  int msk_stride = MAX_SB_SIZE;
+  const int iters = run_times == 1 ? number_of_iterations : 1;
+  for (int i = 0; i < iters; ++i) {
+    if (run_times == 1 && i == 0) {
+      // The maximum accumulator value occurs when src=0 and
+      // ref/second_pref=255 (or vice-versa, since we take the absolute
+      // difference). Check this case explicitly to ensure we do not overflow
+      // during accumulation.
+      for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
+        src_ptr[j] = 0;
+        ref_ptr[j] = 255;
+        (ref_ptr + kBlockSize)[j] = 255;
+        (ref_ptr + 2 * kBlockSize)[j] = 255;
+        (ref_ptr + 3 * kBlockSize)[j] = 255;
+        second_pred_ptr[j] = 255;
+      }
+    } else {
+      for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
+        src_ptr[j] = rnd.Rand8();
+        ref_ptr[j] = rnd.Rand8();
+        (ref_ptr + kBlockSize)[j] = rnd.Rand8();
+        (ref_ptr + 2 * kBlockSize)[j] = rnd.Rand8();
+        (ref_ptr + 3 * kBlockSize)[j] = rnd.Rand8();
+        second_pred_ptr[j] = rnd.Rand8();
+      }
+    }
+    for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
+      msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
+      assert(msk_ptr[j] <= 64);
+    }
+
+    for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+      aom_usec_timer timer;
+      aom_usec_timer_start(&timer);
+      runRef(src_ptr, src_stride, refs, ref_stride, second_pred_ptr, msk_ptr,
+             msk_stride, invert_mask, sads_ref, run_times);
+      aom_usec_timer_mark(&timer);
+      const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+      aom_usec_timer_start(&timer);
+      runTest(src_ptr, src_stride, refs, ref_stride, second_pred_ptr, msk_ptr,
+              msk_stride, invert_mask, sads, run_times);
+      aom_usec_timer_mark(&timer);
+      const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+      if (run_times > 10) {
+        printf("%7.2f/%7.2fns", time1, time2);
+        printf("(%3.2f)\n", time1 / time2);
+      }
+      if (sads_ref[0] != sads[0] || sads_ref[1] != sads[1] ||
+          sads_ref[2] != sads[2] || sads_ref[3] != sads[3]) {
+        err_count++;
+        if (first_failure == -1) first_failure = i;
+      }
+    }
+  }
+  EXPECT_EQ(0, err_count) << "Error: Masked SAD Test,  output doesn't match. "
+                          << "First failed at test case " << first_failure;
+}
+
+TEST_P(MaskedSADTest, OperationCheck) { runMaskedSADTest(1); }
+
+TEST_P(MaskedSADTest, DISABLED_Speed) { runMaskedSADTest(2000000); }
+
+TEST_P(MaskedSADx4Test, OperationCheck) { runMaskedSADTest(1); }
+
+TEST_P(MaskedSADx4Test, DISABLED_Speed) { runMaskedSADTest(2000000); }
+
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *src, int src_stride,
+                                            const uint8_t *ref, int ref_stride,
+                                            const uint8_t *second_pred,
+                                            const uint8_t *msk, int msk_stride,
+                                            int invert_mask);
+typedef std::tuple<HighbdMaskedSADFunc, HighbdMaskedSADFunc>
+    HighbdMaskedSADParam;
+
+class HighbdMaskedSADTest
+    : public ::testing::TestWithParam<HighbdMaskedSADParam> {
+ public:
+  ~HighbdMaskedSADTest() override = default;
+  void SetUp() override {
+    maskedSAD_op_ = GET_PARAM(0);
+    ref_maskedSAD_op_ = GET_PARAM(1);
+  }
+
+  void runHighbdMaskedSADTest(int run_times);
+
+ protected:
+  HighbdMaskedSADFunc maskedSAD_op_;
+  HighbdMaskedSADFunc ref_maskedSAD_op_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HighbdMaskedSADTest);
+
+void HighbdMaskedSADTest::runHighbdMaskedSADTest(int run_times) {
+  unsigned int ref_ret = 0, ret = 1;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint16_t, second_pred_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
+  uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
+  int err_count = 0;
+  int first_failure = -1;
+  int src_stride = MAX_SB_SIZE;
+  int ref_stride = MAX_SB_SIZE;
+  int msk_stride = MAX_SB_SIZE;
+  const int iters = run_times == 1 ? number_of_iterations : 1;
+  for (int i = 0; i < iters; ++i) {
+    for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
+      src_ptr[j] = rnd.Rand16() & 0xfff;
+      ref_ptr[j] = rnd.Rand16() & 0xfff;
+      second_pred_ptr[j] = rnd.Rand16() & 0xfff;
+      msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
+    }
+
+    for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+      aom_usec_timer timer;
+      aom_usec_timer_start(&timer);
+      for (int repeat = 0; repeat < run_times; ++repeat) {
+        ref_ret = ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
+                                    second_pred8_ptr, msk_ptr, msk_stride,
+                                    invert_mask);
+      }
+      aom_usec_timer_mark(&timer);
+      const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+      aom_usec_timer_start(&timer);
+      if (run_times == 1) {
+        API_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride,
+                                                     ref8_ptr, ref_stride,
+                                                     second_pred8_ptr, msk_ptr,
+                                                     msk_stride, invert_mask));
+      } else {
+        for (int repeat = 0; repeat < run_times; ++repeat) {
+          ret =
+              maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
+                            second_pred8_ptr, msk_ptr, msk_stride, invert_mask);
+        }
+      }
+      aom_usec_timer_mark(&timer);
+      const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+      if (run_times > 10) {
+        printf("%7.2f/%7.2fns", time1, time2);
+        printf("(%3.2f)\n", time1 / time2);
+      }
+      if (ret != ref_ret) {
+        err_count++;
+        if (first_failure == -1) first_failure = i;
+      }
+    }
+  }
+  EXPECT_EQ(0, err_count)
+      << "Error: High BD Masked SAD Test, output doesn't match. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(HighbdMaskedSADTest, OperationCheck) { runHighbdMaskedSADTest(1); }
+
+TEST_P(HighbdMaskedSADTest, DISABLED_Speed) { runHighbdMaskedSADTest(1000000); }
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+using std::make_tuple;
+
+#if HAVE_SSSE3
+const MaskedSADParam msad_test[] = {
+  make_tuple(&aom_masked_sad4x4_ssse3, &aom_masked_sad4x4_c),
+  make_tuple(&aom_masked_sad4x8_ssse3, &aom_masked_sad4x8_c),
+  make_tuple(&aom_masked_sad8x4_ssse3, &aom_masked_sad8x4_c),
+  make_tuple(&aom_masked_sad8x8_ssse3, &aom_masked_sad8x8_c),
+  make_tuple(&aom_masked_sad8x16_ssse3, &aom_masked_sad8x16_c),
+  make_tuple(&aom_masked_sad16x8_ssse3, &aom_masked_sad16x8_c),
+  make_tuple(&aom_masked_sad16x16_ssse3, &aom_masked_sad16x16_c),
+  make_tuple(&aom_masked_sad16x32_ssse3, &aom_masked_sad16x32_c),
+  make_tuple(&aom_masked_sad32x16_ssse3, &aom_masked_sad32x16_c),
+  make_tuple(&aom_masked_sad32x32_ssse3, &aom_masked_sad32x32_c),
+  make_tuple(&aom_masked_sad32x64_ssse3, &aom_masked_sad32x64_c),
+  make_tuple(&aom_masked_sad64x32_ssse3, &aom_masked_sad64x32_c),
+  make_tuple(&aom_masked_sad64x64_ssse3, &aom_masked_sad64x64_c),
+  make_tuple(&aom_masked_sad64x128_ssse3, &aom_masked_sad64x128_c),
+  make_tuple(&aom_masked_sad128x64_ssse3, &aom_masked_sad128x64_c),
+  make_tuple(&aom_masked_sad128x128_ssse3, &aom_masked_sad128x128_c),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_masked_sad4x16_ssse3, &aom_masked_sad4x16_c),
+  make_tuple(&aom_masked_sad16x4_ssse3, &aom_masked_sad16x4_c),
+  make_tuple(&aom_masked_sad8x32_ssse3, &aom_masked_sad8x32_c),
+  make_tuple(&aom_masked_sad32x8_ssse3, &aom_masked_sad32x8_c),
+  make_tuple(&aom_masked_sad16x64_ssse3, &aom_masked_sad16x64_c),
+  make_tuple(&aom_masked_sad64x16_ssse3, &aom_masked_sad64x16_c),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(SSSE3, MaskedSADTest, ::testing::ValuesIn(msad_test));
+
+const MaskedSADx4Param msadx4_test[] = {
+  make_tuple(&aom_masked_sad4x4x4d_ssse3, &aom_masked_sad4x4x4d_c),
+  make_tuple(&aom_masked_sad4x8x4d_ssse3, &aom_masked_sad4x8x4d_c),
+  make_tuple(&aom_masked_sad8x4x4d_ssse3, &aom_masked_sad8x4x4d_c),
+  make_tuple(&aom_masked_sad8x8x4d_ssse3, &aom_masked_sad8x8x4d_c),
+  make_tuple(&aom_masked_sad8x16x4d_ssse3, &aom_masked_sad8x16x4d_c),
+  make_tuple(&aom_masked_sad16x8x4d_ssse3, &aom_masked_sad16x8x4d_c),
+  make_tuple(&aom_masked_sad16x16x4d_ssse3, &aom_masked_sad16x16x4d_c),
+  make_tuple(&aom_masked_sad16x32x4d_ssse3, &aom_masked_sad16x32x4d_c),
+  make_tuple(&aom_masked_sad32x16x4d_ssse3, &aom_masked_sad32x16x4d_c),
+  make_tuple(&aom_masked_sad32x32x4d_ssse3, &aom_masked_sad32x32x4d_c),
+  make_tuple(&aom_masked_sad32x64x4d_ssse3, &aom_masked_sad32x64x4d_c),
+  make_tuple(&aom_masked_sad64x32x4d_ssse3, &aom_masked_sad64x32x4d_c),
+  make_tuple(&aom_masked_sad64x64x4d_ssse3, &aom_masked_sad64x64x4d_c),
+  make_tuple(&aom_masked_sad64x128x4d_ssse3, &aom_masked_sad64x128x4d_c),
+  make_tuple(&aom_masked_sad128x64x4d_ssse3, &aom_masked_sad128x64x4d_c),
+  make_tuple(&aom_masked_sad128x128x4d_ssse3, &aom_masked_sad128x128x4d_c),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_masked_sad4x16x4d_ssse3, &aom_masked_sad4x16x4d_c),
+  make_tuple(&aom_masked_sad16x4x4d_ssse3, &aom_masked_sad16x4x4d_c),
+  make_tuple(&aom_masked_sad8x32x4d_ssse3, &aom_masked_sad8x32x4d_c),
+  make_tuple(&aom_masked_sad32x8x4d_ssse3, &aom_masked_sad32x8x4d_c),
+  make_tuple(&aom_masked_sad16x64x4d_ssse3, &aom_masked_sad16x64x4d_c),
+  make_tuple(&aom_masked_sad64x16x4d_ssse3, &aom_masked_sad64x16x4d_c),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(SSSE3, MaskedSADx4Test,
+                         ::testing::ValuesIn(msadx4_test));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+const HighbdMaskedSADParam hbd_msad_test[] = {
+  make_tuple(&aom_highbd_masked_sad4x4_ssse3, &aom_highbd_masked_sad4x4_c),
+  make_tuple(&aom_highbd_masked_sad4x8_ssse3, &aom_highbd_masked_sad4x8_c),
+  make_tuple(&aom_highbd_masked_sad8x4_ssse3, &aom_highbd_masked_sad8x4_c),
+  make_tuple(&aom_highbd_masked_sad8x8_ssse3, &aom_highbd_masked_sad8x8_c),
+  make_tuple(&aom_highbd_masked_sad8x16_ssse3, &aom_highbd_masked_sad8x16_c),
+  make_tuple(&aom_highbd_masked_sad16x8_ssse3, &aom_highbd_masked_sad16x8_c),
+  make_tuple(&aom_highbd_masked_sad16x16_ssse3, &aom_highbd_masked_sad16x16_c),
+  make_tuple(&aom_highbd_masked_sad16x32_ssse3, &aom_highbd_masked_sad16x32_c),
+  make_tuple(&aom_highbd_masked_sad32x16_ssse3, &aom_highbd_masked_sad32x16_c),
+  make_tuple(&aom_highbd_masked_sad32x32_ssse3, &aom_highbd_masked_sad32x32_c),
+  make_tuple(&aom_highbd_masked_sad32x64_ssse3, &aom_highbd_masked_sad32x64_c),
+  make_tuple(&aom_highbd_masked_sad64x32_ssse3, &aom_highbd_masked_sad64x32_c),
+  make_tuple(&aom_highbd_masked_sad64x64_ssse3, &aom_highbd_masked_sad64x64_c),
+  make_tuple(&aom_highbd_masked_sad64x128_ssse3,
+             &aom_highbd_masked_sad64x128_c),
+  make_tuple(&aom_highbd_masked_sad128x64_ssse3,
+             &aom_highbd_masked_sad128x64_c),
+  make_tuple(&aom_highbd_masked_sad128x128_ssse3,
+             &aom_highbd_masked_sad128x128_c),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_highbd_masked_sad4x16_ssse3, &aom_highbd_masked_sad4x16_c),
+  make_tuple(&aom_highbd_masked_sad16x4_ssse3, &aom_highbd_masked_sad16x4_c),
+  make_tuple(&aom_highbd_masked_sad8x32_ssse3, &aom_highbd_masked_sad8x32_c),
+  make_tuple(&aom_highbd_masked_sad32x8_ssse3, &aom_highbd_masked_sad32x8_c),
+  make_tuple(&aom_highbd_masked_sad16x64_ssse3, &aom_highbd_masked_sad16x64_c),
+  make_tuple(&aom_highbd_masked_sad64x16_ssse3, &aom_highbd_masked_sad64x16_c),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(SSSE3, HighbdMaskedSADTest,
+                         ::testing::ValuesIn(hbd_msad_test));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX2
+const MaskedSADParam msad_avx2_test[] = {
+  make_tuple(&aom_masked_sad4x4_avx2, &aom_masked_sad4x4_ssse3),
+  make_tuple(&aom_masked_sad4x8_avx2, &aom_masked_sad4x8_ssse3),
+  make_tuple(&aom_masked_sad8x4_avx2, &aom_masked_sad8x4_ssse3),
+  make_tuple(&aom_masked_sad8x8_avx2, &aom_masked_sad8x8_ssse3),
+  make_tuple(&aom_masked_sad8x16_avx2, &aom_masked_sad8x16_ssse3),
+  make_tuple(&aom_masked_sad16x8_avx2, &aom_masked_sad16x8_ssse3),
+  make_tuple(&aom_masked_sad16x16_avx2, &aom_masked_sad16x16_ssse3),
+  make_tuple(&aom_masked_sad16x32_avx2, &aom_masked_sad16x32_ssse3),
+  make_tuple(&aom_masked_sad32x16_avx2, &aom_masked_sad32x16_ssse3),
+  make_tuple(&aom_masked_sad32x32_avx2, &aom_masked_sad32x32_ssse3),
+  make_tuple(&aom_masked_sad32x64_avx2, &aom_masked_sad32x64_ssse3),
+  make_tuple(&aom_masked_sad64x32_avx2, &aom_masked_sad64x32_ssse3),
+  make_tuple(&aom_masked_sad64x64_avx2, &aom_masked_sad64x64_ssse3),
+  make_tuple(&aom_masked_sad64x128_avx2, &aom_masked_sad64x128_ssse3),
+  make_tuple(&aom_masked_sad128x64_avx2, &aom_masked_sad128x64_ssse3),
+  make_tuple(&aom_masked_sad128x128_avx2, &aom_masked_sad128x128_ssse3),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_masked_sad4x16_avx2, &aom_masked_sad4x16_ssse3),
+  make_tuple(&aom_masked_sad16x4_avx2, &aom_masked_sad16x4_ssse3),
+  make_tuple(&aom_masked_sad8x32_avx2, &aom_masked_sad8x32_ssse3),
+  make_tuple(&aom_masked_sad32x8_avx2, &aom_masked_sad32x8_ssse3),
+  make_tuple(&aom_masked_sad16x64_avx2, &aom_masked_sad16x64_ssse3),
+  make_tuple(&aom_masked_sad64x16_avx2, &aom_masked_sad64x16_ssse3)
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, MaskedSADTest,
+                         ::testing::ValuesIn(msad_avx2_test));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+const HighbdMaskedSADParam hbd_msad_avx2_test[] = {
+  make_tuple(&aom_highbd_masked_sad4x4_avx2, &aom_highbd_masked_sad4x4_ssse3),
+  make_tuple(&aom_highbd_masked_sad4x8_avx2, &aom_highbd_masked_sad4x8_ssse3),
+  make_tuple(&aom_highbd_masked_sad8x4_avx2, &aom_highbd_masked_sad8x4_ssse3),
+  make_tuple(&aom_highbd_masked_sad8x8_avx2, &aom_highbd_masked_sad8x8_ssse3),
+  make_tuple(&aom_highbd_masked_sad8x16_avx2, &aom_highbd_masked_sad8x16_ssse3),
+  make_tuple(&aom_highbd_masked_sad16x8_avx2, &aom_highbd_masked_sad16x8_ssse3),
+  make_tuple(&aom_highbd_masked_sad16x16_avx2,
+             &aom_highbd_masked_sad16x16_ssse3),
+  make_tuple(&aom_highbd_masked_sad16x32_avx2,
+             &aom_highbd_masked_sad16x32_ssse3),
+  make_tuple(&aom_highbd_masked_sad32x16_avx2,
+             &aom_highbd_masked_sad32x16_ssse3),
+  make_tuple(&aom_highbd_masked_sad32x32_avx2,
+             &aom_highbd_masked_sad32x32_ssse3),
+  make_tuple(&aom_highbd_masked_sad32x64_avx2,
+             &aom_highbd_masked_sad32x64_ssse3),
+  make_tuple(&aom_highbd_masked_sad64x32_avx2,
+             &aom_highbd_masked_sad64x32_ssse3),
+  make_tuple(&aom_highbd_masked_sad64x64_avx2,
+             &aom_highbd_masked_sad64x64_ssse3),
+  make_tuple(&aom_highbd_masked_sad64x128_avx2,
+             &aom_highbd_masked_sad64x128_ssse3),
+  make_tuple(&aom_highbd_masked_sad128x64_avx2,
+             &aom_highbd_masked_sad128x64_ssse3),
+  make_tuple(&aom_highbd_masked_sad128x128_avx2,
+             &aom_highbd_masked_sad128x128_ssse3),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_highbd_masked_sad4x16_avx2, &aom_highbd_masked_sad4x16_ssse3),
+  make_tuple(&aom_highbd_masked_sad16x4_avx2, &aom_highbd_masked_sad16x4_ssse3),
+  make_tuple(&aom_highbd_masked_sad8x32_avx2, &aom_highbd_masked_sad8x32_ssse3),
+  make_tuple(&aom_highbd_masked_sad32x8_avx2, &aom_highbd_masked_sad32x8_ssse3),
+  make_tuple(&aom_highbd_masked_sad16x64_avx2,
+             &aom_highbd_masked_sad16x64_ssse3),
+  make_tuple(&aom_highbd_masked_sad64x16_avx2,
+             &aom_highbd_masked_sad64x16_ssse3)
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, HighbdMaskedSADTest,
+                         ::testing::ValuesIn(hbd_msad_avx2_test));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+const MaskedSADParam msad_test[] = {
+  make_tuple(&aom_masked_sad4x4_neon, &aom_masked_sad4x4_c),
+  make_tuple(&aom_masked_sad4x8_neon, &aom_masked_sad4x8_c),
+  make_tuple(&aom_masked_sad8x4_neon, &aom_masked_sad8x4_c),
+  make_tuple(&aom_masked_sad8x8_neon, &aom_masked_sad8x8_c),
+  make_tuple(&aom_masked_sad8x16_neon, &aom_masked_sad8x16_c),
+  make_tuple(&aom_masked_sad16x8_neon, &aom_masked_sad16x8_c),
+  make_tuple(&aom_masked_sad16x16_neon, &aom_masked_sad16x16_c),
+  make_tuple(&aom_masked_sad16x32_neon, &aom_masked_sad16x32_c),
+  make_tuple(&aom_masked_sad32x16_neon, &aom_masked_sad32x16_c),
+  make_tuple(&aom_masked_sad32x32_neon, &aom_masked_sad32x32_c),
+  make_tuple(&aom_masked_sad32x64_neon, &aom_masked_sad32x64_c),
+  make_tuple(&aom_masked_sad64x32_neon, &aom_masked_sad64x32_c),
+  make_tuple(&aom_masked_sad64x64_neon, &aom_masked_sad64x64_c),
+  make_tuple(&aom_masked_sad64x128_neon, &aom_masked_sad64x128_c),
+  make_tuple(&aom_masked_sad128x64_neon, &aom_masked_sad128x64_c),
+  make_tuple(&aom_masked_sad128x128_neon, &aom_masked_sad128x128_c),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_masked_sad4x16_neon, &aom_masked_sad4x16_c),
+  make_tuple(&aom_masked_sad16x4_neon, &aom_masked_sad16x4_c),
+  make_tuple(&aom_masked_sad8x32_neon, &aom_masked_sad8x32_c),
+  make_tuple(&aom_masked_sad32x8_neon, &aom_masked_sad32x8_c),
+  make_tuple(&aom_masked_sad16x64_neon, &aom_masked_sad16x64_c),
+  make_tuple(&aom_masked_sad64x16_neon, &aom_masked_sad64x16_c),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, MaskedSADTest, ::testing::ValuesIn(msad_test));
+
+const MaskedSADx4Param msadx4_test[] = {
+  make_tuple(&aom_masked_sad4x4x4d_neon, &aom_masked_sad4x4x4d_c),
+  make_tuple(&aom_masked_sad4x8x4d_neon, &aom_masked_sad4x8x4d_c),
+  make_tuple(&aom_masked_sad8x4x4d_neon, &aom_masked_sad8x4x4d_c),
+  make_tuple(&aom_masked_sad8x8x4d_neon, &aom_masked_sad8x8x4d_c),
+  make_tuple(&aom_masked_sad8x16x4d_neon, &aom_masked_sad8x16x4d_c),
+  make_tuple(&aom_masked_sad16x8x4d_neon, &aom_masked_sad16x8x4d_c),
+  make_tuple(&aom_masked_sad16x16x4d_neon, &aom_masked_sad16x16x4d_c),
+  make_tuple(&aom_masked_sad16x32x4d_neon, &aom_masked_sad16x32x4d_c),
+  make_tuple(&aom_masked_sad32x16x4d_neon, &aom_masked_sad32x16x4d_c),
+  make_tuple(&aom_masked_sad32x32x4d_neon, &aom_masked_sad32x32x4d_c),
+  make_tuple(&aom_masked_sad32x64x4d_neon, &aom_masked_sad32x64x4d_c),
+  make_tuple(&aom_masked_sad64x32x4d_neon, &aom_masked_sad64x32x4d_c),
+  make_tuple(&aom_masked_sad64x64x4d_neon, &aom_masked_sad64x64x4d_c),
+  make_tuple(&aom_masked_sad64x128x4d_neon, &aom_masked_sad64x128x4d_c),
+  make_tuple(&aom_masked_sad128x64x4d_neon, &aom_masked_sad128x64x4d_c),
+  make_tuple(&aom_masked_sad128x128x4d_neon, &aom_masked_sad128x128x4d_c),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_masked_sad4x16x4d_neon, &aom_masked_sad4x16x4d_c),
+  make_tuple(&aom_masked_sad16x4x4d_neon, &aom_masked_sad16x4x4d_c),
+  make_tuple(&aom_masked_sad8x32x4d_neon, &aom_masked_sad8x32x4d_c),
+  make_tuple(&aom_masked_sad32x8x4d_neon, &aom_masked_sad32x8x4d_c),
+  make_tuple(&aom_masked_sad16x64x4d_neon, &aom_masked_sad16x64x4d_c),
+  make_tuple(&aom_masked_sad64x16x4d_neon, &aom_masked_sad64x16x4d_c),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, MaskedSADx4Test,
+                         ::testing::ValuesIn(msadx4_test));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+const MaskedSADParam hbd_msad_neon_test[] = {
+  make_tuple(&aom_highbd_masked_sad4x4_neon, &aom_highbd_masked_sad4x4_c),
+  make_tuple(&aom_highbd_masked_sad4x8_neon, &aom_highbd_masked_sad4x8_c),
+  make_tuple(&aom_highbd_masked_sad8x4_neon, &aom_highbd_masked_sad8x4_c),
+  make_tuple(&aom_highbd_masked_sad8x8_neon, &aom_highbd_masked_sad8x8_c),
+  make_tuple(&aom_highbd_masked_sad8x16_neon, &aom_highbd_masked_sad8x16_c),
+  make_tuple(&aom_highbd_masked_sad16x8_neon, &aom_highbd_masked_sad16x8_c),
+  make_tuple(&aom_highbd_masked_sad16x16_neon, &aom_highbd_masked_sad16x16_c),
+  make_tuple(&aom_highbd_masked_sad16x32_neon, &aom_highbd_masked_sad16x32_c),
+  make_tuple(&aom_highbd_masked_sad32x16_neon, &aom_highbd_masked_sad32x16_c),
+  make_tuple(&aom_highbd_masked_sad32x32_neon, &aom_highbd_masked_sad32x32_c),
+  make_tuple(&aom_highbd_masked_sad32x64_neon, &aom_highbd_masked_sad32x64_c),
+  make_tuple(&aom_highbd_masked_sad64x32_neon, &aom_highbd_masked_sad64x32_c),
+  make_tuple(&aom_highbd_masked_sad64x64_neon, &aom_highbd_masked_sad64x64_c),
+  make_tuple(&aom_highbd_masked_sad64x128_neon, &aom_highbd_masked_sad64x128_c),
+  make_tuple(&aom_highbd_masked_sad128x64_neon, &aom_highbd_masked_sad128x64_c),
+  make_tuple(&aom_highbd_masked_sad128x128_neon,
+             &aom_highbd_masked_sad128x128_c),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_highbd_masked_sad4x16_neon, &aom_highbd_masked_sad4x16_c),
+  make_tuple(&aom_highbd_masked_sad16x4_neon, &aom_highbd_masked_sad16x4_c),
+  make_tuple(&aom_highbd_masked_sad8x32_neon, &aom_highbd_masked_sad8x32_c),
+  make_tuple(&aom_highbd_masked_sad32x8_neon, &aom_highbd_masked_sad32x8_c),
+  make_tuple(&aom_highbd_masked_sad16x64_neon, &aom_highbd_masked_sad16x64_c),
+  make_tuple(&aom_highbd_masked_sad64x16_neon, &aom_highbd_masked_sad64x16_c),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, HighbdMaskedSADTest,
+                         ::testing::ValuesIn(hbd_msad_neon_test));
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#endif  // HAVE_NEON
+
+}  // namespace
diff --git a/third_party/aom/test/masked_variance_test.cc b/third_party/aom/test/masked_variance_test.cc
new file mode 100644
index 0000000000..8482a12f53
--- /dev/null
+++ b/third_party/aom/test/masked_variance_test.cc
@@ -0,0 +1,712 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/aom_filter.h"
+#include "aom_mem/aom_mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+const int number_of_iterations = 200;
+
+typedef unsigned int (*MaskedSubPixelVarianceFunc)(
+    const uint8_t *src, int src_stride, int xoffset, int yoffset,
+    const uint8_t *ref, int ref_stride, const uint8_t *second_pred,
+    const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse);
+
+typedef std::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc>
+    MaskedSubPixelVarianceParam;
+
+class MaskedSubPixelVarianceTest
+    : public ::testing::TestWithParam<MaskedSubPixelVarianceParam> {
+ public:
+  ~MaskedSubPixelVarianceTest() override = default;
+  void SetUp() override {
+    opt_func_ = GET_PARAM(0);
+    ref_func_ = GET_PARAM(1);
+  }
+
+ protected:
+  MaskedSubPixelVarianceFunc opt_func_;
+  MaskedSubPixelVarianceFunc ref_func_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MaskedSubPixelVarianceTest);
+
+TEST_P(MaskedSubPixelVarianceTest, OperationCheck) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  // Note: We pad out the input array to a multiple of 16 bytes wide, so that
+  // consecutive rows keep the 16-byte alignment.
+  DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
+  DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
+  DECLARE_ALIGNED(16, uint8_t,
+                  second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
+  int err_count = 0;
+  int first_failure = -1;
+  int src_stride = (MAX_SB_SIZE + 16);
+  int ref_stride = (MAX_SB_SIZE + 16);
+  int msk_stride = (MAX_SB_SIZE + 16);
+  int xoffset;
+  int yoffset;
+
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int xoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) };
+    int yoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) };
+    for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16); j++) {
+      src_ptr[j] = rnd.Rand8();
+      ref_ptr[j] = rnd.Rand8();
+      second_pred_ptr[j] = rnd.Rand8();
+      msk_ptr[j] = rnd(65);
+    }
+    for (int k = 0; k < 3; k++) {
+      for (int l = 0; l < 3; l++) {
+        xoffset = xoffsets[k];
+        yoffset = yoffsets[l];
+        for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+          ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
+                              ref_stride, second_pred_ptr, msk_ptr, msk_stride,
+                              invert_mask, &ref_sse);
+          API_REGISTER_STATE_CHECK(
+              opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset,
+                                  ref_ptr, ref_stride, second_pred_ptr, msk_ptr,
+                                  msk_stride, invert_mask, &opt_sse));
+
+          if (opt_ret != ref_ret || opt_sse != ref_sse) {
+            err_count++;
+            if (first_failure == -1) first_failure = i;
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+      << "Error: Masked Sub Pixel Variance Test OperationCheck,"
+      << "C output doesn't match SSSE3 output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
+  DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
+  DECLARE_ALIGNED(16, uint8_t,
+                  second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
+  int first_failure_x = -1;
+  int first_failure_y = -1;
+  int err_count = 0;
+  int first_failure = -1;
+  int src_stride = (MAX_SB_SIZE + 16);
+  int ref_stride = (MAX_SB_SIZE + 16);
+  int msk_stride = (MAX_SB_SIZE + 16);
+
+  for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
+    for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
+      for (int i = 0; i < 16; ++i) {
+        memset(src_ptr, (i & 0x1) ? 255 : 0,
+               (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
+        memset(ref_ptr, (i & 0x2) ? 255 : 0,
+               (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
+        memset(second_pred_ptr, (i & 0x4) ? 255 : 0,
+               (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
+        memset(msk_ptr, (i & 0x8) ? 64 : 0,
+               (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
+
+        for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+          ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
+                              ref_stride, second_pred_ptr, msk_ptr, msk_stride,
+                              invert_mask, &ref_sse);
+          API_REGISTER_STATE_CHECK(
+              opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset,
+                                  ref_ptr, ref_stride, second_pred_ptr, msk_ptr,
+                                  msk_stride, invert_mask, &opt_sse));
+
+          if (opt_ret != ref_ret || opt_sse != ref_sse) {
+            err_count++;
+            if (first_failure == -1) {
+              first_failure = i;
+              first_failure_x = xoffset;
+              first_failure_y = yoffset;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues,"
+                          << "C output doesn't match SSSE3 output. "
+                          << "First failed at test case " << first_failure
+                          << " x_offset = " << first_failure_x
+                          << " y_offset = " << first_failure_y;
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef std::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc,
+                   aom_bit_depth_t>
+    HighbdMaskedSubPixelVarianceParam;
+
+class HighbdMaskedSubPixelVarianceTest
+    : public ::testing::TestWithParam<HighbdMaskedSubPixelVarianceParam> {
+ public:
+  ~HighbdMaskedSubPixelVarianceTest() override = default;
+  void SetUp() override {
+    opt_func_ = GET_PARAM(0);
+    ref_func_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+  }
+
+ protected:
+  MaskedSubPixelVarianceFunc opt_func_;
+  MaskedSubPixelVarianceFunc ref_func_;
+  aom_bit_depth_t bit_depth_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HighbdMaskedSubPixelVarianceTest);
+
+TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
+  DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
+  DECLARE_ALIGNED(16, uint16_t,
+                  second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
+  uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
+  uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
+  int err_count = 0;
+  int first_failure = -1;
+  int first_failure_x = -1;
+  int first_failure_y = -1;
+  int src_stride = (MAX_SB_SIZE + 8);
+  int ref_stride = (MAX_SB_SIZE + 8);
+  int msk_stride = (MAX_SB_SIZE + 8);
+  int xoffset, yoffset;
+
+  for (int i = 0; i < number_of_iterations; ++i) {
+    for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8); j++) {
+      src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
+      ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
+      second_pred_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
+      msk_ptr[j] = rnd(65);
+    }
+    for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
+      for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
+        for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+          ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr,
+                              ref_stride, second_pred8_ptr, msk_ptr, msk_stride,
+                              invert_mask, &ref_sse);
+          API_REGISTER_STATE_CHECK(
+              opt_ret = opt_func_(src8_ptr, src_stride, xoffset, yoffset,
+                                  ref8_ptr, ref_stride, second_pred8_ptr,
+                                  msk_ptr, msk_stride, invert_mask, &opt_sse));
+
+          if (opt_ret != ref_ret || opt_sse != ref_sse) {
+            err_count++;
+            if (first_failure == -1) {
+              first_failure = i;
+              first_failure_x = xoffset;
+              first_failure_y = yoffset;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+      << "Error: Masked Sub Pixel Variance Test OperationCheck,"
+      << "C output doesn't match SSSE3 output. "
+      << "First failed at test case " << first_failure
+      << " x_offset = " << first_failure_x << " y_offset = " << first_failure_y;
+}
+
+TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
+  DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
+  DECLARE_ALIGNED(16, uint16_t,
+                  second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
+  uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
+  uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
+  int first_failure_x = -1;
+  int first_failure_y = -1;
+  int err_count = 0;
+  int first_failure = -1;
+  int src_stride = (MAX_SB_SIZE + 8);
+  int ref_stride = (MAX_SB_SIZE + 8);
+  int msk_stride = (MAX_SB_SIZE + 8);
+
+  for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
+    for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
+      for (int i = 0; i < 16; ++i) {
+        aom_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
+                     (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
+        aom_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
+                     (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
+        aom_memset16(second_pred_ptr, (i & 0x4) ? ((1 << bit_depth_) - 1) : 0,
+                     (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
+        memset(msk_ptr, (i & 0x8) ? 64 : 0,
+               (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
+
+        for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+          ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr,
+                              ref_stride, second_pred8_ptr, msk_ptr, msk_stride,
+                              invert_mask, &ref_sse);
+          API_REGISTER_STATE_CHECK(
+              opt_ret = opt_func_(src8_ptr, src_stride, xoffset, yoffset,
+                                  ref8_ptr, ref_stride, second_pred8_ptr,
+                                  msk_ptr, msk_stride, invert_mask, &opt_sse));
+
+          if (opt_ret != ref_ret || opt_sse != ref_sse) {
+            err_count++;
+            if (first_failure == -1) {
+              first_failure = i;
+              first_failure_x = xoffset;
+              first_failure_y = yoffset;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues,"
+                          << "C output doesn't match SSSE3 output. "
+                          << "First failed at test case " << first_failure
+                          << " x_offset = " << first_failure_x
+                          << " y_offset = " << first_failure_y;
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+using std::make_tuple;
+
+#if HAVE_SSSE3
+
+const MaskedSubPixelVarianceParam sub_pel_var_test[] = {
+  make_tuple(&aom_masked_sub_pixel_variance128x128_ssse3,
+             &aom_masked_sub_pixel_variance128x128_c),
+  make_tuple(&aom_masked_sub_pixel_variance128x64_ssse3,
+             &aom_masked_sub_pixel_variance128x64_c),
+  make_tuple(&aom_masked_sub_pixel_variance64x128_ssse3,
+             &aom_masked_sub_pixel_variance64x128_c),
+  make_tuple(&aom_masked_sub_pixel_variance64x64_ssse3,
+             &aom_masked_sub_pixel_variance64x64_c),
+  make_tuple(&aom_masked_sub_pixel_variance64x32_ssse3,
+             &aom_masked_sub_pixel_variance64x32_c),
+  make_tuple(&aom_masked_sub_pixel_variance32x64_ssse3,
+             &aom_masked_sub_pixel_variance32x64_c),
+  make_tuple(&aom_masked_sub_pixel_variance32x32_ssse3,
+             &aom_masked_sub_pixel_variance32x32_c),
+  make_tuple(&aom_masked_sub_pixel_variance32x16_ssse3,
+             &aom_masked_sub_pixel_variance32x16_c),
+  make_tuple(&aom_masked_sub_pixel_variance16x32_ssse3,
+             &aom_masked_sub_pixel_variance16x32_c),
+  make_tuple(&aom_masked_sub_pixel_variance16x16_ssse3,
+             &aom_masked_sub_pixel_variance16x16_c),
+  make_tuple(&aom_masked_sub_pixel_variance16x8_ssse3,
+             &aom_masked_sub_pixel_variance16x8_c),
+  make_tuple(&aom_masked_sub_pixel_variance8x16_ssse3,
+             &aom_masked_sub_pixel_variance8x16_c),
+  make_tuple(&aom_masked_sub_pixel_variance8x8_ssse3,
+             &aom_masked_sub_pixel_variance8x8_c),
+  make_tuple(&aom_masked_sub_pixel_variance8x4_ssse3,
+             &aom_masked_sub_pixel_variance8x4_c),
+  make_tuple(&aom_masked_sub_pixel_variance4x8_ssse3,
+             &aom_masked_sub_pixel_variance4x8_c),
+  make_tuple(&aom_masked_sub_pixel_variance4x4_ssse3,
+             &aom_masked_sub_pixel_variance4x4_c),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_masked_sub_pixel_variance64x16_ssse3,
+             &aom_masked_sub_pixel_variance64x16_c),
+  make_tuple(&aom_masked_sub_pixel_variance16x64_ssse3,
+             &aom_masked_sub_pixel_variance16x64_c),
+  make_tuple(&aom_masked_sub_pixel_variance32x8_ssse3,
+             &aom_masked_sub_pixel_variance32x8_c),
+  make_tuple(&aom_masked_sub_pixel_variance8x32_ssse3,
+             &aom_masked_sub_pixel_variance8x32_c),
+  make_tuple(&aom_masked_sub_pixel_variance16x4_ssse3,
+             &aom_masked_sub_pixel_variance16x4_c),
+  make_tuple(&aom_masked_sub_pixel_variance4x16_ssse3,
+             &aom_masked_sub_pixel_variance4x16_c),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(SSSE3_C_COMPARE, MaskedSubPixelVarianceTest,
+                         ::testing::ValuesIn(sub_pel_var_test));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+const HighbdMaskedSubPixelVarianceParam hbd_sub_pel_var_test[] = {
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x128_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance128x128_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x64_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance128x64_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x128_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance64x128_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x64_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance64x64_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x32_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance64x32_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x64_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance32x64_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x32_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance32x32_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x16_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance32x16_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x32_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance16x32_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x16_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance16x16_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x8_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance16x8_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x16_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance8x16_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x8_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance8x8_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x4_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance8x4_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x8_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance4x8_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x4_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance4x4_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x128_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance128x128_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x64_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance128x64_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x128_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance64x128_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x64_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance64x64_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x32_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance64x32_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x64_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance32x64_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x32_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance32x32_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x16_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance32x16_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x32_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance16x32_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x16_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance16x16_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x8_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance16x8_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x16_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance8x16_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x8_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance8x8_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x4_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance8x4_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x8_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance4x8_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x4_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance4x4_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x128_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance128x128_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x64_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance128x64_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x128_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance64x128_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x64_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance64x64_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x32_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance64x32_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x64_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance32x64_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x32_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance32x32_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x16_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance32x16_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x32_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance16x32_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x16_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance16x16_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x8_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance16x8_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x16_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance8x16_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x8_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance8x8_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x4_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance8x4_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x8_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance4x8_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x4_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance4x4_c, AOM_BITS_12),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x16_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance64x16_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x64_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance16x64_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x8_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance32x8_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x32_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance8x32_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x4_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance16x4_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x16_ssse3,
+             &aom_highbd_8_masked_sub_pixel_variance4x16_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x16_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance64x16_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x64_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance16x64_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x8_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance32x8_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x32_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance8x32_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x4_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance16x4_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x16_ssse3,
+             &aom_highbd_10_masked_sub_pixel_variance4x16_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x16_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance64x16_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x64_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance16x64_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x8_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance32x8_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x32_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance8x32_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x4_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance16x4_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x16_ssse3,
+             &aom_highbd_12_masked_sub_pixel_variance4x16_c, AOM_BITS_12),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(SSSE3_C_COMPARE, HighbdMaskedSubPixelVarianceTest,
+                         ::testing::ValuesIn(hbd_sub_pel_var_test));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_SSSE3
+
+#if HAVE_NEON
+
+const MaskedSubPixelVarianceParam sub_pel_var_test[] = {
+  make_tuple(&aom_masked_sub_pixel_variance128x128_neon,
+             &aom_masked_sub_pixel_variance128x128_c),
+  make_tuple(&aom_masked_sub_pixel_variance128x64_neon,
+             &aom_masked_sub_pixel_variance128x64_c),
+  make_tuple(&aom_masked_sub_pixel_variance64x128_neon,
+             &aom_masked_sub_pixel_variance64x128_c),
+  make_tuple(&aom_masked_sub_pixel_variance64x64_neon,
+             &aom_masked_sub_pixel_variance64x64_c),
+  make_tuple(&aom_masked_sub_pixel_variance64x32_neon,
+             &aom_masked_sub_pixel_variance64x32_c),
+  make_tuple(&aom_masked_sub_pixel_variance32x64_neon,
+             &aom_masked_sub_pixel_variance32x64_c),
+  make_tuple(&aom_masked_sub_pixel_variance32x32_neon,
+             &aom_masked_sub_pixel_variance32x32_c),
+  make_tuple(&aom_masked_sub_pixel_variance32x16_neon,
+             &aom_masked_sub_pixel_variance32x16_c),
+  make_tuple(&aom_masked_sub_pixel_variance16x32_neon,
+             &aom_masked_sub_pixel_variance16x32_c),
+  make_tuple(&aom_masked_sub_pixel_variance16x16_neon,
+             &aom_masked_sub_pixel_variance16x16_c),
+  make_tuple(&aom_masked_sub_pixel_variance16x8_neon,
+             &aom_masked_sub_pixel_variance16x8_c),
+  make_tuple(&aom_masked_sub_pixel_variance8x16_neon,
+             &aom_masked_sub_pixel_variance8x16_c),
+  make_tuple(&aom_masked_sub_pixel_variance8x8_neon,
+             &aom_masked_sub_pixel_variance8x8_c),
+  make_tuple(&aom_masked_sub_pixel_variance8x4_neon,
+             &aom_masked_sub_pixel_variance8x4_c),
+  make_tuple(&aom_masked_sub_pixel_variance4x8_neon,
+             &aom_masked_sub_pixel_variance4x8_c),
+  make_tuple(&aom_masked_sub_pixel_variance4x4_neon,
+             &aom_masked_sub_pixel_variance4x4_c),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_masked_sub_pixel_variance64x16_neon,
+             &aom_masked_sub_pixel_variance64x16_c),
+  make_tuple(&aom_masked_sub_pixel_variance16x64_neon,
+             &aom_masked_sub_pixel_variance16x64_c),
+  make_tuple(&aom_masked_sub_pixel_variance32x8_neon,
+             &aom_masked_sub_pixel_variance32x8_c),
+  make_tuple(&aom_masked_sub_pixel_variance8x32_neon,
+             &aom_masked_sub_pixel_variance8x32_c),
+  make_tuple(&aom_masked_sub_pixel_variance16x4_neon,
+             &aom_masked_sub_pixel_variance16x4_c),
+  make_tuple(&aom_masked_sub_pixel_variance4x16_neon,
+             &aom_masked_sub_pixel_variance4x16_c),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON_C_COMPARE, MaskedSubPixelVarianceTest,
+                         ::testing::ValuesIn(sub_pel_var_test));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+const HighbdMaskedSubPixelVarianceParam hbd_sub_pel_var_test_neon[] = {
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x128_neon,
+             &aom_highbd_8_masked_sub_pixel_variance128x128_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x64_neon,
+             &aom_highbd_8_masked_sub_pixel_variance128x64_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x128_neon,
+             &aom_highbd_8_masked_sub_pixel_variance64x128_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x64_neon,
+             &aom_highbd_8_masked_sub_pixel_variance64x64_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x32_neon,
+             &aom_highbd_8_masked_sub_pixel_variance64x32_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x64_neon,
+             &aom_highbd_8_masked_sub_pixel_variance32x64_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x32_neon,
+             &aom_highbd_8_masked_sub_pixel_variance32x32_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x16_neon,
+             &aom_highbd_8_masked_sub_pixel_variance32x16_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x32_neon,
+             &aom_highbd_8_masked_sub_pixel_variance16x32_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x16_neon,
+             &aom_highbd_8_masked_sub_pixel_variance16x16_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x8_neon,
+             &aom_highbd_8_masked_sub_pixel_variance16x8_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x16_neon,
+             &aom_highbd_8_masked_sub_pixel_variance8x16_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x8_neon,
+             &aom_highbd_8_masked_sub_pixel_variance8x8_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x4_neon,
+             &aom_highbd_8_masked_sub_pixel_variance8x4_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x8_neon,
+             &aom_highbd_8_masked_sub_pixel_variance4x8_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x4_neon,
+             &aom_highbd_8_masked_sub_pixel_variance4x4_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x128_neon,
+             &aom_highbd_10_masked_sub_pixel_variance128x128_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x64_neon,
+             &aom_highbd_10_masked_sub_pixel_variance128x64_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x128_neon,
+             &aom_highbd_10_masked_sub_pixel_variance64x128_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x64_neon,
+             &aom_highbd_10_masked_sub_pixel_variance64x64_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x32_neon,
+             &aom_highbd_10_masked_sub_pixel_variance64x32_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x64_neon,
+             &aom_highbd_10_masked_sub_pixel_variance32x64_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x32_neon,
+             &aom_highbd_10_masked_sub_pixel_variance32x32_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x16_neon,
+             &aom_highbd_10_masked_sub_pixel_variance32x16_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x32_neon,
+             &aom_highbd_10_masked_sub_pixel_variance16x32_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x16_neon,
+             &aom_highbd_10_masked_sub_pixel_variance16x16_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x8_neon,
+             &aom_highbd_10_masked_sub_pixel_variance16x8_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x16_neon,
+             &aom_highbd_10_masked_sub_pixel_variance8x16_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x8_neon,
+             &aom_highbd_10_masked_sub_pixel_variance8x8_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x4_neon,
+             &aom_highbd_10_masked_sub_pixel_variance8x4_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x8_neon,
+             &aom_highbd_10_masked_sub_pixel_variance4x8_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x4_neon,
+             &aom_highbd_10_masked_sub_pixel_variance4x4_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x128_neon,
+             &aom_highbd_12_masked_sub_pixel_variance128x128_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x64_neon,
+             &aom_highbd_12_masked_sub_pixel_variance128x64_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x128_neon,
+             &aom_highbd_12_masked_sub_pixel_variance64x128_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x64_neon,
+             &aom_highbd_12_masked_sub_pixel_variance64x64_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x32_neon,
+             &aom_highbd_12_masked_sub_pixel_variance64x32_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x64_neon,
+             &aom_highbd_12_masked_sub_pixel_variance32x64_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x32_neon,
+             &aom_highbd_12_masked_sub_pixel_variance32x32_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x16_neon,
+             &aom_highbd_12_masked_sub_pixel_variance32x16_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x32_neon,
+             &aom_highbd_12_masked_sub_pixel_variance16x32_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x16_neon,
+             &aom_highbd_12_masked_sub_pixel_variance16x16_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x8_neon,
+             &aom_highbd_12_masked_sub_pixel_variance16x8_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x16_neon,
+             &aom_highbd_12_masked_sub_pixel_variance8x16_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x8_neon,
+             &aom_highbd_12_masked_sub_pixel_variance8x8_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x4_neon,
+             &aom_highbd_12_masked_sub_pixel_variance8x4_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x8_neon,
+             &aom_highbd_12_masked_sub_pixel_variance4x8_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x4_neon,
+             &aom_highbd_12_masked_sub_pixel_variance4x4_c, AOM_BITS_12),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x16_neon,
+             &aom_highbd_8_masked_sub_pixel_variance64x16_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x64_neon,
+             &aom_highbd_8_masked_sub_pixel_variance16x64_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x8_neon,
+             &aom_highbd_8_masked_sub_pixel_variance32x8_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x32_neon,
+             &aom_highbd_8_masked_sub_pixel_variance8x32_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x4_neon,
+             &aom_highbd_8_masked_sub_pixel_variance16x4_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x16_neon,
+             &aom_highbd_8_masked_sub_pixel_variance4x16_c, AOM_BITS_8),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x16_neon,
+             &aom_highbd_10_masked_sub_pixel_variance64x16_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x64_neon,
+             &aom_highbd_10_masked_sub_pixel_variance16x64_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x8_neon,
+             &aom_highbd_10_masked_sub_pixel_variance32x8_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x32_neon,
+             &aom_highbd_10_masked_sub_pixel_variance8x32_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x4_neon,
+             &aom_highbd_10_masked_sub_pixel_variance16x4_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x16_neon,
+             &aom_highbd_10_masked_sub_pixel_variance4x16_c, AOM_BITS_10),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x16_neon,
+             &aom_highbd_12_masked_sub_pixel_variance64x16_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x64_neon,
+             &aom_highbd_12_masked_sub_pixel_variance16x64_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x8_neon,
+             &aom_highbd_12_masked_sub_pixel_variance32x8_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x32_neon,
+             &aom_highbd_12_masked_sub_pixel_variance8x32_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x4_neon,
+             &aom_highbd_12_masked_sub_pixel_variance16x4_c, AOM_BITS_12),
+  make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x16_neon,
+             &aom_highbd_12_masked_sub_pixel_variance4x16_c, AOM_BITS_12),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON_C_COMPARE, HighbdMaskedSubPixelVarianceTest,
+                         ::testing::ValuesIn(hbd_sub_pel_var_test_neon));
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#endif  // HAVE_NEON
+}  // namespace
diff --git a/third_party/aom/test/md5_helper.h b/third_party/aom/test/md5_helper.h
new file mode 100644
index 0000000000..69f1ae76b0
--- /dev/null
+++ b/third_party/aom/test/md5_helper.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_MD5_HELPER_H_
+#define AOM_TEST_MD5_HELPER_H_
+
+#include "aom/aom_decoder.h"
+#include "common/md5_utils.h"
+
+namespace libaom_test {
+class MD5 {
+ public:
+  MD5() { MD5Init(&md5_); }
+
+  void Add(const aom_image_t *img) {
+    for (int plane = 0; plane < 3; ++plane) {
+      const uint8_t *buf = img->planes[plane];
+      // Calculate the width and height to do the md5 check. For the chroma
+      // plane, we never want to round down and thus skip a pixel so if
+      // we are shifting by 1 (chroma_shift) we add 1 before doing the shift.
+      // This works only for chroma_shift of 0 and 1.
+      const int bytes_per_sample =
+          (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
+      const int h =
+          plane ? (img->d_h + img->y_chroma_shift) >> img->y_chroma_shift
+                : img->d_h;
+      const int w =
+          (plane ? (img->d_w + img->x_chroma_shift) >> img->x_chroma_shift
+                 : img->d_w) *
+          bytes_per_sample;
+
+      for (int y = 0; y < h; ++y) {
+        MD5Update(&md5_, buf, w);
+        buf += img->stride[plane];
+      }
+    }
+  }
+
+  void Add(const uint8_t *data, size_t size) {
+    MD5Update(&md5_, data, static_cast<uint32_t>(size));
+  }
+
+  const char *Get() {
+    static const char hex[16] = {
+      '0', '1', '2', '3', '4', '5', '6', '7',
+      '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
+    };
+    uint8_t tmp[16];
+    MD5Context ctx_tmp = md5_;
+
+    MD5Final(tmp, &ctx_tmp);
+    for (int i = 0; i < 16; i++) {
+      res_[i * 2 + 0] = hex[tmp[i] >> 4];
+      res_[i * 2 + 1] = hex[tmp[i] & 0xf];
+    }
+    res_[32] = 0;
+
+    return res_;
+  }
+
+ protected:
+  char res_[33];
+  MD5Context md5_;
+};
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_MD5_HELPER_H_
diff --git a/third_party/aom/test/metadata_test.cc b/third_party/aom/test/metadata_test.cc
new file mode 100644
index 0000000000..9467c29e86
--- /dev/null
+++ b/third_party/aom/test/metadata_test.cc
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "aom/aom_codec.h"
+#include "aom/aom_image.h"
+#include "aom/internal/aom_image_internal.h"
+#include "aom_scale/yv12config.h"
+#include "av1/encoder/bitstream.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/video_source.h"
+
+namespace {
+const size_t kMetadataPayloadSizeT35 = 24;
+// 0xB5 stands for the itut t35 metadata country code for the Unites States
+const uint8_t kMetadataPayloadT35[kMetadataPayloadSizeT35] = {
+  0xB5, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
+  0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+};
+
+const size_t kMetadataPayloadSizeCll = 4;
+const uint8_t kMetadataPayloadCll[kMetadataPayloadSizeCll] = { 0xB5, 0x01, 0x02,
+                                                               0x03 };
+
+const size_t kMetadataObuSizeT35 = 28;
+const uint8_t kMetadataObuT35[kMetadataObuSizeT35] = {
+  0x2A, 0x1A, 0x02, 0xB5, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10,
+  0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x80
+};
+const size_t kMetadataObuSizeMdcv = 28;
+const uint8_t kMetadataObuMdcv[kMetadataObuSizeMdcv] = {
+  0x2A, 0x1A, 0x02, 0xB5, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10,
+  0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x80
+};
+const size_t kMetadataObuSizeCll = 8;
+const uint8_t kMetadataObuCll[kMetadataObuSizeCll] = { 0x2A, 0x06, 0x01, 0xB5,
+                                                       0x01, 0x02, 0x03, 0x80 };
+
+class MetadataEncodeTest
+    : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  MetadataEncodeTest() : EncoderTest(GET_PARAM(0)) {}
+
+  ~MetadataEncodeTest() override = default;
+
+  void SetUp() override { InitializeConfig(GET_PARAM(1)); }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder * /*encoder*/) override {
+    aom_image_t *current_frame = video->img();
+    if (current_frame) {
+      if (current_frame->metadata) aom_img_remove_metadata(current_frame);
+      ASSERT_EQ(aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_ITUT_T35,
+                                     kMetadataPayloadT35, 0, AOM_MIF_ANY_FRAME),
+                -1);
+      ASSERT_EQ(aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_ITUT_T35,
+                                     nullptr, kMetadataPayloadSizeT35,
+                                     AOM_MIF_ANY_FRAME),
+                -1);
+      ASSERT_EQ(aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_ITUT_T35,
+                                     nullptr, 0, AOM_MIF_ANY_FRAME),
+                -1);
+      ASSERT_EQ(
+          aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_ITUT_T35,
+                               kMetadataPayloadT35, kMetadataPayloadSizeT35,
+                               AOM_MIF_ANY_FRAME),
+          0);
+
+      ASSERT_EQ(
+          aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_HDR_MDCV,
+                               kMetadataPayloadT35, kMetadataPayloadSizeT35,
+                               AOM_MIF_KEY_FRAME),
+          0);
+
+      ASSERT_EQ(
+          aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_HDR_CLL,
+                               kMetadataPayloadCll, kMetadataPayloadSizeCll,
+                               AOM_MIF_KEY_FRAME),
+          0);
+    }
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) {
+      const size_t bitstream_size = pkt->data.frame.sz;
+      const uint8_t *bitstream =
+          static_cast<const uint8_t *>(pkt->data.frame.buf);
+      // look for valid metadatas in bitstream
+      bool itut_t35_metadata_found = false;
+      if (bitstream_size >= kMetadataObuSizeT35) {
+        for (size_t i = 0; i <= bitstream_size - kMetadataObuSizeT35; ++i) {
+          if (memcmp(bitstream + i, kMetadataObuT35, kMetadataObuSizeT35) ==
+              0) {
+            itut_t35_metadata_found = true;
+          }
+        }
+      }
+      ASSERT_EQ(itut_t35_metadata_found, 1u);
+
+      // Testing for HDR MDCV metadata
+      bool hdr_mdcv_metadata_found = false;
+      if (bitstream_size >= kMetadataObuSizeMdcv) {
+        for (size_t i = 0; i <= bitstream_size - kMetadataObuSizeMdcv; ++i) {
+          if (memcmp(bitstream + i, kMetadataObuMdcv, kMetadataObuSizeMdcv) ==
+              0) {
+            hdr_mdcv_metadata_found = true;
+          }
+        }
+      }
+      ASSERT_TRUE(hdr_mdcv_metadata_found);
+
+      // Testing for HDR CLL metadata
+      bool hdr_cll_metadata_found = false;
+      if (bitstream_size >= kMetadataObuSizeCll) {
+        for (size_t i = 0; i <= bitstream_size - kMetadataObuSizeCll; ++i) {
+          if (memcmp(bitstream + i, kMetadataObuCll, kMetadataObuSizeCll) ==
+              0) {
+            hdr_cll_metadata_found = true;
+          }
+        }
+      }
+      ASSERT_TRUE(hdr_cll_metadata_found);
+    }
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             aom_codec_pts_t /*pts*/) override {
+    ASSERT_NE(img.metadata, nullptr);
+
+    ASSERT_EQ(img.metadata->sz, 3u);
+
+    for (size_t i = 0; i < img.metadata->sz - 1; ++i) {
+      ASSERT_EQ(kMetadataPayloadSizeT35, img.metadata->metadata_array[i]->sz);
+      EXPECT_EQ(
+          memcmp(kMetadataPayloadT35, img.metadata->metadata_array[i]->payload,
+                 kMetadataPayloadSizeT35),
+          0);
+    }
+
+    ASSERT_EQ(kMetadataPayloadSizeCll, img.metadata->metadata_array[2]->sz);
+    EXPECT_EQ(
+        memcmp(kMetadataPayloadCll, img.metadata->metadata_array[2]->payload,
+               kMetadataPayloadSizeCll),
+        0);
+  }
+};
+
+TEST_P(MetadataEncodeTest, TestMetadataEncoding) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 5);
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  cfg_.g_w = 352;
+  cfg_.g_h = 288;
+
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 600;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 2;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_undershoot_pct = 50;
+  cfg_.rc_overshoot_pct = 50;
+  cfg_.rc_end_usage = AOM_CBR;
+  cfg_.kf_mode = AOM_KF_AUTO;
+  cfg_.g_lag_in_frames = 1;
+  cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
+  // Enable dropped frames.
+  cfg_.rc_dropframe_thresh = 1;
+  // Disable error_resilience mode.
+  cfg_.g_error_resilient = 0;
+  // Run at low bitrate.
+  cfg_.rc_target_bitrate = 40;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+AV1_INSTANTIATE_TEST_SUITE(MetadataEncodeTest,
+                           ::testing::Values(::libaom_test::kOnePassGood));
+
+}  // namespace
+
+TEST(MetadataTest, MetadataAllocation) {
+  aom_metadata_t *metadata =
+      aom_img_metadata_alloc(OBU_METADATA_TYPE_ITUT_T35, kMetadataPayloadT35,
+                             kMetadataPayloadSizeT35, AOM_MIF_ANY_FRAME);
+  ASSERT_NE(metadata, nullptr);
+  aom_img_metadata_free(metadata);
+}
+
+TEST(MetadataTest, MetadataArrayAllocation) {
+  aom_metadata_array_t *metadata_array = aom_img_metadata_array_alloc(2);
+  ASSERT_NE(metadata_array, nullptr);
+
+  metadata_array->metadata_array[0] =
+      aom_img_metadata_alloc(OBU_METADATA_TYPE_ITUT_T35, kMetadataPayloadT35,
+                             kMetadataPayloadSizeT35, AOM_MIF_ANY_FRAME);
+  metadata_array->metadata_array[1] =
+      aom_img_metadata_alloc(OBU_METADATA_TYPE_ITUT_T35, kMetadataPayloadT35,
+                             kMetadataPayloadSizeT35, AOM_MIF_ANY_FRAME);
+
+  aom_img_metadata_array_free(metadata_array);
+}
+
+TEST(MetadataTest, AddMetadataToImage) {
+  aom_image_t image;
+  image.metadata = nullptr;
+
+  ASSERT_EQ(aom_img_add_metadata(&image, OBU_METADATA_TYPE_ITUT_T35,
+                                 kMetadataPayloadT35, kMetadataPayloadSizeT35,
+                                 AOM_MIF_ANY_FRAME),
+            0);
+  aom_img_metadata_array_free(image.metadata);
+  EXPECT_EQ(aom_img_add_metadata(nullptr, OBU_METADATA_TYPE_ITUT_T35,
+                                 kMetadataPayloadT35, kMetadataPayloadSizeT35,
+                                 AOM_MIF_ANY_FRAME),
+            -1);
+}
+
+TEST(MetadataTest, RemoveMetadataFromImage) {
+  aom_image_t image;
+  image.metadata = nullptr;
+
+  ASSERT_EQ(aom_img_add_metadata(&image, OBU_METADATA_TYPE_ITUT_T35,
+                                 kMetadataPayloadT35, kMetadataPayloadSizeT35,
+                                 AOM_MIF_ANY_FRAME),
+            0);
+  aom_img_remove_metadata(&image);
+  aom_img_remove_metadata(nullptr);
+}
+
+TEST(MetadataTest, CopyMetadataToFrameBuffer) {
+  YV12_BUFFER_CONFIG yvBuf;
+  yvBuf.metadata = nullptr;
+
+  aom_metadata_array_t *metadata_array = aom_img_metadata_array_alloc(1);
+  ASSERT_NE(metadata_array, nullptr);
+
+  metadata_array->metadata_array[0] =
+      aom_img_metadata_alloc(OBU_METADATA_TYPE_ITUT_T35, kMetadataPayloadT35,
+                             kMetadataPayloadSizeT35, AOM_MIF_ANY_FRAME);
+
+  // Metadata_array
+  int status = aom_copy_metadata_to_frame_buffer(&yvBuf, metadata_array);
+  EXPECT_EQ(status, 0);
+  status = aom_copy_metadata_to_frame_buffer(nullptr, metadata_array);
+  EXPECT_EQ(status, -1);
+  aom_img_metadata_array_free(metadata_array);
+
+  // Metadata_array_2
+  aom_metadata_array_t *metadata_array_2 = aom_img_metadata_array_alloc(0);
+  ASSERT_NE(metadata_array_2, nullptr);
+  status = aom_copy_metadata_to_frame_buffer(&yvBuf, metadata_array_2);
+  EXPECT_EQ(status, -1);
+  aom_img_metadata_array_free(metadata_array_2);
+
+  // YV12_BUFFER_CONFIG
+  status = aom_copy_metadata_to_frame_buffer(&yvBuf, nullptr);
+  EXPECT_EQ(status, -1);
+  aom_remove_metadata_from_frame_buffer(&yvBuf);
+  aom_remove_metadata_from_frame_buffer(nullptr);
+}
+
+TEST(MetadataTest, GetMetadataFromImage) {
+  aom_image_t image;
+  image.metadata = nullptr;
+
+  ASSERT_EQ(aom_img_add_metadata(&image, OBU_METADATA_TYPE_ITUT_T35,
+                                 kMetadataPayloadT35, kMetadataPayloadSizeT35,
+                                 AOM_MIF_ANY_FRAME),
+            0);
+
+  EXPECT_EQ(aom_img_get_metadata(nullptr, 0), nullptr);
+  EXPECT_EQ(aom_img_get_metadata(&image, 1u), nullptr);
+  EXPECT_EQ(aom_img_get_metadata(&image, 10u), nullptr);
+
+  const aom_metadata_t *metadata = aom_img_get_metadata(&image, 0);
+  ASSERT_NE(metadata, nullptr);
+  ASSERT_EQ(metadata->sz, kMetadataPayloadSizeT35);
+  EXPECT_EQ(
+      memcmp(kMetadataPayloadT35, metadata->payload, kMetadataPayloadSizeT35),
+      0);
+
+  aom_img_metadata_array_free(image.metadata);
+}
+
+TEST(MetadataTest, ReadMetadatasFromImage) {
+  aom_image_t image;
+  image.metadata = nullptr;
+
+  uint32_t types[3];
+  types[0] = OBU_METADATA_TYPE_ITUT_T35;
+  types[1] = OBU_METADATA_TYPE_HDR_CLL;
+  types[2] = OBU_METADATA_TYPE_HDR_MDCV;
+
+  ASSERT_EQ(aom_img_add_metadata(&image, types[0], kMetadataPayloadT35,
+                                 kMetadataPayloadSizeT35, AOM_MIF_ANY_FRAME),
+            0);
+  ASSERT_EQ(aom_img_add_metadata(&image, types[1], kMetadataPayloadT35,
+                                 kMetadataPayloadSizeT35, AOM_MIF_KEY_FRAME),
+            0);
+  ASSERT_EQ(aom_img_add_metadata(&image, types[2], kMetadataPayloadT35,
+                                 kMetadataPayloadSizeT35, AOM_MIF_KEY_FRAME),
+            0);
+
+  size_t number_metadata = aom_img_num_metadata(&image);
+  ASSERT_EQ(number_metadata, 3u);
+  for (size_t i = 0; i < number_metadata; ++i) {
+    const aom_metadata_t *metadata = aom_img_get_metadata(&image, i);
+    ASSERT_NE(metadata, nullptr);
+    ASSERT_EQ(metadata->type, types[i]);
+    ASSERT_EQ(metadata->sz, kMetadataPayloadSizeT35);
+    EXPECT_EQ(
+        memcmp(kMetadataPayloadT35, metadata->payload, kMetadataPayloadSizeT35),
+        0);
+  }
+  aom_img_metadata_array_free(image.metadata);
+}
diff --git a/third_party/aom/test/metrics_template.html b/third_party/aom/test/metrics_template.html
new file mode 100644
index 0000000000..b57c62314a
--- /dev/null
+++ b/third_party/aom/test/metrics_template.html
@@ -0,0 +1,422 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<title>Video Codec Test Results</title>
+<style type="text/css">
+<!-- Begin 960 reset -->
+a,abbr,acronym,address,applet,article,aside,audio,b,big,blockquote,body,canvas,caption,center,cite,c
+ode,dd,del,details,dfn,dialog,div,dl,dt,em,embed,fieldset,figcaption,figure,font,footer,form,h1,h2,h
+3,h4,h5,h6,header,hgroup,hr,html,i,iframe,img,ins,kbd,label,legend,li,mark,menu,meter,nav,object,ol,
+output,p,pre,progress,q,rp,rt,ruby,s,samp,section,small,span,strike,strong,sub,summary,sup,table,tbo
+dy,td,tfoot,th,thead,time,tr,tt,u,ul,var,video,xmp{border:0;margin:0;padding:0;font-size:100%}html,b
+ody{height:100%}article,aside,details,figcaption,figure,footer,header,hgroup,menu,nav,section{displa
+y:block}b,strong{font-weight:bold}img{color:transparent;font-size:0;vertical-align:middle;-ms-interp
+olation-mode:bicubic}ol,ul{list-style:none}li{display:list-item}table{border-collapse:collapse;borde
+r-spacing:0}th,td,caption{font-weight:normal;vertical-align:top;text-align:left}q{quotes:none}q:befo
+re,q:after{content:'';content:none}sub,sup,small{font-size:75%}sub,sup{line-height:0;position:relati
+ve;vertical-align:baseline}sub{bottom:-0.25em}sup{top:-0.5em}svg{overflow:hidden}
+<!-- End 960 reset -->
+<!-- Begin 960 text -->
+body{font:13px/1.5 'Helvetica Neue',Arial,'Liberation Sans',FreeSans,sans-serif}pre,code{font-family
+:'DejaVu Sans Mono',Menlo,Consolas,monospace}hr{border:0 #ccc solid;border-top-width:1px;clear:both;
+height:0}h1{font-size:25px}h2{font-size:23px}h3{font-size:21px}h4{font-size:19px}h5{font-size:17px}h
+6{font-size:15px}ol{list-style:decimal}ul{list-style:disc}li{margin-left:30px}p,dl,hr,h1,h2,h3,h4,h5
+,h6,ol,ul,pre,table,address,fieldset,figure{margin-bottom:20px}
+<!-- End 960 text -->
+<!-- Begin 960 grid (fluid variant)
+     12 columns, 1152px total width
+     http://960.gs/ | http://grids.heroku.com/ -->
+.container_12{width:92%;margin-left:4%;margin-right:4%}.grid_1,.grid_2,.grid_3,.grid_4,.grid_5,.grid
+_6,.grid_7,.grid_8,.grid_9,.grid_10,.grid_11,.grid_12{display:inline;float:left;position:relative;ma
+rgin-left:1%;margin-right:1%}.alpha{margin-left:0}.omega{margin-right:0}.container_12 .grid_1{width:
+6.333%}.container_12 .grid_2{width:14.667%}.container_12 .grid_3{width:23.0%}.container_12 .grid_4{w
+idth:31.333%}.container_12 .grid_5{width:39.667%}.container_12 .grid_6{width:48.0%}.container_12 .gr
+id_7{width:56.333%}.container_12 .grid_8{width:64.667%}.container_12 .grid_9{width:73.0%}.container_
+12 .grid_10{width:81.333%}.container_12 .grid_11{width:89.667%}.container_12 .grid_12{width:98.0%}.c
+ontainer_12 .prefix_1{padding-left:8.333%}.container_12 .prefix_2{padding-left:16.667%}.container_12
+ .prefix_3{padding-left:25.0%}.container_12 .prefix_4{padding-left:33.333%}.container_12 .prefix_5{p
+adding-left:41.667%}.container_12 .prefix_6{padding-left:50.0%}.container_12 .prefix_7{padding-left:
+58.333%}.container_12 .prefix_8{padding-left:66.667%}.container_12 .prefix_9{padding-left:75.0%}.con
+tainer_12 .prefix_10{padding-left:83.333%}.container_12 .prefix_11{padding-left:91.667%}.container_1
+2 .suffix_1{padding-right:8.333%}.container_12 .suffix_2{padding-right:16.667%}.container_12 .suffix
+_3{padding-right:25.0%}.container_12 .suffix_4{padding-right:33.333%}.container_12 .suffix_5{padding
+-right:41.667%}.container_12 .suffix_6{padding-right:50.0%}.container_12 .suffix_7{padding-right:58.
+333%}.container_12 .suffix_8{padding-right:66.667%}.container_12 .suffix_9{padding-right:75.0%}.cont
+ainer_12 .suffix_10{padding-right:83.333%}.container_12 .suffix_11{padding-right:91.667%}.container_
+12 .push_1{left:8.333%}.container_12 .push_2{left:16.667%}.container_12 .push_3{left:25.0%}.containe
+r_12 .push_4{left:33.333%}.container_12 .push_5{left:41.667%}.container_12 .push_6{left:50.0%}.conta
+iner_12 .push_7{left:58.333%}.container_12 .push_8{left:66.667%}.container_12 .push_9{left:75.0%}.co
+ntainer_12 .push_10{left:83.333%}.container_12 .push_11{left:91.667%}.container_12 .pull_1{left:-8.3
+33%}.container_12 .pull_2{left:-16.667%}.container_12 .pull_3{left:-25.0%}.container_12 .pull_4{left
+:-33.333%}.container_12 .pull_5{left:-41.667%}.container_12 .pull_6{left:-50.0%}.container_12 .pull_
+7{left:-58.333%}.container_12 .pull_8{left:-66.667%}.container_12 .pull_9{left:-75.0%}.container_12
+.pull_10{left:-83.333%}.container_12 .pull_11{left:-91.667%}.clear{clear:both;display:block;overflow
+:hidden;visibility:hidden;width:0;height:0}.clearfix:after{clear:both;content:' ';display:block;font
+-size:0;line-height:0;visibility:hidden;width:0;height:0}.clearfix{display:inline-block}* html .clea
+rfix{height:1%}.clearfix{display:block}
+<!-- End 960 grid -->
+
+div.metricgraph {
+
+}
+
+body {
+
+}
+
+div.header {
+  font-family: Arial, sans-serif;
+}
+
+div.header h2 {
+  margin: .5em auto;
+}
+
+div.radio {
+  font-family: Arial, sans-serif;
+  margin-bottom: 1em;
+}
+
+div.main {
+
+}
+
+div.cliplist {
+  font-family: Arial, sans-serif;
+  margin-top: 6px;
+}
+
+div.chartarea {
+  font-family: Arial, sans-serif;
+}
+
+div.indicators {
+  font-family: Arial, sans-serif;
+  font-size: 13px;
+  margin-top: 6px;
+  min-height: 600px;
+  background-color: #f7f7f7;
+}
+
+div.indicators div.content {
+  margin: 1em;
+}
+
+div.indicators div.content h5 {
+  font-size: 13px;
+  text-align: center;
+  margin: 0;
+}
+
+div.indicators div.content ul {
+  margin-left: 0;
+  padding-left: 0;
+  margin-top: 0;
+}
+
+div.indicators div.content ul li {
+  margin-left: 1.5em;
+}
+
+div.indicators div.content p:first-child {
+  margin-bottom: .5em;
+}
+
+span.google-visualization-table-sortind {
+  color: #000;
+}
+.header-style {
+  font-weight: bold;
+  border: 1px solid #fff;
+  background-color: #ccc;
+}
+
+td.header-style+td {
+
+}
+
+.orange-background {
+  background-color: orange;
+}
+
+.light-gray-background {
+  background-color: #f0f0f0;
+}
+</style>
+<script type="text/javascript" src="https://www.google.com/jsapi"></script>
+<script type="text/javascript">
+var chart_left   = 40;
+var chart_top    = 6;
+var chart_height = document.documentElement.clientHeight-100;
+var chart_width  = "100%";
+ftable='filestable_avg'
+var snrs = [];
+var filestable_dsnr = [];
+var filestable_drate = [];
+var filestable_avg = [];
+
+// Python template code replaces the following 2 lines.
+//%%metrics_js%%//
+//%%filestable_dpsnr%%//
+//%%filestable_avg%%//
+//%%filestable_drate%%//
+//%%snrs%%//
+
+var selected = 0
+var imagestr = '';
+var bettertable=0;
+var chart=0;
+var better=0;
+var metricdata=0;
+var metricView=0;
+var column=1;
+var formatter=0;
+
+function changeColumn(col) {
+  column = col;
+  console.log(col)
+  draw_files();
+}
+
+function changeMetric(m) {
+  ftable=m
+  draw_files()
+}
+
+function setup_vis() {
+  chart = new google.visualization.ScatterChart(
+      document.getElementById("metricgraph"));
+
+  bettertable = new google.visualization.Table(
+      document.getElementById("bettertable"));
+
+  draw_files();
+  build_metrics_radio();
+}
+
+function build_metrics_radio() {
+  for (metric=1; metric < metrics.length; metric++) {
+    var rb = document.createElement('input');
+    var l = document.createElement('label');
+    rb.setAttribute('type','radio');
+    rb.setAttribute('name','metric');
+    rb.setAttribute('onClick', "changeColumn('"+metric.toString()+"')");
+    l.innerHTML = metrics[metric];
+    document.getElementById('metrics').appendChild(rb);
+    document.getElementById('metrics').appendChild(l);
+  }
+}
+
+function draw_files() {
+  var options = {'allowHtml': true, 'width': "100%", 'height': "50%"};
+  if (better != 0) delete better;
+
+  col=eval(ftable+'[column]')
+  better = new google.visualization.DataTable(col)
+
+  // Python Template code replaces the following line with a list of
+  // formatters.
+  if (ftable == 'filestable_dsnr')
+    formatter = new google.visualization.NumberFormat(
+      {fractionDigits: 4, suffix:" db"});
+  else
+    formatter = new google.visualization.NumberFormat(
+       {fractionDigits: 4, suffix:"%"});
+
+  //%%formatters%%//
+
+  bettertable.draw(better,options);
+  google.visualization.events.addListener(bettertable, 'select',
+                                          selectBetterHandler);
+  query_file()
+}
+
+function query_file() {
+  imagestr = better.getFormattedValue(selected, 0)
+  var metricjson = eval('(' + snrs[column][selected] + ')');
+  metricdata = new google.visualization.DataTable(metricjson, 0.6);
+  if( metricView != 0 ) delete metricView;
+  metricView = new google.visualization.DataView(metricdata);
+
+  chart.draw(metricView, {curveType:'function',
+      explorer: {},
+      chartArea:{left:chart_left, top:chart_top, width:chart_width,
+      height:chart_height-90},
+      hAxis:{title:"Datarate in kbps"},
+      vAxis:{title:"Quality in decibels", format: '##.0', textPosition: 'in'},
+      legend:{position:"in"}, title:imagestr, pointSize:2, lineWidth:1,
+      width:chart_width, height:chart_height-50 });
+
+  google.visualization.events.addListener(chart, 'select', chartSelect);
+  google.visualization.events.addListener(chart, 'onmouseover', chartMouseOver);
+  google.visualization.events.addListener(chart, 'onmouseout', chartMouseOut);
+}
+
+function chartMouseOut(e) {
+  statusbar = document.getElementById('status');
+  statusbar.style.display = 'none';
+}
+
+function chartMouseOver(e) {
+  pointDifference(e.row, e.column)
+}
+
+function pointDifference(row, col) {
+  if(!row || !col)
+    return;
+
+  var cols = metricdata.getNumberOfColumns();
+  var rows = metricdata.getNumberOfRows();
+
+  var sel_bitrate = metricView.getValue(row, 0 );
+  var sel_metric = metricView.getValue(row, col);
+
+  var message = '<ul>' + metricView.getColumnLabel(col) +
+     ' (' + sel_bitrate.toFixed(0) + ' kbps, ' + sel_metric.toFixed(2) + ')' + ' is ';
+
+
+  // col 0 is datarate
+  for( var i=1;i<cols;++i) {
+
+    var metric_greatest_thats_less = 0;
+    var rate_greatest_thats_less = 0;
+    var metric_smallest_thats_greater = 999;
+    var rate_smallest_thats_greater = 0;
+
+    if(i==col)
+      continue;
+
+    // Find the lowest metric for the column that's greater than sel_metric and
+    // the highest metric for this column that's less than the metric.
+    for(var line_count = 0; line_count < rows; ++line_count) {
+      this_metric = metricdata.getValue(line_count, i)
+      this_rate = metricdata.getValue(line_count, 0)
+      if(!this_metric)
+        continue;
+
+      if(this_metric > metric_greatest_thats_less &&
+         this_metric <= sel_metric) {
+        metric_greatest_thats_less = this_metric;
+        rate_greatest_thats_less = this_rate;
+      }
+      if(this_metric < metric_smallest_thats_greater &&
+        this_metric > sel_metric) {
+        metric_smallest_thats_greater = this_metric;
+        rate_smallest_thats_greater = this_rate;
+      }
+    }
+
+    if(rate_smallest_thats_greater == 0 || rate_greatest_thats_less == 0) {
+      message = message + " <li> Couldn't find a point on both sides.</li>"
+    } else {
+      metric_slope = ( rate_smallest_thats_greater - rate_greatest_thats_less) /
+          ( metric_smallest_thats_greater - metric_greatest_thats_less);
+
+      projected_rate = ( sel_metric - metric_greatest_thats_less) *
+          metric_slope + rate_greatest_thats_less;
+
+      difference = 100 * (projected_rate / sel_bitrate - 1);
+
+
+      if (difference > 0)
+        message = message + "<li>  " + difference.toFixed(2) +
+                  "% smaller than <em>" +
+                  metricdata.getColumnLabel(i) + "</em></li> "
+      else
+        message = message + "<li>  " + -difference.toFixed(2) +
+                  "% bigger than <em>" +
+                  metricdata.getColumnLabel(i) + "</em></li> "
+    }
+
+  }
+  message = message + "</ul>"
+  statusbar = document.getElementById('status');
+  statusbar.innerHTML = "<p>" + message + "</p>";
+  statusbar.style.display = 'block';
+}
+
+function chartSelect() {
+  var selection = chart.getSelection();
+  var message = '';
+  var min = metricView.getFormattedValue(selection[0].row, 0);
+  var max = metricView.getFormattedValue(selection[selection.length-1].row, 0);
+  var val = metricView.getFormattedValue(selection[0].row,selection[0].column);
+
+  pointDifference(selection[0].row, selection[0].column)
+  min = min / 3
+  max = max * 3
+  metricView.setRows(metricdata.getFilteredRows(
+      [{column: 0,minValue: min, maxValue:max}]));
+
+  chart.draw(metricView, {curveType:'function',
+      chartArea:{left:40, top:10, width:chart_width, height:chart_height - 110},
+      hAxis:{title:"datarate in kbps"}, vAxis:{title:"quality in decibels"},
+      legend:{position:"in"}, title:imagestr, pointSize:2, lineWidth:1,
+      width:chart_width, height:chart_height - 50});
+}
+
+function selectBetterHandler() {
+  var selection = bettertable.getSelection();
+  for (var i = 0; i < selection.length; i++) {
+    item = selection[i];
+  }
+  selected = item.row
+  query_file()
+}
+
+
+google.load('visualization', '1', {'packages' : ['corechart','table']});
+google.setOnLoadCallback(setup_vis);
+</script>
+</head>
+
+<body>
+
+  <div class="container_12">
+
+    <div class="grid_12 header">
+      <h2>Codec Comparison Results</h2>
+    </div>
+
+    <div class="grid_12 radio">
+
+      <form name="myform">
+        Method For Combining Points
+        <input type="radio" checked name="column" value="1"
+          onClick="changeMetric('filestable_avg')" />Average of bitrates difference
+        <input type="radio" name="column" value="2"
+          onClick="changeMetric('filestable_dsnr')" />BDSNR
+        <input type="radio" name="column" value="3"
+          onClick="changeMetric('filestable_drate')" />BDRATE
+      </form>
+
+      <form id="metrics" name="myform">
+      </form>
+
+    </div>
+
+    <div class="grid_12 main">
+
+      <div class="grid_5 alpha cliplist">
+        <div id="bettertable"></div>
+      </div>
+
+      <div class="grid_5 chartarea">
+        <div id="metricgraph"></div>
+      </div>
+
+      <div class="grid_2 omega indicators">
+        <div class="content">
+          <h5>Indicators</h5>
+          <hr>
+          <div id="status"></div>
+        </div>
+      </div>
+
+    </div>
+
+  </div>
+
+</body>
+</html>
diff --git a/third_party/aom/test/minmax_test.cc b/third_party/aom/test/minmax_test.cc
new file mode 100644
index 0000000000..33be4ff6dc
--- /dev/null
+++ b/third_party/aom/test/minmax_test.cc
@@ -0,0 +1,244 @@
+/*
+ *  Copyright (c) 2023 The WebM project authors. All Rights Reserved.
+ *  Copyright (c) 2023, Alliance for Open Media. All Rights Reserved.
+ *
+ *  This source code is subject to the terms of the BSD 2 Clause License and
+ *  the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ *  was not distributed with this source code in the LICENSE file, you can
+ *  obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ *  Media Patent License 1.0 was not distributed with this source code in the
+ *  PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+
+using ::libaom_test::ACMRandom;
+
+typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride, const uint8_t *b,
+                           int b_stride, int *min, int *max);
+
+class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {
+ public:
+  void SetUp() override {
+    mm_func_ = GetParam();
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+ protected:
+  MinMaxFunc mm_func_;
+  ACMRandom rnd_;
+};
+
+void reference_minmax(const uint8_t *a, int a_stride, const uint8_t *b,
+                      int b_stride, int *min_ret, int *max_ret) {
+  int min = 255;
+  int max = 0;
+  for (int i = 0; i < 8; i++) {
+    for (int j = 0; j < 8; j++) {
+      const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]);
+      if (min > diff) min = diff;
+      if (max < diff) max = diff;
+    }
+  }
+
+  *min_ret = min;
+  *max_ret = max;
+}
+
+TEST_P(MinMaxTest, MinValue) {
+  for (int i = 0; i < 64; i++) {
+    uint8_t a[64], b[64];
+    memset(a, 0, sizeof(a));
+    memset(b, 255, sizeof(b));
+    b[i] = i;  // Set a minimum difference of i.
+
+    int min, max;
+    API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(255, max);
+    EXPECT_EQ(i, min);
+  }
+}
+
+TEST_P(MinMaxTest, MaxValue) {
+  for (int i = 0; i < 64; i++) {
+    uint8_t a[64], b[64];
+    memset(a, 0, sizeof(a));
+    memset(b, 0, sizeof(b));
+    b[i] = i;  // Set a maximum difference of i.
+
+    int min, max;
+    API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(i, max);
+    EXPECT_EQ(0, min);
+  }
+}
+
+TEST_P(MinMaxTest, CompareReference) {
+  uint8_t a[64], b[64];
+  for (int j = 0; j < 64; j++) {
+    a[j] = rnd_.Rand8();
+    b[j] = rnd_.Rand8();
+  }
+
+  int min_ref, max_ref, min, max;
+  reference_minmax(a, 8, b, 8, &min_ref, &max_ref);
+  API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+  EXPECT_EQ(max_ref, max);
+  EXPECT_EQ(min_ref, min);
+}
+
+TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
+  uint8_t a[8 * 64], b[8 * 64];
+  for (int i = 0; i < 8 * 64; i++) {
+    a[i] = rnd_.Rand8();
+    b[i] = rnd_.Rand8();
+  }
+  for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
+    for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
+      int min_ref, max_ref, min, max;
+      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
+      API_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
+      EXPECT_EQ(max_ref, max)
+          << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
+      EXPECT_EQ(min_ref, min)
+          << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
+    }
+  }
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+
+using HBDMinMaxTest = MinMaxTest;
+
+void highbd_reference_minmax(const uint8_t *a, int a_stride, const uint8_t *b,
+                             int b_stride, int *min_ret, int *max_ret) {
+  int min = 65535;
+  int max = 0;
+  const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a);
+  const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b);
+  for (int i = 0; i < 8; i++) {
+    for (int j = 0; j < 8; j++) {
+      const int diff = abs(a_ptr[i * a_stride + j] - b_ptr[i * b_stride + j]);
+      if (min > diff) min = diff;
+      if (max < diff) max = diff;
+    }
+  }
+
+  *min_ret = min;
+  *max_ret = max;
+}
+
+TEST_P(HBDMinMaxTest, MinValue) {
+  uint8_t *a = CONVERT_TO_BYTEPTR(
+      reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t))));
+  uint8_t *b = CONVERT_TO_BYTEPTR(
+      reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t))));
+  for (int i = 0; i < 64; i++) {
+    aom_memset16(CONVERT_TO_SHORTPTR(a), 0, 64);
+    aom_memset16(CONVERT_TO_SHORTPTR(b), 65535, 64);
+    CONVERT_TO_SHORTPTR(b)[i] = i;  // Set a minimum difference of i.
+
+    int min, max;
+    API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(65535, max);
+    EXPECT_EQ(i, min);
+  }
+  aom_free(CONVERT_TO_SHORTPTR(a));
+  aom_free(CONVERT_TO_SHORTPTR(b));
+}
+
+TEST_P(HBDMinMaxTest, MaxValue) {
+  uint8_t *a = CONVERT_TO_BYTEPTR(
+      reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t))));
+  uint8_t *b = CONVERT_TO_BYTEPTR(
+      reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t))));
+  for (int i = 0; i < 64; i++) {
+    aom_memset16(CONVERT_TO_SHORTPTR(a), 0, 64);
+    aom_memset16(CONVERT_TO_SHORTPTR(b), 0, 64);
+    CONVERT_TO_SHORTPTR(b)[i] = i;  // Set a minimum difference of i.
+
+    int min, max;
+    API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(i, max);
+    EXPECT_EQ(0, min);
+  }
+  aom_free(CONVERT_TO_SHORTPTR(a));
+  aom_free(CONVERT_TO_SHORTPTR(b));
+}
+
+TEST_P(HBDMinMaxTest, CompareReference) {
+  uint8_t *a = CONVERT_TO_BYTEPTR(
+      reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t))));
+  uint8_t *b = CONVERT_TO_BYTEPTR(
+      reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t))));
+  for (int j = 0; j < 64; j++) {
+    CONVERT_TO_SHORTPTR(a)[j] = rnd_.Rand16();
+    CONVERT_TO_SHORTPTR(b)[j] = rnd_.Rand16();
+  }
+
+  int min_ref, max_ref, min, max;
+  highbd_reference_minmax(a, 8, b, 8, &min_ref, &max_ref);
+  API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+  aom_free(CONVERT_TO_SHORTPTR(a));
+  aom_free(CONVERT_TO_SHORTPTR(b));
+  EXPECT_EQ(max_ref, max);
+  EXPECT_EQ(min_ref, min);
+}
+
+TEST_P(HBDMinMaxTest, CompareReferenceAndVaryStride) {
+  uint8_t *a = CONVERT_TO_BYTEPTR(
+      reinterpret_cast<uint16_t *>(aom_malloc((8 * 64) * sizeof(uint16_t))));
+  uint8_t *b = CONVERT_TO_BYTEPTR(
+      reinterpret_cast<uint16_t *>(aom_malloc((8 * 64) * sizeof(uint16_t))));
+  for (int i = 0; i < 8 * 64; i++) {
+    CONVERT_TO_SHORTPTR(a)[i] = rnd_.Rand16();
+    CONVERT_TO_SHORTPTR(b)[i] = rnd_.Rand16();
+  }
+  for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
+    for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
+      int min_ref, max_ref, min, max;
+      highbd_reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
+      API_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
+      EXPECT_EQ(max_ref, max)
+          << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
+      EXPECT_EQ(min_ref, min)
+          << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
+    }
+  }
+  aom_free(CONVERT_TO_SHORTPTR(a));
+  aom_free(CONVERT_TO_SHORTPTR(b));
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+INSTANTIATE_TEST_SUITE_P(C, MinMaxTest, ::testing::Values(&aom_minmax_8x8_c));
+#if CONFIG_AV1_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(C, HBDMinMaxTest,
+                         ::testing::Values(&aom_highbd_minmax_8x8_c));
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, HBDMinMaxTest,
+                         ::testing::Values(&aom_highbd_minmax_8x8_neon));
+#endif
+#endif
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, MinMaxTest,
+                         ::testing::Values(&aom_minmax_8x8_sse2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, MinMaxTest,
+                         ::testing::Values(&aom_minmax_8x8_neon));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/monochrome_test.cc b/third_party/aom/test/monochrome_test.cc
new file mode 100644
index 0000000000..f22b5fe0f2
--- /dev/null
+++ b/third_party/aom/test/monochrome_test.cc
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <climits>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/video_source.h"
+#include "test/util.h"
+
+namespace {
+
+const unsigned int kCqLevel = 18;
+const double kMaxPsnr = 100.0;
+
+// kPsnrThreshold represents the psnr threshold used to validate the quality of
+// the first frame. The indices correspond to one/two-pass, allintra and
+// realtime encoding modes.
+const double kPsnrThreshold[3] = { 29.0, 41.5, 41.5 };
+
+// kPsnrFluctuation represents the maximum allowed psnr fluctuation w.r.t first
+// frame. The indices correspond to one/two-pass, allintra and realtime
+// encoding modes.
+const double kPsnrFluctuation[3] = { 2.5, 0.3, 16.0 };
+
+class MonochromeTest
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int,
+                                                 int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  MonochromeTest()
+      : EncoderTest(GET_PARAM(0)), lossless_(GET_PARAM(2)),
+        frame0_psnr_y_(0.0) {}
+
+  ~MonochromeTest() override = default;
+
+  void SetUp() override { InitializeConfig(GET_PARAM(1)); }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, GET_PARAM(3));
+      if (mode_ == ::libaom_test::kAllIntra) {
+        encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel);
+      }
+      if (lossless_) {
+        encoder->Control(AV1E_SET_LOSSLESS, 1);
+      }
+    }
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             aom_codec_pts_t pts) override {
+    (void)pts;
+
+    // Get value of top-left corner pixel of U plane
+    int chroma_value = img.planes[AOM_PLANE_U][0];
+
+    bool is_chroma_constant =
+        ComparePlaneToValue(img, AOM_PLANE_U, chroma_value) &&
+        ComparePlaneToValue(img, AOM_PLANE_V, chroma_value);
+
+    // Chroma planes should be constant
+    EXPECT_TRUE(is_chroma_constant);
+
+    // Monochrome flag on image should be set
+    EXPECT_EQ(img.monochrome, 1);
+
+    chroma_value_list_.push_back(chroma_value);
+  }
+
+  // Returns true if all pixels on the plane are equal to value, and returns
+  // false otherwise.
+  bool ComparePlaneToValue(const aom_image_t &img, const int plane,
+                           const int value) {
+    const int w = aom_img_plane_width(&img, plane);
+    const int h = aom_img_plane_height(&img, plane);
+    const uint8_t *const buf = img.planes[plane];
+    const int stride = img.stride[plane];
+
+    for (int r = 0; r < h; ++r) {
+      for (int c = 0; c < w; ++c) {
+        if (buf[r * stride + c] != value) return false;
+      }
+    }
+    return true;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    // Check average PSNR value is >= 100 db in case of lossless encoding.
+    if (lossless_) {
+      EXPECT_GE(pkt->data.psnr.psnr[0], kMaxPsnr);
+      return;
+    }
+    const int psnr_index = (mode_ == ::libaom_test::kRealTime)   ? 2
+                           : (mode_ == ::libaom_test::kAllIntra) ? 1
+                                                                 : 0;
+    // Check that the initial Y PSNR value is 'high enough', and check that
+    // subsequent Y PSNR values are 'close' to this initial value.
+    if (frame0_psnr_y_ == 0.0) {
+      frame0_psnr_y_ = pkt->data.psnr.psnr[1];
+      EXPECT_GT(frame0_psnr_y_, kPsnrThreshold[psnr_index]);
+    }
+    EXPECT_NEAR(pkt->data.psnr.psnr[1], frame0_psnr_y_,
+                kPsnrFluctuation[psnr_index]);
+  }
+
+  int lossless_;
+  std::vector<int> chroma_value_list_;
+  double frame0_psnr_y_;
+};
+
+TEST_P(MonochromeTest, TestMonochromeEncoding) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 5);
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 600;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 2;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_undershoot_pct = 50;
+  cfg_.rc_overshoot_pct = 50;
+  cfg_.rc_end_usage = AOM_CBR;
+  cfg_.g_lag_in_frames = 1;
+  cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
+  // Enable dropped frames.
+  cfg_.rc_dropframe_thresh = 1;
+  // Run at low bitrate.
+  cfg_.rc_target_bitrate = 40;
+  // Set monochrome encoding flag
+  cfg_.monochrome = 1;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // Check that the chroma planes are equal across all frames
+  std::vector<int>::const_iterator iter = chroma_value_list_.begin();
+  int initial_chroma_value = *iter;
+  for (; iter != chroma_value_list_.end(); ++iter) {
+    // Check that all decoded frames have the same constant chroma planes.
+    EXPECT_EQ(*iter, initial_chroma_value);
+  }
+}
+
+class MonochromeAllIntraTest : public MonochromeTest {};
+
+TEST_P(MonochromeAllIntraTest, TestMonochromeEncoding) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 5);
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  // Set monochrome encoding flag
+  cfg_.monochrome = 1;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // Check that the chroma planes are equal across all frames
+  std::vector<int>::const_iterator iter = chroma_value_list_.begin();
+  int initial_chroma_value = *iter;
+  for (; iter != chroma_value_list_.end(); ++iter) {
+    // Check that all decoded frames have the same constant chroma planes.
+    EXPECT_EQ(*iter, initial_chroma_value);
+  }
+}
+
+class MonochromeRealtimeTest : public MonochromeTest {};
+
+TEST_P(MonochromeRealtimeTest, TestMonochromeEncoding) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 30);
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  // Set monochrome encoding flag
+  cfg_.monochrome = 1;
+  // Run at low bitrate.
+  cfg_.rc_target_bitrate = 40;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // Check that the chroma planes are equal across all frames
+  std::vector<int>::const_iterator iter = chroma_value_list_.begin();
+  int initial_chroma_value = *iter;
+  for (; iter != chroma_value_list_.end(); ++iter) {
+    // Check that all decoded frames have the same constant chroma planes.
+    EXPECT_EQ(*iter, initial_chroma_value);
+  }
+}
+
+AV1_INSTANTIATE_TEST_SUITE(MonochromeTest,
+                           ::testing::Values(::libaom_test::kOnePassGood,
+                                             ::libaom_test::kTwoPassGood),
+                           ::testing::Values(0),   // lossless
+                           ::testing::Values(0));  // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(MonochromeAllIntraTest,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(0, 1),   // lossless
+                           ::testing::Values(6, 9));  // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(MonochromeRealtimeTest,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Values(0),          // lossless
+                           ::testing::Values(6, 8, 10));  // cpu_used
+
+}  // namespace
diff --git a/third_party/aom/test/motion_vector_test.cc b/third_party/aom/test/motion_vector_test.cc
new file mode 100644
index 0000000000..4fc8d53d95
--- /dev/null
+++ b/third_party/aom/test/motion_vector_test.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+#define MAX_EXTREME_MV 1
+#define MIN_EXTREME_MV 2
+
+// Encoding modes
+const libaom_test::TestMode kEncodingModeVectors[] = {
+  ::libaom_test::kTwoPassGood,
+  ::libaom_test::kOnePassGood,
+};
+
+// Encoding speeds
+const int kCpuUsedVectors[] = { 1, 5 };
+
+// MV test modes: 1 - always use maximum MV; 2 - always use minimum MV.
+const int kMVTestModes[] = { MAX_EXTREME_MV, MIN_EXTREME_MV };
+
+class MotionVectorTestLarge
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int,
+                                                 int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  MotionVectorTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), mv_test_mode_(GET_PARAM(3)) {}
+
+  ~MotionVectorTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    if (encoding_mode_ != ::libaom_test::kRealTime) {
+      cfg_.g_lag_in_frames = 3;
+    } else {
+      cfg_.rc_buf_sz = 1000;
+      cfg_.rc_buf_initial_sz = 500;
+      cfg_.rc_buf_optimal_sz = 600;
+    }
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST, mv_test_mode_);
+      if (encoding_mode_ != ::libaom_test::kRealTime) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      }
+    }
+  }
+
+  libaom_test::TestMode encoding_mode_;
+  int cpu_used_;
+  int mv_test_mode_;
+};
+
+TEST_P(MotionVectorTestLarge, OverallTest) {
+  int width = 3840;
+  int height = 2160;
+
+  // Reduce the test clip's resolution while testing on 32-bit system.
+  if (sizeof(void *) == 4) {
+    width = 2048;
+    height = 360;
+  }
+
+  cfg_.rc_target_bitrate = 24000;
+  cfg_.g_profile = 0;
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  std::unique_ptr<libaom_test::VideoSource> video;
+  video.reset(new libaom_test::YUVVideoSource(
+      "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, width, height, 30, 1, 0, 3));
+
+  ASSERT_NE(video, nullptr);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+}
+
+AV1_INSTANTIATE_TEST_SUITE(MotionVectorTestLarge,
+                           ::testing::ValuesIn(kEncodingModeVectors),
+                           ::testing::ValuesIn(kCpuUsedVectors),
+                           ::testing::ValuesIn(kMVTestModes));
+}  // namespace
diff --git a/third_party/aom/test/mv_cost_test.cc b/third_party/aom/test/mv_cost_test.cc
new file mode 100644
index 0000000000..73d56665bf
--- /dev/null
+++ b/third_party/aom/test/mv_cost_test.cc
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/encoder/cost.h"
+#include "av1/encoder/encodemv.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+void ReferenceBuildNmvComponentCostTable(int *mvcost,
+                                         const nmv_component *const mvcomp,
+                                         MvSubpelPrecision precision) {
+  int i, v;
+  int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
+  int bits_cost[MV_OFFSET_BITS][2];
+  int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE], fp_cost[MV_FP_SIZE];
+  int class0_hp_cost[2], hp_cost[2];
+  av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, nullptr);
+  av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, nullptr);
+  av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, nullptr);
+  for (i = 0; i < MV_OFFSET_BITS; ++i) {
+    av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], nullptr);
+  }
+  for (i = 0; i < CLASS0_SIZE; ++i)
+    av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i],
+                             nullptr);
+  av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, nullptr);
+  if (precision > MV_SUBPEL_LOW_PRECISION) {
+    av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, nullptr);
+    av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, nullptr);
+  }
+  mvcost[0] = 0;
+  for (v = 1; v <= MV_MAX; ++v) {
+    int z, c, o, d, e, f, cost = 0;
+    z = v - 1;
+    c = av1_get_mv_class(z, &o);
+    cost += class_cost[c];
+    d = (o >> 3);     /* int mv data */
+    f = (o >> 1) & 3; /* fractional pel mv data */
+    e = (o & 1);      /* high precision mv data */
+    if (c == MV_CLASS_0) {
+      cost += class0_cost[d];
+    } else {
+      const int b = c + CLASS0_BITS - 1; /* number of bits */
+      for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)];
+    }
+    if (precision > MV_SUBPEL_NONE) {
+      if (c == MV_CLASS_0) {
+        cost += class0_fp_cost[d][f];
+      } else {
+        cost += fp_cost[f];
+      }
+      if (precision > MV_SUBPEL_LOW_PRECISION) {
+        if (c == MV_CLASS_0) {
+          cost += class0_hp_cost[e];
+        } else {
+          cost += hp_cost[e];
+        }
+      }
+    }
+    mvcost[v] = cost + sign_cost[0];
+    mvcost[-v] = cost + sign_cost[1];
+  }
+}
+
+// Test using the default context, except for sign
+static const nmv_component kTestComponentContext = {
+  { AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740, 32757, 32762,
+              32767) },  // class_cdf // fp
+  { { AOM_CDF4(16384, 24576, 26624) },
+    { AOM_CDF4(12288, 21248, 24128) } },  // class0_fp_cdf
+  { AOM_CDF4(8192, 17408, 21248) },       // fp_cdf
+  { AOM_CDF2(70 * 128) },                 // sign_cdf
+  { AOM_CDF2(160 * 128) },                // class0_hp_cdf
+  { AOM_CDF2(128 * 128) },                // hp_cdf
+  { AOM_CDF2(216 * 128) },                // class0_cdf
+  { { AOM_CDF2(128 * 136) },
+    { AOM_CDF2(128 * 140) },
+    { AOM_CDF2(128 * 148) },
+    { AOM_CDF2(128 * 160) },
+    { AOM_CDF2(128 * 176) },
+    { AOM_CDF2(128 * 192) },
+    { AOM_CDF2(128 * 224) },
+    { AOM_CDF2(128 * 234) },
+    { AOM_CDF2(128 * 234) },
+    { AOM_CDF2(128 * 240) } },  // bits_cdf
+};
+
+void TestMvComponentCostTable(MvSubpelPrecision precision) {
+  std::unique_ptr<int[]> mvcost_ref_buf(new int[MV_VALS]);
+  std::unique_ptr<int[]> mvcost_buf(new int[MV_VALS]);
+  int *mvcost_ref = mvcost_ref_buf.get() + MV_MAX;
+  int *mvcost = mvcost_buf.get() + MV_MAX;
+
+  ReferenceBuildNmvComponentCostTable(mvcost_ref, &kTestComponentContext,
+                                      precision);
+  av1_build_nmv_component_cost_table(mvcost, &kTestComponentContext, precision);
+
+  for (int v = 0; v <= MV_MAX; ++v) {
+    ASSERT_EQ(mvcost_ref[v], mvcost[v]) << "v = " << v;
+    ASSERT_EQ(mvcost_ref[-v], mvcost[-v]) << "v = " << v;
+  }
+}
+
+TEST(MvCostTest, BuildMvComponentCostTableTest1) {
+  TestMvComponentCostTable(MV_SUBPEL_NONE);
+}
+
+TEST(MvCostTest, BuildMvComponentCostTableTest2) {
+  TestMvComponentCostTable(MV_SUBPEL_LOW_PRECISION);
+}
+
+TEST(MvCostTest, BuildMvComponentCostTableTest3) {
+  TestMvComponentCostTable(MV_SUBPEL_HIGH_PRECISION);
+}
+
+}  // namespace
+\ No newline at end of file
diff --git a/third_party/aom/test/noise_model_test.cc b/third_party/aom/test/noise_model_test.cc
new file mode 100644
index 0000000000..b3edcc218e
--- /dev/null
+++ b/third_party/aom/test/noise_model_test.cc
@@ -0,0 +1,1372 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <limits.h>
+#include <math.h>
+#include <algorithm>
+#include <vector>
+
+#include "aom_dsp/noise_model.h"
+#include "aom_dsp/noise_util.h"
+#include "config/aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+// Return normally distrbuted values with standard deviation of sigma.
+double randn(libaom_test::ACMRandom *random, double sigma) {
+  while (true) {
+    const double u = 2.0 * ((double)random->Rand31() /
+                            testing::internal::Random::kMaxRange) -
+                     1.0;
+    const double v = 2.0 * ((double)random->Rand31() /
+                            testing::internal::Random::kMaxRange) -
+                     1.0;
+    const double s = u * u + v * v;
+    if (s > 0 && s < 1) {
+      return sigma * (u * sqrt(-2.0 * log(s) / s));
+    }
+  }
+}
+
+// Synthesizes noise using the auto-regressive filter of the given lag,
+// with the provided n coefficients sampled at the given coords.
+void noise_synth(libaom_test::ACMRandom *random, int lag, int n,
+                 const int (*coords)[2], const double *coeffs, double *data,
+                 int w, int h) {
+  const int pad_size = 3 * lag;
+  const int padded_w = w + pad_size;
+  const int padded_h = h + pad_size;
+  int x = 0, y = 0;
+  std::vector<double> padded(padded_w * padded_h);
+
+  for (y = 0; y < padded_h; ++y) {
+    for (x = 0; x < padded_w; ++x) {
+      padded[y * padded_w + x] = randn(random, 1.0);
+    }
+  }
+  for (y = lag; y < padded_h; ++y) {
+    for (x = lag; x < padded_w; ++x) {
+      double sum = 0;
+      int i = 0;
+      for (i = 0; i < n; ++i) {
+        const int dx = coords[i][0];
+        const int dy = coords[i][1];
+        sum += padded[(y + dy) * padded_w + (x + dx)] * coeffs[i];
+      }
+      padded[y * padded_w + x] += sum;
+    }
+  }
+  // Copy over the padded rows to the output
+  for (y = 0; y < h; ++y) {
+    memcpy(data + y * w, &padded[0] + y * padded_w, sizeof(*data) * w);
+  }
+}
+
+std::vector<float> get_noise_psd(double *noise, int width, int height,
+                                 int block_size) {
+  float *block =
+      (float *)aom_memalign(32, block_size * block_size * sizeof(block));
+  std::vector<float> psd(block_size * block_size);
+  if (block == nullptr) {
+    EXPECT_NE(block, nullptr);
+    return psd;
+  }
+  int num_blocks = 0;
+  struct aom_noise_tx_t *tx = aom_noise_tx_malloc(block_size);
+  if (tx == nullptr) {
+    EXPECT_NE(tx, nullptr);
+    return psd;
+  }
+  for (int y = 0; y <= height - block_size; y += block_size / 2) {
+    for (int x = 0; x <= width - block_size; x += block_size / 2) {
+      for (int yy = 0; yy < block_size; ++yy) {
+        for (int xx = 0; xx < block_size; ++xx) {
+          block[yy * block_size + xx] = (float)noise[(y + yy) * width + x + xx];
+        }
+      }
+      aom_noise_tx_forward(tx, &block[0]);
+      aom_noise_tx_add_energy(tx, &psd[0]);
+      num_blocks++;
+    }
+  }
+  for (int yy = 0; yy < block_size; ++yy) {
+    for (int xx = 0; xx <= block_size / 2; ++xx) {
+      psd[yy * block_size + xx] /= num_blocks;
+    }
+  }
+  // Fill in the data that is missing due to symmetries
+  for (int xx = 1; xx < block_size / 2; ++xx) {
+    psd[(block_size - xx)] = psd[xx];
+  }
+  for (int yy = 1; yy < block_size; ++yy) {
+    for (int xx = 1; xx < block_size / 2; ++xx) {
+      psd[(block_size - yy) * block_size + (block_size - xx)] =
+          psd[yy * block_size + xx];
+    }
+  }
+  aom_noise_tx_free(tx);
+  aom_free(block);
+  return psd;
+}
+
+}  // namespace
+
+TEST(NoiseStrengthSolver, GetCentersTwoBins) {
+  aom_noise_strength_solver_t solver;
+  aom_noise_strength_solver_init(&solver, 2, 8);
+  EXPECT_NEAR(0, aom_noise_strength_solver_get_center(&solver, 0), 1e-5);
+  EXPECT_NEAR(255, aom_noise_strength_solver_get_center(&solver, 1), 1e-5);
+  aom_noise_strength_solver_free(&solver);
+}
+
+TEST(NoiseStrengthSolver, GetCentersTwoBins10bit) {
+  aom_noise_strength_solver_t solver;
+  aom_noise_strength_solver_init(&solver, 2, 10);
+  EXPECT_NEAR(0, aom_noise_strength_solver_get_center(&solver, 0), 1e-5);
+  EXPECT_NEAR(1023, aom_noise_strength_solver_get_center(&solver, 1), 1e-5);
+  aom_noise_strength_solver_free(&solver);
+}
+
+TEST(NoiseStrengthSolver, GetCenters256Bins) {
+  const int num_bins = 256;
+  aom_noise_strength_solver_t solver;
+  aom_noise_strength_solver_init(&solver, num_bins, 8);
+
+  for (int i = 0; i < 256; ++i) {
+    EXPECT_NEAR(i, aom_noise_strength_solver_get_center(&solver, i), 1e-5);
+  }
+  aom_noise_strength_solver_free(&solver);
+}
+
+// Tests that the noise strength solver returns the identity transform when
+// given identity-like constraints.
+TEST(NoiseStrengthSolver, ObserveIdentity) {
+  const int num_bins = 256;
+  aom_noise_strength_solver_t solver;
+  ASSERT_EQ(1, aom_noise_strength_solver_init(&solver, num_bins, 8));
+
+  // We have to add a big more strength to constraints at the boundary to
+  // overcome any regularization.
+  for (int j = 0; j < 5; ++j) {
+    aom_noise_strength_solver_add_measurement(&solver, 0, 0);
+    aom_noise_strength_solver_add_measurement(&solver, 255, 255);
+  }
+  for (int i = 0; i < 256; ++i) {
+    aom_noise_strength_solver_add_measurement(&solver, i, i);
+  }
+  EXPECT_EQ(1, aom_noise_strength_solver_solve(&solver));
+  for (int i = 2; i < num_bins - 2; ++i) {
+    EXPECT_NEAR(i, solver.eqns.x[i], 0.1);
+  }
+
+  aom_noise_strength_lut_t lut;
+  EXPECT_EQ(1, aom_noise_strength_solver_fit_piecewise(&solver, 2, &lut));
+
+  ASSERT_EQ(2, lut.num_points);
+  EXPECT_NEAR(0.0, lut.points[0][0], 1e-5);
+  EXPECT_NEAR(0.0, lut.points[0][1], 0.5);
+  EXPECT_NEAR(255.0, lut.points[1][0], 1e-5);
+  EXPECT_NEAR(255.0, lut.points[1][1], 0.5);
+
+  aom_noise_strength_lut_free(&lut);
+  aom_noise_strength_solver_free(&solver);
+}
+
+TEST(NoiseStrengthSolver, SimplifiesCurve) {
+  const int num_bins = 256;
+  aom_noise_strength_solver_t solver;
+  EXPECT_EQ(1, aom_noise_strength_solver_init(&solver, num_bins, 8));
+
+  // Create a parabolic input
+  for (int i = 0; i < 256; ++i) {
+    const double x = (i - 127.5) / 63.5;
+    aom_noise_strength_solver_add_measurement(&solver, i, x * x);
+  }
+  EXPECT_EQ(1, aom_noise_strength_solver_solve(&solver));
+
+  // First try to fit an unconstrained lut
+  aom_noise_strength_lut_t lut;
+  EXPECT_EQ(1, aom_noise_strength_solver_fit_piecewise(&solver, -1, &lut));
+  ASSERT_LE(20, lut.num_points);
+  aom_noise_strength_lut_free(&lut);
+
+  // Now constrain the maximum number of points
+  const int kMaxPoints = 9;
+  EXPECT_EQ(1,
+            aom_noise_strength_solver_fit_piecewise(&solver, kMaxPoints, &lut));
+  ASSERT_EQ(kMaxPoints, lut.num_points);
+
+  // Check that the input parabola is still well represented
+  EXPECT_NEAR(0.0, lut.points[0][0], 1e-5);
+  EXPECT_NEAR(4.0, lut.points[0][1], 0.1);
+  for (int i = 1; i < lut.num_points - 1; ++i) {
+    const double x = (lut.points[i][0] - 128.) / 64.;
+    EXPECT_NEAR(x * x, lut.points[i][1], 0.1);
+  }
+  EXPECT_NEAR(255.0, lut.points[kMaxPoints - 1][0], 1e-5);
+
+  EXPECT_NEAR(4.0, lut.points[kMaxPoints - 1][1], 0.1);
+  aom_noise_strength_lut_free(&lut);
+  aom_noise_strength_solver_free(&solver);
+}
+
+TEST(NoiseStrengthLut, LutInitNegativeOrZeroSize) {
+  aom_noise_strength_lut_t lut;
+  ASSERT_FALSE(aom_noise_strength_lut_init(&lut, -1));
+  ASSERT_FALSE(aom_noise_strength_lut_init(&lut, 0));
+}
+
+TEST(NoiseStrengthLut, LutEvalSinglePoint) {
+  aom_noise_strength_lut_t lut;
+  ASSERT_TRUE(aom_noise_strength_lut_init(&lut, 1));
+  ASSERT_EQ(1, lut.num_points);
+  lut.points[0][0] = 0;
+  lut.points[0][1] = 1;
+  EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, -1));
+  EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, 0));
+  EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, 1));
+  aom_noise_strength_lut_free(&lut);
+}
+
+TEST(NoiseStrengthLut, LutEvalMultiPointInterp) {
+  const double kEps = 1e-5;
+  aom_noise_strength_lut_t lut;
+  ASSERT_TRUE(aom_noise_strength_lut_init(&lut, 4));
+  ASSERT_EQ(4, lut.num_points);
+
+  lut.points[0][0] = 0;
+  lut.points[0][1] = 0;
+
+  lut.points[1][0] = 1;
+  lut.points[1][1] = 1;
+
+  lut.points[2][0] = 2;
+  lut.points[2][1] = 1;
+
+  lut.points[3][0] = 100;
+  lut.points[3][1] = 1001;
+
+  // Test lower boundary
+  EXPECT_EQ(0, aom_noise_strength_lut_eval(&lut, -1));
+  EXPECT_EQ(0, aom_noise_strength_lut_eval(&lut, 0));
+
+  // Test first part that should be identity
+  EXPECT_NEAR(0.25, aom_noise_strength_lut_eval(&lut, 0.25), kEps);
+  EXPECT_NEAR(0.75, aom_noise_strength_lut_eval(&lut, 0.75), kEps);
+
+  // This is a constant section (should evaluate to 1)
+  EXPECT_NEAR(1.0, aom_noise_strength_lut_eval(&lut, 1.25), kEps);
+  EXPECT_NEAR(1.0, aom_noise_strength_lut_eval(&lut, 1.75), kEps);
+
+  // Test interpolation between to non-zero y coords.
+  EXPECT_NEAR(1, aom_noise_strength_lut_eval(&lut, 2), kEps);
+  EXPECT_NEAR(251, aom_noise_strength_lut_eval(&lut, 26.5), kEps);
+  EXPECT_NEAR(751, aom_noise_strength_lut_eval(&lut, 75.5), kEps);
+
+  // Test upper boundary
+  EXPECT_EQ(1001, aom_noise_strength_lut_eval(&lut, 100));
+  EXPECT_EQ(1001, aom_noise_strength_lut_eval(&lut, 101));
+
+  aom_noise_strength_lut_free(&lut);
+}
+
+TEST(NoiseModel, InitSuccessWithValidSquareShape) {
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 2, 8, 0 };
+  aom_noise_model_t model;
+
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+
+  const int kNumCoords = 12;
+  const int kCoords[][2] = { { -2, -2 }, { -1, -2 }, { 0, -2 },  { 1, -2 },
+                             { 2, -2 },  { -2, -1 }, { -1, -1 }, { 0, -1 },
+                             { 1, -1 },  { 2, -1 },  { -2, 0 },  { -1, 0 } };
+  EXPECT_EQ(kNumCoords, model.n);
+  for (int i = 0; i < kNumCoords; ++i) {
+    const int *coord = kCoords[i];
+    EXPECT_EQ(coord[0], model.coords[i][0]);
+    EXPECT_EQ(coord[1], model.coords[i][1]);
+  }
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModel, InitSuccessWithValidDiamondShape) {
+  aom_noise_model_t model;
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_DIAMOND, 2, 8, 0 };
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+  EXPECT_EQ(6, model.n);
+  const int kNumCoords = 6;
+  const int kCoords[][2] = { { 0, -2 }, { -1, -1 }, { 0, -1 },
+                             { 1, -1 }, { -2, 0 },  { -1, 0 } };
+  EXPECT_EQ(kNumCoords, model.n);
+  for (int i = 0; i < kNumCoords; ++i) {
+    const int *coord = kCoords[i];
+    EXPECT_EQ(coord[0], model.coords[i][0]);
+    EXPECT_EQ(coord[1], model.coords[i][1]);
+  }
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModel, InitFailsWithTooLargeLag) {
+  aom_noise_model_t model;
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 10, 8, 0 };
+  EXPECT_FALSE(aom_noise_model_init(&model, params));
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModel, InitFailsWithTooSmallLag) {
+  aom_noise_model_t model;
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 0, 8, 0 };
+  EXPECT_FALSE(aom_noise_model_init(&model, params));
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModel, InitFailsWithInvalidShape) {
+  aom_noise_model_t model;
+  aom_noise_model_params_t params = { aom_noise_shape(100), 3, 8, 0 };
+  EXPECT_FALSE(aom_noise_model_init(&model, params));
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModel, InitFailsWithInvalidBitdepth) {
+  aom_noise_model_t model;
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 2, 8, 0 };
+  for (int i = 0; i <= 32; ++i) {
+    params.bit_depth = i;
+    if (i == 8 || i == 10 || i == 12) {
+      EXPECT_TRUE(aom_noise_model_init(&model, params)) << "bit_depth: " << i;
+      aom_noise_model_free(&model);
+    } else {
+      EXPECT_FALSE(aom_noise_model_init(&model, params)) << "bit_depth: " << i;
+    }
+  }
+  params.bit_depth = INT_MAX;
+  EXPECT_FALSE(aom_noise_model_init(&model, params));
+}
+
+// A container template class to hold a data type and extra arguments.
+// All of these args are bundled into one struct so that we can use
+// parameterized tests on combinations of supported data types
+// (uint8_t and uint16_t) and bit depths (8, 10, 12).
+template <typename T, int bit_depth, bool use_highbd>
+struct BitDepthParams {
+  typedef T data_type_t;
+  static const int kBitDepth = bit_depth;
+  static const bool kUseHighBD = use_highbd;
+};
+
+template <typename T>
+class FlatBlockEstimatorTest : public ::testing::Test, public T {
+ public:
+  void SetUp() override { random_.Reset(171); }
+  typedef std::vector<typename T::data_type_t> VecType;
+  VecType data_;
+  libaom_test::ACMRandom random_;
+};
+
+TYPED_TEST_SUITE_P(FlatBlockEstimatorTest);
+
+TYPED_TEST_P(FlatBlockEstimatorTest, ExtractBlock) {
+  const int kBlockSize = 16;
+  aom_flat_block_finder_t flat_block_finder;
+  ASSERT_EQ(1, aom_flat_block_finder_init(&flat_block_finder, kBlockSize,
+                                          this->kBitDepth, this->kUseHighBD));
+  const double normalization = flat_block_finder.normalization;
+
+  // Test with an image of more than one block.
+  const int h = 2 * kBlockSize;
+  const int w = 2 * kBlockSize;
+  const int stride = 2 * kBlockSize;
+  this->data_.resize(h * stride, 128);
+
+  // Set up the (0,0) block to be a plane and the (0,1) block to be a
+  // checkerboard
+  const int shift = this->kBitDepth - 8;
+  for (int y = 0; y < kBlockSize; ++y) {
+    for (int x = 0; x < kBlockSize; ++x) {
+      this->data_[y * stride + x] = (-y + x + 128) << shift;
+      this->data_[y * stride + x + kBlockSize] =
+          ((x % 2 + y % 2) % 2 ? 128 - 20 : 128 + 20) << shift;
+    }
+  }
+  std::vector<double> block(kBlockSize * kBlockSize, 1);
+  std::vector<double> plane(kBlockSize * kBlockSize, 1);
+
+  // The block data should be a constant (zero) and the rest of the plane
+  // trend is covered in the plane data.
+  aom_flat_block_finder_extract_block(&flat_block_finder,
+                                      (uint8_t *)&this->data_[0], w, h, stride,
+                                      0, 0, &plane[0], &block[0]);
+  for (int y = 0; y < kBlockSize; ++y) {
+    for (int x = 0; x < kBlockSize; ++x) {
+      EXPECT_NEAR(0, block[y * kBlockSize + x], 1e-5);
+      EXPECT_NEAR((double)(this->data_[y * stride + x]) / normalization,
+                  plane[y * kBlockSize + x], 1e-5);
+    }
+  }
+
+  // The plane trend is a constant, and the block is a zero mean checkerboard.
+  aom_flat_block_finder_extract_block(&flat_block_finder,
+                                      (uint8_t *)&this->data_[0], w, h, stride,
+                                      kBlockSize, 0, &plane[0], &block[0]);
+  const int mid = 128 << shift;
+  for (int y = 0; y < kBlockSize; ++y) {
+    for (int x = 0; x < kBlockSize; ++x) {
+      EXPECT_NEAR(((double)this->data_[y * stride + x + kBlockSize] - mid) /
+                      normalization,
+                  block[y * kBlockSize + x], 1e-5);
+      EXPECT_NEAR(mid / normalization, plane[y * kBlockSize + x], 1e-5);
+    }
+  }
+  aom_flat_block_finder_free(&flat_block_finder);
+}
+
+TYPED_TEST_P(FlatBlockEstimatorTest, FindFlatBlocks) {
+  const int kBlockSize = 32;
+  aom_flat_block_finder_t flat_block_finder;
+  ASSERT_EQ(1, aom_flat_block_finder_init(&flat_block_finder, kBlockSize,
+                                          this->kBitDepth, this->kUseHighBD));
+
+  const int num_blocks_w = 8;
+  const int h = kBlockSize;
+  const int w = kBlockSize * num_blocks_w;
+  const int stride = w;
+  this->data_.resize(h * stride, 128);
+  std::vector<uint8_t> flat_blocks(num_blocks_w, 0);
+
+  const int shift = this->kBitDepth - 8;
+  for (int y = 0; y < kBlockSize; ++y) {
+    for (int x = 0; x < kBlockSize; ++x) {
+      // Block 0 (not flat): constant doesn't have enough variance to qualify
+      this->data_[y * stride + x + 0 * kBlockSize] = 128 << shift;
+
+      // Block 1 (not flat): too high of variance is hard to validate as flat
+      this->data_[y * stride + x + 1 * kBlockSize] =
+          ((uint8_t)(128 + randn(&this->random_, 5))) << shift;
+
+      // Block 2 (flat): slight checkerboard added to constant
+      const int check = (x % 2 + y % 2) % 2 ? -2 : 2;
+      this->data_[y * stride + x + 2 * kBlockSize] = (128 + check) << shift;
+
+      // Block 3 (flat): planar block with checkerboard pattern is also flat
+      this->data_[y * stride + x + 3 * kBlockSize] =
+          (y * 2 - x / 2 + 128 + check) << shift;
+
+      // Block 4 (flat): gaussian random with standard deviation 1.
+      this->data_[y * stride + x + 4 * kBlockSize] =
+          ((uint8_t)(randn(&this->random_, 1) + x + 128.0)) << shift;
+
+      // Block 5 (flat): gaussian random with standard deviation 2.
+      this->data_[y * stride + x + 5 * kBlockSize] =
+          ((uint8_t)(randn(&this->random_, 2) + y + 128.0)) << shift;
+
+      // Block 6 (not flat): too high of directional gradient.
+      const int strong_edge = x > kBlockSize / 2 ? 64 : 0;
+      this->data_[y * stride + x + 6 * kBlockSize] =
+          ((uint8_t)(randn(&this->random_, 1) + strong_edge + 128.0)) << shift;
+
+      // Block 7 (not flat): too high gradient.
+      const int big_check = ((x >> 2) % 2 + (y >> 2) % 2) % 2 ? -16 : 16;
+      this->data_[y * stride + x + 7 * kBlockSize] =
+          ((uint8_t)(randn(&this->random_, 1) + big_check + 128.0)) << shift;
+    }
+  }
+
+  EXPECT_EQ(4, aom_flat_block_finder_run(&flat_block_finder,
+                                         (uint8_t *)&this->data_[0], w, h,
+                                         stride, &flat_blocks[0]));
+
+  // First two blocks are not flat
+  EXPECT_EQ(0, flat_blocks[0]);
+  EXPECT_EQ(0, flat_blocks[1]);
+
+  // Next 4 blocks are flat.
+  EXPECT_EQ(255, flat_blocks[2]);
+  EXPECT_EQ(255, flat_blocks[3]);
+  EXPECT_EQ(255, flat_blocks[4]);
+  EXPECT_EQ(255, flat_blocks[5]);
+
+  // Last 2 are not flat by threshold
+  EXPECT_EQ(0, flat_blocks[6]);
+  EXPECT_EQ(0, flat_blocks[7]);
+
+  // Add the noise from non-flat block 1 to every block.
+  for (int y = 0; y < kBlockSize; ++y) {
+    for (int x = 0; x < kBlockSize * num_blocks_w; ++x) {
+      this->data_[y * stride + x] +=
+          (this->data_[y * stride + x % kBlockSize + kBlockSize] -
+           (128 << shift));
+    }
+  }
+  // Now the scored selection will pick the one that is most likely flat (block
+  // 0)
+  EXPECT_EQ(1, aom_flat_block_finder_run(&flat_block_finder,
+                                         (uint8_t *)&this->data_[0], w, h,
+                                         stride, &flat_blocks[0]));
+  EXPECT_EQ(1, flat_blocks[0]);
+  EXPECT_EQ(0, flat_blocks[1]);
+  EXPECT_EQ(0, flat_blocks[2]);
+  EXPECT_EQ(0, flat_blocks[3]);
+  EXPECT_EQ(0, flat_blocks[4]);
+  EXPECT_EQ(0, flat_blocks[5]);
+  EXPECT_EQ(0, flat_blocks[6]);
+  EXPECT_EQ(0, flat_blocks[7]);
+
+  aom_flat_block_finder_free(&flat_block_finder);
+}
+
+REGISTER_TYPED_TEST_SUITE_P(FlatBlockEstimatorTest, ExtractBlock,
+                            FindFlatBlocks);
+
+typedef ::testing::Types<BitDepthParams<uint8_t, 8, false>,   // lowbd
+                         BitDepthParams<uint16_t, 8, true>,   // lowbd in 16-bit
+                         BitDepthParams<uint16_t, 10, true>,  // highbd data
+                         BitDepthParams<uint16_t, 12, true> >
+    AllBitDepthParams;
+INSTANTIATE_TYPED_TEST_SUITE_P(FlatBlockInstatiation, FlatBlockEstimatorTest,
+                               AllBitDepthParams);
+
+template <typename T>
+class NoiseModelUpdateTest : public ::testing::Test, public T {
+ public:
+  static const int kWidth = 128;
+  static const int kHeight = 128;
+  static const int kBlockSize = 16;
+  static const int kNumBlocksX = kWidth / kBlockSize;
+  static const int kNumBlocksY = kHeight / kBlockSize;
+
+  void SetUp() override {
+    const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
+                                              T::kBitDepth, T::kUseHighBD };
+    ASSERT_TRUE(aom_noise_model_init(&model_, params));
+
+    random_.Reset(100171);
+
+    data_.resize(kWidth * kHeight * 3);
+    denoised_.resize(kWidth * kHeight * 3);
+    noise_.resize(kWidth * kHeight * 3);
+    renoise_.resize(kWidth * kHeight);
+    flat_blocks_.resize(kNumBlocksX * kNumBlocksY);
+
+    for (int c = 0, offset = 0; c < 3; ++c, offset += kWidth * kHeight) {
+      data_ptr_[c] = &data_[offset];
+      noise_ptr_[c] = &noise_[offset];
+      denoised_ptr_[c] = &denoised_[offset];
+      strides_[c] = kWidth;
+
+      data_ptr_raw_[c] = (uint8_t *)&data_[offset];
+      denoised_ptr_raw_[c] = (uint8_t *)&denoised_[offset];
+    }
+    chroma_sub_[0] = 0;
+    chroma_sub_[1] = 0;
+  }
+
+  int NoiseModelUpdate(int block_size = kBlockSize) {
+    return aom_noise_model_update(&model_, data_ptr_raw_, denoised_ptr_raw_,
+                                  kWidth, kHeight, strides_, chroma_sub_,
+                                  &flat_blocks_[0], block_size);
+  }
+
+  void TearDown() override { aom_noise_model_free(&model_); }
+
+ protected:
+  aom_noise_model_t model_;
+  std::vector<typename T::data_type_t> data_;
+  std::vector<typename T::data_type_t> denoised_;
+
+  std::vector<double> noise_;
+  std::vector<double> renoise_;
+  std::vector<uint8_t> flat_blocks_;
+
+  typename T::data_type_t *data_ptr_[3];
+  typename T::data_type_t *denoised_ptr_[3];
+
+  double *noise_ptr_[3];
+  int strides_[3];
+  int chroma_sub_[2];
+  libaom_test::ACMRandom random_;
+
+ private:
+  uint8_t *data_ptr_raw_[3];
+  uint8_t *denoised_ptr_raw_[3];
+};
+
+TYPED_TEST_SUITE_P(NoiseModelUpdateTest);
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateFailsNoFlatBlocks) {
+  EXPECT_EQ(AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS,
+            this->NoiseModelUpdate());
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForZeroNoiseAllFlat) {
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  this->denoised_.assign(this->denoised_.size(), 128);
+  this->data_.assign(this->denoised_.size(), 128);
+  EXPECT_EQ(AOM_NOISE_STATUS_INTERNAL_ERROR, this->NoiseModelUpdate());
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateFailsBlockSizeTooSmall) {
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  this->denoised_.assign(this->denoised_.size(), 128);
+  this->data_.assign(this->denoised_.size(), 128);
+  EXPECT_EQ(AOM_NOISE_STATUS_INVALID_ARGUMENT,
+            this->NoiseModelUpdate(6 /* block_size=6 is too small*/));
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForWhiteRandomNoise) {
+  aom_noise_model_t &model = this->model_;
+  const int width = this->kWidth;
+  const int height = this->kHeight;
+
+  const int shift = this->kBitDepth - 8;
+  for (int y = 0; y < height; ++y) {
+    for (int x = 0; x < width; ++x) {
+      this->data_ptr_[0][y * width + x] = int(64 + y + randn(&this->random_, 1))
+                                          << shift;
+      this->denoised_ptr_[0][y * width + x] = (64 + y) << shift;
+      // Make the chroma planes completely correlated with the Y plane
+      for (int c = 1; c < 3; ++c) {
+        this->data_ptr_[c][y * width + x] = this->data_ptr_[0][y * width + x];
+        this->denoised_ptr_[c][y * width + x] =
+            this->denoised_ptr_[0][y * width + x];
+      }
+    }
+  }
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  const double kCoeffEps = 0.075;
+  const int n = model.n;
+  for (int c = 0; c < 3; ++c) {
+    for (int i = 0; i < n; ++i) {
+      EXPECT_NEAR(0, model.latest_state[c].eqns.x[i], kCoeffEps);
+      EXPECT_NEAR(0, model.combined_state[c].eqns.x[i], kCoeffEps);
+    }
+    // The second and third channels are highly correlated with the first.
+    if (c > 0) {
+      ASSERT_EQ(n + 1, model.latest_state[c].eqns.n);
+      ASSERT_EQ(n + 1, model.combined_state[c].eqns.n);
+
+      EXPECT_NEAR(1, model.latest_state[c].eqns.x[n], kCoeffEps);
+      EXPECT_NEAR(1, model.combined_state[c].eqns.x[n], kCoeffEps);
+    }
+  }
+
+  // The fitted noise strength should be close to the standard deviation
+  // for all intensity bins.
+  const double kStdEps = 0.1;
+  const double normalize = 1 << shift;
+
+  for (int i = 0; i < model.latest_state[0].strength_solver.eqns.n; ++i) {
+    EXPECT_NEAR(1.0,
+                model.latest_state[0].strength_solver.eqns.x[i] / normalize,
+                kStdEps);
+    EXPECT_NEAR(1.0,
+                model.combined_state[0].strength_solver.eqns.x[i] / normalize,
+                kStdEps);
+  }
+
+  aom_noise_strength_lut_t lut;
+  aom_noise_strength_solver_fit_piecewise(
+      &model.latest_state[0].strength_solver, -1, &lut);
+  ASSERT_EQ(2, lut.num_points);
+  EXPECT_NEAR(0.0, lut.points[0][0], 1e-5);
+  EXPECT_NEAR(1.0, lut.points[0][1] / normalize, kStdEps);
+  EXPECT_NEAR((1 << this->kBitDepth) - 1, lut.points[1][0], 1e-5);
+  EXPECT_NEAR(1.0, lut.points[1][1] / normalize, kStdEps);
+  aom_noise_strength_lut_free(&lut);
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForScaledWhiteNoise) {
+  aom_noise_model_t &model = this->model_;
+  const int width = this->kWidth;
+  const int height = this->kHeight;
+
+  const double kCoeffEps = 0.055;
+  const double kLowStd = 1;
+  const double kHighStd = 4;
+  const int shift = this->kBitDepth - 8;
+  for (int y = 0; y < height; ++y) {
+    for (int x = 0; x < width; ++x) {
+      for (int c = 0; c < 3; ++c) {
+        // The image data is bimodal:
+        // Bottom half has low intensity and low noise strength
+        // Top half has high intensity and high noise strength
+        const int avg = (y < height / 2) ? 4 : 245;
+        const double std = (y < height / 2) ? kLowStd : kHighStd;
+        this->data_ptr_[c][y * width + x] =
+            ((uint8_t)std::min((int)255,
+                               (int)(2 + avg + randn(&this->random_, std))))
+            << shift;
+        this->denoised_ptr_[c][y * width + x] = (2 + avg) << shift;
+      }
+    }
+  }
+  // Label all blocks as flat for the update
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  const int n = model.n;
+  // The noise is uncorrelated spatially and with the y channel.
+  // All coefficients should be reasonably close to zero.
+  for (int c = 0; c < 3; ++c) {
+    for (int i = 0; i < n; ++i) {
+      EXPECT_NEAR(0, model.latest_state[c].eqns.x[i], kCoeffEps);
+      EXPECT_NEAR(0, model.combined_state[c].eqns.x[i], kCoeffEps);
+    }
+    if (c > 0) {
+      ASSERT_EQ(n + 1, model.latest_state[c].eqns.n);
+      ASSERT_EQ(n + 1, model.combined_state[c].eqns.n);
+
+      // The correlation to the y channel should be low (near zero)
+      EXPECT_NEAR(0, model.latest_state[c].eqns.x[n], kCoeffEps);
+      EXPECT_NEAR(0, model.combined_state[c].eqns.x[n], kCoeffEps);
+    }
+  }
+
+  // Noise strength should vary between kLowStd and kHighStd.
+  const double kStdEps = 0.15;
+  // We have to normalize fitted standard deviation based on bit depth.
+  const double normalize = (1 << shift);
+
+  ASSERT_EQ(20, model.latest_state[0].strength_solver.eqns.n);
+  for (int i = 0; i < model.latest_state[0].strength_solver.eqns.n; ++i) {
+    const double a = i / 19.0;
+    const double expected = (kLowStd * (1.0 - a) + kHighStd * a);
+    EXPECT_NEAR(expected,
+                model.latest_state[0].strength_solver.eqns.x[i] / normalize,
+                kStdEps);
+    EXPECT_NEAR(expected,
+                model.combined_state[0].strength_solver.eqns.x[i] / normalize,
+                kStdEps);
+  }
+
+  // If we fit a piecewise linear model, there should be two points:
+  // one near kLowStd at 0, and the other near kHighStd and 255.
+  aom_noise_strength_lut_t lut;
+  aom_noise_strength_solver_fit_piecewise(
+      &model.latest_state[0].strength_solver, 2, &lut);
+  ASSERT_EQ(2, lut.num_points);
+  EXPECT_NEAR(0, lut.points[0][0], 1e-4);
+  EXPECT_NEAR(kLowStd, lut.points[0][1] / normalize, kStdEps);
+  EXPECT_NEAR((1 << this->kBitDepth) - 1, lut.points[1][0], 1e-5);
+  EXPECT_NEAR(kHighStd, lut.points[1][1] / normalize, kStdEps);
+  aom_noise_strength_lut_free(&lut);
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForCorrelatedNoise) {
+  aom_noise_model_t &model = this->model_;
+  const int width = this->kWidth;
+  const int height = this->kHeight;
+  const int kNumCoeffs = 24;
+  const double kStd = 4;
+  const double kStdEps = 0.3;
+  const double kCoeffEps = 0.065;
+  // Use different coefficients for each channel
+  const double kCoeffs[3][24] = {
+    { 0.02884, -0.03356, 0.00633,  0.01757,  0.02849,  -0.04620,
+      0.02833, -0.07178, 0.07076,  -0.11603, -0.10413, -0.16571,
+      0.05158, -0.07969, 0.02640,  -0.07191, 0.02530,  0.41968,
+      0.21450, -0.00702, -0.01401, -0.03676, -0.08713, 0.44196 },
+    { 0.00269, -0.01291, -0.01513, 0.07234,  0.03208,   0.00477,
+      0.00226, -0.00254, 0.03533,  0.12841,  -0.25970,  -0.06336,
+      0.05238, -0.00845, -0.03118, 0.09043,  -0.36558,  0.48903,
+      0.00595, -0.11938, 0.02106,  0.095956, -0.350139, 0.59305 },
+    { -0.00643, -0.01080, -0.01466, 0.06951, 0.03707,  -0.00482,
+      0.00817,  -0.00909, 0.02949,  0.12181, -0.25210, -0.07886,
+      0.06083,  -0.01210, -0.03108, 0.08944, -0.35875, 0.49150,
+      0.00415,  -0.12905, 0.02870,  0.09740, -0.34610, 0.58824 },
+  };
+
+  ASSERT_EQ(model.n, kNumCoeffs);
+  this->chroma_sub_[0] = this->chroma_sub_[1] = 1;
+
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+
+  // Add different noise onto each plane
+  const int shift = this->kBitDepth - 8;
+  for (int c = 0; c < 3; ++c) {
+    noise_synth(&this->random_, model.params.lag, model.n, model.coords,
+                kCoeffs[c], this->noise_ptr_[c], width, height);
+    const int x_shift = c > 0 ? this->chroma_sub_[0] : 0;
+    const int y_shift = c > 0 ? this->chroma_sub_[1] : 0;
+    for (int y = 0; y < (height >> y_shift); ++y) {
+      for (int x = 0; x < (width >> x_shift); ++x) {
+        const uint8_t value = 64 + x / 2 + y / 4;
+        this->data_ptr_[c][y * width + x] =
+            (uint8_t(value + this->noise_ptr_[c][y * width + x] * kStd))
+            << shift;
+        this->denoised_ptr_[c][y * width + x] = value << shift;
+      }
+    }
+  }
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  // For the Y plane, the solved coefficients should be close to the original
+  const int n = model.n;
+  for (int c = 0; c < 3; ++c) {
+    for (int i = 0; i < n; ++i) {
+      EXPECT_NEAR(kCoeffs[c][i], model.latest_state[c].eqns.x[i], kCoeffEps);
+      EXPECT_NEAR(kCoeffs[c][i], model.combined_state[c].eqns.x[i], kCoeffEps);
+    }
+    // The chroma planes should be uncorrelated with the luma plane
+    if (c > 0) {
+      EXPECT_NEAR(0, model.latest_state[c].eqns.x[n], kCoeffEps);
+      EXPECT_NEAR(0, model.combined_state[c].eqns.x[n], kCoeffEps);
+    }
+    // Correlation between the coefficient vector and the fitted coefficients
+    // should be close to 1.
+    EXPECT_LT(0.98, aom_normalized_cross_correlation(
+                        model.latest_state[c].eqns.x, kCoeffs[c], kNumCoeffs));
+
+    noise_synth(&this->random_, model.params.lag, model.n, model.coords,
+                model.latest_state[c].eqns.x, &this->renoise_[0], width,
+                height);
+
+    EXPECT_TRUE(aom_noise_data_validate(&this->renoise_[0], width, height));
+  }
+
+  // Check fitted noise strength
+  const double normalize = 1 << shift;
+  for (int c = 0; c < 3; ++c) {
+    for (int i = 0; i < model.latest_state[c].strength_solver.eqns.n; ++i) {
+      EXPECT_NEAR(kStd,
+                  model.latest_state[c].strength_solver.eqns.x[i] / normalize,
+                  kStdEps);
+    }
+  }
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest,
+             NoiseStrengthChangeSignalsDifferentNoiseType) {
+  aom_noise_model_t &model = this->model_;
+  const int width = this->kWidth;
+  const int height = this->kHeight;
+  const int block_size = this->kBlockSize;
+  // Create a gradient image with std = 2 uncorrelated noise
+  const double kStd = 2;
+  const int shift = this->kBitDepth - 8;
+
+  for (int i = 0; i < width * height; ++i) {
+    const uint8_t val = (i % width) < width / 2 ? 64 : 192;
+    for (int c = 0; c < 3; ++c) {
+      this->noise_ptr_[c][i] = randn(&this->random_, 1);
+      this->data_ptr_[c][i] = ((uint8_t)(this->noise_ptr_[c][i] * kStd + val))
+                              << shift;
+      this->denoised_ptr_[c][i] = val << shift;
+    }
+  }
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  const int kNumBlocks = width * height / block_size / block_size;
+  EXPECT_EQ(kNumBlocks, model.latest_state[0].strength_solver.num_equations);
+  EXPECT_EQ(kNumBlocks, model.latest_state[1].strength_solver.num_equations);
+  EXPECT_EQ(kNumBlocks, model.latest_state[2].strength_solver.num_equations);
+  EXPECT_EQ(kNumBlocks, model.combined_state[0].strength_solver.num_equations);
+  EXPECT_EQ(kNumBlocks, model.combined_state[1].strength_solver.num_equations);
+  EXPECT_EQ(kNumBlocks, model.combined_state[2].strength_solver.num_equations);
+
+  // Bump up noise by an insignificant amount
+  for (int i = 0; i < width * height; ++i) {
+    const uint8_t val = (i % width) < width / 2 ? 64 : 192;
+    this->data_ptr_[0][i] =
+        ((uint8_t)(this->noise_ptr_[0][i] * (kStd + 0.085) + val)) << shift;
+  }
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  const double kARGainTolerance = 0.02;
+  for (int c = 0; c < 3; ++c) {
+    EXPECT_EQ(kNumBlocks, model.latest_state[c].strength_solver.num_equations);
+    EXPECT_EQ(15250, model.latest_state[c].num_observations);
+    EXPECT_NEAR(1, model.latest_state[c].ar_gain, kARGainTolerance);
+
+    EXPECT_EQ(2 * kNumBlocks,
+              model.combined_state[c].strength_solver.num_equations);
+    EXPECT_EQ(2 * 15250, model.combined_state[c].num_observations);
+    EXPECT_NEAR(1, model.combined_state[c].ar_gain, kARGainTolerance);
+  }
+
+  // Bump up the noise strength on half the image for one channel by a
+  // significant amount.
+  for (int i = 0; i < width * height; ++i) {
+    const uint8_t val = (i % width) < width / 2 ? 64 : 128;
+    if (i % width < width / 2) {
+      this->data_ptr_[0][i] =
+          ((uint8_t)(randn(&this->random_, kStd + 0.5) + val)) << shift;
+    }
+  }
+  EXPECT_EQ(AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE, this->NoiseModelUpdate());
+
+  // Since we didn't update the combined state, it should still be at 2 *
+  // num_blocks
+  EXPECT_EQ(kNumBlocks, model.latest_state[0].strength_solver.num_equations);
+  EXPECT_EQ(2 * kNumBlocks,
+            model.combined_state[0].strength_solver.num_equations);
+
+  // In normal operation, the "latest" estimate can be saved to the "combined"
+  // state for continued updates.
+  aom_noise_model_save_latest(&model);
+  for (int c = 0; c < 3; ++c) {
+    EXPECT_EQ(kNumBlocks, model.latest_state[c].strength_solver.num_equations);
+    EXPECT_EQ(15250, model.latest_state[c].num_observations);
+    EXPECT_NEAR(1, model.latest_state[c].ar_gain, kARGainTolerance);
+
+    EXPECT_EQ(kNumBlocks,
+              model.combined_state[c].strength_solver.num_equations);
+    EXPECT_EQ(15250, model.combined_state[c].num_observations);
+    EXPECT_NEAR(1, model.combined_state[c].ar_gain, kARGainTolerance);
+  }
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, NoiseCoeffsSignalsDifferentNoiseType) {
+  aom_noise_model_t &model = this->model_;
+  const int width = this->kWidth;
+  const int height = this->kHeight;
+  const double kCoeffs[2][24] = {
+    { 0.02884, -0.03356, 0.00633,  0.01757,  0.02849,  -0.04620,
+      0.02833, -0.07178, 0.07076,  -0.11603, -0.10413, -0.16571,
+      0.05158, -0.07969, 0.02640,  -0.07191, 0.02530,  0.41968,
+      0.21450, -0.00702, -0.01401, -0.03676, -0.08713, 0.44196 },
+    { 0.00269, -0.01291, -0.01513, 0.07234,  0.03208,   0.00477,
+      0.00226, -0.00254, 0.03533,  0.12841,  -0.25970,  -0.06336,
+      0.05238, -0.00845, -0.03118, 0.09043,  -0.36558,  0.48903,
+      0.00595, -0.11938, 0.02106,  0.095956, -0.350139, 0.59305 }
+  };
+
+  noise_synth(&this->random_, model.params.lag, model.n, model.coords,
+              kCoeffs[0], this->noise_ptr_[0], width, height);
+  for (int i = 0; i < width * height; ++i) {
+    this->data_ptr_[0][i] = (uint8_t)(128 + this->noise_ptr_[0][i]);
+  }
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  // Now try with the second set of AR coefficients
+  noise_synth(&this->random_, model.params.lag, model.n, model.coords,
+              kCoeffs[1], this->noise_ptr_[0], width, height);
+  for (int i = 0; i < width * height; ++i) {
+    this->data_ptr_[0][i] = (uint8_t)(128 + this->noise_ptr_[0][i]);
+  }
+  EXPECT_EQ(AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE, this->NoiseModelUpdate());
+}
+REGISTER_TYPED_TEST_SUITE_P(NoiseModelUpdateTest, UpdateFailsNoFlatBlocks,
+                            UpdateSuccessForZeroNoiseAllFlat,
+                            UpdateFailsBlockSizeTooSmall,
+                            UpdateSuccessForWhiteRandomNoise,
+                            UpdateSuccessForScaledWhiteNoise,
+                            UpdateSuccessForCorrelatedNoise,
+                            NoiseStrengthChangeSignalsDifferentNoiseType,
+                            NoiseCoeffsSignalsDifferentNoiseType);
+
+INSTANTIATE_TYPED_TEST_SUITE_P(NoiseModelUpdateTestInstatiation,
+                               NoiseModelUpdateTest, AllBitDepthParams);
+
+TEST(NoiseModelGetGrainParameters, TestLagSize) {
+  aom_film_grain_t film_grain;
+  for (int lag = 1; lag <= 3; ++lag) {
+    aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
+    aom_noise_model_t model;
+    EXPECT_TRUE(aom_noise_model_init(&model, params));
+    EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
+    EXPECT_EQ(lag, film_grain.ar_coeff_lag);
+    aom_noise_model_free(&model);
+  }
+
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 4, 8, 0 };
+  aom_noise_model_t model;
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+  EXPECT_FALSE(aom_noise_model_get_grain_parameters(&model, &film_grain));
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModelGetGrainParameters, TestARCoeffShiftBounds) {
+  struct TestCase {
+    double max_input_value;
+    int expected_ar_coeff_shift;
+    int expected_value;
+  };
+  const int lag = 1;
+  const int kNumTestCases = 19;
+  const TestCase test_cases[] = {
+    // Test cases for ar_coeff_shift = 9
+    { 0, 9, 0 },
+    { 0.125, 9, 64 },
+    { -0.125, 9, -64 },
+    { 0.2499, 9, 127 },
+    { -0.25, 9, -128 },
+    // Test cases for ar_coeff_shift = 8
+    { 0.25, 8, 64 },
+    { -0.2501, 8, -64 },
+    { 0.499, 8, 127 },
+    { -0.5, 8, -128 },
+    // Test cases for ar_coeff_shift = 7
+    { 0.5, 7, 64 },
+    { -0.5001, 7, -64 },
+    { 0.999, 7, 127 },
+    { -1, 7, -128 },
+    // Test cases for ar_coeff_shift = 6
+    { 1.0, 6, 64 },
+    { -1.0001, 6, -64 },
+    { 2.0, 6, 127 },
+    { -2.0, 6, -128 },
+    { 4, 6, 127 },
+    { -4, 6, -128 },
+  };
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
+  aom_noise_model_t model;
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+
+  for (int i = 0; i < kNumTestCases; ++i) {
+    const TestCase &test_case = test_cases[i];
+    model.combined_state[0].eqns.x[0] = test_case.max_input_value;
+
+    aom_film_grain_t film_grain;
+    EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
+    EXPECT_EQ(1, film_grain.ar_coeff_lag);
+    EXPECT_EQ(test_case.expected_ar_coeff_shift, film_grain.ar_coeff_shift);
+    EXPECT_EQ(test_case.expected_value, film_grain.ar_coeffs_y[0]);
+  }
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModelGetGrainParameters, TestNoiseStrengthShiftBounds) {
+  struct TestCase {
+    double max_input_value;
+    int expected_scaling_shift;
+    int expected_value;
+  };
+  const int kNumTestCases = 10;
+  const TestCase test_cases[] = {
+    { 0, 11, 0 },      { 1, 11, 64 },     { 2, 11, 128 }, { 3.99, 11, 255 },
+    { 4, 10, 128 },    { 7.99, 10, 255 }, { 8, 9, 128 },  { 16, 8, 128 },
+    { 31.99, 8, 255 }, { 64, 8, 255 },  // clipped
+  };
+  const int lag = 1;
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
+  aom_noise_model_t model;
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+
+  for (int i = 0; i < kNumTestCases; ++i) {
+    const TestCase &test_case = test_cases[i];
+    aom_equation_system_t &eqns = model.combined_state[0].strength_solver.eqns;
+    // Set the fitted scale parameters to be a constant value.
+    for (int j = 0; j < eqns.n; ++j) {
+      eqns.x[j] = test_case.max_input_value;
+    }
+    aom_film_grain_t film_grain;
+    EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
+    // We expect a single constant segemnt
+    EXPECT_EQ(test_case.expected_scaling_shift, film_grain.scaling_shift);
+    EXPECT_EQ(test_case.expected_value, film_grain.scaling_points_y[0][1]);
+    EXPECT_EQ(test_case.expected_value, film_grain.scaling_points_y[1][1]);
+  }
+  aom_noise_model_free(&model);
+}
+
+// The AR coefficients are the same inputs used to generate "Test 2" in the test
+// vectors
+TEST(NoiseModelGetGrainParameters, GetGrainParametersReal) {
+  const double kInputCoeffsY[] = { 0.0315,  0.0073,  0.0218,  0.00235, 0.00511,
+                                   -0.0222, 0.0627,  -0.022,  0.05575, -0.1816,
+                                   0.0107,  -0.1966, 0.00065, -0.0809, 0.04934,
+                                   -0.1349, -0.0352, 0.41772, 0.27973, 0.04207,
+                                   -0.0429, -0.1372, 0.06193, 0.52032 };
+  const double kInputCoeffsCB[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,
+                                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5 };
+  const double kInputCoeffsCR[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0,
+                                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.5 };
+  const int kExpectedARCoeffsY[] = { 4,  1,   3,  0,   1,  -3,  8, -3,
+                                     7,  -23, 1,  -25, 0,  -10, 6, -17,
+                                     -5, 53,  36, 5,   -5, -18, 8, 67 };
+  const int kExpectedARCoeffsCB[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84 };
+  const int kExpectedARCoeffsCR[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0,
+                                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -126 };
+  // Scaling function is initialized analytically with a sqrt function.
+  const int kNumScalingPointsY = 12;
+  const int kExpectedScalingPointsY[][2] = {
+    { 0, 0 },     { 13, 44 },   { 27, 62 },   { 40, 76 },
+    { 54, 88 },   { 67, 98 },   { 94, 117 },  { 121, 132 },
+    { 148, 146 }, { 174, 159 }, { 201, 171 }, { 255, 192 },
+  };
+
+  const int lag = 3;
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
+  aom_noise_model_t model;
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+
+  // Setup the AR coeffs
+  memcpy(model.combined_state[0].eqns.x, kInputCoeffsY, sizeof(kInputCoeffsY));
+  memcpy(model.combined_state[1].eqns.x, kInputCoeffsCB,
+         sizeof(kInputCoeffsCB));
+  memcpy(model.combined_state[2].eqns.x, kInputCoeffsCR,
+         sizeof(kInputCoeffsCR));
+  for (int i = 0; i < model.combined_state[0].strength_solver.num_bins; ++i) {
+    const double x =
+        ((double)i) / (model.combined_state[0].strength_solver.num_bins - 1.0);
+    model.combined_state[0].strength_solver.eqns.x[i] = 6 * sqrt(x);
+    model.combined_state[1].strength_solver.eqns.x[i] = 3;
+    model.combined_state[2].strength_solver.eqns.x[i] = 2;
+
+    // Inject some observations into the strength solver, as during film grain
+    // parameter extraction an estimate of the average strength will be used to
+    // adjust correlation.
+    const int n = model.combined_state[0].strength_solver.num_bins;
+    for (int j = 0; j < model.combined_state[0].strength_solver.num_bins; ++j) {
+      model.combined_state[0].strength_solver.eqns.A[i * n + j] = 1;
+      model.combined_state[1].strength_solver.eqns.A[i * n + j] = 1;
+      model.combined_state[2].strength_solver.eqns.A[i * n + j] = 1;
+    }
+  }
+
+  aom_film_grain_t film_grain;
+  EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
+  EXPECT_EQ(lag, film_grain.ar_coeff_lag);
+  EXPECT_EQ(3, film_grain.ar_coeff_lag);
+  EXPECT_EQ(7, film_grain.ar_coeff_shift);
+  EXPECT_EQ(10, film_grain.scaling_shift);
+  EXPECT_EQ(kNumScalingPointsY, film_grain.num_y_points);
+  EXPECT_EQ(1, film_grain.update_parameters);
+  EXPECT_EQ(1, film_grain.apply_grain);
+
+  const int kNumARCoeffs = 24;
+  for (int i = 0; i < kNumARCoeffs; ++i) {
+    EXPECT_EQ(kExpectedARCoeffsY[i], film_grain.ar_coeffs_y[i]);
+  }
+  for (int i = 0; i < kNumARCoeffs + 1; ++i) {
+    EXPECT_EQ(kExpectedARCoeffsCB[i], film_grain.ar_coeffs_cb[i]);
+  }
+  for (int i = 0; i < kNumARCoeffs + 1; ++i) {
+    EXPECT_EQ(kExpectedARCoeffsCR[i], film_grain.ar_coeffs_cr[i]);
+  }
+  for (int i = 0; i < kNumScalingPointsY; ++i) {
+    EXPECT_EQ(kExpectedScalingPointsY[i][0], film_grain.scaling_points_y[i][0]);
+    EXPECT_EQ(kExpectedScalingPointsY[i][1], film_grain.scaling_points_y[i][1]);
+  }
+
+  // CB strength should just be a piecewise segment
+  EXPECT_EQ(2, film_grain.num_cb_points);
+  EXPECT_EQ(0, film_grain.scaling_points_cb[0][0]);
+  EXPECT_EQ(255, film_grain.scaling_points_cb[1][0]);
+  EXPECT_EQ(96, film_grain.scaling_points_cb[0][1]);
+  EXPECT_EQ(96, film_grain.scaling_points_cb[1][1]);
+
+  // CR strength should just be a piecewise segment
+  EXPECT_EQ(2, film_grain.num_cr_points);
+  EXPECT_EQ(0, film_grain.scaling_points_cr[0][0]);
+  EXPECT_EQ(255, film_grain.scaling_points_cr[1][0]);
+  EXPECT_EQ(64, film_grain.scaling_points_cr[0][1]);
+  EXPECT_EQ(64, film_grain.scaling_points_cr[1][1]);
+
+  EXPECT_EQ(128, film_grain.cb_mult);
+  EXPECT_EQ(192, film_grain.cb_luma_mult);
+  EXPECT_EQ(256, film_grain.cb_offset);
+  EXPECT_EQ(128, film_grain.cr_mult);
+  EXPECT_EQ(192, film_grain.cr_luma_mult);
+  EXPECT_EQ(256, film_grain.cr_offset);
+  EXPECT_EQ(0, film_grain.chroma_scaling_from_luma);
+  EXPECT_EQ(0, film_grain.grain_scale_shift);
+
+  aom_noise_model_free(&model);
+}
+
+template <typename T>
+class WienerDenoiseTest : public ::testing::Test, public T {
+ public:
+  static void SetUpTestSuite() { aom_dsp_rtcd(); }
+
+ protected:
+  void SetUp() override {
+    static const float kNoiseLevel = 5.f;
+    static const float kStd = 4.0;
+    static const double kMaxValue = (1 << T::kBitDepth) - 1;
+
+    chroma_sub_[0] = 1;
+    chroma_sub_[1] = 1;
+    stride_[0] = kWidth;
+    stride_[1] = kWidth / 2;
+    stride_[2] = kWidth / 2;
+    for (int k = 0; k < 3; ++k) {
+      data_[k].resize(kWidth * kHeight);
+      denoised_[k].resize(kWidth * kHeight);
+      noise_psd_[k].resize(kBlockSize * kBlockSize);
+    }
+
+    const double kCoeffsY[] = { 0.0406, -0.116, -0.078, -0.152, 0.0033, -0.093,
+                                0.048,  0.404,  0.2353, -0.035, -0.093, 0.441 };
+    const int kCoords[12][2] = {
+      { -2, -2 }, { -1, -2 }, { 0, -2 }, { 1, -2 }, { 2, -2 }, { -2, -1 },
+      { -1, -1 }, { 0, -1 },  { 1, -1 }, { 2, -1 }, { -2, 0 }, { -1, 0 }
+    };
+    const int kLag = 2;
+    const int kLength = 12;
+    libaom_test::ACMRandom random;
+    std::vector<double> noise(kWidth * kHeight);
+    noise_synth(&random, kLag, kLength, kCoords, kCoeffsY, &noise[0], kWidth,
+                kHeight);
+    noise_psd_[0] = get_noise_psd(&noise[0], kWidth, kHeight, kBlockSize);
+    for (int i = 0; i < kBlockSize * kBlockSize; ++i) {
+      noise_psd_[0][i] = (float)(noise_psd_[0][i] * kStd * kStd * kScaleNoise *
+                                 kScaleNoise / (kMaxValue * kMaxValue));
+    }
+
+    float psd_value =
+        aom_noise_psd_get_default_value(kBlockSizeChroma, kNoiseLevel);
+    for (int i = 0; i < kBlockSizeChroma * kBlockSizeChroma; ++i) {
+      noise_psd_[1][i] = psd_value;
+      noise_psd_[2][i] = psd_value;
+    }
+    for (int y = 0; y < kHeight; ++y) {
+      for (int x = 0; x < kWidth; ++x) {
+        data_[0][y * stride_[0] + x] = (typename T::data_type_t)fclamp(
+            (x + noise[y * stride_[0] + x] * kStd) * kScaleNoise, 0, kMaxValue);
+      }
+    }
+
+    for (int c = 1; c < 3; ++c) {
+      for (int y = 0; y < (kHeight >> 1); ++y) {
+        for (int x = 0; x < (kWidth >> 1); ++x) {
+          data_[c][y * stride_[c] + x] = (typename T::data_type_t)fclamp(
+              (x + randn(&random, kStd)) * kScaleNoise, 0, kMaxValue);
+        }
+      }
+    }
+    for (int k = 0; k < 3; ++k) {
+      noise_psd_ptrs_[k] = &noise_psd_[k][0];
+    }
+  }
+  static const int kBlockSize = 32;
+  static const int kBlockSizeChroma = 16;
+  static const int kWidth = 256;
+  static const int kHeight = 256;
+  static const int kScaleNoise = 1 << (T::kBitDepth - 8);
+
+  std::vector<typename T::data_type_t> data_[3];
+  std::vector<typename T::data_type_t> denoised_[3];
+  std::vector<float> noise_psd_[3];
+  int chroma_sub_[2];
+  float *noise_psd_ptrs_[3];
+  int stride_[3];
+};
+
+TYPED_TEST_SUITE_P(WienerDenoiseTest);
+
+TYPED_TEST_P(WienerDenoiseTest, InvalidBlockSize) {
+  const uint8_t *const data_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->data_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[2][0]),
+  };
+  uint8_t *denoised_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->denoised_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[2][0]),
+  };
+  EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
+                                     this->kHeight, this->stride_,
+                                     this->chroma_sub_, this->noise_psd_ptrs_,
+                                     18, this->kBitDepth, this->kUseHighBD));
+  EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
+                                     this->kHeight, this->stride_,
+                                     this->chroma_sub_, this->noise_psd_ptrs_,
+                                     48, this->kBitDepth, this->kUseHighBD));
+  EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
+                                     this->kHeight, this->stride_,
+                                     this->chroma_sub_, this->noise_psd_ptrs_,
+                                     64, this->kBitDepth, this->kUseHighBD));
+}
+
+TYPED_TEST_P(WienerDenoiseTest, InvalidChromaSubsampling) {
+  const uint8_t *const data_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->data_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[2][0]),
+  };
+  uint8_t *denoised_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->denoised_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[2][0]),
+  };
+  int chroma_sub[2] = { 1, 0 };
+  EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
+                                     this->kHeight, this->stride_, chroma_sub,
+                                     this->noise_psd_ptrs_, 32, this->kBitDepth,
+                                     this->kUseHighBD));
+
+  chroma_sub[0] = 0;
+  chroma_sub[1] = 1;
+  EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
+                                     this->kHeight, this->stride_, chroma_sub,
+                                     this->noise_psd_ptrs_, 32, this->kBitDepth,
+                                     this->kUseHighBD));
+}
+
+TYPED_TEST_P(WienerDenoiseTest, GradientTest) {
+  const int width = this->kWidth;
+  const int height = this->kHeight;
+  const int block_size = this->kBlockSize;
+  const uint8_t *const data_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->data_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[2][0]),
+  };
+  uint8_t *denoised_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->denoised_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[2][0]),
+  };
+  const int ret = aom_wiener_denoise_2d(
+      data_ptrs, denoised_ptrs, width, height, this->stride_, this->chroma_sub_,
+      this->noise_psd_ptrs_, block_size, this->kBitDepth, this->kUseHighBD);
+  EXPECT_EQ(1, ret);
+
+  // Check the noise on the denoised image (from the analytical gradient)
+  // and make sure that it is less than what we added.
+  for (int c = 0; c < 3; ++c) {
+    std::vector<double> measured_noise(width * height);
+
+    double var = 0;
+    const int shift = (c > 0);
+    for (int x = 0; x < (width >> shift); ++x) {
+      for (int y = 0; y < (height >> shift); ++y) {
+        const double diff = this->denoised_[c][y * this->stride_[c] + x] -
+                            x * this->kScaleNoise;
+        var += diff * diff;
+        measured_noise[y * width + x] = diff;
+      }
+    }
+    var /= (width * height);
+    const double std = sqrt(std::max(0.0, var));
+    EXPECT_LE(std, 1.25f * this->kScaleNoise);
+    if (c == 0) {
+      std::vector<float> measured_psd =
+          get_noise_psd(&measured_noise[0], width, height, block_size);
+      std::vector<double> measured_psd_d(block_size * block_size);
+      std::vector<double> noise_psd_d(block_size * block_size);
+      std::copy(measured_psd.begin(), measured_psd.end(),
+                measured_psd_d.begin());
+      std::copy(this->noise_psd_[0].begin(), this->noise_psd_[0].end(),
+                noise_psd_d.begin());
+      EXPECT_LT(
+          aom_normalized_cross_correlation(&measured_psd_d[0], &noise_psd_d[0],
+                                           (int)(noise_psd_d.size())),
+          0.35);
+    }
+  }
+}
+
+REGISTER_TYPED_TEST_SUITE_P(WienerDenoiseTest, InvalidBlockSize,
+                            InvalidChromaSubsampling, GradientTest);
+
+INSTANTIATE_TYPED_TEST_SUITE_P(WienerDenoiseTestInstatiation, WienerDenoiseTest,
+                               AllBitDepthParams);
diff --git a/third_party/aom/test/obmc_sad_test.cc b/third_party/aom/test/obmc_sad_test.cc
new file mode 100644
index 0000000000..967b677666
--- /dev/null
+++ b/third_party/aom/test/obmc_sad_test.cc
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/function_equivalence_test.h"
+#include "test/register_state_check.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom/aom_integer.h"
+
+#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
+
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+static const int kIterations = 1000;
+static const int kMaskMax = 64;
+
+typedef unsigned int (*ObmcSadF)(const uint8_t *pre, int pre_stride,
+                                 const int32_t *wsrc, const int32_t *mask);
+typedef libaom_test::FuncParam<ObmcSadF> TestFuncs;
+
+////////////////////////////////////////////////////////////////////////////////
+// 8 bit
+////////////////////////////////////////////////////////////////////////////////
+
+class ObmcSadTest : public FunctionEquivalenceTest<ObmcSadF> {};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ObmcSadTest);
+
+TEST_P(ObmcSadTest, RandomValues) {
+  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    const int pre_stride = rng_(MAX_SB_SIZE + 1);
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = rng_.Rand8();
+      wsrc[i] = rng_.Rand8() * rng_(kMaskMax * kMaskMax + 1);
+      mask[i] = rng_(kMaskMax * kMaskMax + 1);
+    }
+
+    const unsigned int ref_res = params_.ref_func(pre, pre_stride, wsrc, mask);
+    unsigned int tst_res;
+    API_REGISTER_STATE_CHECK(tst_res =
+                                 params_.tst_func(pre, pre_stride, wsrc, mask));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+TEST_P(ObmcSadTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
+    const int pre_stride = iter;
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = UINT8_MAX;
+      wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax;
+      mask[i] = kMaskMax * kMaskMax;
+    }
+
+    const unsigned int ref_res = params_.ref_func(pre, pre_stride, wsrc, mask);
+    unsigned int tst_res;
+    API_REGISTER_STATE_CHECK(tst_res =
+                                 params_.tst_func(pre, pre_stride, wsrc, mask));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+#if HAVE_SSE4_1
+const ObmcSadTest::ParamType sse4_functions[] = {
+  TestFuncs(aom_obmc_sad128x128_c, aom_obmc_sad128x128_sse4_1),
+  TestFuncs(aom_obmc_sad128x64_c, aom_obmc_sad128x64_sse4_1),
+  TestFuncs(aom_obmc_sad64x128_c, aom_obmc_sad64x128_sse4_1),
+  TestFuncs(aom_obmc_sad64x64_c, aom_obmc_sad64x64_sse4_1),
+  TestFuncs(aom_obmc_sad64x32_c, aom_obmc_sad64x32_sse4_1),
+  TestFuncs(aom_obmc_sad32x64_c, aom_obmc_sad32x64_sse4_1),
+  TestFuncs(aom_obmc_sad32x32_c, aom_obmc_sad32x32_sse4_1),
+  TestFuncs(aom_obmc_sad32x16_c, aom_obmc_sad32x16_sse4_1),
+  TestFuncs(aom_obmc_sad16x32_c, aom_obmc_sad16x32_sse4_1),
+  TestFuncs(aom_obmc_sad16x16_c, aom_obmc_sad16x16_sse4_1),
+  TestFuncs(aom_obmc_sad16x8_c, aom_obmc_sad16x8_sse4_1),
+  TestFuncs(aom_obmc_sad8x16_c, aom_obmc_sad8x16_sse4_1),
+  TestFuncs(aom_obmc_sad8x8_c, aom_obmc_sad8x8_sse4_1),
+  TestFuncs(aom_obmc_sad8x4_c, aom_obmc_sad8x4_sse4_1),
+  TestFuncs(aom_obmc_sad4x8_c, aom_obmc_sad4x8_sse4_1),
+  TestFuncs(aom_obmc_sad4x4_c, aom_obmc_sad4x4_sse4_1),
+
+  TestFuncs(aom_obmc_sad64x16_c, aom_obmc_sad64x16_sse4_1),
+  TestFuncs(aom_obmc_sad16x64_c, aom_obmc_sad16x64_sse4_1),
+  TestFuncs(aom_obmc_sad32x8_c, aom_obmc_sad32x8_sse4_1),
+  TestFuncs(aom_obmc_sad8x32_c, aom_obmc_sad8x32_sse4_1),
+  TestFuncs(aom_obmc_sad16x4_c, aom_obmc_sad16x4_sse4_1),
+  TestFuncs(aom_obmc_sad4x16_c, aom_obmc_sad4x16_sse4_1),
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE4_1, ObmcSadTest,
+                         ::testing::ValuesIn(sse4_functions));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+const ObmcSadTest::ParamType avx2_functions[] = {
+  TestFuncs(aom_obmc_sad128x128_c, aom_obmc_sad128x128_avx2),
+  TestFuncs(aom_obmc_sad128x64_c, aom_obmc_sad128x64_avx2),
+  TestFuncs(aom_obmc_sad64x128_c, aom_obmc_sad64x128_avx2),
+  TestFuncs(aom_obmc_sad64x64_c, aom_obmc_sad64x64_avx2),
+  TestFuncs(aom_obmc_sad64x32_c, aom_obmc_sad64x32_avx2),
+  TestFuncs(aom_obmc_sad32x64_c, aom_obmc_sad32x64_avx2),
+  TestFuncs(aom_obmc_sad32x32_c, aom_obmc_sad32x32_avx2),
+  TestFuncs(aom_obmc_sad32x16_c, aom_obmc_sad32x16_avx2),
+  TestFuncs(aom_obmc_sad16x32_c, aom_obmc_sad16x32_avx2),
+  TestFuncs(aom_obmc_sad16x16_c, aom_obmc_sad16x16_avx2),
+  TestFuncs(aom_obmc_sad16x8_c, aom_obmc_sad16x8_avx2),
+  TestFuncs(aom_obmc_sad8x16_c, aom_obmc_sad8x16_avx2),
+  TestFuncs(aom_obmc_sad8x8_c, aom_obmc_sad8x8_avx2),
+  TestFuncs(aom_obmc_sad8x4_c, aom_obmc_sad8x4_avx2),
+  TestFuncs(aom_obmc_sad4x8_c, aom_obmc_sad4x8_avx2),
+  TestFuncs(aom_obmc_sad4x4_c, aom_obmc_sad4x4_avx2),
+
+  TestFuncs(aom_obmc_sad64x16_c, aom_obmc_sad64x16_avx2),
+  TestFuncs(aom_obmc_sad16x64_c, aom_obmc_sad16x64_avx2),
+  TestFuncs(aom_obmc_sad32x8_c, aom_obmc_sad32x8_avx2),
+  TestFuncs(aom_obmc_sad8x32_c, aom_obmc_sad8x32_avx2),
+  TestFuncs(aom_obmc_sad16x4_c, aom_obmc_sad16x4_avx2),
+  TestFuncs(aom_obmc_sad4x16_c, aom_obmc_sad4x16_avx2),
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, ObmcSadTest,
+                         ::testing::ValuesIn(avx2_functions));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+const ObmcSadTest::ParamType neon_functions[] = {
+  TestFuncs(aom_obmc_sad128x128_c, aom_obmc_sad128x128_neon),
+  TestFuncs(aom_obmc_sad128x64_c, aom_obmc_sad128x64_neon),
+  TestFuncs(aom_obmc_sad64x128_c, aom_obmc_sad64x128_neon),
+  TestFuncs(aom_obmc_sad64x64_c, aom_obmc_sad64x64_neon),
+  TestFuncs(aom_obmc_sad64x32_c, aom_obmc_sad64x32_neon),
+  TestFuncs(aom_obmc_sad32x64_c, aom_obmc_sad32x64_neon),
+  TestFuncs(aom_obmc_sad32x32_c, aom_obmc_sad32x32_neon),
+  TestFuncs(aom_obmc_sad32x16_c, aom_obmc_sad32x16_neon),
+  TestFuncs(aom_obmc_sad16x32_c, aom_obmc_sad16x32_neon),
+  TestFuncs(aom_obmc_sad16x16_c, aom_obmc_sad16x16_neon),
+  TestFuncs(aom_obmc_sad16x8_c, aom_obmc_sad16x8_neon),
+  TestFuncs(aom_obmc_sad8x16_c, aom_obmc_sad8x16_neon),
+  TestFuncs(aom_obmc_sad8x8_c, aom_obmc_sad8x8_neon),
+  TestFuncs(aom_obmc_sad8x4_c, aom_obmc_sad8x4_neon),
+  TestFuncs(aom_obmc_sad4x8_c, aom_obmc_sad4x8_neon),
+  TestFuncs(aom_obmc_sad4x4_c, aom_obmc_sad4x4_neon),
+
+  TestFuncs(aom_obmc_sad64x16_c, aom_obmc_sad64x16_neon),
+  TestFuncs(aom_obmc_sad16x64_c, aom_obmc_sad16x64_neon),
+  TestFuncs(aom_obmc_sad32x8_c, aom_obmc_sad32x8_neon),
+  TestFuncs(aom_obmc_sad8x32_c, aom_obmc_sad8x32_neon),
+  TestFuncs(aom_obmc_sad16x4_c, aom_obmc_sad16x4_neon),
+  TestFuncs(aom_obmc_sad4x16_c, aom_obmc_sad4x16_neon),
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, ObmcSadTest,
+                         ::testing::ValuesIn(neon_functions));
+#endif  // HAVE_NEON
+
+#if CONFIG_AV1_HIGHBITDEPTH
+////////////////////////////////////////////////////////////////////////////////
+// High bit-depth
+////////////////////////////////////////////////////////////////////////////////
+
+class ObmcSadHBDTest : public FunctionEquivalenceTest<ObmcSadF> {};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ObmcSadHBDTest);
+
+TEST_P(ObmcSadHBDTest, RandomValues) {
+  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    const int pre_stride = rng_(MAX_SB_SIZE + 1);
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = rng_(1 << 12);
+      wsrc[i] = rng_(1 << 12) * rng_(kMaskMax * kMaskMax + 1);
+      mask[i] = rng_(kMaskMax * kMaskMax + 1);
+    }
+
+    const unsigned int ref_res =
+        params_.ref_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask);
+    unsigned int tst_res;
+    API_REGISTER_STATE_CHECK(
+        tst_res =
+            params_.tst_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+TEST_P(ObmcSadHBDTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
+    const int pre_stride = iter;
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = (1 << 12) - 1;
+      wsrc[i] = ((1 << 12) - 1) * kMaskMax * kMaskMax;
+      mask[i] = kMaskMax * kMaskMax;
+    }
+
+    const unsigned int ref_res =
+        params_.ref_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask);
+    unsigned int tst_res;
+    API_REGISTER_STATE_CHECK(
+        tst_res =
+            params_.tst_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+#if HAVE_NEON
+ObmcSadHBDTest::ParamType neon_functions_hbd[] = {
+  TestFuncs(aom_highbd_obmc_sad128x128_c, aom_highbd_obmc_sad128x128_neon),
+  TestFuncs(aom_highbd_obmc_sad128x64_c, aom_highbd_obmc_sad128x64_neon),
+  TestFuncs(aom_highbd_obmc_sad64x128_c, aom_highbd_obmc_sad64x128_neon),
+  TestFuncs(aom_highbd_obmc_sad64x64_c, aom_highbd_obmc_sad64x64_neon),
+  TestFuncs(aom_highbd_obmc_sad64x32_c, aom_highbd_obmc_sad64x32_neon),
+  TestFuncs(aom_highbd_obmc_sad32x64_c, aom_highbd_obmc_sad32x64_neon),
+  TestFuncs(aom_highbd_obmc_sad32x32_c, aom_highbd_obmc_sad32x32_neon),
+  TestFuncs(aom_highbd_obmc_sad32x16_c, aom_highbd_obmc_sad32x16_neon),
+  TestFuncs(aom_highbd_obmc_sad16x32_c, aom_highbd_obmc_sad16x32_neon),
+  TestFuncs(aom_highbd_obmc_sad16x16_c, aom_highbd_obmc_sad16x16_neon),
+  TestFuncs(aom_highbd_obmc_sad16x8_c, aom_highbd_obmc_sad16x8_neon),
+  TestFuncs(aom_highbd_obmc_sad8x16_c, aom_highbd_obmc_sad8x16_neon),
+  TestFuncs(aom_highbd_obmc_sad8x8_c, aom_highbd_obmc_sad8x8_neon),
+  TestFuncs(aom_highbd_obmc_sad8x4_c, aom_highbd_obmc_sad8x4_neon),
+  TestFuncs(aom_highbd_obmc_sad4x8_c, aom_highbd_obmc_sad4x8_neon),
+  TestFuncs(aom_highbd_obmc_sad4x4_c, aom_highbd_obmc_sad4x4_neon),
+#if !CONFIG_REALTIME_ONLY
+  TestFuncs(aom_highbd_obmc_sad64x16_c, aom_highbd_obmc_sad64x16_neon),
+  TestFuncs(aom_highbd_obmc_sad16x64_c, aom_highbd_obmc_sad16x64_neon),
+  TestFuncs(aom_highbd_obmc_sad32x8_c, aom_highbd_obmc_sad32x8_neon),
+  TestFuncs(aom_highbd_obmc_sad8x32_c, aom_highbd_obmc_sad8x32_neon),
+  TestFuncs(aom_highbd_obmc_sad16x4_c, aom_highbd_obmc_sad16x4_neon),
+  TestFuncs(aom_highbd_obmc_sad4x16_c, aom_highbd_obmc_sad4x16_neon),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, ObmcSadHBDTest,
+                         ::testing::ValuesIn(neon_functions_hbd));
+#endif  // HAVE_NEON
+
+#if HAVE_SSE4_1
+ObmcSadHBDTest::ParamType sse4_functions_hbd[] = {
+  TestFuncs(aom_highbd_obmc_sad128x128_c, aom_highbd_obmc_sad128x128_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad128x64_c, aom_highbd_obmc_sad128x64_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad64x128_c, aom_highbd_obmc_sad64x128_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad64x64_c, aom_highbd_obmc_sad64x64_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad64x32_c, aom_highbd_obmc_sad64x32_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad32x64_c, aom_highbd_obmc_sad32x64_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad32x32_c, aom_highbd_obmc_sad32x32_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad32x16_c, aom_highbd_obmc_sad32x16_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad16x32_c, aom_highbd_obmc_sad16x32_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad16x16_c, aom_highbd_obmc_sad16x16_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad16x8_c, aom_highbd_obmc_sad16x8_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad8x16_c, aom_highbd_obmc_sad8x16_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad8x8_c, aom_highbd_obmc_sad8x8_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad8x4_c, aom_highbd_obmc_sad8x4_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad4x8_c, aom_highbd_obmc_sad4x8_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad4x4_c, aom_highbd_obmc_sad4x4_sse4_1),
+
+  TestFuncs(aom_highbd_obmc_sad64x16_c, aom_highbd_obmc_sad64x16_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad16x64_c, aom_highbd_obmc_sad16x64_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad32x8_c, aom_highbd_obmc_sad32x8_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad8x32_c, aom_highbd_obmc_sad8x32_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad16x4_c, aom_highbd_obmc_sad16x4_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad4x16_c, aom_highbd_obmc_sad4x16_sse4_1),
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE4_1, ObmcSadHBDTest,
+                         ::testing::ValuesIn(sse4_functions_hbd));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+ObmcSadHBDTest::ParamType avx2_functions_hbd[] = {
+  TestFuncs(aom_highbd_obmc_sad128x128_c, aom_highbd_obmc_sad128x128_avx2),
+  TestFuncs(aom_highbd_obmc_sad128x64_c, aom_highbd_obmc_sad128x64_avx2),
+  TestFuncs(aom_highbd_obmc_sad64x128_c, aom_highbd_obmc_sad64x128_avx2),
+  TestFuncs(aom_highbd_obmc_sad64x64_c, aom_highbd_obmc_sad64x64_avx2),
+  TestFuncs(aom_highbd_obmc_sad64x32_c, aom_highbd_obmc_sad64x32_avx2),
+  TestFuncs(aom_highbd_obmc_sad32x64_c, aom_highbd_obmc_sad32x64_avx2),
+  TestFuncs(aom_highbd_obmc_sad32x32_c, aom_highbd_obmc_sad32x32_avx2),
+  TestFuncs(aom_highbd_obmc_sad32x16_c, aom_highbd_obmc_sad32x16_avx2),
+  TestFuncs(aom_highbd_obmc_sad16x32_c, aom_highbd_obmc_sad16x32_avx2),
+  TestFuncs(aom_highbd_obmc_sad16x16_c, aom_highbd_obmc_sad16x16_avx2),
+  TestFuncs(aom_highbd_obmc_sad16x8_c, aom_highbd_obmc_sad16x8_avx2),
+  TestFuncs(aom_highbd_obmc_sad8x16_c, aom_highbd_obmc_sad8x16_avx2),
+  TestFuncs(aom_highbd_obmc_sad8x8_c, aom_highbd_obmc_sad8x8_avx2),
+  TestFuncs(aom_highbd_obmc_sad8x4_c, aom_highbd_obmc_sad8x4_avx2),
+  TestFuncs(aom_highbd_obmc_sad4x8_c, aom_highbd_obmc_sad4x8_avx2),
+  TestFuncs(aom_highbd_obmc_sad4x4_c, aom_highbd_obmc_sad4x4_avx2),
+
+  TestFuncs(aom_highbd_obmc_sad64x16_c, aom_highbd_obmc_sad64x16_avx2),
+  TestFuncs(aom_highbd_obmc_sad16x64_c, aom_highbd_obmc_sad16x64_avx2),
+  TestFuncs(aom_highbd_obmc_sad32x8_c, aom_highbd_obmc_sad32x8_avx2),
+  TestFuncs(aom_highbd_obmc_sad8x32_c, aom_highbd_obmc_sad8x32_avx2),
+  TestFuncs(aom_highbd_obmc_sad16x4_c, aom_highbd_obmc_sad16x4_avx2),
+  TestFuncs(aom_highbd_obmc_sad4x16_c, aom_highbd_obmc_sad4x16_avx2),
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, ObmcSadHBDTest,
+                         ::testing::ValuesIn(avx2_functions_hbd));
+#endif  // HAVE_AVX2
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/obmc_variance_test.cc b/third_party/aom/test/obmc_variance_test.cc
new file mode 100644
index 0000000000..5f21a8a6c1
--- /dev/null
+++ b/third_party/aom/test/obmc_variance_test.cc
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+
+#include "test/function_equivalence_test.h"
+#include "test/register_state_check.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom/aom_integer.h"
+
+#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
+
+using libaom_test::ACMRandom;
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+static const int kIterations = 1000;
+static const int kMaskMax = 64;
+
+typedef unsigned int (*ObmcVarF)(const uint8_t *pre, int pre_stride,
+                                 const int32_t *wsrc, const int32_t *mask,
+                                 unsigned int *sse);
+typedef libaom_test::FuncParam<ObmcVarF> TestFuncs;
+
+////////////////////////////////////////////////////////////////////////////////
+// 8 bit
+////////////////////////////////////////////////////////////////////////////////
+
+class ObmcVarianceTest : public FunctionEquivalenceTest<ObmcVarF> {};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ObmcVarianceTest);
+
+TEST_P(ObmcVarianceTest, RandomValues) {
+  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    const int pre_stride = this->rng_(MAX_SB_SIZE + 1);
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = this->rng_.Rand8();
+      wsrc[i] = this->rng_.Rand8() * this->rng_(kMaskMax * kMaskMax + 1);
+      mask[i] = this->rng_(kMaskMax * kMaskMax + 1);
+    }
+
+    unsigned int ref_sse, tst_sse;
+    const unsigned int ref_res =
+        params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse);
+    unsigned int tst_res;
+    API_REGISTER_STATE_CHECK(
+        tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse));
+
+    ASSERT_EQ(ref_res, tst_res);
+    ASSERT_EQ(ref_sse, tst_sse);
+  }
+}
+
+TEST_P(ObmcVarianceTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
+    const int pre_stride = iter;
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = UINT8_MAX;
+      wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax;
+      mask[i] = kMaskMax * kMaskMax;
+    }
+
+    unsigned int ref_sse, tst_sse;
+    const unsigned int ref_res =
+        params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse);
+    unsigned int tst_res;
+    API_REGISTER_STATE_CHECK(
+        tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse));
+
+    ASSERT_EQ(ref_res, tst_res);
+    ASSERT_EQ(ref_sse, tst_sse);
+  }
+}
+
+TEST_P(ObmcVarianceTest, DISABLED_Speed) {
+  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  const int pre_stride = this->rng_(MAX_SB_SIZE + 1);
+
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    pre[i] = this->rng_.Rand8();
+    wsrc[i] = this->rng_.Rand8() * this->rng_(kMaskMax * kMaskMax + 1);
+    mask[i] = this->rng_(kMaskMax * kMaskMax + 1);
+  }
+
+  const int num_loops = 1000000;
+  unsigned int ref_sse, tst_sse;
+  aom_usec_timer ref_timer, test_timer;
+
+  aom_usec_timer_start(&ref_timer);
+  for (int i = 0; i < num_loops; ++i) {
+    params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse);
+  }
+  aom_usec_timer_mark(&ref_timer);
+  const int elapsed_time_c =
+      static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+  aom_usec_timer_start(&test_timer);
+  for (int i = 0; i < num_loops; ++i) {
+    params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse);
+  }
+  aom_usec_timer_mark(&test_timer);
+  const int elapsed_time_simd =
+      static_cast<int>(aom_usec_timer_elapsed(&test_timer));
+
+  printf("c_time=%d \t simd_time=%d \t gain=%f \n", elapsed_time_c,
+         elapsed_time_simd,
+         static_cast<double>(elapsed_time_c) / elapsed_time_simd);
+}
+
+#if HAVE_SSE4_1
+const ObmcVarianceTest::ParamType sse4_functions[] = {
+  TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_sse4_1),
+  TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_sse4_1),
+  TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_sse4_1),
+  TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_sse4_1),
+  TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_sse4_1),
+  TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_sse4_1),
+  TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_sse4_1),
+  TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_sse4_1),
+  TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_sse4_1),
+  TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_sse4_1),
+  TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_sse4_1),
+  TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_sse4_1),
+  TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_sse4_1),
+  TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_sse4_1),
+  TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_sse4_1),
+  TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_sse4_1),
+
+  TestFuncs(aom_obmc_variance64x16_c, aom_obmc_variance64x16_sse4_1),
+  TestFuncs(aom_obmc_variance16x64_c, aom_obmc_variance16x64_sse4_1),
+  TestFuncs(aom_obmc_variance32x8_c, aom_obmc_variance32x8_sse4_1),
+  TestFuncs(aom_obmc_variance8x32_c, aom_obmc_variance8x32_sse4_1),
+  TestFuncs(aom_obmc_variance16x4_c, aom_obmc_variance16x4_sse4_1),
+  TestFuncs(aom_obmc_variance4x16_c, aom_obmc_variance4x16_sse4_1),
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE4_1, ObmcVarianceTest,
+                         ::testing::ValuesIn(sse4_functions));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+const ObmcVarianceTest::ParamType avx2_functions[] = {
+  TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_avx2),
+  TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_avx2),
+  TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_avx2),
+  TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_avx2),
+  TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_avx2),
+  TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_avx2),
+  TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_avx2),
+  TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_avx2),
+  TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_avx2),
+  TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_avx2),
+  TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_avx2),
+  TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_avx2),
+  TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_avx2),
+  TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_avx2),
+  TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_avx2),
+  TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_avx2),
+
+  TestFuncs(aom_obmc_variance64x16_c, aom_obmc_variance64x16_avx2),
+  TestFuncs(aom_obmc_variance16x64_c, aom_obmc_variance16x64_avx2),
+  TestFuncs(aom_obmc_variance32x8_c, aom_obmc_variance32x8_avx2),
+  TestFuncs(aom_obmc_variance8x32_c, aom_obmc_variance8x32_avx2),
+  TestFuncs(aom_obmc_variance16x4_c, aom_obmc_variance16x4_avx2),
+  TestFuncs(aom_obmc_variance4x16_c, aom_obmc_variance4x16_avx2),
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, ObmcVarianceTest,
+                         ::testing::ValuesIn(avx2_functions));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+const ObmcVarianceTest::ParamType neon_functions[] = {
+  TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_neon),
+  TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_neon),
+  TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_neon),
+  TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_neon),
+  TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_neon),
+  TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_neon),
+  TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_neon),
+  TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_neon),
+  TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_neon),
+  TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_neon),
+  TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_neon),
+  TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_neon),
+  TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_neon),
+  TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_neon),
+  TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_neon),
+  TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_neon),
+
+  TestFuncs(aom_obmc_variance64x16_c, aom_obmc_variance64x16_neon),
+  TestFuncs(aom_obmc_variance16x64_c, aom_obmc_variance16x64_neon),
+  TestFuncs(aom_obmc_variance32x8_c, aom_obmc_variance32x8_neon),
+  TestFuncs(aom_obmc_variance8x32_c, aom_obmc_variance8x32_neon),
+  TestFuncs(aom_obmc_variance16x4_c, aom_obmc_variance16x4_neon),
+  TestFuncs(aom_obmc_variance4x16_c, aom_obmc_variance4x16_neon),
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, ObmcVarianceTest,
+                         ::testing::ValuesIn(neon_functions));
+#endif  // HAVE_NEON
+
+////////////////////////////////////////////////////////////////////////////////
+// High bit-depth
+////////////////////////////////////////////////////////////////////////////////
+#if CONFIG_AV1_HIGHBITDEPTH && !CONFIG_REALTIME_ONLY
+class ObmcVarianceHBDTest : public FunctionEquivalenceTest<ObmcVarF> {};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ObmcVarianceHBDTest);
+
+TEST_P(ObmcVarianceHBDTest, RandomValues) {
+  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    const int pre_stride = this->rng_(MAX_SB_SIZE + 1);
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = this->rng_(1 << params_.bit_depth);
+      wsrc[i] = this->rng_(1 << params_.bit_depth) *
+                this->rng_(kMaskMax * kMaskMax + 1);
+      mask[i] = this->rng_(kMaskMax * kMaskMax + 1);
+    }
+
+    unsigned int ref_sse, tst_sse;
+    const unsigned int ref_res = params_.ref_func(
+        CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse);
+    unsigned int tst_res;
+    API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre),
+                                                        pre_stride, wsrc, mask,
+                                                        &tst_sse));
+
+    ASSERT_EQ(ref_res, tst_res);
+    ASSERT_EQ(ref_sse, tst_sse);
+  }
+}
+
+TEST_P(ObmcVarianceHBDTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
+    const int pre_stride = iter;
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = (1 << params_.bit_depth) - 1;
+      wsrc[i] = ((1 << params_.bit_depth) - 1) * kMaskMax * kMaskMax;
+      mask[i] = kMaskMax * kMaskMax;
+    }
+
+    unsigned int ref_sse, tst_sse;
+    const unsigned int ref_res = params_.ref_func(
+        CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse);
+    unsigned int tst_res;
+    API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre),
+                                                        pre_stride, wsrc, mask,
+                                                        &tst_sse));
+
+    ASSERT_EQ(ref_res, tst_res);
+    ASSERT_EQ(ref_sse, tst_sse);
+  }
+}
+
+#if HAVE_NEON
+ObmcVarianceHBDTest::ParamType neon_functions_hbd[] = {
+  TestFuncs(aom_highbd_8_obmc_variance128x128_c,
+            aom_highbd_8_obmc_variance128x128_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance128x64_c,
+            aom_highbd_8_obmc_variance128x64_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance64x128_c,
+            aom_highbd_8_obmc_variance64x128_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance64x64_c,
+            aom_highbd_8_obmc_variance64x64_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance64x32_c,
+            aom_highbd_8_obmc_variance64x32_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance32x64_c,
+            aom_highbd_8_obmc_variance32x64_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance32x32_c,
+            aom_highbd_8_obmc_variance32x32_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance32x16_c,
+            aom_highbd_8_obmc_variance32x16_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance16x32_c,
+            aom_highbd_8_obmc_variance16x32_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance16x16_c,
+            aom_highbd_8_obmc_variance16x16_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance16x8_c,
+            aom_highbd_8_obmc_variance16x8_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance8x16_c,
+            aom_highbd_8_obmc_variance8x16_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance8x8_c, aom_highbd_8_obmc_variance8x8_neon,
+            8),
+  TestFuncs(aom_highbd_8_obmc_variance8x4_c, aom_highbd_8_obmc_variance8x4_neon,
+            8),
+  TestFuncs(aom_highbd_8_obmc_variance4x8_c, aom_highbd_8_obmc_variance4x8_neon,
+            8),
+  TestFuncs(aom_highbd_8_obmc_variance4x4_c, aom_highbd_8_obmc_variance4x4_neon,
+            8),
+  TestFuncs(aom_highbd_10_obmc_variance128x128_c,
+            aom_highbd_10_obmc_variance128x128_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance128x64_c,
+            aom_highbd_10_obmc_variance128x64_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance64x128_c,
+            aom_highbd_10_obmc_variance64x128_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance64x64_c,
+            aom_highbd_10_obmc_variance64x64_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance64x32_c,
+            aom_highbd_10_obmc_variance64x32_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance32x64_c,
+            aom_highbd_10_obmc_variance32x64_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance32x32_c,
+            aom_highbd_10_obmc_variance32x32_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance32x16_c,
+            aom_highbd_10_obmc_variance32x16_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x32_c,
+            aom_highbd_10_obmc_variance16x32_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x16_c,
+            aom_highbd_10_obmc_variance16x16_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x8_c,
+            aom_highbd_10_obmc_variance16x8_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance8x16_c,
+            aom_highbd_10_obmc_variance8x16_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance8x8_c,
+            aom_highbd_10_obmc_variance8x8_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance8x4_c,
+            aom_highbd_10_obmc_variance8x4_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance4x8_c,
+            aom_highbd_10_obmc_variance4x8_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance4x4_c,
+            aom_highbd_10_obmc_variance4x4_neon, 10),
+  TestFuncs(aom_highbd_12_obmc_variance128x128_c,
+            aom_highbd_12_obmc_variance128x128_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance128x64_c,
+            aom_highbd_12_obmc_variance128x64_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance64x128_c,
+            aom_highbd_12_obmc_variance64x128_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance64x64_c,
+            aom_highbd_12_obmc_variance64x64_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance64x32_c,
+            aom_highbd_12_obmc_variance64x32_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance32x64_c,
+            aom_highbd_12_obmc_variance32x64_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance32x32_c,
+            aom_highbd_12_obmc_variance32x32_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance32x16_c,
+            aom_highbd_12_obmc_variance32x16_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x32_c,
+            aom_highbd_12_obmc_variance16x32_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x16_c,
+            aom_highbd_12_obmc_variance16x16_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x8_c,
+            aom_highbd_12_obmc_variance16x8_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance8x16_c,
+            aom_highbd_12_obmc_variance8x16_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance8x8_c,
+            aom_highbd_12_obmc_variance8x8_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance8x4_c,
+            aom_highbd_12_obmc_variance8x4_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance4x8_c,
+            aom_highbd_12_obmc_variance4x8_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance4x4_c,
+            aom_highbd_12_obmc_variance4x4_neon, 12),
+  TestFuncs(aom_highbd_8_obmc_variance64x16_c,
+            aom_highbd_8_obmc_variance64x16_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance16x64_c,
+            aom_highbd_8_obmc_variance16x64_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance32x8_c,
+            aom_highbd_8_obmc_variance32x8_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance8x32_c,
+            aom_highbd_8_obmc_variance8x32_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance16x4_c,
+            aom_highbd_8_obmc_variance16x4_neon, 8),
+  TestFuncs(aom_highbd_8_obmc_variance4x16_c,
+            aom_highbd_8_obmc_variance4x16_neon, 8),
+  TestFuncs(aom_highbd_10_obmc_variance64x16_c,
+            aom_highbd_10_obmc_variance64x16_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x64_c,
+            aom_highbd_10_obmc_variance16x64_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance32x8_c,
+            aom_highbd_10_obmc_variance32x8_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance8x32_c,
+            aom_highbd_10_obmc_variance8x32_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x4_c,
+            aom_highbd_10_obmc_variance16x4_neon, 10),
+  TestFuncs(aom_highbd_10_obmc_variance4x16_c,
+            aom_highbd_10_obmc_variance4x16_neon, 10),
+  TestFuncs(aom_highbd_12_obmc_variance64x16_c,
+            aom_highbd_12_obmc_variance64x16_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x64_c,
+            aom_highbd_12_obmc_variance16x64_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance32x8_c,
+            aom_highbd_12_obmc_variance32x8_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance8x32_c,
+            aom_highbd_12_obmc_variance8x32_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x4_c,
+            aom_highbd_12_obmc_variance16x4_neon, 12),
+  TestFuncs(aom_highbd_12_obmc_variance4x16_c,
+            aom_highbd_12_obmc_variance4x16_neon, 12),
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, ObmcVarianceHBDTest,
+                         ::testing::ValuesIn(neon_functions_hbd));
+#endif  // HAVE_NEON
+
+#if HAVE_SSE4_1
+ObmcVarianceHBDTest::ParamType sse4_functions_hbd[] = {
+  TestFuncs(aom_highbd_8_obmc_variance128x128_c,
+            aom_highbd_8_obmc_variance128x128_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance128x64_c,
+            aom_highbd_8_obmc_variance128x64_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance64x128_c,
+            aom_highbd_8_obmc_variance64x128_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance64x64_c,
+            aom_highbd_8_obmc_variance64x64_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance64x32_c,
+            aom_highbd_8_obmc_variance64x32_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance32x64_c,
+            aom_highbd_8_obmc_variance32x64_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance32x32_c,
+            aom_highbd_8_obmc_variance32x32_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance32x16_c,
+            aom_highbd_8_obmc_variance32x16_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance16x32_c,
+            aom_highbd_8_obmc_variance16x32_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance16x16_c,
+            aom_highbd_8_obmc_variance16x16_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance16x8_c,
+            aom_highbd_8_obmc_variance16x8_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance8x16_c,
+            aom_highbd_8_obmc_variance8x16_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance8x8_c,
+            aom_highbd_8_obmc_variance8x8_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance8x4_c,
+            aom_highbd_8_obmc_variance8x4_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance4x8_c,
+            aom_highbd_8_obmc_variance4x8_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance4x4_c,
+            aom_highbd_8_obmc_variance4x4_sse4_1, 8),
+  TestFuncs(aom_highbd_10_obmc_variance128x128_c,
+            aom_highbd_10_obmc_variance128x128_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance128x64_c,
+            aom_highbd_10_obmc_variance128x64_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance64x128_c,
+            aom_highbd_10_obmc_variance64x128_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance64x64_c,
+            aom_highbd_10_obmc_variance64x64_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance64x32_c,
+            aom_highbd_10_obmc_variance64x32_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance32x64_c,
+            aom_highbd_10_obmc_variance32x64_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance32x32_c,
+            aom_highbd_10_obmc_variance32x32_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance32x16_c,
+            aom_highbd_10_obmc_variance32x16_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x32_c,
+            aom_highbd_10_obmc_variance16x32_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x16_c,
+            aom_highbd_10_obmc_variance16x16_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x8_c,
+            aom_highbd_10_obmc_variance16x8_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance8x16_c,
+            aom_highbd_10_obmc_variance8x16_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance8x8_c,
+            aom_highbd_10_obmc_variance8x8_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance8x4_c,
+            aom_highbd_10_obmc_variance8x4_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance4x8_c,
+            aom_highbd_10_obmc_variance4x8_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance4x4_c,
+            aom_highbd_10_obmc_variance4x4_sse4_1, 10),
+  TestFuncs(aom_highbd_12_obmc_variance128x128_c,
+            aom_highbd_12_obmc_variance128x128_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance128x64_c,
+            aom_highbd_12_obmc_variance128x64_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance64x128_c,
+            aom_highbd_12_obmc_variance64x128_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance64x64_c,
+            aom_highbd_12_obmc_variance64x64_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance64x32_c,
+            aom_highbd_12_obmc_variance64x32_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance32x64_c,
+            aom_highbd_12_obmc_variance32x64_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance32x32_c,
+            aom_highbd_12_obmc_variance32x32_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance32x16_c,
+            aom_highbd_12_obmc_variance32x16_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x32_c,
+            aom_highbd_12_obmc_variance16x32_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x16_c,
+            aom_highbd_12_obmc_variance16x16_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x8_c,
+            aom_highbd_12_obmc_variance16x8_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance8x16_c,
+            aom_highbd_12_obmc_variance8x16_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance8x8_c,
+            aom_highbd_12_obmc_variance8x8_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance8x4_c,
+            aom_highbd_12_obmc_variance8x4_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance4x8_c,
+            aom_highbd_12_obmc_variance4x8_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance4x4_c,
+            aom_highbd_12_obmc_variance4x4_sse4_1, 12),
+
+  TestFuncs(aom_highbd_8_obmc_variance64x16_c,
+            aom_highbd_8_obmc_variance64x16_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance16x64_c,
+            aom_highbd_8_obmc_variance16x64_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance32x8_c,
+            aom_highbd_8_obmc_variance32x8_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance8x32_c,
+            aom_highbd_8_obmc_variance8x32_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance16x4_c,
+            aom_highbd_8_obmc_variance16x4_sse4_1, 8),
+  TestFuncs(aom_highbd_8_obmc_variance4x16_c,
+            aom_highbd_8_obmc_variance4x16_sse4_1, 8),
+  TestFuncs(aom_highbd_10_obmc_variance64x16_c,
+            aom_highbd_10_obmc_variance64x16_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x64_c,
+            aom_highbd_10_obmc_variance16x64_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance32x8_c,
+            aom_highbd_10_obmc_variance32x8_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance8x32_c,
+            aom_highbd_10_obmc_variance8x32_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x4_c,
+            aom_highbd_10_obmc_variance16x4_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance4x16_c,
+            aom_highbd_10_obmc_variance4x16_sse4_1, 10),
+  TestFuncs(aom_highbd_12_obmc_variance64x16_c,
+            aom_highbd_12_obmc_variance64x16_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x64_c,
+            aom_highbd_12_obmc_variance16x64_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance32x8_c,
+            aom_highbd_12_obmc_variance32x8_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance8x32_c,
+            aom_highbd_12_obmc_variance8x32_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x4_c,
+            aom_highbd_12_obmc_variance16x4_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance4x16_c,
+            aom_highbd_12_obmc_variance4x16_sse4_1, 12),
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE4_1, ObmcVarianceHBDTest,
+                         ::testing::ValuesIn(sse4_functions_hbd));
+#endif  // HAVE_SSE4_1
+#endif  // CONFIG_AV1_HIGHBITDEPTH && !CONFIG_REALTIME_ONLY
+}  // namespace
diff --git a/third_party/aom/test/pickrst_test.cc b/third_party/aom/test/pickrst_test.cc
new file mode 100644
index 0000000000..04b6f45652
--- /dev/null
+++ b/third_party/aom/test/pickrst_test.cc
@@ -0,0 +1,750 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/register_state_check.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom/aom_integer.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/encoder/pickrst.h"
+
+#define MAX_DATA_BLOCK 384
+
+namespace pickrst_test_lowbd {
+static const int kIterations = 100;
+
+typedef int64_t (*lowbd_pixel_proj_error_func)(
+    const uint8_t *src8, int width, int height, int src_stride,
+    const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride,
+    int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params);
+
+////////////////////////////////////////////////////////////////////////////////
+// 8 bit
+////////////////////////////////////////////////////////////////////////////////
+
+typedef std::tuple<const lowbd_pixel_proj_error_func> PixelProjErrorTestParam;
+
+class PixelProjErrorTest
+    : public ::testing::TestWithParam<PixelProjErrorTestParam> {
+ public:
+  void SetUp() override {
+    target_func_ = GET_PARAM(0);
+    src_ = (uint8_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                  sizeof(*src_)));
+    ASSERT_NE(src_, nullptr);
+    dgd_ = (uint8_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                  sizeof(*dgd_)));
+    ASSERT_NE(dgd_, nullptr);
+    flt0_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                   sizeof(*flt0_)));
+    ASSERT_NE(flt0_, nullptr);
+    flt1_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                   sizeof(*flt1_)));
+    ASSERT_NE(flt1_, nullptr);
+  }
+  void TearDown() override {
+    aom_free(src_);
+    aom_free(dgd_);
+    aom_free(flt0_);
+    aom_free(flt1_);
+  }
+  void RunPixelProjErrorTest(int32_t run_times);
+  void RunPixelProjErrorTest_ExtremeValues();
+
+ private:
+  lowbd_pixel_proj_error_func target_func_;
+  libaom_test::ACMRandom rng_;
+  uint8_t *src_;
+  uint8_t *dgd_;
+  int32_t *flt0_;
+  int32_t *flt1_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PixelProjErrorTest);
+
+void PixelProjErrorTest::RunPixelProjErrorTest(int32_t run_times) {
+  int h_end = run_times != 1 ? 128 : (rng_.Rand16() % MAX_DATA_BLOCK) + 1;
+  int v_end = run_times != 1 ? 128 : (rng_.Rand16() % MAX_DATA_BLOCK) + 1;
+  const int dgd_stride = MAX_DATA_BLOCK;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int flt0_stride = MAX_DATA_BLOCK;
+  const int flt1_stride = MAX_DATA_BLOCK;
+  sgr_params_type params;
+  int xq[2];
+  const int iters = run_times == 1 ? kIterations : 4;
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    int64_t err_ref = 0, err_test = 1;
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_[i] = rng_.Rand8();
+      src_[i] = rng_.Rand8();
+      flt0_[i] = rng_.Rand15Signed();
+      flt1_[i] = rng_.Rand15Signed();
+    }
+    xq[0] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
+    xq[1] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
+    params.r[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter % 2);
+    params.r[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter / 2);
+    params.s[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter % 2);
+    params.s[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter / 2);
+    uint8_t *dgd = dgd_;
+    uint8_t *src = src_;
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      err_ref = av1_lowbd_pixel_proj_error_c(src, h_end, v_end, src_stride, dgd,
+                                             dgd_stride, flt0_, flt0_stride,
+                                             flt1_, flt1_stride, xq, &params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      err_test =
+          target_func_(src, h_end, v_end, src_stride, dgd, dgd_stride, flt0_,
+                       flt0_stride, flt1_, flt1_stride, xq, &params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 10) {
+      printf("r0 %d r1 %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", params.r[0],
+             params.r[1], h_end, v_end, time1, time2, time1 / time2);
+    }
+    ASSERT_EQ(err_ref, err_test);
+  }
+}
+
+void PixelProjErrorTest::RunPixelProjErrorTest_ExtremeValues() {
+  const int h_start = 0;
+  int h_end = 192;
+  const int v_start = 0;
+  int v_end = 192;
+  const int dgd_stride = MAX_DATA_BLOCK;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int flt0_stride = MAX_DATA_BLOCK;
+  const int flt1_stride = MAX_DATA_BLOCK;
+  sgr_params_type params;
+  int xq[2];
+  const int iters = kIterations;
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    int64_t err_ref = 0, err_test = 1;
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_[i] = 0;
+      src_[i] = 255;
+      flt0_[i] = rng_.Rand15Signed();
+      flt1_[i] = rng_.Rand15Signed();
+    }
+    xq[0] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
+    xq[1] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
+    params.r[0] = rng_.Rand8() % MAX_RADIUS;
+    params.r[1] = rng_.Rand8() % MAX_RADIUS;
+    params.s[0] = rng_.Rand8() % MAX_RADIUS;
+    params.s[1] = rng_.Rand8() % MAX_RADIUS;
+    uint8_t *dgd = dgd_;
+    uint8_t *src = src_;
+
+    err_ref = av1_lowbd_pixel_proj_error_c(
+        src, h_end - h_start, v_end - v_start, src_stride, dgd, dgd_stride,
+        flt0_, flt0_stride, flt1_, flt1_stride, xq, &params);
+
+    err_test = target_func_(src, h_end - h_start, v_end - v_start, src_stride,
+                            dgd, dgd_stride, flt0_, flt0_stride, flt1_,
+                            flt1_stride, xq, &params);
+
+    ASSERT_EQ(err_ref, err_test);
+  }
+}
+
+TEST_P(PixelProjErrorTest, RandomValues) { RunPixelProjErrorTest(1); }
+
+TEST_P(PixelProjErrorTest, ExtremeValues) {
+  RunPixelProjErrorTest_ExtremeValues();
+}
+
+TEST_P(PixelProjErrorTest, DISABLED_Speed) { RunPixelProjErrorTest(200000); }
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE4_1, PixelProjErrorTest,
+                         ::testing::Values(av1_lowbd_pixel_proj_error_sse4_1));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+
+INSTANTIATE_TEST_SUITE_P(AVX2, PixelProjErrorTest,
+                         ::testing::Values(av1_lowbd_pixel_proj_error_avx2));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+
+INSTANTIATE_TEST_SUITE_P(NEON, PixelProjErrorTest,
+                         ::testing::Values(av1_lowbd_pixel_proj_error_neon));
+#endif  // HAVE_NEON
+
+}  // namespace pickrst_test_lowbd
+
+#if CONFIG_AV1_HIGHBITDEPTH
+namespace pickrst_test_highbd {
+static const int kIterations = 100;
+
+typedef int64_t (*highbd_pixel_proj_error_func)(
+    const uint8_t *src8, int width, int height, int src_stride,
+    const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride,
+    int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params);
+
+////////////////////////////////////////////////////////////////////////////////
+// High bit-depth
+////////////////////////////////////////////////////////////////////////////////
+
+typedef std::tuple<const highbd_pixel_proj_error_func> PixelProjErrorTestParam;
+
+class PixelProjHighbdErrorTest
+    : public ::testing::TestWithParam<PixelProjErrorTestParam> {
+ public:
+  void SetUp() override {
+    target_func_ = GET_PARAM(0);
+    src_ =
+        (uint16_t *)aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*src_));
+    ASSERT_NE(src_, nullptr);
+    dgd_ =
+        (uint16_t *)aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*dgd_));
+    ASSERT_NE(dgd_, nullptr);
+    flt0_ =
+        (int32_t *)aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*flt0_));
+    ASSERT_NE(flt0_, nullptr);
+    flt1_ =
+        (int32_t *)aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*flt1_));
+    ASSERT_NE(flt1_, nullptr);
+  }
+  void TearDown() override {
+    aom_free(src_);
+    aom_free(dgd_);
+    aom_free(flt0_);
+    aom_free(flt1_);
+  }
+  void RunPixelProjErrorTest(int32_t run_times);
+  void RunPixelProjErrorTest_ExtremeValues();
+
+ private:
+  highbd_pixel_proj_error_func target_func_;
+  libaom_test::ACMRandom rng_;
+  uint16_t *src_;
+  uint16_t *dgd_;
+  int32_t *flt0_;
+  int32_t *flt1_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PixelProjHighbdErrorTest);
+
+void PixelProjHighbdErrorTest::RunPixelProjErrorTest(int32_t run_times) {
+  int h_end = run_times != 1 ? 128 : (rng_.Rand16() % MAX_DATA_BLOCK) + 1;
+  int v_end = run_times != 1 ? 128 : (rng_.Rand16() % MAX_DATA_BLOCK) + 1;
+  const int dgd_stride = MAX_DATA_BLOCK;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int flt0_stride = MAX_DATA_BLOCK;
+  const int flt1_stride = MAX_DATA_BLOCK;
+  sgr_params_type params;
+  int xq[2];
+  const int iters = run_times == 1 ? kIterations : 4;
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    int64_t err_ref = 0, err_test = 1;
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_[i] = rng_.Rand16() % (1 << 12);
+      src_[i] = rng_.Rand16() % (1 << 12);
+      flt0_[i] = rng_.Rand15Signed();
+      flt1_[i] = rng_.Rand15Signed();
+    }
+    xq[0] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
+    xq[1] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
+    params.r[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter % 2);
+    params.r[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter / 2);
+    params.s[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter % 2);
+    params.s[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter / 2);
+    uint8_t *dgd8 = CONVERT_TO_BYTEPTR(dgd_);
+    uint8_t *src8 = CONVERT_TO_BYTEPTR(src_);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      err_ref = av1_highbd_pixel_proj_error_c(
+          src8, h_end, v_end, src_stride, dgd8, dgd_stride, flt0_, flt0_stride,
+          flt1_, flt1_stride, xq, &params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      err_test =
+          target_func_(src8, h_end, v_end, src_stride, dgd8, dgd_stride, flt0_,
+                       flt0_stride, flt1_, flt1_stride, xq, &params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 10) {
+      printf("r0 %d r1 %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", params.r[0],
+             params.r[1], h_end, v_end, time1, time2, time1 / time2);
+    }
+    ASSERT_EQ(err_ref, err_test);
+  }
+}
+
+void PixelProjHighbdErrorTest::RunPixelProjErrorTest_ExtremeValues() {
+  const int h_start = 0;
+  int h_end = 192;
+  const int v_start = 0;
+  int v_end = 192;
+  const int dgd_stride = MAX_DATA_BLOCK;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int flt0_stride = MAX_DATA_BLOCK;
+  const int flt1_stride = MAX_DATA_BLOCK;
+  sgr_params_type params;
+  int xq[2];
+  const int iters = kIterations;
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    int64_t err_ref = 0, err_test = 1;
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_[i] = 0;
+      src_[i] = (1 << 12) - 1;
+      flt0_[i] = rng_.Rand15Signed();
+      flt1_[i] = rng_.Rand15Signed();
+    }
+    xq[0] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
+    xq[1] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
+    params.r[0] = rng_.Rand8() % MAX_RADIUS;
+    params.r[1] = rng_.Rand8() % MAX_RADIUS;
+    params.s[0] = rng_.Rand8() % MAX_RADIUS;
+    params.s[1] = rng_.Rand8() % MAX_RADIUS;
+    uint8_t *dgd8 = CONVERT_TO_BYTEPTR(dgd_);
+    uint8_t *src8 = CONVERT_TO_BYTEPTR(src_);
+
+    err_ref = av1_highbd_pixel_proj_error_c(
+        src8, h_end - h_start, v_end - v_start, src_stride, dgd8, dgd_stride,
+        flt0_, flt0_stride, flt1_, flt1_stride, xq, &params);
+
+    err_test = target_func_(src8, h_end - h_start, v_end - v_start, src_stride,
+                            dgd8, dgd_stride, flt0_, flt0_stride, flt1_,
+                            flt1_stride, xq, &params);
+
+    ASSERT_EQ(err_ref, err_test);
+  }
+}
+
+TEST_P(PixelProjHighbdErrorTest, RandomValues) { RunPixelProjErrorTest(1); }
+
+TEST_P(PixelProjHighbdErrorTest, ExtremeValues) {
+  RunPixelProjErrorTest_ExtremeValues();
+}
+
+TEST_P(PixelProjHighbdErrorTest, DISABLED_Speed) {
+  RunPixelProjErrorTest(200000);
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE4_1, PixelProjHighbdErrorTest,
+                         ::testing::Values(av1_highbd_pixel_proj_error_sse4_1));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+
+INSTANTIATE_TEST_SUITE_P(AVX2, PixelProjHighbdErrorTest,
+                         ::testing::Values(av1_highbd_pixel_proj_error_avx2));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+
+INSTANTIATE_TEST_SUITE_P(NEON, PixelProjHighbdErrorTest,
+                         ::testing::Values(av1_highbd_pixel_proj_error_neon));
+#endif  // HAVE_NEON
+
+}  // namespace pickrst_test_highbd
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+////////////////////////////////////////////////////////////////////////////////
+// Get_proj_subspace_Test
+////////////////////////////////////////////////////////////////////////////////
+
+namespace get_proj_subspace_test_lowbd {
+static const int kIterations = 100;
+
+typedef void (*set_get_proj_subspace)(const uint8_t *src8, int width,
+                                      int height, int src_stride,
+                                      const uint8_t *dat8, int dat_stride,
+                                      int32_t *flt0, int flt0_stride,
+                                      int32_t *flt1, int flt1_stride,
+                                      int64_t H[2][2], int64_t C[2],
+                                      const sgr_params_type *params);
+
+typedef std::tuple<const set_get_proj_subspace> GetProjSubspaceTestParam;
+
+class GetProjSubspaceTest
+    : public ::testing::TestWithParam<GetProjSubspaceTestParam> {
+ public:
+  void SetUp() override {
+    target_func_ = GET_PARAM(0);
+    src_ = (uint8_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                  sizeof(*src_)));
+    ASSERT_NE(src_, nullptr);
+    dgd_ = (uint8_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                  sizeof(*dgd_)));
+    ASSERT_NE(dgd_, nullptr);
+    flt0_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                   sizeof(*flt0_)));
+    ASSERT_NE(flt0_, nullptr);
+    flt1_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                   sizeof(*flt1_)));
+    ASSERT_NE(flt1_, nullptr);
+  }
+  void TearDown() override {
+    aom_free(src_);
+    aom_free(dgd_);
+    aom_free(flt0_);
+    aom_free(flt1_);
+  }
+  void RunGetProjSubspaceTest(int32_t run_times);
+  void RunGetProjSubspaceTest_ExtremeValues();
+
+ private:
+  set_get_proj_subspace target_func_;
+  libaom_test::ACMRandom rng_;
+  uint8_t *src_;
+  uint8_t *dgd_;
+  int32_t *flt0_;
+  int32_t *flt1_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GetProjSubspaceTest);
+
+void GetProjSubspaceTest::RunGetProjSubspaceTest(int32_t run_times) {
+  int h_end = run_times != 1
+                  ? 128
+                  : ((rng_.Rand16() % MAX_DATA_BLOCK) &
+                     2147483640);  // We test for widths divisible by 8.
+  int v_end =
+      run_times != 1 ? 128 : ((rng_.Rand16() % MAX_DATA_BLOCK) & 2147483640);
+  const int dgd_stride = MAX_DATA_BLOCK;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int flt0_stride = MAX_DATA_BLOCK;
+  const int flt1_stride = MAX_DATA_BLOCK;
+  sgr_params_type params;
+  const int iters = run_times == 1 ? kIterations : 3;
+  static constexpr int kR0[3] = { 1, 1, 0 };
+  static constexpr int kR1[3] = { 1, 0, 1 };
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    int64_t C_ref[2] = { 0 }, C_test[2] = { 0 };
+    int64_t H_ref[2][2] = { { 0, 0 }, { 0, 0 } };
+    int64_t H_test[2][2] = { { 0, 0 }, { 0, 0 } };
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_[i] = rng_.Rand8();
+      src_[i] = rng_.Rand8();
+      flt0_[i] = rng_.Rand15Signed();
+      flt1_[i] = rng_.Rand15Signed();
+    }
+
+    params.r[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : kR0[iter];
+    params.r[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : kR1[iter];
+    uint8_t *dgd = dgd_;
+    uint8_t *src = src_;
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      av1_calc_proj_params_c(src, v_end, h_end, src_stride, dgd, dgd_stride,
+                             flt0_, flt0_stride, flt1_, flt1_stride, H_ref,
+                             C_ref, &params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      target_func_(src, v_end, h_end, src_stride, dgd, dgd_stride, flt0_,
+                   flt0_stride, flt1_, flt1_stride, H_test, C_test, &params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 10) {
+      printf("r0 %d r1 %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", params.r[0],
+             params.r[1], h_end, v_end, time1, time2, time1 / time2);
+    } else {
+      ASSERT_EQ(H_ref[0][0], H_test[0][0]);
+      ASSERT_EQ(H_ref[0][1], H_test[0][1]);
+      ASSERT_EQ(H_ref[1][0], H_test[1][0]);
+      ASSERT_EQ(H_ref[1][1], H_test[1][1]);
+      ASSERT_EQ(C_ref[0], C_test[0]);
+      ASSERT_EQ(C_ref[1], C_test[1]);
+    }
+  }
+}
+
+void GetProjSubspaceTest::RunGetProjSubspaceTest_ExtremeValues() {
+  const int h_start = 0;
+  int h_end = MAX_DATA_BLOCK;
+  const int v_start = 0;
+  int v_end = MAX_DATA_BLOCK;
+  const int dgd_stride = MAX_DATA_BLOCK;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int flt0_stride = MAX_DATA_BLOCK;
+  const int flt1_stride = MAX_DATA_BLOCK;
+  sgr_params_type params;
+  const int iters = kIterations;
+  static constexpr int kR0[3] = { 1, 1, 0 };
+  static constexpr int kR1[3] = { 1, 0, 1 };
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    int64_t C_ref[2] = { 0 }, C_test[2] = { 0 };
+    int64_t H_ref[2][2] = { { 0, 0 }, { 0, 0 } };
+    int64_t H_test[2][2] = { { 0, 0 }, { 0, 0 } };
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_[i] = 0;
+      src_[i] = 255;
+      flt0_[i] = rng_.Rand15Signed();
+      flt1_[i] = rng_.Rand15Signed();
+    }
+    params.r[0] = kR0[iter % 3];
+    params.r[1] = kR1[iter % 3];
+    uint8_t *dgd = dgd_;
+    uint8_t *src = src_;
+
+    av1_calc_proj_params_c(src, h_end - h_start, v_end - v_start, src_stride,
+                           dgd, dgd_stride, flt0_, flt0_stride, flt1_,
+                           flt1_stride, H_ref, C_ref, &params);
+
+    target_func_(src, h_end - h_start, v_end - v_start, src_stride, dgd,
+                 dgd_stride, flt0_, flt0_stride, flt1_, flt1_stride, H_test,
+                 C_test, &params);
+
+    ASSERT_EQ(H_ref[0][0], H_test[0][0]);
+    ASSERT_EQ(H_ref[0][1], H_test[0][1]);
+    ASSERT_EQ(H_ref[1][0], H_test[1][0]);
+    ASSERT_EQ(H_ref[1][1], H_test[1][1]);
+    ASSERT_EQ(C_ref[0], C_test[0]);
+    ASSERT_EQ(C_ref[1], C_test[1]);
+  }
+}
+
+TEST_P(GetProjSubspaceTest, RandomValues) { RunGetProjSubspaceTest(1); }
+
+TEST_P(GetProjSubspaceTest, ExtremeValues) {
+  RunGetProjSubspaceTest_ExtremeValues();
+}
+
+TEST_P(GetProjSubspaceTest, DISABLED_Speed) { RunGetProjSubspaceTest(200000); }
+
+#if HAVE_SSE4_1
+
+INSTANTIATE_TEST_SUITE_P(SSE4_1, GetProjSubspaceTest,
+                         ::testing::Values(av1_calc_proj_params_sse4_1));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+
+INSTANTIATE_TEST_SUITE_P(AVX2, GetProjSubspaceTest,
+                         ::testing::Values(av1_calc_proj_params_avx2));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+
+INSTANTIATE_TEST_SUITE_P(NEON, GetProjSubspaceTest,
+                         ::testing::Values(av1_calc_proj_params_neon));
+#endif  // HAVE_NEON
+
+}  // namespace get_proj_subspace_test_lowbd
+
+#if CONFIG_AV1_HIGHBITDEPTH
+namespace get_proj_subspace_test_hbd {
+static const int kIterations = 100;
+
+typedef void (*set_get_proj_subspace_hbd)(const uint8_t *src8, int width,
+                                          int height, int src_stride,
+                                          const uint8_t *dat8, int dat_stride,
+                                          int32_t *flt0, int flt0_stride,
+                                          int32_t *flt1, int flt1_stride,
+                                          int64_t H[2][2], int64_t C[2],
+                                          const sgr_params_type *params);
+
+typedef std::tuple<const set_get_proj_subspace_hbd> GetProjSubspaceHBDTestParam;
+
+class GetProjSubspaceTestHBD
+    : public ::testing::TestWithParam<GetProjSubspaceHBDTestParam> {
+ public:
+  void SetUp() override {
+    target_func_ = GET_PARAM(0);
+    src_ = (uint16_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                   sizeof(*src_)));
+    ASSERT_NE(src_, nullptr);
+    dgd_ = (uint16_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                   sizeof(*dgd_)));
+    ASSERT_NE(dgd_, nullptr);
+    flt0_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                   sizeof(*flt0_)));
+    ASSERT_NE(flt0_, nullptr);
+    flt1_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
+                                   sizeof(*flt1_)));
+    ASSERT_NE(flt1_, nullptr);
+  }
+  void TearDown() override {
+    aom_free(src_);
+    aom_free(dgd_);
+    aom_free(flt0_);
+    aom_free(flt1_);
+  }
+  void RunGetProjSubspaceTestHBD(int32_t run_times);
+  void RunGetProjSubspaceTestHBD_ExtremeValues();
+
+ private:
+  set_get_proj_subspace_hbd target_func_;
+  libaom_test::ACMRandom rng_;
+  uint16_t *src_;
+  uint16_t *dgd_;
+  int32_t *flt0_;
+  int32_t *flt1_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GetProjSubspaceTestHBD);
+
+void GetProjSubspaceTestHBD::RunGetProjSubspaceTestHBD(int32_t run_times) {
+  int h_end = run_times != 1
+                  ? 128
+                  : ((rng_.Rand16() % MAX_DATA_BLOCK) &
+                     2147483640);  // We test for widths divisible by 8.
+  int v_end =
+      run_times != 1 ? 128 : ((rng_.Rand16() % MAX_DATA_BLOCK) & 2147483640);
+  const int dgd_stride = MAX_DATA_BLOCK;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int flt0_stride = MAX_DATA_BLOCK;
+  const int flt1_stride = MAX_DATA_BLOCK;
+  sgr_params_type params;
+  const int iters = run_times == 1 ? kIterations : 3;
+  static constexpr int kR0[3] = { 1, 1, 0 };
+  static constexpr int kR1[3] = { 1, 0, 1 };
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    int64_t C_ref[2] = { 0 }, C_test[2] = { 0 };
+    int64_t H_ref[2][2] = { { 0, 0 }, { 0, 0 } };
+    int64_t H_test[2][2] = { { 0, 0 }, { 0, 0 } };
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_[i] = rng_.Rand16() % 4095;
+      src_[i] = rng_.Rand16() % 4095;
+      flt0_[i] = rng_.Rand15Signed();
+      flt1_[i] = rng_.Rand15Signed();
+    }
+
+    params.r[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : kR0[iter];
+    params.r[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : kR1[iter];
+    uint8_t *dgd = CONVERT_TO_BYTEPTR(dgd_);
+    uint8_t *src = CONVERT_TO_BYTEPTR(src_);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      av1_calc_proj_params_high_bd_c(src, v_end, h_end, src_stride, dgd,
+                                     dgd_stride, flt0_, flt0_stride, flt1_,
+                                     flt1_stride, H_ref, C_ref, &params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      target_func_(src, v_end, h_end, src_stride, dgd, dgd_stride, flt0_,
+                   flt0_stride, flt1_, flt1_stride, H_test, C_test, &params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 10) {
+      printf("r0 %d r1 %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", params.r[0],
+             params.r[1], h_end, v_end, time1, time2, time1 / time2);
+    } else {
+      ASSERT_EQ(H_ref[0][0], H_test[0][0]);
+      ASSERT_EQ(H_ref[0][1], H_test[0][1]);
+      ASSERT_EQ(H_ref[1][0], H_test[1][0]);
+      ASSERT_EQ(H_ref[1][1], H_test[1][1]);
+      ASSERT_EQ(C_ref[0], C_test[0]);
+      ASSERT_EQ(C_ref[1], C_test[1]);
+    }
+  }
+}
+
+void GetProjSubspaceTestHBD::RunGetProjSubspaceTestHBD_ExtremeValues() {
+  const int h_start = 0;
+  int h_end = MAX_DATA_BLOCK;
+  const int v_start = 0;
+  int v_end = MAX_DATA_BLOCK;
+  const int dgd_stride = MAX_DATA_BLOCK;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int flt0_stride = MAX_DATA_BLOCK;
+  const int flt1_stride = MAX_DATA_BLOCK;
+  sgr_params_type params;
+  const int iters = kIterations;
+  static constexpr int kR0[3] = { 1, 1, 0 };
+  static constexpr int kR1[3] = { 1, 0, 1 };
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    int64_t C_ref[2] = { 0 }, C_test[2] = { 0 };
+    int64_t H_ref[2][2] = { { 0, 0 }, { 0, 0 } };
+    int64_t H_test[2][2] = { { 0, 0 }, { 0, 0 } };
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_[i] = 0;
+      src_[i] = 4095;
+      flt0_[i] = rng_.Rand15Signed();
+      flt1_[i] = rng_.Rand15Signed();
+    }
+    params.r[0] = kR0[iter % 3];
+    params.r[1] = kR1[iter % 3];
+    uint8_t *dgd = CONVERT_TO_BYTEPTR(dgd_);
+    uint8_t *src = CONVERT_TO_BYTEPTR(src_);
+
+    av1_calc_proj_params_high_bd_c(
+        src, h_end - h_start, v_end - v_start, src_stride, dgd, dgd_stride,
+        flt0_, flt0_stride, flt1_, flt1_stride, H_ref, C_ref, &params);
+
+    target_func_(src, h_end - h_start, v_end - v_start, src_stride, dgd,
+                 dgd_stride, flt0_, flt0_stride, flt1_, flt1_stride, H_test,
+                 C_test, &params);
+
+    ASSERT_EQ(H_ref[0][0], H_test[0][0]);
+    ASSERT_EQ(H_ref[0][1], H_test[0][1]);
+    ASSERT_EQ(H_ref[1][0], H_test[1][0]);
+    ASSERT_EQ(H_ref[1][1], H_test[1][1]);
+    ASSERT_EQ(C_ref[0], C_test[0]);
+    ASSERT_EQ(C_ref[1], C_test[1]);
+  }
+}
+
+TEST_P(GetProjSubspaceTestHBD, RandomValues) { RunGetProjSubspaceTestHBD(1); }
+
+TEST_P(GetProjSubspaceTestHBD, ExtremeValues) {
+  RunGetProjSubspaceTestHBD_ExtremeValues();
+}
+
+TEST_P(GetProjSubspaceTestHBD, DISABLED_Speed) {
+  RunGetProjSubspaceTestHBD(200000);
+}
+
+#if HAVE_SSE4_1
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, GetProjSubspaceTestHBD,
+    ::testing::Values(av1_calc_proj_params_high_bd_sse4_1));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+
+INSTANTIATE_TEST_SUITE_P(AVX2, GetProjSubspaceTestHBD,
+                         ::testing::Values(av1_calc_proj_params_high_bd_avx2));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+
+INSTANTIATE_TEST_SUITE_P(NEON, GetProjSubspaceTestHBD,
+                         ::testing::Values(av1_calc_proj_params_high_bd_neon));
+#endif  // HAVE_NEON
+}  // namespace get_proj_subspace_test_hbd
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
diff --git a/third_party/aom/test/postproc_filters_test.cc b/third_party/aom/test/postproc_filters_test.cc
new file mode 100644
index 0000000000..9584dd8c35
--- /dev/null
+++ b/third_party/aom/test/postproc_filters_test.cc
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+class PostprocFiltersTest
+    : public ::libaom_test::CodecTestWith2Params<int, unsigned int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  PostprocFiltersTest()
+      : EncoderTest(GET_PARAM(0)), set_skip_postproc_filtering_(false),
+        frame_number_(0), cpu_used_(GET_PARAM(1)), bd_(GET_PARAM(2)) {}
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kAllIntra);
+    cfg_.g_input_bit_depth = bd_;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    frame_number_ = video->frame();
+    if (frame_number_ == 0) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel);
+    }
+    if (set_skip_postproc_filtering_) {
+      if (frame_number_ == 0) {
+        encoder->Control(AV1E_SET_SKIP_POSTPROC_FILTERING, 1);
+      } else if (frame_number_ == 10) {
+        encoder->Control(AV1E_SET_SKIP_POSTPROC_FILTERING, 0);
+      } else if (frame_number_ == 20) {
+        encoder->Control(AV1E_SET_SKIP_POSTPROC_FILTERING, 1);
+      }
+    }
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    ::libaom_test::MD5 md5_enc;
+    md5_enc.Add(reinterpret_cast<uint8_t *>(pkt->data.frame.buf),
+                pkt->data.frame.sz);
+    md5_enc_.push_back(md5_enc.Get());
+  }
+
+  void PostEncodeFrameHook(::libaom_test::Encoder *encoder) override {
+    const aom_image_t *img_enc = encoder->GetPreviewFrame();
+    if (!set_skip_postproc_filtering_) {
+      ASSERT_NE(img_enc, nullptr);
+    } else {
+      // Null will be returned if we query the reconstructed frame when
+      // AV1E_SET_SKIP_POSTPROC_FILTERING is set to 1.
+      if (frame_number_ < 10) {
+        ASSERT_EQ(img_enc, nullptr);
+      } else if (frame_number_ < 20) {
+        // Reconstructed frame cannot be null when
+        // AV1E_SET_SKIP_POSTPROC_FILTERING is set to 0.
+        ASSERT_NE(img_enc, nullptr);
+      } else {
+        ASSERT_EQ(img_enc, nullptr);
+      }
+    }
+  }
+
+  // The encoder config flag 'AV1E_SET_SKIP_POSTPROC_FILTERING' can be used to
+  // skip the application of post-processing filters on reconstructed frame for
+  // ALLINTRA encode. This unit-test validates the bit exactness of 2 encoded
+  // streams with 'AV1E_SET_SKIP_POSTPROC_FILTERING':
+  // 1. disabled for all frames (default case)
+  // 2. enabled and disabled at different frame indices using control calls.
+  void DoTest() {
+    std::unique_ptr<libaom_test::VideoSource> video(
+        new libaom_test::YUVVideoSource("niklas_640_480_30.yuv",
+                                        AOM_IMG_FMT_I420, 640, 480, 30, 1, 0,
+                                        kFrames));
+    ASSERT_NE(video, nullptr);
+
+    // First encode: 'AV1E_SET_SKIP_POSTPROC_FILTERING' disabled for all frames
+    // (default case).
+    set_skip_postproc_filtering_ = false;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    std::vector<std::string> apply_postproc_filters_md5_enc =
+        std::move(md5_enc_);
+    md5_enc_.clear();
+
+    // Second encode: 'AV1E_SET_SKIP_POSTPROC_FILTERING' enabled and disabled at
+    // different frame intervals.
+    set_skip_postproc_filtering_ = true;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    std::vector<std::string> toggle_apply_postproc_filters_md5_enc =
+        std::move(md5_enc_);
+    md5_enc_.clear();
+
+    // Check for bit match.
+    ASSERT_EQ(apply_postproc_filters_md5_enc,
+              toggle_apply_postproc_filters_md5_enc);
+  }
+
+  bool set_skip_postproc_filtering_;
+  unsigned int frame_number_;
+  std::vector<std::string> md5_enc_;
+
+ private:
+  static constexpr int kFrames = 30;
+  static constexpr unsigned int kCqLevel = 18;
+  int cpu_used_;
+  unsigned int bd_;
+};
+
+class PostprocFiltersTestLarge : public PostprocFiltersTest {};
+
+TEST_P(PostprocFiltersTest, MD5Match) { DoTest(); }
+
+TEST_P(PostprocFiltersTestLarge, MD5Match) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(PostprocFiltersTest, ::testing::Values(9),
+                           ::testing::Values(8, 10));
+
+// Test cpu_used 3 and 6.
+AV1_INSTANTIATE_TEST_SUITE(PostprocFiltersTestLarge, ::testing::Values(3, 6),
+                           ::testing::Values(8, 10));
+
+}  // namespace
diff --git a/third_party/aom/test/quant_test.cc b/third_party/aom/test/quant_test.cc
new file mode 100644
index 0000000000..afbabb3147
--- /dev/null
+++ b/third_party/aom/test/quant_test.cc
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include "config/aom_config.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "av1/encoder/av1_quantize.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+
+const ::libaom_test::TestMode kTestMode[] =
+#if CONFIG_REALTIME_ONLY
+    { ::libaom_test::kRealTime };
+#else
+    { ::libaom_test::kRealTime, ::libaom_test::kOnePassGood };
+#endif
+
+class QMTest
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  QMTest() : EncoderTest(GET_PARAM(0)) {}
+  ~QMTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_SET_ENABLE_QM, 1);
+      encoder->Control(AV1E_SET_QM_MIN, qm_min_);
+      encoder->Control(AV1E_SET_QM_MAX, qm_max_);
+
+      encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT, 100);
+      if (mode_ == ::libaom_test::kRealTime) {
+        encoder->Control(AV1E_SET_ALLOW_WARPED_MOTION, 0);
+        encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
+        encoder->Control(AV1E_SET_ENABLE_OBMC, 0);
+      }
+    }
+  }
+
+  void DoTest(int qm_min, int qm_max) {
+    qm_min_ = qm_min;
+    qm_max_ = qm_max;
+    cfg_.kf_max_dist = 12;
+    cfg_.rc_min_quantizer = 8;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 6;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_target_bitrate = 300;
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 15);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  int set_cpu_used_;
+  int qm_min_;
+  int qm_max_;
+};
+
+// encodes and decodes without a mismatch.
+TEST_P(QMTest, TestNoMisMatchQM1) { DoTest(5, 9); }
+
+// encodes and decodes without a mismatch.
+TEST_P(QMTest, TestNoMisMatchQM2) { DoTest(0, 8); }
+
+// encodes and decodes without a mismatch.
+TEST_P(QMTest, TestNoMisMatchQM3) { DoTest(9, 15); }
+
+AV1_INSTANTIATE_TEST_SUITE(QMTest, ::testing::ValuesIn(kTestMode),
+                           ::testing::Range(5, 9));
+
+#if !CONFIG_REALTIME_ONLY
+typedef struct {
+  const unsigned int min_q;
+  const unsigned int max_q;
+} QuantParam;
+
+const QuantParam QuantTestParams[] = {
+  { 0, 10 }, { 0, 60 }, { 20, 35 }, { 35, 50 }, { 50, 63 }
+};
+
+std::ostream &operator<<(std::ostream &os, const QuantParam &test_arg) {
+  return os << "QuantParam { min_q:" << test_arg.min_q
+            << " max_q:" << test_arg.max_q << " }";
+}
+
+/*
+ * This class is used to test whether base_qindex is within min
+ * and max quantizer range configured by user.
+ */
+class QuantizerBoundsCheckTestLarge
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
+                                                 QuantParam, aom_rc_mode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  QuantizerBoundsCheckTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        quant_param_(GET_PARAM(2)), rc_end_usage_(GET_PARAM(3)) {
+    quant_bound_violated_ = false;
+  }
+  ~QuantizerBoundsCheckTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = rc_end_usage_;
+    cfg_.g_threads = 1;
+    cfg_.rc_min_quantizer = quant_param_.min_q;
+    cfg_.rc_max_quantizer = quant_param_.max_q;
+    cfg_.g_lag_in_frames = 35;
+    if (rc_end_usage_ != AOM_Q) {
+      cfg_.rc_target_bitrate = 400;
+    }
+  }
+
+  bool DoDecode() const override { return true; }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 5);
+    }
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (AOM_CODEC_OK == res_dec) {
+      aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
+      AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_LAST_QUANTIZER,
+                                    &base_qindex_);
+      min_bound_qindex_ = av1_quantizer_to_qindex(cfg_.rc_min_quantizer);
+      max_bound_qindex_ = av1_quantizer_to_qindex(cfg_.rc_max_quantizer);
+      if ((base_qindex_ < min_bound_qindex_ ||
+           base_qindex_ > max_bound_qindex_) &&
+          quant_bound_violated_ == false) {
+        quant_bound_violated_ = true;
+      }
+    }
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  const QuantParam quant_param_;
+  int base_qindex_;
+  int min_bound_qindex_;
+  int max_bound_qindex_;
+  bool quant_bound_violated_;
+  aom_rc_mode rc_end_usage_;
+};
+
+TEST_P(QuantizerBoundsCheckTestLarge, QuantizerBoundsCheckEncodeTest) {
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 50);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(quant_bound_violated_, false);
+}
+
+AV1_INSTANTIATE_TEST_SUITE(QuantizerBoundsCheckTestLarge,
+                           ::testing::Values(::libaom_test::kOnePassGood,
+                                             ::libaom_test::kTwoPassGood),
+                           ::testing::ValuesIn(QuantTestParams),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ));
+#endif  // !CONFIG_REALTIME_ONLY
+}  // namespace
diff --git a/third_party/aom/test/quantize_func_test.cc b/third_party/aom/test/quantize_func_test.cc
new file mode 100644
index 0000000000..328d5b10df
--- /dev/null
+++ b/third_party/aom/test/quantize_func_test.cc
@@ -0,0 +1,795 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <algorithm>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_codec.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/encoder/encoder.h"
+#include "av1/common/scan.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+using libaom_test::ACMRandom;
+
+#define QUAN_PARAM_LIST                                                       \
+  const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,    \
+      const int16_t *round_ptr, const int16_t *quant_ptr,                     \
+      const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,                 \
+      tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, \
+      const int16_t *scan, const int16_t *iscan
+
+#define LP_QUANTIZE_PARAM_LIST                                             \
+  const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr,   \
+      const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, \
+      const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan,  \
+      const int16_t *iscan
+
+typedef void (*LPQuantizeFunc)(LP_QUANTIZE_PARAM_LIST);
+typedef void (*QuantizeFunc)(QUAN_PARAM_LIST);
+typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST, int log_scale);
+
+#undef LP_QUANTIZE_PARAM_LIST
+
+#define HBD_QUAN_FUNC                                                      \
+  fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
+     qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, log_scale)
+
+#define LBD_QUAN_FUNC                                                      \
+  fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
+     qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan)
+
+template <QuantizeFuncHbd fn>
+void highbd_quan16x16_wrapper(QUAN_PARAM_LIST) {
+  const int log_scale = 0;
+  HBD_QUAN_FUNC;
+}
+
+template <QuantizeFuncHbd fn>
+void highbd_quan32x32_wrapper(QUAN_PARAM_LIST) {
+  const int log_scale = 1;
+  HBD_QUAN_FUNC;
+}
+
+template <QuantizeFuncHbd fn>
+void highbd_quan64x64_wrapper(QUAN_PARAM_LIST) {
+  const int log_scale = 2;
+  HBD_QUAN_FUNC;
+}
+
+enum QuantType { TYPE_B, TYPE_DC, TYPE_FP };
+
+using std::tuple;
+
+template <typename FuncType>
+using QuantizeParam =
+    tuple<FuncType, FuncType, TX_SIZE, QuantType, aom_bit_depth_t>;
+
+typedef struct {
+  QUANTS quant;
+  Dequants dequant;
+} QuanTable;
+
+const int kTestNum = 1000;
+
+#define GET_TEMPLATE_PARAM(k) std::get<k>(this->GetParam())
+
+template <typename CoeffType, typename FuncType>
+class QuantizeTestBase
+    : public ::testing::TestWithParam<QuantizeParam<FuncType>> {
+ protected:
+  QuantizeTestBase()
+      : quant_ref_(GET_TEMPLATE_PARAM(0)), quant_(GET_TEMPLATE_PARAM(1)),
+        tx_size_(GET_TEMPLATE_PARAM(2)), type_(GET_TEMPLATE_PARAM(3)),
+        bd_(GET_TEMPLATE_PARAM(4)) {}
+
+  ~QuantizeTestBase() override = default;
+
+  void SetUp() override {
+    qtab_ = reinterpret_cast<QuanTable *>(aom_memalign(32, sizeof(*qtab_)));
+    ASSERT_NE(qtab_, nullptr);
+    const int n_coeffs = coeff_num();
+    coeff_ = reinterpret_cast<CoeffType *>(
+        aom_memalign(32, 6 * n_coeffs * sizeof(CoeffType)));
+    ASSERT_NE(coeff_, nullptr);
+    InitQuantizer();
+  }
+
+  void TearDown() override {
+    aom_free(qtab_);
+    qtab_ = nullptr;
+    aom_free(coeff_);
+    coeff_ = nullptr;
+  }
+
+  void InitQuantizer() {
+    av1_build_quantizer(bd_, 0, 0, 0, 0, 0, &qtab_->quant, &qtab_->dequant);
+  }
+
+  virtual void RunQuantizeFunc(
+      const CoeffType *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+      const int16_t *round_ptr, const int16_t *quant_ptr,
+      const int16_t *quant_shift_ptr, CoeffType *qcoeff_ptr,
+      CoeffType *qcoeff_ref_ptr, CoeffType *dqcoeff_ptr,
+      CoeffType *dqcoeff_ref_ptr, const int16_t *dequant_ptr,
+      uint16_t *eob_ref_ptr, uint16_t *eob_ptr, const int16_t *scan,
+      const int16_t *iscan) = 0;
+
+  void QuantizeRun(bool is_loop, int q = 0, int test_num = 1) {
+    CoeffType *coeff_ptr = coeff_;
+    const intptr_t n_coeffs = coeff_num();
+
+    CoeffType *qcoeff_ref = coeff_ptr + n_coeffs;
+    CoeffType *dqcoeff_ref = qcoeff_ref + n_coeffs;
+
+    CoeffType *qcoeff = dqcoeff_ref + n_coeffs;
+    CoeffType *dqcoeff = qcoeff + n_coeffs;
+    uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs);
+
+    // Testing uses 2-D DCT scan order table
+    const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);
+
+    // Testing uses luminance quantization table
+    const int16_t *zbin = qtab_->quant.y_zbin[q];
+
+    const int16_t *round = nullptr;
+    const int16_t *quant = nullptr;
+    if (type_ == TYPE_B) {
+      round = qtab_->quant.y_round[q];
+      quant = qtab_->quant.y_quant[q];
+    } else if (type_ == TYPE_FP) {
+      round = qtab_->quant.y_round_fp[q];
+      quant = qtab_->quant.y_quant_fp[q];
+    }
+
+    const int16_t *quant_shift = qtab_->quant.y_quant_shift[q];
+    const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
+
+    for (int i = 0; i < test_num; ++i) {
+      if (is_loop) FillCoeffRandom();
+
+      memset(qcoeff_ref, 0, 5 * n_coeffs * sizeof(*qcoeff_ref));
+
+      RunQuantizeFunc(coeff_ptr, n_coeffs, zbin, round, quant, quant_shift,
+                      qcoeff, qcoeff_ref, dqcoeff, dqcoeff_ref, dequant,
+                      &eob[0], &eob[1], sc->scan, sc->iscan);
+
+      for (int j = 0; j < n_coeffs; ++j) {
+        ASSERT_EQ(qcoeff_ref[j], qcoeff[j])
+            << "Q mismatch on test: " << i << " at position: " << j
+            << " Q: " << q << " coeff: " << coeff_ptr[j];
+      }
+
+      for (int j = 0; j < n_coeffs; ++j) {
+        ASSERT_EQ(dqcoeff_ref[j], dqcoeff[j])
+            << "Dq mismatch on test: " << i << " at position: " << j
+            << " Q: " << q << " coeff: " << coeff_ptr[j];
+      }
+
+      ASSERT_EQ(eob[0], eob[1])
+          << "eobs mismatch on test: " << i << " Q: " << q;
+    }
+  }
+
+  void CompareResults(const CoeffType *buf_ref, const CoeffType *buf, int size,
+                      const char *text, int q, int number) {
+    int i;
+    for (i = 0; i < size; ++i) {
+      ASSERT_EQ(buf_ref[i], buf[i]) << text << " mismatch on test: " << number
+                                    << " at position: " << i << " Q: " << q;
+    }
+  }
+
+  int coeff_num() const { return av1_get_max_eob(tx_size_); }
+
+  void FillCoeff(CoeffType c) {
+    const int n_coeffs = coeff_num();
+    for (int i = 0; i < n_coeffs; ++i) {
+      coeff_[i] = c;
+    }
+  }
+
+  void FillCoeffRandom() {
+    const int n_coeffs = coeff_num();
+    FillCoeffZero();
+    const int num = rnd_.Rand16() % n_coeffs;
+    // Randomize the first non zero coeff position.
+    const int start = rnd_.Rand16() % n_coeffs;
+    const int end = std::min(start + num, n_coeffs);
+    for (int i = start; i < end; ++i) {
+      coeff_[i] = GetRandomCoeff();
+    }
+  }
+
+  void FillCoeffRandomRows(int num) {
+    FillCoeffZero();
+    for (int i = 0; i < num; ++i) {
+      coeff_[i] = GetRandomCoeff();
+    }
+  }
+
+  void FillCoeffZero() { FillCoeff(0); }
+
+  void FillCoeffConstant() {
+    CoeffType c = GetRandomCoeff();
+    FillCoeff(c);
+  }
+
+  void FillDcOnly() {
+    FillCoeffZero();
+    coeff_[0] = GetRandomCoeff();
+  }
+
+  void FillDcLargeNegative() {
+    FillCoeffZero();
+    // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues
+    // like BUG=883 where the constant being compared was incorrectly
+    // initialized.
+    coeff_[0] = -8191;
+  }
+
+  CoeffType GetRandomCoeff() {
+    CoeffType coeff;
+    if (bd_ == AOM_BITS_8) {
+      coeff =
+          clamp(static_cast<int16_t>(rnd_.Rand16()), INT16_MIN + 1, INT16_MAX);
+    } else {
+      CoeffType min = -(1 << (7 + bd_));
+      CoeffType max = -min - 1;
+      coeff = clamp(static_cast<CoeffType>(rnd_.Rand31()), min, max);
+    }
+    return coeff;
+  }
+
+  ACMRandom rnd_;
+  QuanTable *qtab_;
+  CoeffType *coeff_;
+  FuncType quant_ref_;
+  FuncType quant_;
+  TX_SIZE tx_size_;
+  QuantType type_;
+  aom_bit_depth_t bd_;
+};
+
+class FullPrecisionQuantizeTest
+    : public QuantizeTestBase<tran_low_t, QuantizeFunc> {
+  void RunQuantizeFunc(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                       const int16_t *zbin_ptr, const int16_t *round_ptr,
+                       const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+                       tran_low_t *qcoeff_ptr, tran_low_t *qcoeff_ref_ptr,
+                       tran_low_t *dqcoeff_ptr, tran_low_t *dqcoeff_ref_ptr,
+                       const int16_t *dequant_ptr, uint16_t *eob_ref_ptr,
+                       uint16_t *eob_ptr, const int16_t *scan,
+                       const int16_t *iscan) override {
+    quant_ref_(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
+               quant_shift_ptr, qcoeff_ref_ptr, dqcoeff_ref_ptr, dequant_ptr,
+               eob_ref_ptr, scan, iscan);
+
+    API_REGISTER_STATE_CHECK(quant_(
+        coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
+        qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan));
+  }
+};
+
+class LowPrecisionQuantizeTest
+    : public QuantizeTestBase<int16_t, LPQuantizeFunc> {
+  void RunQuantizeFunc(const int16_t *coeff_ptr, intptr_t n_coeffs,
+                       const int16_t * /*zbin_ptr*/, const int16_t *round_ptr,
+                       const int16_t *quant_ptr,
+                       const int16_t * /*quant_shift_ptr*/, int16_t *qcoeff_ptr,
+                       int16_t *qcoeff_ref_ptr, int16_t *dqcoeff_ptr,
+                       int16_t *dqcoeff_ref_ptr, const int16_t *dequant_ptr,
+                       uint16_t *eob_ref_ptr, uint16_t *eob_ptr,
+                       const int16_t *scan, const int16_t *iscan) override {
+    quant_ref_(coeff_ptr, n_coeffs, round_ptr, quant_ptr, qcoeff_ref_ptr,
+               dqcoeff_ref_ptr, dequant_ptr, eob_ref_ptr, scan, iscan);
+
+    API_REGISTER_STATE_CHECK(quant_(coeff_ptr, n_coeffs, round_ptr, quant_ptr,
+                                    qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
+                                    eob_ptr, scan, iscan));
+  }
+};
+
+TEST_P(FullPrecisionQuantizeTest, ZeroInput) {
+  FillCoeffZero();
+  QuantizeRun(false);
+}
+
+TEST_P(FullPrecisionQuantizeTest, LargeNegativeInput) {
+  FillDcLargeNegative();
+  QuantizeRun(false, 0, 1);
+}
+
+TEST_P(FullPrecisionQuantizeTest, DcOnlyInput) {
+  FillDcOnly();
+  QuantizeRun(false, 0, 1);
+}
+
+TEST_P(FullPrecisionQuantizeTest, RandomInput) {
+  QuantizeRun(true, 0, kTestNum);
+}
+
+TEST_P(FullPrecisionQuantizeTest, MultipleQ) {
+  for (int q = 0; q < QINDEX_RANGE; ++q) {
+    QuantizeRun(true, q, kTestNum);
+  }
+}
+
+// Force the coeff to be half the value of the dequant.  This exposes a
+// mismatch found in av1_quantize_fp_sse2().
+TEST_P(FullPrecisionQuantizeTest, CoeffHalfDequant) {
+  FillCoeff(16);
+  QuantizeRun(false, 25, 1);
+}
+
+TEST_P(FullPrecisionQuantizeTest, DISABLED_Speed) {
+  tran_low_t *coeff_ptr = coeff_;
+  const intptr_t n_coeffs = coeff_num();
+
+  tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs;
+  tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs;
+
+  tran_low_t *qcoeff = dqcoeff_ref + n_coeffs;
+  tran_low_t *dqcoeff = qcoeff + n_coeffs;
+  uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs);
+
+  // Testing uses 2-D DCT scan order table
+  const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);
+
+  // Testing uses luminance quantization table
+  const int q = 22;
+  const int16_t *zbin = qtab_->quant.y_zbin[q];
+  const int16_t *round_fp = qtab_->quant.y_round_fp[q];
+  const int16_t *quant_fp = qtab_->quant.y_quant_fp[q];
+  const int16_t *quant_shift = qtab_->quant.y_quant_shift[q];
+  const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
+  const int kNumTests = 5000000;
+  aom_usec_timer timer, simd_timer;
+  int rows = tx_size_high[tx_size_];
+  int cols = tx_size_wide[tx_size_];
+  rows = AOMMIN(32, rows);
+  cols = AOMMIN(32, cols);
+  for (int cnt = 0; cnt <= rows; cnt++) {
+    FillCoeffRandomRows(cnt * cols);
+
+    aom_usec_timer_start(&timer);
+    for (int n = 0; n < kNumTests; ++n) {
+      quant_ref_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift,
+                 qcoeff, dqcoeff, dequant, eob, sc->scan, sc->iscan);
+    }
+    aom_usec_timer_mark(&timer);
+
+    aom_usec_timer_start(&simd_timer);
+    for (int n = 0; n < kNumTests; ++n) {
+      quant_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift, qcoeff,
+             dqcoeff, dequant, eob, sc->scan, sc->iscan);
+    }
+    aom_usec_timer_mark(&simd_timer);
+
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    const int simd_elapsed_time =
+        static_cast<int>(aom_usec_timer_elapsed(&simd_timer));
+    printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time,
+           simd_elapsed_time, ((float)elapsed_time / simd_elapsed_time));
+  }
+}
+
+// TODO(crbug.com/aomedia/2796)
+TEST_P(LowPrecisionQuantizeTest, ZeroInput) {
+  FillCoeffZero();
+  QuantizeRun(false);
+}
+
+TEST_P(LowPrecisionQuantizeTest, LargeNegativeInput) {
+  FillDcLargeNegative();
+  QuantizeRun(false, 0, 1);
+}
+
+TEST_P(LowPrecisionQuantizeTest, DcOnlyInput) {
+  FillDcOnly();
+  QuantizeRun(false, 0, 1);
+}
+
+TEST_P(LowPrecisionQuantizeTest, RandomInput) {
+  QuantizeRun(true, 0, kTestNum);
+}
+
+TEST_P(LowPrecisionQuantizeTest, MultipleQ) {
+  for (int q = 0; q < QINDEX_RANGE; ++q) {
+    QuantizeRun(true, q, kTestNum);
+  }
+}
+
+// Force the coeff to be half the value of the dequant.  This exposes a
+// mismatch found in av1_quantize_fp_sse2().
+TEST_P(LowPrecisionQuantizeTest, CoeffHalfDequant) {
+  FillCoeff(16);
+  QuantizeRun(false, 25, 1);
+}
+
+TEST_P(LowPrecisionQuantizeTest, DISABLED_Speed) {
+  int16_t *coeff_ptr = coeff_;
+  const intptr_t n_coeffs = coeff_num();
+
+  int16_t *qcoeff_ref = coeff_ptr + n_coeffs;
+  int16_t *dqcoeff_ref = qcoeff_ref + n_coeffs;
+
+  int16_t *qcoeff = dqcoeff_ref + n_coeffs;
+  int16_t *dqcoeff = qcoeff + n_coeffs;
+  uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs);
+
+  // Testing uses 2-D DCT scan order table
+  const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);
+
+  // Testing uses luminance quantization table
+  const int q = 22;
+  const int16_t *round_fp = qtab_->quant.y_round_fp[q];
+  const int16_t *quant_fp = qtab_->quant.y_quant_fp[q];
+  const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
+  const int kNumTests = 5000000;
+  aom_usec_timer timer, simd_timer;
+  int rows = tx_size_high[tx_size_];
+  int cols = tx_size_wide[tx_size_];
+  rows = AOMMIN(32, rows);
+  cols = AOMMIN(32, cols);
+  for (int cnt = 0; cnt <= rows; cnt++) {
+    FillCoeffRandomRows(cnt * cols);
+
+    aom_usec_timer_start(&timer);
+    for (int n = 0; n < kNumTests; ++n) {
+      quant_ref_(coeff_ptr, n_coeffs, round_fp, quant_fp, qcoeff, dqcoeff,
+                 dequant, eob, sc->scan, sc->iscan);
+    }
+    aom_usec_timer_mark(&timer);
+
+    aom_usec_timer_start(&simd_timer);
+    for (int n = 0; n < kNumTests; ++n) {
+      quant_(coeff_ptr, n_coeffs, round_fp, quant_fp, qcoeff, dqcoeff, dequant,
+             eob, sc->scan, sc->iscan);
+    }
+    aom_usec_timer_mark(&simd_timer);
+
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    const int simd_elapsed_time =
+        static_cast<int>(aom_usec_timer_elapsed(&simd_timer));
+    printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time,
+           simd_elapsed_time, ((float)elapsed_time / simd_elapsed_time));
+  }
+}
+
+using std::make_tuple;
+
+#if HAVE_AVX2
+
+const QuantizeParam<LPQuantizeFunc> kLPQParamArrayAvx2[] = {
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, LowPrecisionQuantizeTest,
+                         ::testing::ValuesIn(kLPQParamArrayAvx2));
+
+const QuantizeParam<QuantizeFunc> kQParamArrayAvx2[] = {
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,
+             static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,
+             static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,
+             static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,
+             static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2,
+             static_cast<TX_SIZE>(TX_16X64), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2,
+             static_cast<TX_SIZE>(TX_64X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_avx2,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_10),
+  make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_12),
+  make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),
+  make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_10),
+  make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_12),
+  make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8),
+  make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_10),
+  make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_12),
+  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),
+  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),
+  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_avx2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),
+  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_avx2,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_highbd_quantize_b_adaptive_c,
+             &aom_highbd_quantize_b_adaptive_avx2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_highbd_quantize_b_adaptive_c,
+             &aom_highbd_quantize_b_adaptive_avx2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),
+  make_tuple(&aom_highbd_quantize_b_adaptive_c,
+             &aom_highbd_quantize_b_adaptive_avx2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),
+  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
+             &aom_highbd_quantize_b_32x32_adaptive_avx2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
+             &aom_highbd_quantize_b_32x32_adaptive_avx2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10),
+  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
+             &aom_highbd_quantize_b_32x32_adaptive_avx2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),
+#endif  // !CONFIG_REALTIME_ONLY
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2,
+             static_cast<TX_SIZE>(TX_8X8), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2,
+             static_cast<TX_SIZE>(TX_4X4), TYPE_B, AOM_BITS_8),
+#endif  // !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_quantize_b_c, &aom_quantize_b_avx2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_avx2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_avx2,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8),
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, FullPrecisionQuantizeTest,
+                         ::testing::ValuesIn(kQParamArrayAvx2));
+#endif  // HAVE_AVX2
+
+#if HAVE_SSE2
+
+const QuantizeParam<LPQuantizeFunc> kLPQParamArraySSE2[] = {
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2,
+             static_cast<TX_SIZE>(TX_8X8), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2,
+             static_cast<TX_SIZE>(TX_4X4), TYPE_FP, AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, LowPrecisionQuantizeTest,
+                         ::testing::ValuesIn(kLPQParamArraySSE2));
+
+const QuantizeParam<QuantizeFunc> kQParamArraySSE2[] = {
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,
+             static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,
+             static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,
+             static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,
+             static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_c, &aom_quantize_b_sse2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),
+  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_highbd_quantize_b_adaptive_c,
+             &aom_highbd_quantize_b_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_highbd_quantize_b_adaptive_c,
+             &aom_highbd_quantize_b_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),
+  make_tuple(&aom_highbd_quantize_b_adaptive_c,
+             &aom_highbd_quantize_b_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),
+  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10),
+  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),
+  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
+             &aom_highbd_quantize_b_32x32_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
+             &aom_highbd_quantize_b_32x32_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10),
+  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
+             &aom_highbd_quantize_b_32x32_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),
+#endif  // !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_10),
+  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c,
+             &aom_highbd_quantize_b_64x64_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c,
+             &aom_highbd_quantize_b_64x64_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_10),
+  make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c,
+             &aom_highbd_quantize_b_64x64_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),
+#endif  // !CONFIG_REALTIME_ONLY
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_8X8), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_4X4), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_32x32_adaptive_c,
+             &aom_quantize_b_32x32_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_32X16), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_32x32_adaptive_c,
+             &aom_quantize_b_32x32_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_16X32), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_32x32_adaptive_c,
+             &aom_quantize_b_32x32_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_64x64_adaptive_c,
+             &aom_quantize_b_64x64_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_32X64), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_64x64_adaptive_c,
+             &aom_quantize_b_64x64_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_64X32), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_64x64_adaptive_c,
+             &aom_quantize_b_64x64_adaptive_sse2,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8)
+#endif  // !CONFIG_REALTIME_ONLY
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, FullPrecisionQuantizeTest,
+                         ::testing::ValuesIn(kQParamArraySSE2));
+#endif
+
+#if HAVE_NEON
+
+const QuantizeParam<LPQuantizeFunc> kLPQParamArrayNEON[] = {
+  make_tuple(av1_quantize_lp_c, av1_quantize_lp_neon,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(av1_quantize_lp_c, av1_quantize_lp_neon,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),
+  make_tuple(av1_quantize_lp_c, av1_quantize_lp_neon,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, LowPrecisionQuantizeTest,
+                         ::testing::ValuesIn(kLPQParamArrayNEON));
+
+const QuantizeParam<QuantizeFunc> kQParamArrayNEON[] = {
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+             static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+             static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+             static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+             static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_neon,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_neon,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_c, &aom_quantize_b_neon,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_neon,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
+  make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_neon,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8),
+
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_neon>,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_12),
+  make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_neon>,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_12),
+  make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
+             &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_neon>,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_12),
+  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_neon,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),
+  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_neon,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),
+  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_neon,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(&aom_highbd_quantize_b_adaptive_c,
+             &aom_highbd_quantize_b_adaptive_neon,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),
+  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
+             &aom_highbd_quantize_b_32x32_adaptive_neon,
+             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),
+  make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c,
+             &aom_highbd_quantize_b_64x64_adaptive_neon,
+             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),
+#endif  // !CONFIG_REALTIME_ONLY
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, FullPrecisionQuantizeTest,
+                         ::testing::ValuesIn(kQParamArrayNEON));
+#endif
+
+#if HAVE_SSSE3 && AOM_ARCH_X86_64
+INSTANTIATE_TEST_SUITE_P(
+    SSSE3, FullPrecisionQuantizeTest,
+    ::testing::Values(
+        make_tuple(&aom_quantize_b_c, &aom_quantize_b_ssse3,
+                   static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+        make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_ssse3,
+                   static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
+        make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_ssse3,
+                   static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8)));
+
+#endif  // HAVE_SSSE3 && AOM_ARCH_X86_64
+
+#if HAVE_AVX
+INSTANTIATE_TEST_SUITE_P(
+    AVX, FullPrecisionQuantizeTest,
+    ::testing::Values(
+        make_tuple(&aom_quantize_b_c, &aom_quantize_b_avx,
+                   static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+        make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_avx,
+                   static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8)));
+
+#endif  // HAVE_AVX
+
+}  // namespace
diff --git a/third_party/aom/test/ratectrl_rtc_test.cc b/third_party/aom/test/ratectrl_rtc_test.cc
new file mode 100644
index 0000000000..cc054b6926
--- /dev/null
+++ b/third_party/aom/test/ratectrl_rtc_test.cc
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/ratectrl_rtc.h"
+
+#include <memory>
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/i420_video_source.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+constexpr size_t kNumFrames = 450;
+
+const int kTemporalId3Layer[4] = { 0, 2, 1, 2 };
+const int kTemporalId2Layer[2] = { 0, 1 };
+const int kTemporalRateAllocation3Layer[3] = { 50, 70, 100 };
+const int kTemporalRateAllocation2Layer[2] = { 60, 100 };
+const int kSpatialLayerBitrate[3] = { 200, 500, 900 };
+
+// Parameter: aq mode: 0 and 3
+class RcInterfaceTest : public ::libaom_test::EncoderTest,
+                        public ::libaom_test::CodecTestWithParam<int> {
+ public:
+  RcInterfaceTest()
+      : EncoderTest(GET_PARAM(0)), aq_mode_(GET_PARAM(1)), key_interval_(3000),
+        encoder_exit_(false), layer_frame_cnt_(0), superframe_cnt_(0),
+        frame_cnt_(0), dynamic_temporal_layers_(false),
+        dynamic_spatial_layers_(false), num_drops_(0), max_consec_drop_(0),
+        frame_drop_thresh_(0) {
+    memset(&svc_params_, 0, sizeof(svc_params_));
+    memset(&layer_id_, 0, sizeof(layer_id_));
+  }
+
+  ~RcInterfaceTest() override = default;
+
+ protected:
+  void SetUp() override { InitializeConfig(::libaom_test::kRealTime); }
+
+  int GetNumSpatialLayers() override { return rc_cfg_.ss_number_layers; }
+
+  void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                          libaom_test::Encoder *encoder) override {
+    int key_int = key_interval_;
+    const int use_svc =
+        rc_cfg_.ss_number_layers > 1 || rc_cfg_.ts_number_layers > 1;
+    encoder->Control(AV1E_SET_RTC_EXTERNAL_RC, 1);
+    if (video->frame() == 0 && layer_frame_cnt_ == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 7);
+      encoder->Control(AV1E_SET_AQ_MODE, aq_mode_);
+      if (rc_cfg_.is_screen) {
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
+      } else {
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+      }
+      encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT,
+                       rc_cfg_.max_intra_bitrate_pct);
+      if (use_svc) encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_);
+      encoder->Control(AV1E_SET_MAX_CONSEC_FRAME_DROP_CBR, max_consec_drop_);
+    }
+    // SVC specific settings
+    if (use_svc) {
+      frame_params_.spatial_layer_id =
+          layer_frame_cnt_ % rc_cfg_.ss_number_layers;
+      if (rc_cfg_.ts_number_layers == 3)
+        frame_params_.temporal_layer_id =
+            kTemporalId3Layer[superframe_cnt_ % 4];
+      else if (rc_cfg_.ts_number_layers == 2)
+        frame_params_.temporal_layer_id =
+            kTemporalId2Layer[superframe_cnt_ % 2];
+      else
+        frame_params_.temporal_layer_id = 0;
+      layer_id_.spatial_layer_id = frame_params_.spatial_layer_id;
+      layer_id_.temporal_layer_id = frame_params_.temporal_layer_id;
+      encoder->Control(AV1E_SET_SVC_LAYER_ID, &layer_id_);
+      key_int = key_interval_ * rc_cfg_.ss_number_layers;
+    }
+    frame_params_.frame_type =
+        layer_frame_cnt_ % key_int == 0 ? aom::kKeyFrame : aom::kInterFrame;
+    encoder_exit_ = video->frame() == kNumFrames;
+    frame_flags_ = 0;
+
+    if (dynamic_temporal_layers_) {
+      if (superframe_cnt_ == 100 && layer_id_.spatial_layer_id == 0) {
+        // Go down to 2 temporal layers.
+        SetConfigSvc(3, 2);
+        encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_);
+        ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_));
+      } else if (superframe_cnt_ == 200 && layer_id_.spatial_layer_id == 0) {
+        // Go down to 1 temporal layer.
+        SetConfigSvc(3, 1);
+        encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_);
+        ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_));
+      } else if (superframe_cnt_ == 300 && layer_id_.spatial_layer_id == 0) {
+        // Go back up to 3 temporal layers.
+        SetConfigSvc(3, 3);
+        encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_);
+        ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_));
+      }
+    } else if (dynamic_spatial_layers_) {
+      // In this example the #spatial layers is modified on the fly,
+      // so we go from (120p,240p,480p) to (240p,480p), etc.
+      if (superframe_cnt_ == 100 && layer_id_.spatial_layer_id == 0) {
+        // Change to 2 spatial layers (240p, 480p).
+        SetConfigSvc(2, 3);
+        encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_);
+        ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_));
+      } else if (superframe_cnt_ == 200 && layer_id_.spatial_layer_id == 0) {
+        // Change to 1 spatial layer (480p).
+        SetConfigSvc(1, 3);
+        encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_);
+        ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_));
+      } else if (superframe_cnt_ == 300 && layer_id_.spatial_layer_id == 0) {
+        // Go back to 3 spatial layers (120p, 240p, 480p).
+        SetConfigSvc(3, 3);
+        encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_);
+        // In the fixed SVC mode (which is what is used in this test):
+        // Key frame is required here on SL0 since 120p will try to predict
+        // from LAST which was the 480p, so decoder will throw an error
+        // (reference must be smaller than 4x4). In the flexible mode
+        // (not used here) we can set the frame flags to predict off the 2x2
+        // reference instead,
+        frame_flags_ = AOM_EFLAG_FORCE_KF;
+        frame_params_.frame_type = aom::kKeyFrame;
+        ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_));
+      }
+    }
+    // TODO(marpan): Add dynamic spatial layers based on 0 layer bitrate.
+    // That is actual usage in SW where configuration (#spatial, #temporal)
+    // layers is fixed, but top layer is dropped or re-enabled based on
+    // bitrate. This requires external RC to handle dropped (zero-size) frames.
+  }
+
+  void PostEncodeFrameHook(::libaom_test::Encoder *encoder) override {
+    if (encoder_exit_) {
+      return;
+    }
+    layer_frame_cnt_++;
+    frame_cnt_++;
+    if (layer_id_.spatial_layer_id == rc_cfg_.ss_number_layers - 1)
+      superframe_cnt_++;
+    int qp;
+    encoder->Control(AOME_GET_LAST_QUANTIZER, &qp);
+    if (rc_api_->ComputeQP(frame_params_) == aom::FrameDropDecision::kOk) {
+      ASSERT_EQ(rc_api_->GetQP(), qp) << "at frame " << frame_cnt_ - 1;
+      int encoder_lpf_level;
+      encoder->Control(AOME_GET_LOOPFILTER_LEVEL, &encoder_lpf_level);
+      aom::AV1LoopfilterLevel loopfilter_level = rc_api_->GetLoopfilterLevel();
+      ASSERT_EQ(loopfilter_level.filter_level[0], encoder_lpf_level);
+      aom::AV1CdefInfo cdef_level = rc_api_->GetCdefInfo();
+      int cdef_y_strengths[16];
+      encoder->Control(AV1E_GET_LUMA_CDEF_STRENGTH, cdef_y_strengths);
+      ASSERT_EQ(cdef_level.cdef_strength_y, cdef_y_strengths[0]);
+    } else {
+      num_drops_++;
+    }
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    if (layer_id_.spatial_layer_id == 0)
+      rc_api_->PostEncodeUpdate(pkt->data.frame.sz - 2);
+    else
+      rc_api_->PostEncodeUpdate(pkt->data.frame.sz);
+  }
+
+  void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) override {
+    (void)img1;
+    (void)img2;
+  }
+
+  void RunOneLayer() {
+    key_interval_ = 10000;
+    SetConfig();
+    rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_);
+    frame_params_.spatial_layer_id = 0;
+    frame_params_.temporal_layer_id = 0;
+
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, kNumFrames);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  void RunOneLayerScreen() {
+    key_interval_ = 10000;
+    SetConfig();
+    rc_cfg_.is_screen = true;
+    rc_cfg_.width = 352;
+    rc_cfg_.height = 288;
+    rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_);
+    frame_params_.spatial_layer_id = 0;
+    frame_params_.temporal_layer_id = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 140);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  void RunOneLayerDropFramesCBR() {
+    key_interval_ = 10000;
+    max_consec_drop_ = 8;
+    frame_drop_thresh_ = 30;
+    SetConfig();
+    rc_cfg_.target_bandwidth = 100;
+    cfg_.rc_target_bitrate = 100;
+    rc_cfg_.max_quantizer = 50;
+    cfg_.rc_max_quantizer = 50;
+    rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_);
+    frame_params_.spatial_layer_id = 0;
+    frame_params_.temporal_layer_id = 0;
+
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, kNumFrames);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    // Check that some frames were dropped, otherwise test has no value.
+    ASSERT_GE(num_drops_, 1);
+  }
+
+  void RunOneLayerPeriodicKey() {
+    key_interval_ = 100;
+    SetConfig();
+    rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_);
+    frame_params_.spatial_layer_id = 0;
+    frame_params_.temporal_layer_id = 0;
+
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, kNumFrames);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  void RunSvc() {
+    key_interval_ = 10000;
+    SetConfigSvc(3, 3);
+    rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_);
+    frame_params_.spatial_layer_id = 0;
+    frame_params_.temporal_layer_id = 0;
+
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, kNumFrames);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  void RunSvcPeriodicKey() {
+    key_interval_ = 100;
+    SetConfigSvc(3, 3);
+    rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_);
+    frame_params_.spatial_layer_id = 0;
+    frame_params_.temporal_layer_id = 0;
+
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, kNumFrames);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  void RunSvcDynamicTemporal() {
+    dynamic_temporal_layers_ = true;
+    key_interval_ = 10000;
+    SetConfigSvc(3, 3);
+    rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_);
+    frame_params_.spatial_layer_id = 0;
+    frame_params_.temporal_layer_id = 0;
+
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, kNumFrames);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  void RunSvcDynamicSpatial() {
+    dynamic_spatial_layers_ = true;
+    key_interval_ = 10000;
+    SetConfigSvc(3, 3);
+    rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_);
+    frame_params_.spatial_layer_id = 0;
+    frame_params_.temporal_layer_id = 0;
+
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, kNumFrames);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+ private:
+  void SetConfig() {
+    rc_cfg_.width = 640;
+    rc_cfg_.height = 480;
+    rc_cfg_.max_quantizer = 52;
+    rc_cfg_.min_quantizer = 2;
+    rc_cfg_.target_bandwidth = 1000;
+    rc_cfg_.buf_initial_sz = 600;
+    rc_cfg_.buf_optimal_sz = 600;
+    rc_cfg_.buf_sz = 1000;
+    rc_cfg_.undershoot_pct = 50;
+    rc_cfg_.overshoot_pct = 50;
+    rc_cfg_.max_intra_bitrate_pct = 1000;
+    rc_cfg_.framerate = 30.0;
+    rc_cfg_.ss_number_layers = 1;
+    rc_cfg_.ts_number_layers = 1;
+    rc_cfg_.scaling_factor_num[0] = 1;
+    rc_cfg_.scaling_factor_den[0] = 1;
+    rc_cfg_.layer_target_bitrate[0] = 1000;
+    rc_cfg_.max_quantizers[0] = 52;
+    rc_cfg_.min_quantizers[0] = 2;
+    rc_cfg_.aq_mode = aq_mode_;
+    rc_cfg_.frame_drop_thresh = frame_drop_thresh_;
+    rc_cfg_.max_consec_drop = max_consec_drop_;
+
+    // Encoder settings for ground truth.
+    cfg_.g_w = 640;
+    cfg_.g_h = 480;
+    cfg_.rc_undershoot_pct = 50;
+    cfg_.rc_overshoot_pct = 50;
+    cfg_.rc_buf_initial_sz = 600;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 52;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    cfg_.rc_target_bitrate = 1000;
+    cfg_.kf_min_dist = key_interval_;
+    cfg_.kf_max_dist = key_interval_;
+    cfg_.rc_dropframe_thresh = frame_drop_thresh_;
+  }
+
+  void SetConfigSvc(int number_spatial_layers, int number_temporal_layers) {
+    rc_cfg_.width = 640;
+    rc_cfg_.height = 480;
+    rc_cfg_.max_quantizer = 56;
+    rc_cfg_.min_quantizer = 2;
+    rc_cfg_.buf_initial_sz = 600;
+    rc_cfg_.buf_optimal_sz = 600;
+    rc_cfg_.buf_sz = 1000;
+    rc_cfg_.undershoot_pct = 50;
+    rc_cfg_.overshoot_pct = 50;
+    rc_cfg_.max_intra_bitrate_pct = 1000;
+    rc_cfg_.framerate = 30.0;
+    rc_cfg_.aq_mode = aq_mode_;
+    rc_cfg_.ss_number_layers = number_spatial_layers;
+    rc_cfg_.ts_number_layers = number_temporal_layers;
+
+    // Encoder settings for ground truth.
+    cfg_.g_w = 640;
+    cfg_.g_h = 480;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_buf_initial_sz = 600;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_overshoot_pct = 50;
+    cfg_.rc_undershoot_pct = 50;
+    cfg_.g_threads = 1;
+    cfg_.kf_min_dist = key_interval_;
+    cfg_.kf_max_dist = key_interval_;
+    cfg_.g_timebase.num = 1;
+    cfg_.g_timebase.den = 30;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    svc_params_.number_spatial_layers = number_spatial_layers;
+    svc_params_.number_temporal_layers = number_temporal_layers;
+
+    // Scale factors.
+    if (number_spatial_layers == 3) {
+      rc_cfg_.scaling_factor_num[0] = 1;
+      rc_cfg_.scaling_factor_den[0] = 4;
+      rc_cfg_.scaling_factor_num[1] = 2;
+      rc_cfg_.scaling_factor_den[1] = 4;
+      rc_cfg_.scaling_factor_num[2] = 4;
+      rc_cfg_.scaling_factor_den[2] = 4;
+      svc_params_.scaling_factor_num[0] = 1;
+      svc_params_.scaling_factor_den[0] = 4;
+      svc_params_.scaling_factor_num[1] = 2;
+      svc_params_.scaling_factor_den[1] = 4;
+      svc_params_.scaling_factor_num[2] = 4;
+      svc_params_.scaling_factor_den[2] = 4;
+    } else if (number_spatial_layers == 2) {
+      rc_cfg_.scaling_factor_num[0] = 1;
+      rc_cfg_.scaling_factor_den[0] = 2;
+      rc_cfg_.scaling_factor_num[1] = 2;
+      rc_cfg_.scaling_factor_den[1] = 2;
+      svc_params_.scaling_factor_num[0] = 1;
+      svc_params_.scaling_factor_den[0] = 2;
+      svc_params_.scaling_factor_num[1] = 2;
+      svc_params_.scaling_factor_den[1] = 2;
+    } else if (number_spatial_layers == 1) {
+      rc_cfg_.scaling_factor_num[0] = 1;
+      rc_cfg_.scaling_factor_den[0] = 1;
+      svc_params_.scaling_factor_num[0] = 1;
+      svc_params_.scaling_factor_den[0] = 1;
+    }
+
+    // TS rate decimator.
+    if (number_temporal_layers == 3) {
+      rc_cfg_.ts_rate_decimator[0] = 4;
+      rc_cfg_.ts_rate_decimator[1] = 2;
+      rc_cfg_.ts_rate_decimator[2] = 1;
+      svc_params_.framerate_factor[0] = 4;
+      svc_params_.framerate_factor[1] = 2;
+      svc_params_.framerate_factor[2] = 1;
+    } else if (number_temporal_layers == 2) {
+      rc_cfg_.ts_rate_decimator[0] = 2;
+      rc_cfg_.ts_rate_decimator[1] = 1;
+      svc_params_.framerate_factor[0] = 2;
+      svc_params_.framerate_factor[1] = 1;
+    } else if (number_temporal_layers == 1) {
+      rc_cfg_.ts_rate_decimator[0] = 1;
+      svc_params_.framerate_factor[0] = 1;
+    }
+
+    // Bitate.
+    rc_cfg_.target_bandwidth = 0;
+    cfg_.rc_target_bitrate = 0;
+    for (int sl = 0; sl < number_spatial_layers; sl++) {
+      int spatial_bitrate = 0;
+      if (number_spatial_layers <= 3)
+        spatial_bitrate = kSpatialLayerBitrate[sl];
+      for (int tl = 0; tl < number_temporal_layers; tl++) {
+        int layer = sl * number_temporal_layers + tl;
+        if (number_temporal_layers == 3) {
+          rc_cfg_.layer_target_bitrate[layer] =
+              kTemporalRateAllocation3Layer[tl] * spatial_bitrate / 100;
+          svc_params_.layer_target_bitrate[layer] =
+              kTemporalRateAllocation3Layer[tl] * spatial_bitrate / 100;
+        } else if (number_temporal_layers == 2) {
+          rc_cfg_.layer_target_bitrate[layer] =
+              kTemporalRateAllocation2Layer[tl] * spatial_bitrate / 100;
+          svc_params_.layer_target_bitrate[layer] =
+              kTemporalRateAllocation2Layer[tl] * spatial_bitrate / 100;
+        } else if (number_temporal_layers == 1) {
+          rc_cfg_.layer_target_bitrate[layer] = spatial_bitrate;
+          svc_params_.layer_target_bitrate[layer] = spatial_bitrate;
+        }
+      }
+      rc_cfg_.target_bandwidth += spatial_bitrate;
+      cfg_.rc_target_bitrate += spatial_bitrate;
+    }
+
+    // Layer min/max quantizer.
+    for (int sl = 0; sl < number_spatial_layers; ++sl) {
+      for (int tl = 0; tl < number_temporal_layers; ++tl) {
+        const int i = sl * number_temporal_layers + tl;
+        rc_cfg_.max_quantizers[i] = rc_cfg_.max_quantizer;
+        rc_cfg_.min_quantizers[i] = rc_cfg_.min_quantizer;
+        svc_params_.max_quantizers[i] = cfg_.rc_max_quantizer;
+        svc_params_.min_quantizers[i] = cfg_.rc_min_quantizer;
+      }
+    }
+  }
+
+  std::unique_ptr<aom::AV1RateControlRTC> rc_api_;
+  aom::AV1RateControlRtcConfig rc_cfg_;
+  int aq_mode_;
+  int key_interval_;
+  aom::AV1FrameParamsRTC frame_params_;
+  bool encoder_exit_;
+  aom_svc_params_t svc_params_;
+  aom_svc_layer_id_t layer_id_;
+  int layer_frame_cnt_;
+  int superframe_cnt_;
+  int frame_cnt_;
+  bool dynamic_temporal_layers_;
+  bool dynamic_spatial_layers_;
+  int num_drops_;
+  int max_consec_drop_;
+  int frame_drop_thresh_;
+};
+
+TEST_P(RcInterfaceTest, OneLayer) { RunOneLayer(); }
+
+TEST_P(RcInterfaceTest, OneLayerDropFramesCBR) { RunOneLayerDropFramesCBR(); }
+
+TEST_P(RcInterfaceTest, OneLayerPeriodicKey) { RunOneLayerPeriodicKey(); }
+
+TEST_P(RcInterfaceTest, OneLayerScreen) { RunOneLayerScreen(); }
+
+TEST_P(RcInterfaceTest, Svc) { RunSvc(); }
+
+TEST_P(RcInterfaceTest, SvcPeriodicKey) { RunSvcPeriodicKey(); }
+
+TEST_P(RcInterfaceTest, SvcDynamicTemporal) { RunSvcDynamicTemporal(); }
+
+TEST_P(RcInterfaceTest, SvcDynamicSpatial) { RunSvcDynamicSpatial(); }
+
+AV1_INSTANTIATE_TEST_SUITE(RcInterfaceTest, ::testing::Values(0, 3));
+
+}  // namespace
diff --git a/third_party/aom/test/ratectrl_test.cc b/third_party/aom/test/ratectrl_test.cc
new file mode 100644
index 0000000000..d951b1197f
--- /dev/null
+++ b/third_party/aom/test/ratectrl_test.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/encoder/firstpass.h"
+#include "av1/encoder/ratectrl.h"
+#include "av1/encoder/tpl_model.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+TEST(RatectrlTest, QModeGetQIndexTest) {
+  int base_q_index = 36;
+  int gf_update_type = INTNL_ARF_UPDATE;
+  int gf_pyramid_level = 1;
+  int arf_q = 100;
+  int q_index = av1_q_mode_get_q_index(base_q_index, gf_update_type,
+                                       gf_pyramid_level, arf_q);
+  EXPECT_EQ(q_index, arf_q);
+
+  gf_update_type = INTNL_ARF_UPDATE;
+  gf_pyramid_level = 3;
+  q_index = av1_q_mode_get_q_index(base_q_index, gf_update_type,
+                                   gf_pyramid_level, arf_q);
+  EXPECT_LT(q_index, arf_q);
+
+  gf_update_type = LF_UPDATE;
+  q_index = av1_q_mode_get_q_index(base_q_index, gf_update_type,
+                                   gf_pyramid_level, arf_q);
+  EXPECT_EQ(q_index, base_q_index);
+}
+}  // namespace
diff --git a/third_party/aom/test/rd_test.cc b/third_party/aom/test/rd_test.cc
new file mode 100644
index 0000000000..0c481fcbb6
--- /dev/null
+++ b/third_party/aom/test/rd_test.cc
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <vector>
+
+#include "av1/common/quant_common.h"
+#include "av1/encoder/rd.h"
+#include "aom/aom_codec.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+TEST(RdTest, GetDeltaqOffsetValueTest1) {
+  aom_bit_depth_t bit_depth = AOM_BITS_8;
+  double beta = 4;
+  int q_index = 29;
+  int dc_q_step =
+      av1_dc_quant_QTX(q_index, 0, static_cast<aom_bit_depth_t>(bit_depth));
+  EXPECT_EQ(dc_q_step, 32);
+
+  int ref_new_dc_q_step = static_cast<int>(round(dc_q_step / sqrt(beta)));
+  EXPECT_EQ(ref_new_dc_q_step, 16);
+
+  int delta_q = av1_get_deltaq_offset(bit_depth, q_index, beta);
+  int new_dc_q_step = av1_dc_quant_QTX(q_index, delta_q,
+                                       static_cast<aom_bit_depth_t>(bit_depth));
+
+  EXPECT_EQ(new_dc_q_step, ref_new_dc_q_step);
+}
+
+TEST(RdTest, GetDeltaqOffsetValueTest2) {
+  aom_bit_depth_t bit_depth = AOM_BITS_8;
+  double beta = 1.0 / 4.0;
+  int q_index = 29;
+  int dc_q_step =
+      av1_dc_quant_QTX(q_index, 0, static_cast<aom_bit_depth_t>(bit_depth));
+  EXPECT_EQ(dc_q_step, 32);
+
+  int ref_new_dc_q_step = static_cast<int>(round(dc_q_step / sqrt(beta)));
+  EXPECT_EQ(ref_new_dc_q_step, 64);
+
+  int delta_q = av1_get_deltaq_offset(bit_depth, q_index, beta);
+  int new_dc_q_step = av1_dc_quant_QTX(q_index, delta_q,
+                                       static_cast<aom_bit_depth_t>(bit_depth));
+
+  EXPECT_EQ(new_dc_q_step, ref_new_dc_q_step);
+}
+
+TEST(RdTest, GetDeltaqOffsetBoundaryTest1) {
+  aom_bit_depth_t bit_depth = AOM_BITS_8;
+  double beta = 0.000000001;
+  std::vector<int> q_index_ls = { 254, 255 };
+  for (auto q_index : q_index_ls) {
+    int delta_q = av1_get_deltaq_offset(bit_depth, q_index, beta);
+    EXPECT_EQ(q_index + delta_q, 255);
+  }
+}
+
+TEST(RdTest, GetDeltaqOffsetBoundaryTest2) {
+  aom_bit_depth_t bit_depth = AOM_BITS_8;
+  double beta = 100;
+  std::vector<int> q_index_ls = { 1, 0 };
+  for (auto q_index : q_index_ls) {
+    int delta_q = av1_get_deltaq_offset(bit_depth, q_index, beta);
+    EXPECT_EQ(q_index + delta_q, 0);
+  }
+}
+
+TEST(RdTest, GetDeltaqOffsetUnitaryTest1) {
+  aom_bit_depth_t bit_depth = AOM_BITS_8;
+  double beta = 1;
+  for (int q_index = 0; q_index < 255; ++q_index) {
+    int delta_q = av1_get_deltaq_offset(bit_depth, q_index, beta);
+    EXPECT_EQ(delta_q, 0);
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/reconinter_test.cc b/third_party/aom/test/reconinter_test.cc
new file mode 100644
index 0000000000..ee1a9893db
--- /dev/null
+++ b/third_party/aom/test/reconinter_test.cc
@@ -0,0 +1,372 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <tuple>
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "av1/common/scan.h"
+#include "av1/common/txb_common.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+using libaom_test::ACMRandom;
+
+using BuildCompDiffWtdMaskFunc = void (*)(uint8_t *mask,
+                                          DIFFWTD_MASK_TYPE mask_type,
+                                          const uint8_t *src0, int src0_stride,
+                                          const uint8_t *src1, int src1_stride,
+                                          int h, int w);
+
+using BuildCompDiffwtdMaskDParam =
+    std::tuple<BLOCK_SIZE, BuildCompDiffWtdMaskFunc>;
+
+#if HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON
+::testing::internal::ParamGenerator<BuildCompDiffwtdMaskDParam> BuildParams(
+    BuildCompDiffWtdMaskFunc filter) {
+  return ::testing::Combine(::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL),
+                            ::testing::Values(filter));
+}
+#endif
+
+class BuildCompDiffwtdMaskTest
+    : public ::testing::TestWithParam<BuildCompDiffwtdMaskDParam> {
+ public:
+  BuildCompDiffwtdMaskTest() : rnd_(ACMRandom::DeterministicSeed()) {}
+  ~BuildCompDiffwtdMaskTest() override = default;
+
+ protected:
+  void RunTest(BuildCompDiffWtdMaskFunc test_impl, bool is_speed,
+               const DIFFWTD_MASK_TYPE type) {
+    const int sb_type = GET_PARAM(0);
+    const int width = block_size_wide[sb_type];
+    const int height = block_size_high[sb_type];
+    DECLARE_ALIGNED(16, uint8_t, mask_ref[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(16, uint8_t, mask_test[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(16, uint8_t, src0[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(16, uint8_t, src1[MAX_SB_SQUARE]);
+    for (int i = 0; i < width * height; i++) {
+      src0[i] = rnd_.Rand8();
+      src1[i] = rnd_.Rand8();
+    }
+    const int run_times = is_speed ? (10000000 / (width + height)) : 1;
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      av1_build_compound_diffwtd_mask_c(mask_ref, type, src0, width, src1,
+                                        width, height, width);
+    }
+    const double t1 = get_time_mark(&timer);
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      test_impl(mask_test, type, src0, width, src1, width, height, width);
+    }
+    const double t2 = get_time_mark(&timer);
+    if (is_speed) {
+      printf("mask %d %3dx%-3d:%7.2f/%7.2fns", type, width, height, t1, t2);
+      printf("(%3.2f)\n", t1 / t2);
+    }
+    for (int r = 0; r < height; ++r) {
+      for (int c = 0; c < width; ++c) {
+        ASSERT_EQ(mask_ref[c + r * width], mask_test[c + r * width])
+            << "[" << r << "," << c << "] " << run_times << " @ " << width
+            << "x" << height << " inv " << type;
+      }
+    }
+  }
+
+ private:
+  ACMRandom rnd_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BuildCompDiffwtdMaskTest);
+
+TEST_P(BuildCompDiffwtdMaskTest, match) {
+  RunTest(GET_PARAM(1), 0, DIFFWTD_38);
+  RunTest(GET_PARAM(1), 0, DIFFWTD_38_INV);
+}
+TEST_P(BuildCompDiffwtdMaskTest, DISABLED_Speed) {
+  RunTest(GET_PARAM(1), 1, DIFFWTD_38);
+  RunTest(GET_PARAM(1), 1, DIFFWTD_38_INV);
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE4_1, BuildCompDiffwtdMaskTest,
+                         BuildParams(av1_build_compound_diffwtd_mask_sse4_1));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, BuildCompDiffwtdMaskTest,
+                         BuildParams(av1_build_compound_diffwtd_mask_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, BuildCompDiffwtdMaskTest,
+                         BuildParams(av1_build_compound_diffwtd_mask_neon));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+
+using BuildCompDiffWtdMaskHighbdFunc =
+    void (*)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
+             int src0_stride, const uint8_t *src1, int src1_stride, int h,
+             int w, int bd);
+
+using BuildCompDiffwtdMaskHighbdParam =
+    std::tuple<BLOCK_SIZE, int, BuildCompDiffWtdMaskHighbdFunc>;
+
+#if HAVE_SSSE3 || HAVE_AVX2 || HAVE_NEON
+::testing::internal::ParamGenerator<BuildCompDiffwtdMaskHighbdParam>
+BuildParamsHighbd(BuildCompDiffWtdMaskHighbdFunc filter) {
+  return ::testing::Combine(::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL),
+                            ::testing::Values(8, 10, 12),
+                            ::testing::Values(filter));
+}
+#endif
+
+class BuildCompDiffwtdMaskHighbdTest
+    : public ::testing::TestWithParam<BuildCompDiffwtdMaskHighbdParam> {
+ public:
+  BuildCompDiffwtdMaskHighbdTest() : rnd_(ACMRandom::DeterministicSeed()) {}
+  ~BuildCompDiffwtdMaskHighbdTest() override = default;
+
+ protected:
+  void RunTest(BuildCompDiffWtdMaskHighbdFunc test_impl, bool is_speed,
+               const DIFFWTD_MASK_TYPE type) {
+    const int sb_type = GET_PARAM(0);
+    const int bd = GET_PARAM(1);
+    const int width = block_size_wide[sb_type];
+    const int height = block_size_high[sb_type];
+    const int mask = (1 << bd) - 1;
+    DECLARE_ALIGNED(16, uint8_t, mask_ref[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(16, uint8_t, mask_test[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(16, uint16_t, src0[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(16, uint16_t, src1[MAX_SB_SQUARE]);
+    for (int i = 0; i < width * height; i++) {
+      src0[i] = rnd_.Rand16() & mask;
+      src1[i] = rnd_.Rand16() & mask;
+    }
+    const int run_times = is_speed ? (10000000 / (width + height)) : 1;
+    aom_usec_timer timer;
+
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      uint8_t *src0_8 = CONVERT_TO_BYTEPTR(src0);
+      uint8_t *src1_8 = CONVERT_TO_BYTEPTR(src1);
+      av1_build_compound_diffwtd_mask_highbd_c(
+          mask_ref, type, src0_8, width, src1_8, width, height, width, bd);
+    }
+    const double t1 = get_time_mark(&timer);
+
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      uint8_t *src0_8 = CONVERT_TO_BYTEPTR(src0);
+      uint8_t *src1_8 = CONVERT_TO_BYTEPTR(src1);
+      test_impl(mask_test, type, src0_8, width, src1_8, width, height, width,
+                bd);
+    }
+    const double t2 = get_time_mark(&timer);
+
+    if (is_speed) {
+      printf("mask %d %3dx%-3d:%7.2f/%7.2fns", type, width, height, t1, t2);
+      printf("(%3.2f)\n", t1 / t2);
+    }
+    for (int r = 0; r < height; ++r) {
+      for (int c = 0; c < width; ++c) {
+        ASSERT_EQ(mask_ref[c + r * width], mask_test[c + r * width])
+            << "[" << r << "," << c << "] " << run_times << " @ " << width
+            << "x" << height << " inv " << type;
+      }
+    }
+  }
+
+ private:
+  ACMRandom rnd_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BuildCompDiffwtdMaskHighbdTest);
+
+TEST_P(BuildCompDiffwtdMaskHighbdTest, match) {
+  RunTest(GET_PARAM(2), 0, DIFFWTD_38);
+  RunTest(GET_PARAM(2), 0, DIFFWTD_38_INV);
+}
+TEST_P(BuildCompDiffwtdMaskHighbdTest, DISABLED_Speed) {
+  RunTest(GET_PARAM(2), 1, DIFFWTD_38);
+  RunTest(GET_PARAM(2), 1, DIFFWTD_38_INV);
+}
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_SUITE_P(
+    SSSE3, BuildCompDiffwtdMaskHighbdTest,
+    BuildParamsHighbd(av1_build_compound_diffwtd_mask_highbd_ssse3));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, BuildCompDiffwtdMaskHighbdTest,
+    BuildParamsHighbd(av1_build_compound_diffwtd_mask_highbd_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, BuildCompDiffwtdMaskHighbdTest,
+    BuildParamsHighbd(av1_build_compound_diffwtd_mask_highbd_neon));
+#endif
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+using BuildCompDiffWtdMaskD16Func = void (*)(
+    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
+    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
+    ConvolveParams *conv_params, int bd);
+
+using BuildCompDiffwtdMaskD16Param =
+    std::tuple<int, BuildCompDiffWtdMaskD16Func, BLOCK_SIZE>;
+
+#if HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON
+::testing::internal::ParamGenerator<BuildCompDiffwtdMaskD16Param> BuildParams(
+    BuildCompDiffWtdMaskD16Func filter) {
+  return ::testing::Combine(::testing::Range(8, 13, 2),
+                            ::testing::Values(filter),
+                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
+}
+#endif
+
+class BuildCompDiffwtdMaskD16Test
+    : public ::testing::TestWithParam<BuildCompDiffwtdMaskD16Param> {
+ public:
+  BuildCompDiffwtdMaskD16Test() : rnd_(ACMRandom::DeterministicSeed()) {}
+  ~BuildCompDiffwtdMaskD16Test() override = default;
+
+ protected:
+  void RunCheckOutput(BuildCompDiffWtdMaskD16Func test_impl) {
+    const int block_idx = GET_PARAM(2);
+    const int bd = GET_PARAM(0);
+    const int width = block_size_wide[block_idx];
+    const int height = block_size_high[block_idx];
+    DECLARE_ALIGNED(16, uint8_t, mask_ref[2 * MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(16, uint8_t, mask_test[2 * MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, uint16_t, src0[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, uint16_t, src1[MAX_SB_SQUARE]);
+
+    ConvolveParams conv_params =
+        get_conv_params_no_round(0, 0, nullptr, 0, 1, bd);
+
+    const int in_precision =
+        bd + 2 * FILTER_BITS - conv_params.round_0 - conv_params.round_1 + 2;
+
+    for (int i = 0; i < MAX_SB_SQUARE; i++) {
+      src0[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
+      src1[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
+    }
+
+    for (int mask_type = 0; mask_type < DIFFWTD_MASK_TYPES; mask_type++) {
+      av1_build_compound_diffwtd_mask_d16_c(
+          mask_ref, (DIFFWTD_MASK_TYPE)mask_type, src0, width, src1, width,
+          height, width, &conv_params, bd);
+
+      test_impl(mask_test, (DIFFWTD_MASK_TYPE)mask_type, src0, width, src1,
+                width, height, width, &conv_params, bd);
+
+      for (int r = 0; r < height; ++r) {
+        for (int c = 0; c < width; ++c) {
+          ASSERT_EQ(mask_ref[c + r * width], mask_test[c + r * width])
+              << "Mismatch at unit tests for BuildCompDiffwtdMaskD16Test\n"
+              << " Pixel mismatch at index "
+              << "[" << r << "," << c << "] "
+              << " @ " << width << "x" << height << " inv " << mask_type;
+        }
+      }
+    }
+  }
+
+  void RunSpeedTest(BuildCompDiffWtdMaskD16Func test_impl,
+                    DIFFWTD_MASK_TYPE mask_type) {
+    const int block_idx = GET_PARAM(2);
+    const int bd = GET_PARAM(0);
+    const int width = block_size_wide[block_idx];
+    const int height = block_size_high[block_idx];
+    DECLARE_ALIGNED(16, uint8_t, mask[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, uint16_t, src0[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(32, uint16_t, src1[MAX_SB_SQUARE]);
+
+    ConvolveParams conv_params =
+        get_conv_params_no_round(0, 0, nullptr, 0, 1, bd);
+
+    const int in_precision =
+        bd + 2 * FILTER_BITS - conv_params.round_0 - conv_params.round_1 + 2;
+
+    for (int i = 0; i < MAX_SB_SQUARE; i++) {
+      src0[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
+      src1[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
+    }
+
+    const int num_loops = 10000000 / (width + height);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      av1_build_compound_diffwtd_mask_d16_c(mask, mask_type, src0, width, src1,
+                                            width, height, width, &conv_params,
+                                            bd);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+
+    for (int i = 0; i < num_loops; ++i)
+      test_impl(mask, mask_type, src0, width, src1, width, height, width,
+                &conv_params, bd);
+
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("av1_build_compound_diffwtd_mask_d16  %3dx%-3d: %7.2f \n", width,
+           height, elapsed_time / double(elapsed_time1));
+  }
+
+ private:
+  ACMRandom rnd_;
+};  // class BuildCompDiffwtdMaskD16Test
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BuildCompDiffwtdMaskD16Test);
+
+TEST_P(BuildCompDiffwtdMaskD16Test, CheckOutput) {
+  RunCheckOutput(GET_PARAM(1));
+}
+
+TEST_P(BuildCompDiffwtdMaskD16Test, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(1), DIFFWTD_38);
+  RunSpeedTest(GET_PARAM(1), DIFFWTD_38_INV);
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BuildCompDiffwtdMaskD16Test,
+    BuildParams(av1_build_compound_diffwtd_mask_d16_sse4_1));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, BuildCompDiffwtdMaskD16Test,
+                         BuildParams(av1_build_compound_diffwtd_mask_d16_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, BuildCompDiffwtdMaskD16Test,
+                         BuildParams(av1_build_compound_diffwtd_mask_d16_neon));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/register_state_check.h b/third_party/aom/test/register_state_check.h
new file mode 100644
index 0000000000..4aad81469e
--- /dev/null
+++ b/third_party/aom/test/register_state_check.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_REGISTER_STATE_CHECK_H_
+#define AOM_TEST_REGISTER_STATE_CHECK_H_
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+
+#include "aom/aom_integer.h"
+
+// API_REGISTER_STATE_CHECK(function)
+//   Validates the environment pre & post function execution to ensure the
+//   environment is in a consistent state. This should be used with API
+//   function sand assembly functions which are not expected to fully restore
+//   the system state.
+//   See platform implementations of RegisterStateCheck and
+//   RegisterStateCheckMMX for details.
+
+#if defined(_WIN64) && AOM_ARCH_X86_64
+
+#undef NOMINMAX
+#define NOMINMAX
+#undef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <winnt.h>
+
+inline bool operator==(const M128A &lhs, const M128A &rhs) {
+  return (lhs.Low == rhs.Low && lhs.High == rhs.High);
+}
+
+namespace libaom_test {
+
+// Compares the state of xmm[6-15] at construction with their state at
+// destruction. These registers should be preserved by the callee on
+// Windows x64.
+class RegisterStateCheck {
+ public:
+  RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }
+  ~RegisterStateCheck() { Check(); }
+
+ private:
+  static bool StoreRegisters(CONTEXT *const context) {
+    const HANDLE this_thread = GetCurrentThread();
+    EXPECT_NE(this_thread, nullptr);
+    context->ContextFlags = CONTEXT_FLOATING_POINT;
+    const bool context_saved = GetThreadContext(this_thread, context) == TRUE;
+    EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError();
+    return context_saved;
+  }
+
+  // Compares the register state. Returns true if the states match.
+  void Check() const {
+    ASSERT_TRUE(initialized_);
+    CONTEXT post_context;
+    ASSERT_TRUE(StoreRegisters(&post_context));
+
+    const M128A *xmm_pre = &pre_context_.Xmm6;
+    const M128A *xmm_post = &post_context.Xmm6;
+    for (int i = 6; i <= 15; ++i) {
+      EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!";
+      ++xmm_pre;
+      ++xmm_post;
+    }
+  }
+
+  bool initialized_;
+  CONTEXT pre_context_;
+};
+}  // namespace libaom_test
+
+#else
+
+namespace libaom_test {
+
+class RegisterStateCheck {};
+}  // namespace libaom_test
+
+#endif  // _WIN64 && AOM_ARCH_X86_64
+
+#if (AOM_ARCH_X86 || AOM_ARCH_X86_64) && defined(__GNUC__)
+namespace libaom_test {
+
+// Checks the FPU tag word pre/post execution to ensure emms has been called.
+class RegisterStateCheckMMX {
+ public:
+  RegisterStateCheckMMX() {
+    __asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_));
+  }
+  ~RegisterStateCheckMMX() { Check(); }
+
+ private:
+  // Checks the FPU tag word pre/post execution, returning false if not cleared
+  // to 0xffff.
+  void Check() const {
+    EXPECT_EQ(0xffff, pre_fpu_env_[4])
+        << "FPU was in an inconsistent state prior to call";
+
+    uint16_t post_fpu_env[14];
+    __asm__ volatile("fstenv %0" : "=rm"(post_fpu_env));
+    EXPECT_EQ(0xffff, post_fpu_env[4])
+        << "FPU was left in an inconsistent state after call";
+  }
+
+  uint16_t pre_fpu_env_[14];
+};
+}  // namespace libaom_test
+
+#else
+namespace libaom_test {
+
+class RegisterStateCheckMMX {};
+}  // namespace libaom_test
+
+#endif  // (AOM_ARCH_X86 || AOM_ARCH_X86_64) && defined(__GNUC__)
+
+#define API_REGISTER_STATE_CHECK(statement)           \
+  do {                                                \
+    libaom_test::RegisterStateCheck reg_check;        \
+    libaom_test::RegisterStateCheckMMX reg_check_mmx; \
+    statement;                                        \
+    (void)reg_check_mmx;                              \
+    (void)reg_check;                                  \
+  } while (false)
+
+#endif  // AOM_TEST_REGISTER_STATE_CHECK_H_
diff --git a/third_party/aom/test/resize_test.cc b/third_party/aom/test/resize_test.cc
new file mode 100644
index 0000000000..7bad45300a
--- /dev/null
+++ b/third_party/aom/test/resize_test.cc
@@ -0,0 +1,1136 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <climits>
+#include <vector>
+#include "aom_dsp/aom_dsp_common.h"
+#include "common/tools_common.h"
+#include "av1/encoder/encoder.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+// Enable(1) or Disable(0) writing of the compressed bitstream.
+#define WRITE_COMPRESSED_STREAM 0
+
+namespace {
+
+#if WRITE_COMPRESSED_STREAM
+static void mem_put_le16(char *const mem, unsigned int val) {
+  mem[0] = val;
+  mem[1] = val >> 8;
+}
+
+static void mem_put_le32(char *const mem, unsigned int val) {
+  mem[0] = val;
+  mem[1] = val >> 8;
+  mem[2] = val >> 16;
+  mem[3] = val >> 24;
+}
+
+static void write_ivf_file_header(const aom_codec_enc_cfg_t *const cfg,
+                                  int frame_cnt, FILE *const outfile) {
+  char header[32];
+
+  header[0] = 'D';
+  header[1] = 'K';
+  header[2] = 'I';
+  header[3] = 'F';
+  mem_put_le16(header + 4, 0);                    /* version */
+  mem_put_le16(header + 6, 32);                   /* headersize */
+  mem_put_le32(header + 8, AV1_FOURCC);           /* fourcc (av1) */
+  mem_put_le16(header + 12, cfg->g_w);            /* width */
+  mem_put_le16(header + 14, cfg->g_h);            /* height */
+  mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
+  mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
+  mem_put_le32(header + 24, frame_cnt);           /* length */
+  mem_put_le32(header + 28, 0);                   /* unused */
+
+  (void)fwrite(header, 1, 32, outfile);
+}
+
+static void write_ivf_frame_size(FILE *const outfile, const size_t size) {
+  char header[4];
+  mem_put_le32(header, static_cast<unsigned int>(size));
+  (void)fwrite(header, 1, 4, outfile);
+}
+
+static void write_ivf_frame_header(const aom_codec_cx_pkt_t *const pkt,
+                                   FILE *const outfile) {
+  char header[12];
+  aom_codec_pts_t pts;
+
+  if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return;
+
+  pts = pkt->data.frame.pts;
+  mem_put_le32(header, static_cast<unsigned int>(pkt->data.frame.sz));
+  mem_put_le32(header + 4, pts & 0xFFFFFFFF);
+  mem_put_le32(header + 8, pts >> 32);
+
+  (void)fwrite(header, 1, 12, outfile);
+}
+#endif  // WRITE_COMPRESSED_STREAM
+
+const unsigned int kInitialWidth = 320;
+const unsigned int kInitialHeight = 240;
+
+struct FrameInfo {
+  FrameInfo(aom_codec_pts_t _pts, unsigned int _w, unsigned int _h)
+      : pts(_pts), w(_w), h(_h) {}
+
+  aom_codec_pts_t pts;
+  unsigned int w;
+  unsigned int h;
+};
+
+void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w,
+                         unsigned int initial_h, int flag_codec,
+                         bool change_start_resln, unsigned int *w,
+                         unsigned int *h) {
+  if (frame < 10) {
+    if (change_start_resln) {
+      *w = initial_w / 4;
+      *h = initial_h / 4;
+    } else {
+      *w = initial_w;
+      *h = initial_h;
+    }
+    return;
+  }
+  if (frame < 20) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 30) {
+    *w = initial_w / 2;
+    *h = initial_h / 2;
+    return;
+  }
+  if (frame < 40) {
+    *w = initial_w;
+    *h = initial_h;
+    return;
+  }
+  if (frame < 50) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 60) {
+    *w = initial_w / 2;
+    *h = initial_h / 2;
+    return;
+  }
+  if (frame < 70) {
+    *w = initial_w;
+    *h = initial_h;
+    return;
+  }
+  if (frame < 80) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 90) {
+    *w = initial_w / 2;
+    *h = initial_h / 2;
+    return;
+  }
+  if (frame < 100) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 110) {
+    *w = initial_w;
+    *h = initial_h;
+    return;
+  }
+  // Go down very low
+  if (frame < 120) {
+    *w = initial_w / 4;
+    *h = initial_h / 4;
+    return;
+  }
+  if (flag_codec == 1) {
+    // Cases that only works for AV1.
+    // For AV1: Swap width and height of original.
+    if (frame < 140) {
+      *w = initial_h;
+      *h = initial_w;
+      return;
+    }
+  }
+  *w = initial_w;
+  *h = initial_h;
+}
+
+class ResizingVideoSource : public ::libaom_test::DummyVideoSource {
+ public:
+  ResizingVideoSource() {
+    SetSize(kInitialWidth, kInitialHeight);
+    limit_ = 150;
+  }
+  int flag_codec_;
+  bool change_start_resln_;
+  ~ResizingVideoSource() override = default;
+
+ protected:
+  void Begin() override {
+    frame_ = 0;
+    unsigned int width;
+    unsigned int height;
+    ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, flag_codec_,
+                        change_start_resln_, &width, &height);
+    SetSize(width, height);
+    FillFrame();
+  }
+  void Next() override {
+    ++frame_;
+    unsigned int width;
+    unsigned int height;
+    ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, flag_codec_,
+                        change_start_resln_, &width, &height);
+    SetSize(width, height);
+    FillFrame();
+  }
+};
+
+class ResizeTest
+    : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  ResizeTest() : EncoderTest(GET_PARAM(0)) {}
+
+  ~ResizeTest() override = default;
+
+  void SetUp() override { InitializeConfig(GET_PARAM(1)); }
+
+  void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                          libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      if (GET_PARAM(1) == ::libaom_test::kRealTime) {
+        encoder->Control(AV1E_SET_AQ_MODE, 3);
+        encoder->Control(AOME_SET_CPUUSED, 5);
+        encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      }
+    }
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             aom_codec_pts_t pts) override {
+    frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
+  }
+
+  std::vector<FrameInfo> frame_info_list_;
+};
+
+TEST_P(ResizeTest, TestExternalResizeWorks) {
+  ResizingVideoSource video;
+  video.flag_codec_ = 0;
+  video.change_start_resln_ = false;
+  cfg_.g_lag_in_frames = 0;
+  // We use max(kInitialWidth, kInitialHeight) because during the test
+  // the width and height of the frame are swapped
+  cfg_.g_forced_max_frame_width = cfg_.g_forced_max_frame_height =
+      AOMMAX(kInitialWidth, kInitialHeight);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // Check we decoded the same number of frames as we attempted to encode
+  ASSERT_EQ(frame_info_list_.size(), video.limit());
+
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    const unsigned int frame = static_cast<unsigned>(info->pts);
+    unsigned int expected_w;
+    unsigned int expected_h;
+    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, video.flag_codec_,
+                        video.change_start_resln_, &expected_w, &expected_h);
+    EXPECT_EQ(expected_w, info->w)
+        << "Frame " << frame << " had unexpected width";
+    EXPECT_EQ(expected_h, info->h)
+        << "Frame " << frame << " had unexpected height";
+  }
+}
+
+#if !CONFIG_REALTIME_ONLY
+const unsigned int kStepDownFrame = 3;
+const unsigned int kStepUpFrame = 6;
+
+class ResizeInternalTestLarge : public ResizeTest {
+ protected:
+#if WRITE_COMPRESSED_STREAM
+  ResizeInternalTestLarge()
+      : ResizeTest(), frame0_psnr_(0.0), outfile_(nullptr), out_frames_(0) {}
+#else
+  ResizeInternalTestLarge() : ResizeTest(), frame0_psnr_(0.0) {}
+#endif
+
+  ~ResizeInternalTestLarge() override = default;
+
+  void BeginPassHook(unsigned int /*pass*/) override {
+#if WRITE_COMPRESSED_STREAM
+    outfile_ = fopen("av10-2-05-resize.ivf", "wb");
+#endif
+  }
+
+  void EndPassHook() override {
+#if WRITE_COMPRESSED_STREAM
+    if (outfile_) {
+      if (!fseek(outfile_, 0, SEEK_SET))
+        write_ivf_file_header(&cfg_, out_frames_, outfile_);
+      fclose(outfile_);
+      outfile_ = nullptr;
+    }
+#endif
+  }
+
+  void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                          libaom_test::Encoder *encoder) override {
+    if (change_config_) {
+      int new_q = 60;
+      if (video->frame() == 0) {
+        struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
+        encoder->Control(AOME_SET_SCALEMODE, &mode);
+      } else if (video->frame() == 1) {
+        struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
+        encoder->Control(AOME_SET_SCALEMODE, &mode);
+        cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q;
+        encoder->Config(&cfg_);
+      }
+    } else {
+      if (video->frame() >= kStepDownFrame && video->frame() < kStepUpFrame) {
+        struct aom_scaling_mode mode = { AOME_FOURFIVE, AOME_THREEFIVE };
+        encoder->Control(AOME_SET_SCALEMODE, &mode);
+      }
+      if (video->frame() >= kStepUpFrame) {
+        struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
+        encoder->Control(AOME_SET_SCALEMODE, &mode);
+      }
+    }
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0];
+    EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 4.1);
+  }
+
+#if WRITE_COMPRESSED_STREAM
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    ++out_frames_;
+
+    // Write initial file header if first frame.
+    if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_);
+
+    // Write frame header and data.
+    write_ivf_frame_header(pkt, outfile_);
+    (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
+  }
+#endif
+
+  double frame0_psnr_;
+  bool change_config_;
+#if WRITE_COMPRESSED_STREAM
+  FILE *outfile_;
+  unsigned int out_frames_;
+#endif
+};
+
+TEST_P(ResizeInternalTestLarge, TestInternalResizeWorks) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 10);
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  change_config_ = false;
+
+  // q picked such that initial keyframe on this clip is ~30dB PSNR
+  cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
+
+  // If the number of frames being encoded is smaller than g_lag_in_frames
+  // the encoded frame is unavailable using the current API. Comparing
+  // frames to detect mismatch would then not be possible. Set
+  // g_lag_in_frames = 0 to get around this.
+  cfg_.g_lag_in_frames = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+  }
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    const aom_codec_pts_t pts = info->pts;
+    if (pts >= kStepDownFrame && pts < kStepUpFrame) {
+      ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
+      ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
+    } else {
+      EXPECT_EQ(352U, info->w) << "Frame " << pts << " had unexpected width";
+      EXPECT_EQ(288U, info->h) << "Frame " << pts << " had unexpected height";
+    }
+  }
+}
+
+TEST_P(ResizeInternalTestLarge, TestInternalResizeChangeConfig) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 10);
+  cfg_.g_w = 352;
+  cfg_.g_h = 288;
+  change_config_ = true;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+AV1_INSTANTIATE_TEST_SUITE(ResizeInternalTestLarge,
+                           ::testing::Values(::libaom_test::kOnePassGood));
+#endif
+
+// Parameters: test mode, speed, threads
+class ResizeRealtimeTest
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int,
+                                                 int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  ResizeRealtimeTest()
+      : EncoderTest(GET_PARAM(0)), num_threads_(GET_PARAM(3)),
+        set_scale_mode_(false), set_scale_mode2_(false),
+        set_scale_mode3_(false) {}
+  ~ResizeRealtimeTest() override = default;
+
+  void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                          libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_AQ_MODE, 3);
+      encoder->Control(AV1E_SET_ALLOW_WARPED_MOTION, 0);
+      encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
+      encoder->Control(AV1E_SET_ENABLE_OBMC, 0);
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+    }
+    if (set_scale_mode_) {
+      struct aom_scaling_mode mode;
+      if (video->frame() <= 20)
+        mode = { AOME_ONETWO, AOME_ONETWO };
+      else if (video->frame() <= 40)
+        mode = { AOME_ONEFOUR, AOME_ONEFOUR };
+      else if (video->frame() > 40)
+        mode = { AOME_NORMAL, AOME_NORMAL };
+      encoder->Control(AOME_SET_SCALEMODE, &mode);
+    } else if (set_scale_mode2_) {
+      struct aom_scaling_mode mode;
+      if (video->frame() <= 20)
+        mode = { AOME_ONEFOUR, AOME_ONEFOUR };
+      else if (video->frame() <= 40)
+        mode = { AOME_ONETWO, AOME_ONETWO };
+      else if (video->frame() > 40)
+        mode = { AOME_THREEFOUR, AOME_THREEFOUR };
+      encoder->Control(AOME_SET_SCALEMODE, &mode);
+    } else if (set_scale_mode3_) {
+      struct aom_scaling_mode mode;
+      if (video->frame() <= 30)
+        mode = { AOME_ONETWO, AOME_NORMAL };
+      else
+        mode = { AOME_NORMAL, AOME_NORMAL };
+      encoder->Control(AOME_SET_SCALEMODE, &mode);
+    }
+
+    if (change_bitrate_ && video->frame() == frame_change_bitrate_) {
+      change_bitrate_ = false;
+      cfg_.rc_target_bitrate = 500;
+      encoder->Config(&cfg_);
+    }
+  }
+
+  void SetUp() override {
+    InitializeConfig(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             aom_codec_pts_t pts) override {
+    frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
+  }
+
+  void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) override {
+    double mismatch_psnr = compute_psnr(img1, img2);
+    mismatch_psnr_ += mismatch_psnr;
+    ++mismatch_nframes_;
+  }
+
+  unsigned int GetMismatchFrames() { return mismatch_nframes_; }
+
+  void DefaultConfig() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_undershoot_pct = 50;
+    cfg_.rc_overshoot_pct = 50;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.kf_mode = AOM_KF_AUTO;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
+    // Enable dropped frames.
+    cfg_.rc_dropframe_thresh = 1;
+    // Disable error_resilience mode.
+    cfg_.g_error_resilient = 0;
+    cfg_.g_threads = num_threads_;
+    // Run at low bitrate.
+    cfg_.rc_target_bitrate = 200;
+    // We use max(kInitialWidth, kInitialHeight) because during the test
+    // the width and height of the frame are swapped
+    cfg_.g_forced_max_frame_width = cfg_.g_forced_max_frame_height =
+        AOMMAX(kInitialWidth, kInitialHeight);
+    if (set_scale_mode_ || set_scale_mode2_ || set_scale_mode3_) {
+      cfg_.rc_dropframe_thresh = 0;
+      cfg_.g_forced_max_frame_width = 1280;
+      cfg_.g_forced_max_frame_height = 1280;
+    }
+  }
+
+  std::vector<FrameInfo> frame_info_list_;
+  int set_cpu_used_;
+  int num_threads_;
+  bool change_bitrate_;
+  unsigned int frame_change_bitrate_;
+  double mismatch_psnr_;
+  int mismatch_nframes_;
+  bool set_scale_mode_;
+  bool set_scale_mode2_;
+  bool set_scale_mode3_;
+};
+
+// Check the AOME_SET_SCALEMODE control by downsizing to
+// 1/2, then 1/4, and then back up to originsal.
+TEST_P(ResizeRealtimeTest, TestInternalResizeSetScaleMode1) {
+  ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+  cfg_.g_w = 1280;
+  cfg_.g_h = 720;
+  set_scale_mode_ = true;
+  set_scale_mode2_ = false;
+  set_scale_mode3_ = false;
+  DefaultConfig();
+  change_bitrate_ = false;
+  mismatch_nframes_ = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Check we decoded the same number of frames as we attempted to encode
+  ASSERT_EQ(frame_info_list_.size(), video.limit());
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    const auto frame = static_cast<unsigned>(info->pts);
+    unsigned int expected_w = 1280 >> 1;
+    unsigned int expected_h = 720 >> 1;
+    if (frame > 40) {
+      expected_w = 1280;
+      expected_h = 720;
+    } else if (frame > 20 && frame <= 40) {
+      expected_w = 1280 >> 2;
+      expected_h = 720 >> 2;
+    }
+    EXPECT_EQ(expected_w, info->w)
+        << "Frame " << frame << " had unexpected width";
+    EXPECT_EQ(expected_h, info->h)
+        << "Frame " << frame << " had unexpected height";
+    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+  }
+}
+
+// Check the AOME_SET_SCALEMODE control by downsizing to
+// 1/2, then 1/4, and then back up to originsal.
+TEST_P(ResizeRealtimeTest, TestInternalResizeSetScaleMode1QVGA) {
+  ::libaom_test::I420VideoSource video("desktop1.320_180.yuv", 320, 180, 30, 1,
+                                       0, 80);
+  cfg_.g_w = 320;
+  cfg_.g_h = 180;
+  set_scale_mode_ = true;
+  set_scale_mode2_ = false;
+  set_scale_mode3_ = false;
+  DefaultConfig();
+  change_bitrate_ = false;
+  mismatch_nframes_ = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Check we decoded the same number of frames as we attempted to encode
+  ASSERT_EQ(frame_info_list_.size(), video.limit());
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    const auto frame = static_cast<unsigned>(info->pts);
+    unsigned int expected_w = 320 >> 1;
+    unsigned int expected_h = 180 >> 1;
+    if (frame > 40) {
+      expected_w = 320;
+      expected_h = 180;
+    } else if (frame > 20 && frame <= 40) {
+      expected_w = 320 >> 2;
+      expected_h = 180 >> 2;
+    }
+    EXPECT_EQ(expected_w, info->w)
+        << "Frame " << frame << " had unexpected width";
+    EXPECT_EQ(expected_h, info->h)
+        << "Frame " << frame << " had unexpected height";
+    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+  }
+}
+
+// Check the AOME_SET_SCALEMODE control by downsizing to
+// 1/4, then 1/2, and then up to 3/4.
+TEST_P(ResizeRealtimeTest, TestInternalResizeSetScaleMode2) {
+  ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+  cfg_.g_w = 1280;
+  cfg_.g_h = 720;
+  set_scale_mode_ = false;
+  set_scale_mode2_ = true;
+  set_scale_mode3_ = false;
+  DefaultConfig();
+  change_bitrate_ = false;
+  mismatch_nframes_ = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Check we decoded the same number of frames as we attempted to encode
+  ASSERT_EQ(frame_info_list_.size(), video.limit());
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    const auto frame = static_cast<unsigned>(info->pts);
+    unsigned int expected_w = 1280 >> 2;
+    unsigned int expected_h = 720 >> 2;
+    if (frame > 40) {
+      expected_w = (3 * 1280) >> 2;
+      expected_h = (3 * 720) >> 2;
+    } else if (frame > 20 && frame <= 40) {
+      expected_w = 1280 >> 1;
+      expected_h = 720 >> 1;
+    }
+    EXPECT_EQ(expected_w, info->w)
+        << "Frame " << frame << " had unexpected width";
+    EXPECT_EQ(expected_h, info->h)
+        << "Frame " << frame << " had unexpected height";
+    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+  }
+}
+
+// Check the AOME_SET_SCALEMODE control by downsizing to
+// 1/2 horizontally only and then back up to original.
+TEST_P(ResizeRealtimeTest, TestInternalResizeSetScaleMode3) {
+  ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+  cfg_.g_w = 1280;
+  cfg_.g_h = 720;
+  set_scale_mode_ = false;
+  set_scale_mode2_ = false;
+  set_scale_mode3_ = true;
+  DefaultConfig();
+  change_bitrate_ = false;
+  mismatch_nframes_ = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Check we decoded the same number of frames as we attempted to encode
+  ASSERT_EQ(frame_info_list_.size(), video.limit());
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    const auto frame = static_cast<unsigned>(info->pts);
+    unsigned int expected_w = 640;
+    unsigned int expected_h = 720;
+    if (frame > 30) {
+      expected_w = 1280;
+      expected_h = 720;
+    }
+    EXPECT_EQ(expected_w, info->w)
+        << "Frame " << frame << " had unexpected width";
+    EXPECT_EQ(expected_h, info->h)
+        << "Frame " << frame << " had unexpected height";
+    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+  }
+}
+
+TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
+  ResizingVideoSource video;
+  video.flag_codec_ = 1;
+  change_bitrate_ = false;
+  set_scale_mode_ = false;
+  set_scale_mode2_ = false;
+  set_scale_mode3_ = false;
+  mismatch_psnr_ = 0.0;
+  mismatch_nframes_ = 0;
+  DefaultConfig();
+  // Test external resizing with start resolution equal to
+  // 1. kInitialWidth and kInitialHeight
+  // 2. down-scaled kInitialWidth and kInitialHeight
+  for (int i = 0; i < 2; i++) {
+    video.change_start_resln_ = static_cast<bool>(i);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+    // Check we decoded the same number of frames as we attempted to encode
+    ASSERT_EQ(frame_info_list_.size(), video.limit());
+    for (const auto &info : frame_info_list_) {
+      const unsigned int frame = static_cast<unsigned>(info.pts);
+      unsigned int expected_w;
+      unsigned int expected_h;
+      ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight,
+                          video.flag_codec_, video.change_start_resln_,
+                          &expected_w, &expected_h);
+      EXPECT_EQ(expected_w, info.w)
+          << "Frame " << frame << " had unexpected width";
+      EXPECT_EQ(expected_h, info.h)
+          << "Frame " << frame << " had unexpected height";
+      EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+    }
+    frame_info_list_.clear();
+  }
+}
+
+// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
+// Run at low bitrate, with resize_allowed = 1, and verify that we get
+// one resize down event.
+TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
+  ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  cfg_.g_w = 640;
+  cfg_.g_h = 480;
+  change_bitrate_ = false;
+  set_scale_mode_ = false;
+  set_scale_mode2_ = false;
+  set_scale_mode3_ = false;
+  mismatch_psnr_ = 0.0;
+  mismatch_nframes_ = 0;
+  DefaultConfig();
+  // Disable dropped frames.
+  cfg_.rc_dropframe_thresh = 0;
+  // Starting bitrate low.
+  cfg_.rc_target_bitrate = 150;
+  cfg_.rc_resize_mode = RESIZE_DYNAMIC;
+  cfg_.g_forced_max_frame_width = 1280;
+  cfg_.g_forced_max_frame_height = 1280;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  unsigned int last_w = cfg_.g_w;
+  unsigned int last_h = cfg_.g_h;
+  int resize_down_count = 0;
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    if (info->w != last_w || info->h != last_h) {
+      // Verify that resize down occurs.
+      if (info->w < last_w && info->h < last_h) {
+        resize_down_count++;
+      }
+      last_w = info->w;
+      last_h = info->h;
+    }
+  }
+
+#if CONFIG_AV1_DECODER
+  // Verify that we get at lease 1 resize down event in this test.
+  ASSERT_GE(resize_down_count, 1) << "Resizing should occur.";
+  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+#else
+  printf("Warning: AV1 decoder unavailable, unable to check resize count!\n");
+#endif
+}
+
+// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
+// Start at low target bitrate, raise the bitrate in the middle of the clip
+// (at frame# = frame_change_bitrate_), scaling-up should occur after bitrate
+// is increased.
+TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
+  ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  cfg_.g_w = 640;
+  cfg_.g_h = 480;
+  change_bitrate_ = true;
+  frame_change_bitrate_ = 120;
+  set_scale_mode_ = false;
+  set_scale_mode2_ = false;
+  set_scale_mode3_ = false;
+  mismatch_psnr_ = 0.0;
+  mismatch_nframes_ = 0;
+  DefaultConfig();
+  // Disable dropped frames.
+  cfg_.rc_dropframe_thresh = 0;
+  // Starting bitrate low.
+  cfg_.rc_target_bitrate = 150;
+  cfg_.rc_resize_mode = RESIZE_DYNAMIC;
+  cfg_.g_forced_max_frame_width = 1280;
+  cfg_.g_forced_max_frame_height = 1280;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  unsigned int last_w = cfg_.g_w;
+  unsigned int last_h = cfg_.g_h;
+  unsigned int frame_number = 0;
+  int resize_down_count = 0;
+  int resize_up_count = 0;
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    if (info->w != last_w || info->h != last_h) {
+      if (frame_number < frame_change_bitrate_) {
+        // Verify that resize down occurs, before bitrate is increased.
+        ASSERT_LT(info->w, last_w);
+        ASSERT_LT(info->h, last_h);
+        resize_down_count++;
+      } else {
+        // Verify that resize up occurs, after bitrate is increased.
+        ASSERT_GT(info->w, last_w);
+        ASSERT_GT(info->h, last_h);
+        resize_up_count++;
+      }
+      last_w = info->w;
+      last_h = info->h;
+    }
+    frame_number++;
+  }
+
+#if CONFIG_AV1_DECODER
+  // Verify that we get at least 2 resize events in this test.
+  ASSERT_GE(resize_up_count, 1) << "Resizing up should occur at lease once.";
+  ASSERT_GE(resize_down_count, 1)
+      << "Resizing down should occur at lease once.";
+  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+#else
+  printf("Warning: AV1 decoder unavailable, unable to check resize count!\n");
+#endif
+}
+
+class ResizeCspTest : public ResizeTest {
+ protected:
+#if WRITE_COMPRESSED_STREAM
+  ResizeCspTest()
+      : ResizeTest(), frame0_psnr_(0.0), outfile_(nullptr), out_frames_(0) {}
+#else
+  ResizeCspTest() : ResizeTest(), frame0_psnr_(0.0) {}
+#endif
+
+  ~ResizeCspTest() override = default;
+
+  void BeginPassHook(unsigned int /*pass*/) override {
+#if WRITE_COMPRESSED_STREAM
+    outfile_ = fopen("av11-2-05-cspchape.ivf", "wb");
+#endif
+  }
+
+  void EndPassHook() override {
+#if WRITE_COMPRESSED_STREAM
+    if (outfile_) {
+      if (!fseek(outfile_, 0, SEEK_SET))
+        write_ivf_file_header(&cfg_, out_frames_, outfile_);
+      fclose(outfile_);
+      outfile_ = nullptr;
+    }
+#endif
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0];
+    EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
+  }
+
+#if WRITE_COMPRESSED_STREAM
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    ++out_frames_;
+
+    // Write initial file header if first frame.
+    if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_);
+
+    // Write frame header and data.
+    write_ivf_frame_header(pkt, outfile_);
+    (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
+  }
+#endif
+
+  double frame0_psnr_;
+#if WRITE_COMPRESSED_STREAM
+  FILE *outfile_;
+  unsigned int out_frames_;
+#endif
+};
+
+class ResizingCspVideoSource : public ::libaom_test::DummyVideoSource {
+ public:
+  explicit ResizingCspVideoSource(aom_img_fmt_t image_format) {
+    SetSize(kInitialWidth, kInitialHeight);
+    SetImageFormat(image_format);
+    limit_ = 30;
+  }
+
+  ~ResizingCspVideoSource() override = default;
+};
+
+#if (defined(DISABLE_TRELLISQ_SEARCH) && DISABLE_TRELLISQ_SEARCH) || \
+    (defined(CONFIG_MAX_DECODE_PROFILE) && CONFIG_MAX_DECODE_PROFILE < 1)
+TEST_P(ResizeCspTest, DISABLED_TestResizeCspWorks) {
+#else
+TEST_P(ResizeCspTest, TestResizeCspWorks) {
+#endif
+  const aom_img_fmt_t image_formats[] = { AOM_IMG_FMT_I420, AOM_IMG_FMT_I444 };
+  for (const aom_img_fmt_t &img_format : image_formats) {
+    ResizingCspVideoSource video(img_format);
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_profile = (img_format == AOM_IMG_FMT_I420) ? 0 : 1;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+    // Check we decoded the same number of frames as we attempted to encode
+    ASSERT_EQ(frame_info_list_.size(), video.limit());
+    frame_info_list_.clear();
+  }
+}
+
+#if !CONFIG_REALTIME_ONLY
+// This class is used to check if there are any fatal
+// failures while encoding with resize-mode > 0
+class ResizeModeTestLarge
+    : public ::libaom_test::CodecTestWith5Params<libaom_test::TestMode, int,
+                                                 int, int, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  ResizeModeTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        resize_mode_(GET_PARAM(2)), resize_denominator_(GET_PARAM(3)),
+        resize_kf_denominator_(GET_PARAM(4)), cpu_used_(GET_PARAM(5)) {}
+  ~ResizeModeTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.g_threads = 1;
+    cfg_.g_lag_in_frames = 35;
+    cfg_.rc_target_bitrate = 1000;
+    cfg_.rc_resize_mode = resize_mode_;
+    cfg_.rc_resize_denominator = resize_denominator_;
+    cfg_.rc_resize_kf_denominator = resize_kf_denominator_;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+    }
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  int resize_mode_;
+  int resize_denominator_;
+  int resize_kf_denominator_;
+  int cpu_used_;
+};
+
+TEST_P(ResizeModeTestLarge, ResizeModeTest) {
+  ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 30);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ResizeModeTestLarge);
+AV1_INSTANTIATE_TEST_SUITE(ResizeModeTestLarge,
+                           ::testing::Values(::libaom_test::kOnePassGood,
+                                             ::libaom_test::kTwoPassGood),
+                           ::testing::Values(1, 2), ::testing::Values(8, 12),
+                           ::testing::Values(10, 14), ::testing::Values(3, 6));
+#endif  // !CONFIG_REALTIME_ONLY
+
+AV1_INSTANTIATE_TEST_SUITE(ResizeTest,
+                           ::testing::Values(::libaom_test::kRealTime));
+AV1_INSTANTIATE_TEST_SUITE(ResizeRealtimeTest,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Range(6, 10), ::testing::Values(1, 2, 4));
+AV1_INSTANTIATE_TEST_SUITE(ResizeCspTest,
+                           ::testing::Values(::libaom_test::kRealTime));
+
+// A test that reproduces crbug.com/1393384. In realtime usage mode, encode
+// frames of sizes 202x202, 1x202, and 202x202. ASan should report no memory
+// errors.
+TEST(ResizeSimpleTest, TemporarySmallerFrameSize) {
+  constexpr int kWidth = 202;
+  constexpr int kHeight = 202;
+  // Dummy buffer of zero samples.
+  constexpr size_t kBufferSize =
+      kWidth * kHeight + 2 * (kWidth + 1) / 2 * (kHeight + 1) / 2;
+  std::vector<unsigned char> buffer(kBufferSize);
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1,
+                               buffer.data()));
+  aom_image_t img2;
+  EXPECT_EQ(&img2, aom_img_wrap(&img2, AOM_IMG_FMT_I420, 1, kHeight, 1,
+                                buffer.data()));
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME));
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 5));
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+
+  cfg.g_w = 1;
+  cfg.g_h = kHeight;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img2, 1, 1, 0));
+
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 2, 1, 0));
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+// A test that reproduces crbug.com/1410766. In realtime usage mode
+// for SVC with temporal layers, encode frames of sizes 600x600,
+// 600x600, and 100x480. ASan should report no memory errors.
+TEST(ResizeSimpleTest, SmallerFrameSizeSVC) {
+  constexpr int kWidth = 600;
+  constexpr int kHeight = 600;
+  // Dummy buffer of zero samples.
+  constexpr size_t kBufferSize =
+      kWidth * kHeight + 2 * (kWidth + 1) / 2 * (kHeight + 1) / 2;
+  std::vector<unsigned char> buffer(kBufferSize);
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1,
+                               buffer.data()));
+  aom_image_t img2;
+  EXPECT_EQ(&img2,
+            aom_img_wrap(&img2, AOM_IMG_FMT_I420, 100, 480, 1, buffer.data()));
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME));
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 5));
+
+  aom_svc_params_t svc_params = {};
+  aom_svc_layer_id_t layer_id;
+  svc_params.number_spatial_layers = 1;
+  svc_params.framerate_factor[0] = 2;
+  svc_params.framerate_factor[1] = 1;
+  svc_params.number_temporal_layers = 2;
+  // Bitrate allocation L0: 60% L1: 40%
+  svc_params.layer_target_bitrate[0] = 60 * cfg.rc_target_bitrate / 100;
+  svc_params.layer_target_bitrate[1] = cfg.rc_target_bitrate;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_SVC_PARAMS, &svc_params));
+
+  layer_id.spatial_layer_id = 0;
+  layer_id.temporal_layer_id = 0;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_SVC_LAYER_ID, &layer_id));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  layer_id.temporal_layer_id = 1;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_SVC_LAYER_ID, &layer_id));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 1, 1, 0));
+
+  cfg.g_w = 100;
+  cfg.g_h = 480;
+  layer_id.temporal_layer_id = 0;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_SVC_LAYER_ID, &layer_id));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img2, 2, 1, 0));
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+const int kUsages[] =
+#if CONFIG_REALTIME_ONLY
+    { AOM_USAGE_REALTIME };
+#else
+    { AOM_USAGE_GOOD_QUALITY, AOM_USAGE_REALTIME, AOM_USAGE_ALL_INTRA };
+#endif
+
+const int kNumThreads[] = { 2, 4, 8 };
+
+class FrameSizeChangeTest
+    : public ::libaom_test::CodecTestWith3Params<int, int, int> {
+ protected:
+  FrameSizeChangeTest() {}
+  ~FrameSizeChangeTest() override = default;
+
+  void DoTest(int change_thread) {
+    usage_ = GET_PARAM(1);
+    cpu_used_ = GET_PARAM(2);
+    threads_ = GET_PARAM(3);
+    constexpr int kWidth = 512;
+    constexpr int kHeight = 512;
+    constexpr int kFirstWidth = 256;
+    constexpr int kFirstHeight = 256;
+    // Buffer of zero samples.
+    constexpr size_t kBufferSize = 3 * kWidth * kHeight;
+    std::vector<unsigned char> buffer(kBufferSize,
+                                      static_cast<unsigned char>(0));
+
+    aom_image_t img1;
+    EXPECT_EQ(&img1, aom_img_wrap(&img1, AOM_IMG_FMT_I420, kFirstWidth,
+                                  kFirstHeight, 1, buffer.data()));
+
+    aom_image_t img2;
+    EXPECT_EQ(&img2, aom_img_wrap(&img2, AOM_IMG_FMT_I420, kWidth, kHeight, 1,
+                                  buffer.data()));
+
+    aom_codec_iface_t *iface = aom_codec_av1_cx();
+    aom_codec_enc_cfg_t cfg;
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, usage_));
+    cfg.g_threads = threads_;
+    cfg.g_lag_in_frames = usage_ == AOM_USAGE_ALL_INTRA ? 0 : 1;
+    cfg.g_w = kFirstWidth;
+    cfg.g_h = kFirstHeight;
+    cfg.g_forced_max_frame_width = kWidth;
+    cfg.g_forced_max_frame_height = kHeight;
+    aom_codec_ctx_t enc;
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0));
+    EXPECT_EQ(AOM_CODEC_OK,
+              aom_codec_control(&enc, AOME_SET_CPUUSED, cpu_used_));
+
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img1, 0, 1, 0));
+
+    if (change_thread == 1) {
+      cfg.g_threads = AOMMAX(1, threads_ / 2);
+    } else if (change_thread == 2) {
+      cfg.g_threads = threads_ * 2;
+    }
+    cfg.g_w = kWidth;
+    cfg.g_h = kHeight;
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg));
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img2, 1, 1, 0));
+
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0));
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+  }
+
+  int cpu_used_;
+  int threads_;
+  int usage_;
+};
+
+TEST_P(FrameSizeChangeTest, FixedThreads) { DoTest(0); }
+TEST_P(FrameSizeChangeTest, DecreasingThreads) { DoTest(1); }
+TEST_P(FrameSizeChangeTest, IncreasingThreads) { DoTest(2); }
+
+AV1_INSTANTIATE_TEST_SUITE(FrameSizeChangeTest, ::testing::ValuesIn(kUsages),
+                           ::testing::Range(6, 7),
+                           ::testing::ValuesIn(kNumThreads));
+
+}  // namespace
diff --git a/third_party/aom/test/rt_end_to_end_test.cc b/third_party/aom/test/rt_end_to_end_test.cc
new file mode 100644
index 0000000000..f1f9e019c2
--- /dev/null
+++ b/third_party/aom/test/rt_end_to_end_test.cc
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+#include <ostream>
+#include <string>
+#include <unordered_map>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+const unsigned int kFrames = 10;
+const int kBitrate = 500;
+
+// List of psnr thresholds for speed settings 6-8
+// keys: video, speed, aq mode.
+std::unordered_map<std::string,
+                   std::unordered_map<int, std::unordered_map<int, double>>>
+    kPsnrThreshold = { { "park_joy_90p_8_420.y4m",
+                         { { 5, { { 0, 35.4 }, { 3, 36.3 } } },
+                           { 6, { { 0, 35.3 }, { 3, 36.2 } } },
+                           { 7, { { 0, 34.9 }, { 3, 35.8 } } },
+                           { 8, { { 0, 35.0 }, { 3, 35.8 } } },
+                           { 9, { { 0, 34.9 }, { 3, 35.5 } } },
+                           { 10, { { 0, 34.7 }, { 3, 35.3 } } } } },
+                       { "paris_352_288_30.y4m",
+                         { { 5, { { 0, 36.2 }, { 3, 36.7 } } },
+                           { 6, { { 0, 36.1 }, { 3, 36.48 } } },
+                           { 7, { { 0, 35.5 }, { 3, 36.0 } } },
+                           { 8, { { 0, 35.8 }, { 3, 36.4 } } },
+                           { 9, { { 0, 35.5 }, { 3, 36.0 } } },
+                           { 10, { { 0, 35.3 }, { 3, 35.9 } } } } },
+                       { "niklas_1280_720_30.y4m",
+                         { { 5, { { 0, 34.4 }, { 3, 34.2 } } },
+                           { 6, { { 0, 34.1 }, { 3, 34.0 } } },
+                           { 7, { { 0, 33.5 }, { 3, 33.1 } } },
+                           { 8, { { 0, 33.3 }, { 3, 33.3 } } },
+                           { 9, { { 0, 33.3 }, { 3, 33.3 } } },
+                           { 10, { { 0, 33.1 }, { 3, 33.1 } } } } },
+                       { "hantro_collage_w352h288_nv12.yuv",
+                         { { 5, { { 0, 34.4 }, { 3, 34.2 } } },
+                           { 6, { { 0, 34.1 }, { 3, 34.1 } } },
+                           { 7, { { 0, 33.6 }, { 3, 33.6 } } },
+                           { 8, { { 0, 33.3 }, { 3, 33.3 } } },
+                           { 9, { { 0, 33.3 }, { 3, 33.3 } } },
+                           { 10, { { 0, 33.1 }, { 3, 33.1 } } } } } };
+
+typedef struct {
+  const char *filename;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) {
+  return os << "TestVideoParam { filename:" << test_arg.filename
+            << " input_bit_depth:" << test_arg.input_bit_depth
+            << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth
+            << " profile:" << test_arg.profile << " }";
+}
+
+const TestVideoParam kTestVectors[] = {
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "paris_352_288_30.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "niklas_1280_720_30.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "hantro_collage_w352h288_nv12.yuv", 8, AOM_IMG_FMT_NV12, AOM_BITS_8, 0 },
+};
+
+// Params: test video, speed, aq mode, threads, tile columns, tile rows.
+class RTEndToEndTest
+    : public ::libaom_test::CodecTestWith6Params<TestVideoParam, int,
+                                                 unsigned int, int, int, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  RTEndToEndTest()
+      : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), psnr_(0.0), nframes_(0),
+        aq_mode_(GET_PARAM(3)), threads_(GET_PARAM(4)),
+        tile_columns_(GET_PARAM(5)), tile_rows_(GET_PARAM(6)) {}
+
+  ~RTEndToEndTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kRealTime);
+
+    cfg_.g_threads = threads_;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.kf_max_dist = 9999;
+    cfg_.kf_min_dist = 9999;
+  }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_ENABLE_RESTORATION, 0);
+      encoder->Control(AV1E_SET_ENABLE_OBMC, 0);
+      encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
+      encoder->Control(AV1E_SET_ENABLE_WARPED_MOTION, 0);
+      encoder->Control(AV1E_SET_DELTAQ_MODE, 0);
+      encoder->Control(AV1E_SET_ENABLE_TPL_MODEL, 0);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, tile_columns_);
+      encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_);
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+      encoder->Control(AV1E_SET_AQ_MODE, aq_mode_);
+      encoder->Control(AV1E_SET_ROW_MT, 1);
+      encoder->Control(AV1E_SET_ENABLE_CDEF, 1);
+      encoder->Control(AV1E_SET_COEFF_COST_UPD_FREQ, 2);
+      encoder->Control(AV1E_SET_MODE_COST_UPD_FREQ, 2);
+      encoder->Control(AV1E_SET_MV_COST_UPD_FREQ, 2);
+      encoder->Control(AV1E_SET_DV_COST_UPD_FREQ, 2);
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  double GetPsnrThreshold() {
+    return kPsnrThreshold[test_video_param_.filename][cpu_used_][aq_mode_];
+  }
+
+  void DoTest() {
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    std::unique_ptr<libaom_test::VideoSource> video;
+    if (is_extension_y4m(test_video_param_.filename))
+      video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                  kFrames));
+    else
+      video.reset(new libaom_test::YUVVideoSource(test_video_param_.filename,
+                                                  test_video_param_.fmt, 352,
+                                                  288, 30, 1, 0, kFrames));
+    ASSERT_NE(video, nullptr);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, GetPsnrThreshold())
+        << "cpu used = " << cpu_used_ << " aq mode = " << aq_mode_;
+  }
+
+  TestVideoParam test_video_param_;
+  int cpu_used_;
+
+ private:
+  double psnr_;
+  unsigned int nframes_;
+  unsigned int aq_mode_;
+  int threads_;
+  int tile_columns_;
+  int tile_rows_;
+};
+
+class RTEndToEndTestThreaded : public RTEndToEndTest {};
+
+TEST_P(RTEndToEndTest, EndtoEndPSNRTest) { DoTest(); }
+
+TEST_P(RTEndToEndTestThreaded, EndtoEndPSNRTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(RTEndToEndTest, ::testing::ValuesIn(kTestVectors),
+                           ::testing::Range(5, 12),
+                           ::testing::Values<unsigned int>(0, 3),
+                           ::testing::Values(1), ::testing::Values(1),
+                           ::testing::Values(1));
+
+AV1_INSTANTIATE_TEST_SUITE(RTEndToEndTestThreaded,
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::Range(5, 12),
+                           ::testing::Values<unsigned int>(0, 3),
+                           ::testing::Range(2, 6), ::testing::Range(1, 5),
+                           ::testing::Range(1, 5));
+}  // namespace
diff --git a/third_party/aom/test/run_encodes.sh b/third_party/aom/test/run_encodes.sh
new file mode 100755
index 0000000000..2096d8b158
--- /dev/null
+++ b/third_party/aom/test/run_encodes.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Copyright (c) 2016, Alliance for Open Media. All rights reserved.
+#
+# This source code is subject to the terms of the BSD 2 Clause License and
+# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+# was not distributed with this source code in the LICENSE file, you can
+# obtain it at www.aomedia.org/license/software. If the Alliance for Open
+# Media Patent License 1.0 was not distributed with this source code in the
+# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+#
+# Author: jimbankoski@google.com (Jim Bankoski)
+
+if [[ $# -ne 4 ]]; then
+  echo Encodes all the y4m files in the directory at the bitrates specified by
+  echo the first 3 parameters and stores the results in a subdirectory named by
+  echo the 4th parameter:
+  echo
+  echo Usage:    run_encodes.sh start-kbps end-kbps step-kbps output-directory
+  echo Example:  run_encodes.sh 200 500 50 baseline
+  exit
+fi
+
+s=$1
+e=$2
+step=$3
+newdir=$4
+
+for i in ./*y4m; do
+  for (( b=$s; b<= $e; b+= $step ))
+  do
+    best_encode.sh $i $b
+  done
+  mv opsnr.stt $i.stt
+done
+
+mkdir $newdir
+mv *.stt $newdir
+mv *.webm $newdir
diff --git a/third_party/aom/test/sad_test.cc b/third_party/aom/test/sad_test.cc
new file mode 100644
index 0000000000..521274863c
--- /dev/null
+++ b/third_party/aom/test/sad_test.cc
@@ -0,0 +1,3353 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "aom/aom_codec.h"
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/mem.h"
+
+typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr, int src_stride,
+                                   const uint8_t *ref_ptr, int ref_stride);
+typedef std::tuple<int, int, SadMxNFunc, int> SadMxNParam;
+
+typedef unsigned int (*SadSkipMxNFunc)(const uint8_t *src_ptr, int src_stride,
+                                       const uint8_t *ref_ptr, int ref_stride);
+typedef std::tuple<int, int, SadSkipMxNFunc, int> SadSkipMxNParam;
+
+typedef uint32_t (*SadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride,
+                                  const uint8_t *ref_ptr, int ref_stride,
+                                  const uint8_t *second_pred);
+typedef std::tuple<int, int, SadMxNAvgFunc, int> SadMxNAvgParam;
+
+typedef unsigned int (*DistWtdSadMxhFunc)(const uint8_t *src_ptr,
+                                          int src_stride,
+                                          const uint8_t *ref_ptr,
+                                          int ref_stride, int width,
+                                          int height);
+typedef std::tuple<int, int, DistWtdSadMxhFunc, int> DistWtdSadMxhParam;
+
+typedef uint32_t (*DistWtdSadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride,
+                                         const uint8_t *ref_ptr, int ref_stride,
+                                         const uint8_t *second_pred,
+                                         const DIST_WTD_COMP_PARAMS *jcp_param);
+typedef std::tuple<int, int, DistWtdSadMxNAvgFunc, int> DistWtdSadMxNAvgParam;
+
+typedef void (*SadMxNx4Func)(const uint8_t *src_ptr, int src_stride,
+                             const uint8_t *const ref_ptr[], int ref_stride,
+                             uint32_t *sad_array);
+typedef std::tuple<int, int, SadMxNx4Func, int> SadMxNx4Param;
+
+typedef void (*SadSkipMxNx4Func)(const uint8_t *src_ptr, int src_stride,
+                                 const uint8_t *const ref_ptr[], int ref_stride,
+                                 uint32_t *sad_array);
+typedef std::tuple<int, int, SadSkipMxNx4Func, int> SadSkipMxNx4Param;
+
+typedef void (*SadMxNx4AvgFunc)(const uint8_t *src_ptr, int src_stride,
+                                const uint8_t *const ref_ptr[], int ref_stride,
+                                const uint8_t *second_pred,
+                                uint32_t *sad_array);
+typedef std::tuple<int, int, SadMxNx4AvgFunc, int> SadMxNx4AvgParam;
+
+using libaom_test::ACMRandom;
+
+namespace {
+class SADTestBase : public ::testing::Test {
+ public:
+  SADTestBase(int width, int height, int bit_depth)
+      : width_(width), height_(height), bd_(bit_depth) {}
+
+  static void SetUpTestSuite() {
+    source_data8_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kDataBlockSize));
+    ASSERT_NE(source_data8_, nullptr);
+    reference_data8_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kDataBufferSize));
+    ASSERT_NE(reference_data8_, nullptr);
+    second_pred8_ =
+        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
+    ASSERT_NE(second_pred8_, nullptr);
+    comp_pred8_ =
+        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
+    ASSERT_NE(comp_pred8_, nullptr);
+    comp_pred8_test_ =
+        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
+    ASSERT_NE(comp_pred8_test_, nullptr);
+    source_data16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, kDataBlockSize * sizeof(uint16_t)));
+    ASSERT_NE(source_data16_, nullptr);
+    reference_data16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, kDataBufferSize * sizeof(uint16_t)));
+    ASSERT_NE(reference_data16_, nullptr);
+    second_pred16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
+    ASSERT_NE(second_pred16_, nullptr);
+    comp_pred16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
+    ASSERT_NE(comp_pred16_, nullptr);
+    comp_pred16_test_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
+    ASSERT_NE(comp_pred16_test_, nullptr);
+  }
+
+  static void TearDownTestSuite() {
+    aom_free(source_data8_);
+    source_data8_ = nullptr;
+    aom_free(reference_data8_);
+    reference_data8_ = nullptr;
+    aom_free(second_pred8_);
+    second_pred8_ = nullptr;
+    aom_free(comp_pred8_);
+    comp_pred8_ = nullptr;
+    aom_free(comp_pred8_test_);
+    comp_pred8_test_ = nullptr;
+    aom_free(source_data16_);
+    source_data16_ = nullptr;
+    aom_free(reference_data16_);
+    reference_data16_ = nullptr;
+    aom_free(second_pred16_);
+    second_pred16_ = nullptr;
+    aom_free(comp_pred16_);
+    comp_pred16_ = nullptr;
+    aom_free(comp_pred16_test_);
+    comp_pred16_test_ = nullptr;
+  }
+
+ protected:
+  // Handle up to 4 128x128 blocks, with stride up to 256
+  static const int kDataAlignment = 16;
+  static const int kDataBlockSize = 128 * 256;
+  static const int kDataBufferSize = 4 * kDataBlockSize;
+
+  void SetUp() override {
+    if (bd_ == -1) {
+      use_high_bit_depth_ = false;
+      bit_depth_ = AOM_BITS_8;
+      source_data_ = source_data8_;
+      reference_data_ = reference_data8_;
+      second_pred_ = second_pred8_;
+      comp_pred_ = comp_pred8_;
+      comp_pred_test_ = comp_pred8_test_;
+    } else {
+      use_high_bit_depth_ = true;
+      bit_depth_ = static_cast<aom_bit_depth_t>(bd_);
+      source_data_ = CONVERT_TO_BYTEPTR(source_data16_);
+      reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
+      second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
+      comp_pred_ = CONVERT_TO_BYTEPTR(comp_pred16_);
+      comp_pred_test_ = CONVERT_TO_BYTEPTR(comp_pred16_test_);
+    }
+    mask_ = (1 << bit_depth_) - 1;
+    source_stride_ = (width_ + 31) & ~31;
+    reference_stride_ = width_ * 2;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  virtual uint8_t *GetReference(int block_idx) {
+    if (use_high_bit_depth_)
+      return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) +
+                                block_idx * kDataBlockSize);
+    return reference_data_ + block_idx * kDataBlockSize;
+  }
+
+  // Sum of Absolute Differences. Given two blocks, calculate the absolute
+  // difference between two pixels in the same relative location; accumulate.
+  unsigned int ReferenceSAD(int block_idx) {
+    unsigned int sad = 0;
+    const uint8_t *const reference8 = GetReference(block_idx);
+    const uint8_t *const source8 = source_data_;
+    const uint16_t *const reference16 =
+        CONVERT_TO_SHORTPTR(GetReference(block_idx));
+    const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          sad += abs(source8[h * source_stride_ + w] -
+                     reference8[h * reference_stride_ + w]);
+        } else {
+          sad += abs(source16[h * source_stride_ + w] -
+                     reference16[h * reference_stride_ + w]);
+        }
+      }
+    }
+    return sad;
+  }
+
+  // Sum of Absolute Differences Skip rows. Given two blocks,
+  // calculate the absolute  difference between two pixels in the same
+  // relative location every other row; accumulate and double the result at the
+  // end.
+  unsigned int ReferenceSADSkip(int block_idx) {
+    unsigned int sad = 0;
+    const uint8_t *const reference8 = GetReference(block_idx);
+    const uint8_t *const source8 = source_data_;
+    const uint16_t *const reference16 =
+        CONVERT_TO_SHORTPTR(GetReference(block_idx));
+    const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+    for (int h = 0; h < height_; h += 2) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          sad += abs(source8[h * source_stride_ + w] -
+                     reference8[h * reference_stride_ + w]);
+        } else {
+          sad += abs(source16[h * source_stride_ + w] -
+                     reference16[h * reference_stride_ + w]);
+        }
+      }
+    }
+    return sad * 2;
+  }
+
+  // Sum of Absolute Differences Average. Given two blocks, and a prediction
+  // calculate the absolute difference between one pixel and average of the
+  // corresponding and predicted pixels; accumulate.
+  unsigned int ReferenceSADavg(int block_idx) {
+    unsigned int sad = 0;
+    const uint8_t *const reference8 = GetReference(block_idx);
+    const uint8_t *const source8 = source_data_;
+    const uint8_t *const second_pred8 = second_pred_;
+    const uint16_t *const reference16 =
+        CONVERT_TO_SHORTPTR(GetReference(block_idx));
+    const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+    const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          const int tmp = second_pred8[h * width_ + w] +
+                          reference8[h * reference_stride_ + w];
+          const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
+          sad += abs(source8[h * source_stride_ + w] - comp_pred);
+        } else {
+          const int tmp = second_pred16[h * width_ + w] +
+                          reference16[h * reference_stride_ + w];
+          const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
+          sad += abs(source16[h * source_stride_ + w] - comp_pred);
+        }
+      }
+    }
+    return sad;
+  }
+
+  unsigned int ReferenceDistWtdSADavg(int block_idx) {
+    unsigned int sad = 0;
+    const uint8_t *const reference8 = GetReference(block_idx);
+    const uint8_t *const source8 = source_data_;
+    const uint8_t *const second_pred8 = second_pred_;
+    const uint16_t *const reference16 =
+        CONVERT_TO_SHORTPTR(GetReference(block_idx));
+    const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+    const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          const int tmp =
+              second_pred8[h * width_ + w] * jcp_param_.bck_offset +
+              reference8[h * reference_stride_ + w] * jcp_param_.fwd_offset;
+          const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 4);
+          sad += abs(source8[h * source_stride_ + w] - comp_pred);
+        } else {
+          const int tmp =
+              second_pred16[h * width_ + w] * jcp_param_.bck_offset +
+              reference16[h * reference_stride_ + w] * jcp_param_.fwd_offset;
+          const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 4);
+          sad += abs(source16[h * source_stride_ + w] - comp_pred);
+        }
+      }
+    }
+    return sad;
+  }
+
+  void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) {
+    uint8_t *data8 = data;
+    uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          data8[h * stride + w] = static_cast<uint8_t>(fill_constant);
+        } else {
+          data16[h * stride + w] = fill_constant;
+        }
+      }
+    }
+  }
+
+  void FillRandom(uint8_t *data, int stride) {
+    uint8_t *data8 = data;
+    uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          data8[h * stride + w] = rnd_.Rand8();
+        } else {
+          data16[h * stride + w] = rnd_.Rand16() & mask_;
+        }
+      }
+    }
+  }
+
+  virtual void SADForSpeedTest(unsigned int *results,
+                               const uint8_t *const *references) {
+    (void)results;
+    (void)references;
+  }
+
+  void SpeedSAD() {
+    int test_count = 20000000;
+    unsigned int exp_sad[4];
+    const uint8_t *references[] = { GetReference(0), GetReference(1),
+                                    GetReference(2), GetReference(3) };
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    while (test_count > 0) {
+      SADForSpeedTest(exp_sad, references);
+      test_count -= 1;
+    }
+    aom_usec_timer_mark(&timer);
+    const int64_t time = aom_usec_timer_elapsed(&timer) / 1000;
+    std::cout << "BLOCK_" << width_ << "X" << height_
+              << ", bit_depth:" << bit_depth_ << ",Time: " << time << "ms"
+              << std::endl;
+  }
+
+  int width_, height_, mask_, bd_;
+  aom_bit_depth_t bit_depth_;
+  static uint8_t *source_data_;
+  static uint8_t *reference_data_;
+  static uint8_t *second_pred_;
+  int source_stride_;
+  bool use_high_bit_depth_;
+  static uint8_t *source_data8_;
+  static uint8_t *reference_data8_;
+  static uint8_t *second_pred8_;
+  static uint16_t *source_data16_;
+  static uint16_t *reference_data16_;
+  static uint16_t *second_pred16_;
+  int reference_stride_;
+  static uint8_t *comp_pred_;
+  static uint8_t *comp_pred8_;
+  static uint16_t *comp_pred16_;
+  static uint8_t *comp_pred_test_;
+  static uint8_t *comp_pred8_test_;
+  static uint16_t *comp_pred16_test_;
+  DIST_WTD_COMP_PARAMS jcp_param_;
+
+  ACMRandom rnd_;
+};
+
+class SADx4Test : public ::testing::WithParamInterface<SadMxNx4Param>,
+                  public SADTestBase {
+ public:
+  SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  void SADs(unsigned int *results) {
+    const uint8_t *references[] = { GetReference(0), GetReference(1),
+                                    GetReference(2), GetReference(3) };
+
+    API_REGISTER_STATE_CHECK(GET_PARAM(2)(
+        source_data_, source_stride_, references, reference_stride_, results));
+  }
+
+  void SADForSpeedTest(unsigned int *results,
+                       const uint8_t *const *references) override {
+    GET_PARAM(2)
+    (source_data_, source_stride_, references, reference_stride_, results);
+  }
+
+  void CheckSADs() {
+    unsigned int reference_sad, exp_sad[4];
+    SADs(exp_sad);
+    for (int block = 0; block < 4; ++block) {
+      reference_sad = ReferenceSAD(block);
+
+      EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block;
+    }
+  }
+};
+
+class SADx3Test : public ::testing::WithParamInterface<SadMxNx4Param>,
+                  public SADTestBase {
+ public:
+  SADx3Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  void SADs(unsigned int *results) {
+    const uint8_t *references[] = { GetReference(0), GetReference(1),
+                                    GetReference(2), GetReference(3) };
+
+    GET_PARAM(2)
+    (source_data_, source_stride_, references, reference_stride_, results);
+  }
+
+  void SADForSpeedTest(unsigned int *results,
+                       const uint8_t *const *references) override {
+    GET_PARAM(2)
+    (source_data_, source_stride_, references, reference_stride_, results);
+  }
+
+  void CheckSADs() {
+    unsigned int reference_sad, exp_sad[4];
+
+    SADs(exp_sad);
+    for (int block = 0; block < 3; ++block) {
+      reference_sad = ReferenceSAD(block);
+
+      EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block;
+    }
+  }
+};
+
+class SADSkipx4Test : public ::testing::WithParamInterface<SadMxNx4Param>,
+                      public SADTestBase {
+ public:
+  SADSkipx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  void SADs(unsigned int *results) {
+    const uint8_t *references[] = { GetReference(0), GetReference(1),
+                                    GetReference(2), GetReference(3) };
+
+    API_REGISTER_STATE_CHECK(GET_PARAM(2)(
+        source_data_, source_stride_, references, reference_stride_, results));
+  }
+
+  void CheckSADs() {
+    unsigned int reference_sad, exp_sad[4];
+
+    SADs(exp_sad);
+    for (int block = 0; block < 4; ++block) {
+      reference_sad = ReferenceSADSkip(block);
+
+      EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block;
+    }
+  }
+
+  void SADForSpeedTest(unsigned int *results,
+                       const uint8_t *const *references) override {
+    GET_PARAM(2)
+    (source_data_, source_stride_, references, reference_stride_, results);
+  }
+};
+
+class SADTest : public ::testing::WithParamInterface<SadMxNParam>,
+                public SADTestBase {
+ public:
+  SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  unsigned int SAD(int block_idx) {
+    unsigned int ret;
+    const uint8_t *const reference = GetReference(block_idx);
+
+    API_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                                reference, reference_stride_));
+    return ret;
+  }
+
+  void CheckSAD() {
+    const unsigned int reference_sad = ReferenceSAD(0);
+    const unsigned int exp_sad = SAD(0);
+
+    ASSERT_EQ(reference_sad, exp_sad);
+  }
+
+  void SADForSpeedTest(unsigned int *results,
+                       const uint8_t *const *references) override {
+    GET_PARAM(2)
+    (source_data_, source_stride_, references[0], reference_stride_);
+    (void)results;
+  }
+};
+
+class SADSkipTest : public ::testing::WithParamInterface<SadMxNParam>,
+                    public SADTestBase {
+ public:
+  SADSkipTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  unsigned int SAD(int block_idx) {
+    unsigned int ret;
+    const uint8_t *const reference = GetReference(block_idx);
+
+    API_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                                reference, reference_stride_));
+    return ret;
+  }
+
+  void CheckSAD() {
+    const unsigned int reference_sad = ReferenceSADSkip(0);
+    const unsigned int exp_sad = SAD(0);
+
+    ASSERT_EQ(reference_sad, exp_sad);
+  }
+
+  void SADForSpeedTest(unsigned int *results,
+                       const uint8_t *const *references) override {
+    GET_PARAM(2)
+    (source_data_, source_stride_, references[0], reference_stride_);
+    (void)results;
+  }
+};
+
+class SADavgTest : public ::testing::WithParamInterface<SadMxNAvgParam>,
+                   public SADTestBase {
+ public:
+  SADavgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  unsigned int SAD_avg(int block_idx) {
+    unsigned int ret;
+    const uint8_t *const reference = GetReference(block_idx);
+
+    API_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                                reference, reference_stride_,
+                                                second_pred_));
+    return ret;
+  }
+
+  void CheckSAD() {
+    const unsigned int reference_sad = ReferenceSADavg(0);
+    const unsigned int exp_sad = SAD_avg(0);
+
+    ASSERT_EQ(reference_sad, exp_sad);
+  }
+};
+
+class DistWtdSADavgTest
+    : public ::testing::WithParamInterface<DistWtdSadMxNAvgParam>,
+      public SADTestBase {
+ public:
+  DistWtdSADavgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  unsigned int dist_wtd_SAD_avg(int block_idx) {
+    unsigned int ret;
+    const uint8_t *const reference = GetReference(block_idx);
+
+    API_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                                reference, reference_stride_,
+                                                second_pred_, &jcp_param_));
+    return ret;
+  }
+
+  void CheckSAD() {
+    for (int j = 0; j < 2; ++j) {
+      for (int i = 0; i < 4; ++i) {
+        jcp_param_.fwd_offset = quant_dist_lookup_table[i][j];
+        jcp_param_.bck_offset = quant_dist_lookup_table[i][1 - j];
+
+        const unsigned int reference_sad = ReferenceDistWtdSADavg(0);
+        const unsigned int exp_sad = dist_wtd_SAD_avg(0);
+
+        ASSERT_EQ(reference_sad, exp_sad);
+      }
+    }
+  }
+};
+
+uint8_t *SADTestBase::source_data_ = nullptr;
+uint8_t *SADTestBase::reference_data_ = nullptr;
+uint8_t *SADTestBase::second_pred_ = nullptr;
+uint8_t *SADTestBase::comp_pred_ = nullptr;
+uint8_t *SADTestBase::comp_pred_test_ = nullptr;
+uint8_t *SADTestBase::source_data8_ = nullptr;
+uint8_t *SADTestBase::reference_data8_ = nullptr;
+uint8_t *SADTestBase::second_pred8_ = nullptr;
+uint8_t *SADTestBase::comp_pred8_ = nullptr;
+uint8_t *SADTestBase::comp_pred8_test_ = nullptr;
+uint16_t *SADTestBase::source_data16_ = nullptr;
+uint16_t *SADTestBase::reference_data16_ = nullptr;
+uint16_t *SADTestBase::second_pred16_ = nullptr;
+uint16_t *SADTestBase::comp_pred16_ = nullptr;
+uint16_t *SADTestBase::comp_pred16_test_ = nullptr;
+
+TEST_P(SADTest, MaxRef) {
+  FillConstant(source_data_, source_stride_, 0);
+  FillConstant(reference_data_, reference_stride_, mask_);
+  CheckSAD();
+}
+
+TEST_P(SADTest, MaxSrc) {
+  FillConstant(source_data_, source_stride_, mask_);
+  FillConstant(reference_data_, reference_stride_, 0);
+  CheckSAD();
+}
+
+TEST_P(SADTest, ShortRef) {
+  const int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADTest, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  const int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADTest, ShortSrc) {
+  const int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  int test_count = 2000;
+  while (test_count > 0) {
+    FillRandom(source_data_, source_stride_);
+    FillRandom(reference_data_, reference_stride_);
+    CheckSAD();
+    if (testing::Test::HasFatalFailure()) break;
+    test_count -= 1;
+  }
+  source_stride_ = tmp_stride;
+}
+
+TEST_P(SADTest, DISABLED_Speed) {
+  const int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  SpeedSAD();
+  source_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipTest, MaxRef) {
+  FillConstant(source_data_, source_stride_, 0);
+  FillConstant(reference_data_, reference_stride_, mask_);
+  CheckSAD();
+}
+
+TEST_P(SADSkipTest, MaxSrc) {
+  FillConstant(source_data_, source_stride_, mask_);
+  FillConstant(reference_data_, reference_stride_, 0);
+  CheckSAD();
+}
+
+TEST_P(SADSkipTest, ShortRef) {
+  const int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipTest, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  const int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipTest, ShortSrc) {
+  const int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  int test_count = 2000;
+  while (test_count > 0) {
+    FillRandom(source_data_, source_stride_);
+    FillRandom(reference_data_, reference_stride_);
+    CheckSAD();
+    if (testing::Test::HasFatalFailure()) break;
+    test_count -= 1;
+  }
+  source_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipTest, DISABLED_Speed) {
+  const int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  SpeedSAD();
+  source_stride_ = tmp_stride;
+}
+
+TEST_P(SADavgTest, MaxRef) {
+  FillConstant(source_data_, source_stride_, 0);
+  FillConstant(reference_data_, reference_stride_, mask_);
+  FillConstant(second_pred_, width_, 0);
+  CheckSAD();
+}
+TEST_P(SADavgTest, MaxSrc) {
+  FillConstant(source_data_, source_stride_, mask_);
+  FillConstant(reference_data_, reference_stride_, 0);
+  FillConstant(second_pred_, width_, 0);
+  CheckSAD();
+}
+
+TEST_P(SADavgTest, ShortRef) {
+  const int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADavgTest, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  const int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADavgTest, ShortSrc) {
+  const int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  int test_count = 2000;
+  while (test_count > 0) {
+    FillRandom(source_data_, source_stride_);
+    FillRandom(reference_data_, reference_stride_);
+    FillRandom(second_pred_, width_);
+    CheckSAD();
+    if (testing::Test::HasFatalFailure()) break;
+    test_count -= 1;
+  }
+  source_stride_ = tmp_stride;
+}
+
+TEST_P(DistWtdSADavgTest, MaxRef) {
+  FillConstant(source_data_, source_stride_, 0);
+  FillConstant(reference_data_, reference_stride_, mask_);
+  FillConstant(second_pred_, width_, 0);
+  CheckSAD();
+}
+TEST_P(DistWtdSADavgTest, MaxSrc) {
+  FillConstant(source_data_, source_stride_, mask_);
+  FillConstant(reference_data_, reference_stride_, 0);
+  FillConstant(second_pred_, width_, 0);
+  CheckSAD();
+}
+
+TEST_P(DistWtdSADavgTest, ShortRef) {
+  const int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(DistWtdSADavgTest, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  const int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(DistWtdSADavgTest, ShortSrc) {
+  const int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  int test_count = 2000;
+  while (test_count > 0) {
+    FillRandom(source_data_, source_stride_);
+    FillRandom(reference_data_, reference_stride_);
+    FillRandom(second_pred_, width_);
+    CheckSAD();
+    if (testing::Test::HasFatalFailure()) break;
+    test_count -= 1;
+  }
+  source_stride_ = tmp_stride;
+}
+
+// SADx4
+TEST_P(SADx4Test, MaxRef) {
+  FillConstant(source_data_, source_stride_, 0);
+  FillConstant(GetReference(0), reference_stride_, mask_);
+  FillConstant(GetReference(1), reference_stride_, mask_);
+  FillConstant(GetReference(2), reference_stride_, mask_);
+  FillConstant(GetReference(3), reference_stride_, mask_);
+  CheckSADs();
+}
+
+TEST_P(SADx4Test, MaxSrc) {
+  FillConstant(source_data_, source_stride_, mask_);
+  FillConstant(GetReference(0), reference_stride_, 0);
+  FillConstant(GetReference(1), reference_stride_, 0);
+  FillConstant(GetReference(2), reference_stride_, 0);
+  FillConstant(GetReference(3), reference_stride_, 0);
+  CheckSADs();
+}
+
+TEST_P(SADx4Test, ShortRef) {
+  int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADx4Test, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADx4Test, ShortSrc) {
+  int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  int test_count = 1000;
+  while (test_count > 0) {
+    FillRandom(source_data_, source_stride_);
+    FillRandom(GetReference(0), reference_stride_);
+    FillRandom(GetReference(1), reference_stride_);
+    FillRandom(GetReference(2), reference_stride_);
+    FillRandom(GetReference(3), reference_stride_);
+    CheckSADs();
+    test_count -= 1;
+  }
+  source_stride_ = tmp_stride;
+}
+
+TEST_P(SADx4Test, SrcAlignedByWidth) {
+  uint8_t *tmp_source_data = source_data_;
+  source_data_ += width_;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  source_data_ = tmp_source_data;
+}
+
+TEST_P(SADx4Test, DISABLED_Speed) {
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  SpeedSAD();
+}
+
+// SADx3
+TEST_P(SADx3Test, MaxRef) {
+  FillConstant(source_data_, source_stride_, 0);
+  FillConstant(GetReference(0), reference_stride_, mask_);
+  FillConstant(GetReference(1), reference_stride_, mask_);
+  FillConstant(GetReference(2), reference_stride_, mask_);
+  FillConstant(GetReference(3), reference_stride_, mask_);
+  CheckSADs();
+}
+
+TEST_P(SADx3Test, MaxSrc) {
+  FillConstant(source_data_, source_stride_, mask_);
+  FillConstant(GetReference(0), reference_stride_, 0);
+  FillConstant(GetReference(1), reference_stride_, 0);
+  FillConstant(GetReference(2), reference_stride_, 0);
+  FillConstant(GetReference(3), reference_stride_, 0);
+  CheckSADs();
+}
+
+TEST_P(SADx3Test, ShortRef) {
+  int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADx3Test, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADx3Test, ShortSrc) {
+  int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  int test_count = 1000;
+  while (test_count > 0) {
+    FillRandom(source_data_, source_stride_);
+    FillRandom(GetReference(0), reference_stride_);
+    FillRandom(GetReference(1), reference_stride_);
+    FillRandom(GetReference(2), reference_stride_);
+    FillRandom(GetReference(3), reference_stride_);
+    CheckSADs();
+    test_count -= 1;
+  }
+  source_stride_ = tmp_stride;
+}
+
+TEST_P(SADx3Test, SrcAlignedByWidth) {
+  uint8_t *tmp_source_data = source_data_;
+  source_data_ += width_;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  source_data_ = tmp_source_data;
+}
+
+TEST_P(SADx3Test, DISABLED_Speed) {
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  SpeedSAD();
+}
+
+// SADSkipx4
+TEST_P(SADSkipx4Test, MaxRef) {
+  FillConstant(source_data_, source_stride_, 0);
+  FillConstant(GetReference(0), reference_stride_, mask_);
+  FillConstant(GetReference(1), reference_stride_, mask_);
+  FillConstant(GetReference(2), reference_stride_, mask_);
+  FillConstant(GetReference(3), reference_stride_, mask_);
+  CheckSADs();
+}
+
+TEST_P(SADSkipx4Test, MaxSrc) {
+  FillConstant(source_data_, source_stride_, mask_);
+  FillConstant(GetReference(0), reference_stride_, 0);
+  FillConstant(GetReference(1), reference_stride_, 0);
+  FillConstant(GetReference(2), reference_stride_, 0);
+  FillConstant(GetReference(3), reference_stride_, 0);
+  CheckSADs();
+}
+
+TEST_P(SADSkipx4Test, ShortRef) {
+  int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipx4Test, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipx4Test, ShortSrc) {
+  int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  int test_count = 1000;
+  while (test_count > 0) {
+    FillRandom(source_data_, source_stride_);
+    FillRandom(GetReference(0), reference_stride_);
+    FillRandom(GetReference(1), reference_stride_);
+    FillRandom(GetReference(2), reference_stride_);
+    FillRandom(GetReference(3), reference_stride_);
+    CheckSADs();
+    test_count -= 1;
+  }
+  source_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipx4Test, SrcAlignedByWidth) {
+  uint8_t *tmp_source_data = source_data_;
+  source_data_ += width_;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  source_data_ = tmp_source_data;
+}
+
+TEST_P(SADSkipx4Test, DISABLED_Speed) {
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  SpeedSAD();
+}
+
+using std::make_tuple;
+
+//------------------------------------------------------------------------------
+// C functions
+const SadMxNParam c_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128_c, -1),
+  make_tuple(128, 64, &aom_sad128x64_c, -1),
+  make_tuple(64, 128, &aom_sad64x128_c, -1),
+  make_tuple(64, 64, &aom_sad64x64_c, -1),
+  make_tuple(64, 32, &aom_sad64x32_c, -1),
+  make_tuple(32, 64, &aom_sad32x64_c, -1),
+  make_tuple(32, 32, &aom_sad32x32_c, -1),
+  make_tuple(32, 16, &aom_sad32x16_c, -1),
+  make_tuple(16, 32, &aom_sad16x32_c, -1),
+  make_tuple(16, 16, &aom_sad16x16_c, -1),
+  make_tuple(16, 8, &aom_sad16x8_c, -1),
+  make_tuple(8, 16, &aom_sad8x16_c, -1),
+  make_tuple(8, 8, &aom_sad8x8_c, -1),
+  make_tuple(8, 4, &aom_sad8x4_c, -1),
+  make_tuple(4, 8, &aom_sad4x8_c, -1),
+  make_tuple(4, 4, &aom_sad4x4_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128_c, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64_c, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128_c, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_c, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_c, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_c, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_c, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_c, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_c, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_c, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_c, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16_c, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8_c, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4_c, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8_c, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4_c, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128_c, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64_c, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128_c, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_c, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_c, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_c, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_c, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_c, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_c, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_c, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_c, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16_c, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8_c, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4_c, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8_c, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4_c, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128_c, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64_c, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128_c, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64_c, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_c, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_c, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_c, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_c, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_c, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_c, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_c, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16_c, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8_c, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4_c, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8_c, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4_c, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16_c, -1),
+  make_tuple(16, 64, &aom_sad16x64_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad64x16_c, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64_c, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16_c, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64_c, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16_c, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64_c, 12),
+#endif
+  make_tuple(32, 8, &aom_sad32x8_c, -1),
+  make_tuple(8, 32, &aom_sad8x32_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(32, 8, &aom_highbd_sad32x8_c, 8),
+  make_tuple(8, 32, &aom_highbd_sad8x32_c, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8_c, 10),
+  make_tuple(8, 32, &aom_highbd_sad8x32_c, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8_c, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32_c, 12),
+#endif
+  make_tuple(16, 4, &aom_sad16x4_c, -1),
+  make_tuple(4, 16, &aom_sad4x16_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(16, 4, &aom_highbd_sad16x4_c, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16_c, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4_c, 10),
+  make_tuple(4, 16, &aom_highbd_sad4x16_c, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4_c, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16_c, 12),
+#endif
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(C, SADTest, ::testing::ValuesIn(c_tests));
+
+const SadSkipMxNParam skip_c_tests[] = {
+  make_tuple(128, 128, &aom_sad_skip_128x128_c, -1),
+  make_tuple(128, 64, &aom_sad_skip_128x64_c, -1),
+  make_tuple(64, 128, &aom_sad_skip_64x128_c, -1),
+  make_tuple(64, 64, &aom_sad_skip_64x64_c, -1),
+  make_tuple(64, 32, &aom_sad_skip_64x32_c, -1),
+  make_tuple(32, 64, &aom_sad_skip_32x64_c, -1),
+  make_tuple(32, 32, &aom_sad_skip_32x32_c, -1),
+  make_tuple(32, 16, &aom_sad_skip_32x16_c, -1),
+  make_tuple(16, 32, &aom_sad_skip_16x32_c, -1),
+  make_tuple(16, 16, &aom_sad_skip_16x16_c, -1),
+  make_tuple(16, 8, &aom_sad_skip_16x8_c, -1),
+  make_tuple(8, 16, &aom_sad_skip_8x16_c, -1),
+  make_tuple(8, 8, &aom_sad_skip_8x8_c, -1),
+  make_tuple(8, 4, &aom_sad_skip_8x4_c, -1),
+  make_tuple(4, 8, &aom_sad_skip_4x8_c, -1),
+  make_tuple(4, 4, &aom_sad_skip_4x4_c, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad_skip_64x16_c, -1),
+  make_tuple(16, 64, &aom_sad_skip_16x64_c, -1),
+  make_tuple(32, 8, &aom_sad_skip_32x8_c, -1),
+  make_tuple(8, 32, &aom_sad_skip_8x32_c, -1),
+  make_tuple(16, 4, &aom_sad_skip_16x4_c, -1),
+  make_tuple(4, 16, &aom_sad_skip_4x16_c, -1),
+#endif
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128_c, 8),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64_c, 8),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128_c, 8),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_c, 8),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_c, 8),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_c, 8),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_c, 8),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_c, 8),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_c, 8),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_c, 8),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_c, 8),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16_c, 8),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8_c, 8),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4_c, 8),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8_c, 8),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4_c, 8),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16_c, 8),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_c, 8),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8_c, 8),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32_c, 8),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4_c, 8),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16_c, 8),
+#endif
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128_c, 10),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64_c, 10),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128_c, 10),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_c, 10),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_c, 10),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_c, 10),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_c, 10),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_c, 10),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_c, 10),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_c, 10),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_c, 10),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16_c, 10),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8_c, 10),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4_c, 10),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8_c, 10),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4_c, 10),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16_c, 10),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_c, 10),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8_c, 10),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32_c, 10),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4_c, 10),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16_c, 10),
+#endif
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128_c, 12),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64_c, 12),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128_c, 12),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_c, 12),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_c, 12),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_c, 12),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_c, 12),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_c, 12),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_c, 12),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_c, 12),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_c, 12),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16_c, 12),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8_c, 12),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4_c, 12),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8_c, 12),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4_c, 12),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16_c, 12),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_c, 12),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8_c, 12),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32_c, 12),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4_c, 12),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16_c, 12),
+#endif  // !CONFIG_REALTIME_ONLY
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_SUITE_P(C, SADSkipTest, ::testing::ValuesIn(skip_c_tests));
+
+const SadMxNAvgParam avg_c_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128_avg_c, -1),
+  make_tuple(128, 64, &aom_sad128x64_avg_c, -1),
+  make_tuple(64, 128, &aom_sad64x128_avg_c, -1),
+  make_tuple(64, 64, &aom_sad64x64_avg_c, -1),
+  make_tuple(64, 32, &aom_sad64x32_avg_c, -1),
+  make_tuple(32, 64, &aom_sad32x64_avg_c, -1),
+  make_tuple(32, 32, &aom_sad32x32_avg_c, -1),
+  make_tuple(32, 16, &aom_sad32x16_avg_c, -1),
+  make_tuple(16, 32, &aom_sad16x32_avg_c, -1),
+  make_tuple(16, 16, &aom_sad16x16_avg_c, -1),
+  make_tuple(16, 8, &aom_sad16x8_avg_c, -1),
+  make_tuple(8, 16, &aom_sad8x16_avg_c, -1),
+  make_tuple(8, 8, &aom_sad8x8_avg_c, -1),
+  make_tuple(8, 4, &aom_sad8x4_avg_c, -1),
+  make_tuple(4, 8, &aom_sad4x8_avg_c, -1),
+  make_tuple(4, 4, &aom_sad4x4_avg_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16_avg_c, -1),
+  make_tuple(16, 64, &aom_sad16x64_avg_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_c, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_c, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_c, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_c, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_c, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_c, 12),
+#endif
+  make_tuple(32, 8, &aom_sad32x8_avg_c, -1),
+  make_tuple(8, 32, &aom_sad8x32_avg_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_c, 8),
+  make_tuple(8, 32, &aom_highbd_sad8x32_avg_c, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_c, 10),
+  make_tuple(8, 32, &aom_highbd_sad8x32_avg_c, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_c, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32_avg_c, 12),
+#endif
+  make_tuple(16, 4, &aom_sad16x4_avg_c, -1),
+  make_tuple(4, 16, &aom_sad4x16_avg_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_c, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16_avg_c, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_c, 10),
+  make_tuple(4, 16, &aom_highbd_sad4x16_avg_c, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_c, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16_avg_c, 12),
+#endif
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests));
+
+const DistWtdSadMxNAvgParam dist_wtd_avg_c_tests[] = {
+  make_tuple(128, 128, &aom_dist_wtd_sad128x128_avg_c, -1),
+  make_tuple(128, 64, &aom_dist_wtd_sad128x64_avg_c, -1),
+  make_tuple(64, 128, &aom_dist_wtd_sad64x128_avg_c, -1),
+  make_tuple(64, 64, &aom_dist_wtd_sad64x64_avg_c, -1),
+  make_tuple(64, 32, &aom_dist_wtd_sad64x32_avg_c, -1),
+  make_tuple(32, 64, &aom_dist_wtd_sad32x64_avg_c, -1),
+  make_tuple(32, 32, &aom_dist_wtd_sad32x32_avg_c, -1),
+  make_tuple(32, 16, &aom_dist_wtd_sad32x16_avg_c, -1),
+  make_tuple(16, 32, &aom_dist_wtd_sad16x32_avg_c, -1),
+  make_tuple(16, 16, &aom_dist_wtd_sad16x16_avg_c, -1),
+  make_tuple(16, 8, &aom_dist_wtd_sad16x8_avg_c, -1),
+  make_tuple(8, 16, &aom_dist_wtd_sad8x16_avg_c, -1),
+  make_tuple(8, 8, &aom_dist_wtd_sad8x8_avg_c, -1),
+  make_tuple(8, 4, &aom_dist_wtd_sad8x4_avg_c, -1),
+  make_tuple(4, 8, &aom_dist_wtd_sad4x8_avg_c, -1),
+  make_tuple(4, 4, &aom_dist_wtd_sad4x4_avg_c, -1),
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_dist_wtd_sad64x16_avg_c, -1),
+  make_tuple(16, 64, &aom_dist_wtd_sad16x64_avg_c, -1),
+  make_tuple(32, 8, &aom_dist_wtd_sad32x8_avg_c, -1),
+  make_tuple(8, 32, &aom_dist_wtd_sad8x32_avg_c, -1),
+  make_tuple(16, 4, &aom_dist_wtd_sad16x4_avg_c, -1),
+  make_tuple(4, 16, &aom_dist_wtd_sad4x16_avg_c, -1),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(C, DistWtdSADavgTest,
+                         ::testing::ValuesIn(dist_wtd_avg_c_tests));
+
+const SadMxNx4Param x4d_c_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128x4d_c, -1),
+  make_tuple(128, 64, &aom_sad128x64x4d_c, -1),
+  make_tuple(64, 128, &aom_sad64x128x4d_c, -1),
+  make_tuple(64, 64, &aom_sad64x64x4d_c, -1),
+  make_tuple(64, 32, &aom_sad64x32x4d_c, -1),
+  make_tuple(32, 64, &aom_sad32x64x4d_c, -1),
+  make_tuple(32, 32, &aom_sad32x32x4d_c, -1),
+  make_tuple(32, 16, &aom_sad32x16x4d_c, -1),
+  make_tuple(16, 32, &aom_sad16x32x4d_c, -1),
+  make_tuple(16, 16, &aom_sad16x16x4d_c, -1),
+  make_tuple(16, 8, &aom_sad16x8x4d_c, -1),
+  make_tuple(8, 16, &aom_sad8x16x4d_c, -1),
+  make_tuple(8, 8, &aom_sad8x8x4d_c, -1),
+  make_tuple(8, 4, &aom_sad8x4x4d_c, -1),
+  make_tuple(4, 8, &aom_sad4x8x4d_c, -1),
+  make_tuple(4, 4, &aom_sad4x4x4d_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 12),
+#endif
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16x4d_c, -1),
+  make_tuple(16, 64, &aom_sad16x64x4d_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_c, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_c, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_c, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_c, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_c, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_c, 12),
+#endif
+  make_tuple(32, 8, &aom_sad32x8x4d_c, -1),
+  make_tuple(8, 32, &aom_sad8x32x4d_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_c, 8),
+  make_tuple(8, 32, &aom_highbd_sad8x32x4d_c, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_c, 10),
+  make_tuple(8, 32, &aom_highbd_sad8x32x4d_c, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_c, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32x4d_c, 12),
+#endif
+  make_tuple(16, 4, &aom_sad16x4x4d_c, -1),
+  make_tuple(4, 16, &aom_sad4x16x4d_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_c, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16x4d_c, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_c, 10),
+  make_tuple(4, 16, &aom_highbd_sad4x16x4d_c, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_c, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16x4d_c, 12),
+#endif
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
+
+const SadMxNx4Param x3d_c_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128x3d_c, -1),
+  make_tuple(128, 64, &aom_sad128x64x3d_c, -1),
+  make_tuple(64, 128, &aom_sad64x128x3d_c, -1),
+  make_tuple(64, 64, &aom_sad64x64x3d_c, -1),
+  make_tuple(64, 32, &aom_sad64x32x3d_c, -1),
+  make_tuple(32, 64, &aom_sad32x64x3d_c, -1),
+  make_tuple(32, 32, &aom_sad32x32x3d_c, -1),
+  make_tuple(32, 16, &aom_sad32x16x3d_c, -1),
+  make_tuple(16, 32, &aom_sad16x32x3d_c, -1),
+  make_tuple(16, 16, &aom_sad16x16x3d_c, -1),
+  make_tuple(16, 8, &aom_sad16x8x3d_c, -1),
+  make_tuple(8, 16, &aom_sad8x16x3d_c, -1),
+  make_tuple(8, 8, &aom_sad8x8x3d_c, -1),
+  make_tuple(8, 4, &aom_sad8x4x3d_c, -1),
+  make_tuple(4, 8, &aom_sad4x8x3d_c, -1),
+  make_tuple(4, 4, &aom_sad4x4x3d_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128x3d_c, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64x3d_c, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128x3d_c, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64x3d_c, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32x3d_c, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64x3d_c, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32x3d_c, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16x3d_c, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32x3d_c, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16x3d_c, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8x3d_c, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16x3d_c, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8x3d_c, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4x3d_c, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8x3d_c, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4x3d_c, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128x3d_c, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64x3d_c, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128x3d_c, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64x3d_c, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32x3d_c, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64x3d_c, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32x3d_c, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16x3d_c, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32x3d_c, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16x3d_c, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8x3d_c, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16x3d_c, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8x3d_c, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4x3d_c, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8x3d_c, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4x3d_c, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128x3d_c, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64x3d_c, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128x3d_c, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64x3d_c, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32x3d_c, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64x3d_c, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32x3d_c, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16x3d_c, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32x3d_c, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16x3d_c, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8x3d_c, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16x3d_c, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8x3d_c, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4x3d_c, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8x3d_c, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4x3d_c, 12),
+#endif
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16x3d_c, -1),
+  make_tuple(16, 64, &aom_sad16x64x3d_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad64x16x3d_c, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64x3d_c, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16x3d_c, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64x3d_c, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16x3d_c, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64x3d_c, 12),
+#endif
+  make_tuple(32, 8, &aom_sad32x8x3d_c, -1),
+  make_tuple(8, 32, &aom_sad8x32x3d_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(32, 8, &aom_highbd_sad32x8x3d_c, 8),
+  make_tuple(8, 32, &aom_highbd_sad8x32x3d_c, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8x3d_c, 10),
+  make_tuple(8, 32, &aom_highbd_sad8x32x3d_c, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8x3d_c, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32x3d_c, 12),
+#endif
+  make_tuple(16, 4, &aom_sad16x4x3d_c, -1),
+  make_tuple(4, 16, &aom_sad4x16x3d_c, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(16, 4, &aom_highbd_sad16x4x3d_c, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16x3d_c, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4x3d_c, 10),
+  make_tuple(4, 16, &aom_highbd_sad4x16x3d_c, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4x3d_c, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16x3d_c, 12),
+#endif
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(C, SADx3Test, ::testing::ValuesIn(x3d_c_tests));
+
+const SadMxNx4Param skip_x4d_c_tests[] = {
+  make_tuple(128, 128, &aom_sad_skip_128x128x4d_c, -1),
+  make_tuple(128, 64, &aom_sad_skip_128x64x4d_c, -1),
+  make_tuple(64, 128, &aom_sad_skip_64x128x4d_c, -1),
+  make_tuple(64, 64, &aom_sad_skip_64x64x4d_c, -1),
+  make_tuple(64, 32, &aom_sad_skip_64x32x4d_c, -1),
+  make_tuple(32, 64, &aom_sad_skip_32x64x4d_c, -1),
+  make_tuple(32, 32, &aom_sad_skip_32x32x4d_c, -1),
+  make_tuple(32, 16, &aom_sad_skip_32x16x4d_c, -1),
+  make_tuple(16, 32, &aom_sad_skip_16x32x4d_c, -1),
+  make_tuple(16, 16, &aom_sad_skip_16x16x4d_c, -1),
+  make_tuple(16, 8, &aom_sad_skip_16x8x4d_c, -1),
+  make_tuple(8, 16, &aom_sad_skip_8x16x4d_c, -1),
+  make_tuple(8, 8, &aom_sad_skip_8x8x4d_c, -1),
+  make_tuple(4, 8, &aom_sad_skip_4x8x4d_c, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad_skip_64x16x4d_c, -1),
+  make_tuple(16, 64, &aom_sad_skip_16x64x4d_c, -1),
+  make_tuple(32, 8, &aom_sad_skip_32x8x4d_c, -1),
+  make_tuple(8, 32, &aom_sad_skip_8x32x4d_c, -1),
+  make_tuple(4, 16, &aom_sad_skip_4x16x4d_c, -1),
+#endif
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_c, 8),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_c, 8),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_c, 8),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_c, 8),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_c, 8),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_c, 8),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_c, 8),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_c, 8),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_c, 8),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_c, 8),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_c, 8),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_c, 8),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_c, 8),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_c, 8),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_c, 8),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_c, 8),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_c, 8),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_c, 8),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_c, 8),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_c, 8),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_c, 8),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_c, 8),
+#endif
+
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_c, 10),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_c, 10),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_c, 10),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_c, 10),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_c, 10),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_c, 10),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_c, 10),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_c, 10),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_c, 10),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_c, 10),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_c, 10),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_c, 10),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_c, 10),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_c, 10),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_c, 10),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_c, 10),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_c, 10),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_c, 10),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_c, 10),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_c, 10),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_c, 10),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_c, 10),
+#endif
+
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_c, 12),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_c, 12),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_c, 12),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_c, 12),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_c, 12),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_c, 12),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_c, 12),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_c, 12),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_c, 12),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_c, 12),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_c, 12),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_c, 12),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_c, 12),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_c, 12),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_c, 12),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_c, 12),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_c, 12),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_c, 12),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_c, 12),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_c, 12),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_c, 12),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_c, 12),
+#endif
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_SUITE_P(C, SADSkipx4Test,
+                         ::testing::ValuesIn(skip_x4d_c_tests));
+
+//------------------------------------------------------------------------------
+// ARM functions
+#if HAVE_NEON
+const SadMxNParam neon_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128_neon, -1),
+  make_tuple(128, 64, &aom_sad128x64_neon, -1),
+  make_tuple(64, 128, &aom_sad64x128_neon, -1),
+  make_tuple(64, 64, &aom_sad64x64_neon, -1),
+  make_tuple(64, 32, &aom_sad64x32_neon, -1),
+  make_tuple(32, 64, &aom_sad32x64_neon, -1),
+  make_tuple(32, 32, &aom_sad32x32_neon, -1),
+  make_tuple(32, 16, &aom_sad32x16_neon, -1),
+  make_tuple(16, 32, &aom_sad16x32_neon, -1),
+  make_tuple(16, 16, &aom_sad16x16_neon, -1),
+  make_tuple(16, 8, &aom_sad16x8_neon, -1),
+  make_tuple(8, 16, &aom_sad8x16_neon, -1),
+  make_tuple(8, 8, &aom_sad8x8_neon, -1),
+  make_tuple(8, 4, &aom_sad8x4_neon, -1),
+  make_tuple(4, 8, &aom_sad4x8_neon, -1),
+  make_tuple(4, 4, &aom_sad4x4_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128_neon, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64_neon, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128_neon, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_neon, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_neon, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_neon, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_neon, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_neon, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_neon, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_neon, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_neon, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16_neon, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8_neon, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4_neon, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8_neon, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4_neon, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128_neon, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64_neon, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128_neon, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_neon, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_neon, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_neon, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_neon, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_neon, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_neon, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_neon, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_neon, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16_neon, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8_neon, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4_neon, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8_neon, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4_neon, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128_neon, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64_neon, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128_neon, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64_neon, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_neon, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_neon, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_neon, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_neon, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_neon, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_neon, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_neon, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16_neon, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8_neon, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4_neon, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8_neon, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16_neon, -1),
+  make_tuple(32, 8, &aom_sad32x8_neon, -1),
+  make_tuple(16, 64, &aom_sad16x64_neon, -1),
+  make_tuple(16, 4, &aom_sad16x4_neon, -1),
+  make_tuple(8, 32, &aom_sad8x32_neon, -1),
+  make_tuple(4, 16, &aom_sad4x16_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad64x16_neon, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64_neon, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8_neon, 8),
+  make_tuple(8, 32, &aom_highbd_sad8x32_neon, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4_neon, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16_neon, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16_neon, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64_neon, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8_neon, 10),
+  make_tuple(8, 32, &aom_highbd_sad8x32_neon, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4_neon, 10),
+  make_tuple(4, 16, &aom_highbd_sad4x16_neon, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16_neon, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64_neon, 12),
+  make_tuple(32, 8, &aom_highbd_sad32x8_neon, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32_neon, 12),
+  make_tuple(16, 4, &aom_highbd_sad16x4_neon, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests));
+
+const SadMxNx4Param x4d_neon_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128x4d_neon, -1),
+  make_tuple(128, 64, &aom_sad128x64x4d_neon, -1),
+  make_tuple(64, 128, &aom_sad64x128x4d_neon, -1),
+  make_tuple(64, 64, &aom_sad64x64x4d_neon, -1),
+  make_tuple(64, 32, &aom_sad64x32x4d_neon, -1),
+  make_tuple(32, 64, &aom_sad32x64x4d_neon, -1),
+  make_tuple(32, 32, &aom_sad32x32x4d_neon, -1),
+  make_tuple(32, 16, &aom_sad32x16x4d_neon, -1),
+  make_tuple(16, 32, &aom_sad16x32x4d_neon, -1),
+  make_tuple(16, 16, &aom_sad16x16x4d_neon, -1),
+  make_tuple(16, 8, &aom_sad16x8x4d_neon, -1),
+  make_tuple(8, 16, &aom_sad8x16x4d_neon, -1),
+  make_tuple(8, 8, &aom_sad8x8x4d_neon, -1),
+  make_tuple(8, 4, &aom_sad8x4x4d_neon, -1),
+  make_tuple(4, 8, &aom_sad4x8x4d_neon, -1),
+  make_tuple(4, 4, &aom_sad4x4x4d_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_neon, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_neon, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_neon, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_neon, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_neon, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_neon, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_neon, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_neon, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_neon, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_neon, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_neon, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_neon, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_neon, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_neon, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_neon, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_neon, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_neon, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_neon, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_neon, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_neon, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_neon, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_neon, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_neon, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_neon, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_neon, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_neon, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_neon, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_neon, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_neon, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_neon, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_neon, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_neon, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_neon, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_neon, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_neon, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_neon, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_neon, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_neon, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_neon, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_neon, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_neon, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_neon, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_neon, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_neon, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_neon, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_neon, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_neon, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16x4d_neon, -1),
+  make_tuple(32, 8, &aom_sad32x8x4d_neon, -1),
+  make_tuple(16, 64, &aom_sad16x64x4d_neon, -1),
+  make_tuple(16, 4, &aom_sad16x4x4d_neon, -1),
+  make_tuple(8, 32, &aom_sad8x32x4d_neon, -1),
+  make_tuple(4, 16, &aom_sad4x16x4d_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_neon, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_neon, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_neon, 8),
+  make_tuple(8, 32, &aom_highbd_sad8x32x4d_neon, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_neon, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16x4d_neon, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_neon, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_neon, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_neon, 10),
+  make_tuple(8, 32, &aom_highbd_sad8x32x4d_neon, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_neon, 10),
+  make_tuple(4, 16, &aom_highbd_sad4x16x4d_neon, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_neon, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_neon, 12),
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_neon, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32x4d_neon, 12),
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_neon, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16x4d_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
+const SadSkipMxNParam skip_neon_tests[] = {
+  make_tuple(128, 128, &aom_sad_skip_128x128_neon, -1),
+  make_tuple(128, 64, &aom_sad_skip_128x64_neon, -1),
+  make_tuple(64, 128, &aom_sad_skip_64x128_neon, -1),
+  make_tuple(64, 64, &aom_sad_skip_64x64_neon, -1),
+  make_tuple(64, 32, &aom_sad_skip_64x32_neon, -1),
+  make_tuple(32, 64, &aom_sad_skip_32x64_neon, -1),
+  make_tuple(32, 32, &aom_sad_skip_32x32_neon, -1),
+  make_tuple(32, 16, &aom_sad_skip_32x16_neon, -1),
+  make_tuple(16, 32, &aom_sad_skip_16x32_neon, -1),
+  make_tuple(16, 16, &aom_sad_skip_16x16_neon, -1),
+  make_tuple(16, 8, &aom_sad_skip_16x8_neon, -1),
+  make_tuple(8, 16, &aom_sad_skip_8x16_neon, -1),
+  make_tuple(8, 8, &aom_sad_skip_8x8_neon, -1),
+  make_tuple(8, 4, &aom_sad_skip_8x4_neon, -1),
+  make_tuple(4, 8, &aom_sad_skip_4x8_neon, -1),
+  make_tuple(4, 4, &aom_sad_skip_4x4_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128_neon, 8),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64_neon, 8),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128_neon, 8),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_neon, 8),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_neon, 8),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_neon, 8),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_neon, 8),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_neon, 8),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_neon, 8),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_neon, 8),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_neon, 8),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16_neon, 8),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8_neon, 8),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4_neon, 8),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8_neon, 8),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4_neon, 8),
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128_neon, 10),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64_neon, 10),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128_neon, 10),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_neon, 10),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_neon, 10),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_neon, 10),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_neon, 10),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_neon, 10),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_neon, 10),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_neon, 10),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_neon, 10),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16_neon, 10),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8_neon, 10),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4_neon, 10),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8_neon, 10),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4_neon, 10),
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128_neon, 12),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64_neon, 12),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128_neon, 12),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_neon, 12),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_neon, 12),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_neon, 12),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_neon, 12),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_neon, 12),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_neon, 12),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_neon, 12),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_neon, 12),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16_neon, 12),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8_neon, 12),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4_neon, 12),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8_neon, 12),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad_skip_64x16_neon, -1),
+  make_tuple(32, 8, &aom_sad_skip_32x8_neon, -1),
+  make_tuple(16, 64, &aom_sad_skip_16x64_neon, -1),
+  make_tuple(16, 4, &aom_sad_skip_16x4_neon, -1),
+  make_tuple(8, 32, &aom_sad_skip_8x32_neon, -1),
+  make_tuple(4, 16, &aom_sad_skip_4x16_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16_neon, 8),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_neon, 8),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8_neon, 8),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32_neon, 8),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4_neon, 8),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16_neon, 8),
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16_neon, 10),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_neon, 10),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8_neon, 10),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32_neon, 10),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4_neon, 10),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16_neon, 10),
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16_neon, 12),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_neon, 12),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8_neon, 12),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32_neon, 12),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4_neon, 12),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON, SADSkipTest,
+                         ::testing::ValuesIn(skip_neon_tests));
+
+const SadSkipMxNx4Param skip_x4d_neon_tests[] = {
+  make_tuple(128, 128, &aom_sad_skip_128x128x4d_neon, -1),
+  make_tuple(128, 64, &aom_sad_skip_128x64x4d_neon, -1),
+  make_tuple(64, 128, &aom_sad_skip_64x128x4d_neon, -1),
+  make_tuple(64, 64, &aom_sad_skip_64x64x4d_neon, -1),
+  make_tuple(64, 32, &aom_sad_skip_64x32x4d_neon, -1),
+  make_tuple(32, 64, &aom_sad_skip_32x64x4d_neon, -1),
+  make_tuple(32, 32, &aom_sad_skip_32x32x4d_neon, -1),
+  make_tuple(32, 16, &aom_sad_skip_32x16x4d_neon, -1),
+  make_tuple(16, 32, &aom_sad_skip_16x32x4d_neon, -1),
+  make_tuple(16, 16, &aom_sad_skip_16x16x4d_neon, -1),
+  make_tuple(16, 8, &aom_sad_skip_16x8x4d_neon, -1),
+  make_tuple(8, 16, &aom_sad_skip_8x16x4d_neon, -1),
+  make_tuple(8, 8, &aom_sad_skip_8x8x4d_neon, -1),
+  make_tuple(8, 4, &aom_sad_skip_8x4x4d_neon, -1),
+  make_tuple(4, 8, &aom_sad_skip_4x8x4d_neon, -1),
+  make_tuple(4, 4, &aom_sad_skip_4x4x4d_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_neon, 8),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_neon, 8),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_neon, 8),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_neon, 8),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_neon, 8),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_neon, 8),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_neon, 8),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_neon, 8),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_neon, 8),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_neon, 8),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_neon, 8),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_neon, 8),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_neon, 8),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_neon, 8),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_neon, 8),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_neon, 8),
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_neon, 10),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_neon, 10),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_neon, 10),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_neon, 10),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_neon, 10),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_neon, 10),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_neon, 10),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_neon, 10),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_neon, 10),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_neon, 10),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_neon, 10),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_neon, 10),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_neon, 10),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_neon, 10),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_neon, 10),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_neon, 10),
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_neon, 12),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_neon, 12),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_neon, 12),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_neon, 12),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_neon, 12),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_neon, 12),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_neon, 12),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_neon, 12),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_neon, 12),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_neon, 12),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_neon, 12),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_neon, 12),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_neon, 12),
+  make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_neon, 12),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_neon, 12),
+  make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad_skip_64x16x4d_neon, -1),
+  make_tuple(32, 8, &aom_sad_skip_32x8x4d_neon, -1),
+  make_tuple(16, 64, &aom_sad_skip_16x64x4d_neon, -1),
+  make_tuple(16, 4, &aom_sad_skip_16x4x4d_neon, -1),
+  make_tuple(8, 32, &aom_sad_skip_8x32x4d_neon, -1),
+  make_tuple(4, 16, &aom_sad_skip_4x16x4d_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_neon, 8),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_neon, 8),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_neon, 8),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_neon, 8),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_neon, 8),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_neon, 8),
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_neon, 10),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_neon, 10),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_neon, 10),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_neon, 10),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_neon, 10),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_neon, 10),
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_neon, 12),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_neon, 12),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_neon, 12),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_neon, 12),
+  make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_neon, 12),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON, SADSkipx4Test,
+                         ::testing::ValuesIn(skip_x4d_neon_tests));
+
+const SadMxNAvgParam avg_neon_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128_avg_neon, -1),
+  make_tuple(128, 64, &aom_sad128x64_avg_neon, -1),
+  make_tuple(64, 128, &aom_sad64x128_avg_neon, -1),
+  make_tuple(64, 64, &aom_sad64x64_avg_neon, -1),
+  make_tuple(64, 32, &aom_sad64x32_avg_neon, -1),
+  make_tuple(32, 64, &aom_sad32x64_avg_neon, -1),
+  make_tuple(32, 32, &aom_sad32x32_avg_neon, -1),
+  make_tuple(32, 16, &aom_sad32x16_avg_neon, -1),
+  make_tuple(16, 32, &aom_sad16x32_avg_neon, -1),
+  make_tuple(16, 16, &aom_sad16x16_avg_neon, -1),
+  make_tuple(16, 8, &aom_sad16x8_avg_neon, -1),
+  make_tuple(8, 16, &aom_sad8x16_avg_neon, -1),
+  make_tuple(8, 8, &aom_sad8x8_avg_neon, -1),
+  make_tuple(8, 4, &aom_sad8x4_avg_neon, -1),
+  make_tuple(4, 8, &aom_sad4x8_avg_neon, -1),
+  make_tuple(4, 4, &aom_sad4x4_avg_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_neon, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_neon, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_neon, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_neon, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_neon, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_neon, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_neon, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_neon, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_neon, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_neon, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_neon, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_neon, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_neon, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_neon, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_neon, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_neon, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_neon, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_neon, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_neon, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_neon, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_neon, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_neon, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_neon, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_neon, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_neon, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_neon, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_neon, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_neon, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_neon, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_neon, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_neon, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_neon, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_neon, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_neon, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_neon, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_neon, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_neon, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_neon, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_neon, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_neon, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_neon, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_neon, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_neon, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_neon, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_neon, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_neon, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_neon, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16_avg_neon, -1),
+  make_tuple(32, 8, &aom_sad32x8_avg_neon, -1),
+  make_tuple(16, 64, &aom_sad16x64_avg_neon, -1),
+  make_tuple(16, 4, &aom_sad16x4_avg_neon, -1),
+  make_tuple(8, 32, &aom_sad8x32_avg_neon, -1),
+  make_tuple(4, 16, &aom_sad4x16_avg_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_neon, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_neon, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_neon, 8),
+  make_tuple(8, 32, &aom_highbd_sad8x32_avg_neon, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_neon, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16_avg_neon, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_neon, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_neon, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_neon, 10),
+  make_tuple(8, 32, &aom_highbd_sad8x32_avg_neon, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_neon, 10),
+  make_tuple(4, 16, &aom_highbd_sad4x16_avg_neon, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_neon, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_neon, 12),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_neon, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32_avg_neon, 12),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_neon, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16_avg_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON, SADavgTest, ::testing::ValuesIn(avg_neon_tests));
+
+const DistWtdSadMxNAvgParam dist_wtd_avg_neon_tests[] = {
+  make_tuple(128, 128, &aom_dist_wtd_sad128x128_avg_neon, -1),
+  make_tuple(128, 64, &aom_dist_wtd_sad128x64_avg_neon, -1),
+  make_tuple(64, 128, &aom_dist_wtd_sad64x128_avg_neon, -1),
+  make_tuple(64, 64, &aom_dist_wtd_sad64x64_avg_neon, -1),
+  make_tuple(64, 32, &aom_dist_wtd_sad64x32_avg_neon, -1),
+  make_tuple(32, 64, &aom_dist_wtd_sad32x64_avg_neon, -1),
+  make_tuple(32, 32, &aom_dist_wtd_sad32x32_avg_neon, -1),
+  make_tuple(32, 16, &aom_dist_wtd_sad32x16_avg_neon, -1),
+  make_tuple(16, 32, &aom_dist_wtd_sad16x32_avg_neon, -1),
+  make_tuple(16, 16, &aom_dist_wtd_sad16x16_avg_neon, -1),
+  make_tuple(16, 8, &aom_dist_wtd_sad16x8_avg_neon, -1),
+  make_tuple(8, 16, &aom_dist_wtd_sad8x16_avg_neon, -1),
+  make_tuple(8, 8, &aom_dist_wtd_sad8x8_avg_neon, -1),
+  make_tuple(8, 4, &aom_dist_wtd_sad8x4_avg_neon, -1),
+  make_tuple(4, 8, &aom_dist_wtd_sad4x8_avg_neon, -1),
+  make_tuple(4, 4, &aom_dist_wtd_sad4x4_avg_neon, -1),
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_dist_wtd_sad64x16_avg_neon, -1),
+  make_tuple(16, 64, &aom_dist_wtd_sad16x64_avg_neon, -1),
+  make_tuple(32, 8, &aom_dist_wtd_sad32x8_avg_neon, -1),
+  make_tuple(8, 32, &aom_dist_wtd_sad8x32_avg_neon, -1),
+  make_tuple(16, 4, &aom_dist_wtd_sad16x4_avg_neon, -1),
+  make_tuple(4, 16, &aom_dist_wtd_sad4x16_avg_neon, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, DistWtdSADavgTest,
+                         ::testing::ValuesIn(dist_wtd_avg_neon_tests));
+
+const SadMxNx4Param x3d_neon_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128x3d_neon, -1),
+  make_tuple(128, 64, &aom_sad128x64x3d_neon, -1),
+  make_tuple(64, 128, &aom_sad64x128x3d_neon, -1),
+  make_tuple(64, 64, &aom_sad64x64x3d_neon, -1),
+  make_tuple(64, 32, &aom_sad64x32x3d_neon, -1),
+  make_tuple(32, 64, &aom_sad32x64x3d_neon, -1),
+  make_tuple(32, 32, &aom_sad32x32x3d_neon, -1),
+  make_tuple(32, 16, &aom_sad32x16x3d_neon, -1),
+  make_tuple(16, 32, &aom_sad16x32x3d_neon, -1),
+  make_tuple(16, 16, &aom_sad16x16x3d_neon, -1),
+  make_tuple(16, 8, &aom_sad16x8x3d_neon, -1),
+  make_tuple(8, 16, &aom_sad8x16x3d_neon, -1),
+  make_tuple(8, 8, &aom_sad8x8x3d_neon, -1),
+  make_tuple(8, 4, &aom_sad8x4x3d_neon, -1),
+  make_tuple(4, 8, &aom_sad4x8x3d_neon, -1),
+  make_tuple(4, 4, &aom_sad4x4x3d_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128x3d_neon, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64x3d_neon, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128x3d_neon, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64x3d_neon, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32x3d_neon, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64x3d_neon, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32x3d_neon, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16x3d_neon, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32x3d_neon, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16x3d_neon, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8x3d_neon, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16x3d_neon, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8x3d_neon, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4x3d_neon, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8x3d_neon, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4x3d_neon, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128x3d_neon, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64x3d_neon, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128x3d_neon, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64x3d_neon, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32x3d_neon, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64x3d_neon, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32x3d_neon, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16x3d_neon, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32x3d_neon, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16x3d_neon, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8x3d_neon, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16x3d_neon, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8x3d_neon, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4x3d_neon, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8x3d_neon, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4x3d_neon, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128x3d_neon, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64x3d_neon, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128x3d_neon, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64x3d_neon, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32x3d_neon, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64x3d_neon, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32x3d_neon, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16x3d_neon, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32x3d_neon, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16x3d_neon, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8x3d_neon, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16x3d_neon, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8x3d_neon, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4x3d_neon, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8x3d_neon, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4x3d_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16x3d_neon, -1),
+  make_tuple(32, 8, &aom_sad32x8x3d_neon, -1),
+  make_tuple(16, 64, &aom_sad16x64x3d_neon, -1),
+  make_tuple(16, 4, &aom_sad16x4x3d_neon, -1),
+  make_tuple(8, 32, &aom_sad8x32x3d_neon, -1),
+  make_tuple(4, 16, &aom_sad4x16x3d_neon, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad64x16x3d_neon, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64x3d_neon, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8x3d_neon, 8),
+  make_tuple(8, 32, &aom_highbd_sad8x32x3d_neon, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4x3d_neon, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16x3d_neon, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16x3d_neon, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64x3d_neon, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8x3d_neon, 10),
+  make_tuple(8, 32, &aom_highbd_sad8x32x3d_neon, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4x3d_neon, 10),
+  make_tuple(4, 16, &aom_highbd_sad4x16x3d_neon, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16x3d_neon, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64x3d_neon, 12),
+  make_tuple(32, 8, &aom_highbd_sad32x8x3d_neon, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32x3d_neon, 12),
+  make_tuple(16, 4, &aom_highbd_sad16x4x3d_neon, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16x3d_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON, SADx3Test, ::testing::ValuesIn(x3d_neon_tests));
+
+#endif  // HAVE_NEON
+
+#if HAVE_NEON_DOTPROD
+const SadMxNParam neon_dotprod_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128_neon_dotprod, -1),
+  make_tuple(128, 64, &aom_sad128x64_neon_dotprod, -1),
+  make_tuple(64, 128, &aom_sad64x128_neon_dotprod, -1),
+  make_tuple(64, 64, &aom_sad64x64_neon_dotprod, -1),
+  make_tuple(64, 32, &aom_sad64x32_neon_dotprod, -1),
+  make_tuple(32, 64, &aom_sad32x64_neon_dotprod, -1),
+  make_tuple(32, 32, &aom_sad32x32_neon_dotprod, -1),
+  make_tuple(32, 16, &aom_sad32x16_neon_dotprod, -1),
+  make_tuple(16, 32, &aom_sad16x32_neon_dotprod, -1),
+  make_tuple(16, 16, &aom_sad16x16_neon_dotprod, -1),
+  make_tuple(16, 8, &aom_sad16x8_neon_dotprod, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16_neon_dotprod, -1),
+  make_tuple(32, 8, &aom_sad32x8_neon_dotprod, -1),
+  make_tuple(16, 64, &aom_sad16x64_neon_dotprod, -1),
+  make_tuple(16, 4, &aom_sad16x4_neon_dotprod, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADTest,
+                         ::testing::ValuesIn(neon_dotprod_tests));
+
+const SadMxNParam skip_neon_dotprod_tests[] = {
+  make_tuple(128, 128, &aom_sad_skip_128x128_neon_dotprod, -1),
+  make_tuple(128, 64, &aom_sad_skip_128x64_neon_dotprod, -1),
+  make_tuple(64, 128, &aom_sad_skip_64x128_neon_dotprod, -1),
+  make_tuple(64, 64, &aom_sad_skip_64x64_neon_dotprod, -1),
+  make_tuple(64, 32, &aom_sad_skip_64x32_neon_dotprod, -1),
+  make_tuple(32, 64, &aom_sad_skip_32x64_neon_dotprod, -1),
+  make_tuple(32, 32, &aom_sad_skip_32x32_neon_dotprod, -1),
+  make_tuple(32, 16, &aom_sad_skip_32x16_neon_dotprod, -1),
+  make_tuple(16, 32, &aom_sad_skip_16x32_neon_dotprod, -1),
+  make_tuple(16, 16, &aom_sad_skip_16x16_neon_dotprod, -1),
+  make_tuple(16, 8, &aom_sad_skip_16x8_neon_dotprod, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad_skip_64x16_neon_dotprod, -1),
+  make_tuple(32, 8, &aom_sad_skip_32x8_neon_dotprod, -1),
+  make_tuple(16, 64, &aom_sad_skip_16x64_neon_dotprod, -1),
+  make_tuple(16, 4, &aom_sad_skip_16x4_neon_dotprod, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADSkipTest,
+                         ::testing::ValuesIn(skip_neon_dotprod_tests));
+
+const SadMxNAvgParam avg_neon_dotprod_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128_avg_neon_dotprod, -1),
+  make_tuple(128, 64, &aom_sad128x64_avg_neon_dotprod, -1),
+  make_tuple(64, 128, &aom_sad64x128_avg_neon_dotprod, -1),
+  make_tuple(64, 64, &aom_sad64x64_avg_neon_dotprod, -1),
+  make_tuple(64, 32, &aom_sad64x32_avg_neon_dotprod, -1),
+  make_tuple(32, 64, &aom_sad32x64_avg_neon_dotprod, -1),
+  make_tuple(32, 32, &aom_sad32x32_avg_neon_dotprod, -1),
+  make_tuple(32, 16, &aom_sad32x16_avg_neon_dotprod, -1),
+  make_tuple(16, 32, &aom_sad16x32_avg_neon_dotprod, -1),
+  make_tuple(16, 16, &aom_sad16x16_avg_neon_dotprod, -1),
+  make_tuple(16, 8, &aom_sad16x8_avg_neon_dotprod, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16_avg_neon_dotprod, -1),
+  make_tuple(32, 8, &aom_sad32x8_avg_neon_dotprod, -1),
+  make_tuple(16, 64, &aom_sad16x64_avg_neon_dotprod, -1),
+  make_tuple(16, 4, &aom_sad16x4_avg_neon_dotprod, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADavgTest,
+                         ::testing::ValuesIn(avg_neon_dotprod_tests));
+
+const DistWtdSadMxNAvgParam dist_wtd_avg_neon_dotprod_tests[] = {
+  make_tuple(128, 128, &aom_dist_wtd_sad128x128_avg_neon_dotprod, -1),
+  make_tuple(128, 64, &aom_dist_wtd_sad128x64_avg_neon_dotprod, -1),
+  make_tuple(64, 128, &aom_dist_wtd_sad64x128_avg_neon_dotprod, -1),
+  make_tuple(64, 64, &aom_dist_wtd_sad64x64_avg_neon_dotprod, -1),
+  make_tuple(64, 32, &aom_dist_wtd_sad64x32_avg_neon_dotprod, -1),
+  make_tuple(32, 64, &aom_dist_wtd_sad32x64_avg_neon_dotprod, -1),
+  make_tuple(32, 32, &aom_dist_wtd_sad32x32_avg_neon_dotprod, -1),
+  make_tuple(32, 16, &aom_dist_wtd_sad32x16_avg_neon_dotprod, -1),
+  make_tuple(16, 32, &aom_dist_wtd_sad16x32_avg_neon_dotprod, -1),
+  make_tuple(16, 16, &aom_dist_wtd_sad16x16_avg_neon_dotprod, -1),
+  make_tuple(16, 8, &aom_dist_wtd_sad16x8_avg_neon_dotprod, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_dist_wtd_sad64x16_avg_neon_dotprod, -1),
+  make_tuple(16, 64, &aom_dist_wtd_sad16x64_avg_neon_dotprod, -1),
+  make_tuple(32, 8, &aom_dist_wtd_sad32x8_avg_neon_dotprod, -1),
+  make_tuple(16, 4, &aom_dist_wtd_sad16x4_avg_neon_dotprod, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, DistWtdSADavgTest,
+                         ::testing::ValuesIn(dist_wtd_avg_neon_dotprod_tests));
+
+const SadMxNx4Param x3d_neon_dotprod_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128x3d_neon_dotprod, -1),
+  make_tuple(128, 64, &aom_sad128x64x3d_neon_dotprod, -1),
+  make_tuple(64, 128, &aom_sad64x128x3d_neon_dotprod, -1),
+  make_tuple(64, 64, &aom_sad64x64x3d_neon_dotprod, -1),
+  make_tuple(64, 32, &aom_sad64x32x3d_neon_dotprod, -1),
+  make_tuple(32, 64, &aom_sad32x64x3d_neon_dotprod, -1),
+  make_tuple(32, 32, &aom_sad32x32x3d_neon_dotprod, -1),
+  make_tuple(32, 16, &aom_sad32x16x3d_neon_dotprod, -1),
+  make_tuple(16, 32, &aom_sad16x32x3d_neon_dotprod, -1),
+  make_tuple(16, 16, &aom_sad16x16x3d_neon_dotprod, -1),
+  make_tuple(16, 8, &aom_sad16x8x3d_neon_dotprod, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16x3d_neon_dotprod, -1),
+  make_tuple(32, 8, &aom_sad32x8x3d_neon_dotprod, -1),
+  make_tuple(16, 64, &aom_sad16x64x3d_neon_dotprod, -1),
+  make_tuple(16, 4, &aom_sad16x4x3d_neon_dotprod, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADx3Test,
+                         ::testing::ValuesIn(x3d_neon_dotprod_tests));
+
+const SadMxNx4Param x4d_neon_dotprod_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128x4d_neon_dotprod, -1),
+  make_tuple(128, 64, &aom_sad128x64x4d_neon_dotprod, -1),
+  make_tuple(64, 128, &aom_sad64x128x4d_neon_dotprod, -1),
+  make_tuple(64, 64, &aom_sad64x64x4d_neon_dotprod, -1),
+  make_tuple(64, 32, &aom_sad64x32x4d_neon_dotprod, -1),
+  make_tuple(32, 64, &aom_sad32x64x4d_neon_dotprod, -1),
+  make_tuple(32, 32, &aom_sad32x32x4d_neon_dotprod, -1),
+  make_tuple(32, 16, &aom_sad32x16x4d_neon_dotprod, -1),
+  make_tuple(16, 32, &aom_sad16x32x4d_neon_dotprod, -1),
+  make_tuple(16, 16, &aom_sad16x16x4d_neon_dotprod, -1),
+  make_tuple(16, 8, &aom_sad16x8x4d_neon_dotprod, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16x4d_neon_dotprod, -1),
+  make_tuple(32, 8, &aom_sad32x8x4d_neon_dotprod, -1),
+  make_tuple(16, 64, &aom_sad16x64x4d_neon_dotprod, -1),
+  make_tuple(16, 4, &aom_sad16x4x4d_neon_dotprod, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADx4Test,
+                         ::testing::ValuesIn(x4d_neon_dotprod_tests));
+
+const SadSkipMxNx4Param skip_x4d_neon_dotprod_tests[] = {
+  make_tuple(128, 128, &aom_sad_skip_128x128x4d_neon_dotprod, -1),
+  make_tuple(128, 64, &aom_sad_skip_128x64x4d_neon_dotprod, -1),
+  make_tuple(64, 128, &aom_sad_skip_64x128x4d_neon_dotprod, -1),
+  make_tuple(64, 64, &aom_sad_skip_64x64x4d_neon_dotprod, -1),
+  make_tuple(64, 32, &aom_sad_skip_64x32x4d_neon_dotprod, -1),
+  make_tuple(32, 64, &aom_sad_skip_32x64x4d_neon_dotprod, -1),
+  make_tuple(32, 32, &aom_sad_skip_32x32x4d_neon_dotprod, -1),
+  make_tuple(32, 16, &aom_sad_skip_32x16x4d_neon_dotprod, -1),
+  make_tuple(16, 32, &aom_sad_skip_16x32x4d_neon_dotprod, -1),
+  make_tuple(16, 16, &aom_sad_skip_16x16x4d_neon_dotprod, -1),
+  make_tuple(16, 8, &aom_sad_skip_16x8x4d_neon_dotprod, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad_skip_64x16x4d_neon_dotprod, -1),
+  make_tuple(32, 8, &aom_sad_skip_32x8x4d_neon_dotprod, -1),
+  make_tuple(16, 64, &aom_sad_skip_16x64x4d_neon_dotprod, -1),
+  make_tuple(16, 4, &aom_sad_skip_16x4x4d_neon_dotprod, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADSkipx4Test,
+                         ::testing::ValuesIn(skip_x4d_neon_dotprod_tests));
+#endif  // HAVE_NEON_DOTPROD
+
+//------------------------------------------------------------------------------
+// x86 functions
+#if HAVE_SSE2
+const SadMxNParam sse2_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128_sse2, -1),
+  make_tuple(128, 64, &aom_sad128x64_sse2, -1),
+  make_tuple(64, 128, &aom_sad64x128_sse2, -1),
+  make_tuple(64, 64, &aom_sad64x64_sse2, -1),
+  make_tuple(64, 32, &aom_sad64x32_sse2, -1),
+  make_tuple(32, 64, &aom_sad32x64_sse2, -1),
+  make_tuple(32, 32, &aom_sad32x32_sse2, -1),
+  make_tuple(32, 16, &aom_sad32x16_sse2, -1),
+  make_tuple(16, 32, &aom_sad16x32_sse2, -1),
+  make_tuple(16, 16, &aom_sad16x16_sse2, -1),
+  make_tuple(16, 8, &aom_sad16x8_sse2, -1),
+  make_tuple(8, 16, &aom_sad8x16_sse2, -1),
+  make_tuple(8, 8, &aom_sad8x8_sse2, -1),
+  make_tuple(8, 4, &aom_sad8x4_sse2, -1),
+  make_tuple(4, 8, &aom_sad4x8_sse2, -1),
+  make_tuple(4, 4, &aom_sad4x4_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8_sse2, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4_sse2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8_sse2, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4_sse2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8_sse2, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4_sse2, 12),
+#endif
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16_sse2, -1),
+  make_tuple(16, 64, &aom_sad16x64_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad64x16_sse2, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64_sse2, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16_sse2, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64_sse2, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16_sse2, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64_sse2, 12),
+#endif
+  make_tuple(32, 8, &aom_sad32x8_sse2, -1),
+  make_tuple(8, 32, &aom_sad8x32_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(32, 8, &aom_highbd_sad32x8_sse2, 8),
+  make_tuple(8, 32, &aom_highbd_sad8x32_sse2, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8_sse2, 10),
+  make_tuple(8, 32, &aom_highbd_sad8x32_sse2, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8_sse2, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32_sse2, 12),
+#endif
+  make_tuple(16, 4, &aom_sad16x4_sse2, -1),
+  make_tuple(4, 16, &aom_sad4x16_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 10),
+  make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 12),
+#endif
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
+
+const SadSkipMxNParam skip_sse2_tests[] = {
+  make_tuple(128, 128, &aom_sad_skip_128x128_sse2, -1),
+  make_tuple(128, 64, &aom_sad_skip_128x64_sse2, -1),
+  make_tuple(64, 128, &aom_sad_skip_64x128_sse2, -1),
+  make_tuple(64, 64, &aom_sad_skip_64x64_sse2, -1),
+  make_tuple(64, 32, &aom_sad_skip_64x32_sse2, -1),
+  make_tuple(32, 64, &aom_sad_skip_32x64_sse2, -1),
+  make_tuple(32, 32, &aom_sad_skip_32x32_sse2, -1),
+  make_tuple(32, 16, &aom_sad_skip_32x16_sse2, -1),
+  make_tuple(16, 32, &aom_sad_skip_16x32_sse2, -1),
+  make_tuple(16, 16, &aom_sad_skip_16x16_sse2, -1),
+  make_tuple(16, 8, &aom_sad_skip_16x8_sse2, -1),
+  make_tuple(8, 16, &aom_sad_skip_8x16_sse2, -1),
+  make_tuple(8, 8, &aom_sad_skip_8x8_sse2, -1),
+  make_tuple(4, 8, &aom_sad_skip_4x8_sse2, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad_skip_64x16_sse2, -1),
+  make_tuple(16, 64, &aom_sad_skip_16x64_sse2, -1),
+  make_tuple(32, 8, &aom_sad_skip_32x8_sse2, -1),
+  make_tuple(8, 32, &aom_sad_skip_8x32_sse2, -1),
+  make_tuple(4, 16, &aom_sad_skip_4x16_sse2, -1),
+#endif
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_sse2, 8),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_sse2, 8),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_sse2, 8),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 8),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 8),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 8),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 8),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 8),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 8),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 8),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 8),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16_sse2, 8),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_sse2, 8),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8_sse2, 8),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32_sse2, 8),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16_sse2, 8),
+#endif
+
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_sse2, 10),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_sse2, 10),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_sse2, 10),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 10),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 10),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 10),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 10),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 10),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 10),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 10),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 10),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16_sse2, 10),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_sse2, 10),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8_sse2, 10),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32_sse2, 10),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16_sse2, 10),
+#endif
+
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_sse2, 12),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_sse2, 12),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_sse2, 12),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 12),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 12),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 12),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 12),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 12),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 12),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 12),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 12),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16_sse2, 12),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_sse2, 12),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8_sse2, 12),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32_sse2, 12),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16_sse2, 12),
+#endif
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, SADSkipTest,
+                         ::testing::ValuesIn(skip_sse2_tests));
+
+const SadMxNAvgParam avg_sse2_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128_avg_sse2, -1),
+  make_tuple(128, 64, &aom_sad128x64_avg_sse2, -1),
+  make_tuple(64, 128, &aom_sad64x128_avg_sse2, -1),
+  make_tuple(64, 64, &aom_sad64x64_avg_sse2, -1),
+  make_tuple(64, 32, &aom_sad64x32_avg_sse2, -1),
+  make_tuple(32, 64, &aom_sad32x64_avg_sse2, -1),
+  make_tuple(32, 32, &aom_sad32x32_avg_sse2, -1),
+  make_tuple(32, 16, &aom_sad32x16_avg_sse2, -1),
+  make_tuple(16, 32, &aom_sad16x32_avg_sse2, -1),
+  make_tuple(16, 16, &aom_sad16x16_avg_sse2, -1),
+  make_tuple(16, 8, &aom_sad16x8_avg_sse2, -1),
+  make_tuple(8, 16, &aom_sad8x16_avg_sse2, -1),
+  make_tuple(8, 8, &aom_sad8x8_avg_sse2, -1),
+  make_tuple(8, 4, &aom_sad8x4_avg_sse2, -1),
+  make_tuple(4, 8, &aom_sad4x8_avg_sse2, -1),
+  make_tuple(4, 4, &aom_sad4x4_avg_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_sse2, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_sse2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_sse2, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_sse2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_sse2, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_sse2, 12),
+#endif
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16_avg_sse2, -1),
+  make_tuple(16, 64, &aom_sad16x64_avg_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_sse2, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_sse2, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_sse2, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_sse2, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_sse2, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_sse2, 12),
+#endif
+  make_tuple(32, 8, &aom_sad32x8_avg_sse2, -1),
+  make_tuple(8, 32, &aom_sad8x32_avg_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_sse2, 8),
+  make_tuple(8, 32, &aom_highbd_sad8x32_avg_sse2, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_sse2, 10),
+  make_tuple(8, 32, &aom_highbd_sad8x32_avg_sse2, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_sse2, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32_avg_sse2, 12),
+#endif
+  make_tuple(16, 4, &aom_sad16x4_avg_sse2, -1),
+  make_tuple(4, 16, &aom_sad4x16_avg_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_sse2, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16_avg_sse2, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_sse2, 10),
+  make_tuple(4, 16, &aom_highbd_sad4x16_avg_sse2, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_sse2, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16_avg_sse2, 12),
+#endif
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests));
+
+const SadMxNx4Param x4d_sse2_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128x4d_sse2, -1),
+  make_tuple(128, 64, &aom_sad128x64x4d_sse2, -1),
+  make_tuple(64, 128, &aom_sad64x128x4d_sse2, -1),
+  make_tuple(64, 64, &aom_sad64x64x4d_sse2, -1),
+  make_tuple(64, 32, &aom_sad64x32x4d_sse2, -1),
+  make_tuple(32, 64, &aom_sad32x64x4d_sse2, -1),
+  make_tuple(32, 32, &aom_sad32x32x4d_sse2, -1),
+  make_tuple(32, 16, &aom_sad32x16x4d_sse2, -1),
+  make_tuple(16, 32, &aom_sad16x32x4d_sse2, -1),
+  make_tuple(16, 16, &aom_sad16x16x4d_sse2, -1),
+  make_tuple(16, 8, &aom_sad16x8x4d_sse2, -1),
+  make_tuple(8, 16, &aom_sad8x16x4d_sse2, -1),
+  make_tuple(8, 8, &aom_sad8x8x4d_sse2, -1),
+  make_tuple(8, 4, &aom_sad8x4x4d_sse2, -1),
+  make_tuple(4, 8, &aom_sad4x8x4d_sse2, -1),
+  make_tuple(4, 4, &aom_sad4x4x4d_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 12),
+#endif
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16x4d_sse2, -1),
+  make_tuple(16, 64, &aom_sad16x64x4d_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_sse2, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_sse2, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_sse2, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_sse2, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_sse2, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_sse2, 12),
+#endif
+  make_tuple(32, 8, &aom_sad32x8x4d_sse2, -1),
+  make_tuple(8, 32, &aom_sad8x32x4d_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_sse2, 8),
+  make_tuple(8, 32, &aom_highbd_sad8x32x4d_sse2, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_sse2, 10),
+  make_tuple(8, 32, &aom_highbd_sad8x32x4d_sse2, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_sse2, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32x4d_sse2, 12),
+#endif
+  make_tuple(16, 4, &aom_sad16x4x4d_sse2, -1),
+  make_tuple(4, 16, &aom_sad4x16x4d_sse2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_sse2, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16x4d_sse2, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_sse2, 10),
+  make_tuple(4, 16, &aom_highbd_sad4x16x4d_sse2, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_sse2, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16x4d_sse2, 12),
+#endif
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
+
+const SadSkipMxNx4Param skip_x4d_sse2_tests[] = {
+  make_tuple(128, 128, &aom_sad_skip_128x128x4d_sse2, -1),
+  make_tuple(128, 64, &aom_sad_skip_128x64x4d_sse2, -1),
+  make_tuple(64, 128, &aom_sad_skip_64x128x4d_sse2, -1),
+  make_tuple(64, 64, &aom_sad_skip_64x64x4d_sse2, -1),
+  make_tuple(64, 32, &aom_sad_skip_64x32x4d_sse2, -1),
+  make_tuple(32, 64, &aom_sad_skip_32x64x4d_sse2, -1),
+  make_tuple(32, 32, &aom_sad_skip_32x32x4d_sse2, -1),
+  make_tuple(32, 16, &aom_sad_skip_32x16x4d_sse2, -1),
+  make_tuple(16, 32, &aom_sad_skip_16x32x4d_sse2, -1),
+  make_tuple(16, 16, &aom_sad_skip_16x16x4d_sse2, -1),
+  make_tuple(16, 8, &aom_sad_skip_16x8x4d_sse2, -1),
+  make_tuple(8, 16, &aom_sad_skip_8x16x4d_sse2, -1),
+  make_tuple(8, 8, &aom_sad_skip_8x8x4d_sse2, -1),
+  make_tuple(4, 8, &aom_sad_skip_4x8x4d_sse2, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad_skip_64x16x4d_sse2, -1),
+  make_tuple(16, 64, &aom_sad_skip_16x64x4d_sse2, -1),
+  make_tuple(32, 8, &aom_sad_skip_32x8x4d_sse2, -1),
+  make_tuple(8, 32, &aom_sad_skip_8x32x4d_sse2, -1),
+  make_tuple(4, 16, &aom_sad_skip_4x16x4d_sse2, -1),
+#endif
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_sse2, 8),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_sse2, 8),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_sse2, 8),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_sse2, 8),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_sse2, 8),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_sse2, 8),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_sse2, 8),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_sse2, 8),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_sse2, 8),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_sse2, 8),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_sse2, 8),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_sse2, 8),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_sse2, 8),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_sse2, 8),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_sse2, 8),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_sse2, 8),
+#endif
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_sse2, 10),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_sse2, 10),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_sse2, 10),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_sse2, 10),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_sse2, 10),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_sse2, 10),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_sse2, 10),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_sse2, 10),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_sse2, 10),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_sse2, 10),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_sse2, 10),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_sse2, 10),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_sse2, 10),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_sse2, 10),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_sse2, 10),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_sse2, 10),
+#endif
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_sse2, 12),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_sse2, 12),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_sse2, 12),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_sse2, 12),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_sse2, 12),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_sse2, 12),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_sse2, 12),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_sse2, 12),
+  make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_sse2, 12),
+  make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_sse2, 12),
+  make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_sse2, 12),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_sse2, 12),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_sse2, 12),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_sse2, 12),
+  make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_sse2, 12),
+  make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_sse2, 12),
+#endif
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, SADSkipx4Test,
+                         ::testing::ValuesIn(skip_x4d_sse2_tests));
+
+const DistWtdSadMxNAvgParam dist_wtd_avg_sse2_tests[] = {
+  make_tuple(128, 128, &aom_dist_wtd_sad128x128_avg_sse2, -1),
+  make_tuple(128, 64, &aom_dist_wtd_sad128x64_avg_sse2, -1),
+  make_tuple(64, 128, &aom_dist_wtd_sad64x128_avg_sse2, -1),
+  make_tuple(64, 64, &aom_dist_wtd_sad64x64_avg_sse2, -1),
+  make_tuple(64, 32, &aom_dist_wtd_sad64x32_avg_sse2, -1),
+  make_tuple(32, 64, &aom_dist_wtd_sad32x64_avg_sse2, -1),
+  make_tuple(32, 32, &aom_dist_wtd_sad32x32_avg_sse2, -1),
+  make_tuple(32, 16, &aom_dist_wtd_sad32x16_avg_sse2, -1),
+  make_tuple(16, 32, &aom_dist_wtd_sad16x32_avg_sse2, -1),
+  make_tuple(16, 16, &aom_dist_wtd_sad16x16_avg_sse2, -1),
+  make_tuple(16, 8, &aom_dist_wtd_sad16x8_avg_sse2, -1),
+  make_tuple(8, 16, &aom_dist_wtd_sad8x16_avg_sse2, -1),
+  make_tuple(8, 8, &aom_dist_wtd_sad8x8_avg_sse2, -1),
+  make_tuple(8, 4, &aom_dist_wtd_sad8x4_avg_sse2, -1),
+  make_tuple(4, 8, &aom_dist_wtd_sad4x8_avg_sse2, -1),
+  make_tuple(4, 4, &aom_dist_wtd_sad4x4_avg_sse2, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_dist_wtd_sad64x16_avg_sse2, -1),
+  make_tuple(16, 64, &aom_dist_wtd_sad16x64_avg_sse2, -1),
+  make_tuple(32, 8, &aom_dist_wtd_sad32x8_avg_sse2, -1),
+  make_tuple(8, 32, &aom_dist_wtd_sad8x32_avg_sse2, -1),
+  make_tuple(16, 4, &aom_dist_wtd_sad16x4_avg_sse2, -1),
+  make_tuple(4, 16, &aom_dist_wtd_sad4x16_avg_sse2, -1),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(sse2, DistWtdSADavgTest,
+                         ::testing::ValuesIn(dist_wtd_avg_sse2_tests));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSE3
+// Only functions are x3, which do not have tests.
+#endif  // HAVE_SSE3
+
+#if HAVE_SSE4_1
+// Only functions are x8, which do not have tests.
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+const SadMxNParam avx2_tests[] = {
+  make_tuple(64, 128, &aom_sad64x128_avx2, -1),
+  make_tuple(128, 64, &aom_sad128x64_avx2, -1),
+  make_tuple(128, 128, &aom_sad128x128_avx2, -1),
+  make_tuple(64, 64, &aom_sad64x64_avx2, -1),
+  make_tuple(64, 32, &aom_sad64x32_avx2, -1),
+  make_tuple(32, 64, &aom_sad32x64_avx2, -1),
+  make_tuple(32, 32, &aom_sad32x32_avx2, -1),
+  make_tuple(32, 16, &aom_sad32x16_avx2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 12),
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad64x16_avx2, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16_avx2, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16_avx2, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avx2, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avx2, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avx2, 12),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avx2, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avx2, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avx2, 12),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avx2, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avx2, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avx2, 12),
+#endif
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests));
+
+const SadSkipMxNParam skip_avx2_tests[] = {
+  make_tuple(128, 128, &aom_sad_skip_128x128_avx2, -1),
+  make_tuple(128, 64, &aom_sad_skip_128x64_avx2, -1),
+  make_tuple(64, 128, &aom_sad_skip_64x128_avx2, -1),
+  make_tuple(64, 64, &aom_sad_skip_64x64_avx2, -1),
+  make_tuple(64, 32, &aom_sad_skip_64x32_avx2, -1),
+  make_tuple(32, 64, &aom_sad_skip_32x64_avx2, -1),
+  make_tuple(32, 32, &aom_sad_skip_32x32_avx2, -1),
+  make_tuple(32, 16, &aom_sad_skip_32x16_avx2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128_avx2, 8),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64_avx2, 8),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128_avx2, 8),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_avx2, 8),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_avx2, 8),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_avx2, 8),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_avx2, 8),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_avx2, 8),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_avx2, 8),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_avx2, 8),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_avx2, 8),
+
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128_avx2, 10),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64_avx2, 10),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128_avx2, 10),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_avx2, 10),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_avx2, 10),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_avx2, 10),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_avx2, 10),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_avx2, 10),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_avx2, 10),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_avx2, 10),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_avx2, 10),
+
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128_avx2, 12),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64_avx2, 12),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128_avx2, 12),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64_avx2, 12),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32_avx2, 12),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64_avx2, 12),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32_avx2, 12),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16_avx2, 12),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32_avx2, 12),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16_avx2, 12),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8_avx2, 12),
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_avx2, 8),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_avx2, 10),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64_avx2, 12),
+#endif
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, SADSkipTest,
+                         ::testing::ValuesIn(skip_avx2_tests));
+
+const SadMxNAvgParam avg_avx2_tests[] = {
+  make_tuple(64, 128, &aom_sad64x128_avg_avx2, -1),
+  make_tuple(128, 64, &aom_sad128x64_avg_avx2, -1),
+  make_tuple(128, 128, &aom_sad128x128_avg_avx2, -1),
+  make_tuple(64, 64, &aom_sad64x64_avg_avx2, -1),
+  make_tuple(64, 32, &aom_sad64x32_avg_avx2, -1),
+  make_tuple(32, 64, &aom_sad32x64_avg_avx2, -1),
+  make_tuple(32, 32, &aom_sad32x32_avg_avx2, -1),
+  make_tuple(32, 16, &aom_sad32x16_avg_avx2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 12),
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_avx2, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_avx2, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16_avg_avx2, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_avx2, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_avx2, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64_avg_avx2, 12),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_avx2, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_avx2, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8_avg_avx2, 12),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_avx2, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_avx2, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4_avg_avx2, 12),
+#endif
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests));
+
+const SadSkipMxNx4Param skip_x4d_avx2_tests[] = {
+  make_tuple(128, 128, &aom_sad_skip_128x128x4d_avx2, -1),
+  make_tuple(128, 64, &aom_sad_skip_128x64x4d_avx2, -1),
+  make_tuple(64, 128, &aom_sad_skip_64x128x4d_avx2, -1),
+  make_tuple(64, 64, &aom_sad_skip_64x64x4d_avx2, -1),
+  make_tuple(64, 32, &aom_sad_skip_64x32x4d_avx2, -1),
+  make_tuple(32, 64, &aom_sad_skip_32x64x4d_avx2, -1),
+  make_tuple(32, 32, &aom_sad_skip_32x32x4d_avx2, -1),
+  make_tuple(32, 16, &aom_sad_skip_32x16x4d_avx2, -1),
+  make_tuple(16, 32, &aom_sad_skip_16x32x4d_avx2, -1),
+  make_tuple(16, 16, &aom_sad_skip_16x16x4d_avx2, -1),
+  make_tuple(16, 8, &aom_sad_skip_16x8x4d_avx2, -1),
+
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_avx2, 8),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_avx2, 8),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_avx2, 8),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_avx2, 8),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_avx2, 8),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_avx2, 8),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_avx2, 8),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_avx2, 8),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_avx2, 8),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_avx2, 8),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_avx2, 8),
+
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_avx2, 10),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_avx2, 10),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_avx2, 10),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_avx2, 10),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_avx2, 10),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_avx2, 10),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_avx2, 10),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_avx2, 10),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_avx2, 10),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_avx2, 10),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_avx2, 10),
+
+  make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_avx2, 12),
+  make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_avx2, 12),
+  make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_avx2, 12),
+  make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_avx2, 12),
+  make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_avx2, 12),
+  make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_avx2, 12),
+  make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_avx2, 12),
+  make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_avx2, 12),
+  make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_avx2, 12),
+  make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_avx2, 12),
+  make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_avx2, 12),
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_avx2, 8),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_avx2, 8),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_avx2, 8),
+
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_avx2, 10),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_avx2, 10),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_avx2, 10),
+
+  make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_avx2, 12),
+  make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_avx2, 12),
+  make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_avx2, 12),
+#endif
+#endif
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad_skip_64x16x4d_avx2, -1),
+  make_tuple(32, 8, &aom_sad_skip_32x8x4d_avx2, -1),
+
+  make_tuple(16, 64, &aom_sad_skip_16x64x4d_avx2, -1),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, SADSkipx4Test,
+                         ::testing::ValuesIn(skip_x4d_avx2_tests));
+
+const SadMxNx4Param x4d_avx2_tests[] = {
+  make_tuple(16, 32, &aom_sad16x32x4d_avx2, -1),
+  make_tuple(16, 16, &aom_sad16x16x4d_avx2, -1),
+  make_tuple(16, 8, &aom_sad16x8x4d_avx2, -1),
+  make_tuple(32, 64, &aom_sad32x64x4d_avx2, -1),
+  make_tuple(32, 32, &aom_sad32x32x4d_avx2, -1),
+  make_tuple(32, 16, &aom_sad32x16x4d_avx2, -1),
+  make_tuple(64, 128, &aom_sad64x128x4d_avx2, -1),
+  make_tuple(64, 64, &aom_sad64x64x4d_avx2, -1),
+  make_tuple(64, 32, &aom_sad64x32x4d_avx2, -1),
+  make_tuple(128, 128, &aom_sad128x128x4d_avx2, -1),
+  make_tuple(128, 64, &aom_sad128x64x4d_avx2, -1),
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(16, 64, &aom_sad16x64x4d_avx2, -1),
+  make_tuple(16, 4, &aom_sad16x4x4d_avx2, -1),
+  make_tuple(32, 8, &aom_sad32x8x4d_avx2, -1),
+  make_tuple(64, 16, &aom_sad64x16x4d_avx2, -1),
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 12),
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_avx2, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_avx2, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64x4d_avx2, 12),
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_avx2, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_avx2, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16x4d_avx2, 12),
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_avx2, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_avx2, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8x4d_avx2, 12),
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_avx2, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_avx2, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4x4d_avx2, 12),
+#endif
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
+
+const SadMxNx4Param x3d_avx2_tests[] = {
+  make_tuple(32, 64, &aom_sad32x64x3d_avx2, -1),
+  make_tuple(32, 32, &aom_sad32x32x3d_avx2, -1),
+  make_tuple(32, 16, &aom_sad32x16x3d_avx2, -1),
+  make_tuple(64, 128, &aom_sad64x128x3d_avx2, -1),
+  make_tuple(64, 64, &aom_sad64x64x3d_avx2, -1),
+  make_tuple(64, 32, &aom_sad64x32x3d_avx2, -1),
+  make_tuple(128, 128, &aom_sad128x128x3d_avx2, -1),
+  make_tuple(128, 64, &aom_sad128x64x3d_avx2, -1),
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(32, 8, &aom_sad32x8x3d_avx2, -1),
+  make_tuple(64, 16, &aom_sad64x16x3d_avx2, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(128, 128, &aom_highbd_sad128x128x3d_avx2, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128x3d_avx2, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128x3d_avx2, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64x3d_avx2, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64x3d_avx2, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64x3d_avx2, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128x3d_avx2, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128x3d_avx2, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128x3d_avx2, 12),
+  make_tuple(64, 64, &aom_highbd_sad64x64x3d_avx2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64x3d_avx2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64x3d_avx2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32x3d_avx2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32x3d_avx2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32x3d_avx2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64x3d_avx2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64x3d_avx2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64x3d_avx2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32x3d_avx2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32x3d_avx2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32x3d_avx2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16x3d_avx2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16x3d_avx2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16x3d_avx2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32x3d_avx2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32x3d_avx2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32x3d_avx2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16x3d_avx2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16x3d_avx2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16x3d_avx2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8x3d_avx2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8x3d_avx2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8x3d_avx2, 12),
+
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(16, 64, &aom_highbd_sad16x64x3d_avx2, 8),
+  make_tuple(16, 64, &aom_highbd_sad16x64x3d_avx2, 10),
+  make_tuple(16, 64, &aom_highbd_sad16x64x3d_avx2, 12),
+  make_tuple(64, 16, &aom_highbd_sad64x16x3d_avx2, 8),
+  make_tuple(64, 16, &aom_highbd_sad64x16x3d_avx2, 10),
+  make_tuple(64, 16, &aom_highbd_sad64x16x3d_avx2, 12),
+  make_tuple(32, 8, &aom_highbd_sad32x8x3d_avx2, 8),
+  make_tuple(32, 8, &aom_highbd_sad32x8x3d_avx2, 10),
+  make_tuple(32, 8, &aom_highbd_sad32x8x3d_avx2, 12),
+  make_tuple(16, 4, &aom_highbd_sad16x4x3d_avx2, 8),
+  make_tuple(16, 4, &aom_highbd_sad16x4x3d_avx2, 10),
+  make_tuple(16, 4, &aom_highbd_sad16x4x3d_avx2, 12),
+#endif  // !CONFIG_REALTIME_ONLY
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, SADx3Test, ::testing::ValuesIn(x3d_avx2_tests));
+#endif  // HAVE_AVX2
+
+}  // namespace
diff --git a/third_party/aom/test/sb_multipass_test.cc b/third_party/aom/test/sb_multipass_test.cc
new file mode 100644
index 0000000000..e27a2c60ee
--- /dev/null
+++ b/third_party/aom/test/sb_multipass_test.cc
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <initializer_list>
+#include <string>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+class AV1SBMultipassTest
+    : public ::libaom_test::CodecTestWith2Params<int, bool>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AV1SBMultipassTest()
+      : EncoderTest(GET_PARAM(0)), set_cpu_used_(GET_PARAM(1)),
+        row_mt_(GET_PARAM(2)) {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.w = 1280;
+    cfg.h = 720;
+    cfg.allow_lowbitdepth = 1;
+    decoder_ = codec_->CreateDecoder(cfg, 0);
+    if (decoder_->IsAV1()) {
+      decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
+    }
+
+    size_enc_.clear();
+    md5_dec_.clear();
+    md5_enc_.clear();
+  }
+  ~AV1SBMultipassTest() override { delete decoder_; }
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kTwoPassGood);
+
+    cfg_.g_lag_in_frames = 5;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.rc_2pass_vbr_minsection_pct = 5;
+    cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_min_quantizer = 0;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      SetTileSize(encoder);
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_ENABLE_SB_MULTIPASS_UNIT_TEST, use_multipass_);
+      encoder->Control(AV1E_SET_ROW_MT, row_mt_);
+
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  virtual void SetTileSize(libaom_test::Encoder *encoder) {
+    encoder->Control(AV1E_SET_TILE_COLUMNS, 1);
+    encoder->Control(AV1E_SET_TILE_ROWS, 1);
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    size_enc_.push_back(pkt->data.frame.sz);
+
+    ::libaom_test::MD5 md5_enc;
+    md5_enc.Add(reinterpret_cast<uint8_t *>(pkt->data.frame.buf),
+                pkt->data.frame.sz);
+    md5_enc_.push_back(md5_enc.Get());
+
+    const aom_codec_err_t res = decoder_->DecodeFrame(
+        reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz);
+    if (res != AOM_CODEC_OK) {
+      abort_ = true;
+      ASSERT_EQ(AOM_CODEC_OK, res);
+    }
+    const aom_image_t *img = decoder_->GetDxData().Next();
+
+    if (img) {
+      ::libaom_test::MD5 md5_res;
+      md5_res.Add(img);
+      md5_dec_.push_back(md5_res.Get());
+    }
+  }
+
+  void DoTest() {
+    ::libaom_test::YUVVideoSource video(
+        "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, 640, 480, 30, 1, 0, 6);
+    cfg_.rc_target_bitrate = 1000;
+
+    // Encode while coding each sb once
+    use_multipass_ = false;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    std::vector<size_t> single_pass_size_enc;
+    std::vector<std::string> single_pass_md5_enc;
+    std::vector<std::string> single_pass_md5_dec;
+    single_pass_size_enc = size_enc_;
+    single_pass_md5_enc = md5_enc_;
+    single_pass_md5_dec = md5_dec_;
+    size_enc_.clear();
+    md5_enc_.clear();
+    md5_dec_.clear();
+
+    // Encode while coding each sb twice
+    use_multipass_ = true;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    std::vector<size_t> multi_pass_size_enc;
+    std::vector<std::string> multi_pass_md5_enc;
+    std::vector<std::string> multi_pass_md5_dec;
+    multi_pass_size_enc = size_enc_;
+    multi_pass_md5_enc = md5_enc_;
+    multi_pass_md5_dec = md5_dec_;
+    size_enc_.clear();
+    md5_enc_.clear();
+    md5_dec_.clear();
+
+    // Check that the vectors are equal.
+    ASSERT_EQ(single_pass_size_enc, multi_pass_size_enc);
+    ASSERT_EQ(single_pass_md5_enc, multi_pass_md5_enc);
+    ASSERT_EQ(single_pass_md5_dec, multi_pass_md5_dec);
+  }
+
+  bool use_multipass_;
+  int set_cpu_used_;
+  bool row_mt_;
+  ::libaom_test::Decoder *decoder_;
+  std::vector<size_t> size_enc_;
+  std::vector<std::string> md5_enc_;
+  std::vector<std::string> md5_dec_;
+};
+
+TEST_P(AV1SBMultipassTest, TwoPassMatchTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(AV1SBMultipassTest, ::testing::Range(4, 6),
+                           ::testing::Bool());
+
+}  // namespace
diff --git a/third_party/aom/test/sb_qp_sweep_test.cc b/third_party/aom/test/sb_qp_sweep_test.cc
new file mode 100644
index 0000000000..6c76a40b2a
--- /dev/null
+++ b/third_party/aom/test/sb_qp_sweep_test.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <initializer_list>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/yuv_video_source.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+// Parameters: cpu-used, row-mt.
+class AV1SBQPSweepTest : public ::libaom_test::CodecTestWith2Params<int, bool>,
+                         public ::libaom_test::EncoderTest {
+ protected:
+  AV1SBQPSweepTest()
+      : EncoderTest(GET_PARAM(0)), set_cpu_used_(GET_PARAM(1)),
+        row_mt_(GET_PARAM(2)) {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.w = 1280;
+    cfg.h = 720;
+    cfg.allow_lowbitdepth = 1;
+    decoder_ =
+        std::unique_ptr<::libaom_test::Decoder>(codec_->CreateDecoder(cfg, 0));
+  }
+  ~AV1SBQPSweepTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(::libaom_test::kTwoPassGood);
+
+    ASSERT_NE(decoder_, nullptr);
+    if (decoder_->IsAV1()) {
+      decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
+    }
+
+    cfg_.g_lag_in_frames = 5;
+    cfg_.rc_end_usage = AOM_Q;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      SetTileSize(encoder);
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_ENABLE_SB_QP_SWEEP, use_sb_sweep_);
+      encoder->Control(AV1E_SET_ROW_MT, row_mt_);
+
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  virtual void SetTileSize(libaom_test::Encoder *encoder) {
+    encoder->Control(AV1E_SET_TILE_COLUMNS, 1);
+    encoder->Control(AV1E_SET_TILE_ROWS, 1);
+  }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  double GetAverageFrameSize() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    sum_frame_size_ += pkt->data.frame.sz;
+
+    const aom_codec_err_t res = decoder_->DecodeFrame(
+        reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz);
+    if (res != AOM_CODEC_OK) {
+      abort_ = true;
+      ASSERT_EQ(AOM_CODEC_OK, res);
+    }
+  }
+
+  void DoTest() {
+    ::libaom_test::YUVVideoSource video(
+        "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, 640, 480, 30, 1, 0, 6);
+    cfg_.rc_target_bitrate = 1000;
+
+    // Encode without sb_qp_sweep
+    use_sb_sweep_ = false;
+    sum_frame_size_ = 0;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    const double psnr_1 = GetAveragePsnr();
+    const size_t avg_frame_size_1 = sum_frame_size_ / nframes_;
+
+    // Encode with sb_qp_sweep
+    use_sb_sweep_ = true;
+    sum_frame_size_ = 0;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    const double psnr_2 = GetAveragePsnr();
+    const size_t avg_frame_size_2 = sum_frame_size_ / nframes_;
+
+    if (psnr_1 >= psnr_2) {
+      ASSERT_GE(avg_frame_size_1, avg_frame_size_2);
+    }
+    if (avg_frame_size_1 <= avg_frame_size_2) {
+      ASSERT_LE(psnr_1, psnr_2);
+    }
+  }
+
+  bool use_sb_sweep_;
+  int set_cpu_used_;
+  bool row_mt_;
+  double psnr_;
+  unsigned int nframes_;
+  size_t sum_frame_size_;
+  std::unique_ptr<::libaom_test::Decoder> decoder_;
+};
+
+TEST_P(AV1SBQPSweepTest, SweepMatchTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(AV1SBQPSweepTest, ::testing::Range(4, 6),
+                           ::testing::Bool());
+
+}  // namespace
diff --git a/third_party/aom/test/scalability_test.cc b/third_party/aom/test/scalability_test.cc
new file mode 100644
index 0000000000..12cb03cac4
--- /dev/null
+++ b/third_party/aom/test/scalability_test.cc
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+const int kCpuUsed = 8;
+const int kBaseLayerQp = 55;
+const int kEnhancementLayerQp = 20;
+
+class ScalabilityTest
+    : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  ScalabilityTest() : EncoderTest(GET_PARAM(0)) {}
+  ~ScalabilityTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(GET_PARAM(1));
+    num_spatial_layers_ = 2;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
+      encoder->Control(AOME_SET_NUMBER_SPATIAL_LAYERS, num_spatial_layers_);
+    }
+    if (video->frame() % num_spatial_layers_) {
+      frame_flags_ = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
+                     AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF |
+                     AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 |
+                     AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
+                     AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY;
+      encoder->Control(AOME_SET_SPATIAL_LAYER_ID, 1);
+      encoder->Control(AOME_SET_CQ_LEVEL, kEnhancementLayerQp);
+    } else {
+      frame_flags_ = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
+                     AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF |
+                     AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 |
+                     AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF |
+                     AOM_EFLAG_NO_UPD_ENTROPY;
+      encoder->Control(AOME_SET_SPATIAL_LAYER_ID, 0);
+      encoder->Control(AOME_SET_CQ_LEVEL, kBaseLayerQp);
+    }
+  }
+
+  void DoTest(int num_spatial_layers) {
+    num_spatial_layers_ = num_spatial_layers;
+    cfg_.rc_end_usage = AOM_Q;
+    cfg_.g_lag_in_frames = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 18);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  int num_spatial_layers_;
+};
+
+TEST_P(ScalabilityTest, TestNoMismatch2SpatialLayers) { DoTest(2); }
+
+TEST_P(ScalabilityTest, TestNoMismatch3SpatialLayers) { DoTest(3); }
+
+AV1_INSTANTIATE_TEST_SUITE(ScalabilityTest,
+                           ::testing::Values(::libaom_test::kRealTime));
+
+}  // namespace
diff --git a/third_party/aom/test/scan_test.cc b/third_party/aom/test/scan_test.cc
new file mode 100644
index 0000000000..571658ee0a
--- /dev/null
+++ b/third_party/aom/test/scan_test.cc
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "av1/common/scan.h"
+#include "av1/common/txb_common.h"
+#include "test/av1_txfm_test.h"
+
+static int scan_test(const int16_t *scan, const int16_t *iscan, int si, int r,
+                     int c, int h) {
+  if (iscan[c * h + r] != si || scan[si] != c * h + r) {
+    printf("r %d c %d ref_iscan %d iscan %d ref_scan %d scan %d\n", r, c, si,
+           iscan[c * h + r], c * h + r, scan[si]);
+    return 1;
+  } else {
+    return 0;
+  }
+}
+
+int scan_order_test(const SCAN_ORDER *scan_order, int w, int h,
+                    SCAN_MODE mode) {
+  const int16_t *scan = scan_order->scan;
+  const int16_t *iscan = scan_order->iscan;
+  int dim = w + h - 1;
+  if (mode == SCAN_MODE_ZIG_ZAG) {
+    int si = 0;
+    for (int i = 0; i < dim; ++i) {
+      if (i % 2 == 0) {
+        for (int c = 0; c < w; ++c) {
+          int r = i - c;
+          if (r >= 0 && r < h) {
+            if (scan_test(scan, iscan, si, r, c, h)) return 1;
+            ++si;
+          }
+        }
+      } else {
+        for (int r = 0; r < h; ++r) {
+          int c = i - r;
+          if (c >= 0 && c < w) {
+            if (scan_test(scan, iscan, si, r, c, h)) return 1;
+            ++si;
+          }
+        }
+      }
+    }
+  } else if (mode == SCAN_MODE_COL_DIAG) {
+    int si = 0;
+    for (int i = 0; i < dim; ++i) {
+      for (int c = 0; c < w; ++c) {
+        int r = i - c;
+        if (r >= 0 && r < h) {
+          if (scan_test(scan, iscan, si, r, c, h)) return 1;
+          ++si;
+        }
+      }
+    }
+  } else if (mode == SCAN_MODE_ROW_DIAG) {
+    int si = 0;
+    for (int i = 0; i < dim; ++i) {
+      for (int r = 0; r < h; ++r) {
+        int c = i - r;
+        if (c >= 0 && c < w) {
+          if (scan_test(scan, iscan, si, r, c, h)) return 1;
+          ++si;
+        }
+      }
+    }
+  } else if (mode == SCAN_MODE_ROW_1D) {
+    int si = 0;
+    for (int r = 0; r < h; ++r) {
+      for (int c = 0; c < w; ++c) {
+        if (scan_test(scan, iscan, si, r, c, h)) return 1;
+        ++si;
+      }
+    }
+  } else {
+    assert(mode == SCAN_MODE_COL_1D);
+    int si = 0;
+    for (int c = 0; c < w; ++c) {
+      for (int r = 0; r < h; ++r) {
+        if (scan_test(scan, iscan, si, r, c, h)) return 1;
+        ++si;
+      }
+    }
+  }
+  return 0;
+}
+
+TEST(Av1ScanTest, Dependency) {
+  for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) {
+    const int org_rows = tx_size_high[(TX_SIZE)tx_size];
+    const int org_cols = tx_size_wide[(TX_SIZE)tx_size];
+    const int rows = get_txb_high((TX_SIZE)tx_size);
+    const int cols = get_txb_wide((TX_SIZE)tx_size);
+    for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+      if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(tx_size),
+                                         static_cast<TX_TYPE>(tx_type)) ==
+          false) {
+        continue;
+      }
+      SCAN_MODE scan_mode;
+      TX_CLASS tx_class = tx_type_to_class[(TX_TYPE)tx_type];
+      if (tx_class == TX_CLASS_2D) {
+        if (rows == cols) {
+          scan_mode = SCAN_MODE_ZIG_ZAG;
+        } else if (rows > cols) {
+          scan_mode = SCAN_MODE_ROW_DIAG;
+        } else {
+          scan_mode = SCAN_MODE_COL_DIAG;
+        }
+      } else if (tx_class == TX_CLASS_VERT) {
+        scan_mode = SCAN_MODE_ROW_1D;
+      } else {
+        assert(tx_class == TX_CLASS_HORIZ);
+        scan_mode = SCAN_MODE_COL_1D;
+      }
+      const SCAN_ORDER *scan_order =
+          get_default_scan((TX_SIZE)tx_size, (TX_TYPE)tx_type);
+      ASSERT_EQ(scan_order_test(scan_order, cols, rows, scan_mode), 0)
+          << "scan mismatch tx_class " << tx_class << " tx_type " << tx_type
+          << " tx_w " << org_cols << " tx_h " << org_rows << " scan_mode "
+          << scan_mode << "\n";
+    }
+  }
+}
diff --git a/third_party/aom/test/screen_content_test.cc b/third_party/aom/test/screen_content_test.cc
new file mode 100644
index 0000000000..974c50b3c6
--- /dev/null
+++ b/third_party/aom/test/screen_content_test.cc
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include "aom/aom_codec.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/y4m_video_source.h"
+#include "test/util.h"
+
+namespace {
+// This class is used to validate if screen_content_tools are turned on
+// appropriately.
+class ScreenContentToolsTestLarge
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode,
+                                                 aom_rc_mode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  ScreenContentToolsTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        rc_end_usage_(GET_PARAM(2)) {
+    is_screen_content_violated_ = true;
+    tune_content_ = AOM_CONTENT_DEFAULT;
+  }
+  ~ScreenContentToolsTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = rc_end_usage_;
+    cfg_.g_threads = 1;
+    cfg_.g_lag_in_frames = 35;
+    cfg_.rc_target_bitrate = 1000;
+    cfg_.g_profile = 0;
+  }
+
+  bool DoDecode() const override { return true; }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 5);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AV1E_SET_TUNE_CONTENT, tune_content_);
+    }
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (AOM_CODEC_OK == res_dec) {
+      aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
+      aom_screen_content_tools_info sc_info;
+
+      AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_SCREEN_CONTENT_TOOLS_INFO,
+                                    &sc_info);
+      if (sc_info.allow_screen_content_tools == 1) {
+        is_screen_content_violated_ = false;
+      }
+    }
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  bool is_screen_content_violated_;
+  int tune_content_;
+  aom_rc_mode rc_end_usage_;
+};
+
+TEST_P(ScreenContentToolsTestLarge, ScreenContentToolsTest) {
+  // force screen content tools on
+  ::libaom_test::Y4mVideoSource video_nonsc("park_joy_90p_8_444.y4m", 0, 1);
+  cfg_.g_profile = 1;
+  tune_content_ = AOM_CONTENT_SCREEN;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video_nonsc));
+  ASSERT_EQ(is_screen_content_violated_, false)
+      << "Failed for tune_content_ = AOM_CONTENT_SCREEN";
+
+  // Don't force screen content, however as the input is screen content
+  // allow_screen_content_tools should still be turned on
+  ::libaom_test::Y4mVideoSource video_sc("desktop_credits.y4m", 0, 1);
+  cfg_.g_profile = 1;
+  is_screen_content_violated_ = true;
+  tune_content_ = AOM_CONTENT_DEFAULT;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video_sc));
+  ASSERT_EQ(is_screen_content_violated_, false)
+      << "Failed detection of screen content";
+
+  // TODO(anyone): Enable below test once low resolution screen content
+  // detection issues are fixed.
+  // low resolution test
+  //  ::libaom_test::Y4mVideoSource video_sc("screendata.y4m", 0, 1);
+  //  cfg_.g_profile = 0;
+  //  is_screen_content_violated_ = true;
+  //  tune_content_ = AOM_CONTENT_DEFAULT;
+  //  ASSERT_NO_FATAL_FAILURE(RunLoop(&video_sc));
+  //  ASSERT_EQ(is_screen_content_violated_, false)
+  //      << "Failed detection of screen content(lowres)";
+}
+
+AV1_INSTANTIATE_TEST_SUITE(ScreenContentToolsTestLarge,
+                           ::testing::Values(::libaom_test::kOnePassGood,
+                                             ::libaom_test::kTwoPassGood),
+                           ::testing::Values(AOM_Q));
+
+class ScreenContentToolsMultiThreadTestLarge
+    : public ScreenContentToolsTestLarge {};
+
+TEST_P(ScreenContentToolsMultiThreadTestLarge, ScreenContentToolsTest) {
+  // Don't force screen content, however as the input is screen content
+  // allow_screen_content_tools should still be turned on even with
+  // multi-threaded encoding.
+  ::libaom_test::Y4mVideoSource video_sc("desktop_credits.y4m", 0, 10);
+  cfg_.g_profile = 1;
+  cfg_.g_threads = 4;
+  is_screen_content_violated_ = true;
+  tune_content_ = AOM_CONTENT_DEFAULT;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video_sc));
+  ASSERT_EQ(is_screen_content_violated_, false)
+      << "Failed detection of screen content";
+}
+
+AV1_INSTANTIATE_TEST_SUITE(ScreenContentToolsMultiThreadTestLarge,
+                           ::testing::Values(::libaom_test::kOnePassGood,
+                                             ::libaom_test::kTwoPassGood),
+                           ::testing::Values(AOM_Q));
+}  // namespace
diff --git a/third_party/aom/test/segment_binarization_sync.cc b/third_party/aom/test/segment_binarization_sync.cc
new file mode 100644
index 0000000000..bd8cf11410
--- /dev/null
+++ b/third_party/aom/test/segment_binarization_sync.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+
+using libaom_test::ACMRandom;
+
+extern "C" {
+int av1_neg_interleave(int x, int ref, int max);
+int av1_neg_deinterleave(int diff, int ref, int max);
+}
+
+namespace {
+
+struct Segment {
+  int id;
+  int pred;
+  int last_id;
+};
+
+Segment GenerateSegment(int seed) {
+  static const int MAX_SEGMENTS = 8;
+
+  ACMRandom rnd_(seed);
+
+  Segment segment;
+  const int last_segid = rnd_.PseudoUniform(MAX_SEGMENTS);
+  segment.last_id = last_segid;
+  segment.pred = rnd_.PseudoUniform(MAX_SEGMENTS);
+  segment.id = rnd_.PseudoUniform(last_segid + 1);
+
+  return segment;
+}
+
+// Try to reveal a mismatch between segment binarization and debinarization
+TEST(SegmentBinarizationSync, SearchForBinarizationMismatch) {
+  const int count_tests = 1000;
+  const int seed_init = 4321;
+
+  for (int i = 0; i < count_tests; ++i) {
+    const Segment seg = GenerateSegment(seed_init + i);
+
+    const int max_segid = seg.last_id + 1;
+    const int seg_diff = av1_neg_interleave(seg.id, seg.pred, max_segid);
+    const int decoded_segid =
+        av1_neg_deinterleave(seg_diff, seg.pred, max_segid);
+
+    ASSERT_EQ(decoded_segid, seg.id);
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/selfguided_filter_test.cc b/third_party/aom/test/selfguided_filter_test.cc
new file mode 100644
index 0000000000..3dd513b6e0
--- /dev/null
+++ b/third_party/aom/test/selfguided_filter_test.cc
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <ctime>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "aom_ports/aom_timer.h"
+#include "av1/common/mv.h"
+#include "av1/common/restoration.h"
+
+namespace {
+
+using libaom_test::ACMRandom;
+using std::make_tuple;
+using std::tuple;
+
+typedef int (*SgrFunc)(const uint8_t *dat8, int width, int height, int stride,
+                       int eps, const int *xqd, uint8_t *dst8, int dst_stride,
+                       int32_t *tmpbuf, int bit_depth, int highbd);
+
+// Test parameter list:
+//  <tst_fun_>
+typedef tuple<SgrFunc> FilterTestParam;
+
+class AV1SelfguidedFilterTest
+    : public ::testing::TestWithParam<FilterTestParam> {
+ public:
+  ~AV1SelfguidedFilterTest() override = default;
+  void SetUp() override {}
+
+ protected:
+  void RunSpeedTest() {
+    tst_fun_ = GET_PARAM(0);
+    const int pu_width = RESTORATION_PROC_UNIT_SIZE;
+    const int pu_height = RESTORATION_PROC_UNIT_SIZE;
+    const int width = 256, height = 256, stride = 288, out_stride = 288;
+    const int NUM_ITERS = 2000;
+    int i, j, k;
+
+    uint8_t *input_ =
+        (uint8_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint8_t));
+    ASSERT_NE(input_, nullptr);
+    uint8_t *output_ = (uint8_t *)aom_memalign(
+        32, out_stride * (height + 32) * sizeof(uint8_t));
+    ASSERT_NE(output_, nullptr);
+    int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
+    ASSERT_NE(tmpbuf, nullptr);
+    uint8_t *input = input_ + stride * 16 + 16;
+    uint8_t *output = output_ + out_stride * 16 + 16;
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    for (i = -16; i < height + 16; ++i)
+      for (j = -16; j < width + 16; ++j)
+        input[i * stride + j] = rnd.Rand16() & 0xFF;
+
+    int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+                                                        SGRPROJ_PRJ_MIN0),
+                   SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+                                                        SGRPROJ_PRJ_MIN1) };
+    // Fix a parameter set, since the speed depends slightly on r.
+    // Change this to test different combinations of values of r.
+    int eps = 15;
+
+    av1_loop_restoration_precal();
+
+    aom_usec_timer ref_timer;
+    aom_usec_timer_start(&ref_timer);
+    for (i = 0; i < NUM_ITERS; ++i) {
+      for (k = 0; k < height; k += pu_height)
+        for (j = 0; j < width; j += pu_width) {
+          int w = AOMMIN(pu_width, width - j);
+          int h = AOMMIN(pu_height, height - k);
+          uint8_t *input_p = input + k * stride + j;
+          uint8_t *output_p = output + k * out_stride + j;
+          const int ret_c = av1_apply_selfguided_restoration_c(
+              input_p, w, h, stride, eps, xqd, output_p, out_stride, tmpbuf, 8,
+              0);
+          ASSERT_EQ(ret_c, 0);
+        }
+    }
+    aom_usec_timer_mark(&ref_timer);
+    const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+    aom_usec_timer tst_timer;
+    aom_usec_timer_start(&tst_timer);
+    for (i = 0; i < NUM_ITERS; ++i) {
+      for (k = 0; k < height; k += pu_height)
+        for (j = 0; j < width; j += pu_width) {
+          int w = AOMMIN(pu_width, width - j);
+          int h = AOMMIN(pu_height, height - k);
+          uint8_t *input_p = input + k * stride + j;
+          uint8_t *output_p = output + k * out_stride + j;
+          const int ret_tst = tst_fun_(input_p, w, h, stride, eps, xqd,
+                                       output_p, out_stride, tmpbuf, 8, 0);
+          ASSERT_EQ(ret_tst, 0);
+        }
+    }
+    aom_usec_timer_mark(&tst_timer);
+    const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+    std::cout << "[          ] C time = " << ref_time / 1000
+              << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+    EXPECT_GT(ref_time, tst_time)
+        << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n"
+        << "C time: " << ref_time << " us\n"
+        << "SIMD time: " << tst_time << " us\n";
+
+    aom_free(input_);
+    aom_free(output_);
+    aom_free(tmpbuf);
+  }
+
+  void RunCorrectnessTest() {
+    tst_fun_ = GET_PARAM(0);
+    const int pu_width = RESTORATION_PROC_UNIT_SIZE;
+    const int pu_height = RESTORATION_PROC_UNIT_SIZE;
+    // Set the maximum width/height to test here. We actually test a small
+    // range of sizes *up to* this size, so that we can check, eg.,
+    // the behaviour on tiles which are not a multiple of 4 wide.
+    const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
+    const int NUM_ITERS = 81;
+    int i, j, k;
+
+    uint8_t *input_ =
+        (uint8_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint8_t));
+    ASSERT_NE(input_, nullptr);
+    uint8_t *output_ = (uint8_t *)aom_memalign(
+        32, out_stride * (max_h + 32) * sizeof(uint8_t));
+    ASSERT_NE(output_, nullptr);
+    uint8_t *output2_ = (uint8_t *)aom_memalign(
+        32, out_stride * (max_h + 32) * sizeof(uint8_t));
+    ASSERT_NE(output2_, nullptr);
+    int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
+    ASSERT_NE(tmpbuf, nullptr);
+
+    uint8_t *input = input_ + stride * 16 + 16;
+    uint8_t *output = output_ + out_stride * 16 + 16;
+    uint8_t *output2 = output2_ + out_stride * 16 + 16;
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    av1_loop_restoration_precal();
+
+    for (i = 0; i < NUM_ITERS; ++i) {
+      for (j = -16; j < max_h + 16; ++j)
+        for (k = -16; k < max_w + 16; ++k)
+          input[j * stride + k] = rnd.Rand16() & 0xFF;
+
+      int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+                                                          SGRPROJ_PRJ_MIN0),
+                     SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+                                                          SGRPROJ_PRJ_MIN1) };
+      int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
+
+      // Test various tile sizes around 256x256
+      int test_w = max_w - (i / 9);
+      int test_h = max_h - (i % 9);
+
+      for (k = 0; k < test_h; k += pu_height)
+        for (j = 0; j < test_w; j += pu_width) {
+          int w = AOMMIN(pu_width, test_w - j);
+          int h = AOMMIN(pu_height, test_h - k);
+          uint8_t *input_p = input + k * stride + j;
+          uint8_t *output_p = output + k * out_stride + j;
+          uint8_t *output2_p = output2 + k * out_stride + j;
+          const int ret_tst = tst_fun_(input_p, w, h, stride, eps, xqd,
+                                       output_p, out_stride, tmpbuf, 8, 0);
+          ASSERT_EQ(ret_tst, 0);
+          const int ret_c = av1_apply_selfguided_restoration_c(
+              input_p, w, h, stride, eps, xqd, output2_p, out_stride, tmpbuf, 8,
+              0);
+          ASSERT_EQ(ret_c, 0);
+        }
+
+      for (j = 0; j < test_h; ++j)
+        for (k = 0; k < test_w; ++k) {
+          ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
+        }
+    }
+
+    aom_free(input_);
+    aom_free(output_);
+    aom_free(output2_);
+    aom_free(tmpbuf);
+  }
+
+ private:
+  SgrFunc tst_fun_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1SelfguidedFilterTest);
+
+TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
+TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AV1SelfguidedFilterTest,
+    ::testing::Values(av1_apply_selfguided_restoration_sse4_1));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1SelfguidedFilterTest,
+    ::testing::Values(av1_apply_selfguided_restoration_avx2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1SelfguidedFilterTest,
+    ::testing::Values(av1_apply_selfguided_restoration_neon));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+// Test parameter list:
+//  <tst_fun_, bit_depth>
+typedef tuple<SgrFunc, int> HighbdFilterTestParam;
+
+class AV1HighbdSelfguidedFilterTest
+    : public ::testing::TestWithParam<HighbdFilterTestParam> {
+ public:
+  ~AV1HighbdSelfguidedFilterTest() override = default;
+  void SetUp() override {}
+
+ protected:
+  void RunSpeedTest() {
+    tst_fun_ = GET_PARAM(0);
+    const int pu_width = RESTORATION_PROC_UNIT_SIZE;
+    const int pu_height = RESTORATION_PROC_UNIT_SIZE;
+    const int width = 256, height = 256, stride = 288, out_stride = 288;
+    const int NUM_ITERS = 2000;
+    int i, j, k;
+    int bit_depth = GET_PARAM(1);
+    int mask = (1 << bit_depth) - 1;
+
+    uint16_t *input_ =
+        (uint16_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint16_t));
+    ASSERT_NE(input_, nullptr);
+    uint16_t *output_ = (uint16_t *)aom_memalign(
+        32, out_stride * (height + 32) * sizeof(uint16_t));
+    ASSERT_NE(output_, nullptr);
+    int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
+    ASSERT_NE(tmpbuf, nullptr);
+    uint16_t *input = input_ + stride * 16 + 16;
+    uint16_t *output = output_ + out_stride * 16 + 16;
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    for (i = -16; i < height + 16; ++i)
+      for (j = -16; j < width + 16; ++j)
+        input[i * stride + j] = rnd.Rand16() & mask;
+
+    int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+                                                        SGRPROJ_PRJ_MIN0),
+                   SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+                                                        SGRPROJ_PRJ_MIN1) };
+    // Fix a parameter set, since the speed depends slightly on r.
+    // Change this to test different combinations of values of r.
+    int eps = 15;
+
+    av1_loop_restoration_precal();
+
+    aom_usec_timer ref_timer;
+    aom_usec_timer_start(&ref_timer);
+    for (i = 0; i < NUM_ITERS; ++i) {
+      for (k = 0; k < height; k += pu_height)
+        for (j = 0; j < width; j += pu_width) {
+          int w = AOMMIN(pu_width, width - j);
+          int h = AOMMIN(pu_height, height - k);
+          uint16_t *input_p = input + k * stride + j;
+          uint16_t *output_p = output + k * out_stride + j;
+          av1_apply_selfguided_restoration_c(
+              CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+              CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1);
+        }
+    }
+    aom_usec_timer_mark(&ref_timer);
+    const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+    aom_usec_timer tst_timer;
+    aom_usec_timer_start(&tst_timer);
+    for (i = 0; i < NUM_ITERS; ++i) {
+      for (k = 0; k < height; k += pu_height)
+        for (j = 0; j < width; j += pu_width) {
+          int w = AOMMIN(pu_width, width - j);
+          int h = AOMMIN(pu_height, height - k);
+          uint16_t *input_p = input + k * stride + j;
+          uint16_t *output_p = output + k * out_stride + j;
+          tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+                   CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
+                   1);
+        }
+    }
+    aom_usec_timer_mark(&tst_timer);
+    const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+    std::cout << "[          ] C time = " << ref_time / 1000
+              << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+    EXPECT_GT(ref_time, tst_time)
+        << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than "
+           "C.\n"
+        << "C time: " << ref_time << " us\n"
+        << "SIMD time: " << tst_time << " us\n";
+
+    aom_free(input_);
+    aom_free(output_);
+    aom_free(tmpbuf);
+  }
+
+  void RunCorrectnessTest() {
+    tst_fun_ = GET_PARAM(0);
+    const int pu_width = RESTORATION_PROC_UNIT_SIZE;
+    const int pu_height = RESTORATION_PROC_UNIT_SIZE;
+    // Set the maximum width/height to test here. We actually test a small
+    // range of sizes *up to* this size, so that we can check, eg.,
+    // the behaviour on tiles which are not a multiple of 4 wide.
+    const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
+    const int NUM_ITERS = 81;
+    int i, j, k;
+    int bit_depth = GET_PARAM(1);
+    int mask = (1 << bit_depth) - 1;
+
+    uint16_t *input_ =
+        (uint16_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint16_t));
+    ASSERT_NE(input_, nullptr);
+    uint16_t *output_ = (uint16_t *)aom_memalign(
+        32, out_stride * (max_h + 32) * sizeof(uint16_t));
+    ASSERT_NE(output_, nullptr);
+    uint16_t *output2_ = (uint16_t *)aom_memalign(
+        32, out_stride * (max_h + 32) * sizeof(uint16_t));
+    ASSERT_NE(output2_, nullptr);
+    int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
+    ASSERT_NE(tmpbuf, nullptr);
+
+    uint16_t *input = input_ + stride * 16 + 16;
+    uint16_t *output = output_ + out_stride * 16 + 16;
+    uint16_t *output2 = output2_ + out_stride * 16 + 16;
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    av1_loop_restoration_precal();
+
+    for (i = 0; i < NUM_ITERS; ++i) {
+      for (j = -16; j < max_h + 16; ++j)
+        for (k = -16; k < max_w + 16; ++k)
+          input[j * stride + k] = rnd.Rand16() & mask;
+
+      int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+                                                          SGRPROJ_PRJ_MIN0),
+                     SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+                                                          SGRPROJ_PRJ_MIN1) };
+      int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
+
+      // Test various tile sizes around 256x256
+      int test_w = max_w - (i / 9);
+      int test_h = max_h - (i % 9);
+
+      for (k = 0; k < test_h; k += pu_height)
+        for (j = 0; j < test_w; j += pu_width) {
+          int w = AOMMIN(pu_width, test_w - j);
+          int h = AOMMIN(pu_height, test_h - k);
+          uint16_t *input_p = input + k * stride + j;
+          uint16_t *output_p = output + k * out_stride + j;
+          uint16_t *output2_p = output2 + k * out_stride + j;
+          tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+                   CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
+                   1);
+          av1_apply_selfguided_restoration_c(
+              CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+              CONVERT_TO_BYTEPTR(output2_p), out_stride, tmpbuf, bit_depth, 1);
+        }
+
+      for (j = 0; j < test_h; ++j)
+        for (k = 0; k < test_w; ++k)
+          ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
+    }
+
+    aom_free(input_);
+    aom_free(output_);
+    aom_free(output2_);
+    aom_free(tmpbuf);
+  }
+
+ private:
+  SgrFunc tst_fun_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdSelfguidedFilterTest);
+
+TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
+TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
+
+#if HAVE_SSE4_1
+const int highbd_params_sse4_1[] = { 8, 10, 12 };
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AV1HighbdSelfguidedFilterTest,
+    ::testing::Combine(
+        ::testing::Values(av1_apply_selfguided_restoration_sse4_1),
+        ::testing::ValuesIn(highbd_params_sse4_1)));
+#endif
+
+#if HAVE_AVX2
+const int highbd_params_avx2[] = { 8, 10, 12 };
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1HighbdSelfguidedFilterTest,
+    ::testing::Combine(::testing::Values(av1_apply_selfguided_restoration_avx2),
+                       ::testing::ValuesIn(highbd_params_avx2)));
+#endif
+
+#if HAVE_NEON
+const int highbd_params_neon[] = { 8, 10, 12 };
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1HighbdSelfguidedFilterTest,
+    ::testing::Combine(::testing::Values(av1_apply_selfguided_restoration_neon),
+                       ::testing::ValuesIn(highbd_params_neon)));
+#endif
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/set_maps.sh b/third_party/aom/test/set_maps.sh
new file mode 100755
index 0000000000..b79357a2b8
--- /dev/null
+++ b/third_party/aom/test/set_maps.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom set_maps example. To add new tests to this file,
+## do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to set_maps_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required, and set_maps must exist in
+# $LIBAOM_BIN_PATH.
+set_maps_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ -z "$(aom_tool_path set_maps)" ]; then
+    elog "set_maps not found. It must exist in LIBAOM_BIN_PATH or its parent."
+    return 1
+  fi
+}
+
+# Runs set_maps using the codec specified by $1.
+set_maps() {
+  local encoder="$(aom_tool_path set_maps)"
+  local codec="$1"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/set_maps_${codec}.ivf"
+
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
+      ${devnull} || return 1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+set_maps_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    set_maps av1 || return 1
+  fi
+}
+
+set_maps_tests="set_maps_av1"
+
+run_tests set_maps_verify_environment "${set_maps_tests}"
diff --git a/third_party/aom/test/sharpness_test.cc b/third_party/aom/test/sharpness_test.cc
new file mode 100644
index 0000000000..64465c88eb
--- /dev/null
+++ b/third_party/aom/test/sharpness_test.cc
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <unordered_map>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+const unsigned int kFrames = 10;
+const int kBitrate = 500;
+const unsigned int kCqLevel = 18;
+
+// List of psnr thresholds for different test combinations
+// keys: test-mode, cpu-used, sharpness.
+const std::unordered_map<
+    int, std::unordered_map<int, std::unordered_map<int, double>>>
+    kPsnrThreshold = { { static_cast<int>(::libaom_test::kTwoPassGood),
+                         { { 2, { { 2, 37.6 }, { 5, 37.6 } } },
+                           { 4, { { 2, 37.5 }, { 5, 37.5 } } },
+                           { 6, { { 2, 37.5 }, { 5, 37.5 } } } } },
+                       { static_cast<int>(::libaom_test::kAllIntra),
+                         { { 3, { { 2, 42.2 }, { 5, 42.2 } } },
+                           { 6, { { 2, 41.8 }, { 4, 41.9 }, { 5, 41.9 } } },
+                           { 9, { { 2, 41.0 }, { 5, 41.0 } } } } } };
+
+// This class is used to test sharpness parameter configured through control
+// call using AOME_SET_SHARPNESS for different encoder configurations.
+class SharpnessTest
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int,
+                                                 int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  SharpnessTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), sharpness_level_(GET_PARAM(3)), psnr_(0.0),
+        nframes_(0) {}
+
+  ~SharpnessTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    if (encoding_mode_ == ::libaom_test::kTwoPassGood) {
+      cfg_.rc_target_bitrate = kBitrate;
+      cfg_.g_lag_in_frames = 5;
+    }
+  }
+
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AOME_SET_SHARPNESS, sharpness_level_);
+      if (encoding_mode_ == ::libaom_test::kTwoPassGood) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      } else if (encoding_mode_ == ::libaom_test::kAllIntra) {
+        encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel);
+      }
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  double GetPsnrThreshold() {
+    return kPsnrThreshold.at(encoding_mode_).at(cpu_used_).at(sharpness_level_);
+  }
+
+  void DoTest() {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+
+    std::unique_ptr<libaom_test::VideoSource> video(
+        new libaom_test::Y4mVideoSource("paris_352_288_30.y4m", 0, kFrames));
+    ASSERT_NE(video, nullptr);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, GetPsnrThreshold())
+        << "encoding mode = " << encoding_mode_ << ", cpu used = " << cpu_used_
+        << ", sharpness level = " << sharpness_level_;
+  }
+
+ private:
+  const libaom_test::TestMode encoding_mode_;
+  const int cpu_used_;
+  const int sharpness_level_;
+  double psnr_;
+  unsigned int nframes_;
+};
+
+class SharpnessTestLarge : public SharpnessTest {};
+
+class SharpnessAllIntraTest : public SharpnessTest {};
+
+class SharpnessAllIntraTestLarge : public SharpnessTest {};
+
+TEST_P(SharpnessTestLarge, SharpnessPSNRTest) { DoTest(); }
+
+TEST_P(SharpnessAllIntraTest, SharpnessPSNRTest) { DoTest(); }
+
+TEST_P(SharpnessAllIntraTestLarge, SharpnessPSNRTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(SharpnessTestLarge,
+                           ::testing::Values(::libaom_test::kTwoPassGood),
+                           ::testing::Values(2, 4, 6),  // cpu_used
+                           ::testing::Values(2, 5));    // sharpness level
+
+AV1_INSTANTIATE_TEST_SUITE(SharpnessAllIntraTest,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(6),   // cpu_used
+                           ::testing::Values(4));  // sharpness level
+
+AV1_INSTANTIATE_TEST_SUITE(SharpnessAllIntraTestLarge,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(3, 6, 9),  // cpu_used
+                           ::testing::Values(2, 5));    // sharpness level
+}  // namespace
diff --git a/third_party/aom/test/simd_avx2_test.cc b/third_party/aom/test/simd_avx2_test.cc
new file mode 100644
index 0000000000..8a012bff88
--- /dev/null
+++ b/third_party/aom/test/simd_avx2_test.cc
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#define ARCH AVX2
+#define ARCH_POSTFIX(name) name##_avx2
+#define SIMD_NAMESPACE simd_test_avx2
+#include "test/simd_impl.h"
diff --git a/third_party/aom/test/simd_cmp_avx2.cc b/third_party/aom/test/simd_cmp_avx2.cc
new file mode 100644
index 0000000000..cda632bcdf
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_avx2.cc
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#define ARCH AVX2
+#define ARCH_POSTFIX(name) name##_avx2
+#define SIMD_NAMESPACE simd_test_avx2
+#include "test/simd_cmp_impl.h"
diff --git a/third_party/aom/test/simd_cmp_impl.h b/third_party/aom/test/simd_cmp_impl.h
new file mode 100644
index 0000000000..cf85a471cd
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_impl.h
@@ -0,0 +1,2175 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <string>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+// Inlining not forced for the compiler due to some tests calling
+// SIMD_INLINE functions via function pointers
+#undef SIMD_INLINE
+#define SIMD_INLINE static inline
+#include "aom_dsp/aom_simd.h"
+#include "aom_dsp/simd/v256_intrinsics_c.h"
+
+// Machine tuned code goes into this file. This file is included from
+// simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc which define the macros
+// ARCH (=neon, sse2, ssse3, etc), SIMD_NAMESPACE and ARCH_POSTFIX().
+
+#ifdef _MSC_VER
+// Disable "value of intrinsic immediate argument 'value' is out of range
+// 'lowerbound - upperbound'" warning. Visual Studio emits this warning though
+// the parameters are conditionally checked in e.g., v256_shr_n_byte. Adding a
+// mask doesn't always appear to be sufficient.
+#pragma warning(disable : 4556)
+#endif
+
+using libaom_test::ACMRandom;
+
+namespace SIMD_NAMESPACE {
+
+// Wrap templates around intrinsics using immediate values
+template <int shift>
+v64 imm_v64_shl_n_byte(v64 a) {
+  return v64_shl_n_byte(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_byte(v64 a) {
+  return v64_shr_n_byte(a, shift);
+}
+template <int shift>
+v64 imm_v64_shl_n_8(v64 a) {
+  return v64_shl_n_8(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_u8(v64 a) {
+  return v64_shr_n_u8(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_s8(v64 a) {
+  return v64_shr_n_s8(a, shift);
+}
+template <int shift>
+v64 imm_v64_shl_n_16(v64 a) {
+  return v64_shl_n_16(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_u16(v64 a) {
+  return v64_shr_n_u16(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_s16(v64 a) {
+  return v64_shr_n_s16(a, shift);
+}
+template <int shift>
+v64 imm_v64_shl_n_32(v64 a) {
+  return v64_shl_n_32(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_u32(v64 a) {
+  return v64_shr_n_u32(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_s32(v64 a) {
+  return v64_shr_n_s32(a, shift);
+}
+template <int shift>
+v64 imm_v64_align(v64 a, v64 b) {
+  return v64_align(a, b, shift);
+}
+
+// Wrap templates around corresponding C implementations of the above
+template <int shift>
+c_v64 c_imm_v64_shl_n_byte(c_v64 a) {
+  return c_v64_shl_n_byte(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_byte(c_v64 a) {
+  return c_v64_shr_n_byte(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shl_n_8(c_v64 a) {
+  return c_v64_shl_n_8(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_u8(c_v64 a) {
+  return c_v64_shr_n_u8(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_s8(c_v64 a) {
+  return c_v64_shr_n_s8(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shl_n_16(c_v64 a) {
+  return c_v64_shl_n_16(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_u16(c_v64 a) {
+  return c_v64_shr_n_u16(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_s16(c_v64 a) {
+  return c_v64_shr_n_s16(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shl_n_32(c_v64 a) {
+  return c_v64_shl_n_32(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_u32(c_v64 a) {
+  return c_v64_shr_n_u32(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_s32(c_v64 a) {
+  return c_v64_shr_n_s32(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_align(c_v64 a, c_v64 b) {
+  return c_v64_align(a, b, shift);
+}
+
+template <int shift>
+v128 imm_v128_shl_n_byte(v128 a) {
+  return v128_shl_n_byte(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_byte(v128 a) {
+  return v128_shr_n_byte(a, shift);
+}
+template <int shift>
+v128 imm_v128_shl_n_8(v128 a) {
+  return v128_shl_n_8(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_u8(v128 a) {
+  return v128_shr_n_u8(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_s8(v128 a) {
+  return v128_shr_n_s8(a, shift);
+}
+template <int shift>
+v128 imm_v128_shl_n_16(v128 a) {
+  return v128_shl_n_16(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_u16(v128 a) {
+  return v128_shr_n_u16(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_s16(v128 a) {
+  return v128_shr_n_s16(a, shift);
+}
+template <int shift>
+v128 imm_v128_shl_n_32(v128 a) {
+  return v128_shl_n_32(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_u32(v128 a) {
+  return v128_shr_n_u32(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_s32(v128 a) {
+  return v128_shr_n_s32(a, shift);
+}
+template <int shift>
+v128 imm_v128_shl_n_64(v128 a) {
+  return v128_shl_n_64(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_u64(v128 a) {
+  return v128_shr_n_u64(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_s64(v128 a) {
+  return v128_shr_n_s64(a, shift);
+}
+template <int shift>
+v128 imm_v128_align(v128 a, v128 b) {
+  return v128_align(a, b, shift);
+}
+
+template <int shift>
+c_v128 c_imm_v128_shl_n_byte(c_v128 a) {
+  return c_v128_shl_n_byte(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_byte(c_v128 a) {
+  return c_v128_shr_n_byte(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shl_n_8(c_v128 a) {
+  return c_v128_shl_n_8(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_u8(c_v128 a) {
+  return c_v128_shr_n_u8(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_s8(c_v128 a) {
+  return c_v128_shr_n_s8(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shl_n_16(c_v128 a) {
+  return c_v128_shl_n_16(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_u16(c_v128 a) {
+  return c_v128_shr_n_u16(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_s16(c_v128 a) {
+  return c_v128_shr_n_s16(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shl_n_32(c_v128 a) {
+  return c_v128_shl_n_32(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_u32(c_v128 a) {
+  return c_v128_shr_n_u32(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_s32(c_v128 a) {
+  return c_v128_shr_n_s32(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shl_n_64(c_v128 a) {
+  return c_v128_shl_n_64(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_u64(c_v128 a) {
+  return c_v128_shr_n_u64(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_s64(c_v128 a) {
+  return c_v128_shr_n_s64(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_align(c_v128 a, c_v128 b) {
+  return c_v128_align(a, b, shift);
+}
+
+template <int shift>
+v256 imm_v256_shl_n_word(v256 a) {
+  return v256_shl_n_word(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_word(v256 a) {
+  return v256_shr_n_word(a, shift);
+}
+template <int shift>
+v256 imm_v256_shl_n_byte(v256 a) {
+  return v256_shl_n_byte(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_byte(v256 a) {
+  return v256_shr_n_byte(a, shift);
+}
+template <int shift>
+v256 imm_v256_shl_n_8(v256 a) {
+  return v256_shl_n_8(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_u8(v256 a) {
+  return v256_shr_n_u8(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_s8(v256 a) {
+  return v256_shr_n_s8(a, shift);
+}
+template <int shift>
+v256 imm_v256_shl_n_16(v256 a) {
+  return v256_shl_n_16(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_u16(v256 a) {
+  return v256_shr_n_u16(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_s16(v256 a) {
+  return v256_shr_n_s16(a, shift);
+}
+template <int shift>
+v256 imm_v256_shl_n_32(v256 a) {
+  return v256_shl_n_32(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_u32(v256 a) {
+  return v256_shr_n_u32(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_s32(v256 a) {
+  return v256_shr_n_s32(a, shift);
+}
+template <int shift>
+v256 imm_v256_shl_n_64(v256 a) {
+  return v256_shl_n_64(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_u64(v256 a) {
+  return v256_shr_n_u64(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_s64(v256 a) {
+  return v256_shr_n_s64(a, shift);
+}
+template <int shift>
+v256 imm_v256_align(v256 a, v256 b) {
+  return v256_align(a, b, shift);
+}
+
+template <int shift>
+c_v256 c_imm_v256_shl_n_word(c_v256 a) {
+  return c_v256_shl_n_word(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_word(c_v256 a) {
+  return c_v256_shr_n_word(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shl_n_byte(c_v256 a) {
+  return c_v256_shl_n_byte(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_byte(c_v256 a) {
+  return c_v256_shr_n_byte(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shl_n_8(c_v256 a) {
+  return c_v256_shl_n_8(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_u8(c_v256 a) {
+  return c_v256_shr_n_u8(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_s8(c_v256 a) {
+  return c_v256_shr_n_s8(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shl_n_16(c_v256 a) {
+  return c_v256_shl_n_16(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_u16(c_v256 a) {
+  return c_v256_shr_n_u16(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_s16(c_v256 a) {
+  return c_v256_shr_n_s16(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shl_n_32(c_v256 a) {
+  return c_v256_shl_n_32(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_u32(c_v256 a) {
+  return c_v256_shr_n_u32(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_s32(c_v256 a) {
+  return c_v256_shr_n_s32(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shl_n_64(c_v256 a) {
+  return c_v256_shl_n_64(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_u64(c_v256 a) {
+  return c_v256_shr_n_u64(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_s64(c_v256 a) {
+  return c_v256_shr_n_s64(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_align(c_v256 a, c_v256 b) {
+  return c_v256_align(a, b, shift);
+}
+
+// Wrappers around the the SAD and SSD functions
+uint32_t v64_sad_u8(v64 a, v64 b) {
+  return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b));
+}
+uint32_t v64_ssd_u8(v64 a, v64 b) {
+  return v64_ssd_u8_sum(::v64_ssd_u8(v64_ssd_u8_init(), a, b));
+}
+
+uint32_t c_v64_sad_u8(c_v64 a, c_v64 b) {
+  return c_v64_sad_u8_sum(::c_v64_sad_u8(c_v64_sad_u8_init(), a, b));
+}
+uint32_t c_v64_ssd_u8(c_v64 a, c_v64 b) {
+  return c_v64_ssd_u8_sum(::c_v64_ssd_u8(c_v64_ssd_u8_init(), a, b));
+}
+uint32_t v128_sad_u8(v128 a, v128 b) {
+  return v128_sad_u8_sum(::v128_sad_u8(v128_sad_u8_init(), a, b));
+}
+uint32_t v128_ssd_u8(v128 a, v128 b) {
+  return v128_ssd_u8_sum(::v128_ssd_u8(v128_ssd_u8_init(), a, b));
+}
+uint32_t c_v128_sad_u8(c_v128 a, c_v128 b) {
+  return c_v128_sad_u8_sum(::c_v128_sad_u8(c_v128_sad_u8_init(), a, b));
+}
+uint32_t c_v128_ssd_u8(c_v128 a, c_v128 b) {
+  return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), a, b));
+}
+uint32_t v128_sad_u16(v128 a, v128 b) {
+  return v128_sad_u16_sum(::v128_sad_u16(v128_sad_u16_init(), a, b));
+}
+uint64_t v128_ssd_s16(v128 a, v128 b) {
+  return v128_ssd_s16_sum(::v128_ssd_s16(v128_ssd_s16_init(), a, b));
+}
+uint32_t c_v128_sad_u16(c_v128 a, c_v128 b) {
+  return c_v128_sad_u16_sum(::c_v128_sad_u16(c_v128_sad_u16_init(), a, b));
+}
+uint64_t c_v128_ssd_s16(c_v128 a, c_v128 b) {
+  return c_v128_ssd_s16_sum(::c_v128_ssd_s16(c_v128_ssd_s16_init(), a, b));
+}
+uint32_t v256_sad_u8(v256 a, v256 b) {
+  return v256_sad_u8_sum(::v256_sad_u8(v256_sad_u8_init(), a, b));
+}
+uint32_t v256_ssd_u8(v256 a, v256 b) {
+  return v256_ssd_u8_sum(::v256_ssd_u8(v256_ssd_u8_init(), a, b));
+}
+uint32_t c_v256_sad_u8(c_v256 a, c_v256 b) {
+  return c_v256_sad_u8_sum(::c_v256_sad_u8(c_v256_sad_u8_init(), a, b));
+}
+uint32_t c_v256_ssd_u8(c_v256 a, c_v256 b) {
+  return c_v256_ssd_u8_sum(::c_v256_ssd_u8(c_v256_ssd_u8_init(), a, b));
+}
+uint32_t v256_sad_u16(v256 a, v256 b) {
+  return v256_sad_u16_sum(::v256_sad_u16(v256_sad_u16_init(), a, b));
+}
+uint64_t v256_ssd_s16(v256 a, v256 b) {
+  return v256_ssd_s16_sum(::v256_ssd_s16(v256_ssd_s16_init(), a, b));
+}
+uint32_t c_v256_sad_u16(c_v256 a, c_v256 b) {
+  return c_v256_sad_u16_sum(::c_v256_sad_u16(c_v256_sad_u16_init(), a, b));
+}
+uint64_t c_v256_ssd_s16(c_v256 a, c_v256 b) {
+  return c_v256_ssd_s16_sum(::c_v256_ssd_s16(c_v256_ssd_s16_init(), a, b));
+}
+
+namespace {
+
+typedef void (*fptr)();
+
+typedef struct {
+  const char *name;
+  fptr ref;
+  fptr simd;
+} mapping;
+
+#define MAP(name) \
+  { #name, reinterpret_cast < fptr>(c_##name), reinterpret_cast < fptr>(name) }
+
+const mapping m[] = { MAP(v64_sad_u8),
+                      MAP(v64_ssd_u8),
+                      MAP(v64_add_8),
+                      MAP(v64_add_16),
+                      MAP(v64_sadd_s8),
+                      MAP(v64_sadd_u8),
+                      MAP(v64_sadd_s16),
+                      MAP(v64_add_32),
+                      MAP(v64_sub_8),
+                      MAP(v64_ssub_u8),
+                      MAP(v64_ssub_s8),
+                      MAP(v64_sub_16),
+                      MAP(v64_ssub_s16),
+                      MAP(v64_ssub_u16),
+                      MAP(v64_sub_32),
+                      MAP(v64_ziplo_8),
+                      MAP(v64_ziphi_8),
+                      MAP(v64_ziplo_16),
+                      MAP(v64_ziphi_16),
+                      MAP(v64_ziplo_32),
+                      MAP(v64_ziphi_32),
+                      MAP(v64_pack_s32_u16),
+                      MAP(v64_pack_s32_s16),
+                      MAP(v64_pack_s16_u8),
+                      MAP(v64_pack_s16_s8),
+                      MAP(v64_unziphi_8),
+                      MAP(v64_unziplo_8),
+                      MAP(v64_unziphi_16),
+                      MAP(v64_unziplo_16),
+                      MAP(v64_or),
+                      MAP(v64_xor),
+                      MAP(v64_and),
+                      MAP(v64_andn),
+                      MAP(v64_mullo_s16),
+                      MAP(v64_mulhi_s16),
+                      MAP(v64_mullo_s32),
+                      MAP(v64_madd_s16),
+                      MAP(v64_madd_us8),
+                      MAP(v64_avg_u8),
+                      MAP(v64_rdavg_u8),
+                      MAP(v64_rdavg_u16),
+                      MAP(v64_avg_u16),
+                      MAP(v64_min_u8),
+                      MAP(v64_max_u8),
+                      MAP(v64_min_s8),
+                      MAP(v64_max_s8),
+                      MAP(v64_min_s16),
+                      MAP(v64_max_s16),
+                      MAP(v64_cmpgt_s8),
+                      MAP(v64_cmplt_s8),
+                      MAP(v64_cmpeq_8),
+                      MAP(v64_cmpgt_s16),
+                      MAP(v64_cmplt_s16),
+                      MAP(v64_cmpeq_16),
+                      MAP(v64_shuffle_8),
+                      MAP(imm_v64_align<1>),
+                      MAP(imm_v64_align<2>),
+                      MAP(imm_v64_align<3>),
+                      MAP(imm_v64_align<4>),
+                      MAP(imm_v64_align<5>),
+                      MAP(imm_v64_align<6>),
+                      MAP(imm_v64_align<7>),
+                      MAP(v64_abs_s8),
+                      MAP(v64_abs_s16),
+                      MAP(v64_unpacklo_u8_s16),
+                      MAP(v64_unpackhi_u8_s16),
+                      MAP(v64_unpacklo_s8_s16),
+                      MAP(v64_unpackhi_s8_s16),
+                      MAP(v64_unpacklo_u16_s32),
+                      MAP(v64_unpacklo_s16_s32),
+                      MAP(v64_unpackhi_u16_s32),
+                      MAP(v64_unpackhi_s16_s32),
+                      MAP(imm_v64_shr_n_byte<1>),
+                      MAP(imm_v64_shr_n_byte<2>),
+                      MAP(imm_v64_shr_n_byte<3>),
+                      MAP(imm_v64_shr_n_byte<4>),
+                      MAP(imm_v64_shr_n_byte<5>),
+                      MAP(imm_v64_shr_n_byte<6>),
+                      MAP(imm_v64_shr_n_byte<7>),
+                      MAP(imm_v64_shl_n_byte<1>),
+                      MAP(imm_v64_shl_n_byte<2>),
+                      MAP(imm_v64_shl_n_byte<3>),
+                      MAP(imm_v64_shl_n_byte<4>),
+                      MAP(imm_v64_shl_n_byte<5>),
+                      MAP(imm_v64_shl_n_byte<6>),
+                      MAP(imm_v64_shl_n_byte<7>),
+                      MAP(imm_v64_shl_n_8<1>),
+                      MAP(imm_v64_shl_n_8<2>),
+                      MAP(imm_v64_shl_n_8<3>),
+                      MAP(imm_v64_shl_n_8<4>),
+                      MAP(imm_v64_shl_n_8<5>),
+                      MAP(imm_v64_shl_n_8<6>),
+                      MAP(imm_v64_shl_n_8<7>),
+                      MAP(imm_v64_shr_n_u8<1>),
+                      MAP(imm_v64_shr_n_u8<2>),
+                      MAP(imm_v64_shr_n_u8<3>),
+                      MAP(imm_v64_shr_n_u8<4>),
+                      MAP(imm_v64_shr_n_u8<5>),
+                      MAP(imm_v64_shr_n_u8<6>),
+                      MAP(imm_v64_shr_n_u8<7>),
+                      MAP(imm_v64_shr_n_s8<1>),
+                      MAP(imm_v64_shr_n_s8<2>),
+                      MAP(imm_v64_shr_n_s8<3>),
+                      MAP(imm_v64_shr_n_s8<4>),
+                      MAP(imm_v64_shr_n_s8<5>),
+                      MAP(imm_v64_shr_n_s8<6>),
+                      MAP(imm_v64_shr_n_s8<7>),
+                      MAP(imm_v64_shl_n_16<1>),
+                      MAP(imm_v64_shl_n_16<2>),
+                      MAP(imm_v64_shl_n_16<4>),
+                      MAP(imm_v64_shl_n_16<6>),
+                      MAP(imm_v64_shl_n_16<8>),
+                      MAP(imm_v64_shl_n_16<10>),
+                      MAP(imm_v64_shl_n_16<12>),
+                      MAP(imm_v64_shl_n_16<14>),
+                      MAP(imm_v64_shr_n_u16<1>),
+                      MAP(imm_v64_shr_n_u16<2>),
+                      MAP(imm_v64_shr_n_u16<4>),
+                      MAP(imm_v64_shr_n_u16<6>),
+                      MAP(imm_v64_shr_n_u16<8>),
+                      MAP(imm_v64_shr_n_u16<10>),
+                      MAP(imm_v64_shr_n_u16<12>),
+                      MAP(imm_v64_shr_n_u16<14>),
+                      MAP(imm_v64_shr_n_s16<1>),
+                      MAP(imm_v64_shr_n_s16<2>),
+                      MAP(imm_v64_shr_n_s16<4>),
+                      MAP(imm_v64_shr_n_s16<6>),
+                      MAP(imm_v64_shr_n_s16<8>),
+                      MAP(imm_v64_shr_n_s16<10>),
+                      MAP(imm_v64_shr_n_s16<12>),
+                      MAP(imm_v64_shr_n_s16<14>),
+                      MAP(imm_v64_shl_n_32<1>),
+                      MAP(imm_v64_shl_n_32<4>),
+                      MAP(imm_v64_shl_n_32<8>),
+                      MAP(imm_v64_shl_n_32<12>),
+                      MAP(imm_v64_shl_n_32<16>),
+                      MAP(imm_v64_shl_n_32<20>),
+                      MAP(imm_v64_shl_n_32<24>),
+                      MAP(imm_v64_shl_n_32<28>),
+                      MAP(imm_v64_shr_n_u32<1>),
+                      MAP(imm_v64_shr_n_u32<4>),
+                      MAP(imm_v64_shr_n_u32<8>),
+                      MAP(imm_v64_shr_n_u32<12>),
+                      MAP(imm_v64_shr_n_u32<16>),
+                      MAP(imm_v64_shr_n_u32<20>),
+                      MAP(imm_v64_shr_n_u32<24>),
+                      MAP(imm_v64_shr_n_u32<28>),
+                      MAP(imm_v64_shr_n_s32<1>),
+                      MAP(imm_v64_shr_n_s32<4>),
+                      MAP(imm_v64_shr_n_s32<8>),
+                      MAP(imm_v64_shr_n_s32<12>),
+                      MAP(imm_v64_shr_n_s32<16>),
+                      MAP(imm_v64_shr_n_s32<20>),
+                      MAP(imm_v64_shr_n_s32<24>),
+                      MAP(imm_v64_shr_n_s32<28>),
+                      MAP(v64_shl_8),
+                      MAP(v64_shr_u8),
+                      MAP(v64_shr_s8),
+                      MAP(v64_shl_16),
+                      MAP(v64_shr_u16),
+                      MAP(v64_shr_s16),
+                      MAP(v64_shl_32),
+                      MAP(v64_shr_u32),
+                      MAP(v64_shr_s32),
+                      MAP(v64_hadd_u8),
+                      MAP(v64_hadd_s16),
+                      MAP(v64_dotp_s16),
+                      MAP(v64_dotp_su8),
+                      MAP(v64_u64),
+                      MAP(v64_low_u32),
+                      MAP(v64_high_u32),
+                      MAP(v64_low_s32),
+                      MAP(v64_high_s32),
+                      MAP(v64_dup_8),
+                      MAP(v64_dup_16),
+                      MAP(v64_dup_32),
+                      MAP(v64_from_32),
+                      MAP(v64_zero),
+                      MAP(v64_from_16),
+                      MAP(v128_sad_u8),
+                      MAP(v128_ssd_u8),
+                      MAP(v128_sad_u16),
+                      MAP(v128_ssd_s16),
+                      MAP(v128_add_8),
+                      MAP(v128_add_16),
+                      MAP(v128_sadd_s8),
+                      MAP(v128_sadd_u8),
+                      MAP(v128_sadd_s16),
+                      MAP(v128_add_32),
+                      MAP(v128_add_64),
+                      MAP(v128_sub_8),
+                      MAP(v128_ssub_u8),
+                      MAP(v128_ssub_s8),
+                      MAP(v128_sub_16),
+                      MAP(v128_ssub_s16),
+                      MAP(v128_ssub_u16),
+                      MAP(v128_sub_32),
+                      MAP(v128_sub_64),
+                      MAP(v128_ziplo_8),
+                      MAP(v128_ziphi_8),
+                      MAP(v128_ziplo_16),
+                      MAP(v128_ziphi_16),
+                      MAP(v128_ziplo_32),
+                      MAP(v128_ziphi_32),
+                      MAP(v128_ziplo_64),
+                      MAP(v128_ziphi_64),
+                      MAP(v128_unziphi_8),
+                      MAP(v128_unziplo_8),
+                      MAP(v128_unziphi_16),
+                      MAP(v128_unziplo_16),
+                      MAP(v128_unziphi_32),
+                      MAP(v128_unziplo_32),
+                      MAP(v128_pack_s32_u16),
+                      MAP(v128_pack_s32_s16),
+                      MAP(v128_pack_s16_u8),
+                      MAP(v128_pack_s16_s8),
+                      MAP(v128_or),
+                      MAP(v128_xor),
+                      MAP(v128_and),
+                      MAP(v128_andn),
+                      MAP(v128_mullo_s16),
+                      MAP(v128_mulhi_s16),
+                      MAP(v128_mullo_s32),
+                      MAP(v128_madd_s16),
+                      MAP(v128_madd_us8),
+                      MAP(v128_avg_u8),
+                      MAP(v128_rdavg_u8),
+                      MAP(v128_rdavg_u16),
+                      MAP(v128_avg_u16),
+                      MAP(v128_min_u8),
+                      MAP(v128_max_u8),
+                      MAP(v128_min_s8),
+                      MAP(v128_max_s8),
+                      MAP(v128_min_s16),
+                      MAP(v128_max_s16),
+                      MAP(v128_min_s32),
+                      MAP(v128_max_s32),
+                      MAP(v128_cmpgt_s8),
+                      MAP(v128_cmplt_s8),
+                      MAP(v128_cmpeq_8),
+                      MAP(v128_cmpgt_s16),
+                      MAP(v128_cmpeq_16),
+                      MAP(v128_cmplt_s16),
+                      MAP(v128_cmpgt_s32),
+                      MAP(v128_cmpeq_32),
+                      MAP(v128_cmplt_s32),
+                      MAP(v128_shuffle_8),
+                      MAP(imm_v128_align<1>),
+                      MAP(imm_v128_align<2>),
+                      MAP(imm_v128_align<3>),
+                      MAP(imm_v128_align<4>),
+                      MAP(imm_v128_align<5>),
+                      MAP(imm_v128_align<6>),
+                      MAP(imm_v128_align<7>),
+                      MAP(imm_v128_align<8>),
+                      MAP(imm_v128_align<9>),
+                      MAP(imm_v128_align<10>),
+                      MAP(imm_v128_align<11>),
+                      MAP(imm_v128_align<12>),
+                      MAP(imm_v128_align<13>),
+                      MAP(imm_v128_align<14>),
+                      MAP(imm_v128_align<15>),
+                      MAP(v128_abs_s8),
+                      MAP(v128_abs_s16),
+                      MAP(v128_padd_u8),
+                      MAP(v128_padd_s16),
+                      MAP(v128_unpacklo_u16_s32),
+                      MAP(v128_unpacklo_s16_s32),
+                      MAP(v128_unpackhi_u16_s32),
+                      MAP(v128_unpackhi_s16_s32),
+                      MAP(imm_v128_shr_n_byte<1>),
+                      MAP(imm_v128_shr_n_byte<2>),
+                      MAP(imm_v128_shr_n_byte<3>),
+                      MAP(imm_v128_shr_n_byte<4>),
+                      MAP(imm_v128_shr_n_byte<5>),
+                      MAP(imm_v128_shr_n_byte<6>),
+                      MAP(imm_v128_shr_n_byte<7>),
+                      MAP(imm_v128_shr_n_byte<8>),
+                      MAP(imm_v128_shr_n_byte<9>),
+                      MAP(imm_v128_shr_n_byte<10>),
+                      MAP(imm_v128_shr_n_byte<11>),
+                      MAP(imm_v128_shr_n_byte<12>),
+                      MAP(imm_v128_shr_n_byte<13>),
+                      MAP(imm_v128_shr_n_byte<14>),
+                      MAP(imm_v128_shr_n_byte<15>),
+                      MAP(imm_v128_shl_n_byte<1>),
+                      MAP(imm_v128_shl_n_byte<2>),
+                      MAP(imm_v128_shl_n_byte<3>),
+                      MAP(imm_v128_shl_n_byte<4>),
+                      MAP(imm_v128_shl_n_byte<5>),
+                      MAP(imm_v128_shl_n_byte<6>),
+                      MAP(imm_v128_shl_n_byte<7>),
+                      MAP(imm_v128_shl_n_byte<8>),
+                      MAP(imm_v128_shl_n_byte<9>),
+                      MAP(imm_v128_shl_n_byte<10>),
+                      MAP(imm_v128_shl_n_byte<11>),
+                      MAP(imm_v128_shl_n_byte<12>),
+                      MAP(imm_v128_shl_n_byte<13>),
+                      MAP(imm_v128_shl_n_byte<14>),
+                      MAP(imm_v128_shl_n_byte<15>),
+                      MAP(imm_v128_shl_n_8<1>),
+                      MAP(imm_v128_shl_n_8<2>),
+                      MAP(imm_v128_shl_n_8<3>),
+                      MAP(imm_v128_shl_n_8<4>),
+                      MAP(imm_v128_shl_n_8<5>),
+                      MAP(imm_v128_shl_n_8<6>),
+                      MAP(imm_v128_shl_n_8<7>),
+                      MAP(imm_v128_shr_n_u8<1>),
+                      MAP(imm_v128_shr_n_u8<2>),
+                      MAP(imm_v128_shr_n_u8<3>),
+                      MAP(imm_v128_shr_n_u8<4>),
+                      MAP(imm_v128_shr_n_u8<5>),
+                      MAP(imm_v128_shr_n_u8<6>),
+                      MAP(imm_v128_shr_n_u8<7>),
+                      MAP(imm_v128_shr_n_s8<1>),
+                      MAP(imm_v128_shr_n_s8<2>),
+                      MAP(imm_v128_shr_n_s8<3>),
+                      MAP(imm_v128_shr_n_s8<4>),
+                      MAP(imm_v128_shr_n_s8<5>),
+                      MAP(imm_v128_shr_n_s8<6>),
+                      MAP(imm_v128_shr_n_s8<7>),
+                      MAP(imm_v128_shl_n_16<1>),
+                      MAP(imm_v128_shl_n_16<2>),
+                      MAP(imm_v128_shl_n_16<4>),
+                      MAP(imm_v128_shl_n_16<6>),
+                      MAP(imm_v128_shl_n_16<8>),
+                      MAP(imm_v128_shl_n_16<10>),
+                      MAP(imm_v128_shl_n_16<12>),
+                      MAP(imm_v128_shl_n_16<14>),
+                      MAP(imm_v128_shr_n_u16<1>),
+                      MAP(imm_v128_shr_n_u16<2>),
+                      MAP(imm_v128_shr_n_u16<4>),
+                      MAP(imm_v128_shr_n_u16<6>),
+                      MAP(imm_v128_shr_n_u16<8>),
+                      MAP(imm_v128_shr_n_u16<10>),
+                      MAP(imm_v128_shr_n_u16<12>),
+                      MAP(imm_v128_shr_n_u16<14>),
+                      MAP(imm_v128_shr_n_s16<1>),
+                      MAP(imm_v128_shr_n_s16<2>),
+                      MAP(imm_v128_shr_n_s16<4>),
+                      MAP(imm_v128_shr_n_s16<6>),
+                      MAP(imm_v128_shr_n_s16<8>),
+                      MAP(imm_v128_shr_n_s16<10>),
+                      MAP(imm_v128_shr_n_s16<12>),
+                      MAP(imm_v128_shr_n_s16<14>),
+                      MAP(imm_v128_shl_n_32<1>),
+                      MAP(imm_v128_shl_n_32<4>),
+                      MAP(imm_v128_shl_n_32<8>),
+                      MAP(imm_v128_shl_n_32<12>),
+                      MAP(imm_v128_shl_n_32<16>),
+                      MAP(imm_v128_shl_n_32<20>),
+                      MAP(imm_v128_shl_n_32<24>),
+                      MAP(imm_v128_shl_n_32<28>),
+                      MAP(imm_v128_shr_n_u32<1>),
+                      MAP(imm_v128_shr_n_u32<4>),
+                      MAP(imm_v128_shr_n_u32<8>),
+                      MAP(imm_v128_shr_n_u32<12>),
+                      MAP(imm_v128_shr_n_u32<16>),
+                      MAP(imm_v128_shr_n_u32<20>),
+                      MAP(imm_v128_shr_n_u32<24>),
+                      MAP(imm_v128_shr_n_u32<28>),
+                      MAP(imm_v128_shr_n_s32<1>),
+                      MAP(imm_v128_shr_n_s32<4>),
+                      MAP(imm_v128_shr_n_s32<8>),
+                      MAP(imm_v128_shr_n_s32<12>),
+                      MAP(imm_v128_shr_n_s32<16>),
+                      MAP(imm_v128_shr_n_s32<20>),
+                      MAP(imm_v128_shr_n_s32<24>),
+                      MAP(imm_v128_shr_n_s32<28>),
+                      MAP(imm_v128_shl_n_64<1>),
+                      MAP(imm_v128_shl_n_64<4>),
+                      MAP(imm_v128_shl_n_64<8>),
+                      MAP(imm_v128_shl_n_64<12>),
+                      MAP(imm_v128_shl_n_64<16>),
+                      MAP(imm_v128_shl_n_64<20>),
+                      MAP(imm_v128_shl_n_64<24>),
+                      MAP(imm_v128_shl_n_64<28>),
+                      MAP(imm_v128_shl_n_64<32>),
+                      MAP(imm_v128_shl_n_64<36>),
+                      MAP(imm_v128_shl_n_64<40>),
+                      MAP(imm_v128_shl_n_64<44>),
+                      MAP(imm_v128_shl_n_64<48>),
+                      MAP(imm_v128_shl_n_64<52>),
+                      MAP(imm_v128_shl_n_64<56>),
+                      MAP(imm_v128_shl_n_64<60>),
+                      MAP(imm_v128_shr_n_u64<1>),
+                      MAP(imm_v128_shr_n_u64<4>),
+                      MAP(imm_v128_shr_n_u64<8>),
+                      MAP(imm_v128_shr_n_u64<12>),
+                      MAP(imm_v128_shr_n_u64<16>),
+                      MAP(imm_v128_shr_n_u64<20>),
+                      MAP(imm_v128_shr_n_u64<24>),
+                      MAP(imm_v128_shr_n_u64<28>),
+                      MAP(imm_v128_shr_n_u64<32>),
+                      MAP(imm_v128_shr_n_u64<36>),
+                      MAP(imm_v128_shr_n_u64<40>),
+                      MAP(imm_v128_shr_n_u64<44>),
+                      MAP(imm_v128_shr_n_u64<48>),
+                      MAP(imm_v128_shr_n_u64<52>),
+                      MAP(imm_v128_shr_n_u64<56>),
+                      MAP(imm_v128_shr_n_u64<60>),
+                      MAP(imm_v128_shr_n_s64<1>),
+                      MAP(imm_v128_shr_n_s64<4>),
+                      MAP(imm_v128_shr_n_s64<8>),
+                      MAP(imm_v128_shr_n_s64<12>),
+                      MAP(imm_v128_shr_n_s64<16>),
+                      MAP(imm_v128_shr_n_s64<20>),
+                      MAP(imm_v128_shr_n_s64<24>),
+                      MAP(imm_v128_shr_n_s64<28>),
+                      MAP(imm_v128_shr_n_s64<32>),
+                      MAP(imm_v128_shr_n_s64<36>),
+                      MAP(imm_v128_shr_n_s64<40>),
+                      MAP(imm_v128_shr_n_s64<44>),
+                      MAP(imm_v128_shr_n_s64<48>),
+                      MAP(imm_v128_shr_n_s64<52>),
+                      MAP(imm_v128_shr_n_s64<56>),
+                      MAP(imm_v128_shr_n_s64<60>),
+                      MAP(v128_from_v64),
+                      MAP(v128_zip_8),
+                      MAP(v128_zip_16),
+                      MAP(v128_zip_32),
+                      MAP(v128_mul_s16),
+                      MAP(v128_unpack_u8_s16),
+                      MAP(v128_unpack_s8_s16),
+                      MAP(v128_unpack_u16_s32),
+                      MAP(v128_unpack_s16_s32),
+                      MAP(v128_shl_8),
+                      MAP(v128_shr_u8),
+                      MAP(v128_shr_s8),
+                      MAP(v128_shl_16),
+                      MAP(v128_shr_u16),
+                      MAP(v128_shr_s16),
+                      MAP(v128_shl_32),
+                      MAP(v128_shr_u32),
+                      MAP(v128_shr_s32),
+                      MAP(v128_shl_64),
+                      MAP(v128_shr_u64),
+                      MAP(v128_shr_s64),
+                      MAP(v128_hadd_u8),
+                      MAP(v128_dotp_su8),
+                      MAP(v128_dotp_s16),
+                      MAP(v128_dotp_s32),
+                      MAP(v128_low_u32),
+                      MAP(v128_low_v64),
+                      MAP(v128_high_v64),
+                      MAP(v128_from_64),
+                      MAP(v128_from_32),
+                      MAP(v128_movemask_8),
+                      MAP(v128_zero),
+                      MAP(v128_dup_8),
+                      MAP(v128_dup_16),
+                      MAP(v128_dup_32),
+                      MAP(v128_dup_64),
+                      MAP(v128_unpacklo_u8_s16),
+                      MAP(v128_unpackhi_u8_s16),
+                      MAP(v128_unpacklo_s8_s16),
+                      MAP(v128_unpackhi_s8_s16),
+                      MAP(v128_blend_8),
+                      MAP(u32_load_unaligned),
+                      MAP(u32_store_unaligned),
+                      MAP(v64_load_unaligned),
+                      MAP(v64_store_unaligned),
+                      MAP(v128_load_unaligned),
+                      MAP(v128_store_unaligned),
+                      MAP(v256_sad_u8),
+                      MAP(v256_ssd_u8),
+                      MAP(v256_sad_u16),
+                      MAP(v256_ssd_s16),
+                      MAP(v256_hadd_u8),
+                      MAP(v256_low_u64),
+                      MAP(v256_dotp_su8),
+                      MAP(v256_dotp_s16),
+                      MAP(v256_dotp_s32),
+                      MAP(v256_add_8),
+                      MAP(v256_add_16),
+                      MAP(v256_sadd_s8),
+                      MAP(v256_sadd_u8),
+                      MAP(v256_sadd_s16),
+                      MAP(v256_add_32),
+                      MAP(v256_add_64),
+                      MAP(v256_sub_8),
+                      MAP(v256_ssub_u8),
+                      MAP(v256_ssub_s8),
+                      MAP(v256_sub_16),
+                      MAP(v256_ssub_u16),
+                      MAP(v256_ssub_s16),
+                      MAP(v256_sub_32),
+                      MAP(v256_sub_64),
+                      MAP(v256_ziplo_8),
+                      MAP(v256_ziphi_8),
+                      MAP(v256_ziplo_16),
+                      MAP(v256_ziphi_16),
+                      MAP(v256_ziplo_32),
+                      MAP(v256_ziphi_32),
+                      MAP(v256_ziplo_64),
+                      MAP(v256_ziphi_64),
+                      MAP(v256_unziphi_8),
+                      MAP(v256_unziplo_8),
+                      MAP(v256_unziphi_16),
+                      MAP(v256_unziplo_16),
+                      MAP(v256_unziphi_32),
+                      MAP(v256_unziplo_32),
+                      MAP(v256_unziphi_64),
+                      MAP(v256_unziplo_64),
+                      MAP(v256_pack_s32_u16),
+                      MAP(v256_pack_s32_s16),
+                      MAP(v256_pack_s16_u8),
+                      MAP(v256_pack_s16_s8),
+                      MAP(v256_or),
+                      MAP(v256_xor),
+                      MAP(v256_and),
+                      MAP(v256_andn),
+                      MAP(v256_mullo_s16),
+                      MAP(v256_mulhi_s16),
+                      MAP(v256_mullo_s32),
+                      MAP(v256_madd_s16),
+                      MAP(v256_madd_us8),
+                      MAP(v256_avg_u8),
+                      MAP(v256_rdavg_u8),
+                      MAP(v256_rdavg_u16),
+                      MAP(v256_avg_u16),
+                      MAP(v256_min_u8),
+                      MAP(v256_max_u8),
+                      MAP(v256_min_s8),
+                      MAP(v256_max_s8),
+                      MAP(v256_min_s16),
+                      MAP(v256_max_s16),
+                      MAP(v256_min_s32),
+                      MAP(v256_max_s32),
+                      MAP(v256_cmpgt_s8),
+                      MAP(v256_cmplt_s8),
+                      MAP(v256_cmpeq_8),
+                      MAP(v256_cmpgt_s16),
+                      MAP(v256_cmplt_s16),
+                      MAP(v256_cmpeq_16),
+                      MAP(v256_cmpgt_s32),
+                      MAP(v256_cmplt_s32),
+                      MAP(v256_cmpeq_32),
+                      MAP(v256_shuffle_8),
+                      MAP(v256_pshuffle_8),
+                      MAP(v256_wideshuffle_8),
+                      MAP(imm_v256_align<1>),
+                      MAP(imm_v256_align<2>),
+                      MAP(imm_v256_align<3>),
+                      MAP(imm_v256_align<4>),
+                      MAP(imm_v256_align<5>),
+                      MAP(imm_v256_align<6>),
+                      MAP(imm_v256_align<7>),
+                      MAP(imm_v256_align<8>),
+                      MAP(imm_v256_align<9>),
+                      MAP(imm_v256_align<10>),
+                      MAP(imm_v256_align<11>),
+                      MAP(imm_v256_align<12>),
+                      MAP(imm_v256_align<13>),
+                      MAP(imm_v256_align<14>),
+                      MAP(imm_v256_align<15>),
+                      MAP(imm_v256_align<16>),
+                      MAP(imm_v256_align<17>),
+                      MAP(imm_v256_align<18>),
+                      MAP(imm_v256_align<19>),
+                      MAP(imm_v256_align<20>),
+                      MAP(imm_v256_align<21>),
+                      MAP(imm_v256_align<22>),
+                      MAP(imm_v256_align<23>),
+                      MAP(imm_v256_align<24>),
+                      MAP(imm_v256_align<25>),
+                      MAP(imm_v256_align<26>),
+                      MAP(imm_v256_align<27>),
+                      MAP(imm_v256_align<28>),
+                      MAP(imm_v256_align<29>),
+                      MAP(imm_v256_align<30>),
+                      MAP(imm_v256_align<31>),
+                      MAP(v256_from_v128),
+                      MAP(v256_zip_8),
+                      MAP(v256_zip_16),
+                      MAP(v256_zip_32),
+                      MAP(v256_mul_s16),
+                      MAP(v256_unpack_u8_s16),
+                      MAP(v256_unpack_s8_s16),
+                      MAP(v256_unpack_u16_s32),
+                      MAP(v256_unpack_s16_s32),
+                      MAP(v256_shl_8),
+                      MAP(v256_shr_u8),
+                      MAP(v256_shr_s8),
+                      MAP(v256_shl_16),
+                      MAP(v256_shr_u16),
+                      MAP(v256_shr_s16),
+                      MAP(v256_shl_32),
+                      MAP(v256_shr_u32),
+                      MAP(v256_shr_s32),
+                      MAP(v256_shl_64),
+                      MAP(v256_shr_u64),
+                      MAP(v256_shr_s64),
+                      MAP(v256_abs_s8),
+                      MAP(v256_abs_s16),
+                      MAP(v256_padd_u8),
+                      MAP(v256_padd_s16),
+                      MAP(v256_unpacklo_u16_s32),
+                      MAP(v256_unpacklo_s16_s32),
+                      MAP(v256_unpackhi_u16_s32),
+                      MAP(v256_unpackhi_s16_s32),
+                      MAP(imm_v256_shr_n_word<1>),
+                      MAP(imm_v256_shr_n_word<2>),
+                      MAP(imm_v256_shr_n_word<3>),
+                      MAP(imm_v256_shr_n_word<4>),
+                      MAP(imm_v256_shr_n_word<5>),
+                      MAP(imm_v256_shr_n_word<6>),
+                      MAP(imm_v256_shr_n_word<7>),
+                      MAP(imm_v256_shr_n_word<8>),
+                      MAP(imm_v256_shr_n_word<9>),
+                      MAP(imm_v256_shr_n_word<10>),
+                      MAP(imm_v256_shr_n_word<11>),
+                      MAP(imm_v256_shr_n_word<12>),
+                      MAP(imm_v256_shr_n_word<13>),
+                      MAP(imm_v256_shr_n_word<14>),
+                      MAP(imm_v256_shr_n_word<15>),
+                      MAP(imm_v256_shl_n_word<1>),
+                      MAP(imm_v256_shl_n_word<2>),
+                      MAP(imm_v256_shl_n_word<3>),
+                      MAP(imm_v256_shl_n_word<4>),
+                      MAP(imm_v256_shl_n_word<5>),
+                      MAP(imm_v256_shl_n_word<6>),
+                      MAP(imm_v256_shl_n_word<7>),
+                      MAP(imm_v256_shl_n_word<8>),
+                      MAP(imm_v256_shl_n_word<9>),
+                      MAP(imm_v256_shl_n_word<10>),
+                      MAP(imm_v256_shl_n_word<11>),
+                      MAP(imm_v256_shl_n_word<12>),
+                      MAP(imm_v256_shl_n_word<13>),
+                      MAP(imm_v256_shl_n_word<14>),
+                      MAP(imm_v256_shl_n_word<15>),
+                      MAP(imm_v256_shr_n_byte<1>),
+                      MAP(imm_v256_shr_n_byte<2>),
+                      MAP(imm_v256_shr_n_byte<3>),
+                      MAP(imm_v256_shr_n_byte<4>),
+                      MAP(imm_v256_shr_n_byte<5>),
+                      MAP(imm_v256_shr_n_byte<6>),
+                      MAP(imm_v256_shr_n_byte<7>),
+                      MAP(imm_v256_shr_n_byte<8>),
+                      MAP(imm_v256_shr_n_byte<9>),
+                      MAP(imm_v256_shr_n_byte<10>),
+                      MAP(imm_v256_shr_n_byte<11>),
+                      MAP(imm_v256_shr_n_byte<12>),
+                      MAP(imm_v256_shr_n_byte<13>),
+                      MAP(imm_v256_shr_n_byte<14>),
+                      MAP(imm_v256_shr_n_byte<15>),
+                      MAP(imm_v256_shr_n_byte<16>),
+                      MAP(imm_v256_shr_n_byte<17>),
+                      MAP(imm_v256_shr_n_byte<18>),
+                      MAP(imm_v256_shr_n_byte<19>),
+                      MAP(imm_v256_shr_n_byte<20>),
+                      MAP(imm_v256_shr_n_byte<21>),
+                      MAP(imm_v256_shr_n_byte<22>),
+                      MAP(imm_v256_shr_n_byte<23>),
+                      MAP(imm_v256_shr_n_byte<24>),
+                      MAP(imm_v256_shr_n_byte<25>),
+                      MAP(imm_v256_shr_n_byte<26>),
+                      MAP(imm_v256_shr_n_byte<27>),
+                      MAP(imm_v256_shr_n_byte<28>),
+                      MAP(imm_v256_shr_n_byte<29>),
+                      MAP(imm_v256_shr_n_byte<30>),
+                      MAP(imm_v256_shr_n_byte<31>),
+                      MAP(imm_v256_shl_n_byte<1>),
+                      MAP(imm_v256_shl_n_byte<2>),
+                      MAP(imm_v256_shl_n_byte<3>),
+                      MAP(imm_v256_shl_n_byte<4>),
+                      MAP(imm_v256_shl_n_byte<5>),
+                      MAP(imm_v256_shl_n_byte<6>),
+                      MAP(imm_v256_shl_n_byte<7>),
+                      MAP(imm_v256_shl_n_byte<8>),
+                      MAP(imm_v256_shl_n_byte<9>),
+                      MAP(imm_v256_shl_n_byte<10>),
+                      MAP(imm_v256_shl_n_byte<11>),
+                      MAP(imm_v256_shl_n_byte<12>),
+                      MAP(imm_v256_shl_n_byte<13>),
+                      MAP(imm_v256_shl_n_byte<14>),
+                      MAP(imm_v256_shl_n_byte<15>),
+                      MAP(imm_v256_shl_n_byte<16>),
+                      MAP(imm_v256_shl_n_byte<17>),
+                      MAP(imm_v256_shl_n_byte<18>),
+                      MAP(imm_v256_shl_n_byte<19>),
+                      MAP(imm_v256_shl_n_byte<20>),
+                      MAP(imm_v256_shl_n_byte<21>),
+                      MAP(imm_v256_shl_n_byte<22>),
+                      MAP(imm_v256_shl_n_byte<23>),
+                      MAP(imm_v256_shl_n_byte<24>),
+                      MAP(imm_v256_shl_n_byte<25>),
+                      MAP(imm_v256_shl_n_byte<26>),
+                      MAP(imm_v256_shl_n_byte<27>),
+                      MAP(imm_v256_shl_n_byte<28>),
+                      MAP(imm_v256_shl_n_byte<29>),
+                      MAP(imm_v256_shl_n_byte<30>),
+                      MAP(imm_v256_shl_n_byte<31>),
+                      MAP(imm_v256_shl_n_8<1>),
+                      MAP(imm_v256_shl_n_8<2>),
+                      MAP(imm_v256_shl_n_8<3>),
+                      MAP(imm_v256_shl_n_8<4>),
+                      MAP(imm_v256_shl_n_8<5>),
+                      MAP(imm_v256_shl_n_8<6>),
+                      MAP(imm_v256_shl_n_8<7>),
+                      MAP(imm_v256_shr_n_u8<1>),
+                      MAP(imm_v256_shr_n_u8<2>),
+                      MAP(imm_v256_shr_n_u8<3>),
+                      MAP(imm_v256_shr_n_u8<4>),
+                      MAP(imm_v256_shr_n_u8<5>),
+                      MAP(imm_v256_shr_n_u8<6>),
+                      MAP(imm_v256_shr_n_u8<7>),
+                      MAP(imm_v256_shr_n_s8<1>),
+                      MAP(imm_v256_shr_n_s8<2>),
+                      MAP(imm_v256_shr_n_s8<3>),
+                      MAP(imm_v256_shr_n_s8<4>),
+                      MAP(imm_v256_shr_n_s8<5>),
+                      MAP(imm_v256_shr_n_s8<6>),
+                      MAP(imm_v256_shr_n_s8<7>),
+                      MAP(imm_v256_shl_n_16<1>),
+                      MAP(imm_v256_shl_n_16<2>),
+                      MAP(imm_v256_shl_n_16<4>),
+                      MAP(imm_v256_shl_n_16<6>),
+                      MAP(imm_v256_shl_n_16<8>),
+                      MAP(imm_v256_shl_n_16<10>),
+                      MAP(imm_v256_shl_n_16<12>),
+                      MAP(imm_v256_shl_n_16<14>),
+                      MAP(imm_v256_shr_n_u16<1>),
+                      MAP(imm_v256_shr_n_u16<2>),
+                      MAP(imm_v256_shr_n_u16<4>),
+                      MAP(imm_v256_shr_n_u16<6>),
+                      MAP(imm_v256_shr_n_u16<8>),
+                      MAP(imm_v256_shr_n_u16<10>),
+                      MAP(imm_v256_shr_n_u16<12>),
+                      MAP(imm_v256_shr_n_u16<14>),
+                      MAP(imm_v256_shr_n_s16<1>),
+                      MAP(imm_v256_shr_n_s16<2>),
+                      MAP(imm_v256_shr_n_s16<4>),
+                      MAP(imm_v256_shr_n_s16<6>),
+                      MAP(imm_v256_shr_n_s16<8>),
+                      MAP(imm_v256_shr_n_s16<10>),
+                      MAP(imm_v256_shr_n_s16<12>),
+                      MAP(imm_v256_shr_n_s16<14>),
+                      MAP(imm_v256_shl_n_32<1>),
+                      MAP(imm_v256_shl_n_32<4>),
+                      MAP(imm_v256_shl_n_32<8>),
+                      MAP(imm_v256_shl_n_32<12>),
+                      MAP(imm_v256_shl_n_32<16>),
+                      MAP(imm_v256_shl_n_32<20>),
+                      MAP(imm_v256_shl_n_32<24>),
+                      MAP(imm_v256_shl_n_32<28>),
+                      MAP(imm_v256_shr_n_u32<1>),
+                      MAP(imm_v256_shr_n_u32<4>),
+                      MAP(imm_v256_shr_n_u32<8>),
+                      MAP(imm_v256_shr_n_u32<12>),
+                      MAP(imm_v256_shr_n_u32<16>),
+                      MAP(imm_v256_shr_n_u32<20>),
+                      MAP(imm_v256_shr_n_u32<24>),
+                      MAP(imm_v256_shr_n_u32<28>),
+                      MAP(imm_v256_shr_n_s32<1>),
+                      MAP(imm_v256_shr_n_s32<4>),
+                      MAP(imm_v256_shr_n_s32<8>),
+                      MAP(imm_v256_shr_n_s32<12>),
+                      MAP(imm_v256_shr_n_s32<16>),
+                      MAP(imm_v256_shr_n_s32<20>),
+                      MAP(imm_v256_shr_n_s32<24>),
+                      MAP(imm_v256_shr_n_s32<28>),
+                      MAP(imm_v256_shl_n_64<1>),
+                      MAP(imm_v256_shl_n_64<4>),
+                      MAP(imm_v256_shl_n_64<8>),
+                      MAP(imm_v256_shl_n_64<12>),
+                      MAP(imm_v256_shl_n_64<16>),
+                      MAP(imm_v256_shl_n_64<20>),
+                      MAP(imm_v256_shl_n_64<24>),
+                      MAP(imm_v256_shl_n_64<28>),
+                      MAP(imm_v256_shl_n_64<32>),
+                      MAP(imm_v256_shl_n_64<36>),
+                      MAP(imm_v256_shl_n_64<40>),
+                      MAP(imm_v256_shl_n_64<44>),
+                      MAP(imm_v256_shl_n_64<48>),
+                      MAP(imm_v256_shl_n_64<52>),
+                      MAP(imm_v256_shl_n_64<56>),
+                      MAP(imm_v256_shl_n_64<60>),
+                      MAP(imm_v256_shr_n_u64<1>),
+                      MAP(imm_v256_shr_n_u64<4>),
+                      MAP(imm_v256_shr_n_u64<8>),
+                      MAP(imm_v256_shr_n_u64<12>),
+                      MAP(imm_v256_shr_n_u64<16>),
+                      MAP(imm_v256_shr_n_u64<20>),
+                      MAP(imm_v256_shr_n_u64<24>),
+                      MAP(imm_v256_shr_n_u64<28>),
+                      MAP(imm_v256_shr_n_u64<32>),
+                      MAP(imm_v256_shr_n_u64<36>),
+                      MAP(imm_v256_shr_n_u64<40>),
+                      MAP(imm_v256_shr_n_u64<44>),
+                      MAP(imm_v256_shr_n_u64<48>),
+                      MAP(imm_v256_shr_n_u64<52>),
+                      MAP(imm_v256_shr_n_u64<56>),
+                      MAP(imm_v256_shr_n_u64<60>),
+                      MAP(imm_v256_shr_n_s64<1>),
+                      MAP(imm_v256_shr_n_s64<4>),
+                      MAP(imm_v256_shr_n_s64<8>),
+                      MAP(imm_v256_shr_n_s64<12>),
+                      MAP(imm_v256_shr_n_s64<16>),
+                      MAP(imm_v256_shr_n_s64<20>),
+                      MAP(imm_v256_shr_n_s64<24>),
+                      MAP(imm_v256_shr_n_s64<28>),
+                      MAP(imm_v256_shr_n_s64<32>),
+                      MAP(imm_v256_shr_n_s64<36>),
+                      MAP(imm_v256_shr_n_s64<40>),
+                      MAP(imm_v256_shr_n_s64<44>),
+                      MAP(imm_v256_shr_n_s64<48>),
+                      MAP(imm_v256_shr_n_s64<52>),
+                      MAP(imm_v256_shr_n_s64<56>),
+                      MAP(imm_v256_shr_n_s64<60>),
+                      MAP(v256_movemask_8),
+                      MAP(v256_zero),
+                      MAP(v256_dup_8),
+                      MAP(v256_dup_16),
+                      MAP(v256_dup_32),
+                      MAP(v256_dup_64),
+                      MAP(v256_low_u32),
+                      MAP(v256_low_v64),
+                      MAP(v256_from_64),
+                      MAP(v256_from_v64),
+                      MAP(v256_ziplo_128),
+                      MAP(v256_ziphi_128),
+                      MAP(v256_unpacklo_u8_s16),
+                      MAP(v256_unpackhi_u8_s16),
+                      MAP(v256_unpacklo_s8_s16),
+                      MAP(v256_unpackhi_s8_s16),
+                      MAP(v256_blend_8),
+                      { nullptr, nullptr, nullptr } };
+#undef MAP
+
+// Map reference functions to machine tuned functions. Since the
+// functions depend on machine tuned types, the non-machine tuned
+// instantiations of the test can't refer to these functions directly,
+// so we refer to them by name and do the mapping here.
+void Map(const char *name, fptr *ref, fptr *simd) {
+  unsigned int i;
+  for (i = 0; m[i].name && strcmp(name, m[i].name); i++) {
+  }
+
+  *ref = m[i].ref;
+  *simd = m[i].simd;
+}
+
+// Used for printing errors in TestSimd1Arg, TestSimd2Args and TestSimd3Args
+std::string Print(const uint8_t *a, int size) {
+  std::string text = "0x";
+  for (int i = 0; i < size; i++) {
+    const uint8_t c = a[!CONFIG_BIG_ENDIAN ? size - 1 - i : i];
+    // Same as snprintf(..., ..., "%02x", c)
+    text += (c >> 4) + '0' + ((c >> 4) > 9) * ('a' - '0' - 10);
+    text += (c & 15) + '0' + ((c & 15) > 9) * ('a' - '0' - 10);
+  }
+
+  return text;
+}
+
+// Used in TestSimd1Arg, TestSimd2Args and TestSimd3Args to restrict argument
+// ranges
+void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) {
+  switch (maskwidth) {
+    case 0: {
+      break;
+    }
+    case 8: {
+      for (int i = 0; i < size; i++) s[i] &= mask;
+      break;
+    }
+    case 16: {
+      uint16_t *t = reinterpret_cast<uint16_t *>(s);
+      assert(!(reinterpret_cast<uintptr_t>(s) & 1));
+      for (int i = 0; i < size / 2; i++) t[i] &= mask;
+      break;
+    }
+    case 32: {
+      uint32_t *t = reinterpret_cast<uint32_t *>(s);
+      assert(!(reinterpret_cast<uintptr_t>(s) & 3));
+      for (int i = 0; i < size / 4; i++) t[i] &= mask;
+      break;
+    }
+    case 64: {
+      uint64_t *t = reinterpret_cast<uint64_t *>(s);
+      assert(!(reinterpret_cast<uintptr_t>(s) & 7));
+      for (int i = 0; i < size / 8; i++) t[i] &= mask;
+      break;
+    }
+    default: {
+      FAIL() << "Unsupported mask width";
+      break;
+    }
+  }
+}
+
+// We need some extra load/store functions
+void u64_store_aligned(void *p, uint64_t a) {
+  v64_store_aligned(p, v64_from_64(a));
+}
+void s32_store_aligned(void *p, int32_t a) {
+  u32_store_aligned(p, static_cast<uint32_t>(a));
+}
+void s64_store_aligned(void *p, int64_t a) {
+  v64_store_aligned(p, v64_from_64(static_cast<uint64_t>(a)));
+}
+
+void c_u64_store_aligned(void *p, uint64_t a) {
+  c_v64_store_aligned(p, c_v64_from_64(a));
+}
+
+void c_s32_store_aligned(void *p, int32_t a) {
+  c_u32_store_aligned(p, static_cast<uint32_t>(a));
+}
+
+void c_s64_store_aligned(void *p, int64_t a) {
+  c_v64_store_aligned(p, c_v64_from_64(static_cast<uint64_t>(a)));
+}
+
+uint64_t u64_load_aligned(const void *p) {
+  return v64_u64(v64_load_aligned(p));
+}
+uint16_t u16_load_aligned(const void *p) {
+  return *(reinterpret_cast<const uint16_t *>(p));
+}
+uint8_t u8_load_aligned(const void *p) {
+  return *(reinterpret_cast<const uint8_t *>(p));
+}
+
+uint64_t c_u64_load_aligned(const void *p) {
+  return c_v64_u64(c_v64_load_aligned(p));
+}
+uint16_t c_u16_load_aligned(const void *p) {
+  return *(reinterpret_cast<const uint16_t *>(p));
+}
+uint8_t c_u8_load_aligned(const void *p) {
+  return *(reinterpret_cast<const uint8_t *>(p));
+}
+
+// CompareSimd1Arg, CompareSimd2Args and CompareSimd3Args compare
+// intrinsics taking 1, 2 or 3 arguments respectively with their
+// corresponding C reference.  Ideally, the loads and stores should
+// have gone into the template parameter list, but v64 and v128 could
+// be typedef'ed to the same type (which is the case on x86) and then
+// we can't instantiate both v64 and v128, so the function return and
+// argument types, including the always differing types in the C
+// equivalent are used instead.  The function arguments must be void
+// pointers and then go through a cast to avoid matching errors in the
+// branches eliminated by the typeid tests in the calling function.
+template <typename Ret, typename Arg, typename CRet, typename CArg>
+int CompareSimd1Arg(fptr store, fptr load, fptr simd, void *d, fptr c_store,
+                    fptr c_load, fptr c_simd, void *ref_d, const void *a) {
+  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
+  Arg (*const my_load)(const void *) = (Arg(*const)(const void *))load;
+  Ret (*const my_simd)(Arg) = (Ret(*const)(Arg))simd;
+  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
+  CArg (*const my_c_load)(const void *) = (CArg(*const)(const void *))c_load;
+  CRet (*const my_c_simd)(CArg) = (CRet(*const)(CArg))c_simd;
+
+  // Call reference and intrinsic
+  my_c_store(ref_d, my_c_simd(my_c_load(a)));
+  my_store(d, my_simd(my_load(a)));
+
+  // Compare results
+  return memcmp(ref_d, d, sizeof(CRet));
+}
+
+template <typename Ret, typename Arg1, typename Arg2, typename CRet,
+          typename CArg1, typename CArg2>
+int CompareSimd2Args(fptr store, fptr load1, fptr load2, fptr simd, void *d,
+                     fptr c_store, fptr c_load1, fptr c_load2, fptr c_simd,
+                     void *ref_d, const void *a, const void *b) {
+  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
+  Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
+  Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
+  Ret (*const my_simd)(Arg1, Arg2) = (Ret(*const)(Arg1, Arg2))simd;
+  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
+  CArg1 (*const my_c_load1)(const void *) =
+      (CArg1(*const)(const void *))c_load1;
+  CArg2 (*const my_c_load2)(const void *) =
+      (CArg2(*const)(const void *))c_load2;
+  CRet (*const my_c_simd)(CArg1, CArg2) = (CRet(*const)(CArg1, CArg2))c_simd;
+
+  // Call reference and intrinsic
+  my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b)));
+  my_store(d, my_simd(my_load1(a), my_load2(b)));
+
+  // Compare results
+  return memcmp(ref_d, d, sizeof(CRet));
+}
+
+template <typename Ret, typename Arg1, typename Arg2, typename Arg3,
+          typename CRet, typename CArg1, typename CArg2, typename CArg3>
+int CompareSimd3Args(fptr store, fptr load1, fptr load2, fptr load3, fptr simd,
+                     void *d, fptr c_store, fptr c_load1, fptr c_load2,
+                     fptr c_load3, fptr c_simd, void *ref_d, const void *a,
+                     const void *b, const void *c) {
+  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
+  Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
+  Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
+  Arg3 (*const my_load3)(const void *) = (Arg3(*const)(const void *))load3;
+  Ret (*const my_simd)(Arg1, Arg2, Arg3) = (Ret(*const)(Arg1, Arg2, Arg3))simd;
+  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
+  CArg1 (*const my_c_load1)(const void *) =
+      (CArg1(*const)(const void *))c_load1;
+  CArg2 (*const my_c_load2)(const void *) =
+      (CArg2(*const)(const void *))c_load2;
+  CArg3 (*const my_c_load3)(const void *) =
+      (CArg3(*const)(const void *))c_load3;
+  CRet (*const my_c_simd)(CArg1, CArg2, CArg3) =
+      (CRet(*const)(CArg1, CArg2, CArg3))c_simd;
+
+  // Call reference and intrinsic
+  my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b), my_c_load3(c)));
+  my_store(d, my_simd(my_load1(a), my_load2(b), my_load3(c)));
+
+  // Compare results
+  return memcmp(ref_d, d, sizeof(CRet));
+}
+
+}  // namespace
+
+template <typename CRet, typename CArg>
+void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                  const char *name) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  fptr ref_simd;
+  fptr simd;
+  int error = 0;
+  DECLARE_ALIGNED(32, uint8_t, s[32]);
+  DECLARE_ALIGNED(32, uint8_t, d[32]);
+  DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
+  assert(sizeof(CArg) <= 32 && sizeof(CRet) <= 32);
+  memset(ref_d, 0, sizeof(ref_d));
+  memset(d, 0, sizeof(d));
+
+  Map(name, &ref_simd, &simd);
+  if (simd == nullptr || ref_simd == nullptr) {
+    FAIL() << "Internal error: Unknown intrinsic function " << name;
+  }
+  for (unsigned int count = 0;
+       count < iterations && !error && !testing::Test::HasFailure(); count++) {
+    for (unsigned int c = 0; c < sizeof(CArg); c++) s[c] = rnd.Rand8();
+
+    if (maskwidth) {
+      SetMask(s, sizeof(CArg), mask, maskwidth);
+    }
+
+    if (typeid(CRet) == typeid(c_v64) && typeid(CArg) == typeid(c_v64)) {
+      // V64_V64
+      error = CompareSimd1Arg<v64, v64, c_v64, c_v64>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg) == typeid(uint8_t)) {
+      // V64_U8
+      error = CompareSimd1Arg<v64, uint8_t, c_v64, uint8_t>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg) == typeid(uint16_t)) {
+      // V64_U16
+      error = CompareSimd1Arg<v64, uint16_t, c_v64, uint16_t>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg) == typeid(uint32_t)) {
+      // V64_U32
+      error = CompareSimd1Arg<v64, uint32_t, c_v64, uint32_t>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint64_t) &&
+               typeid(CArg) == typeid(c_v64)) {
+      // U64_V64
+      error = CompareSimd1Arg<uint64_t, v64, uint64_t, c_v64>(
+          reinterpret_cast<fptr>(u64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(int64_t) &&
+               typeid(CArg) == typeid(c_v64)) {
+      // S64_V64
+      error = CompareSimd1Arg<int64_t, v64, int64_t, c_v64>(
+          reinterpret_cast<fptr>(s64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_s64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg) == typeid(c_v64)) {
+      // U32_V64
+      error = CompareSimd1Arg<uint32_t, v64, uint32_t, c_v64>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(int32_t) &&
+               typeid(CArg) == typeid(c_v64)) {
+      // S32_V64
+      error = CompareSimd1Arg<int32_t, v64, int32_t, c_v64>(
+          reinterpret_cast<fptr>(s32_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_s32_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg) == typeid(c_v128)) {
+      // U32_V128
+      error = CompareSimd1Arg<uint32_t, v128, uint32_t, c_v128>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint64_t) &&
+               typeid(CArg) == typeid(c_v128)) {
+      // U64_V128
+      error = CompareSimd1Arg<uint64_t, v128, uint64_t, c_v128>(
+          reinterpret_cast<fptr>(u64_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u64_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint64_t) &&
+               typeid(CArg) == typeid(c_v256)) {
+      // U64_V256
+      error = CompareSimd1Arg<uint64_t, v256, uint64_t, c_v256>(
+          reinterpret_cast<fptr>(u64_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u64_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg) == typeid(c_v128)) {
+      // V64_V128
+      error = CompareSimd1Arg<v64, v128, c_v64, c_v128>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(c_v128)) {
+      // V128_V128
+      error = CompareSimd1Arg<v128, v128, c_v128, c_v128>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(c_v64)) {
+      // V128_V64
+      error = CompareSimd1Arg<v128, v64, c_v128, c_v64>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(uint8_t)) {
+      // V128_U8
+      error = CompareSimd1Arg<v128, uint8_t, c_v128, uint8_t>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(uint16_t)) {
+      // V128_U16
+      error = CompareSimd1Arg<v128, uint16_t, c_v128, uint16_t>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(uint32_t)) {
+      // V128_U32
+      error = CompareSimd1Arg<v128, uint32_t, c_v128, uint32_t>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(uint64_t)) {
+      // V128_U64
+      error = CompareSimd1Arg<v128, uint64_t, c_v128, uint64_t>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(c_v256)) {
+      // V256_V256
+      error = CompareSimd1Arg<v256, v256, c_v256, c_v256>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(c_v128)) {
+      // V256_V128
+      error = CompareSimd1Arg<v256, v128, c_v256, c_v128>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(uint8_t)) {
+      // V256_U8
+      error = CompareSimd1Arg<v256, uint8_t, c_v256, uint8_t>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(uint16_t)) {
+      // V256_U16
+      error = CompareSimd1Arg<v256, uint16_t, c_v256, uint16_t>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(uint32_t)) {
+      // V256_U32
+      error = CompareSimd1Arg<v256, uint32_t, c_v256, uint32_t>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(uint64_t)) {
+      // V256_U64
+      error = CompareSimd1Arg<v256, uint64_t, c_v256, uint64_t>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg) == typeid(c_v256)) {
+      // U32_V256
+      error = CompareSimd1Arg<uint32_t, v256, uint32_t, c_v256>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg) == typeid(c_v256)) {
+      // V64_V256
+      error = CompareSimd1Arg<v64, v256, c_v64, c_v256>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
+    } else {
+      FAIL() << "Internal error: Unknown intrinsic function "
+             << typeid(CRet).name() << " " << name << "(" << typeid(CArg).name()
+             << ")";
+    }
+  }
+
+  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
+                      << Print(s, sizeof(CArg)) << ") -> "
+                      << Print(d, sizeof(CRet)) << " (simd), "
+                      << Print(ref_d, sizeof(CRet)) << " (ref)";
+}
+
+template <typename CRet, typename CArg1, typename CArg2>
+void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                   const char *name) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  fptr ref_simd;
+  fptr simd;
+  int error = 0;
+  DECLARE_ALIGNED(32, uint8_t, s1[32]);
+  DECLARE_ALIGNED(32, uint8_t, s2[32]);
+  DECLARE_ALIGNED(32, uint8_t, d[32]);
+  DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
+  assert(sizeof(CArg1) <= 32 && sizeof(CArg2) <= 32 && sizeof(CRet) <= 32);
+  memset(ref_d, 0, sizeof(ref_d));
+  memset(d, 0, sizeof(d));
+
+  Map(name, &ref_simd, &simd);
+  if (simd == nullptr || ref_simd == nullptr) {
+    FAIL() << "Internal error: Unknown intrinsic function " << name;
+  }
+
+  for (unsigned int count = 0;
+       count < iterations && !error && !testing::Test::HasFailure(); count++) {
+    for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();
+
+    for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();
+
+    if (maskwidth) SetMask(s2, sizeof(CArg2), mask, maskwidth);
+
+    if (typeid(CRet) == typeid(c_v64) && typeid(CArg1) == typeid(c_v64) &&
+        typeid(CArg2) == typeid(c_v64)) {
+      // V64_V64V64
+      error = CompareSimd2Args<v64, v64, v64, c_v64, c_v64, c_v64>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg1) == typeid(uint32_t) &&
+               typeid(CArg2) == typeid(uint32_t)) {
+      // V64_U32U32
+      error =
+          CompareSimd2Args<v64, uint32_t, uint32_t, c_v64, uint32_t, uint32_t>(
+              reinterpret_cast<fptr>(v64_store_aligned),
+              reinterpret_cast<fptr>(u32_load_aligned),
+              reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+              reinterpret_cast<fptr>(c_v64_store_aligned),
+              reinterpret_cast<fptr>(c_u32_load_aligned),
+              reinterpret_cast<fptr>(c_u32_load_aligned),
+              reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg1) == typeid(c_v64) &&
+               typeid(CArg2) == typeid(c_v64)) {
+      // U32_V64V64
+      error = CompareSimd2Args<uint32_t, v64, v64, uint32_t, c_v64, c_v64>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(int64_t) &&
+               typeid(CArg1) == typeid(c_v64) &&
+               typeid(CArg2) == typeid(c_v64)) {
+      // S64_V64V64
+      error = CompareSimd2Args<int64_t, v64, v64, int64_t, c_v64, c_v64>(
+          reinterpret_cast<fptr>(s64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_s64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg1) == typeid(c_v64) &&
+               typeid(CArg2) == typeid(uint32_t)) {
+      // V64_V64U32
+      error = CompareSimd2Args<v64, v64, uint32_t, c_v64, c_v64, uint32_t>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(c_v128)) {
+      // V128_V128V128
+      error = CompareSimd2Args<v128, v128, v128, c_v128, c_v128, c_v128>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(c_v128)) {
+      // U32_V128V128
+      error = CompareSimd2Args<uint32_t, v128, v128, uint32_t, c_v128, c_v128>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(uint64_t) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(c_v128)) {
+      // U64_V128V128
+      error = CompareSimd2Args<uint64_t, v128, v128, uint64_t, c_v128, c_v128>(
+          reinterpret_cast<fptr>(u64_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u64_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(int64_t) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(c_v128)) {
+      // S64_V128V128
+      error = CompareSimd2Args<int64_t, v128, v128, int64_t, c_v128, c_v128>(
+          reinterpret_cast<fptr>(s64_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_s64_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg1) == typeid(uint64_t) &&
+               typeid(CArg2) == typeid(uint64_t)) {
+      // V128_U64U64
+      error = CompareSimd2Args<v128, uint64_t, uint64_t, c_v128, uint64_t,
+                               uint64_t>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(u64_load_aligned),
+          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_u64_load_aligned),
+          reinterpret_cast<fptr>(c_u64_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg1) == typeid(c_v64) &&
+               typeid(CArg2) == typeid(c_v64)) {
+      // V128_V64V64
+      error = CompareSimd2Args<v128, v64, v64, c_v128, c_v64, c_v64>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(uint32_t)) {
+      // V128_V128U32
+      error = CompareSimd2Args<v128, v128, uint32_t, c_v128, c_v128, uint32_t>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(c_v256)) {
+      // V256_V256V256
+      error = CompareSimd2Args<v256, v256, v256, c_v256, c_v256, c_v256>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(uint64_t) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(c_v256)) {
+      // U64_V256V256
+      error = CompareSimd2Args<uint64_t, v256, v256, uint64_t, c_v256, c_v256>(
+          reinterpret_cast<fptr>(u64_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u64_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(int64_t) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(c_v256)) {
+      // S64_V256V256
+      error = CompareSimd2Args<int64_t, v256, v256, int64_t, c_v256, c_v256>(
+          reinterpret_cast<fptr>(s64_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_s64_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(c_v256)) {
+      // U32_V256V256
+      error = CompareSimd2Args<uint32_t, v256, v256, uint32_t, c_v256, c_v256>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(c_v128)) {
+      // V256_V128V128
+      error = CompareSimd2Args<v256, v128, v128, c_v256, c_v128, c_v128>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(uint32_t)) {
+      // V256_V256U32
+      error = CompareSimd2Args<v256, v256, uint32_t, c_v256, c_v256, uint32_t>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+
+    } else {
+      FAIL() << "Internal error: Unknown intrinsic function "
+             << typeid(CRet).name() << " " << name << "("
+             << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ")";
+    }
+  }
+
+  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
+                      << Print(s1, sizeof(CArg1)) << ", "
+                      << Print(s2, sizeof(CArg2)) << ") -> "
+                      << Print(d, sizeof(CRet)) << " (simd), "
+                      << Print(ref_d, sizeof(CRet)) << " (ref)";
+}
+
+template <typename CRet, typename CArg1, typename CArg2, typename CArg3>
+void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                   const char *name) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  fptr ref_simd;
+  fptr simd;
+  int error = 0;
+  DECLARE_ALIGNED(32, uint8_t, s1[32]);
+  DECLARE_ALIGNED(32, uint8_t, s2[32]);
+  DECLARE_ALIGNED(32, uint8_t, s3[32]);
+  DECLARE_ALIGNED(32, uint8_t, d[32]);
+  DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
+  assert(sizeof(CArg1) <= 32 && sizeof(CArg2) <= 32 && sizeof(CArg3) <= 32 &&
+         sizeof(CRet) <= 32);
+  memset(ref_d, 0, sizeof(ref_d));
+  memset(d, 0, sizeof(d));
+
+  Map(name, &ref_simd, &simd);
+  if (simd == nullptr || ref_simd == nullptr) {
+    FAIL() << "Internal error: Unknown intrinsic function " << name;
+  }
+
+  for (unsigned int count = 0;
+       count < iterations && !error && !testing::Test::HasFailure(); count++) {
+    for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();
+
+    for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();
+
+    for (unsigned int c = 0; c < sizeof(CArg3); c++) s3[c] = rnd.Rand8();
+
+    if (maskwidth) SetMask(s3, sizeof(CArg3), mask, maskwidth);
+
+    if (typeid(CRet) == typeid(c_v128) && typeid(CArg1) == typeid(c_v128) &&
+        typeid(CArg2) == typeid(c_v128) && typeid(CArg3) == typeid(c_v128)) {
+      // V128_V128V128V128
+      error = CompareSimd3Args<v128, v128, v128, v128, c_v128, c_v128, c_v128,
+                               c_v128>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(c_v256) &&
+               typeid(CArg3) == typeid(c_v256)) {
+      // V256_V256V256V256
+      error = CompareSimd3Args<v256, v256, v256, v256, c_v256, c_v256, c_v256,
+                               c_v256>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3);
+    } else {
+      FAIL() << "Internal error: Unknown intrinsic function "
+             << typeid(CRet).name() << " " << name << "("
+             << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ", "
+             << typeid(CArg3).name() << ")";
+    }
+  }
+
+  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
+                      << Print(s1, sizeof(CArg1)) << ", "
+                      << Print(s2, sizeof(CArg2)) << ", "
+                      << Print(s3, sizeof(CArg3)) << ") -> "
+                      << Print(d, sizeof(CRet)) << " (simd), "
+                      << Print(ref_d, sizeof(CRet)) << " (ref)";
+}
+
+// Instantiations to make the functions callable from another files
+template void TestSimd1Arg<c_v64, uint8_t>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<c_v64, uint16_t>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<c_v64, uint32_t>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
+                                         const char *);
+template void TestSimd1Arg<uint32_t, c_v64>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<int32_t, c_v64>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<uint64_t, c_v64>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<int64_t, c_v64>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd2Args<c_v64, uint32_t, uint32_t>(uint32_t, uint32_t,
+                                                       uint32_t, const char *);
+template void TestSimd2Args<c_v64, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
+                                                 const char *);
+template void TestSimd2Args<c_v64, c_v64, uint32_t>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd2Args<int64_t, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
+                                                   const char *);
+template void TestSimd2Args<uint32_t, c_v64, c_v64>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd1Arg<c_v128, c_v128>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<c_v128, uint8_t>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<c_v128, uint16_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v128, uint32_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v128, uint64_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v128, c_v64>(uint32_t, uint32_t, uint32_t,
+                                          const char *);
+template void TestSimd1Arg<uint32_t, c_v128>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<uint64_t, c_v128>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v64, c_v128>(uint32_t, uint32_t, uint32_t,
+                                          const char *);
+template void TestSimd2Args<c_v128, c_v128, c_v128>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd2Args<c_v128, c_v128, uint32_t>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
+template void TestSimd2Args<c_v128, uint64_t, uint64_t>(uint32_t, uint32_t,
+                                                        uint32_t, const char *);
+template void TestSimd2Args<c_v128, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
+                                                  const char *);
+template void TestSimd2Args<uint64_t, c_v128, c_v128>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
+template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t,
+                                                     uint32_t, const char *);
+template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
+template void TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(uint32_t, uint32_t,
+                                                            uint32_t,
+                                                            const char *);
+template void TestSimd1Arg<c_v256, c_v128>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<c_v256, c_v256>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<uint64_t, c_v256>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v256, uint8_t>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<c_v256, uint16_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v256, uint32_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v256, uint64_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<uint32_t, c_v256>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v64, c_v256>(uint32_t, uint32_t, uint32_t,
+                                          const char *);
+template void TestSimd2Args<c_v256, c_v128, c_v128>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd2Args<c_v256, c_v256, c_v256>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd2Args<c_v256, c_v256, uint32_t>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
+template void TestSimd2Args<uint64_t, c_v256, c_v256>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
+template void TestSimd2Args<int64_t, c_v256, c_v256>(uint32_t, uint32_t,
+                                                     uint32_t, const char *);
+template void TestSimd2Args<uint32_t, c_v256, c_v256>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
+template void TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(uint32_t, uint32_t,
+                                                            uint32_t,
+                                                            const char *);
+
+}  // namespace SIMD_NAMESPACE
diff --git a/third_party/aom/test/simd_cmp_sse2.cc b/third_party/aom/test/simd_cmp_sse2.cc
new file mode 100644
index 0000000000..f7827a7fa1
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_sse2.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))
+#define ARCH SSE2
+#define ARCH_POSTFIX(name) name##_sse2
+#define SIMD_NAMESPACE simd_test_sse2
+#include "test/simd_cmp_impl.h"
+#endif
diff --git a/third_party/aom/test/simd_cmp_sse4.cc b/third_party/aom/test/simd_cmp_sse4.cc
new file mode 100644
index 0000000000..3566764b64
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_sse4.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))
+#define ARCH SSE4_1
+#define ARCH_POSTFIX(name) name##_sse4_1
+#define SIMD_NAMESPACE simd_test_sse4_1
+#include "test/simd_cmp_impl.h"
+#endif
diff --git a/third_party/aom/test/simd_cmp_ssse3.cc b/third_party/aom/test/simd_cmp_ssse3.cc
new file mode 100644
index 0000000000..57bf135ddb
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_ssse3.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))
+#define ARCH SSSE3
+#define ARCH_POSTFIX(name) name##_ssse3
+#define SIMD_NAMESPACE simd_test_ssse3
+#include "test/simd_cmp_impl.h"
+#endif
diff --git a/third_party/aom/test/simd_impl.h b/third_party/aom/test/simd_impl.h
new file mode 100644
index 0000000000..b564a7f4b3
--- /dev/null
+++ b/third_party/aom/test/simd_impl.h
@@ -0,0 +1,1140 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+
+#define SIMD_CHECK 1
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/register_state_check.h"
+#include "aom_dsp/aom_simd_inline.h"
+#include "aom_dsp/simd/v256_intrinsics_c.h"
+
+namespace SIMD_NAMESPACE {
+
+template <typename param_signature>
+class TestIntrinsic : public ::testing::TestWithParam<param_signature> {
+ public:
+  ~TestIntrinsic() override = default;
+  void SetUp() override {
+    mask = std::get<0>(this->GetParam());
+    maskwidth = std::get<1>(this->GetParam());
+    name = std::get<2>(this->GetParam());
+  }
+
+ protected:
+  uint32_t mask, maskwidth;
+  const char *name;
+};
+
+// Create one typedef for each function signature
+#define TYPEDEF_SIMD(name)                                             \
+  typedef TestIntrinsic<std::tuple<uint32_t, uint32_t, const char *> > \
+      ARCH_POSTFIX(name)
+
+TYPEDEF_SIMD(V64_U8);
+TYPEDEF_SIMD(V64_U16);
+TYPEDEF_SIMD(V64_U32);
+TYPEDEF_SIMD(V64_V64);
+TYPEDEF_SIMD(U32_V64);
+TYPEDEF_SIMD(S32_V64);
+TYPEDEF_SIMD(U64_V64);
+TYPEDEF_SIMD(S64_V64);
+TYPEDEF_SIMD(V64_U32U32);
+TYPEDEF_SIMD(V64_V64V64);
+TYPEDEF_SIMD(S64_V64V64);
+TYPEDEF_SIMD(V64_V64U32);
+TYPEDEF_SIMD(U32_V64V64);
+TYPEDEF_SIMD(V128_V64);
+TYPEDEF_SIMD(V128_V128);
+TYPEDEF_SIMD(U32_V128);
+TYPEDEF_SIMD(U64_V128);
+TYPEDEF_SIMD(V64_V128);
+TYPEDEF_SIMD(V128_U8);
+TYPEDEF_SIMD(V128_U16);
+TYPEDEF_SIMD(V128_U32);
+TYPEDEF_SIMD(V128_U64);
+TYPEDEF_SIMD(V128_U64U64);
+TYPEDEF_SIMD(V128_V64V64);
+TYPEDEF_SIMD(V128_V128V128);
+TYPEDEF_SIMD(V128_V128V128V128);
+TYPEDEF_SIMD(S64_V128V128);
+TYPEDEF_SIMD(V128_V128U32);
+TYPEDEF_SIMD(U32_V128V128);
+TYPEDEF_SIMD(U64_V128V128);
+TYPEDEF_SIMD(V256_V128);
+TYPEDEF_SIMD(V256_V256);
+TYPEDEF_SIMD(U64_V256);
+TYPEDEF_SIMD(V256_V128V128);
+TYPEDEF_SIMD(V256_V256V256);
+TYPEDEF_SIMD(V256_V256V256V256);
+TYPEDEF_SIMD(U64_V256V256);
+TYPEDEF_SIMD(S64_V256V256);
+TYPEDEF_SIMD(V256_V256U32);
+TYPEDEF_SIMD(U32_V256V256);
+TYPEDEF_SIMD(V256_U8);
+TYPEDEF_SIMD(V256_U16);
+TYPEDEF_SIMD(V256_U32);
+TYPEDEF_SIMD(V256_U64);
+TYPEDEF_SIMD(U32_V256);
+TYPEDEF_SIMD(V64_V256);
+
+// Google Test allows up to 50 tests per case, so split the largest
+typedef ARCH_POSTFIX(V64_V64) ARCH_POSTFIX(V64_V64_Part2);
+typedef ARCH_POSTFIX(V64_V64V64) ARCH_POSTFIX(V64_V64V64_Part2);
+typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part2);
+typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part3);
+typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part4);
+typedef ARCH_POSTFIX(V128_V128V128) ARCH_POSTFIX(V128_V128V128_Part2);
+typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part2);
+typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part3);
+typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part4);
+typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part5);
+typedef ARCH_POSTFIX(V256_V256V256) ARCH_POSTFIX(V256_V256V256_Part2);
+
+// These functions are machine tuned located elsewhere
+template <typename c_ret, typename c_arg>
+void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                  const char *name);
+
+template <typename c_ret, typename c_arg1, typename c_arg2>
+void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                   const char *name);
+
+template <typename c_ret, typename c_arg1, typename c_arg2, typename c_arg3>
+void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                   const char *name);
+
+const int kIterations = 65536;
+
+// Add a macro layer since TEST_P will quote the name so we need to
+// expand it first with the prefix.
+#define MY_TEST_P(name, test) TEST_P(name, test)
+
+MY_TEST_P(ARCH_POSTFIX(V64_U8), TestIntrinsics) {
+  TestSimd1Arg<c_v64, uint8_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_U16), TestIntrinsics) {
+  TestSimd1Arg<c_v64, uint16_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_U32), TestIntrinsics) {
+  TestSimd1Arg<c_v64, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V64), TestIntrinsics) {
+  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U64_V64), TestIntrinsics) {
+  TestSimd1Arg<uint64_t, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(S64_V64), TestIntrinsics) {
+  TestSimd1Arg<int64_t, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V64), TestIntrinsics) {
+  TestSimd1Arg<uint32_t, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(S32_V64), TestIntrinsics) {
+  TestSimd1Arg<int32_t, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_U32U32), TestIntrinsics) {
+  TestSimd2Args<c_v64, uint32_t, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V64V64), TestIntrinsics) {
+  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(S64_V64V64), TestIntrinsics) {
+  TestSimd2Args<int64_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V64V64), TestIntrinsics) {
+  TestSimd2Args<uint32_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V64U32), TestIntrinsics) {
+  TestSimd2Args<c_v64, c_v64, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+// Google Test allows up to 50 tests per case, so split the largest
+MY_TEST_P(ARCH_POSTFIX(V64_V64_Part2), TestIntrinsics) {
+  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V64V64_Part2), TestIntrinsics) {
+  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V128), TestIntrinsics) {
+  TestSimd1Arg<uint32_t, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U64_V128), TestIntrinsics) {
+  TestSimd1Arg<uint64_t, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V128), TestIntrinsics) {
+  TestSimd1Arg<c_v64, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128), TestIntrinsics) {
+  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_U8), TestIntrinsics) {
+  TestSimd1Arg<c_v128, uint8_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_U16), TestIntrinsics) {
+  TestSimd1Arg<c_v128, uint16_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_U32), TestIntrinsics) {
+  TestSimd1Arg<c_v128, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_U64), TestIntrinsics) {
+  TestSimd1Arg<c_v128, uint64_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V64), TestIntrinsics) {
+  TestSimd1Arg<c_v128, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128V128), TestIntrinsics) {
+  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128V128V128), TestIntrinsics) {
+  TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(kIterations, mask, maskwidth,
+                                                name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V128V128), TestIntrinsics) {
+  TestSimd2Args<uint32_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U64_V128V128), TestIntrinsics) {
+  TestSimd2Args<uint64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(S64_V128V128), TestIntrinsics) {
+  TestSimd2Args<int64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_U64U64), TestIntrinsics) {
+  TestSimd2Args<c_v128, uint64_t, uint64_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V64V64), TestIntrinsics) {
+  TestSimd2Args<c_v128, c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128U32), TestIntrinsics) {
+  TestSimd2Args<c_v128, c_v128, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128V128_Part2), TestIntrinsics) {
+  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128_Part2), TestIntrinsics) {
+  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128_Part3), TestIntrinsics) {
+  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128_Part4), TestIntrinsics) {
+  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U64_V256), TestIntrinsics) {
+  TestSimd1Arg<uint64_t, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V128), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256V256), TestIntrinsics) {
+  TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256V256V256), TestIntrinsics) {
+  TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(kIterations, mask, maskwidth,
+                                                name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V128V128), TestIntrinsics) {
+  TestSimd2Args<c_v256, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V256V256), TestIntrinsics) {
+  TestSimd2Args<uint32_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U64_V256V256), TestIntrinsics) {
+  TestSimd2Args<uint64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(S64_V256V256), TestIntrinsics) {
+  TestSimd2Args<int64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256V256_Part2), TestIntrinsics) {
+  TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256U32), TestIntrinsics) {
+  TestSimd2Args<c_v256, c_v256, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256_Part2), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256_Part3), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256_Part4), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256_Part5), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_U8), TestIntrinsics) {
+  TestSimd1Arg<c_v256, uint8_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_U16), TestIntrinsics) {
+  TestSimd1Arg<c_v256, uint16_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_U32), TestIntrinsics) {
+  TestSimd1Arg<c_v256, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_U64), TestIntrinsics) {
+  TestSimd1Arg<c_v256, uint64_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V256), TestIntrinsics) {
+  TestSimd1Arg<uint32_t, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V256), TestIntrinsics) {
+  TestSimd1Arg<c_v64, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+// Add a macro layer since INSTANTIATE_TEST_SUITE_P will quote the name
+// so we need to expand it first with the prefix
+#define INSTANTIATE(name, type, ...) \
+  INSTANTIATE_TEST_SUITE_P(name, type, ::testing::Values(__VA_ARGS__))
+
+#define SIMD_TUPLE(name, mask, maskwidth) \
+  std::make_tuple(mask, maskwidth, static_cast<const char *>(#name))
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64V64), SIMD_TUPLE(v64_sad_u8, 0U, 0U),
+            SIMD_TUPLE(v64_ssd_u8, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V64_V64V64), SIMD_TUPLE(v64_add_8, 0U, 0U),
+    SIMD_TUPLE(v64_add_16, 0U, 0U), SIMD_TUPLE(v64_sadd_s16, 0U, 0U),
+    SIMD_TUPLE(v64_add_32, 0U, 0U), SIMD_TUPLE(v64_sub_8, 0U, 0U),
+    SIMD_TUPLE(v64_ssub_u8, 0U, 0U), SIMD_TUPLE(v64_ssub_s8, 0U, 0U),
+    SIMD_TUPLE(v64_sub_16, 0U, 0U), SIMD_TUPLE(v64_ssub_s16, 0U, 0U),
+    SIMD_TUPLE(v64_ssub_u16, 0U, 0U), SIMD_TUPLE(v64_sub_32, 0U, 0U),
+    SIMD_TUPLE(v64_ziplo_8, 0U, 0U), SIMD_TUPLE(v64_ziphi_8, 0U, 0U),
+    SIMD_TUPLE(v64_ziplo_16, 0U, 0U), SIMD_TUPLE(v64_ziphi_16, 0U, 0U),
+    SIMD_TUPLE(v64_ziplo_32, 0U, 0U), SIMD_TUPLE(v64_ziphi_32, 0U, 0U),
+    SIMD_TUPLE(v64_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v64_pack_s16_u8, 0U, 0U),
+    SIMD_TUPLE(v64_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v64_unziphi_8, 0U, 0U),
+    SIMD_TUPLE(v64_unziplo_8, 0U, 0U), SIMD_TUPLE(v64_unziphi_16, 0U, 0U),
+    SIMD_TUPLE(v64_unziplo_16, 0U, 0U), SIMD_TUPLE(v64_or, 0U, 0U),
+    SIMD_TUPLE(v64_xor, 0U, 0U), SIMD_TUPLE(v64_and, 0U, 0U),
+    SIMD_TUPLE(v64_andn, 0U, 0U), SIMD_TUPLE(v64_mullo_s16, 0U, 0U),
+    SIMD_TUPLE(v64_mulhi_s16, 0U, 0U), SIMD_TUPLE(v64_mullo_s32, 0U, 0U),
+    SIMD_TUPLE(v64_madd_s16, 0U, 0U), SIMD_TUPLE(v64_madd_us8, 0U, 0U),
+    SIMD_TUPLE(v64_avg_u8, 0U, 0U), SIMD_TUPLE(v64_rdavg_u8, 0U, 0U),
+    SIMD_TUPLE(v64_avg_u16, 0U, 0U), SIMD_TUPLE(v64_min_u8, 0U, 0U),
+    SIMD_TUPLE(v64_max_u8, 0U, 0U), SIMD_TUPLE(v64_min_s8, 0U, 0U),
+    SIMD_TUPLE(v64_max_s8, 0U, 0U), SIMD_TUPLE(v64_min_s16, 0U, 0U),
+    SIMD_TUPLE(v64_max_s16, 0U, 0U), SIMD_TUPLE(v64_cmpgt_s8, 0U, 0U),
+    SIMD_TUPLE(v64_cmplt_s8, 0U, 0U), SIMD_TUPLE(v64_cmpeq_8, 0U, 0U),
+    SIMD_TUPLE(v64_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v64_cmplt_s16, 0U, 0U),
+    SIMD_TUPLE(v64_cmpeq_16, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V64_V64V64_Part2), SIMD_TUPLE(v64_shuffle_8, 7U, 8U),
+    SIMD_TUPLE(v64_pack_s32_u16, 0U, 0U), SIMD_TUPLE(v64_rdavg_u16, 0U, 0U),
+    SIMD_TUPLE(v64_sadd_s8, 0U, 0U), SIMD_TUPLE(v64_sadd_u8, 0U, 0U),
+    SIMD_TUPLE(imm_v64_align<1>, 0U, 0U), SIMD_TUPLE(imm_v64_align<2>, 0U, 0U),
+    SIMD_TUPLE(imm_v64_align<3>, 0U, 0U), SIMD_TUPLE(imm_v64_align<4>, 0U, 0U),
+    SIMD_TUPLE(imm_v64_align<5>, 0U, 0U), SIMD_TUPLE(imm_v64_align<6>, 0U, 0U),
+    SIMD_TUPLE(imm_v64_align<7>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64), SIMD_TUPLE(v64_abs_s8, 0U, 0U),
+            SIMD_TUPLE(v64_abs_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpacklo_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpackhi_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpacklo_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpackhi_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpacklo_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v64_unpacklo_s16_s32, 0U, 0U),
+            SIMD_TUPLE(v64_unpackhi_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v64_unpackhi_s16_s32, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<8>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64_Part2),
+            SIMD_TUPLE(imm_v64_shl_n_16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<28>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64U32), SIMD_TUPLE(v64_shl_8, 7U, 32U),
+            SIMD_TUPLE(v64_shr_u8, 7U, 32U), SIMD_TUPLE(v64_shr_s8, 7U, 32U),
+            SIMD_TUPLE(v64_shl_16, 15U, 32U), SIMD_TUPLE(v64_shr_u16, 15U, 32U),
+            SIMD_TUPLE(v64_shr_s16, 15U, 32U), SIMD_TUPLE(v64_shl_32, 31U, 32U),
+            SIMD_TUPLE(v64_shr_u32, 31U, 32U),
+            SIMD_TUPLE(v64_shr_s32, 31U, 32U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V64), SIMD_TUPLE(v64_hadd_u8, 0U, 0U),
+            SIMD_TUPLE(v64_u64, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64), SIMD_TUPLE(v64_hadd_s16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64), SIMD_TUPLE(v64_low_u32, 0U, 0U),
+            SIMD_TUPLE(v64_high_u32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(S32_V64), SIMD_TUPLE(v64_low_s32, 0U, 0U),
+            SIMD_TUPLE(v64_high_s32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64V64), SIMD_TUPLE(v64_dotp_s16, 0U, 0U),
+            SIMD_TUPLE(v64_dotp_su8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U8), SIMD_TUPLE(v64_dup_8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U16), SIMD_TUPLE(v64_dup_16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32), SIMD_TUPLE(v64_dup_32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32U32), SIMD_TUPLE(v64_from_32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128V128), SIMD_TUPLE(v128_sad_u8, 0U, 0U),
+            SIMD_TUPLE(v128_ssd_u8, 0U, 0U), SIMD_TUPLE(v128_sad_u16, 0U, 0U));
+INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128V128), SIMD_TUPLE(v128_ssd_s16, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V128_V128V128), SIMD_TUPLE(v128_add_8, 0U, 0U),
+    SIMD_TUPLE(v128_add_16, 0U, 0U), SIMD_TUPLE(v128_sadd_s16, 0U, 0U),
+    SIMD_TUPLE(v128_add_32, 0U, 0U), SIMD_TUPLE(v128_sub_8, 0U, 0U),
+    SIMD_TUPLE(v128_ssub_u8, 0U, 0U), SIMD_TUPLE(v128_ssub_s8, 0U, 0U),
+    SIMD_TUPLE(v128_sub_16, 0U, 0U), SIMD_TUPLE(v128_ssub_s16, 0U, 0U),
+    SIMD_TUPLE(v128_ssub_u16, 0U, 0U), SIMD_TUPLE(v128_sub_32, 0U, 0U),
+    SIMD_TUPLE(v128_ziplo_8, 0U, 0U), SIMD_TUPLE(v128_ziphi_8, 0U, 0U),
+    SIMD_TUPLE(v128_ziplo_16, 0U, 0U), SIMD_TUPLE(v128_ziphi_16, 0U, 0U),
+    SIMD_TUPLE(v128_ziplo_32, 0U, 0U), SIMD_TUPLE(v128_ziphi_32, 0U, 0U),
+    SIMD_TUPLE(v128_ziplo_64, 0U, 0U), SIMD_TUPLE(v128_ziphi_64, 0U, 0U),
+    SIMD_TUPLE(v128_unziphi_8, 0U, 0U), SIMD_TUPLE(v128_unziplo_8, 0U, 0U),
+    SIMD_TUPLE(v128_unziphi_16, 0U, 0U), SIMD_TUPLE(v128_unziplo_16, 0U, 0U),
+    SIMD_TUPLE(v128_unziphi_32, 0U, 0U), SIMD_TUPLE(v128_unziplo_32, 0U, 0U),
+    SIMD_TUPLE(v128_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v128_pack_s16_u8, 0U, 0U),
+    SIMD_TUPLE(v128_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v128_or, 0U, 0U),
+    SIMD_TUPLE(v128_xor, 0U, 0U), SIMD_TUPLE(v128_and, 0U, 0U),
+    SIMD_TUPLE(v128_andn, 0U, 0U), SIMD_TUPLE(v128_mullo_s16, 0U, 0U),
+    SIMD_TUPLE(v128_mulhi_s16, 0U, 0U), SIMD_TUPLE(v128_mullo_s32, 0U, 0U),
+    SIMD_TUPLE(v128_madd_s16, 0U, 0U), SIMD_TUPLE(v128_madd_us8, 0U, 0U),
+    SIMD_TUPLE(v128_avg_u8, 0U, 0U), SIMD_TUPLE(v128_rdavg_u8, 0U, 0U),
+    SIMD_TUPLE(v128_avg_u16, 0U, 0U), SIMD_TUPLE(v128_min_u8, 0U, 0U),
+    SIMD_TUPLE(v128_max_u8, 0U, 0U), SIMD_TUPLE(v128_min_s8, 0U, 0U),
+    SIMD_TUPLE(v128_max_s8, 0U, 0U), SIMD_TUPLE(v128_min_s16, 0U, 0U),
+    SIMD_TUPLE(v128_max_s16, 0U, 0U), SIMD_TUPLE(v128_cmpgt_s8, 0U, 0U),
+    SIMD_TUPLE(v128_cmplt_s8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_8, 0U, 0U),
+    SIMD_TUPLE(v128_cmpgt_s16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128_Part2),
+            SIMD_TUPLE(v128_pack_s32_u16, 0U, 0U),
+            SIMD_TUPLE(v128_rdavg_u16, 0U, 0U), SIMD_TUPLE(v128_add_64, 0U, 0U),
+            SIMD_TUPLE(v128_sub_64, 0U, 0U), SIMD_TUPLE(v128_sadd_s8, 0U, 0U),
+            SIMD_TUPLE(v128_sadd_u8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_16, 0U, 0U),
+            SIMD_TUPLE(v128_cmplt_s16, 0U, 0U),
+            SIMD_TUPLE(v128_cmplt_s32, 0U, 0U),
+            SIMD_TUPLE(v128_cmpeq_32, 0U, 0U),
+            SIMD_TUPLE(v128_cmpgt_s32, 0U, 0U),
+            SIMD_TUPLE(v128_shuffle_8, 15U, 8U),
+            SIMD_TUPLE(v128_min_s32, 0U, 0U), SIMD_TUPLE(v128_max_s32, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<15>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128V128),
+            SIMD_TUPLE(v128_blend_8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s8, 0U, 0U),
+            SIMD_TUPLE(v128_abs_s16, 0U, 0U), SIMD_TUPLE(v128_padd_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpacklo_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpacklo_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpacklo_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v128_unpacklo_s16_s32, 0U, 0U),
+            SIMD_TUPLE(v128_unpackhi_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpackhi_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpackhi_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v128_unpackhi_s16_s32, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<15>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<15>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<1>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part2),
+            SIMD_TUPLE(imm_v128_shr_n_u8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<4>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part3),
+            SIMD_TUPLE(imm_v128_shr_n_u32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<28>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part4),
+            SIMD_TUPLE(imm_v128_shl_n_64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<60>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<60>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<60>, 0U, 0U),
+            SIMD_TUPLE(v128_padd_u8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64V64), SIMD_TUPLE(v128_from_v64, 0U, 0U),
+            SIMD_TUPLE(v128_zip_8, 0U, 0U), SIMD_TUPLE(v128_zip_16, 0U, 0U),
+            SIMD_TUPLE(v128_zip_32, 0U, 0U), SIMD_TUPLE(v128_mul_s16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64U64), SIMD_TUPLE(v128_from_64, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64),
+            SIMD_TUPLE(v128_unpack_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpack_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpack_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v128_unpack_s16_s32, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V128_V128U32), SIMD_TUPLE(v128_shl_8, 7U, 32U),
+    SIMD_TUPLE(v128_shr_u8, 7U, 32U), SIMD_TUPLE(v128_shr_s8, 7U, 32U),
+    SIMD_TUPLE(v128_shl_16, 15U, 32U), SIMD_TUPLE(v128_shr_u16, 15U, 32U),
+    SIMD_TUPLE(v128_shr_s16, 15U, 32U), SIMD_TUPLE(v128_shl_32, 31U, 32U),
+    SIMD_TUPLE(v128_shr_u32, 31U, 32U), SIMD_TUPLE(v128_shr_s32, 31U, 32U),
+    SIMD_TUPLE(v128_shl_64, 63U, 32U), SIMD_TUPLE(v128_shr_u64, 63U, 32U),
+    SIMD_TUPLE(v128_shr_s64, 63U, 32U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128), SIMD_TUPLE(v128_low_u32, 0U, 0U),
+            SIMD_TUPLE(v128_movemask_8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128), SIMD_TUPLE(v128_hadd_u8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V128), SIMD_TUPLE(v128_low_v64, 0U, 0U),
+            SIMD_TUPLE(v128_high_v64, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U8), SIMD_TUPLE(v128_dup_8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U16), SIMD_TUPLE(v128_dup_16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U32), SIMD_TUPLE(v128_dup_32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64), SIMD_TUPLE(v128_dup_64, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V128V128), SIMD_TUPLE(v128_dotp_s16, 0U, 0U),
+            SIMD_TUPLE(v128_dotp_s32, 0U, 0U),
+            SIMD_TUPLE(v128_dotp_su8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256V256), SIMD_TUPLE(v256_sad_u8, 0U, 0U),
+            SIMD_TUPLE(v256_ssd_u8, 0U, 0U), SIMD_TUPLE(v256_sad_u16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256), SIMD_TUPLE(v256_hadd_u8, 0U, 0U),
+            SIMD_TUPLE(v256_low_u64, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V256V256), SIMD_TUPLE(v256_dotp_s16, 0U, 0U),
+            SIMD_TUPLE(v256_dotp_s32, 0U, 0U),
+            SIMD_TUPLE(v256_dotp_su8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256V256), SIMD_TUPLE(v256_ssd_s16, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V256_V256V256), SIMD_TUPLE(v256_add_8, 0U, 0U),
+    SIMD_TUPLE(v256_add_16, 0U, 0U), SIMD_TUPLE(v256_sadd_s16, 0U, 0U),
+    SIMD_TUPLE(v256_add_32, 0U, 0U), SIMD_TUPLE(v256_sub_8, 0U, 0U),
+    SIMD_TUPLE(v256_ssub_u8, 0U, 0U), SIMD_TUPLE(v256_ssub_s8, 0U, 0U),
+    SIMD_TUPLE(v256_sub_16, 0U, 0U), SIMD_TUPLE(v256_ssub_s16, 0U, 0U),
+    SIMD_TUPLE(v256_ssub_u16, 0U, 0U), SIMD_TUPLE(v256_sub_32, 0U, 0U),
+    SIMD_TUPLE(v256_ziplo_8, 0U, 0U), SIMD_TUPLE(v256_ziphi_8, 0U, 0U),
+    SIMD_TUPLE(v256_ziplo_16, 0U, 0U), SIMD_TUPLE(v256_ziphi_16, 0U, 0U),
+    SIMD_TUPLE(v256_ziplo_32, 0U, 0U), SIMD_TUPLE(v256_ziphi_32, 0U, 0U),
+    SIMD_TUPLE(v256_ziplo_64, 0U, 0U), SIMD_TUPLE(v256_ziphi_64, 0U, 0U),
+    SIMD_TUPLE(v256_ziplo_128, 0U, 0U), SIMD_TUPLE(v256_ziphi_128, 0U, 0U),
+    SIMD_TUPLE(v256_unziphi_8, 0U, 0U), SIMD_TUPLE(v256_unziplo_8, 0U, 0U),
+    SIMD_TUPLE(v256_unziphi_16, 0U, 0U), SIMD_TUPLE(v256_unziplo_16, 0U, 0U),
+    SIMD_TUPLE(v256_unziphi_32, 0U, 0U), SIMD_TUPLE(v256_unziplo_32, 0U, 0U),
+    SIMD_TUPLE(v256_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v256_pack_s16_u8, 0U, 0U),
+    SIMD_TUPLE(v256_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v256_or, 0U, 0U),
+    SIMD_TUPLE(v256_xor, 0U, 0U), SIMD_TUPLE(v256_and, 0U, 0U),
+    SIMD_TUPLE(v256_andn, 0U, 0U), SIMD_TUPLE(v256_mullo_s16, 0U, 0U),
+    SIMD_TUPLE(v256_mulhi_s16, 0U, 0U), SIMD_TUPLE(v256_mullo_s32, 0U, 0U),
+    SIMD_TUPLE(v256_madd_s16, 0U, 0U), SIMD_TUPLE(v256_madd_us8, 0U, 0U),
+    SIMD_TUPLE(v256_avg_u8, 0U, 0U), SIMD_TUPLE(v256_rdavg_u8, 0U, 0U),
+    SIMD_TUPLE(v256_avg_u16, 0U, 0U), SIMD_TUPLE(v256_min_u8, 0U, 0U),
+    SIMD_TUPLE(v256_max_u8, 0U, 0U), SIMD_TUPLE(v256_min_s8, 0U, 0U),
+    SIMD_TUPLE(v256_max_s8, 0U, 0U), SIMD_TUPLE(v256_min_s16, 0U, 0U),
+    SIMD_TUPLE(v256_max_s16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s8, 0U, 0U),
+    SIMD_TUPLE(v256_cmplt_s8, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V256_V256V256_Part2), SIMD_TUPLE(v256_cmpeq_8, 0U, 0U),
+    SIMD_TUPLE(v256_min_s32, 0U, 0U), SIMD_TUPLE(v256_max_s32, 0U, 0U),
+    SIMD_TUPLE(v256_add_64, 0U, 0U), SIMD_TUPLE(v256_sub_64, 0U, 0U),
+    SIMD_TUPLE(v256_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v256_cmplt_s16, 0U, 0U),
+    SIMD_TUPLE(v256_cmpeq_16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s32, 0U, 0U),
+    SIMD_TUPLE(v256_cmplt_s32, 0U, 0U), SIMD_TUPLE(v256_cmpeq_32, 0U, 0U),
+    SIMD_TUPLE(v256_shuffle_8, 31U, 8U), SIMD_TUPLE(v256_pshuffle_8, 15U, 8U),
+    SIMD_TUPLE(imm_v256_align<1>, 0U, 0U), SIMD_TUPLE(v256_sadd_s8, 0U, 0U),
+    SIMD_TUPLE(v256_sadd_u8, 0U, 0U), SIMD_TUPLE(v256_pack_s32_u16, 0U, 0U),
+    SIMD_TUPLE(v256_rdavg_u16, 0U, 0U), SIMD_TUPLE(imm_v256_align<2>, 0U, 0U),
+    SIMD_TUPLE(v256_unziphi_64, 0U, 0U), SIMD_TUPLE(v256_unziplo_64, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<3>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<4>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<5>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<6>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<7>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<8>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<9>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<10>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<11>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<12>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<13>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<14>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<15>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<16>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<17>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<18>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<19>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<20>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<21>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<22>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<23>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<24>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<25>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<26>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<27>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<28>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<29>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<30>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<31>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128V128),
+            SIMD_TUPLE(v256_from_v128, 0U, 0U), SIMD_TUPLE(v256_zip_8, 0U, 0U),
+            SIMD_TUPLE(v256_zip_16, 0U, 0U), SIMD_TUPLE(v256_zip_32, 0U, 0U),
+            SIMD_TUPLE(v256_mul_s16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128),
+            SIMD_TUPLE(v256_unpack_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpack_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpack_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v256_unpack_s16_s32, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V256_V256U32), SIMD_TUPLE(v256_shl_8, 7U, 32U),
+    SIMD_TUPLE(v256_shr_u8, 7U, 32U), SIMD_TUPLE(v256_shr_s8, 7U, 32U),
+    SIMD_TUPLE(v256_shl_16, 15U, 32U), SIMD_TUPLE(v256_shr_u16, 15U, 32U),
+    SIMD_TUPLE(v256_shr_s16, 15U, 32U), SIMD_TUPLE(v256_shl_32, 31U, 32U),
+    SIMD_TUPLE(v256_shr_u32, 31U, 32U), SIMD_TUPLE(v256_shr_s32, 31U, 32U),
+    SIMD_TUPLE(v256_shl_64, 63U, 32U), SIMD_TUPLE(v256_shr_u64, 63U, 32U),
+    SIMD_TUPLE(v256_shr_s64, 63U, 32U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256), SIMD_TUPLE(v256_abs_s8, 0U, 0U),
+            SIMD_TUPLE(v256_abs_s16, 0U, 0U), SIMD_TUPLE(v256_padd_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpacklo_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpacklo_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpacklo_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v256_unpacklo_s16_s32, 0U, 0U),
+            SIMD_TUPLE(v256_unpackhi_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpackhi_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpackhi_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v256_unpackhi_s16_s32, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<15>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<17>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<18>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<19>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<21>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<22>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<23>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<25>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<26>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<27>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<29>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<30>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<31>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<8>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part2),
+            SIMD_TUPLE(imm_v256_shl_n_byte<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<15>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<17>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<18>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<19>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<21>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<22>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<23>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<25>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<26>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<27>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<29>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<30>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<31>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<10>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part3),
+            SIMD_TUPLE(imm_v256_shl_n_16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<28>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part4),
+            SIMD_TUPLE(imm_v256_shl_n_64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<60>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<60>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<60>, 0U, 0U),
+            SIMD_TUPLE(v256_padd_u8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part5),
+            SIMD_TUPLE(imm_v256_shr_n_word<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<15>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<15>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256V256V256),
+            SIMD_TUPLE(v256_blend_8, 0U, 0U),
+            SIMD_TUPLE(v256_wideshuffle_8, 63U, 8U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U8), SIMD_TUPLE(v256_dup_8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U16), SIMD_TUPLE(v256_dup_16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U32), SIMD_TUPLE(v256_dup_32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U64), SIMD_TUPLE(v256_dup_64, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256), SIMD_TUPLE(v256_low_u32, 0U, 0U),
+            SIMD_TUPLE(v256_movemask_8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V256), SIMD_TUPLE(v256_low_v64, 0U, 0U));
+
+}  // namespace SIMD_NAMESPACE
diff --git a/third_party/aom/test/simd_sse2_test.cc b/third_party/aom/test/simd_sse2_test.cc
new file mode 100644
index 0000000000..b37a931b38
--- /dev/null
+++ b/third_party/aom/test/simd_sse2_test.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))
+#define ARCH SSE2
+#define ARCH_POSTFIX(name) name##_sse2
+#define SIMD_NAMESPACE simd_test_sse2
+#include "test/simd_impl.h"
+#endif
diff --git a/third_party/aom/test/simd_sse4_test.cc b/third_party/aom/test/simd_sse4_test.cc
new file mode 100644
index 0000000000..b1c9d5cd88
--- /dev/null
+++ b/third_party/aom/test/simd_sse4_test.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))
+#define ARCH SSE4_1
+#define ARCH_POSTFIX(name) name##_sse4_1
+#define SIMD_NAMESPACE simd_test_sse4_1
+#include "test/simd_impl.h"
+#endif
diff --git a/third_party/aom/test/simd_ssse3_test.cc b/third_party/aom/test/simd_ssse3_test.cc
new file mode 100644
index 0000000000..d95c26fb5e
--- /dev/null
+++ b/third_party/aom/test/simd_ssse3_test.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))
+#define ARCH SSSE3
+#define ARCH_POSTFIX(name) name##_ssse3
+#define SIMD_NAMESPACE simd_test_ssse3
+#include "test/simd_impl.h"
+#endif
diff --git a/third_party/aom/test/simple_decoder.sh b/third_party/aom/test/simple_decoder.sh
new file mode 100755
index 0000000000..9b1aea1ed5
--- /dev/null
+++ b/third_party/aom/test/simple_decoder.sh
@@ -0,0 +1,58 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom simple_decoder example code. To add new tests to
+## this file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to simple_decoder_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: Make sure input is available:
+simple_decoder_verify_environment() {
+  if [ ! "$(av1_encode_available)" = "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then
+    return 1
+  fi
+}
+
+# Runs simple_decoder using $1 as input file. $2 is the codec name, and is used
+# solely to name the output file.
+simple_decoder() {
+  local decoder="$(aom_tool_path simple_decoder)"
+  local input_file="$1"
+  local codec="$2"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw"
+
+  if [ ! -x "${decoder}" ]; then
+    elog "${decoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
+      ${devnull} || return 1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+simple_decoder_av1() {
+  if [ "$(av1_decode_available)" = "yes" ]; then
+    if [ ! -e "${AV1_IVF_FILE}" ]; then
+      local file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
+      encode_yuv_raw_input_av1 "${file}" --ivf
+      simple_decoder "${file}" av1 || return 1
+    else
+      simple_decoder "${AV1_IVF_FILE}" av1 || return 1
+    fi
+  fi
+}
+
+simple_decoder_tests="simple_decoder_av1"
+
+run_tests simple_decoder_verify_environment "${simple_decoder_tests}"
diff --git a/third_party/aom/test/simple_encoder.sh b/third_party/aom/test/simple_encoder.sh
new file mode 100755
index 0000000000..dfb1a1b546
--- /dev/null
+++ b/third_party/aom/test/simple_encoder.sh
@@ -0,0 +1,53 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom simple_encoder example. To add new tests to this
+## file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to simple_encoder_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required.
+simple_encoder_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs simple_encoder using the codec specified by $1 with a frame limit of 100.
+simple_encoder() {
+  local encoder="${LIBAOM_BIN_PATH}/simple_encoder${AOM_TEST_EXE_SUFFIX}"
+  local codec="$1"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf"
+
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 0 5 \
+      ${devnull} || return 1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+
+simple_encoder_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    simple_encoder av1 || return 1
+  fi
+}
+
+simple_encoder_tests="simple_encoder_av1"
+
+run_tests simple_encoder_verify_environment "${simple_encoder_tests}"
diff --git a/third_party/aom/test/sse_sum_test.cc b/third_party/aom/test/sse_sum_test.cc
new file mode 100644
index 0000000000..fd6fb886d3
--- /dev/null
+++ b/third_party/aom/test/sse_sum_test.cc
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "test/function_equivalence_test.h"
+
+using libaom_test::ACMRandom;
+using libaom_test::FunctionEquivalenceTest;
+using ::testing::Combine;
+using ::testing::Range;
+using ::testing::Values;
+using ::testing::ValuesIn;
+
+namespace {
+const int kNumIterations = 10000;
+
+typedef uint64_t (*SSI16Func)(const int16_t *src, int src_stride, int width,
+                              int height, int *sum);
+typedef libaom_test::FuncParam<SSI16Func> TestFuncs;
+
+class SumSSETest : public ::testing::TestWithParam<TestFuncs> {
+ public:
+  ~SumSSETest() override = default;
+  void SetUp() override {
+    params_ = this->GetParam();
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2));
+    ASSERT_NE(src_, nullptr);
+  }
+
+  void TearDown() override { aom_free(src_); }
+  void RunTest(int isRandom);
+  void RunSpeedTest();
+
+  void GenRandomData(int width, int height, int stride) {
+    const int msb = 11;  // Up to 12 bit input
+    const int limit = 1 << (msb + 1);
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src_[ii * stride + jj] = rnd_(2) ? rnd_(limit) : -rnd_(limit);
+      }
+    }
+  }
+
+  void GenExtremeData(int width, int height, int stride) {
+    const int msb = 11;  // Up to 12 bit input
+    const int limit = 1 << (msb + 1);
+    const int val = rnd_(2) ? limit - 1 : -(limit - 1);
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src_[ii * stride + jj] = val;
+      }
+    }
+  }
+
+ protected:
+  TestFuncs params_;
+  int16_t *src_;
+  ACMRandom rnd_;
+};
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSSETest);
+
+void SumSSETest::RunTest(int isRandom) {
+  for (int k = 0; k < kNumIterations; k++) {
+    const int width = 4 * (rnd_(31) + 1);   // Up to 128x128
+    const int height = 4 * (rnd_(31) + 1);  // Up to 128x128
+    int stride = 4 << rnd_(7);              // Up to 256 stride
+    while (stride < width) {                // Make sure it's valid
+      stride = 4 << rnd_(7);
+    }
+    if (isRandom) {
+      GenRandomData(width, height, stride);
+    } else {
+      GenExtremeData(width, height, stride);
+    }
+    int sum_ref = 0, sum_tst = 0;
+    const uint64_t sse_ref =
+        params_.ref_func(src_, stride, width, height, &sum_ref);
+    const uint64_t sse_tst =
+        params_.tst_func(src_, stride, width, height, &sum_tst);
+
+    EXPECT_EQ(sse_ref, sse_tst)
+        << "Error: SumSSETest [" << width << "x" << height
+        << "] C SSE does not match optimized output.";
+    EXPECT_EQ(sum_ref, sum_tst)
+        << "Error: SumSSETest [" << width << "x" << height
+        << "] C Sum does not match optimized output.";
+  }
+}
+
+void SumSSETest::RunSpeedTest() {
+  for (int block = BLOCK_4X4; block < BLOCK_SIZES_ALL; block++) {
+    const int width = block_size_wide[block];   // Up to 128x128
+    const int height = block_size_high[block];  // Up to 128x128
+    int stride = 4 << rnd_(7);                  // Up to 256 stride
+    while (stride < width) {                    // Make sure it's valid
+      stride = 4 << rnd_(7);
+    }
+    GenExtremeData(width, height, stride);
+    const int num_loops = 1000000000 / (width + height);
+    int sum_ref = 0, sum_tst = 0;
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      params_.ref_func(src_, stride, width, height, &sum_ref);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("SumSquaresTest C %3dx%-3d: %7.2f ns\n", width, height,
+           1000.0 * elapsed_time / num_loops);
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+    for (int i = 0; i < num_loops; ++i)
+      params_.tst_func(src_, stride, width, height, &sum_tst);
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("SumSquaresTest Test %3dx%-3d: %7.2f ns\n", width, height,
+           1000.0 * elapsed_time1 / num_loops);
+  }
+}
+
+TEST_P(SumSSETest, OperationCheck) {
+  RunTest(1);  // GenRandomData
+}
+
+TEST_P(SumSSETest, ExtremeValues) {
+  RunTest(0);  // GenExtremeData
+}
+
+TEST_P(SumSSETest, DISABLED_Speed) { RunSpeedTest(); }
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, SumSSETest,
+                         ::testing::Values(TestFuncs(
+                             &aom_sum_sse_2d_i16_c, &aom_sum_sse_2d_i16_sse2)));
+
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, SumSSETest,
+                         ::testing::Values(TestFuncs(
+                             &aom_sum_sse_2d_i16_c, &aom_sum_sse_2d_i16_neon)));
+#endif  // HAVE_NEON
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, SumSSETest,
+                         ::testing::Values(TestFuncs(
+                             &aom_sum_sse_2d_i16_c, &aom_sum_sse_2d_i16_avx2)));
+#endif  // HAVE_AVX2
+
+#if HAVE_SVE
+INSTANTIATE_TEST_SUITE_P(SVE, SumSSETest,
+                         ::testing::Values(TestFuncs(&aom_sum_sse_2d_i16_c,
+                                                     &aom_sum_sse_2d_i16_sve)));
+#endif  // HAVE_SVE
+
+}  // namespace
diff --git a/third_party/aom/test/still_picture_test.cc b/third_party/aom/test/still_picture_test.cc
new file mode 100644
index 0000000000..3dfb1c8693
--- /dev/null
+++ b/third_party/aom/test/still_picture_test.cc
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+// This class is used to test the presence of still picture feature.
+class StillPicturePresenceTest
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  StillPicturePresenceTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        enable_full_header_(GET_PARAM(2)) {
+    still_picture_coding_violated_ = false;
+  }
+  ~StillPicturePresenceTest() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = AOM_Q;
+    cfg_.g_threads = 1;
+    cfg_.full_still_picture_hdr = enable_full_header_;
+    cfg_.g_limit = 1;
+  }
+
+  bool DoDecode() const override { return true; }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 5);
+      encoder->Control(AV1E_SET_FORCE_VIDEO_MODE, 0);
+    }
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (AOM_CODEC_OK == res_dec) {
+      aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
+      AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_STILL_PICTURE,
+                                    &still_pic_info_);
+      if (still_pic_info_.is_still_picture != 1) {
+        still_picture_coding_violated_ = true;
+      }
+      if (still_pic_info_.is_reduced_still_picture_hdr == enable_full_header_) {
+        /* If full_still_picture_header is enabled in encoder config but
+         * bitstream contains reduced_still_picture_header set, then set
+         * still_picture_coding_violated_ to true.
+         * Similarly, if full_still_picture_header is disabled in encoder config
+         * but bitstream contains reduced_still_picture_header not set, then set
+         * still_picture_coding_violated_ to true.
+         */
+        still_picture_coding_violated_ = true;
+      }
+    }
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  bool still_picture_coding_violated_;
+  int enable_full_header_;
+  aom_still_picture_info still_pic_info_;
+  aom_rc_mode end_usage_check_;
+};
+
+TEST_P(StillPicturePresenceTest, StillPictureEncodePresenceTest) {
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 1);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(still_picture_coding_violated_, false);
+}
+
+AV1_INSTANTIATE_TEST_SUITE(StillPicturePresenceTest,
+                           ::testing::Values(::libaom_test::kOnePassGood,
+                                             ::libaom_test::kTwoPassGood),
+                           ::testing::Values(1, 0));
+}  // namespace
diff --git a/third_party/aom/test/subtract_test.cc b/third_party/aom/test/subtract_test.cc
new file mode 100644
index 0000000000..e591e6543d
--- /dev/null
+++ b/third_party/aom/test/subtract_test.cc
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdint>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/blockd.h"
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/mem.h"
+
+typedef void (*SubtractFunc)(int rows, int cols, int16_t *diff_ptr,
+                             ptrdiff_t diff_stride, const uint8_t *src_ptr,
+                             ptrdiff_t src_stride, const uint8_t *pred_ptr,
+                             ptrdiff_t pred_stride);
+
+namespace {
+
+using std::get;
+using std::make_tuple;
+using std::tuple;
+
+using libaom_test::ACMRandom;
+
+// <BLOCK_SIZE, optimized subtract func, reference subtract func>
+using Params = tuple<BLOCK_SIZE, SubtractFunc, SubtractFunc>;
+
+class AV1SubtractBlockTestBase : public ::testing::Test {
+ public:
+  AV1SubtractBlockTestBase(BLOCK_SIZE bs, int bit_depth, SubtractFunc func,
+                           SubtractFunc ref_func) {
+    block_width_ = block_size_wide[bs];
+    block_height_ = block_size_high[bs];
+    func_ = func;
+    ref_func_ = ref_func;
+    if (bit_depth == -1) {
+      hbd_ = false;
+      bit_depth_ = AOM_BITS_8;
+    } else {
+      hbd_ = true;
+      bit_depth_ = static_cast<aom_bit_depth_t>(bit_depth);
+    }
+  }
+
+  void SetUp() override {
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+
+    const size_t max_width = 128;
+    const size_t max_block_size = max_width * max_width;
+    if (hbd_) {
+      src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
+          aom_memalign(16, max_block_size * sizeof(uint16_t))));
+      ASSERT_NE(src_, nullptr);
+      pred_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
+          aom_memalign(16, max_block_size * sizeof(uint16_t))));
+      ASSERT_NE(pred_, nullptr);
+    } else {
+      src_ = reinterpret_cast<uint8_t *>(
+          aom_memalign(16, max_block_size * sizeof(uint8_t)));
+      ASSERT_NE(src_, nullptr);
+      pred_ = reinterpret_cast<uint8_t *>(
+          aom_memalign(16, max_block_size * sizeof(uint8_t)));
+      ASSERT_NE(pred_, nullptr);
+    }
+    diff_ = reinterpret_cast<int16_t *>(
+        aom_memalign(32, max_block_size * sizeof(int16_t)));
+    ASSERT_NE(diff_, nullptr);
+  }
+
+  void TearDown() override {
+    if (hbd_) {
+      aom_free(CONVERT_TO_SHORTPTR(src_));
+      aom_free(CONVERT_TO_SHORTPTR(pred_));
+    } else {
+      aom_free(src_);
+      aom_free(pred_);
+    }
+    aom_free(diff_);
+  }
+
+ protected:
+  void CheckResult();
+  void RunForSpeed();
+
+ private:
+  void FillInputs();
+
+  ACMRandom rnd_;
+  int block_height_;
+  int block_width_;
+  bool hbd_;
+  aom_bit_depth_t bit_depth_;
+  SubtractFunc func_;
+  SubtractFunc ref_func_;
+  uint8_t *src_;
+  uint8_t *pred_;
+  int16_t *diff_;
+};
+
+void AV1SubtractBlockTestBase::FillInputs() {
+  const size_t max_width = 128;
+  const int max_block_size = max_width * max_width;
+  if (hbd_) {
+    const int mask = (1 << bit_depth_) - 1;
+    for (int i = 0; i < max_block_size; ++i) {
+      CONVERT_TO_SHORTPTR(src_)[i] = rnd_.Rand16() & mask;
+      CONVERT_TO_SHORTPTR(pred_)[i] = rnd_.Rand16() & mask;
+    }
+  } else {
+    if (src_ == nullptr) {
+      std::cerr << "gadfg" << std::endl;
+    }
+    for (int i = 0; i < max_block_size; ++i) {
+      src_[i] = rnd_.Rand8();
+      pred_[i] = rnd_.Rand8();
+    }
+  }
+}
+
+void AV1SubtractBlockTestBase::CheckResult() {
+  const int test_num = 100;
+  int i;
+
+  for (i = 0; i < test_num; ++i) {
+    FillInputs();
+
+    func_(block_height_, block_width_, diff_, block_width_, src_, block_width_,
+          pred_, block_width_);
+
+    if (hbd_)
+      for (int r = 0; r < block_height_; ++r) {
+        for (int c = 0; c < block_width_; ++c) {
+          EXPECT_EQ(diff_[r * block_width_ + c],
+                    (CONVERT_TO_SHORTPTR(src_)[r * block_width_ + c] -
+                     CONVERT_TO_SHORTPTR(pred_)[r * block_width_ + c]))
+              << "r = " << r << ", c = " << c << ", test: " << i;
+        }
+      }
+    else {
+      for (int r = 0; r < block_height_; ++r) {
+        for (int c = 0; c < block_width_; ++c) {
+          EXPECT_EQ(diff_[r * block_width_ + c],
+                    src_[r * block_width_ + c] - pred_[r * block_width_ + c])
+              << "r = " << r << ", c = " << c << ", test: " << i;
+        }
+      }
+    }
+  }
+}
+
+void AV1SubtractBlockTestBase::RunForSpeed() {
+  const int test_num = 200000;
+  int i;
+
+  if (ref_func_ == func_) GTEST_SKIP();
+
+  FillInputs();
+
+  aom_usec_timer ref_timer;
+  aom_usec_timer_start(&ref_timer);
+  for (i = 0; i < test_num; ++i) {
+    ref_func_(block_height_, block_width_, diff_, block_width_, src_,
+              block_width_, pred_, block_width_);
+  }
+  aom_usec_timer_mark(&ref_timer);
+  const int64_t ref_elapsed_time = aom_usec_timer_elapsed(&ref_timer);
+
+  FillInputs();
+
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (i = 0; i < test_num; ++i) {
+    func_(block_height_, block_width_, diff_, block_width_, src_, block_width_,
+          pred_, block_width_);
+  }
+  aom_usec_timer_mark(&timer);
+  const int64_t elapsed_time = aom_usec_timer_elapsed(&timer);
+
+  printf(
+      "[%dx%d]: "
+      "ref_time=%6" PRId64 " \t simd_time=%6" PRId64
+      " \t "
+      "gain=%f \n",
+      block_width_, block_height_, ref_elapsed_time, elapsed_time,
+      static_cast<double>(ref_elapsed_time) /
+          static_cast<double>(elapsed_time));
+}
+
+class AV1SubtractBlockTest : public ::testing::WithParamInterface<Params>,
+                             public AV1SubtractBlockTestBase {
+ public:
+  AV1SubtractBlockTest()
+      : AV1SubtractBlockTestBase(GET_PARAM(0), -1, GET_PARAM(1), GET_PARAM(2)) {
+  }
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1SubtractBlockTest);
+
+TEST_P(AV1SubtractBlockTest, CheckResult) { CheckResult(); }
+TEST_P(AV1SubtractBlockTest, DISABLED_Speed) { RunForSpeed(); }
+
+const BLOCK_SIZE kValidBlockSize[] = { BLOCK_4X4,    BLOCK_4X8,    BLOCK_8X4,
+                                       BLOCK_8X8,    BLOCK_8X16,   BLOCK_16X8,
+                                       BLOCK_16X16,  BLOCK_16X32,  BLOCK_32X16,
+                                       BLOCK_32X32,  BLOCK_32X64,  BLOCK_64X32,
+                                       BLOCK_64X64,  BLOCK_64X128, BLOCK_128X64,
+                                       BLOCK_128X128 };
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AV1SubtractBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kValidBlockSize),
+                       ::testing::Values(&aom_subtract_block_c),
+                       ::testing::Values(&aom_subtract_block_c)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AV1SubtractBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kValidBlockSize),
+                       ::testing::Values(&aom_subtract_block_sse2),
+                       ::testing::Values(&aom_subtract_block_c)));
+#endif
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1SubtractBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kValidBlockSize),
+                       ::testing::Values(&aom_subtract_block_avx2),
+                       ::testing::Values(&aom_subtract_block_c)));
+
+#endif
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1SubtractBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kValidBlockSize),
+                       ::testing::Values(&aom_subtract_block_neon),
+                       ::testing::Values(&aom_subtract_block_c)));
+
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+
+// <BLOCK_SIZE, bit_depth, optimized subtract func, reference subtract func>
+using ParamsHBD = tuple<BLOCK_SIZE, int, SubtractFunc, SubtractFunc>;
+
+class AV1HBDSubtractBlockTest : public ::testing::WithParamInterface<ParamsHBD>,
+                                public AV1SubtractBlockTestBase {
+ public:
+  AV1HBDSubtractBlockTest()
+      : AV1SubtractBlockTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2),
+                                 GET_PARAM(3)) {}
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HBDSubtractBlockTest);
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AV1HBDSubtractBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kValidBlockSize),
+                       ::testing::Values(12),
+                       ::testing::Values(&aom_highbd_subtract_block_c),
+                       ::testing::Values(&aom_highbd_subtract_block_c)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AV1HBDSubtractBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kValidBlockSize),
+                       ::testing::Values(12),
+                       ::testing::Values(&aom_highbd_subtract_block_sse2),
+                       ::testing::Values(&aom_highbd_subtract_block_c)));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1HBDSubtractBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kValidBlockSize),
+                       ::testing::Values(12),
+                       ::testing::Values(&aom_highbd_subtract_block_neon),
+                       ::testing::Values(&aom_highbd_subtract_block_c)));
+#endif
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/sum_squares_test.cc b/third_party/aom/test/sum_squares_test.cc
new file mode 100644
index 0000000000..7b98ced523
--- /dev/null
+++ b/third_party/aom/test/sum_squares_test.cc
@@ -0,0 +1,928 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "av1/common/common_data.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "test/function_equivalence_test.h"
+
+using libaom_test::ACMRandom;
+using libaom_test::FunctionEquivalenceTest;
+using ::testing::Combine;
+using ::testing::Range;
+using ::testing::Values;
+using ::testing::ValuesIn;
+
+namespace {
+const int kNumIterations = 10000;
+
+static const int16_t kInt13Max = (1 << 12) - 1;
+
+typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int width,
+                              int height);
+typedef libaom_test::FuncParam<SSI16Func> TestFuncs;
+
+class SumSquaresTest : public ::testing::TestWithParam<TestFuncs> {
+ public:
+  ~SumSquaresTest() override = default;
+  void SetUp() override {
+    params_ = this->GetParam();
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2));
+    ASSERT_NE(src_, nullptr);
+  }
+
+  void TearDown() override { aom_free(src_); }
+  void RunTest(bool is_random);
+  void RunSpeedTest();
+
+  void GenRandomData(int width, int height, int stride) {
+    const int msb = 11;  // Up to 12 bit input
+    const int limit = 1 << (msb + 1);
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src_[ii * stride + jj] = rnd_(2) ? rnd_(limit) : -rnd_(limit);
+      }
+    }
+  }
+
+  void GenExtremeData(int width, int height, int stride) {
+    const int msb = 11;  // Up to 12 bit input
+    const int limit = 1 << (msb + 1);
+    const int val = rnd_(2) ? limit - 1 : -(limit - 1);
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src_[ii * stride + jj] = val;
+      }
+    }
+  }
+
+ protected:
+  TestFuncs params_;
+  int16_t *src_;
+  ACMRandom rnd_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquaresTest);
+
+void SumSquaresTest::RunTest(bool is_random) {
+  int failed = 0;
+  for (int k = 0; k < kNumIterations; k++) {
+    const int width = 4 * (rnd_(31) + 1);   // Up to 128x128
+    const int height = 4 * (rnd_(31) + 1);  // Up to 128x128
+    int stride = 4 << rnd_(7);              // Up to 256 stride
+    while (stride < width) {                // Make sure it's valid
+      stride = 4 << rnd_(7);
+    }
+    if (is_random) {
+      GenRandomData(width, height, stride);
+    } else {
+      GenExtremeData(width, height, stride);
+    }
+    const uint64_t res_ref = params_.ref_func(src_, stride, width, height);
+    uint64_t res_tst;
+    API_REGISTER_STATE_CHECK(res_tst =
+                                 params_.tst_func(src_, stride, width, height));
+
+    if (!failed) {
+      failed = res_ref != res_tst;
+      EXPECT_EQ(res_ref, res_tst)
+          << "Error: Sum Squares Test [" << width << "x" << height
+          << "] C output does not match optimized output.";
+    }
+  }
+}
+
+void SumSquaresTest::RunSpeedTest() {
+  for (int block = BLOCK_4X4; block < BLOCK_SIZES_ALL; block++) {
+    const int width = block_size_wide[block];   // Up to 128x128
+    const int height = block_size_high[block];  // Up to 128x128
+    int stride = 4 << rnd_(7);                  // Up to 256 stride
+    while (stride < width) {                    // Make sure it's valid
+      stride = 4 << rnd_(7);
+    }
+    GenExtremeData(width, height, stride);
+    const int num_loops = 1000000000 / (width + height);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      params_.ref_func(src_, stride, width, height);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("SumSquaresTest C %3dx%-3d: %7.2f ns\n", width, height,
+           1000.0 * elapsed_time / num_loops);
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+    for (int i = 0; i < num_loops; ++i)
+      params_.tst_func(src_, stride, width, height);
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("SumSquaresTest Test %3dx%-3d: %7.2f ns\n", width, height,
+           1000.0 * elapsed_time1 / num_loops);
+  }
+}
+
+TEST_P(SumSquaresTest, OperationCheck) {
+  RunTest(true);  // GenRandomData
+}
+
+TEST_P(SumSquaresTest, ExtremeValues) {
+  RunTest(false);  // GenExtremeData
+}
+
+TEST_P(SumSquaresTest, DISABLED_Speed) { RunSpeedTest(); }
+
+#if HAVE_SSE2
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, SumSquaresTest,
+    ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
+                                &aom_sum_squares_2d_i16_sse2)));
+
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, SumSquaresTest,
+    ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
+                                &aom_sum_squares_2d_i16_neon)));
+
+#endif  // HAVE_NEON
+
+#if HAVE_SVE
+INSTANTIATE_TEST_SUITE_P(
+    SVE, SumSquaresTest,
+    ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
+                                &aom_sum_squares_2d_i16_sve)));
+
+#endif  // HAVE_SVE
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, SumSquaresTest,
+    ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
+                                &aom_sum_squares_2d_i16_avx2)));
+#endif  // HAVE_AVX2
+
+//////////////////////////////////////////////////////////////////////////////
+// 1D version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef uint64_t (*F1D)(const int16_t *src, uint32_t n);
+typedef libaom_test::FuncParam<F1D> TestFuncs1D;
+
+class SumSquares1DTest : public FunctionEquivalenceTest<F1D> {
+ protected:
+  static const int kIterations = 1000;
+  static const int kMaxSize = 256;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquares1DTest);
+
+TEST_P(SumSquares1DTest, RandomValues) {
+  DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < kMaxSize * kMaxSize; ++i)
+      src[i] = rng_(kInt13Max * 2 + 1) - kInt13Max;
+
+    // Block size is between 64 and 128 * 128 and is always a multiple of 64.
+    const int n = (rng_(255) + 1) * 64;
+
+    const uint64_t ref_res = params_.ref_func(src, n);
+    uint64_t tst_res;
+    API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, n));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+TEST_P(SumSquares1DTest, ExtremeValues) {
+  DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    if (rng_(2)) {
+      for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = kInt13Max;
+    } else {
+      for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = -kInt13Max;
+    }
+
+    // Block size is between 64 and 128 * 128 and is always a multiple of 64.
+    const int n = (rng_(255) + 1) * 64;
+
+    const uint64_t ref_res = params_.ref_func(src, n);
+    uint64_t tst_res;
+    API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, n));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, SumSquares1DTest,
+                         ::testing::Values(TestFuncs1D(
+                             aom_sum_squares_i16_c, aom_sum_squares_i16_sse2)));
+
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, SumSquares1DTest,
+                         ::testing::Values(TestFuncs1D(
+                             aom_sum_squares_i16_c, aom_sum_squares_i16_neon)));
+
+#endif  // HAVE_NEON
+
+#if HAVE_SVE
+INSTANTIATE_TEST_SUITE_P(SVE, SumSquares1DTest,
+                         ::testing::Values(TestFuncs1D(
+                             aom_sum_squares_i16_c, aom_sum_squares_i16_sve)));
+
+#endif  // HAVE_SVE
+
+typedef int64_t (*SSEFunc)(const uint8_t *a, int a_stride, const uint8_t *b,
+                           int b_stride, int width, int height);
+typedef libaom_test::FuncParam<SSEFunc> TestSSEFuncs;
+
+typedef std::tuple<TestSSEFuncs, int> SSETestParam;
+
+class SSETest : public ::testing::TestWithParam<SSETestParam> {
+ public:
+  ~SSETest() override = default;
+  void SetUp() override {
+    params_ = GET_PARAM(0);
+    width_ = GET_PARAM(1);
+    is_hbd_ =
+#if CONFIG_AV1_HIGHBITDEPTH
+        params_.ref_func == aom_highbd_sse_c;
+#else
+        false;
+#endif
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2));
+    ref_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2));
+    ASSERT_NE(src_, nullptr);
+    ASSERT_NE(ref_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(src_);
+    aom_free(ref_);
+  }
+  void RunTest(bool is_random, int width, int height, int run_times);
+
+  void GenRandomData(int width, int height, int stride) {
+    uint16_t *src16 = reinterpret_cast<uint16_t *>(src_);
+    uint16_t *ref16 = reinterpret_cast<uint16_t *>(ref_);
+    const int msb = 11;  // Up to 12 bit input
+    const int limit = 1 << (msb + 1);
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        if (!is_hbd_) {
+          src_[ii * stride + jj] = rnd_.Rand8();
+          ref_[ii * stride + jj] = rnd_.Rand8();
+        } else {
+          src16[ii * stride + jj] = rnd_(limit);
+          ref16[ii * stride + jj] = rnd_(limit);
+        }
+      }
+    }
+  }
+
+  void GenExtremeData(int width, int height, int stride, uint8_t *data,
+                      int16_t val) {
+    uint16_t *data16 = reinterpret_cast<uint16_t *>(data);
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        if (!is_hbd_) {
+          data[ii * stride + jj] = static_cast<uint8_t>(val);
+        } else {
+          data16[ii * stride + jj] = val;
+        }
+      }
+    }
+  }
+
+ protected:
+  bool is_hbd_;
+  int width_;
+  TestSSEFuncs params_;
+  uint8_t *src_;
+  uint8_t *ref_;
+  ACMRandom rnd_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSETest);
+
+void SSETest::RunTest(bool is_random, int width, int height, int run_times) {
+  int failed = 0;
+  aom_usec_timer ref_timer, test_timer;
+  for (int k = 0; k < 3; k++) {
+    int stride = 4 << rnd_(7);  // Up to 256 stride
+    while (stride < width) {    // Make sure it's valid
+      stride = 4 << rnd_(7);
+    }
+    if (is_random) {
+      GenRandomData(width, height, stride);
+    } else {
+      const int msb = is_hbd_ ? 12 : 8;  // Up to 12 bit input
+      const int limit = (1 << msb) - 1;
+      if (k == 0) {
+        GenExtremeData(width, height, stride, src_, 0);
+        GenExtremeData(width, height, stride, ref_, limit);
+      } else {
+        GenExtremeData(width, height, stride, src_, limit);
+        GenExtremeData(width, height, stride, ref_, 0);
+      }
+    }
+    int64_t res_ref, res_tst;
+    uint8_t *src = src_;
+    uint8_t *ref = ref_;
+    if (is_hbd_) {
+      src = CONVERT_TO_BYTEPTR(src_);
+      ref = CONVERT_TO_BYTEPTR(ref_);
+    }
+    res_ref = params_.ref_func(src, stride, ref, stride, width, height);
+    res_tst = params_.tst_func(src, stride, ref, stride, width, height);
+    if (run_times > 1) {
+      aom_usec_timer_start(&ref_timer);
+      for (int j = 0; j < run_times; j++) {
+        params_.ref_func(src, stride, ref, stride, width, height);
+      }
+      aom_usec_timer_mark(&ref_timer);
+      const int elapsed_time_c =
+          static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+      aom_usec_timer_start(&test_timer);
+      for (int j = 0; j < run_times; j++) {
+        params_.tst_func(src, stride, ref, stride, width, height);
+      }
+      aom_usec_timer_mark(&test_timer);
+      const int elapsed_time_simd =
+          static_cast<int>(aom_usec_timer_elapsed(&test_timer));
+
+      printf(
+          "c_time=%d \t simd_time=%d \t "
+          "gain=%d\n",
+          elapsed_time_c, elapsed_time_simd,
+          (elapsed_time_c / elapsed_time_simd));
+    } else {
+      if (!failed) {
+        failed = res_ref != res_tst;
+        EXPECT_EQ(res_ref, res_tst)
+            << "Error:" << (is_hbd_ ? "hbd " : " ") << k << " SSE Test ["
+            << width << "x" << height
+            << "] C output does not match optimized output.";
+      }
+    }
+  }
+}
+
+TEST_P(SSETest, OperationCheck) {
+  for (int height = 4; height <= 128; height += 4) {
+    RunTest(true, width_, height, 1);  // GenRandomData
+  }
+}
+
+TEST_P(SSETest, ExtremeValues) {
+  for (int height = 4; height <= 128; height += 4) {
+    RunTest(false, width_, height, 1);
+  }
+}
+
+TEST_P(SSETest, DISABLED_Speed) {
+  for (int height = 4; height <= 128; height += 4) {
+    RunTest(true, width_, height, 100);
+  }
+}
+
+#if HAVE_NEON
+TestSSEFuncs sse_neon[] = {
+  TestSSEFuncs(&aom_sse_c, &aom_sse_neon),
+#if CONFIG_AV1_HIGHBITDEPTH
+  TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_neon)
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(NEON, SSETest,
+                         Combine(ValuesIn(sse_neon), Range(4, 129, 4)));
+#endif  // HAVE_NEON
+
+#if HAVE_NEON_DOTPROD
+TestSSEFuncs sse_neon_dotprod[] = {
+  TestSSEFuncs(&aom_sse_c, &aom_sse_neon_dotprod),
+};
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SSETest,
+                         Combine(ValuesIn(sse_neon_dotprod), Range(4, 129, 4)));
+#endif  // HAVE_NEON_DOTPROD
+
+#if HAVE_SSE4_1
+TestSSEFuncs sse_sse4[] = {
+  TestSSEFuncs(&aom_sse_c, &aom_sse_sse4_1),
+#if CONFIG_AV1_HIGHBITDEPTH
+  TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_sse4_1)
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(SSE4_1, SSETest,
+                         Combine(ValuesIn(sse_sse4), Range(4, 129, 4)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+
+TestSSEFuncs sse_avx2[] = {
+  TestSSEFuncs(&aom_sse_c, &aom_sse_avx2),
+#if CONFIG_AV1_HIGHBITDEPTH
+  TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_avx2)
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, SSETest,
+                         Combine(ValuesIn(sse_avx2), Range(4, 129, 4)));
+#endif  // HAVE_AVX2
+
+#if HAVE_SVE
+#if CONFIG_AV1_HIGHBITDEPTH
+TestSSEFuncs sse_sve[] = { TestSSEFuncs(&aom_highbd_sse_c,
+                                        &aom_highbd_sse_sve) };
+INSTANTIATE_TEST_SUITE_P(SVE, SSETest,
+                         Combine(ValuesIn(sse_sve), Range(4, 129, 4)));
+#endif
+#endif  // HAVE_SVE
+
+//////////////////////////////////////////////////////////////////////////////
+// get_blk sum squares test functions
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*sse_sum_func)(const int16_t *data, int stride, int bw, int bh,
+                             int *x_sum, int64_t *x2_sum);
+typedef libaom_test::FuncParam<sse_sum_func> TestSSE_SumFuncs;
+
+typedef std::tuple<TestSSE_SumFuncs, TX_SIZE> SSE_SumTestParam;
+
+class SSE_Sum_Test : public ::testing::TestWithParam<SSE_SumTestParam> {
+ public:
+  ~SSE_Sum_Test() override = default;
+  void SetUp() override {
+    params_ = GET_PARAM(0);
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<int16_t *>(aom_memalign(32, 256 * 256 * 2));
+    ASSERT_NE(src_, nullptr);
+  }
+
+  void TearDown() override { aom_free(src_); }
+  void RunTest(bool is_random, int tx_size, int run_times);
+
+  void GenRandomData(int width, int height, int stride) {
+    const int msb = 11;  // Up to 12 bit input
+    const int limit = 1 << (msb + 1);
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src_[ii * stride + jj] = rnd_(limit);
+      }
+    }
+  }
+
+  void GenExtremeData(int width, int height, int stride, int16_t *data,
+                      int16_t val) {
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        data[ii * stride + jj] = val;
+      }
+    }
+  }
+
+ protected:
+  TestSSE_SumFuncs params_;
+  int16_t *src_;
+  ACMRandom rnd_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSE_Sum_Test);
+
+void SSE_Sum_Test::RunTest(bool is_random, int tx_size, int run_times) {
+  aom_usec_timer ref_timer, test_timer;
+  int width = tx_size_wide[tx_size];
+  int height = tx_size_high[tx_size];
+  for (int k = 0; k < 3; k++) {
+    int stride = 4 << rnd_(7);  // Up to 256 stride
+    while (stride < width) {    // Make sure it's valid
+      stride = 4 << rnd_(7);
+    }
+    if (is_random) {
+      GenRandomData(width, height, stride);
+    } else {
+      const int msb = 12;  // Up to 12 bit input
+      const int limit = (1 << msb) - 1;
+      if (k == 0) {
+        GenExtremeData(width, height, stride, src_, limit);
+      } else {
+        GenExtremeData(width, height, stride, src_, -limit);
+      }
+    }
+    int sum_c = 0;
+    int64_t sse_intr = 0;
+    int sum_intr = 0;
+    int64_t sse_c = 0;
+
+    params_.ref_func(src_, stride, width, height, &sum_c, &sse_c);
+    params_.tst_func(src_, stride, width, height, &sum_intr, &sse_intr);
+
+    if (run_times > 1) {
+      aom_usec_timer_start(&ref_timer);
+      for (int j = 0; j < run_times; j++) {
+        params_.ref_func(src_, stride, width, height, &sum_c, &sse_c);
+      }
+      aom_usec_timer_mark(&ref_timer);
+      const int elapsed_time_c =
+          static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+      aom_usec_timer_start(&test_timer);
+      for (int j = 0; j < run_times; j++) {
+        params_.tst_func(src_, stride, width, height, &sum_intr, &sse_intr);
+      }
+      aom_usec_timer_mark(&test_timer);
+      const int elapsed_time_simd =
+          static_cast<int>(aom_usec_timer_elapsed(&test_timer));
+
+      printf(
+          "c_time=%d \t simd_time=%d \t "
+          "gain=%f\t width=%d\t height=%d \n",
+          elapsed_time_c, elapsed_time_simd,
+          (float)((float)elapsed_time_c / (float)elapsed_time_simd), width,
+          height);
+
+    } else {
+      EXPECT_EQ(sum_c, sum_intr)
+          << "Error:" << k << " SSE Sum Test [" << width << "x" << height
+          << "] C output does not match optimized output.";
+      EXPECT_EQ(sse_c, sse_intr)
+          << "Error:" << k << " SSE Sum Test [" << width << "x" << height
+          << "] C output does not match optimized output.";
+    }
+  }
+}
+
+TEST_P(SSE_Sum_Test, OperationCheck) {
+  RunTest(true, GET_PARAM(1), 1);  // GenRandomData
+}
+
+TEST_P(SSE_Sum_Test, ExtremeValues) { RunTest(false, GET_PARAM(1), 1); }
+
+TEST_P(SSE_Sum_Test, DISABLED_Speed) { RunTest(true, GET_PARAM(1), 10000); }
+
+#if HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON
+const TX_SIZE kValidBlockSize[] = { TX_4X4,   TX_8X8,   TX_16X16, TX_32X32,
+                                    TX_64X64, TX_4X8,   TX_8X4,   TX_8X16,
+                                    TX_16X8,  TX_16X32, TX_32X16, TX_64X32,
+                                    TX_32X64, TX_4X16,  TX_16X4,  TX_8X32,
+                                    TX_32X8,  TX_16X64, TX_64X16 };
+#endif
+
+#if HAVE_SSE2
+TestSSE_SumFuncs sse_sum_sse2[] = { TestSSE_SumFuncs(
+    &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_sse2) };
+INSTANTIATE_TEST_SUITE_P(SSE2, SSE_Sum_Test,
+                         Combine(ValuesIn(sse_sum_sse2),
+                                 ValuesIn(kValidBlockSize)));
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2
+TestSSE_SumFuncs sse_sum_avx2[] = { TestSSE_SumFuncs(
+    &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_avx2) };
+INSTANTIATE_TEST_SUITE_P(AVX2, SSE_Sum_Test,
+                         Combine(ValuesIn(sse_sum_avx2),
+                                 ValuesIn(kValidBlockSize)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+TestSSE_SumFuncs sse_sum_neon[] = { TestSSE_SumFuncs(
+    &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_neon) };
+INSTANTIATE_TEST_SUITE_P(NEON, SSE_Sum_Test,
+                         Combine(ValuesIn(sse_sum_neon),
+                                 ValuesIn(kValidBlockSize)));
+#endif  // HAVE_NEON
+
+#if HAVE_SVE
+TestSSE_SumFuncs sse_sum_sve[] = { TestSSE_SumFuncs(&aom_get_blk_sse_sum_c,
+                                                    &aom_get_blk_sse_sum_sve) };
+INSTANTIATE_TEST_SUITE_P(SVE, SSE_Sum_Test,
+                         Combine(ValuesIn(sse_sum_sve),
+                                 ValuesIn(kValidBlockSize)));
+#endif  // HAVE_SVE
+
+//////////////////////////////////////////////////////////////////////////////
+// 2D Variance test functions
+//////////////////////////////////////////////////////////////////////////////
+
+typedef uint64_t (*Var2DFunc)(uint8_t *src, int stride, int width, int height);
+typedef libaom_test::FuncParam<Var2DFunc> TestFuncVar2D;
+
+const uint16_t test_block_size[2] = { 128, 256 };
+
+class Lowbd2dVarTest : public ::testing::TestWithParam<TestFuncVar2D> {
+ public:
+  ~Lowbd2dVarTest() override = default;
+  void SetUp() override {
+    params_ = this->GetParam();
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, 512 * 512 * sizeof(uint8_t)));
+    ASSERT_NE(src_, nullptr);
+  }
+
+  void TearDown() override { aom_free(src_); }
+  void RunTest(bool is_random);
+  void RunSpeedTest();
+
+  void GenRandomData(int width, int height, int stride) {
+    const int msb = 7;  // Up to 8 bit input
+    const int limit = 1 << (msb + 1);
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src_[ii * stride + jj] = rnd_(limit);
+      }
+    }
+  }
+
+  void GenExtremeData(int width, int height, int stride) {
+    const int msb = 7;  // Up to 8 bit input
+    const int limit = 1 << (msb + 1);
+    const int val = rnd_(2) ? limit - 1 : 0;
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src_[ii * stride + jj] = val;
+      }
+    }
+  }
+
+ protected:
+  TestFuncVar2D params_;
+  uint8_t *src_;
+  ACMRandom rnd_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Lowbd2dVarTest);
+
+void Lowbd2dVarTest::RunTest(bool is_random) {
+  int failed = 0;
+  for (int k = 0; k < kNumIterations; k++) {
+    const int width = 4 * (rnd_(63) + 1);   // Up to 256x256
+    const int height = 4 * (rnd_(63) + 1);  // Up to 256x256
+    int stride = 4 << rnd_(8);              // Up to 512 stride
+    while (stride < width) {                // Make sure it's valid
+      stride = 4 << rnd_(8);
+    }
+    if (is_random) {
+      GenRandomData(width, height, stride);
+    } else {
+      GenExtremeData(width, height, stride);
+    }
+
+    const uint64_t res_ref = params_.ref_func(src_, stride, width, height);
+    uint64_t res_tst;
+    API_REGISTER_STATE_CHECK(res_tst =
+                                 params_.tst_func(src_, stride, width, height));
+
+    if (!failed) {
+      failed = res_ref != res_tst;
+      EXPECT_EQ(res_ref, res_tst)
+          << "Error: Sum Squares Test [" << width << "x" << height
+          << "] C output does not match optimized output.";
+    }
+  }
+}
+
+void Lowbd2dVarTest::RunSpeedTest() {
+  for (int block = 0; block < 2; block++) {
+    const int width = test_block_size[block];
+    const int height = test_block_size[block];
+    int stride = 4 << rnd_(8);  // Up to 512 stride
+    while (stride < width) {    // Make sure it's valid
+      stride = 4 << rnd_(8);
+    }
+    GenExtremeData(width, height, stride);
+    const int num_loops = 1000000000 / (width + height);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      params_.ref_func(src_, stride, width, height);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+    for (int i = 0; i < num_loops; ++i)
+      params_.tst_func(src_, stride, width, height);
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("%3dx%-3d: Scaling = %.2f\n", width, height,
+           (double)elapsed_time / elapsed_time1);
+  }
+}
+
+TEST_P(Lowbd2dVarTest, OperationCheck) {
+  RunTest(true);  // GenRandomData
+}
+
+TEST_P(Lowbd2dVarTest, ExtremeValues) {
+  RunTest(false);  // GenExtremeData
+}
+
+TEST_P(Lowbd2dVarTest, DISABLED_Speed) { RunSpeedTest(); }
+
+#if HAVE_SSE2
+
+INSTANTIATE_TEST_SUITE_P(SSE2, Lowbd2dVarTest,
+                         ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c,
+                                                         &aom_var_2d_u8_sse2)));
+
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2
+
+INSTANTIATE_TEST_SUITE_P(AVX2, Lowbd2dVarTest,
+                         ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c,
+                                                         &aom_var_2d_u8_avx2)));
+
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+
+INSTANTIATE_TEST_SUITE_P(NEON, Lowbd2dVarTest,
+                         ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c,
+                                                         &aom_var_2d_u8_neon)));
+
+#endif  // HAVE_NEON
+
+#if HAVE_NEON_DOTPROD
+
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, Lowbd2dVarTest,
+                         ::testing::Values(TestFuncVar2D(
+                             &aom_var_2d_u8_c, &aom_var_2d_u8_neon_dotprod)));
+
+#endif  // HAVE_NEON_DOTPROD
+
+class Highbd2dVarTest : public ::testing::TestWithParam<TestFuncVar2D> {
+ public:
+  ~Highbd2dVarTest() override = default;
+  void SetUp() override {
+    params_ = this->GetParam();
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, 512 * 512 * sizeof(uint16_t)));
+    ASSERT_NE(src_, nullptr);
+  }
+
+  void TearDown() override { aom_free(src_); }
+  void RunTest(bool is_random);
+  void RunSpeedTest();
+
+  void GenRandomData(int width, int height, int stride) {
+    const int msb = 11;  // Up to 12 bit input
+    const int limit = 1 << (msb + 1);
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src_[ii * stride + jj] = rnd_(limit);
+      }
+    }
+  }
+
+  void GenExtremeData(int width, int height, int stride) {
+    const int msb = 11;  // Up to 12 bit input
+    const int limit = 1 << (msb + 1);
+    const int val = rnd_(2) ? limit - 1 : 0;
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src_[ii * stride + jj] = val;
+      }
+    }
+  }
+
+ protected:
+  TestFuncVar2D params_;
+  uint16_t *src_;
+  ACMRandom rnd_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Highbd2dVarTest);
+
+void Highbd2dVarTest::RunTest(bool is_random) {
+  int failed = 0;
+  for (int k = 0; k < kNumIterations; k++) {
+    const int width = 4 * (rnd_(63) + 1);   // Up to 256x256
+    const int height = 4 * (rnd_(63) + 1);  // Up to 256x256
+    int stride = 4 << rnd_(8);              // Up to 512 stride
+    while (stride < width) {                // Make sure it's valid
+      stride = 4 << rnd_(8);
+    }
+    if (is_random) {
+      GenRandomData(width, height, stride);
+    } else {
+      GenExtremeData(width, height, stride);
+    }
+
+    const uint64_t res_ref =
+        params_.ref_func(CONVERT_TO_BYTEPTR(src_), stride, width, height);
+    uint64_t res_tst;
+    API_REGISTER_STATE_CHECK(
+        res_tst =
+            params_.tst_func(CONVERT_TO_BYTEPTR(src_), stride, width, height));
+
+    if (!failed) {
+      failed = res_ref != res_tst;
+      EXPECT_EQ(res_ref, res_tst)
+          << "Error: Sum Squares Test [" << width << "x" << height
+          << "] C output does not match optimized output.";
+    }
+  }
+}
+
+void Highbd2dVarTest::RunSpeedTest() {
+  for (int block = 0; block < 2; block++) {
+    const int width = test_block_size[block];
+    const int height = test_block_size[block];
+    int stride = 4 << rnd_(8);  // Up to 512 stride
+    while (stride < width) {    // Make sure it's valid
+      stride = 4 << rnd_(8);
+    }
+    GenExtremeData(width, height, stride);
+    const int num_loops = 1000000000 / (width + height);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      params_.ref_func(CONVERT_TO_BYTEPTR(src_), stride, width, height);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+    for (int i = 0; i < num_loops; ++i)
+      params_.tst_func(CONVERT_TO_BYTEPTR(src_), stride, width, height);
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("%3dx%-3d: Scaling = %.2f\n", width, height,
+           (double)elapsed_time / elapsed_time1);
+  }
+}
+
+TEST_P(Highbd2dVarTest, OperationCheck) {
+  RunTest(true);  // GenRandomData
+}
+
+TEST_P(Highbd2dVarTest, ExtremeValues) {
+  RunTest(false);  // GenExtremeData
+}
+
+TEST_P(Highbd2dVarTest, DISABLED_Speed) { RunSpeedTest(); }
+
+#if HAVE_SSE2
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, Highbd2dVarTest,
+    ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_sse2)));
+
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, Highbd2dVarTest,
+    ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_avx2)));
+
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, Highbd2dVarTest,
+    ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_neon)));
+
+#endif  // HAVE_NEON
+
+#if HAVE_SVE
+
+INSTANTIATE_TEST_SUITE_P(SVE, Highbd2dVarTest,
+                         ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c,
+                                                         &aom_var_2d_u16_sve)));
+
+#endif  // HAVE_SVE
+}  // namespace
diff --git a/third_party/aom/test/svc_datarate_test.cc b/third_party/aom/test/svc_datarate_test.cc
new file mode 100644
index 0000000000..cc3fb674b3
--- /dev/null
+++ b/third_party/aom/test/svc_datarate_test.cc
@@ -0,0 +1,2675 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <climits>
+#include <vector>
+#include "config/aom_config.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/datarate_test.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "aom/aom_codec.h"
+#include "av1/common/enums.h"
+#include "av1/encoder/encoder.h"
+
+namespace datarate_test {
+namespace {
+
+struct FrameInfo {
+  FrameInfo(aom_codec_pts_t _pts, unsigned int _w, unsigned int _h)
+      : pts(_pts), w(_w), h(_h) {}
+
+  aom_codec_pts_t pts;
+  unsigned int w;
+  unsigned int h;
+};
+
+class DatarateTestSVC
+    : public ::libaom_test::CodecTestWith4Params<libaom_test::TestMode, int,
+                                                 unsigned int, int>,
+      public DatarateTest {
+ public:
+  DatarateTestSVC() : DatarateTest(GET_PARAM(0)) {
+    set_cpu_used_ = GET_PARAM(2);
+    aq_mode_ = GET_PARAM(3);
+  }
+
+ protected:
+  void SetUp() override {
+    InitializeConfig(GET_PARAM(1));
+    ResetModel();
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             aom_codec_pts_t pts) override {
+    frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
+    ++decoded_nframes_;
+  }
+
+  std::vector<FrameInfo> frame_info_list_;
+
+  int GetNumSpatialLayers() override { return number_spatial_layers_; }
+
+  void ResetModel() override {
+    DatarateTest::ResetModel();
+    layer_frame_cnt_ = 0;
+    superframe_cnt_ = 0;
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 1;
+    for (int i = 0; i < AOM_MAX_LAYERS; i++) {
+      target_layer_bitrate_[i] = 0;
+      effective_datarate_tl[i] = 0.0;
+    }
+    memset(&layer_id_, 0, sizeof(aom_svc_layer_id_t));
+    memset(&svc_params_, 0, sizeof(aom_svc_params_t));
+    memset(&ref_frame_config_, 0, sizeof(aom_svc_ref_frame_config_t));
+    memset(&ref_frame_comp_pred_, 0, sizeof(aom_svc_ref_frame_comp_pred_t));
+    drop_frames_ = 0;
+    for (int i = 0; i < 1000; i++) drop_frames_list_[i] = 1000;
+    decoded_nframes_ = 0;
+    mismatch_nframes_ = 0;
+    mismatch_psnr_ = 0.0;
+    set_frame_level_er_ = 0;
+    multi_ref_ = 0;
+    use_fixed_mode_svc_ = 0;
+    comp_pred_ = 0;
+    dynamic_enable_disable_mode_ = 0;
+    intra_only_ = 0;
+    frame_to_start_decoding_ = 0;
+    layer_to_decode_ = 0;
+    frame_sync_ = 0;
+    current_video_frame_ = 0;
+    screen_mode_ = 0;
+    rps_mode_ = 0;
+    rps_recovery_frame_ = 0;
+    user_define_frame_qp_ = 0;
+    set_speed_per_layer_ = false;
+    simulcast_mode_ = false;
+  }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    int spatial_layer_id = 0;
+    current_video_frame_ = video->frame();
+    // video->frame() is called every superframe, so we should condition
+    // this on layer_frame_cnt_ = 0, so we only do this once on the very
+    // first frame.
+    if (video->frame() == 0 && layer_frame_cnt_ == 0) {
+      initialize_svc(number_temporal_layers_, number_spatial_layers_,
+                     &svc_params_);
+      if (dynamic_enable_disable_mode_ == 1) {
+        svc_params_.layer_target_bitrate[2] = 0;
+        cfg_.rc_target_bitrate -= target_layer_bitrate_[2];
+      }
+      encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_);
+      // TODO(aomedia:3032): Configure KSVC in fixed mode.
+      encoder->Control(AV1E_SET_ENABLE_ORDER_HINT, 0);
+      encoder->Control(AV1E_SET_ENABLE_TPL_MODEL, 0);
+      encoder->Control(AV1E_SET_DELTAQ_MODE, 0);
+      if (cfg_.g_threads > 1) {
+        if (cfg_.g_threads == 4) {
+          encoder->Control(AV1E_SET_TILE_COLUMNS, 2);
+          encoder->Control(AV1E_SET_TILE_ROWS, 2);
+        } else if (cfg_.g_threads == 8) {
+          encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+          encoder->Control(AV1E_SET_TILE_ROWS, 2);
+        } else {
+          encoder->Control(AV1E_SET_TILE_COLUMNS, cfg_.g_threads >> 1);
+        }
+        encoder->Control(AV1E_SET_ROW_MT, 1);
+      }
+      if (screen_mode_) {
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
+      }
+    }
+    if (number_spatial_layers_ == 2) {
+      spatial_layer_id = (layer_frame_cnt_ % 2 == 0) ? 0 : 1;
+    } else if (number_spatial_layers_ == 3) {
+      spatial_layer_id = (layer_frame_cnt_ % 3 == 0)         ? 0
+                         : ((layer_frame_cnt_ - 1) % 3 == 0) ? 1
+                                                             : 2;
+    }
+    // Set the reference/update flags, layer_id, and reference_map
+    // buffer index.
+    frame_flags_ = set_layer_pattern(
+        video->frame(), &layer_id_, &ref_frame_config_, &ref_frame_comp_pred_,
+        spatial_layer_id, multi_ref_, comp_pred_,
+        (video->frame() % cfg_.kf_max_dist) == 0, dynamic_enable_disable_mode_,
+        rps_mode_, rps_recovery_frame_, simulcast_mode_);
+    if (intra_only_ == 1 && frame_sync_ > 0) {
+      // Set an Intra-only frame on SL0 at frame_sync_.
+      // In order to allow decoding to start on SL0 in mid-sequence we need to
+      // set and refresh all the slots used on SL0 stream, which is 0 and 3
+      // for this test pattern. The other slots (1, 2, 4, 5) are used for the
+      // SL > 0 layers and these slotes are not refreshed on frame_sync_, so
+      // temporal prediction for the top layers can continue.
+      if (spatial_layer_id == 0 && video->frame() == frame_sync_) {
+        ref_frame_config_.ref_idx[0] = 0;
+        ref_frame_config_.ref_idx[3] = 3;
+        ref_frame_config_.refresh[0] = 1;
+        ref_frame_config_.refresh[3] = 1;
+        for (int i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config_.reference[i] = 0;
+      }
+    }
+    if (intra_only_ && video->frame() == 50 && spatial_layer_id == 1) {
+      // Force an intra_only frame here, for SL1.
+      for (int i = 0; i < INTER_REFS_PER_FRAME; i++)
+        ref_frame_config_.reference[i] = 0;
+    }
+    encoder->Control(AV1E_SET_SVC_LAYER_ID, &layer_id_);
+    // The SET_SVC_REF_FRAME_CONFIG and AV1E_SET_SVC_REF_FRAME_COMP_PRED api is
+    // for the flexible SVC mode (i.e., use_fixed_mode_svc == 0).
+    if (!use_fixed_mode_svc_) {
+      encoder->Control(AV1E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config_);
+      encoder->Control(AV1E_SET_SVC_REF_FRAME_COMP_PRED, &ref_frame_comp_pred_);
+    }
+    if (set_speed_per_layer_) {
+      int speed_per_layer = 10;
+      if (layer_id_.spatial_layer_id == 0) {
+        // For for base SL0,TL0: use the speed the test loops over.
+        if (layer_id_.temporal_layer_id == 1) speed_per_layer = 7;
+        if (layer_id_.temporal_layer_id == 2) speed_per_layer = 8;
+      } else if (layer_id_.spatial_layer_id == 1) {
+        if (layer_id_.temporal_layer_id == 0) speed_per_layer = 7;
+        if (layer_id_.temporal_layer_id == 1) speed_per_layer = 8;
+        if (layer_id_.temporal_layer_id == 2) speed_per_layer = 9;
+      } else if (layer_id_.spatial_layer_id == 2) {
+        if (layer_id_.temporal_layer_id == 0) speed_per_layer = 8;
+        if (layer_id_.temporal_layer_id == 1) speed_per_layer = 9;
+        if (layer_id_.temporal_layer_id == 2) speed_per_layer = 10;
+      }
+      encoder->Control(AOME_SET_CPUUSED, speed_per_layer);
+    }
+    if (set_frame_level_er_) {
+      int mode =
+          (layer_id_.spatial_layer_id > 0 || layer_id_.temporal_layer_id > 0);
+      encoder->Control(AV1E_SET_ERROR_RESILIENT_MODE, mode);
+    }
+    if (dynamic_enable_disable_mode_ == 1) {
+      if (layer_frame_cnt_ == 300 && spatial_layer_id == 0) {
+        // Enable: set top spatial layer bitrate back to non-zero.
+        svc_params_.layer_target_bitrate[2] = target_layer_bitrate_[2];
+        cfg_.rc_target_bitrate += target_layer_bitrate_[2];
+        encoder->Config(&cfg_);
+        encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_);
+      }
+    } else if (dynamic_enable_disable_mode_ == 2) {
+      if (layer_frame_cnt_ == 300 && spatial_layer_id == 0) {
+        // Disable top spatial layer mid-stream.
+        svc_params_.layer_target_bitrate[2] = 0;
+        cfg_.rc_target_bitrate -= target_layer_bitrate_[2];
+        encoder->Config(&cfg_);
+        encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_);
+      } else if (layer_frame_cnt_ == 600 && spatial_layer_id == 0) {
+        // Enable top spatial layer mid-stream.
+        svc_params_.layer_target_bitrate[2] = target_layer_bitrate_[2];
+        cfg_.rc_target_bitrate += target_layer_bitrate_[2];
+        encoder->Config(&cfg_);
+        encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_);
+      }
+    }
+    layer_frame_cnt_++;
+    DatarateTest::PreEncodeFrameHook(video, encoder);
+
+    if (user_define_frame_qp_) {
+      frame_qp_ = rnd_.PseudoUniform(63);
+      encoder->Control(AV1E_SET_QUANTIZER_ONE_PASS, frame_qp_);
+    }
+  }
+
+  void PostEncodeFrameHook(::libaom_test::Encoder *encoder) override {
+    int num_operating_points;
+    encoder->Control(AV1E_GET_NUM_OPERATING_POINTS, &num_operating_points);
+    ASSERT_EQ(num_operating_points,
+              number_temporal_layers_ * number_spatial_layers_);
+
+    if (user_define_frame_qp_) {
+      if (current_video_frame_ >= static_cast<unsigned int>(total_frame_))
+        return;
+      int qp;
+      encoder->Control(AOME_GET_LAST_QUANTIZER_64, &qp);
+      ASSERT_EQ(qp, frame_qp_);
+    }
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
+    // Update the layer cumulative  bitrate.
+    for (int i = layer_id_.temporal_layer_id; i < number_temporal_layers_;
+         i++) {
+      int layer = layer_id_.spatial_layer_id * number_temporal_layers_ + i;
+      effective_datarate_tl[layer] += 1.0 * frame_size_in_bits;
+    }
+    if (layer_id_.spatial_layer_id == number_spatial_layers_ - 1) {
+      last_pts_ = pkt->data.frame.pts;
+      superframe_cnt_++;
+    }
+    // For simulcast mode: verify that for first frame to start decoding,
+    // for SL > 0, are Intra-only frames (not Key), whereas SL0 is Key.
+    if (simulcast_mode_ && superframe_cnt_ == (int)frame_to_start_decoding_) {
+      if (layer_id_.spatial_layer_id > 0) {
+        EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+      } else if (layer_id_.spatial_layer_id == 0) {
+        EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+      }
+    }
+  }
+
+  void EndPassHook() override {
+    duration_ = ((last_pts_ + 1) * timebase_);
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      effective_datarate_tl[i] = (effective_datarate_tl[i] / 1000) / duration_;
+    }
+  }
+
+  bool DoDecode() const override {
+    if (drop_frames_ > 0) {
+      for (unsigned int i = 0; i < drop_frames_; ++i) {
+        if (drop_frames_list_[i] == (unsigned int)superframe_cnt_) {
+          std::cout << "             Skipping decoding frame: "
+                    << drop_frames_list_[i] << "\n";
+          return false;
+        }
+      }
+    } else if (intra_only_ == 1) {
+      // Only start decoding at frames_to_start_decoding_.
+      if (current_video_frame_ < frame_to_start_decoding_) return false;
+      // Only decode base layer for 3SL, for layer_to_decode_ = 0.
+      if (layer_to_decode_ == 0 && frame_sync_ > 0 &&
+          (layer_frame_cnt_ - 1) % 3 != 0)
+        return false;
+    } else if (simulcast_mode_) {
+      // Only start decoding at frames_to_start_decoding_ and only
+      // for top spatial layer SL2 (layer_to_decode_).
+      if (current_video_frame_ < frame_to_start_decoding_) return false;
+      if (layer_id_.spatial_layer_id < (int)layer_to_decode_) return false;
+    }
+    return true;
+  }
+
+  void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) override {
+    double mismatch_psnr = compute_psnr(img1, img2);
+    mismatch_psnr_ += mismatch_psnr;
+    ++mismatch_nframes_;
+  }
+
+  unsigned int GetMismatchFrames() { return mismatch_nframes_; }
+  unsigned int GetDecodedFrames() { return decoded_nframes_; }
+
+  static void ref_config_rps(aom_svc_ref_frame_config_t *ref_frame_config,
+                             int frame_cnt, int rps_recovery_frame) {
+    // Pattern of 3 references with (ALTREF and GOLDEN) trailing
+    // LAST by 4 and 8 frame, with some switching logic to
+    // only predict from longer-term reference.
+    int last_idx = 0;
+    int last_idx_refresh = 0;
+    int gld_idx = 0;
+    int alt_ref_idx = 0;
+    const int lag_alt = 4;
+    const int lag_gld = 8;
+    const int sh = 8;  // slots 0 - 7.
+    // Moving index slot for last: 0 - (sh - 1)
+    if (frame_cnt > 1) last_idx = (frame_cnt - 1) % sh;
+    // Moving index for refresh of last: one ahead for next frame.
+    last_idx_refresh = frame_cnt % sh;
+    // Moving index for gld_ref, lag behind current by lag_gld
+    if (frame_cnt > lag_gld) gld_idx = (frame_cnt - lag_gld) % sh;
+    // Moving index for alt_ref, lag behind LAST by lag_alt frames.
+    if (frame_cnt > lag_alt) alt_ref_idx = (frame_cnt - lag_alt) % sh;
+    // Set the ref_idx.
+    // Default all references (7) to slot for last.
+    // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
+    // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
+    for (int i = 0; i < INTER_REFS_PER_FRAME; i++)
+      ref_frame_config->ref_idx[i] = last_idx;
+    // Set the ref_idx for the relevant references.
+    ref_frame_config->ref_idx[0] = last_idx;
+    ref_frame_config->ref_idx[1] = last_idx_refresh;
+    ref_frame_config->ref_idx[3] = gld_idx;
+    ref_frame_config->ref_idx[6] = alt_ref_idx;
+    // Refresh this slot, which will become LAST on next frame.
+    ref_frame_config->refresh[last_idx_refresh] = 1;
+    // Reference LAST, ALTREF, and GOLDEN
+    ref_frame_config->reference[0] = 1;
+    ref_frame_config->reference[6] = 1;
+    ref_frame_config->reference[3] = 1;
+    if (frame_cnt == rps_recovery_frame) {
+      // Switch to only reference GOLDEN at recovery_frame.
+      ref_frame_config->reference[0] = 0;
+      ref_frame_config->reference[6] = 0;
+      ref_frame_config->reference[3] = 1;
+    } else if (frame_cnt > rps_recovery_frame &&
+               frame_cnt < rps_recovery_frame + 8) {
+      // Go back to predicting from LAST, and after
+      // 8 frames (GOLDEN is 8 frames aways) go back
+      // to predicting off GOLDEN and ALTREF.
+      ref_frame_config->reference[0] = 1;
+      ref_frame_config->reference[6] = 0;
+      ref_frame_config->reference[3] = 0;
+    }
+  }
+
+  // Simulcast mode for 3 spatial and 3 temporal layers.
+  // No inter-layer predicton, only prediction is temporal and single
+  // reference (LAST).
+  // No overlap in buffer slots between spatial layers. So for example,
+  // SL0 only uses slots 0 and 1.
+  // SL1 only uses slots 2 and 3.
+  // SL2 only uses slots 4 and 5.
+  // All 7 references for each inter-frame must only access buffer slots
+  // for that spatial layer.
+  // On key (super)frames: SL1 and SL2 must have no references set
+  // and must refresh all the slots for that layer only (so 2 and 3
+  // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
+  // as a Key frame (refresh all slots). SL1/SL2 will be labelled
+  // internally as Intra-only frames that allow that stream to be decoded.
+  // These conditions will allow for each spatial stream to be
+  // independently decodeable.
+  static void ref_config_simulcast3SL3TL(
+      aom_svc_ref_frame_config_t *ref_frame_config,
+      aom_svc_layer_id_t *layer_id, int is_key_frame, int superframe_cnt) {
+    int i;
+    // Initialize all references to 0 (don't use reference).
+    for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+      ref_frame_config->reference[i] = 0;
+    // Initialize as no refresh/update for all slots.
+    for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
+    for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = 0;
+
+    if (is_key_frame) {
+      if (layer_id->spatial_layer_id == 0) {
+        // Assign LAST/GOLDEN to slot 0/1.
+        // Refesh slots 0 and 1 for SL0.
+        // SL0: this will get set to KEY frame internally.
+        ref_frame_config->ref_idx[0] = 0;
+        ref_frame_config->ref_idx[3] = 1;
+        ref_frame_config->refresh[0] = 1;
+        ref_frame_config->refresh[1] = 1;
+      } else if (layer_id->spatial_layer_id == 1) {
+        // Assign LAST/GOLDEN to slot 2/3.
+        // Refesh slots 2 and 3 for SL1.
+        // This will get set to Intra-only frame internally.
+        ref_frame_config->ref_idx[0] = 2;
+        ref_frame_config->ref_idx[3] = 3;
+        ref_frame_config->refresh[2] = 1;
+        ref_frame_config->refresh[3] = 1;
+      } else if (layer_id->spatial_layer_id == 2) {
+        // Assign LAST/GOLDEN to slot 4/5.
+        // Refresh slots 4 and 5 for SL2.
+        // This will get set to Intra-only frame internally.
+        ref_frame_config->ref_idx[0] = 4;
+        ref_frame_config->ref_idx[3] = 5;
+        ref_frame_config->refresh[4] = 1;
+        ref_frame_config->refresh[5] = 1;
+      }
+    } else if (superframe_cnt % 4 == 0) {
+      // Base temporal layer: TL0
+      layer_id->temporal_layer_id = 0;
+      if (layer_id->spatial_layer_id == 0) {  // SL0
+        // Reference LAST. Assign all references to either slot
+        // 0 or 1. Here we assign LAST to slot 0, all others to 1.
+        // Update slot 0 (LAST).
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 1;
+        ref_frame_config->ref_idx[0] = 0;
+        ref_frame_config->refresh[0] = 1;
+      } else if (layer_id->spatial_layer_id == 1) {  // SL1
+        // Reference LAST. Assign all references to either slot
+        // 2 or 3. Here we assign LAST to slot 2, all others to 3.
+        // Update slot 2 (LAST).
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 3;
+        ref_frame_config->ref_idx[0] = 2;
+        ref_frame_config->refresh[2] = 1;
+      } else if (layer_id->spatial_layer_id == 2) {  // SL2
+        // Reference LAST. Assign all references to either slot
+        // 4 or 5. Here we assign LAST to slot 4, all others to 5.
+        // Update slot 4 (LAST).
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 5;
+        ref_frame_config->ref_idx[0] = 4;
+        ref_frame_config->refresh[4] = 1;
+      }
+    } else if ((superframe_cnt - 1) % 4 == 0) {
+      // First top temporal enhancement layer: TL2
+      layer_id->temporal_layer_id = 2;
+      if (layer_id->spatial_layer_id == 0) {  // SL0
+        // Reference LAST (slot 0). Assign other references to slot 1.
+        // No update/refresh on any slots.
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 1;
+        ref_frame_config->ref_idx[0] = 0;
+      } else if (layer_id->spatial_layer_id == 1) {  // SL1
+        // Reference LAST (slot 2). Assign other references to slot 3.
+        // No update/refresh on any slots.
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 3;
+        ref_frame_config->ref_idx[0] = 2;
+      } else if (layer_id->spatial_layer_id == 2) {  // SL2
+        // Reference LAST (slot 4). Assign other references to slot 4.
+        // No update/refresh on any slots.
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 5;
+        ref_frame_config->ref_idx[0] = 4;
+      }
+    } else if ((superframe_cnt - 2) % 4 == 0) {
+      // Middle temporal enhancement layer: TL1
+      layer_id->temporal_layer_id = 1;
+      if (layer_id->spatial_layer_id == 0) {  // SL0
+        // Reference LAST (slot 0).
+        // Set GOLDEN to slot 1 and update slot 1.
+        // This will be used as reference for next TL2.
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 1;
+        ref_frame_config->ref_idx[0] = 0;
+        ref_frame_config->refresh[1] = 1;
+      } else if (layer_id->spatial_layer_id == 1) {  // SL1
+        // Reference LAST (slot 2).
+        // Set GOLDEN to slot 3 and update slot 3.
+        // This will be used as reference for next TL2.
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 3;
+        ref_frame_config->ref_idx[0] = 2;
+        ref_frame_config->refresh[3] = 1;
+      } else if (layer_id->spatial_layer_id == 2) {  // SL2
+        // Reference LAST (slot 4).
+        // Set GOLDEN to slot 5 and update slot 5.
+        // This will be used as reference for next TL2.
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 5;
+        ref_frame_config->ref_idx[0] = 4;
+        ref_frame_config->refresh[5] = 1;
+      }
+    } else if ((superframe_cnt - 3) % 4 == 0) {
+      // Second top temporal enhancement layer: TL2
+      layer_id->temporal_layer_id = 2;
+      if (layer_id->spatial_layer_id == 0) {  // SL0
+        // Reference LAST (slot 1). Assign other references to slot 0.
+        // No update/refresh on any slots.
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 0;
+        ref_frame_config->ref_idx[0] = 1;
+      } else if (layer_id->spatial_layer_id == 1) {  // SL1
+        // Reference LAST (slot 3). Assign other references to slot 2.
+        // No update/refresh on any slots.
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 2;
+        ref_frame_config->ref_idx[0] = 3;
+      } else if (layer_id->spatial_layer_id == 2) {  // SL2
+        // Reference LAST (slot 5). Assign other references to slot 4.
+        // No update/refresh on any slots.
+        ref_frame_config->reference[0] = 1;
+        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+          ref_frame_config->ref_idx[i] = 4;
+        ref_frame_config->ref_idx[0] = 5;
+      }
+    }
+  }
+
+  // 3 spatial and 3 temporal layer.
+  // Overlap in the buffer slot updates: the slots 3 and 4 updated by
+  // first TL2 are reused for update in TL1 superframe.
+  static void ref_config_3SL3TL(aom_svc_ref_frame_config_t *ref_frame_config,
+                                aom_svc_layer_id_t *layer_id, int is_key_frame,
+                                int superframe_cnt) {
+    if (superframe_cnt % 4 == 0) {
+      // Base temporal layer.
+      layer_id->temporal_layer_id = 0;
+      if (layer_id->spatial_layer_id == 0) {
+        // Reference LAST, update LAST.
+        // Set all buffer_idx to 0.
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
+        ref_frame_config->refresh[0] = 1;
+      } else if (layer_id->spatial_layer_id == 1) {
+        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
+        // GOLDEN (and all other refs) to slot 0.
+        // Update slot 1 (LAST).
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
+        ref_frame_config->ref_idx[0] = 1;
+        ref_frame_config->refresh[1] = 1;
+      } else if (layer_id->spatial_layer_id == 2) {
+        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
+        // GOLDEN (and all other refs) to slot 1.
+        // Update slot 2 (LAST).
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 1;
+        ref_frame_config->ref_idx[0] = 2;
+        ref_frame_config->refresh[2] = 1;
+      }
+    } else if ((superframe_cnt - 1) % 4 == 0) {
+      // First top temporal enhancement layer.
+      layer_id->temporal_layer_id = 2;
+      if (layer_id->spatial_layer_id == 0) {
+        // Reference LAST (slot 0).
+        // Set GOLDEN to slot 3 and update slot 3.
+        // Set all other buffer_idx to slot 0.
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
+        ref_frame_config->ref_idx[3] = 3;
+        ref_frame_config->refresh[3] = 1;
+      } else if (layer_id->spatial_layer_id == 1) {
+        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
+        // GOLDEN (and all other refs) to slot 3.
+        // Set LAST2 to slot 4 and Update slot 4.
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 3;
+        ref_frame_config->ref_idx[0] = 1;
+        ref_frame_config->ref_idx[1] = 4;
+        ref_frame_config->refresh[4] = 1;
+      } else if (layer_id->spatial_layer_id == 2) {
+        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
+        // GOLDEN (and all other refs) to slot 4.
+        // No update.
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 4;
+        ref_frame_config->ref_idx[0] = 2;
+      }
+    } else if ((superframe_cnt - 2) % 4 == 0) {
+      // Middle temporal enhancement layer.
+      layer_id->temporal_layer_id = 1;
+      if (layer_id->spatial_layer_id == 0) {
+        // Reference LAST.
+        // Set all buffer_idx to 0.
+        // Set GOLDEN to slot 3 and update slot 3.
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
+        ref_frame_config->ref_idx[3] = 3;
+        ref_frame_config->refresh[3] = 1;
+      } else if (layer_id->spatial_layer_id == 1) {
+        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
+        // GOLDEN (and all other refs) to slot 3.
+        // Set LAST2 to slot 4 and update slot 4.
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 3;
+        ref_frame_config->ref_idx[0] = 1;
+        ref_frame_config->ref_idx[2] = 4;
+        ref_frame_config->refresh[4] = 1;
+      } else if (layer_id->spatial_layer_id == 2) {
+        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
+        // GOLDEN (and all other refs) to slot 4.
+        // Set LAST2 to slot 5 and update slot 5.
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 4;
+        ref_frame_config->ref_idx[0] = 2;
+        ref_frame_config->ref_idx[2] = 5;
+        ref_frame_config->refresh[5] = 1;
+      }
+    } else if ((superframe_cnt - 3) % 4 == 0) {
+      // Second top temporal enhancement layer.
+      layer_id->temporal_layer_id = 2;
+      if (layer_id->spatial_layer_id == 0) {
+        // Set LAST to slot 3 and reference LAST.
+        // Set GOLDEN to slot 3 and update slot 3.
+        // Set all other buffer_idx to 0.
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
+        ref_frame_config->ref_idx[0] = 3;
+        ref_frame_config->ref_idx[3] = 3;
+        ref_frame_config->refresh[3] = 1;
+      } else if (layer_id->spatial_layer_id == 1) {
+        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 4,
+        // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
+        ref_frame_config->ref_idx[0] = 4;
+        ref_frame_config->ref_idx[3] = 3;
+        ref_frame_config->ref_idx[1] = 4;
+        ref_frame_config->refresh[4] = 1;
+      } else if (layer_id->spatial_layer_id == 2) {
+        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 5,
+        // GOLDEN to slot 4. No update.
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
+        ref_frame_config->ref_idx[0] = 5;
+        ref_frame_config->ref_idx[3] = 4;
+      }
+    }
+    if (layer_id->spatial_layer_id > 0) {
+      // Always reference GOLDEN (inter-layer prediction).
+      ref_frame_config->reference[3] = 1;
+      if (is_key_frame && layer_id->spatial_layer_id > 0) {
+        // On superframes whose base is key: remove LAST since GOLDEN
+        // is used as reference.
+        ref_frame_config->reference[0] = 0;
+      }
+    }
+  }
+
+  // Layer pattern configuration.
+  virtual int set_layer_pattern(
+      int frame_cnt, aom_svc_layer_id_t *layer_id,
+      aom_svc_ref_frame_config_t *ref_frame_config,
+      aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int spatial_layer,
+      int multi_ref, int comp_pred, int is_key_frame,
+      int dynamic_enable_disable_mode, int rps_mode, int rps_recovery_frame,
+      int simulcast_mode) {
+    int lag_index = 0;
+    int base_count = frame_cnt >> 2;
+    layer_id->spatial_layer_id = spatial_layer;
+    // Set the reference map buffer idx for the 7 references:
+    // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
+    // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
+    for (int i = 0; i < INTER_REFS_PER_FRAME; i++) {
+      ref_frame_config->ref_idx[i] = i;
+      ref_frame_config->reference[i] = 0;
+    }
+    for (int i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
+    if (comp_pred) {
+      ref_frame_comp_pred->use_comp_pred[0] = 1;  // GOLDEN_LAST
+      ref_frame_comp_pred->use_comp_pred[1] = 1;  // LAST2_LAST
+      ref_frame_comp_pred->use_comp_pred[2] = 1;  // ALTREF_LAST
+    }
+    // Set layer_flags to 0 when using ref_frame_config->reference.
+    int layer_flags = 0;
+    // Always reference LAST.
+    ref_frame_config->reference[0] = 1;
+    if (number_temporal_layers_ == 1 && number_spatial_layers_ == 1) {
+      ref_frame_config->refresh[0] = 1;
+      if (rps_mode)
+        ref_config_rps(ref_frame_config, frame_cnt, rps_recovery_frame);
+    }
+    if (number_temporal_layers_ == 2 && number_spatial_layers_ == 1) {
+      // 2-temporal layer.
+      //    1    3    5
+      //  0    2    4
+      // Keep golden fixed at slot 3.
+      base_count = frame_cnt >> 1;
+      ref_frame_config->ref_idx[3] = 3;
+      // Cyclically refresh slots 5, 6, 7, for lag alt ref.
+      lag_index = 5;
+      if (base_count > 0) {
+        lag_index = 5 + (base_count % 3);
+        if (frame_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
+      }
+      // Set the altref slot to lag_index.
+      ref_frame_config->ref_idx[6] = lag_index;
+      if (frame_cnt % 2 == 0) {
+        layer_id->temporal_layer_id = 0;
+        // Update LAST on layer 0, reference LAST.
+        ref_frame_config->refresh[0] = 1;
+        ref_frame_config->reference[0] = 1;
+        // Refresh lag_index slot, needed for lagging golen.
+        ref_frame_config->refresh[lag_index] = 1;
+        // Refresh GOLDEN every x base layer frames.
+        if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
+      } else {
+        layer_id->temporal_layer_id = 1;
+        // No updates on layer 1, reference LAST (TL0).
+        ref_frame_config->reference[0] = 1;
+      }
+      // Always reference golden and altref on TL0.
+      if (layer_id->temporal_layer_id == 0) {
+        ref_frame_config->reference[3] = 1;
+        ref_frame_config->reference[6] = 1;
+      }
+    } else if (number_temporal_layers_ == 3 && number_spatial_layers_ == 1) {
+      // 3-layer:
+      //   1    3   5    7
+      //     2        6
+      // 0        4        8
+      if (multi_ref) {
+        // Keep golden fixed at slot 3.
+        ref_frame_config->ref_idx[3] = 3;
+        // Cyclically refresh slots 4, 5, 6, 7, for lag altref.
+        lag_index = 4 + (base_count % 4);
+        // Set the altref slot to lag_index.
+        ref_frame_config->ref_idx[6] = lag_index;
+      }
+      if (frame_cnt % 4 == 0) {
+        // Base layer.
+        layer_id->temporal_layer_id = 0;
+        // Update LAST on layer 0, reference LAST and GF.
+        ref_frame_config->refresh[0] = 1;
+        ref_frame_config->reference[3] = 1;
+        if (multi_ref) {
+          // Refresh GOLDEN every x ~10 base layer frames.
+          if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
+          // Refresh lag_index slot, needed for lagging altref.
+          ref_frame_config->refresh[lag_index] = 1;
+        }
+      } else if ((frame_cnt - 1) % 4 == 0) {
+        layer_id->temporal_layer_id = 2;
+        // First top layer: no updates, only reference LAST (TL0).
+      } else if ((frame_cnt - 2) % 4 == 0) {
+        layer_id->temporal_layer_id = 1;
+        // Middle layer (TL1): update LAST2, only reference LAST (TL0).
+        ref_frame_config->refresh[1] = 1;
+      } else if ((frame_cnt - 3) % 4 == 0) {
+        layer_id->temporal_layer_id = 2;
+        // Second top layer: no updates, only reference LAST.
+        // Set buffer idx for LAST to slot 1, since that was the slot
+        // updated in previous frame. So LAST is TL1 frame.
+        ref_frame_config->ref_idx[0] = 1;
+        ref_frame_config->ref_idx[1] = 0;
+      }
+      if (multi_ref) {
+        // Every frame can reference GOLDEN AND ALTREF.
+        ref_frame_config->reference[3] = 1;
+        ref_frame_config->reference[6] = 1;
+      }
+    } else if (number_temporal_layers_ == 1 && number_spatial_layers_ == 2) {
+      layer_id->temporal_layer_id = 0;
+      if (layer_id->spatial_layer_id == 0) {
+        // Reference LAST, update LAST. Keep LAST and GOLDEN in slots 0 and 3.
+        ref_frame_config->ref_idx[0] = 0;
+        ref_frame_config->ref_idx[3] = 3;
+        ref_frame_config->refresh[0] = 1;
+      } else if (layer_id->spatial_layer_id == 1) {
+        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 3
+        // and GOLDEN to slot 0. Update slot 3 (LAST).
+        ref_frame_config->ref_idx[0] = 3;
+        ref_frame_config->ref_idx[3] = 0;
+        ref_frame_config->refresh[3] = 1;
+      }
+      // Reference GOLDEN.
+      if (layer_id->spatial_layer_id > 0) ref_frame_config->reference[3] = 1;
+    } else if (number_temporal_layers_ == 1 && number_spatial_layers_ == 3) {
+      // 3 spatial layers, 1 temporal.
+      // Note for this case , we set the buffer idx for all references to be
+      // either LAST or GOLDEN, which are always valid references, since decoder
+      // will check if any of the 7 references is valid scale in
+      // valid_ref_frame_size().
+      layer_id->temporal_layer_id = 0;
+      if (layer_id->spatial_layer_id == 0) {
+        // Reference LAST, update LAST. Set all other buffer_idx to 0.
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
+        ref_frame_config->refresh[0] = 1;
+      } else if (layer_id->spatial_layer_id == 1) {
+        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
+        // and GOLDEN (and all other refs) to slot 0.
+        // Update slot 1 (LAST).
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
+        ref_frame_config->ref_idx[0] = 1;
+        ref_frame_config->refresh[1] = 1;
+      } else if (layer_id->spatial_layer_id == 2) {
+        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
+        // and GOLDEN (and all other refs) to slot 1.
+        // Update slot 2 (LAST).
+        for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 1;
+        ref_frame_config->ref_idx[0] = 2;
+        ref_frame_config->refresh[2] = 1;
+        if (multi_ref) {
+          ref_frame_config->ref_idx[6] = 7;
+          ref_frame_config->reference[6] = 1;
+          if (base_count % 10 == 0) ref_frame_config->refresh[7] = 1;
+        }
+      }
+      // Reference GOLDEN.
+      if (layer_id->spatial_layer_id > 0) ref_frame_config->reference[3] = 1;
+    } else if (number_temporal_layers_ == 3 && number_spatial_layers_ == 3) {
+      if (simulcast_mode) {
+        ref_config_simulcast3SL3TL(ref_frame_config, layer_id, is_key_frame,
+                                   superframe_cnt_);
+      } else {
+        ref_config_3SL3TL(ref_frame_config, layer_id, is_key_frame,
+                          superframe_cnt_);
+        // Allow for top spatial layer to use additional temporal reference.
+        // Additional reference is only updated on base temporal layer, every
+        // 10 TL0 frames here.
+        if (multi_ref && layer_id->spatial_layer_id == 2) {
+          ref_frame_config->ref_idx[6] = 7;
+          if (!is_key_frame) ref_frame_config->reference[6] = 1;
+          if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
+            ref_frame_config->refresh[7] = 1;
+        }
+      }
+    }
+    // If the top spatial layer is first-time encoded in mid-sequence
+    // (i.e., dynamic_enable_disable_mode = 1), then don't predict from LAST,
+    // since it will have been last updated on first key frame (SL0) and so
+    // be different resolution from SL2.
+    if (dynamic_enable_disable_mode == 1 &&
+        layer_id->spatial_layer_id == number_spatial_layers_ - 1)
+      ref_frame_config->reference[0] = 0;
+    return layer_flags;
+  }
+
+  virtual void initialize_svc(int number_temporal_layers,
+                              int number_spatial_layers,
+                              aom_svc_params *svc_params) {
+    svc_params->number_spatial_layers = number_spatial_layers;
+    svc_params->number_temporal_layers = number_temporal_layers;
+    for (int i = 0; i < number_temporal_layers * number_spatial_layers; ++i) {
+      svc_params->max_quantizers[i] = 60;
+      svc_params->min_quantizers[i] = 2;
+      svc_params->layer_target_bitrate[i] = target_layer_bitrate_[i];
+    }
+    // Do at most 3 spatial or temporal layers here.
+    svc_params->framerate_factor[0] = 1;
+    if (number_temporal_layers == 2) {
+      svc_params->framerate_factor[0] = 2;
+      svc_params->framerate_factor[1] = 1;
+    } else if (number_temporal_layers == 3) {
+      svc_params->framerate_factor[0] = 4;
+      svc_params->framerate_factor[1] = 2;
+      svc_params->framerate_factor[2] = 1;
+    }
+    svc_params->scaling_factor_num[0] = 1;
+    svc_params->scaling_factor_den[0] = 1;
+    if (number_spatial_layers == 2) {
+      svc_params->scaling_factor_num[0] = 1;
+      svc_params->scaling_factor_den[0] = 2;
+      svc_params->scaling_factor_num[1] = 1;
+      svc_params->scaling_factor_den[1] = 1;
+    } else if (number_spatial_layers == 3) {
+      svc_params->scaling_factor_num[0] = 1;
+      svc_params->scaling_factor_den[0] = 4;
+      svc_params->scaling_factor_num[1] = 1;
+      svc_params->scaling_factor_den[1] = 2;
+      svc_params->scaling_factor_num[2] = 1;
+      svc_params->scaling_factor_den[2] = 1;
+    }
+  }
+
+  virtual void BasicRateTargetingSVC3TL1SLTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 1;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 200, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 3;
+    target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Top temporal layers are non_reference, so exlcude them from
+    // mismatch count, since loopfilter/cdef is not applied for these on
+    // encoder side, but is always applied on decoder.
+    // This means 150 = #frames(300) - #TL2_frames(150).
+    EXPECT_EQ((int)GetMismatchFrames(), 150);
+  }
+
+  virtual void SetFrameQpSVC3TL1SLTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 1;
+
+    user_define_frame_qp_ = 1;
+    total_frame_ = 300;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 200, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 3;
+    target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  virtual void SetFrameQpSVC3TL3SLTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    user_define_frame_qp_ = 1;
+    total_frame_ = 300;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  virtual void BasicRateTargetingSVC3TL1SLScreenTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 60);
+
+    const int bitrate_array[2] = { 800, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    screen_mode_ = 1;
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 1;
+    target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.50)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.7)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Top temporal layers are non_reference, so exlcude them from
+    // mismatch count, since loopfilter/cdef is not applied for these on
+    // encoder side, but is always applied on decoder.
+    // This means 30 = #frames(60) - #TL2_frames(30).
+    // We use LE for screen since loopfilter level can become very small
+    // or zero and then the frame is not a mismatch.
+    EXPECT_LE((int)GetMismatchFrames(), 30);
+  }
+
+  virtual void BasicRateTargetingSVC2TL1SLScreenDropFrameTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 30;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 52;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+
+    const int bitrate_array[2] = { 60, 100 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    screen_mode_ = 1;
+    number_temporal_layers_ = 2;
+    number_spatial_layers_ = 1;
+    target_layer_bitrate_[0] = 60 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.75)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.5)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Top temporal layers are non_reference, so exlcude them from
+    // mismatch count, since loopfilter/cdef is not applied for these on
+    // encoder side, but is always applied on decoder.
+    // This means 300 = #frames(300) - #TL2_frames(150).
+    // We use LE for screen since loopfilter level can become very small
+    // or zero and then the frame is not a mismatch.
+    EXPECT_LE((int)GetMismatchFrames(), 150);
+  }
+
+  virtual void BasicRateTargetingSVC1TL3SLScreenTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+
+    const int bitrate_array[2] = { 800, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    screen_mode_ = 1;
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 3;
+    target_layer_bitrate_[0] = 30 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 60 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.50)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.5)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    EXPECT_EQ((int)GetMismatchFrames(), 0);
+  }
+
+  virtual void BasicRateTargetingSVC1TL1SLScreenScCutsMotionTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+
+    const int bitrate_array[2] = { 200, 500 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    screen_mode_ = 1;
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 1;
+    target_layer_bitrate_[0] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.40)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.7)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    EXPECT_EQ((int)GetMismatchFrames(), 0);
+  }
+
+  virtual void BasicRateTargetingSVC3TL1SLResizeTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    cfg_.rc_resize_mode = RESIZE_DYNAMIC;
+
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, 400);
+    cfg_.g_w = 640;
+    cfg_.g_h = 480;
+    const int bitrate_array[2] = { 80, 90 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 3;
+    target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    unsigned int last_w = cfg_.g_w;
+    unsigned int last_h = cfg_.g_h;
+    int resize_down_count = 0;
+    for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+         info != frame_info_list_.end(); ++info) {
+      if (info->w != last_w || info->h != last_h) {
+        // Verify that resize down occurs.
+        ASSERT_LT(info->w, last_w);
+        ASSERT_LT(info->h, last_h);
+        last_w = info->w;
+        last_h = info->h;
+        resize_down_count++;
+      }
+    }
+    // Must be at least one resize down.
+    ASSERT_GE(resize_down_count, 1);
+  }
+
+  virtual void BasicRateTargetingSVC1TL2SLTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 300, 600 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 2;
+    target_layer_bitrate_[0] = 2 * cfg_.rc_target_bitrate / 4;
+    target_layer_bitrate_[1] = 2 * cfg_.rc_target_bitrate / 4;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC3TL3SLIntraStartDecodeBaseMidSeq() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 500, 1000 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    intra_only_ = 1;
+    frame_sync_ = 20;
+    frame_to_start_decoding_ = frame_sync_;
+    layer_to_decode_ = 0;
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    // Only check datarate on SL0 - this is layer that is decoded starting at
+    // frame_to_start_decoding_.
+    for (int i = 0; i < number_temporal_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.50)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Only base spatial layer is decoded and there are no non-referenece
+    // frames on S0, so #mismatch must be 0.
+    EXPECT_EQ((int)GetMismatchFrames(), 0);
+  }
+
+  virtual void BasicRateTargetingSVC3TL3SLIntraMidSeqDecodeAll() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 500, 1000 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    intra_only_ = 1;
+    frame_sync_ = 20;
+    frame_to_start_decoding_ = 0;
+    layer_to_decode_ = 3;
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.585)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // All 3 spatial layers are decoded, starting at frame 0, so there are
+    // and there 300/2 = 150 non-reference frames, so mismatch is 150.
+    EXPECT_EQ((int)GetMismatchFrames(), 150);
+  }
+
+  virtual void BasicRateTargetingSVC3TL3SLSimulcast() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    cfg_.kf_max_dist = 150;
+    cfg_.kf_min_dist = 150;
+    int num_frames = 300;
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, num_frames);
+    const int bitrate_array[2] = { 500, 1000 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    simulcast_mode_ = 1;
+    frame_to_start_decoding_ = cfg_.kf_max_dist;
+    layer_to_decode_ = 2;  // SL2
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    // Only SL2 layer is decoded.
+    for (int tl = 0; tl < number_temporal_layers_; tl++) {
+      int i = layer_to_decode_ * number_temporal_layers_ + tl;
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.6)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.7)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Only top spatial layer (SL2) is decoded, starting at frame 150
+    // (frame_to_start_decoding_), so there (300 - 150) / 2 = 75
+    // non-reference frames, so mismatch is 75.
+    int num_mismatch = (num_frames - frame_to_start_decoding_) / 2;
+    EXPECT_EQ((int)GetMismatchFrames(), num_mismatch);
+  }
+
+  virtual void BasicRateTargetingSVC1TL2SLIntraOnlyTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 300, 600 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    intra_only_ = 1;
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 2;
+    target_layer_bitrate_[0] = 2 * cfg_.rc_target_bitrate / 4;
+    target_layer_bitrate_[1] = 2 * cfg_.rc_target_bitrate / 4;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC1TL3SLTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 500, 1000 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 3;
+    target_layer_bitrate_[0] = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[1] = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[2] = 4 * cfg_.rc_target_bitrate / 8;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC1TL3SLMultiRefTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 500, 1000 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    multi_ref_ = 1;
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 3;
+    target_layer_bitrate_[0] = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[1] = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[2] = 4 * cfg_.rc_target_bitrate / 8;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC3TL3SLTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.50)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC3TL3SLHDTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingFixedModeSVC3TL3SLHDTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    use_fixed_mode_svc_ = 1;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC3TL3SLMultiThreadSpeedPerLayerTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_threads = 2;
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, 400);
+    cfg_.g_w = 640;
+    cfg_.g_h = 480;
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    set_speed_per_layer_ = true;
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC3TL3SLHDMultiThread2Test() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_threads = 2;
+
+    ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC3TL3SLHDMultiThread4Test() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_threads = 4;
+
+    ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC3TL3SLHDMultiRefTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    multi_ref_ = 1;
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC3TL3SLKfTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    cfg_.kf_mode = AOM_KF_AUTO;
+    cfg_.kf_min_dist = cfg_.kf_max_dist = 100;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.55)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.4)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargeting444SVC3TL3SLTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = 1;
+
+    ::libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
+
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC3TL1SLMultiRefDropAllEnhTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    // error_resilient can set to off/0, since for SVC the context update
+    // is done per-layer.
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 200, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    multi_ref_ = 1;
+    // Drop TL1 and TL2: #frames(300) - #TL0.
+    drop_frames_ = 300 - 300 / 4;
+    int n = 0;
+    for (int i = 0; i < 300; i++) {
+      if (i % 4 != 0) {
+        drop_frames_list_[n] = i;
+        n++;
+      }
+    }
+    number_temporal_layers_ = 3;
+    target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Test that no mismatches have been found.
+    std::cout << "          Decoded frames: " << GetDecodedFrames() << "\n";
+    std::cout << "          Mismatch frames: " << GetMismatchFrames() << "\n";
+    EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_);
+    EXPECT_EQ((int)GetMismatchFrames(), 0);
+  }
+
+  virtual void BasicRateTargetingSVC3TL1SLDropAllEnhTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    // error_resilient can set to off/0, since for SVC the context update
+    // is done per-layer.
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 200, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    // Drop TL1 and TL2: #frames(300) - #TL0.
+    drop_frames_ = 300 - 300 / 4;
+    int n = 0;
+    for (int i = 0; i < 300; i++) {
+      if (i % 4 != 0) {
+        drop_frames_list_[n] = i;
+        n++;
+      }
+    }
+    number_temporal_layers_ = 3;
+    target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Test that no mismatches have been found.
+    std::cout << "          Decoded frames: " << GetDecodedFrames() << "\n";
+    std::cout << "          Mismatch frames: " << GetMismatchFrames() << "\n";
+    EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_);
+    EXPECT_EQ((int)GetMismatchFrames(), 0);
+  }
+
+  virtual void BasicRateTargetingSVC3TL1SLDropTL2EnhTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    // error_resilient for sequence can be off/0, since dropped frames (TL2)
+    // are non-reference frames.
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 200, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    // Drop TL2: #frames(300) - (#TL0 + #TL1).
+    drop_frames_ = 300 - 300 / 2;
+    int n = 0;
+    for (int i = 0; i < 300; i++) {
+      if (i % 2 != 0) {
+        drop_frames_list_[n] = i;
+        n++;
+      }
+    }
+    number_temporal_layers_ = 3;
+    target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Test that no mismatches have been found.
+    std::cout << "          Decoded frames: " << GetDecodedFrames() << "\n";
+    std::cout << "          Mismatch frames: " << GetMismatchFrames() << "\n";
+    EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_);
+    EXPECT_EQ((int)GetMismatchFrames(), 0);
+  }
+
+  virtual void BasicRateTargetingSVC3TL1SLDropAllEnhFrameERTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 200, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    // Set error_resilience at frame level, with codec control,
+    // on/1 for enahancement layers and off/0 for base layer frames.
+    set_frame_level_er_ = 1;
+
+    // Drop TL1 and TL2: #frames(300) - #TL0.
+    drop_frames_ = 300 - 300 / 4;
+    int n = 0;
+    for (int i = 0; i < 300; i++) {
+      if (i % 4 != 0) {
+        drop_frames_list_[n] = i;
+        n++;
+      }
+    }
+    number_temporal_layers_ = 3;
+    target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Test that no mismatches have been found.
+    std::cout << "          Decoded frames: " << GetDecodedFrames() << "\n";
+    std::cout << "          Mismatch frames: " << GetMismatchFrames() << "\n";
+    EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_);
+    EXPECT_EQ((int)GetMismatchFrames(), 0);
+  }
+
+  virtual void BasicRateTargetingSVC3TL1SLDropSetEnhFrameERTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 200, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    // Set error_resilience at frame level, with codec control,
+    // on/1 for enahancement layers and off/0 for base layer frames.
+    set_frame_level_er_ = 1;
+
+    // Drop TL1 and TL2: for part of sequence. Start at first TL2 at
+    // frame 101, and end at second T2 at frame 199. Frame 200 is TL0,
+    // so we can continue decoding without mismatch (since LAST is the
+    // only reference and error_resilient = 1 on TL1/TL2 frames).
+    int n = 0;
+    int num_nonref = 300 / 2;
+    for (int i = 101; i < 200; i++) {
+      if (i % 4 != 0) {
+        drop_frames_list_[n] = i;
+        n++;
+        if (i % 2 != 0) num_nonref -= 1;
+      }
+    }
+    drop_frames_ = n;
+    number_temporal_layers_ = 3;
+    target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Test that no mismatches have been found.
+    std::cout << "          Decoded frames: " << GetDecodedFrames() << "\n";
+    std::cout << "          Mismatch frames: " << GetMismatchFrames() << "\n";
+    EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_);
+    EXPECT_EQ((int)GetMismatchFrames(), num_nonref);
+  }
+
+  virtual void BasicRateTargetingSVC2TL1SLDropSetEnhER0Test() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 200, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+
+    // Set error_resilience off.
+    cfg_.g_error_resilient = 0;
+
+    // Drop TL1: for part of sequence. Start at first TL1 at
+    // frame 101, and end at frame 199. Frame 200 is TL0,
+    // so we can continue decoding without mismatch (since LAST is the
+    // only reference).
+    int n = 0;
+    int num_nonref = 300 / 2;
+    for (int i = 101; i < 200; i++) {
+      if (i % 2 != 0) {
+        drop_frames_list_[n] = i;
+        n++;
+        if (i % 2 != 0) num_nonref -= 1;
+      }
+    }
+    drop_frames_ = n;
+    number_temporal_layers_ = 2;
+    target_layer_bitrate_[0] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Test that no mismatches have been found.
+    std::cout << "          Decoded frames: " << GetDecodedFrames() << "\n";
+    std::cout << "          Mismatch frames: " << GetMismatchFrames() << "\n";
+    EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_);
+    EXPECT_EQ((int)GetMismatchFrames(), num_nonref);
+  }
+
+  virtual void BasicRateTargetingSVC3TL1SLDropSetEnhER0Test() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 200, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+
+    // Set error_resilience off.
+    cfg_.g_error_resilient = 0;
+
+    // Drop TL1 and TL2: for part of sequence. Start at first TL2 at
+    // frame 101, and end at second T2 at frame 199. Frame 200 is TL0,
+    // so we can continue decoding without mismatch (since LAST is the
+    // only reference).
+    int n = 0;
+    int num_nonref = 300 / 2;
+    for (int i = 101; i < 200; i++) {
+      if (i % 4 != 0) {
+        drop_frames_list_[n] = i;
+        n++;
+        if (i % 2 != 0) num_nonref -= 1;
+      }
+    }
+    drop_frames_ = n;
+    number_temporal_layers_ = 3;
+    target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Test that no mismatches have been found.
+    std::cout << "          Decoded frames: " << GetDecodedFrames() << "\n";
+    std::cout << "          Mismatch frames: " << GetMismatchFrames() << "\n";
+    EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_);
+    EXPECT_EQ((int)GetMismatchFrames(), num_nonref);
+  }
+
+  virtual void BasicRateTargetingSVC3TL3SLDropSetEnhER0Test() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 200, 550 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    // Set error_resilience off.
+    cfg_.g_error_resilient = 0;
+    // Drop TL1 and TL2: for part of sequence. Start at first TL2 at
+    // frame 101, and end at second T2 at frame 199. Frame 200 is TL0,
+    // so we can continue decoding without mismatch (since LAST is the
+    // only reference).
+    // Drop here means drop whole superframe.
+    int n = 0;
+    int num_nonref = 300 / 2;
+    for (int i = 101; i < 200; i++) {
+      if (i % 4 != 0) {
+        drop_frames_list_[n] = i;
+        n++;
+        if (i % 2 != 0) num_nonref -= 1;
+      }
+    }
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    multi_ref_ = 1;
+    drop_frames_ = n * number_spatial_layers_;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Test that no mismatches have been found.
+    std::cout << "          Decoded frames: " << GetDecodedFrames() << "\n";
+    std::cout << "          Mismatch frames: " << GetMismatchFrames() << "\n";
+    EXPECT_EQ(300 * number_spatial_layers_ - GetDecodedFrames(), drop_frames_);
+    EXPECT_EQ((int)GetMismatchFrames(), num_nonref);
+  }
+
+  virtual void BasicRateTargetingSVC3TL1SLMultiRefCompoundTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, 400);
+    cfg_.g_w = 640;
+    cfg_.g_h = 480;
+    const int bitrate_array[2] = { 400, 800 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    multi_ref_ = 1;
+    comp_pred_ = 1;
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 1;
+    target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC1TL3SLDynEnablTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
+                                         1, 0, 400);
+    const int bitrate_array[2] = { 500, 1000 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 3;
+    target_layer_bitrate_[0] = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[1] = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[2] = 4 * cfg_.rc_target_bitrate / 8;
+    dynamic_enable_disable_mode_ = 1;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    // No need to check RC on top layer which is disabled part of the time.
+    for (int i = 0; i < number_spatial_layers_ - 1; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC1TL3SLDynDisEnablTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 500, 1000 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 3;
+    target_layer_bitrate_[0] = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[1] = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[2] = 4 * cfg_.rc_target_bitrate / 8;
+    dynamic_enable_disable_mode_ = 2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    // No need to check RC on top layer which is disabled part of the time.
+    for (int i = 0; i < number_spatial_layers_ - 1; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingRPS1TL1SLDropFramesTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 100, 300 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    rps_mode_ = 1;
+    rps_recovery_frame_ = 100;
+    cfg_.g_error_resilient = 0;
+    // Drop x frames before the recovery frames (where the reference
+    // is switched to an older reference (golden or altref).
+    // GOLDEN is 8 frames behind (for the rps pattern example) so we can't
+    // drop more than 8 frames recovery frame, so choose x = 7.
+    int n = 0;
+    for (int i = rps_recovery_frame_ - 7; i < rps_recovery_frame_; i++) {
+      drop_frames_list_[n] = i;
+      n++;
+    }
+    drop_frames_ = n;
+    number_spatial_layers_ = 1;
+    number_temporal_layers_ = 1;
+    target_layer_bitrate_[0] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60)
+          << " The datarate for the file is greater than target by too much!";
+    }
+    // Test that no mismatches have been found.
+    std::cout << "          Decoded frames: " << GetDecodedFrames() << "\n";
+    std::cout << "          Mismatch frames: " << GetMismatchFrames() << "\n";
+    EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_);
+    EXPECT_EQ((int)GetMismatchFrames(), 0);
+  }
+
+  int layer_frame_cnt_;
+  int superframe_cnt_;
+  int number_temporal_layers_;
+  int number_spatial_layers_;
+  // Allow for up to 3 temporal layers.
+  int target_layer_bitrate_[AOM_MAX_LAYERS];
+  aom_svc_params_t svc_params_;
+  aom_svc_ref_frame_config_t ref_frame_config_;
+  aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred_;
+  aom_svc_layer_id_t layer_id_;
+  double effective_datarate_tl[AOM_MAX_LAYERS];
+  unsigned int drop_frames_;
+  unsigned int drop_frames_list_[1000];
+  unsigned int mismatch_nframes_;
+  unsigned int decoded_nframes_;
+  double mismatch_psnr_;
+  int set_frame_level_er_;
+  int multi_ref_;
+  int use_fixed_mode_svc_;
+  int comp_pred_;
+  int dynamic_enable_disable_mode_;
+  int intra_only_;
+  unsigned int frame_to_start_decoding_;
+  unsigned int layer_to_decode_;
+  unsigned int frame_sync_;
+  unsigned int current_video_frame_;
+  int screen_mode_;
+  int rps_mode_;
+  int rps_recovery_frame_;
+  int simulcast_mode_;
+
+  int user_define_frame_qp_;
+  int frame_qp_;
+  int total_frame_;
+  bool set_speed_per_layer_;
+  libaom_test::ACMRandom rnd_;
+};
+
+// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SL) {
+  BasicRateTargetingSVC3TL1SLTest();
+}
+
+TEST_P(DatarateTestSVC, SetFrameQpSVC3TL1SL) { SetFrameQpSVC3TL1SLTest(); }
+
+TEST_P(DatarateTestSVC, SetFrameQpSVC3TL3SL) { SetFrameQpSVC3TL3SLTest(); }
+
+// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial
+// for screen mode.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLScreen) {
+  BasicRateTargetingSVC3TL1SLScreenTest();
+}
+
+// Check basic rate targeting for CBR, for 2 temporal layers, 1 spatial
+// for screen mode, with frame dropper on at low bitrates
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC2TL1SLScreenDropFrame) {
+  BasicRateTargetingSVC2TL1SLScreenDropFrameTest();
+}
+// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal
+// for screen mode.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLScreen) {
+  BasicRateTargetingSVC1TL3SLScreenTest();
+}
+
+// Check basic rate targeting for CBR, for 1 temporal layer, 1 spatial
+// for screen mode, with source with many scene cuts and motion.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL1SLScreenScCutsMotion) {
+  BasicRateTargetingSVC1TL1SLScreenScCutsMotionTest();
+}
+
+// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial,
+// with dynamic resize on. Encode at very low bitrate and check that
+// there is at least one resize (down) event.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLResize) {
+  BasicRateTargetingSVC3TL1SLResizeTest();
+}
+
+// Check basic rate targeting for CBR, for 2 spatial layers, 1 temporal.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL2SL) {
+  BasicRateTargetingSVC1TL2SLTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial layers, 3 temporal,
+// with Intra-only frame inserted in the stream. Verify that we can start
+// decoding the SL0 stream at the intra_only frame in mid-sequence.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLIntraStartDecodeBaseMidSeq) {
+  BasicRateTargetingSVC3TL3SLIntraStartDecodeBaseMidSeq();
+}
+
+// Check basic rate targeting for CBR, for 3spatial layers, 3 temporal,
+// with Intra-only frame inserted in the stream. Verify that we can
+// decode all frames and layers with no mismatch.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLIntraMidSeqDecodeAll) {
+  BasicRateTargetingSVC3TL3SLIntraMidSeqDecodeAll();
+}
+
+// Check simulcast mode for 3 spatial layers, 3 temporal,
+// Key frame is inserted on base SLO in mid-stream, and verify that the
+// top spatial layer (SL2) case be decoded, starting with an Intra-only frame.
+// Verify that we can decode all frames for SL2 with no mismatch.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLSimulcast) {
+  BasicRateTargetingSVC3TL3SLSimulcast();
+}
+
+// Check basic rate targeting for CBR, for 2 spatial layers, 1 temporal,
+// with Intra-only frame inserted in the stream.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL2SLIntraOnly) {
+  BasicRateTargetingSVC1TL2SLIntraOnlyTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SL) {
+  BasicRateTargetingSVC1TL3SLTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal,
+// with additional temporal reference for top spatial layer.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLMultiRef) {
+  BasicRateTargetingSVC1TL3SLMultiRefTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SL) {
+  BasicRateTargetingSVC3TL3SLTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLHD) {
+  BasicRateTargetingSVC3TL3SLHDTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers,
+// for fixed mode SVC.
+TEST_P(DatarateTestSVC, BasicRateTargetingFixedModeSVC3TL3SLHD) {
+  BasicRateTargetingFixedModeSVC3TL3SLHDTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers,
+// for 2 threads, 2 tile_columns, row-mt enabled, and different speed
+// per layer.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLMultiThreadSpeedPerLayer) {
+  BasicRateTargetingSVC3TL3SLMultiThreadSpeedPerLayerTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers,
+// for 2 threads, 2 tile_columns, row-mt enabled.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLHDMultiThread2) {
+  BasicRateTargetingSVC3TL3SLHDMultiThread2Test();
+}
+// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers,
+// for 4 threads, 4 tile_columns, row-mt enabled.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLHDMultiThread4) {
+  BasicRateTargetingSVC3TL3SLHDMultiThread4Test();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers,
+// with additional temporal reference for top spatial layer.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLHDMultiRef) {
+  BasicRateTargetingSVC3TL3SLHDMultiRefTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers,
+// for auto key frame mode with short key frame period.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLKf) {
+  BasicRateTargetingSVC3TL3SLKfTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers,
+// for 4:4:4 input.
+#if defined(CONFIG_MAX_DECODE_PROFILE) && CONFIG_MAX_DECODE_PROFILE < 1
+TEST_P(DatarateTestSVC, DISABLED_BasicRateTargeting444SVC3TL3SL) {
+#else
+TEST_P(DatarateTestSVC, BasicRateTargeting444SVC3TL3SL) {
+#endif
+  BasicRateTargeting444SVC3TL3SLTest();
+}
+
+// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer,
+// with dropping of all enhancement layers (TL 1 and TL2). Check that the base
+// layer (TL0) can still be decodeable (with no mismatch) with the
+// error_resilient flag set to 0. This test used the pattern with multiple
+// references (last, golden, and altref), updated on base layer.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLMultiRefDropAllEnh) {
+  BasicRateTargetingSVC3TL1SLMultiRefDropAllEnhTest();
+}
+
+// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer,
+// with dropping of all enhancement layers (TL 1 and TL2). Check that the base
+// layer (TL0) can still be decodeable (with no mismatch) with the
+// error_resilient flag set to 0.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLDropAllEnh) {
+  BasicRateTargetingSVC3TL1SLDropAllEnhTest();
+}
+
+// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer,
+// with dropping of the TL2 enhancement layer, which are non-reference
+// (droppble) frames. For the base layer (TL0) and TL1 to still be decodeable
+// (with no mismatch), the error_resilient_flag may be off (set to 0),
+// since TL2 are non-reference frames.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLDropTL2Enh) {
+  BasicRateTargetingSVC3TL1SLDropTL2EnhTest();
+}
+
+// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer,
+// with dropping of all enhancement layers (TL 1 and TL2). Test that the
+// error_resilient flag can be set at frame level, with on/1 on
+// enhancement layers and off/0 on base layer.
+// This allows for successful decoding after dropping enhancement layer frames.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLDropAllEnhFrameER) {
+  BasicRateTargetingSVC3TL1SLDropAllEnhFrameERTest();
+}
+
+// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer,
+// with dropping set of enhancement layers (TL 1 and TL2) in middle of sequence.
+// Test that the error_resilient flag can be set at frame level, with on/1 on
+// enhancement layers and off/0 on base layer.
+// This allows for successful decoding after dropping a set enhancement layer
+// frames in the sequence.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLDropSetEnhFrameER) {
+  BasicRateTargetingSVC3TL1SLDropSetEnhFrameERTest();
+}
+
+// Check basic rate targeting for CBR, for 2 temporal layers, 1 spatial layer,
+// with dropping set of enhancement layers (TL 1) in middle of sequence.
+// Test that the error_resilient flag can be 0/off for all frames.
+// This allows for successful decoding after dropping a set enhancement layer
+// frames in the sequence.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC2TL1SLDropSetEnhER0) {
+  BasicRateTargetingSVC2TL1SLDropSetEnhER0Test();
+}
+
+// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer,
+// with dropping set of enhancement layers (TL 1 and TL2) in middle of sequence.
+// Test that the error_resilient flag can be 0/off for all frames.
+// This allows for successful decoding after dropping a set enhancement layer
+// frames in the sequence.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLDropSetEnhER0) {
+  BasicRateTargetingSVC3TL1SLDropSetEnhER0Test();
+}
+
+// Check basic rate targeting for CBR, for 3 temporal layers, 3 spatial layers,
+// with dropping set of enhancement layers (superframe TL 1 and TL2) in middle
+// of sequence. Test that the error_resilient flag can be 0/off for all frames.
+// This allows for successful decoding after dropping a set enhancement layer
+// frames in the sequence.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLDropSetEnhER0) {
+  BasicRateTargetingSVC3TL3SLDropSetEnhER0Test();
+}
+
+// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer,
+// with compound prediction on, for pattern with two additional refereces
+// (golden and altref), both updated on base TLO frames.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLMultiRefCompound) {
+  BasicRateTargetingSVC3TL1SLMultiRefCompoundTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal,
+// with the top spatial layer starting disabled (0 bitrate) and then
+// dynamically enabled after x frames with nonzero bitrate.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLDynEnabl) {
+  BasicRateTargetingSVC1TL3SLDynEnablTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal,
+// with the top spatial layer dynamically disabled snd enabled during the
+// middle of the sequence.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLDynDisEnabl) {
+  BasicRateTargetingSVC1TL3SLDynDisEnablTest();
+}
+
+// Check basic rate targeting and encoder/decodermismatch, for RPS
+// with 1 layer. A number of consecutive frames are lost midway in
+// sequence, and encoder resorts to a longer term reference to recovery
+// and continue decoding successfully.
+TEST_P(DatarateTestSVC, BasicRateTargetingRPS1TL1SLDropFrames) {
+  BasicRateTargetingRPS1TL1SLDropFramesTest();
+}
+
+TEST(SvcParams, BitrateOverflow) {
+  uint8_t buf[6] = { 0 };
+  aom_image_t img;
+  aom_codec_ctx_t enc;
+  aom_codec_enc_cfg_t cfg;
+
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, 1, 1, 1, buf));
+
+  aom_codec_iface_t *const iface = aom_codec_av1_cx();
+  EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME),
+            AOM_CODEC_OK);
+  cfg.g_w = 1;
+  cfg.g_h = 1;
+  ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK);
+
+  aom_svc_params_t svc_params = {};
+  svc_params.framerate_factor[0] = 1;
+  svc_params.framerate_factor[1] = 2;
+  svc_params.number_spatial_layers = 1;
+  svc_params.number_temporal_layers = 2;
+  svc_params.layer_target_bitrate[0] = INT_MAX;
+  svc_params.layer_target_bitrate[1] = INT_MAX;
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_SVC_PARAMS, &svc_params),
+            AOM_CODEC_OK);
+  EXPECT_EQ(
+      aom_codec_encode(&enc, &img, /*pts=*/0, /*duration=*/1, /*flags=*/0),
+      AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_encode(&enc, /*img=*/nullptr, /*pts=*/0, /*duration=*/0,
+                             /*flags=*/0),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
+
+AV1_INSTANTIATE_TEST_SUITE(DatarateTestSVC,
+                           ::testing::Values(::libaom_test::kRealTime),
+                           ::testing::Range(7, 12), ::testing::Values(0, 3),
+                           ::testing::Values(0, 1));
+
+}  // namespace
+}  // namespace datarate_test
diff --git a/third_party/aom/test/svc_encoder_rtc.sh b/third_party/aom/test/svc_encoder_rtc.sh
new file mode 100644
index 0000000000..735166d6f6
--- /dev/null
+++ b/third_party/aom/test/svc_encoder_rtc.sh
@@ -0,0 +1,85 @@
+#!/bin/sh
+## Copyright (c) 2023, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required.
+svc_encoder_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+common_flags="-k 10000"
+common_flags="${common_flags} --max-q=63"
+common_flags="${common_flags} --error-resilient=0"
+
+# Runs svc_encoder_rtc using with 1 spatial layer 3 temporal layers.
+svc_encoder_s1_t3() {
+  local encoder="${LIBAOM_BIN_PATH}/svc_encoder_rtc${AOM_TEST_EXE_SUFFIX}"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/svc_encoder_rtc"
+
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${common_flags}" \
+      "--width=${YUV_RAW_INPUT_WIDTH}" \
+      "--height=${YUV_RAW_INPUT_HEIGHT}" \
+      "-lm 2" \
+      "--speed=8" \
+      "--target-bitrate=400" \
+      "--bitrates=220,300,400" \
+      "--spatial-layers=1" \
+      "--temporal-layers=3" \
+      "--timebase=1/30" \
+      "${YUV_RAW_INPUT}" \
+      "-o ${output_file}" \
+      ${devnull} || return 1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+# Runs svc_encoder_rtc using with 1 spatial layer 2 temporal layers with
+# speed 10.
+svc_encoder_s1_t2() {
+  local encoder="${LIBAOM_BIN_PATH}/svc_encoder_rtc${AOM_TEST_EXE_SUFFIX}"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/svc_encoder_rtc"
+
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${common_flags}" \
+      "--width=${YUV_RAW_INPUT_WIDTH}" \
+      "--height=${YUV_RAW_INPUT_HEIGHT}" \
+      "-lm 1" \
+      "--speed=10" \
+      "--target-bitrate=400" \
+      "--bitrates=220,400" \
+      "--spatial-layers=1" \
+      "--temporal-layers=2" \
+      "--timebase=1/30" \
+      "${YUV_RAW_INPUT}" \
+      "-o ${output_file}" \
+      ${devnull} || return 1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+if [ "$(av1_encode_available)" = "yes" ]; then
+  svc_encoder_rtc_tests="svc_encoder_s1_t3
+                         svc_encoder_s1_t2"
+  run_tests svc_encoder_verify_environment "${svc_encoder_rtc_tests}"
+fi
diff --git a/third_party/aom/test/temporal_filter_test.cc b/third_party/aom/test/temporal_filter_test.cc
new file mode 100644
index 0000000000..85f68b817e
--- /dev/null
+++ b/third_party/aom/test/temporal_filter_test.cc
@@ -0,0 +1,788 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cmath>
+#include <cstdlib>
+#include <memory>
+#include <new>
+#include <string>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/temporal_filter.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "test/function_equivalence_test.h"
+
+using libaom_test::ACMRandom;
+using ::testing::Combine;
+using ::testing::Values;
+using ::testing::ValuesIn;
+
+#if !CONFIG_REALTIME_ONLY
+namespace {
+typedef enum {
+  I400,  // Monochrome
+  I420,  // 4:2:0
+  I422,  // 4:2:2
+  I444,  // 4:4:4
+} ColorFormat;
+static const char *color_fmt_str[] = { "I400", "I420", "I422", "I444" };
+typedef void (*TemporalFilterFunc)(
+    const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
+    const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
+    const int num_planes, const double *noise_level, const MV *subblock_mvs,
+    const int *subblock_mses, const int q_factor, const int filter_strength,
+    int tf_wgt_calc_lvl, const uint8_t *pred, uint32_t *accum, uint16_t *count);
+typedef libaom_test::FuncParam<TemporalFilterFunc> TemporalFilterFuncParam;
+
+typedef std::tuple<TemporalFilterFuncParam, int> TemporalFilterWithParam;
+
+class TemporalFilterTest
+    : public ::testing::TestWithParam<TemporalFilterWithParam> {
+ public:
+  ~TemporalFilterTest() override = default;
+  void SetUp() override {
+    params_ = GET_PARAM(0);
+    tf_wgt_calc_lvl_ = GET_PARAM(1);
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src1_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(8, sizeof(uint8_t) * MAX_MB_PLANE * BH * BW));
+    src2_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(8, sizeof(uint8_t) * MAX_MB_PLANE * BH * BW));
+
+    ASSERT_NE(src1_, nullptr);
+    ASSERT_NE(src2_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(src1_);
+    aom_free(src2_);
+  }
+  void RunTest(int isRandom, int run_times, ColorFormat color_fmt);
+
+  void GenRandomData(int width, int height, int stride, int stride2,
+                     int num_planes, int subsampling_x, int subsampling_y) {
+    uint8_t *src1p = src1_;
+    uint8_t *src2p = src2_;
+    for (int plane = 0; plane < num_planes; plane++) {
+      int plane_w = plane ? width >> subsampling_x : width;
+      int plane_h = plane ? height >> subsampling_y : height;
+      int plane_stride = plane ? stride >> subsampling_x : stride;
+      int plane_stride2 = plane ? stride2 >> subsampling_x : stride2;
+      for (int ii = 0; ii < plane_h; ii++) {
+        for (int jj = 0; jj < plane_w; jj++) {
+          src1p[jj] = rnd_.Rand8();
+          src2p[jj] = rnd_.Rand8();
+        }
+        src1p += plane_stride;
+        src2p += plane_stride2;
+      }
+    }
+  }
+
+  void GenExtremeData(int width, int height, int stride, int stride2,
+                      int num_planes, int subsampling_x, int subsampling_y,
+                      uint8_t val) {
+    uint8_t *src1p = src1_;
+    uint8_t *src2p = src2_;
+    for (int plane = 0; plane < num_planes; plane++) {
+      int plane_w = plane ? width >> subsampling_x : width;
+      int plane_h = plane ? height >> subsampling_y : height;
+      int plane_stride = plane ? stride >> subsampling_x : stride;
+      int plane_stride2 = plane ? stride2 >> subsampling_x : stride2;
+      for (int ii = 0; ii < plane_h; ii++) {
+        for (int jj = 0; jj < plane_w; jj++) {
+          src1p[jj] = val;
+          src2p[jj] = (255 - val);
+        }
+        src1p += plane_stride;
+        src2p += plane_stride2;
+      }
+    }
+  }
+
+ protected:
+  TemporalFilterFuncParam params_;
+  int32_t tf_wgt_calc_lvl_;
+  uint8_t *src1_;
+  uint8_t *src2_;
+  ACMRandom rnd_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(TemporalFilterTest);
+
+void TemporalFilterTest::RunTest(int isRandom, int run_times,
+                                 ColorFormat color_fmt) {
+  aom_usec_timer ref_timer, test_timer;
+  const BLOCK_SIZE block_size = TF_BLOCK_SIZE;
+  static_assert(block_size == BLOCK_32X32, "");
+  const int width = 32;
+  const int height = 32;
+  int num_planes = MAX_MB_PLANE;
+  int subsampling_x = 0;
+  int subsampling_y = 0;
+  if (color_fmt == I420) {
+    subsampling_x = 1;
+    subsampling_y = 1;
+  } else if (color_fmt == I422) {
+    subsampling_x = 1;
+    subsampling_y = 0;
+  } else if (color_fmt == I400) {
+    num_planes = 1;
+  }
+  for (int k = 0; k < 3; k++) {
+    const int stride = width;
+    const int stride2 = width;
+    if (isRandom) {
+      GenRandomData(width, height, stride, stride2, num_planes, subsampling_x,
+                    subsampling_y);
+    } else {
+      const int msb = 8;  // Up to 8 bit input
+      const int limit = (1 << msb) - 1;
+      if (k == 0) {
+        GenExtremeData(width, height, stride, stride2, num_planes,
+                       subsampling_x, subsampling_y, limit);
+      } else {
+        GenExtremeData(width, height, stride, stride2, num_planes,
+                       subsampling_x, subsampling_y, 0);
+      }
+    }
+    double sigma[MAX_MB_PLANE] = { 2.1002103677063437, 2.1002103677063437,
+                                   2.1002103677063437 };
+    DECLARE_ALIGNED(16, unsigned int, accumulator_ref[1024 * 3]);
+    DECLARE_ALIGNED(16, uint16_t, count_ref[1024 * 3]);
+    memset(accumulator_ref, 0, 1024 * 3 * sizeof(accumulator_ref[0]));
+    memset(count_ref, 0, 1024 * 3 * sizeof(count_ref[0]));
+    DECLARE_ALIGNED(16, unsigned int, accumulator_mod[1024 * 3]);
+    DECLARE_ALIGNED(16, uint16_t, count_mod[1024 * 3]);
+    memset(accumulator_mod, 0, 1024 * 3 * sizeof(accumulator_mod[0]));
+    memset(count_mod, 0, 1024 * 3 * sizeof(count_mod[0]));
+
+    static_assert(width == 32 && height == 32, "");
+    const MV subblock_mvs[4] = { { 0, 0 }, { 5, 5 }, { 7, 8 }, { 2, 10 } };
+    const int subblock_mses[4] = { 15, 16, 17, 18 };
+    const int q_factor = 12;
+    const int filter_strength = 5;
+    const int mb_row = 0;
+    const int mb_col = 0;
+    std::unique_ptr<YV12_BUFFER_CONFIG> frame_to_filter(new (std::nothrow)
+                                                            YV12_BUFFER_CONFIG);
+    ASSERT_NE(frame_to_filter, nullptr);
+    frame_to_filter->y_crop_height = 360;
+    frame_to_filter->y_crop_width = 540;
+    frame_to_filter->heights[PLANE_TYPE_Y] = height;
+    frame_to_filter->heights[PLANE_TYPE_UV] = height >> subsampling_y;
+    frame_to_filter->strides[PLANE_TYPE_Y] = stride;
+    frame_to_filter->strides[PLANE_TYPE_UV] = stride >> subsampling_x;
+    DECLARE_ALIGNED(16, uint8_t, src[1024 * 3]);
+    frame_to_filter->buffer_alloc = src;
+    frame_to_filter->flags = 0;  // Only support low bit-depth test.
+    memcpy(src, src1_, 1024 * 3 * sizeof(uint8_t));
+
+    std::unique_ptr<MACROBLOCKD> mbd(new (std::nothrow) MACROBLOCKD);
+    ASSERT_NE(mbd, nullptr);
+    mbd->bd = 8;
+    for (int plane = AOM_PLANE_Y; plane < num_planes; plane++) {
+      int plane_height = plane ? height >> subsampling_y : height;
+      int plane_stride = plane ? stride >> subsampling_x : stride;
+      frame_to_filter->buffers[plane] =
+          frame_to_filter->buffer_alloc + plane * plane_stride * plane_height;
+      mbd->plane[plane].subsampling_x = plane ? subsampling_x : 0;
+      mbd->plane[plane].subsampling_y = plane ? subsampling_y : 0;
+    }
+
+    params_.ref_func(frame_to_filter.get(), mbd.get(), block_size, mb_row,
+                     mb_col, num_planes, sigma, subblock_mvs, subblock_mses,
+                     q_factor, filter_strength, tf_wgt_calc_lvl_, src2_,
+                     accumulator_ref, count_ref);
+    params_.tst_func(frame_to_filter.get(), mbd.get(), block_size, mb_row,
+                     mb_col, num_planes, sigma, subblock_mvs, subblock_mses,
+                     q_factor, filter_strength, tf_wgt_calc_lvl_, src2_,
+                     accumulator_mod, count_mod);
+
+    if (run_times > 1) {
+      aom_usec_timer_start(&ref_timer);
+      for (int j = 0; j < run_times; j++) {
+        params_.ref_func(frame_to_filter.get(), mbd.get(), block_size, mb_row,
+                         mb_col, num_planes, sigma, subblock_mvs, subblock_mses,
+                         q_factor, filter_strength, tf_wgt_calc_lvl_, src2_,
+                         accumulator_ref, count_ref);
+      }
+      aom_usec_timer_mark(&ref_timer);
+      const int elapsed_time_c =
+          static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+      aom_usec_timer_start(&test_timer);
+      for (int j = 0; j < run_times; j++) {
+        params_.tst_func(frame_to_filter.get(), mbd.get(), block_size, mb_row,
+                         mb_col, num_planes, sigma, subblock_mvs, subblock_mses,
+                         q_factor, filter_strength, tf_wgt_calc_lvl_, src2_,
+                         accumulator_mod, count_mod);
+      }
+      aom_usec_timer_mark(&test_timer);
+      const int elapsed_time_simd =
+          static_cast<int>(aom_usec_timer_elapsed(&test_timer));
+
+      printf(
+          "c_time=%d \t simd_time=%d \t "
+          "gain=%f\t width=%d\t height=%d\t color_format=%s\n",
+          elapsed_time_c, elapsed_time_simd,
+          (float)((float)elapsed_time_c / (float)elapsed_time_simd), width,
+          height, color_fmt_str[color_fmt]);
+
+    } else {
+      for (int i = 0, l = 0; i < height; i++) {
+        for (int j = 0; j < width; j++, l++) {
+          EXPECT_EQ(accumulator_ref[l], accumulator_mod[l])
+              << "Error:" << k << " SSE Sum Test [" << width << "x" << height
+              << "] " << color_fmt_str[color_fmt]
+              << " C accumulator does not match optimized accumulator.";
+          EXPECT_EQ(count_ref[l], count_mod[l])
+              << "Error:" << k << " SSE Sum Test [" << width << "x" << height
+              << "] " << color_fmt_str[color_fmt]
+              << " count does not match optimized count.";
+        }
+      }
+    }
+  }
+}
+
+TEST_P(TemporalFilterTest, OperationCheck) {
+  RunTest(1, 1, I400);
+  RunTest(1, 1, I420);
+  RunTest(1, 1, I422);
+  RunTest(1, 1, I444);
+}
+
+TEST_P(TemporalFilterTest, ExtremeValues) {
+  RunTest(0, 1, I400);
+  RunTest(0, 1, I420);
+  RunTest(0, 1, I422);
+  RunTest(0, 1, I444);
+}
+
+TEST_P(TemporalFilterTest, DISABLED_Speed) {
+  RunTest(1, 100000, I400);
+  RunTest(1, 100000, I420);
+  RunTest(1, 100000, I422);
+  RunTest(1, 100000, I444);
+}
+
+#if HAVE_AVX2
+TemporalFilterFuncParam temporal_filter_test_avx2[] = { TemporalFilterFuncParam(
+    &av1_apply_temporal_filter_c, &av1_apply_temporal_filter_avx2) };
+INSTANTIATE_TEST_SUITE_P(AVX2, TemporalFilterTest,
+                         Combine(ValuesIn(temporal_filter_test_avx2),
+                                 Values(0, 1)));
+#endif  // HAVE_AVX2
+
+#if HAVE_SSE2
+TemporalFilterFuncParam temporal_filter_test_sse2[] = { TemporalFilterFuncParam(
+    &av1_apply_temporal_filter_c, &av1_apply_temporal_filter_sse2) };
+INSTANTIATE_TEST_SUITE_P(SSE2, TemporalFilterTest,
+                         Combine(ValuesIn(temporal_filter_test_sse2),
+                                 Values(0, 1)));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+TemporalFilterFuncParam temporal_filter_test_neon[] = { TemporalFilterFuncParam(
+    &av1_apply_temporal_filter_c, &av1_apply_temporal_filter_neon) };
+INSTANTIATE_TEST_SUITE_P(NEON, TemporalFilterTest,
+                         Combine(ValuesIn(temporal_filter_test_neon),
+                                 Values(0, 1)));
+#endif  // HAVE_NEON
+
+#if HAVE_NEON_DOTPROD
+TemporalFilterFuncParam temporal_filter_test_neon_dotprod[] = {
+  TemporalFilterFuncParam(&av1_apply_temporal_filter_c,
+                          &av1_apply_temporal_filter_neon_dotprod)
+};
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, TemporalFilterTest,
+                         Combine(ValuesIn(temporal_filter_test_neon_dotprod),
+                                 Values(0, 1)));
+#endif  // HAVE_NEON_DOTPROD
+
+#if HAVE_AVX2 || HAVE_NEON
+// Width and height for which av1_estimate_noise_from_single_plane() will be
+// tested.
+const int kWidths[] = { 3840, 1920, 1280, 800, 640, 360, 357 };
+const int kHeights[] = { 2160, 1080, 720, 600, 480, 240, 237 };
+#endif  // HAVE_AVX2 || HAVE_NEON
+
+typedef double (*EstimateNoiseFunc)(const uint8_t *src, int height, int width,
+                                    int stride, int edge_thresh);
+
+typedef std::tuple<EstimateNoiseFunc, EstimateNoiseFunc, int, int>
+    EstimateNoiseWithParam;
+
+class EstimateNoiseTest
+    : public ::testing::TestWithParam<EstimateNoiseWithParam> {
+ public:
+  ~EstimateNoiseTest() override = default;
+  void SetUp() override {
+    ref_func = GET_PARAM(0);
+    tst_func = GET_PARAM(1);
+    width_ = GET_PARAM(2);
+    height_ = GET_PARAM(3);
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src1_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(8, sizeof(uint8_t) * width_ * height_));
+    GenRandomData(width_ * height_);
+    ASSERT_NE(src1_, nullptr);
+  }
+
+  void TearDown() override { aom_free(src1_); }
+
+  void RunTest(int run_times) {
+    stride_ = width_;
+
+    for (int i = 0; i < run_times; i++) {
+      double ref_out = ref_func(src1_, height_, width_, stride_,
+                                NOISE_ESTIMATION_EDGE_THRESHOLD);
+
+      double tst_out = tst_func(src1_, height_, width_, stride_,
+                                NOISE_ESTIMATION_EDGE_THRESHOLD);
+
+      EXPECT_EQ(ref_out, tst_out);
+    }
+  }
+
+  void SpeedTest(int run_times) {
+    stride_ = width_;
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; i++) {
+      ref_func(src1_, height_, width_, stride_,
+               NOISE_ESTIMATION_EDGE_THRESHOLD);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; i++) {
+      tst_func(src1_, height_, width_, stride_,
+               NOISE_ESTIMATION_EDGE_THRESHOLD);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    printf("(%3.2f)\n", time1 / time2);
+  }
+
+  void GenRandomData(int size) {
+    for (int ii = 0; ii < size; ii++) src1_[ii] = rnd_.Rand8();
+  }
+
+ protected:
+  EstimateNoiseFunc ref_func;
+  EstimateNoiseFunc tst_func;
+  ACMRandom rnd_;
+  uint8_t *src1_;
+  int width_;
+  int height_;
+  int stride_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(EstimateNoiseTest);
+
+TEST_P(EstimateNoiseTest, RandomValues) { RunTest(1); }
+
+TEST_P(EstimateNoiseTest, DISABLED_Speed) { SpeedTest(2000); }
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, EstimateNoiseTest,
+    ::testing::Combine(
+        ::testing::Values(av1_estimate_noise_from_single_plane_c),
+        ::testing::Values(av1_estimate_noise_from_single_plane_avx2),
+        ::testing::ValuesIn(kWidths), ::testing::ValuesIn(kHeights)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, EstimateNoiseTest,
+    ::testing::Combine(
+        ::testing::Values(av1_estimate_noise_from_single_plane_c),
+        ::testing::Values(av1_estimate_noise_from_single_plane_neon),
+        ::testing::ValuesIn(kWidths), ::testing::ValuesIn(kHeights)));
+#endif  // HAVE_NEON
+
+#if CONFIG_AV1_HIGHBITDEPTH
+
+typedef void (*HBDTemporalFilterFunc)(
+    const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
+    const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
+    const int num_planes, const double *noise_level, const MV *subblock_mvs,
+    const int *subblock_mses, const int q_factor, const int filter_strength,
+    int tf_wgt_calc_lvl, const uint8_t *pred, uint32_t *accum, uint16_t *count);
+typedef libaom_test::FuncParam<HBDTemporalFilterFunc>
+    HBDTemporalFilterFuncParam;
+
+typedef std::tuple<HBDTemporalFilterFuncParam, int> HBDTemporalFilterWithParam;
+
+class HBDTemporalFilterTest
+    : public ::testing::TestWithParam<HBDTemporalFilterWithParam> {
+ public:
+  ~HBDTemporalFilterTest() override = default;
+  void SetUp() override {
+    params_ = GET_PARAM(0);
+    tf_wgt_calc_lvl_ = GET_PARAM(1);
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src1_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * MAX_MB_PLANE * BH * BW));
+    src2_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * MAX_MB_PLANE * BH * BW));
+
+    ASSERT_NE(src1_, nullptr);
+    ASSERT_NE(src2_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(src1_);
+    aom_free(src2_);
+  }
+  void RunTest(int isRandom, int run_times, int bd, ColorFormat color_fmt);
+
+  void GenRandomData(int width, int height, int stride, int stride2, int bd,
+                     int subsampling_x, int subsampling_y, int num_planes) {
+    uint16_t *src1p = src1_;
+    uint16_t *src2p = src2_;
+    for (int plane = AOM_PLANE_Y; plane < num_planes; plane++) {
+      int plane_w = plane ? width >> subsampling_x : width;
+      int plane_h = plane ? height >> subsampling_y : height;
+      int plane_stride = plane ? stride >> subsampling_x : stride;
+      int plane_stride2 = plane ? stride2 >> subsampling_x : stride2;
+      const uint16_t max_val = (1 << bd) - 1;
+      for (int ii = 0; ii < plane_h; ii++) {
+        for (int jj = 0; jj < plane_w; jj++) {
+          src1p[jj] = rnd_.Rand16() & max_val;
+          src2p[jj] = rnd_.Rand16() & max_val;
+        }
+        src1p += plane_stride;
+        src2p += plane_stride2;
+      }
+    }
+  }
+
+  void GenExtremeData(int width, int height, int stride, int stride2, int bd,
+                      int subsampling_x, int subsampling_y, int num_planes,
+                      uint16_t val) {
+    uint16_t *src1p = src1_;
+    uint16_t *src2p = src2_;
+    for (int plane = AOM_PLANE_Y; plane < num_planes; plane++) {
+      int plane_w = plane ? width >> subsampling_x : width;
+      int plane_h = plane ? height >> subsampling_y : height;
+      int plane_stride = plane ? stride >> subsampling_x : stride;
+      int plane_stride2 = plane ? stride2 >> subsampling_x : stride2;
+      uint16_t max_val = (1 << bd) - 1;
+      for (int ii = 0; ii < plane_h; ii++) {
+        for (int jj = 0; jj < plane_w; jj++) {
+          src1p[jj] = val;
+          src2p[jj] = (max_val - val);
+        }
+        src1p += plane_stride;
+        src2p += plane_stride2;
+      }
+    }
+  }
+
+ protected:
+  HBDTemporalFilterFuncParam params_;
+  int tf_wgt_calc_lvl_;
+  uint16_t *src1_;
+  uint16_t *src2_;
+  ACMRandom rnd_;
+};
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HBDTemporalFilterTest);
+
+void HBDTemporalFilterTest::RunTest(int isRandom, int run_times, int BD,
+                                    ColorFormat color_fmt) {
+  aom_usec_timer ref_timer, test_timer;
+  const BLOCK_SIZE block_size = TF_BLOCK_SIZE;
+  static_assert(block_size == BLOCK_32X32, "");
+  const int width = 32;
+  const int height = 32;
+  int num_planes = MAX_MB_PLANE;
+  int subsampling_x = 0;
+  int subsampling_y = 0;
+  if (color_fmt == I420) {
+    subsampling_x = 1;
+    subsampling_y = 1;
+  } else if (color_fmt == I422) {
+    subsampling_x = 1;
+    subsampling_y = 0;
+  } else if (color_fmt == I400) {
+    num_planes = 1;
+  }
+  for (int k = 0; k < 3; k++) {
+    const int stride = width;
+    const int stride2 = width;
+    if (isRandom) {
+      GenRandomData(width, height, stride, stride2, BD, subsampling_x,
+                    subsampling_y, num_planes);
+    } else {
+      const int msb = BD;
+      const uint16_t limit = (1 << msb) - 1;
+      if (k == 0) {
+        GenExtremeData(width, height, stride, stride2, BD, subsampling_x,
+                       subsampling_y, num_planes, limit);
+      } else {
+        GenExtremeData(width, height, stride, stride2, BD, subsampling_x,
+                       subsampling_y, num_planes, 0);
+      }
+    }
+    double sigma[MAX_MB_PLANE] = { 2.1002103677063437, 2.1002103677063437,
+                                   2.1002103677063437 };
+    DECLARE_ALIGNED(16, unsigned int, accumulator_ref[1024 * 3]);
+    DECLARE_ALIGNED(16, uint16_t, count_ref[1024 * 3]);
+    memset(accumulator_ref, 0, 1024 * 3 * sizeof(accumulator_ref[0]));
+    memset(count_ref, 0, 1024 * 3 * sizeof(count_ref[0]));
+    DECLARE_ALIGNED(16, unsigned int, accumulator_mod[1024 * 3]);
+    DECLARE_ALIGNED(16, uint16_t, count_mod[1024 * 3]);
+    memset(accumulator_mod, 0, 1024 * 3 * sizeof(accumulator_mod[0]));
+    memset(count_mod, 0, 1024 * 3 * sizeof(count_mod[0]));
+
+    static_assert(width == 32 && height == 32, "");
+    const MV subblock_mvs[4] = { { 0, 0 }, { 5, 5 }, { 7, 8 }, { 2, 10 } };
+    const int subblock_mses[4] = { 15, 16, 17, 18 };
+    const int q_factor = 12;
+    const int filter_strength = 5;
+    const int mb_row = 0;
+    const int mb_col = 0;
+    std::unique_ptr<YV12_BUFFER_CONFIG> frame_to_filter(new (std::nothrow)
+                                                            YV12_BUFFER_CONFIG);
+    ASSERT_NE(frame_to_filter, nullptr);
+    frame_to_filter->y_crop_height = 360;
+    frame_to_filter->y_crop_width = 540;
+    frame_to_filter->heights[PLANE_TYPE_Y] = height;
+    frame_to_filter->heights[PLANE_TYPE_UV] = height >> subsampling_y;
+    frame_to_filter->strides[PLANE_TYPE_Y] = stride;
+    frame_to_filter->strides[PLANE_TYPE_UV] = stride >> subsampling_x;
+    DECLARE_ALIGNED(16, uint16_t, src[1024 * 3]);
+    frame_to_filter->buffer_alloc = CONVERT_TO_BYTEPTR(src);
+    frame_to_filter->flags =
+        YV12_FLAG_HIGHBITDEPTH;  // Only Hihgbd bit-depth test.
+    memcpy(src, src1_, 1024 * 3 * sizeof(uint16_t));
+
+    std::unique_ptr<MACROBLOCKD> mbd(new (std::nothrow) MACROBLOCKD);
+    ASSERT_NE(mbd, nullptr);
+    mbd->bd = BD;
+    for (int plane = AOM_PLANE_Y; plane < num_planes; plane++) {
+      int plane_height = plane ? height >> subsampling_y : height;
+      int plane_stride = plane ? stride >> subsampling_x : stride;
+      frame_to_filter->buffers[plane] =
+          frame_to_filter->buffer_alloc + plane * plane_stride * plane_height;
+      mbd->plane[plane].subsampling_x = plane ? subsampling_x : 0;
+      mbd->plane[plane].subsampling_y = plane ? subsampling_y : 0;
+    }
+
+    params_.ref_func(frame_to_filter.get(), mbd.get(), block_size, mb_row,
+                     mb_col, num_planes, sigma, subblock_mvs, subblock_mses,
+                     q_factor, filter_strength, tf_wgt_calc_lvl_,
+                     CONVERT_TO_BYTEPTR(src2_), accumulator_ref, count_ref);
+    params_.tst_func(frame_to_filter.get(), mbd.get(), block_size, mb_row,
+                     mb_col, num_planes, sigma, subblock_mvs, subblock_mses,
+                     q_factor, filter_strength, tf_wgt_calc_lvl_,
+                     CONVERT_TO_BYTEPTR(src2_), accumulator_mod, count_mod);
+
+    if (run_times > 1) {
+      aom_usec_timer_start(&ref_timer);
+      for (int j = 0; j < run_times; j++) {
+        params_.ref_func(frame_to_filter.get(), mbd.get(), block_size, mb_row,
+                         mb_col, num_planes, sigma, subblock_mvs, subblock_mses,
+                         q_factor, filter_strength, tf_wgt_calc_lvl_,
+                         CONVERT_TO_BYTEPTR(src2_), accumulator_ref, count_ref);
+      }
+      aom_usec_timer_mark(&ref_timer);
+      const int elapsed_time_c =
+          static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+      aom_usec_timer_start(&test_timer);
+      for (int j = 0; j < run_times; j++) {
+        params_.tst_func(frame_to_filter.get(), mbd.get(), block_size, mb_row,
+                         mb_col, num_planes, sigma, subblock_mvs, subblock_mses,
+                         q_factor, filter_strength, tf_wgt_calc_lvl_,
+                         CONVERT_TO_BYTEPTR(src2_), accumulator_mod, count_mod);
+      }
+      aom_usec_timer_mark(&test_timer);
+      const int elapsed_time_simd =
+          static_cast<int>(aom_usec_timer_elapsed(&test_timer));
+
+      printf(
+          "c_time=%d \t simd_time=%d \t "
+          "gain=%f\t width=%d\t height=%d\t color_format=%s\n",
+          elapsed_time_c, elapsed_time_simd,
+          (float)((float)elapsed_time_c / (float)elapsed_time_simd), width,
+          height, color_fmt_str[color_fmt]);
+
+    } else {
+      for (int i = 0, l = 0; i < height; i++) {
+        for (int j = 0; j < width; j++, l++) {
+          EXPECT_EQ(accumulator_ref[l], accumulator_mod[l])
+              << "Error:" << k << " SSE Sum Test [" << width << "x" << height
+              << "] " << color_fmt_str[color_fmt]
+              << " C accumulator does not match optimized accumulator.";
+          EXPECT_EQ(count_ref[l], count_mod[l])
+              << "Error:" << k << " SSE Sum Test [" << width << "x" << height
+              << "] " << color_fmt_str[color_fmt]
+              << " C count does not match optimized count.";
+        }
+      }
+    }
+  }
+}
+
+TEST_P(HBDTemporalFilterTest, OperationCheck) {
+  RunTest(1, 1, 10, I400);
+  RunTest(1, 1, 10, I420);
+  RunTest(1, 1, 10, I422);
+  RunTest(1, 1, 10, I444);
+}
+
+TEST_P(HBDTemporalFilterTest, ExtremeValues) {
+  RunTest(0, 1, 10, I400);
+  RunTest(0, 1, 10, I420);
+  RunTest(0, 1, 10, I422);
+  RunTest(0, 1, 10, I444);
+}
+
+TEST_P(HBDTemporalFilterTest, DISABLED_Speed) {
+  RunTest(1, 100000, 10, I400);
+  RunTest(1, 100000, 10, I420);
+  RunTest(1, 100000, 10, I422);
+  RunTest(1, 100000, 10, I444);
+}
+#if HAVE_SSE2
+HBDTemporalFilterFuncParam HBDtemporal_filter_test_sse2[] = {
+  HBDTemporalFilterFuncParam(&av1_highbd_apply_temporal_filter_c,
+                             &av1_highbd_apply_temporal_filter_sse2)
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, HBDTemporalFilterTest,
+                         Combine(ValuesIn(HBDtemporal_filter_test_sse2),
+                                 Values(0, 1)));
+#endif  // HAVE_SSE2
+#if HAVE_AVX2
+HBDTemporalFilterFuncParam HBDtemporal_filter_test_avx2[] = {
+  HBDTemporalFilterFuncParam(&av1_highbd_apply_temporal_filter_c,
+                             &av1_highbd_apply_temporal_filter_avx2)
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, HBDTemporalFilterTest,
+                         Combine(ValuesIn(HBDtemporal_filter_test_avx2),
+                                 Values(0, 1)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+HBDTemporalFilterFuncParam HBDtemporal_filter_test_neon[] = {
+  HBDTemporalFilterFuncParam(&av1_highbd_apply_temporal_filter_c,
+                             &av1_highbd_apply_temporal_filter_neon)
+};
+INSTANTIATE_TEST_SUITE_P(NEON, HBDTemporalFilterTest,
+                         Combine(ValuesIn(HBDtemporal_filter_test_neon),
+                                 Values(0, 1)));
+#endif  // HAVE_NEON
+
+using HBDEstimateNoiseFunc = double (*)(const uint16_t *src, int height,
+                                        int width, int stride, int bit_depth,
+                                        int edge_thresh);
+
+using HBDEstimateNoiseWithParam =
+    std::tuple<HBDEstimateNoiseFunc, HBDEstimateNoiseFunc, int, int, int>;
+
+class HBDEstimateNoiseTest
+    : public ::testing::TestWithParam<HBDEstimateNoiseWithParam> {
+ public:
+  HBDEstimateNoiseTest()
+      : ref_func_(GET_PARAM(0)), tst_func_(GET_PARAM(1)),
+        rnd_(libaom_test::ACMRandom::DeterministicSeed()), width_(GET_PARAM(2)),
+        height_(GET_PARAM(3)), bitdepth_(GET_PARAM(4)) {}
+  ~HBDEstimateNoiseTest() override = default;
+  void SetUp() override {
+    src1_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * width_ * height_));
+    ASSERT_NE(src1_, nullptr);
+    GenRandomData(width_ * height_);
+  }
+
+  void TearDown() override { aom_free(src1_); }
+
+  void RunTest() {
+    stride_ = width_;
+
+    double ref_out = ref_func_(src1_, height_, width_, stride_, bitdepth_,
+                               NOISE_ESTIMATION_EDGE_THRESHOLD);
+
+    double tst_out = tst_func_(src1_, height_, width_, stride_, bitdepth_,
+                               NOISE_ESTIMATION_EDGE_THRESHOLD);
+
+    EXPECT_EQ(ref_out, tst_out);
+  }
+
+  void SpeedTest(int run_times) {
+    stride_ = width_;
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; i++) {
+      ref_func_(src1_, height_, width_, stride_, bitdepth_,
+                NOISE_ESTIMATION_EDGE_THRESHOLD);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; i++) {
+      tst_func_(src1_, height_, width_, stride_, bitdepth_,
+                NOISE_ESTIMATION_EDGE_THRESHOLD);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+    printf("%d %dx%d :%7.2f/%7.2f (%3.2f)\n", bitdepth_, width_, height_, time1,
+           time2, time1 / time2);
+  }
+
+  void GenRandomData(int size) {
+    for (int ii = 0; ii < size; ii++) src1_[ii] = rnd_.Rand12();
+  }
+
+ private:
+  HBDEstimateNoiseFunc ref_func_;
+  HBDEstimateNoiseFunc tst_func_;
+  ACMRandom rnd_;
+  uint16_t *src1_;
+  int width_;
+  int height_;
+  int stride_;
+  int bitdepth_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HBDEstimateNoiseTest);
+
+TEST_P(HBDEstimateNoiseTest, RandomValues) { RunTest(); }
+
+TEST_P(HBDEstimateNoiseTest, DISABLED_Speed) { SpeedTest(2000); }
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, HBDEstimateNoiseTest,
+    ::testing::Combine(
+        ::testing::Values(av1_highbd_estimate_noise_from_single_plane_c),
+        ::testing::Values(av1_highbd_estimate_noise_from_single_plane_neon),
+        ::testing::ValuesIn(kWidths), ::testing::ValuesIn(kHeights),
+        ::testing::ValuesIn({ 8, 10, 12 })));
+#endif  // HAVE_NEON
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace
+#endif
diff --git a/third_party/aom/test/test-data.sha1 b/third_party/aom/test/test-data.sha1
new file mode 100644
index 0000000000..4b4a96d444
--- /dev/null
+++ b/third_party/aom/test/test-data.sha1
@@ -0,0 +1,575 @@
+a0edab4ab4054127474074d967a33616ccdccc76 *hantro_collage_w176h144.yuv
+d5dfb0151c9051f8c85999255645d7a23916d3c0 *hantro_collage_w352h288.yuv
+b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv
+26b7f64399b84db4b4c9c915d743ec5c2619d4b9 *invalid-bug-1814.ivf
+d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-bug-1814.ivf.res
+09aa07e5325b3bb5462182eb30b8ecc914630740 *invalid-chromium-906381.ivf
+09d2af8dd22201dd8d48e5dcfcaed281ff9422c7 *invalid-chromium-906381.ivf.res
+f7c83c14aa35b928ba8b70f3eaa3b92070be4519 *invalid-google-142530197-1.ivf
+d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-google-142530197-1.ivf.res
+703c05720d5d67053bcee44987635cd78af2f971 *invalid-google-142530197.ivf
+d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-google-142530197.ivf.res
+fa06784f23751d8c37be94160fb821e855199af4 *invalid-oss-fuzz-10061.ivf
+b055f06b9a95aaa5697fa26497b592a47843a7c8 *invalid-oss-fuzz-10061.ivf.res
+c9e06c4c7fb7d69fd635a1f606a5e478d60e99cf *invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf
+88e18e61bd2b7457b4c71ebefbdff0029c41cc04 *invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf.res
+91a5bedeb4832c1c2900736cc0f644bb63971bbc *invalid-oss-fuzz-10227.ivf
+b055f06b9a95aaa5697fa26497b592a47843a7c8 *invalid-oss-fuzz-10227.ivf.res
+b2d0a29a65879436bf483d04865faca7d11cc2ee *invalid-oss-fuzz-10389.ivf
+f4ce175af1d871ed1603c8936f6b78e968f93c85 *invalid-oss-fuzz-10389.ivf.res.4
+11df8e9a068669c678097d460b63609d3da73828 *invalid-oss-fuzz-10555.ivf
+b055f06b9a95aaa5697fa26497b592a47843a7c8 *invalid-oss-fuzz-10555.ivf.res
+cf5945085fe85456a1f74bf4cc7998b88b3f4b62 *invalid-oss-fuzz-10705.ivf
+758671858368ffd2a2c0727898de5661f7cf7d68 *invalid-oss-fuzz-10705.ivf.res
+88e29851122cca3f336824f7fa4d9f757f91110c *invalid-oss-fuzz-10723.ivf
+64f8a208dec7f1580fbe0371aa15e62bb1262715 *invalid-oss-fuzz-10723.ivf.res.2
+1af486cd2cc83ebeddc76ca7a1c512cc0ec568d5 *invalid-oss-fuzz-10723.ivf.res.3
+0784acc8931090ec24eba752d6c27e359e68fe7d *invalid-oss-fuzz-10779.ivf
+5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-oss-fuzz-10779.ivf.res
+7d37be9357f89a100ced694aee1ca5a6fad35ba9 *invalid-oss-fuzz-11477.ivf
+15932651aacfc4622f0910f728f3f95e08e1753d *invalid-oss-fuzz-11477.ivf.res
+1674787c38ddf82a2e5c804203f04f56a304e8e0 *invalid-oss-fuzz-11479.ivf
+64f8a208dec7f1580fbe0371aa15e62bb1262715 *invalid-oss-fuzz-11479.ivf.res.2
+b1a45514f0c59be03c9991cd04882426b9b930fa *invalid-oss-fuzz-11523.ivf
+3198c7af55a7d50173ce3c369c0cf2d9cdfface6 *invalid-oss-fuzz-11523.ivf.res.2
+cb445173be760c3554f1740ce4d119f57a7be043 *invalid-oss-fuzz-15363.ivf
+d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-oss-fuzz-15363.ivf.res
+5b697360bf0f02de31bae9b8da78e93570958fa4 *invalid-oss-fuzz-16437.ivf
+d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-oss-fuzz-16437.ivf.res.2
+e821070cea8eb687be102a1a118e0341c2e9df69 *invalid-oss-fuzz-24706.ivf
+d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-oss-fuzz-24706.ivf.res
+c0c32af28c5c6672d14e76d197894723e8a07b07 *invalid-oss-fuzz-33030.ivf
+fb38337e7d6203618fcfce4bc2dc17d5a4f00638 *invalid-oss-fuzz-33030.ivf.res
+ccbe4081557eb44820a0e6337c4a094421826b9a *invalid-oss-fuzz-9288.ivf
+67c54283fe1a26ccf02cc991e4f9a1eea3ac5e78 *invalid-oss-fuzz-9288.ivf.res
+c0960f032484579f967881cc025b71cfd7a79ee1 *invalid-oss-fuzz-9463.ivf
+5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-oss-fuzz-9463.ivf.res.2
+f448caf378e250b7eea4fa2d1c3cd7ef4a3211ce *invalid-oss-fuzz-9482.ivf
+b055f06b9a95aaa5697fa26497b592a47843a7c8 *invalid-oss-fuzz-9482.ivf.res
+a686989de79af89136f631fd630df639c7861851 *invalid-oss-fuzz-9720.ivf
+d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-oss-fuzz-9720.ivf.res
+a432f96ff0a787268e2f94a8092ab161a18d1b06 *park_joy_90p_10_420.y4m
+0b194cc312c3a2e84d156a221b0a5eb615dfddc5 *park_joy_90p_10_422.y4m
+ff0e0a21dc2adc95b8c1b37902713700655ced17 *park_joy_90p_10_444.y4m
+c934da6fb8cc54ee2a8c17c54cf6076dac37ead0 *park_joy_90p_10_440.yuv
+614c32ae1eca391e867c70d19974f0d62664dd99 *park_joy_90p_12_420.y4m
+c92825f1ea25c5c37855083a69faac6ac4641a9e *park_joy_90p_12_422.y4m
+b592189b885b6cc85db55cc98512a197d73d3b34 *park_joy_90p_12_444.y4m
+82c1bfcca368c2f22bad7d693d690d5499ecdd11 *park_joy_90p_12_440.yuv
+b9e1e90aece2be6e2c90d89e6ab2372d5f8c792d *park_joy_90p_8_420_a10-1.y4m
+4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c *park_joy_90p_8_420.y4m
+7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947 *park_joy_90p_8_422.y4m
+bdb7856e6bc93599bdda05c2e773a9f22b6c6d03 *park_joy_90p_8_444.y4m
+81e1f3843748438b8f2e71db484eb22daf72e939 *park_joy_90p_8_440.yuv
+b1f1c3ec79114b9a0651af24ce634afb44a9a419 *rush_hour_444.y4m
+eb438c6540eb429f74404eedfa3228d409c57874 *desktop_640_360_30.yuv
+89e70ebd22c27d275fe14dc2f1a41841a6d8b9ab *kirland_640_480_30.yuv
+33c533192759e5bb4f07abfbac389dc259db4686 *macmarcomoving_640_480_30.yuv
+8bfaab121080821b8f03b23467911e59ec59b8fe *macmarcostationary_640_480_30.yuv
+9ec21aa2c4a8a9d46d5403ea20c93b0ff5ad74a1 *rand_noise_w1280h720.yuv
+70894878d916a599842d9ad0dcd24e10c13e5467 *niklas_640_480_30.yuv
+8784b6df2d8cc946195a90ac00540500d2e522e4 *tacomanarrows_640_480_30.yuv
+edd86a1f5e62fd9da9a9d46078247759c2638009 *tacomasmallcameramovement_640_480_30.yuv
+9a70e8b7d14fba9234d0e51dce876635413ce444 *thaloundeskmtg_640_480_30.yuv
+e7d315dbf4f3928779e0dc624311196d44491d32 *niklas_1280_720_30.yuv
+717da707afcaa1f692ff1946f291054eb75a4f06 *screendata.y4m
+9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m
+5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m
+36ddab9b99eb7545aa0bf362d6f498212d596516 *vase10x10.yuv
+c542890ac929749000f7b3883174f2202070d834 *pixel_capture_w320h240.yuv
+c2e1ec9936b95254187a359e94aa32a9f3dad1b7 *av1-1-b8-00-quantizer-00.ivf
+26cd2a0321d01d9db5f6dace8b43a40cd5b9d58d *av1-1-b8-00-quantizer-00.ivf.md5
+a56dd02c0258d4afea1ee358a22b54e99e39d5e1 *av1-1-b8-00-quantizer-01.ivf
+b3d24124d81f1fbb26f5eb0036accb54f3ec69b2 *av1-1-b8-00-quantizer-01.ivf.md5
+3466327cb842a91d69839b11ef930a74f086f4c6 *av1-1-b8-00-quantizer-02.ivf
+c111dce946100efeaad34203080eee1d55464df6 *av1-1-b8-00-quantizer-02.ivf.md5
+d3f1f32de5e2c0c19a58bb8ef096108388c6a820 *av1-1-b8-00-quantizer-03.ivf
+6265321b31130545b4454982ca93e412a56845b8 *av1-1-b8-00-quantizer-03.ivf.md5
+f37c393ebe73266a5ec8508a2ca33c586ff28e64 *av1-1-b8-00-quantizer-04.ivf
+c6e979da71aecc593c0abb40135dd304152b00dd *av1-1-b8-00-quantizer-04.ivf.md5
+ac9c5e93cb19942a9be259d0567ec96c54dcdc7c *av1-1-b8-00-quantizer-05.ivf
+49e35a7399568a0e4f015ce323d5a45ea780ca87 *av1-1-b8-00-quantizer-05.ivf.md5
+461142b1b50ae74c6b698d23f5ed3b764eadfb89 *av1-1-b8-00-quantizer-06.ivf
+6477ff260624e0f76c94ac872d1e7d5576af4177 *av1-1-b8-00-quantizer-06.ivf.md5
+7f8113cd13d8faaa06fdbaaa50dc328daf037e6d *av1-1-b8-00-quantizer-07.ivf
+b26795c6cb408487c20737977cd6b77311772bf7 *av1-1-b8-00-quantizer-07.ivf.md5
+4218f7945a172e1fe4f9e77ec35085a394eda9f4 *av1-1-b8-00-quantizer-08.ivf
+ea5d7d501e9a69d805251e4871515d28468d8676 *av1-1-b8-00-quantizer-08.ivf.md5
+837f3bcadfe56cf302db2ebaf9a990446fb35801 *av1-1-b8-00-quantizer-09.ivf
+eede995cdac5fd01a411da2e74e86e8394138be1 *av1-1-b8-00-quantizer-09.ivf.md5
+adc229b3780a4968c18ded1bcbe72e3f04643833 *av1-1-b8-00-quantizer-10.ivf
+0799b7e54e54ee97bf0e8aad2b75509ce59c7097 *av1-1-b8-00-quantizer-10.ivf.md5
+44bac8247160a8d9a0ab19f890fc89cc9298de1d *av1-1-b8-00-quantizer-11.ivf
+cc6b2bf167e114599b242aba574e8c6f1fa2f047 *av1-1-b8-00-quantizer-11.ivf.md5
+ebb3af7dfc15567188bcb617021cdc95ebc560e3 *av1-1-b8-00-quantizer-12.ivf
+b716ae29d56cd0c052dbfa1b5dcf850cd0fa8ca7 *av1-1-b8-00-quantizer-12.ivf.md5
+46159641f981a26fb9c374a5ca41e44f0ce0a9f0 *av1-1-b8-00-quantizer-13.ivf
+c6db1b8b4a74f83e4a0647e053cea0fc00f6abab *av1-1-b8-00-quantizer-13.ivf.md5
+fadc909d18eb640760fbb075f922fb050e715470 *av1-1-b8-00-quantizer-14.ivf
+e36bb6b23273633ba3ef7d28160a7258840a1476 *av1-1-b8-00-quantizer-14.ivf.md5
+8befbd9cc1601dcd36ec6911613855f68e6fd40e *av1-1-b8-00-quantizer-15.ivf
+cfc2334b76fb5e7aa9d8607e89d37cbc7716d62e *av1-1-b8-00-quantizer-15.ivf.md5
+ca42e00ae27c6b7f684fe3d2a787d50d2827cb3f *av1-1-b8-00-quantizer-16.ivf
+f11278218a7c3c73cfaab2332bab55f06cedcc81 *av1-1-b8-00-quantizer-16.ivf.md5
+05270d365bdc067f9446eda3029a6f41571a5229 *av1-1-b8-00-quantizer-17.ivf
+fb6482f35e7ad04bf231ea1806226760abcb3c26 *av1-1-b8-00-quantizer-17.ivf.md5
+617bc72037165efbff478d5a0d342b3c20ffcafd *av1-1-b8-00-quantizer-18.ivf
+1ff68d5424f91322123fe0d58f436b8e49cfa99d *av1-1-b8-00-quantizer-18.ivf.md5
+821c3b1ae6054c7a91b2f64428806e57f1157ca6 *av1-1-b8-00-quantizer-19.ivf
+f2fd118e786697553d6987f786660a2bb9f00680 *av1-1-b8-00-quantizer-19.ivf.md5
+48bcf17c27d9a4eb73632a68c09f42eff9f9af99 *av1-1-b8-00-quantizer-20.ivf
+64d55e4c858414bc2837c9c3e2d5fb6d2208c4b8 *av1-1-b8-00-quantizer-20.ivf.md5
+d61ecdd4f0950bc5c8bae1270b22e711bdd22763 *av1-1-b8-00-quantizer-21.ivf
+9d447938596096704fd5f4d41bcdf6fabf9cdfb9 *av1-1-b8-00-quantizer-21.ivf.md5
+59b4b65d8e56ccdd1bddff26a03e991a63409334 *av1-1-b8-00-quantizer-22.ivf
+aa1be0c7c7622d612af85f9bf96a212f6fe5ab56 *av1-1-b8-00-quantizer-22.ivf.md5
+95ed96988eb9916cad956db9b929718769de49f1 *av1-1-b8-00-quantizer-23.ivf
+596b8a3aea468996d609624367465c412751f52b *av1-1-b8-00-quantizer-23.ivf.md5
+e6c2dc4ce725003152797b3d7b34d7eb34da50c8 *av1-1-b8-00-quantizer-24.ivf
+1cd3d7e8b3813a9e5591b94eaeb72d471780e64a *av1-1-b8-00-quantizer-24.ivf.md5
+6734e353008824e523939d1a18daa3f2ab2d8ec6 *av1-1-b8-00-quantizer-25.ivf
+c45cf440a05802c1f9e29472175ed397d130d988 *av1-1-b8-00-quantizer-25.ivf.md5
+3372b1c69fb39811156adcea4f6dba802c0918c2 *av1-1-b8-00-quantizer-26.ivf
+b1751d55bb3fb788751fe28fb7434bee153bda68 *av1-1-b8-00-quantizer-26.ivf.md5
+e7ddb19a6e2a798d6a4e7dfdfc10b4df777b60e3 *av1-1-b8-00-quantizer-27.ivf
+0e19d6b79cd71de69d03e0455349568af979b170 *av1-1-b8-00-quantizer-27.ivf.md5
+7f1c90a35543d6b673e353b3702baf3aa1caeaa7 *av1-1-b8-00-quantizer-28.ivf
+d9a4f9cb88103249a05a7e6aa616bf0c16bf9c95 *av1-1-b8-00-quantizer-28.ivf.md5
+28d741b923011c7fcc50a7318256a638d3110a07 *av1-1-b8-00-quantizer-29.ivf
+c68cacf2b2ff2694945a99ad836dcf1ee3961c09 *av1-1-b8-00-quantizer-29.ivf.md5
+9a5d9ea4bc76dd40d04e92f33f45e9c2e120e85d *av1-1-b8-00-quantizer-30.ivf
+eb02bb8c16c4c0368ddff83e05e516e84ec9eaf3 *av1-1-b8-00-quantizer-30.ivf.md5
+20193c372f44f522e094c2c05fc7e4aaa0717fa8 *av1-1-b8-00-quantizer-31.ivf
+a4c1a4ac332f4911f0d5abbd826ebecfb8432d6c *av1-1-b8-00-quantizer-31.ivf.md5
+9617bbd691f093d259dbc8a642a57a153c1fc00c *av1-1-b8-00-quantizer-32.ivf
+73d60a348454b126ea6368ea604954bc23f210ae *av1-1-b8-00-quantizer-32.ivf.md5
+d9aea9d72a686c59b60584d827f60ca1ee8eee26 *av1-1-b8-00-quantizer-33.ivf
+fbf64de376a63d2d3051da83b0e4e56579b55c0a *av1-1-b8-00-quantizer-33.ivf.md5
+791aaf067f125e5cf4a247cf06a2e29ab071ec90 *av1-1-b8-00-quantizer-34.ivf
+8e2e6efe4c069e54844da19125c4280b95990c69 *av1-1-b8-00-quantizer-34.ivf.md5
+01ba67bba5cbf7c94c65da8f4c9bd6e7db24cf3a *av1-1-b8-00-quantizer-35.ivf
+0c5e60704a4a6bd27e67b6fd72ca7d2cf7fff50f *av1-1-b8-00-quantizer-35.ivf.md5
+3e255b4a320c9522dcec539fef770b6920b9a102 *av1-1-b8-00-quantizer-36.ivf
+1241aab865fd7b4bae73736cbeec1866ea9c90ec *av1-1-b8-00-quantizer-36.ivf.md5
+44fa6fca109747d8f43f6c6aa46d782e5d476d54 *av1-1-b8-00-quantizer-37.ivf
+947f0f887c5ac9149cf85e8114a709d6f410fc32 *av1-1-b8-00-quantizer-37.ivf.md5
+8319ac1ddd6ce3279da5780175dff7a3a5fa1054 *av1-1-b8-00-quantizer-38.ivf
+5f571b7f88678eab9e54f162cc9898f14e437770 *av1-1-b8-00-quantizer-38.ivf.md5
+5975e7056e17608593a8c40619b68e6576d373d9 *av1-1-b8-00-quantizer-39.ivf
+7c870192d6eb70ce5367147a3d2c6a52e11f7bec *av1-1-b8-00-quantizer-39.ivf.md5
+47da942f1e455f1422fc65f06dd57304541d16ac *av1-1-b8-00-quantizer-40.ivf
+6ea7116c9ce3a1641c7060bab2f5e06fd0910d61 *av1-1-b8-00-quantizer-40.ivf.md5
+ab35c15dfde21c2572b14e04dbfd5fac1adae449 *av1-1-b8-00-quantizer-41.ivf
+19596f9849653b913186b9d6b7072984ede96177 *av1-1-b8-00-quantizer-41.ivf.md5
+23a5fa6c3d0eaffaf13f6402465f5dd33d8ea7f1 *av1-1-b8-00-quantizer-42.ivf
+5a2726f0d1b1799d4f70883f1bfe5c9d976c6cf5 *av1-1-b8-00-quantizer-42.ivf.md5
+86cddfc463d2b186ec5a1aa25c4562c05201e3c3 *av1-1-b8-00-quantizer-43.ivf
+674c64ec8487ee774ad09350380fa6ac43815807 *av1-1-b8-00-quantizer-43.ivf.md5
+6894c154eb56c4f3fe44d54fc4f9af468b03d175 *av1-1-b8-00-quantizer-44.ivf
+eca679a2781eb894d18b3d578e3aaf4f48019a15 *av1-1-b8-00-quantizer-44.ivf.md5
+0960bf018ada4224b8344519cf091850d50a57bd *av1-1-b8-00-quantizer-45.ivf
+291bb43b9e1ab167040b51019daf1ccf94fd1e50 *av1-1-b8-00-quantizer-45.ivf.md5
+ea644a4732f1a2534332802c2fa5073344f3c356 *av1-1-b8-00-quantizer-46.ivf
+4c7915382b1d6d08709c95525b04ab8830f20ca1 *av1-1-b8-00-quantizer-46.ivf.md5
+d1f8832d33234e2c74a2280090850153ea24ea82 *av1-1-b8-00-quantizer-47.ivf
+90eb9959e612602934dcc512fe6f54abf0c88d9c *av1-1-b8-00-quantizer-47.ivf.md5
+69c93f760e8b666eb5b98f510e09d90f9230ac9b *av1-1-b8-00-quantizer-48.ivf
+931f869e14bd455de9dac2101b383c29e7d6f04c *av1-1-b8-00-quantizer-48.ivf.md5
+8b660c577d95c031d6711c1134b8d115097f8d7e *av1-1-b8-00-quantizer-49.ivf
+0e3fe8b49d497050dc1a0eac5f3ad60f5fe068fe *av1-1-b8-00-quantizer-49.ivf.md5
+d40bb21448a6da0fc9b88cbcf76d2f4226573acb *av1-1-b8-00-quantizer-50.ivf
+bcd2a9c9a021ba44fc5dc74ae02194fe49ca76a4 *av1-1-b8-00-quantizer-50.ivf.md5
+3b5a1d464aa89b0f1a6ad4f5a03602292b826172 *av1-1-b8-00-quantizer-51.ivf
+49bcde0c56cf8b7fbe429336981be22d39025b74 *av1-1-b8-00-quantizer-51.ivf.md5
+38970a02fb38ddb4954fe4240164cb75de5fc744 *av1-1-b8-00-quantizer-52.ivf
+fd02b034d79d4be150efb02bd4349edfd0e41311 *av1-1-b8-00-quantizer-52.ivf.md5
+2fde7a7cf3014d5196d011c47de4a144227ed122 *av1-1-b8-00-quantizer-53.ivf
+0cb66e6d8fbb29962a69ae1703e22da50db2c92b *av1-1-b8-00-quantizer-53.ivf.md5
+89a69e9b9a601e40cb491ac3a1d32491f2468ac8 *av1-1-b8-00-quantizer-54.ivf
+2f8af51acc73c99b5af81db2bdd1883b611ad311 *av1-1-b8-00-quantizer-54.ivf.md5
+31ee4f56fcb0043e95fff7af49e4ef82aafa5543 *av1-1-b8-00-quantizer-55.ivf
+04a7104e02bdd0fa38c118202dbbecdbd11ace02 *av1-1-b8-00-quantizer-55.ivf.md5
+f262f0b234006a2652fceb77b1a8711aa53abb54 *av1-1-b8-00-quantizer-56.ivf
+bdd54dc25bc5a147c76163af0bced45c56435d79 *av1-1-b8-00-quantizer-56.ivf.md5
+1ef00617091db4b2b839de623bd6b4fb0b2f5f83 *av1-1-b8-00-quantizer-57.ivf
+714c65363a87ed5e6e4ad75c79ddb6af57d41fd9 *av1-1-b8-00-quantizer-57.ivf.md5
+43c9b02feccbb3c709d96015f126b7e3d4c24c64 *av1-1-b8-00-quantizer-58.ivf
+bae22b8d6377862bff8219470c0d87205d186a68 *av1-1-b8-00-quantizer-58.ivf.md5
+ca5f780abe4c02e48cceb9c804f3625723c359bf *av1-1-b8-00-quantizer-59.ivf
+c60a20bbf60b0b0a442ef3f7b682979053909d6e *av1-1-b8-00-quantizer-59.ivf.md5
+1f6f047e9f0e1da22fb514370d92c3c7c66dcf89 *av1-1-b8-00-quantizer-60.ivf
+86dc7fa59d363cf1ae4b027a57b119bda893c1c1 *av1-1-b8-00-quantizer-60.ivf.md5
+bcf0c3353568c47a043f2dc34c9abd3fc04eebd4 *av1-1-b8-00-quantizer-61.ivf
+66fc4f729c5915aa19939d1b6e28e5b398e747bb *av1-1-b8-00-quantizer-61.ivf.md5
+ac8d3c54451b52cf557ef435d33e7638088d66df *av1-1-b8-00-quantizer-62.ivf
+b57f4e1276ead626a3662339a86111ae6fda49d2 *av1-1-b8-00-quantizer-62.ivf.md5
+2a8aa33513d8e01ae9410c4bf5fe1e471b775482 *av1-1-b8-00-quantizer-63.ivf
+9f646ec35a168f495e144c64ba7ce9aeb41cd0a2 *av1-1-b8-00-quantizer-63.ivf.md5
+838388fbda4a1d91be81ff62694c3bf13c460d38 *av1-1-b8-01-size-16x16.ivf
+4229c1caf8e25eb3073456fb90ceed206753901e *av1-1-b8-01-size-16x16.ivf.md5
+23f4253bf71e02b2e8ead66da4b3de875e879ef2 *av1-1-b8-01-size-18x16.ivf
+af125644436d4b6897dade68336cedad663b6610 *av1-1-b8-01-size-18x16.ivf.md5
+94e4a75bd93052f79998e9e08e6b5dd73dc27e50 *av1-1-b8-01-size-32x16.ivf
+e7b3fbc5e4b2469838e7ae36512bd3ce0a81040c *av1-1-b8-01-size-32x16.ivf.md5
+f297bde01c05ec5c07ff8118a0280bd36c52b246 *av1-1-b8-01-size-34x16.ivf
+f6bbd94d6063c689de3c7cf94afa2c68b969d12c *av1-1-b8-01-size-34x16.ivf.md5
+1e18bdf68bab7e7282aacc77e423bc7d93d04a8e *av1-1-b8-01-size-64x16.ivf
+de75732fccfb385294b23c17f0f1a57b455edcf7 *av1-1-b8-01-size-64x16.ivf.md5
+26b1f6ae80b161e971468085778cc1ece502b330 *av1-1-b8-01-size-66x16.ivf
+48bd99813557c314d398e6952da78da07c79d416 *av1-1-b8-01-size-66x16.ivf.md5
+ff213ecf31b982a3a7f009c9739f64e066e1ffe9 *av1-1-b8-01-size-16x18.ivf
+86b20a13b1939dc5f678e80491f190d376233d58 *av1-1-b8-01-size-16x18.ivf.md5
+c90bd878c59263a15c6a6f515d1c7e071f141559 *av1-1-b8-01-size-18x18.ivf
+6f659036ffcd3dd380cf970cf1a06f7755e0b2de *av1-1-b8-01-size-18x18.ivf.md5
+e16a1411381b34817a4c0d8e5eeaeb8cddcc9c46 *av1-1-b8-01-size-32x18.ivf
+fdb1c4ec56f5aa690eadbe897340fee86a06ae2f *av1-1-b8-01-size-32x18.ivf.md5
+fac7052b39bd2d0ae107e0e94050226712c770c2 *av1-1-b8-01-size-34x18.ivf
+adb0d5a99228027eaa3b016963df447c9818c447 *av1-1-b8-01-size-34x18.ivf.md5
+b8be5e55d9be42746c2b547d0e26e80b21c9802a *av1-1-b8-01-size-64x18.ivf
+8f8f6da34cdf78c5a6551c637e1afe279cc3884e *av1-1-b8-01-size-64x18.ivf.md5
+9e066bdcc2cd789cdf551bd4c9c85c178887b880 *av1-1-b8-01-size-66x18.ivf
+e8ec6effa936423ae2eec2b60a3160720d2de912 *av1-1-b8-01-size-66x18.ivf.md5
+6ebe45085cdeebc2acd6da5abd542a59312c0ff4 *av1-1-b8-01-size-16x32.ivf
+044695669103dbf158591dce9c649317a177d5f6 *av1-1-b8-01-size-16x32.ivf.md5
+9fabb4f60641b8c7995d1dc451419165d41258ff *av1-1-b8-01-size-18x32.ivf
+7263764680dfec864c3fad5df824ab1973489a14 *av1-1-b8-01-size-18x32.ivf.md5
+3f72841a24a13e601d79cf029aa1fdb02970ce0b *av1-1-b8-01-size-32x32.ivf
+bbe1ae2888d291ec6bc98cd0784937580c554103 *av1-1-b8-01-size-32x32.ivf.md5
+392131a7c7609acd0dba88fee14f1ed042d23ab1 *av1-1-b8-01-size-34x32.ivf
+eea68165ebe9acd28693374bf2266374b9c77786 *av1-1-b8-01-size-34x32.ivf.md5
+78afdd96265811ab9466e906347b57161e5c010d *av1-1-b8-01-size-64x32.ivf
+47b317af582700b67f6e77659db1dfaa26c8cde6 *av1-1-b8-01-size-64x32.ivf.md5
+2b4d01f2c9f23044c0d886482c7073bd4d5d37d1 *av1-1-b8-01-size-66x32.ivf
+3ad5a58a0ee5086af370b22ab2b5b7592a4f33e7 *av1-1-b8-01-size-66x32.ivf.md5
+78ddae04eb8277ae605bd7017ad7ad27bfc82d39 *av1-1-b8-01-size-16x34.ivf
+d0c18e679f1fc51e4f7409831321eed9c4858f6f *av1-1-b8-01-size-16x34.ivf.md5
+38d8ed885f46aead6ec1271d8a5d4aee79b8eb68 *av1-1-b8-01-size-18x34.ivf
+097ddbd69b8f54826a35efeb0b8b07ec198bba6b *av1-1-b8-01-size-18x34.ivf.md5
+91a42720bc2e7ba701f4d97b463a098b6707cdbd *av1-1-b8-01-size-32x34.ivf
+c590d43d37095bd2e8f8d12c9278477419b72d1a *av1-1-b8-01-size-32x34.ivf.md5
+4cc2a437dba56e8878113d9b390b980522542028 *av1-1-b8-01-size-34x34.ivf
+57eeb971f00e64abde25be69dbcb4e3ce5065a57 *av1-1-b8-01-size-34x34.ivf.md5
+b36fee1b6ad69d1206466615d69c05e0a4407939 *av1-1-b8-01-size-64x34.ivf
+a78aea0250d0b32657dc0eaf2d8394bc766c0e35 *av1-1-b8-01-size-64x34.ivf.md5
+10e441209262e082e31fef8c15b51579c9e81509 *av1-1-b8-01-size-66x34.ivf
+558b46f6ef1662c208012d0b66d1857eeff3244e *av1-1-b8-01-size-66x34.ivf.md5
+dd44aad500c7ca0fc97e3d8f0abed3c83b24c79c *av1-1-b8-01-size-16x64.ivf
+a5b64e8063abcf3e4872dc4baf1c32384dc5cf83 *av1-1-b8-01-size-16x64.ivf.md5
+aa849f0d09bcb2ead44719d63043536932d5c9f2 *av1-1-b8-01-size-18x64.ivf
+bcdf2dea3590c7031158ffe7b907d9ee35e2fe57 *av1-1-b8-01-size-18x64.ivf.md5
+36e856d30e160ba2fbb00510296202f61afaae49 *av1-1-b8-01-size-32x64.ivf
+99299f75b82c40c13f168adf2d124f57044a39a2 *av1-1-b8-01-size-32x64.ivf.md5
+e3e03ec5d38eb25e97e4ec3adc6ed40ecdebd278 *av1-1-b8-01-size-34x64.ivf
+84625abf8a200a7d20dd3dd3b277b50b3d62ce32 *av1-1-b8-01-size-34x64.ivf.md5
+7d017daebef2d39ed42a505a8e6103ab0c0988c1 *av1-1-b8-01-size-64x64.ivf
+1ff38d5ecba82fb2e6ac3b09c29c9fe74885ac29 *av1-1-b8-01-size-64x64.ivf.md5
+e1b58ba0b462508593399a2ed84db5f1c59ffcd2 *av1-1-b8-01-size-66x64.ivf
+a6b2c84c94fe79ab0373d157d1203f8d66de0706 *av1-1-b8-01-size-66x64.ivf.md5
+7b4faa7eb7b73392b62de6613282a98dddc13bb6 *av1-1-b8-01-size-16x66.ivf
+a2dacf2bae3c4ab352af66a9600946d29ab9a6ee *av1-1-b8-01-size-16x66.ivf.md5
+0f97805fa30497d4cf39665150f00dfdea52d862 *av1-1-b8-01-size-18x66.ivf
+33d8ea0765953250f998da3fe161f2a8cfca2353 *av1-1-b8-01-size-18x66.ivf.md5
+c8bb00256de973e3b3ee31b924f554336d310cdb *av1-1-b8-01-size-32x66.ivf
+6a6588e6edc68ff7739968a9e7cc6d9eaaeed356 *av1-1-b8-01-size-32x66.ivf.md5
+75ec54fec5c36eecde6d0a16e0389a5f7ad8ec22 *av1-1-b8-01-size-34x66.ivf
+36101dfa9495c18696c0d7d61f25e748f4de7425 *av1-1-b8-01-size-34x66.ivf.md5
+7e5491716e70f8199156b8843513c935667b281e *av1-1-b8-01-size-64x66.ivf
+da38755bb0c9ef56b81617835ddf1340242c6dce *av1-1-b8-01-size-64x66.ivf.md5
+68b47b386f61d67cb5b824a7e6bf87c8b9c2bf7b *av1-1-b8-01-size-66x66.ivf
+25974893956ebd92df474325946130c34f880ea7 *av1-1-b8-01-size-66x66.ivf.md5
+9f386d19c87dbfd6ac84a06d2393dd88863ac003 *av1-1-b8-01-size-196x196.ivf
+788f77f655f55de3db94dd69870316134c149116 *av1-1-b8-01-size-196x196.ivf.md5
+ed3bb2bb52a9d1786e233ef38142b15b85097875 *av1-1-b8-01-size-198x196.ivf
+3bb6b6721ad9b2838b2d07e47b29d6c0117526b1 *av1-1-b8-01-size-198x196.ivf.md5
+49461772caaaa7b824d48f4e9c77a906b0dc02d5 *av1-1-b8-01-size-200x196.ivf
+f1cba00c36909c56097c8785df476d42bc91f259 *av1-1-b8-01-size-200x196.ivf.md5
+44a656a22958e26ed169a69deb8f373117224f06 *av1-1-b8-01-size-202x196.ivf
+69be876b52fe42811bba52d36d0bcc88d6c25b3f *av1-1-b8-01-size-202x196.ivf.md5
+0a6fe9b478363faedbfd465a75790b4c2661b9ba *av1-1-b8-01-size-208x196.ivf
+fc8e95a6860a8a37ccdf1dfe49828502fcf96a08 *av1-1-b8-01-size-208x196.ivf.md5
+8e05b5a20ec95afd92bb615a7daa2e17a7ef55a8 *av1-1-b8-01-size-210x196.ivf
+0add512bffbda3300d8f684a53b13b996fe2e46d *av1-1-b8-01-size-210x196.ivf.md5
+a15f12652c6b4d0c30f13a439c941bfc4a431d1a *av1-1-b8-01-size-224x196.ivf
+b904b93252175f79e0e2b28896131ce93d5fc925 *av1-1-b8-01-size-224x196.ivf.md5
+1a57b913443b267f4a31a6925c39f5b58022f550 *av1-1-b8-01-size-226x196.ivf
+7cf3087de5804763a82d2a798243a66459664772 *av1-1-b8-01-size-226x196.ivf.md5
+2cc28541a2a72e8b45a368f71e70fc294e2de3ab *av1-1-b8-01-size-196x198.ivf
+bb736eedb4bd1e39bf9d60435b4b27a12842e112 *av1-1-b8-01-size-196x198.ivf.md5
+c4ebf93fbf3ae52108fd7b39ddef3afae48188ea *av1-1-b8-01-size-198x198.ivf
+fa4de6881511728bafa15b5f441a0cfdf683cc75 *av1-1-b8-01-size-198x198.ivf.md5
+55fce983186d454b0eb15527393bb2465ba41c6b *av1-1-b8-01-size-200x198.ivf
+1ac8fb1ee622cbc4aa1b83cb46b4731c85efae62 *av1-1-b8-01-size-200x198.ivf.md5
+67d276c67886f0a91a7ee06751a64f95eeb7bc1f *av1-1-b8-01-size-202x198.ivf
+1633b62d9e4ea41737c42f70cbde9a5671da0cef *av1-1-b8-01-size-202x198.ivf.md5
+081cb3f29d3956d4d858d9661fd3d62c94b68867 *av1-1-b8-01-size-208x198.ivf
+871d1c99167408dd32fa7603a7296c9b99ccda15 *av1-1-b8-01-size-208x198.ivf.md5
+b2d80b42468d5f296ae240cfb1fc0b3dd3d96bbc *av1-1-b8-01-size-210x198.ivf
+6a3382656cb17b532a97b1061697f9a878fc58d1 *av1-1-b8-01-size-210x198.ivf.md5
+84d7994fa20fcf6c1d8dbd4c2060c988a6fce831 *av1-1-b8-01-size-224x198.ivf
+42ea12e15de81f2e8617b6de7bae76de2da4d648 *av1-1-b8-01-size-224x198.ivf.md5
+c74a9281cf98c597121df6bff0ac5312b887f969 *av1-1-b8-01-size-226x198.ivf
+4133aae0001804e2bbc7928fc065517a6dd8b288 *av1-1-b8-01-size-226x198.ivf.md5
+27adbf148c63f807bd617cfd78aeaedb8b0f2304 *av1-1-b8-01-size-196x200.ivf
+9253e525e6207ef1ce0839b8f88ea781e9abe41e *av1-1-b8-01-size-196x200.ivf.md5
+21c9ea4d882e48353d3df66fcde0e4746168163f *av1-1-b8-01-size-198x200.ivf
+3d5ee59fde9194f0eaff736051cfd1d7b7daeff1 *av1-1-b8-01-size-198x200.ivf.md5
+c27b0b57667910847122a0309c703315e444110f *av1-1-b8-01-size-200x200.ivf
+7b2a15a17b421ef07e285ca4e8a224f0512c434d *av1-1-b8-01-size-200x200.ivf.md5
+780de549e4163a52590f7c0f488e027a8a4aa053 *av1-1-b8-01-size-202x200.ivf
+cb0ec0969522ca60d79a639e9b9509363468ffd0 *av1-1-b8-01-size-202x200.ivf.md5
+2c59821904863e264ae61401cbd494a79bc04f13 *av1-1-b8-01-size-208x200.ivf
+9963955966a52b65cdd13465c9fb2ba3b5356755 *av1-1-b8-01-size-208x200.ivf.md5
+ff63121611ea9c0628c7e5af13de5e7786611ca6 *av1-1-b8-01-size-210x200.ivf
+2a5993be234e3af2af6d185b2a6f3aaf1979b83a *av1-1-b8-01-size-210x200.ivf.md5
+b8485ada95440d78b51153227231b1aced1a8273 *av1-1-b8-01-size-224x200.ivf
+9c3cd32ea6c006a91eb37d69dbeccf878de5d214 *av1-1-b8-01-size-224x200.ivf.md5
+1aa0ce3e3a74f9b600a146e98b05547a0b454c48 *av1-1-b8-01-size-226x200.ivf
+e045be96c3af16a9ddc10a9933e8ddfb3319d716 *av1-1-b8-01-size-226x200.ivf.md5
+e92b76480f4339855d998b97182f36b28deadcfa *av1-1-b8-01-size-196x202.ivf
+480c707abcd2a650e2160ec397f8348cecb45770 *av1-1-b8-01-size-196x202.ivf.md5
+137b9c0d10a3bdbdf6f97b3e6331f3e8acaf8f91 *av1-1-b8-01-size-198x202.ivf
+7429642146d0da55161ab13024a261094ee2ce87 *av1-1-b8-01-size-198x202.ivf.md5
+9cea71c44ad015ac702d675bacca17876e65cb1a *av1-1-b8-01-size-200x202.ivf
+76b1ec6c42da55f47e389a561590d1a7c713e495 *av1-1-b8-01-size-200x202.ivf.md5
+26dffdcd0dac9becf68d12e31fcd91eddf1f7154 *av1-1-b8-01-size-202x202.ivf
+ddb75e99123fed4ef05d9b85200cefd8985bc84c *av1-1-b8-01-size-202x202.ivf.md5
+04007e83bb66ba547d09f8926ea5bfc7fd9e4b2a *av1-1-b8-01-size-208x202.ivf
+5b72eb58db22087ad416c499119f41e718395b52 *av1-1-b8-01-size-208x202.ivf.md5
+721ff7c0ae0e2ed896b5acac230113f1404e769c *av1-1-b8-01-size-210x202.ivf
+187d2ef939fc26e1a1c7de65abe8e058d8aae17a *av1-1-b8-01-size-210x202.ivf.md5
+dba41421cc938bcf0234254f96be0325ab66186e *av1-1-b8-01-size-224x202.ivf
+58856038c1eb13a7bf0353a30b1affe844cd31b1 *av1-1-b8-01-size-224x202.ivf.md5
+55eba14878d25dcc351ee5e92fa06e559035b409 *av1-1-b8-01-size-226x202.ivf
+e295b3d791d40d7c1fff2c40a260078dccaef24a *av1-1-b8-01-size-226x202.ivf.md5
+6c777223990ddfd92040a8526646ed0f39299b0d *av1-1-b8-01-size-196x208.ivf
+5210daff766cddaf3945610ee05ff242aef8175a *av1-1-b8-01-size-196x208.ivf.md5
+252831abfb9f4a9a8556c21cc3bf60adfe88210f *av1-1-b8-01-size-198x208.ivf
+35ed9601e608a829980cec81e41b7bd3e5f4c2ce *av1-1-b8-01-size-198x208.ivf.md5
+e800ed893a88704a4576d4984957f3664560daa9 *av1-1-b8-01-size-200x208.ivf
+82c038f9072a2fcf8d55fb4a474fdd791ba9a290 *av1-1-b8-01-size-200x208.ivf.md5
+9ce7bb932dd99f86da8ff2ab89fa4d3089a78da8 *av1-1-b8-01-size-202x208.ivf
+0611bf0179abe3c820a447a2bd3a04c3790f3a87 *av1-1-b8-01-size-202x208.ivf.md5
+e5900d9150c8bebc49776227afd3b0a21f5a6ac6 *av1-1-b8-01-size-208x208.ivf
+86d6b9a3840aa0a77938547c905bd6f45d069681 *av1-1-b8-01-size-208x208.ivf.md5
+2758ba5dad16f4a91334f2ed07a4a037201bb873 *av1-1-b8-01-size-210x208.ivf
+78453b1fda2ccc6f35e0d762567807757bcddb16 *av1-1-b8-01-size-210x208.ivf.md5
+fff88fb8e833f6b4ad64cb591b219c7cceb7f2d2 *av1-1-b8-01-size-224x208.ivf
+87266fc34aaed82cdb98cbc309b221ad52eccd81 *av1-1-b8-01-size-224x208.ivf.md5
+dec839fe64046461015b56cda191835284f42a52 *av1-1-b8-01-size-226x208.ivf
+d7a15264fc3fd55d3aec0ccfaa7c434c6d90969f *av1-1-b8-01-size-226x208.ivf.md5
+584782e93ed1cb7797a90fece44becdd1e23bf0d *av1-1-b8-01-size-196x210.ivf
+ed76ec841b18a457853e368576967c4768fc2730 *av1-1-b8-01-size-196x210.ivf.md5
+dab625599b9f01398b593e865d9a4a95a029d60f *av1-1-b8-01-size-198x210.ivf
+b90e8d96a1f5b329b088b467a11fed2d055d74ca *av1-1-b8-01-size-198x210.ivf.md5
+6774bee17b9e50d2d8630e2e1afc30ded67e662d *av1-1-b8-01-size-200x210.ivf
+343a86bd54eb3dd5e9902eb62a3d776dcff2f4f3 *av1-1-b8-01-size-200x210.ivf.md5
+0456c3b8e242eeee019ca97d155f81124de62c90 *av1-1-b8-01-size-202x210.ivf
+5a6a6428c9858a0d3561db42ceaf981c143fe479 *av1-1-b8-01-size-202x210.ivf.md5
+6a3a8f65bf806b1be7726b983427880f772c9986 *av1-1-b8-01-size-208x210.ivf
+5563ea6d8c65887553ff3000addc6418913f1650 *av1-1-b8-01-size-208x210.ivf.md5
+5a8b69489f8e9b917ea7718ad2645101cdbe5644 *av1-1-b8-01-size-210x210.ivf
+f4b01604036fa23000d44fbf42097ae1181bcd62 *av1-1-b8-01-size-210x210.ivf.md5
+fb6f5b08a048698cfe324557ee8cd840c4a3f6ce *av1-1-b8-01-size-224x210.ivf
+3ce5c404e3ca09c8e994b3043bad42cd555b00c0 *av1-1-b8-01-size-224x210.ivf.md5
+2e9fc8510d2131b2f3c9a93bececac985e4426d2 *av1-1-b8-01-size-226x210.ivf
+897c537e259331ca86cdd6e4d2bd343f8538402e *av1-1-b8-01-size-226x210.ivf.md5
+8300512106fce3424eb74b5d4bc0f4f19f7c9af8 *av1-1-b8-01-size-196x224.ivf
+43662ea025ea79afe4964fd4d12a77f4aa4e565e *av1-1-b8-01-size-196x224.ivf.md5
+640f8fda7ade8f2850e2275a9f5e233e33a0ba8d *av1-1-b8-01-size-198x224.ivf
+9ac690bdbbce47d7b169128b568f955e70076f8c *av1-1-b8-01-size-198x224.ivf.md5
+ce2e9379c72fc924e364d5727605394a1438a211 *av1-1-b8-01-size-200x224.ivf
+1ec35a53d88072b96b255202f678178bc7e5bb20 *av1-1-b8-01-size-200x224.ivf.md5
+5d3af7921623deccb578115c8ce207c019f97f50 *av1-1-b8-01-size-202x224.ivf
+14eafd55b0cda3a3476cae7ad500dbd5ee899dd5 *av1-1-b8-01-size-202x224.ivf.md5
+6b6d78e466cf94a5ef8dfe252caa0948dd2ec175 *av1-1-b8-01-size-208x224.ivf
+e178b0c272dfcfe614c6b49cb28dad11781af0b6 *av1-1-b8-01-size-208x224.ivf.md5
+dd2232b9e18971d7e19650a1e3218aef1010247f *av1-1-b8-01-size-210x224.ivf
+40a66198c47820f5fa2d2e389ec0c1191ea4ffcc *av1-1-b8-01-size-210x224.ivf.md5
+9ec028b81a5ea311683328d856f436e6d0b0e6a0 *av1-1-b8-01-size-224x224.ivf
+143b9530ce722385db2c2d883daa649ed42b8d40 *av1-1-b8-01-size-224x224.ivf.md5
+bf833947e62935c54e1e727ccb36157f7c1e9e5d *av1-1-b8-01-size-226x224.ivf
+ca4f3b44463106e4f0bb54e490c3bd457d7d780b *av1-1-b8-01-size-226x224.ivf.md5
+5525f7e312ec073f480ed5a2be5bdc4f0ce51a09 *av1-1-b8-01-size-196x226.ivf
+062d4b240741184458d2d2abd243ed7877631de8 *av1-1-b8-01-size-196x226.ivf.md5
+e6b911142394b94c23191eaa63c9eb41a00f80b0 *av1-1-b8-01-size-198x226.ivf
+3b580d903dddf47082f5e055bfb01a4f05c09b7d *av1-1-b8-01-size-198x226.ivf.md5
+70feb5efeb28df25f7d1a661c73bf013c5ada9b4 *av1-1-b8-01-size-200x226.ivf
+f0b894e7f787e62f1492be62f3dedeb065062160 *av1-1-b8-01-size-200x226.ivf.md5
+7f9a10831e2389b31497fad50080b4d5452d6e91 *av1-1-b8-01-size-202x226.ivf
+45b7194eba9367c8059403c23ca4ae49e988dfaf *av1-1-b8-01-size-202x226.ivf.md5
+967837a2cfbf9aa3131f73aec6a52dcdd82926c7 *av1-1-b8-01-size-208x226.ivf
+c8baedb48fd5d4c956aa8d73fd957370f718f047 *av1-1-b8-01-size-208x226.ivf.md5
+9c926226b9f6b015501d8ac1e3f95e8570283a05 *av1-1-b8-01-size-210x226.ivf
+57d4837667fd4c5a7aeb908626d701b632852c60 *av1-1-b8-01-size-210x226.ivf.md5
+25a4940922761239809d82c45c2be1c5e4f48785 *av1-1-b8-01-size-224x226.ivf
+87ae7e7558241bf3575a333f56fbad4dfdade8ff *av1-1-b8-01-size-224x226.ivf.md5
+40dd208eb525cd90d7c0674cf787097fb909afae *av1-1-b8-01-size-226x226.ivf
+34bdef682a4eae0e0a05e4486a968af1df8b220a *av1-1-b8-01-size-226x226.ivf.md5
+9bbe8499796aa588ff02e313fb0d4349940d2fea *av1-1-b10-00-quantizer-00.ivf
+36b402eedad2bacee8ac09acce44e2fc356dd80b *av1-1-b10-00-quantizer-00.ivf.md5
+1d5e1d2827624f328020bf123df213bb175577e0 *av1-1-b10-00-quantizer-01.ivf
+16c529be5502369e43ce9c6fe99a9709968e3daf *av1-1-b10-00-quantizer-01.ivf.md5
+39abc20739242a8f05efd4b35d7603c8ad7ff45d *av1-1-b10-00-quantizer-02.ivf
+81faa72c3d43b003966fe09ffaae51b07b1059be *av1-1-b10-00-quantizer-02.ivf.md5
+92ebf349b803333a43824a83d997b8cf76f656f9 *av1-1-b10-00-quantizer-03.ivf
+5e7556dc998cb8b506a43cc078e30802d7e600e6 *av1-1-b10-00-quantizer-03.ivf.md5
+1c496177c66e49f2e3556af87ec67afb5060170b *av1-1-b10-00-quantizer-04.ivf
+560fea4800a44fe19ed8d3e74f425bdbf1fb8abd *av1-1-b10-00-quantizer-04.ivf.md5
+7de864b8475ce0acd0ecb01827f2c9add815352b *av1-1-b10-00-quantizer-05.ivf
+1c1aea3db3f54a91866d89fd3b1a0d285ca10310 *av1-1-b10-00-quantizer-05.ivf.md5
+b6501c165619b036d0f7864fd4739973d2d18970 *av1-1-b10-00-quantizer-06.ivf
+d758c8eff275651006c41e7dd447cac13b489ad7 *av1-1-b10-00-quantizer-06.ivf.md5
+e4df6f588f156dffaafd9517b64f753cfc9ccf05 *av1-1-b10-00-quantizer-07.ivf
+3c577f67dade4537de642fd457ea2b367424f336 *av1-1-b10-00-quantizer-07.ivf.md5
+07e9c4c18abb36c8699c1c12bebcc727f090b525 *av1-1-b10-00-quantizer-08.ivf
+4981568ade3170f311cb114fa2689edc4bc35e67 *av1-1-b10-00-quantizer-08.ivf.md5
+2268ecd2899f1b41ae9898925b1d62cfefa30282 *av1-1-b10-00-quantizer-09.ivf
+029b03029b65b7c4c208961f0820467ad42fd3d6 *av1-1-b10-00-quantizer-09.ivf.md5
+3d2adaf6441cfa9585dcbf7d19d65bf6992a29a3 *av1-1-b10-00-quantizer-10.ivf
+017b7fb4c3ba0747c2d5688d493da33ef993d110 *av1-1-b10-00-quantizer-10.ivf.md5
+006535760bd7dc1cfc95e648b05215954a2e76c2 *av1-1-b10-00-quantizer-11.ivf
+c0ae083deb8e820aa49034af4d100944dd977018 *av1-1-b10-00-quantizer-11.ivf.md5
+840e0cbfe1acc8a7a45c823dc55ab44a0b6b553e *av1-1-b10-00-quantizer-12.ivf
+49232ea38bdef650c94808f53834f1137cd4bf39 *av1-1-b10-00-quantizer-12.ivf.md5
+04b0e5a7387e07474f51be4b2c3e05211b40f0d0 *av1-1-b10-00-quantizer-13.ivf
+a51b5ec4b890df3a64f9f0d866b8c41296c9e081 *av1-1-b10-00-quantizer-13.ivf.md5
+5dc47a140fbcbf08bf91481ee3585e9e067561ab *av1-1-b10-00-quantizer-14.ivf
+2625319eef69d6225e6ab6e5ce7790491406cb5d *av1-1-b10-00-quantizer-14.ivf.md5
+f866be86d8d8aa08ded30e42988b0936c1a16064 *av1-1-b10-00-quantizer-15.ivf
+03b7c1eefb54d99e30051c7123c0453f04a6579d *av1-1-b10-00-quantizer-15.ivf.md5
+548df2371dfb485419ed9baf28e3f495c64f364a *av1-1-b10-00-quantizer-16.ivf
+8a0d6bf1626b05b65c77331305414fe9be54e8c6 *av1-1-b10-00-quantizer-16.ivf.md5
+0077c82f96a2e095a3cb8de9bfa63715e3c9f438 *av1-1-b10-00-quantizer-17.ivf
+5d85f77f3087f4b206930722a945c60039262be4 *av1-1-b10-00-quantizer-17.ivf.md5
+1e0f1245ecb4c903b5dc7072d959fc43a7bba381 *av1-1-b10-00-quantizer-18.ivf
+06316ae2b45f2359a70cc3855ffd6ab81048b41a *av1-1-b10-00-quantizer-18.ivf.md5
+f197198f7ec058110185fda5297a1a43993654df *av1-1-b10-00-quantizer-19.ivf
+bac522c7f234d506c75b5495d74b3fa57c83a4df *av1-1-b10-00-quantizer-19.ivf.md5
+c2f57324d000b349323f37d5ebebde8c2b861f30 *av1-1-b10-00-quantizer-20.ivf
+999c6110786cbc25e67792234a5a02f2cb4553c7 *av1-1-b10-00-quantizer-20.ivf.md5
+2ffad9adfd19286fe2166ba877289d201c9a634f *av1-1-b10-00-quantizer-21.ivf
+d55713eaa791cfd7bf69b6c26d5032029d9a0f06 *av1-1-b10-00-quantizer-21.ivf.md5
+382528db53328c1a38976f5d9b579eef35d839f4 *av1-1-b10-00-quantizer-22.ivf
+cb5bd459e1a90126da9264cff4281515f95755b2 *av1-1-b10-00-quantizer-22.ivf.md5
+b52cc6160fc66f72ad66c198d275a1c73f925022 *av1-1-b10-00-quantizer-23.ivf
+c0f9d6659e1f283e9356fd7b4ac9f7cc5544cdc2 *av1-1-b10-00-quantizer-23.ivf.md5
+e11f15e3b63e7606b1122bb3670ee77c09c04840 *av1-1-b10-00-quantizer-24.ivf
+e9f141b924440e044270c81a68458fe498599a8e *av1-1-b10-00-quantizer-24.ivf.md5
+fb91793b69824c99b0218788dcea0a74ebd7e84e *av1-1-b10-00-quantizer-25.ivf
+434e33d609b2683c3cfbcc3a2cdfc26339590fb6 *av1-1-b10-00-quantizer-25.ivf.md5
+d82e38f31cdcf8b43479e6ddaa83373de38f70a2 *av1-1-b10-00-quantizer-26.ivf
+183943b851ba383a536f13c83b93f61ac8961ad5 *av1-1-b10-00-quantizer-26.ivf.md5
+6bf5e4e8e0aca699e493b9eb3672d2117494d74d *av1-1-b10-00-quantizer-27.ivf
+f0fb7e0a99180828b0e38b2cfe0622eecc2d26b8 *av1-1-b10-00-quantizer-27.ivf.md5
+d5adee2567544c3ae4223b3f3528a770377878d2 *av1-1-b10-00-quantizer-28.ivf
+14edf588efc67570e529b0ff8aeb8e7a0c69238b *av1-1-b10-00-quantizer-28.ivf.md5
+e6dcdc106847956035e3f00aabf4470f97e1887e *av1-1-b10-00-quantizer-29.ivf
+413c5cb778611c7c1a810b53861b9ab1fb391f17 *av1-1-b10-00-quantizer-29.ivf.md5
+b5e98b3f6b1db04d46bf43064c6ac64f797aff00 *av1-1-b10-00-quantizer-30.ivf
+d1a603661d76c28658c7cd2892b408e91d77893e *av1-1-b10-00-quantizer-30.ivf.md5
+80168371d1150e82e3f46bcbbcabba458b835b19 *av1-1-b10-00-quantizer-31.ivf
+904ecd033d4af5239c4d5b3f86e51ed5c3c2e3fb *av1-1-b10-00-quantizer-31.ivf.md5
+96291f6ace85980892d135a5b74188cd629c325f *av1-1-b10-00-quantizer-32.ivf
+a5ceace390d4a75d48281fe29060c21557e4f5ae *av1-1-b10-00-quantizer-32.ivf.md5
+0f80495de34eae07c4905b72573a315a879390ec *av1-1-b10-00-quantizer-33.ivf
+72b8f662973a660412946687dff878b276ae518e *av1-1-b10-00-quantizer-33.ivf.md5
+24905e3be7db320994b7fb8311dfd50a7c9e54da *av1-1-b10-00-quantizer-34.ivf
+cea514bb1b7b064c4d31914a2cb266611c278577 *av1-1-b10-00-quantizer-34.ivf.md5
+083012960dd7c17d3b00fa0e807759c98faded8f *av1-1-b10-00-quantizer-35.ivf
+de5fdb9e1e581484af1cc7d2dd3c3e84c90cebb2 *av1-1-b10-00-quantizer-35.ivf.md5
+f725f179aeee5b413620c0dd81b007b245c2a7ed *av1-1-b10-00-quantizer-36.ivf
+246b1931c04c02df1f168090e2650827cd5dbabd *av1-1-b10-00-quantizer-36.ivf.md5
+f6aa824156e9848f237481889a8103eb6130f31d *av1-1-b10-00-quantizer-37.ivf
+a8f78dd15fc2994369a08c2ddddcd0760c62ea5b *av1-1-b10-00-quantizer-37.ivf.md5
+a8dd662338c493aea266b99203e70af25982633f *av1-1-b10-00-quantizer-38.ivf
+09f36d998e85d0450060f540e50b075ae1432fc6 *av1-1-b10-00-quantizer-38.ivf.md5
+d97428871720ed658da6ed0e3f7c15da83387e4c *av1-1-b10-00-quantizer-39.ivf
+8c5230048909ee8f86f87c116f153cd910d0141f *av1-1-b10-00-quantizer-39.ivf.md5
+86e754e55e9b63c6e0a4fef01761414f8a6b61ca *av1-1-b10-00-quantizer-40.ivf
+99a71accf6457264e45ca80d3b1f082ee5acdecc *av1-1-b10-00-quantizer-40.ivf.md5
+9d18b7236506ab7e107c062620b64096ec0cf423 *av1-1-b10-00-quantizer-41.ivf
+5771159a9a7c7b66c9e13bb13ec3d53b37860208 *av1-1-b10-00-quantizer-41.ivf.md5
+54b72bc879a80e66613f421e67db62bba1c0041b *av1-1-b10-00-quantizer-42.ivf
+bf958236883ee7209ef4cb0b7503b430634a291e *av1-1-b10-00-quantizer-42.ivf.md5
+a06d5321a51d90404dd7085ae511d7df5d5e1e05 *av1-1-b10-00-quantizer-43.ivf
+ddb25723d976043d863634b9dc3b5fb84a245803 *av1-1-b10-00-quantizer-43.ivf.md5
+2ea0b64c170d7299dae1c14a8a49349aee8e0d08 *av1-1-b10-00-quantizer-44.ivf
+d18bde1b4893792173fa2014665e9364395ad5e9 *av1-1-b10-00-quantizer-44.ivf.md5
+73e506a32d3518e23424f231c7b5323d7a34a3d6 *av1-1-b10-00-quantizer-45.ivf
+be6224ebc77a3e5fb9c1645b876007e584a09d89 *av1-1-b10-00-quantizer-45.ivf.md5
+841223871374464194edc739c48dc7cefd1ff255 *av1-1-b10-00-quantizer-46.ivf
+4766d616f923496a8dc113c9b7f875f0c0735f9a *av1-1-b10-00-quantizer-46.ivf.md5
+8bbbbea130aaea453f7b826956a5520d10a0eccf *av1-1-b10-00-quantizer-47.ivf
+3ea21fac0c492b03d8ec25e4ee0971cd57e5f71a *av1-1-b10-00-quantizer-47.ivf.md5
+3ce83e0f1e1835b9a6c10fe502a16fd3650839e0 *av1-1-b10-00-quantizer-48.ivf
+b468de2c09fca5a6b2bb7a20bab4afd8d192c31d *av1-1-b10-00-quantizer-48.ivf.md5
+f3a757c678aa00f9a9c4c4658d37733fd935925a *av1-1-b10-00-quantizer-49.ivf
+f888dc88db576122695d4eb41c486aacd28a2d1d *av1-1-b10-00-quantizer-49.ivf.md5
+a9d78aaef105cc5a95b7ebb54783f37e75673123 *av1-1-b10-00-quantizer-50.ivf
+06d0c5e79cc794030c4be022089b1d12c1383f71 *av1-1-b10-00-quantizer-50.ivf.md5
+165c20ee372f83682d094541097e375227353239 *av1-1-b10-00-quantizer-51.ivf
+b3d90214b8c6e6f6d9357bb5784d10081325c356 *av1-1-b10-00-quantizer-51.ivf.md5
+5b3ea7a18654d943065f5c176974c3960b56664e *av1-1-b10-00-quantizer-52.ivf
+dc61a6e4e2549074130023b14b137fb4fe442ce3 *av1-1-b10-00-quantizer-52.ivf.md5
+74c3b5851b6a94d33b575a689eb8d34592e95d5f *av1-1-b10-00-quantizer-53.ivf
+a80e43a0fb2b852426bd941b8d4b8f56690e9bc9 *av1-1-b10-00-quantizer-53.ivf.md5
+d05b8dea2cddd4f0d9e792f42f71afbd29f7811c *av1-1-b10-00-quantizer-54.ivf
+432937893321f4bd25fa400b8988c5788cb06ecf *av1-1-b10-00-quantizer-54.ivf.md5
+4eaee0f1970426be0bbeb7d4fccdc7e804e9bea4 *av1-1-b10-00-quantizer-55.ivf
+710ab95ce1dcd2540db4477ff4ee6ab771fe0759 *av1-1-b10-00-quantizer-55.ivf.md5
+fe637930c9faa8744cba37effc4cb5510315d1c0 *av1-1-b10-00-quantizer-56.ivf
+2f9431b30523fb6a3e4122f22c6c3ff7b96a7987 *av1-1-b10-00-quantizer-56.ivf.md5
+ed54fc7fcec194eef1f50adbbe12a6a36ab6836b *av1-1-b10-00-quantizer-57.ivf
+43bccac7800b399210cf15520a83739c23a5d9c7 *av1-1-b10-00-quantizer-57.ivf.md5
+a7b8d628ba3e4c5f37aa6a3d7b82afda73ac89dc *av1-1-b10-00-quantizer-58.ivf
+b26638272b787df54f45a46629b852acbcb73e3d *av1-1-b10-00-quantizer-58.ivf.md5
+c077f22ff547fb5ffd020e8dac91d05942fb52df *av1-1-b10-00-quantizer-59.ivf
+4efd99cc0891bf345b8cd2ae8e21709d61be497b *av1-1-b10-00-quantizer-59.ivf.md5
+301ab53039d75e1ffa8cc6a0874d9ea94e4a6a0d *av1-1-b10-00-quantizer-60.ivf
+4729bd734a6edd2d8d0432a3f66b3d91d565050e *av1-1-b10-00-quantizer-60.ivf.md5
+c78640d3211034df9fcb273bdfc18625819652f2 *av1-1-b10-00-quantizer-61.ivf
+3d823eb2b33ccfea68db506626bcbecf49b0f167 *av1-1-b10-00-quantizer-61.ivf.md5
+bf241a449a28773b93e6e529a06dfc28109577e4 *av1-1-b10-00-quantizer-62.ivf
+75457d8476f1927f737d089dcf3d0f7f99f3c4fb *av1-1-b10-00-quantizer-62.ivf.md5
+8b6eb3fff2e0db7eac775b08c745250ca591e2d9 *av1-1-b10-00-quantizer-63.ivf
+63ea689d025593e5d91760785b8e446d04d4671e *av1-1-b10-00-quantizer-63.ivf.md5
+a9f7ea6312a533cc6426a6145edd190d45813c37 *av1-1-b8-02-allintra.ivf
+8fd8f789cfee1069d20f3e2c241f5cad7292239e *av1-1-b8-02-allintra.ivf.md5
+e69e41fee40b408b6eebcc79f266a95f2ee24f9e *av1-1-b8-03-sizedown.mkv
+8c528fb3ccda959a29721566e132f730935ca32b *av1-1-b8-03-sizedown.mkv.md5
+1889da5ee1708007e47bb887470ac477e1d7ba01 *av1-1-b8-03-sizeup.mkv
+8de81b170635d456602dc8923a8b39c534d01fa8 *av1-1-b8-03-sizeup.mkv.md5
+d3ed7de0aa8c155fe35e0f5f4203240710d31383 *park_joy_90p_8_420_monochrome.y4m
+5b3f0907407b809aa66b62cb080feda8c92454ca *park_joy_90p_8_420_vertical_csp.y4m
+caf8b6a5f1a5bcb38afae8a54a08c4f4459aafa3 *vase10x10_tiles.txt
+e14825f50ff845b8a6932c64cb254007a0b5e3a1 *av1-1-b8-22-svc-L2T1.ivf
+0f75f2ac44e61fc83be70c955410fa378e433237 *av1-1-b8-22-svc-L2T1.ivf.md5
+e94687eb0e90179b3800b6d5e11eb7e9bfb34eec *av1-1-b8-22-svc-L1T2.ivf
+2bc12b16385ea14323bc79607fb8dfbd7edaf8ef *av1-1-b8-22-svc-L1T2.ivf.md5
+32ef2f14ee9cb11a24a22934f4c065e926e5d236 *av1-1-b8-22-svc-L2T2.ivf
+f476a10ff06d750129f8229755d51e17ff141b2a *av1-1-b8-22-svc-L2T2.ivf.md5
+afca5502a489692b0a3c120370b0f43b8fc572a1 *av1-1-b8-04-cdfupdate.ivf
+13b9423155a08d5e3a2fd9ae4a973bb046718cdf *av1-1-b8-04-cdfupdate.ivf.md5
+f064290d7fcd3b3de19020e8aec6c43c88d3a505 *av1-1-b8-05-mv.ivf
+bff316e63ded5559116bdc2fa4aa97ad7b1a1761 *av1-1-b8-05-mv.ivf.md5
+b48a717c7c003b8dd23c3c2caed1ac673380fdb3 *av1-1-b8-06-mfmv.ivf
+1424e3cb53e00eb56b94f4c725826274212c42b6 *av1-1-b8-06-mfmv.ivf.md5
+f8724ed96272ddbc35776908f2df7cb9955766a9 *paris_352_288_30.y4m
+11bb40026103182c23a88133edafca369e5575e2 *av1-1-b8-23-film_grain-50.ivf
+c58ccf7ff04711acc559c06f0bfce3c5b14800c3 *av1-1-b8-23-film_grain-50.ivf.md5
+2f883c7e11c21a31f79bd9c809541be90b0c7c4a *av1-1-b10-23-film_grain-50.ivf
+83f2094fca597ad38b4fd623b807de1774c53ffb *av1-1-b10-23-film_grain-50.ivf.md5
+644e05c6bc0418a72b86427aa01e8b4ecea85e03 *desktop1.320_180.yuv
+ad18ca16f0a249fb3b7c38de0d9b327fed273f96 *hantro_collage_w352h288_nv12.yuv
+a17584012187cd886b64f8cb0f35bfd8d762f9dc *av1-1-b8-24-monochrome.ivf
+e71cd9a07f928c527c900daddd071ae60337426d *av1-1-b8-24-monochrome.ivf.md5
+03a8d002594ccc51932332002bb6f9837ef46d0f *av1-1-b10-24-monochrome.ivf
+e24aa6951afd7b2bb53eb1a73e25a19e7b189f82 *av1-1-b10-24-monochrome.ivf.md5
+df0c9481104aa8c81f9e3b61b6d147a331ad3e35 *firstpass_stats
+3eaf216d9fc8b4b9bb8c3956311f49a85974806c *bus_352x288_420_f20_b8.yuv
+c7f336958e7af6162c20ddc84d67c7dfa9826910 *av1-1-b8-16-intra_only-intrabc-extreme-dv.ivf
+36a4fcf07e645ed522cde5845dd9c6ab2b2d1502 *av1-1-b8-16-intra_only-intrabc-extreme-dv.ivf.md5
+9f935d391fdf4a6f7c320355d45770d2e7d6095c *desktopqvga2.320_240.yuv
+4d1ad6d3070268ccb000d7fc3ae0f5a9447bfe82 *test_input_w1h1.yuv
+ad9942a073e245585c93f764ea299382a65939a7 *crowd_run_360p_10_150f.y4m
+9c2aa2d0f63f706f775bf661dfa81e8bb3089d8b *wikipedia_420_360p_60f.y4m
diff --git a/third_party/aom/test/test.cmake b/third_party/aom/test/test.cmake
new file mode 100644
index 0000000000..ce94a5a657
--- /dev/null
+++ b/third_party/aom/test/test.cmake
@@ -0,0 +1,647 @@
+#
+# Copyright (c) 2017, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and the
+# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
+# not distributed with this source code in the LICENSE file, you can obtain it
+# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
+# License 1.0 was not distributed with this source code in the PATENTS file, you
+# can obtain it at www.aomedia.org/license/patent.
+#
+if(AOM_TEST_TEST_CMAKE_)
+  return()
+endif() # AOM_TEST_TEST_CMAKE_
+set(AOM_TEST_TEST_CMAKE_ 1)
+
+include(ProcessorCount)
+
+include("${AOM_ROOT}/test/test_data_util.cmake")
+
+set(AOM_UNIT_TEST_DATA_LIST_FILE "${AOM_ROOT}/test/test-data.sha1")
+set(AOM_IDE_TEST_FOLDER "test")
+set(AOM_IDE_TESTDATA_FOLDER "testdata")
+
+# Appends |AOM_TEST_SOURCE_VARS| with |src_list_name| at the caller's scope.
+# This collects all variables containing libaom test source files.
+function(add_to_libaom_test_srcs src_list_name)
+  list(APPEND AOM_TEST_SOURCE_VARS ${src_list_name})
+  set(AOM_TEST_SOURCE_VARS "${AOM_TEST_SOURCE_VARS}" PARENT_SCOPE)
+endfunction()
+
+list(APPEND AOM_UNIT_TEST_WRAPPER_SOURCES "${AOM_GEN_SRC_DIR}/usage_exit.c"
+            "${AOM_ROOT}/test/test_libaom.cc")
+add_to_libaom_test_srcs(AOM_UNIT_TEST_WRAPPER_SOURCES)
+
+list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+            "${AOM_ROOT}/test/acm_random.h"
+            "${AOM_ROOT}/test/aom_image_test.cc"
+            "${AOM_ROOT}/test/aom_integer_test.cc"
+            "${AOM_ROOT}/test/av1_config_test.cc"
+            "${AOM_ROOT}/test/av1_key_value_api_test.cc"
+            "${AOM_ROOT}/test/block_test.cc"
+            "${AOM_ROOT}/test/codec_factory.h"
+            "${AOM_ROOT}/test/function_equivalence_test.h"
+            "${AOM_ROOT}/test/log2_test.cc"
+            "${AOM_ROOT}/test/md5_helper.h"
+            "${AOM_ROOT}/test/register_state_check.h"
+            "${AOM_ROOT}/test/test_vectors.cc"
+            "${AOM_ROOT}/test/test_vectors.h"
+            "${AOM_ROOT}/test/transform_test_base.h"
+            "${AOM_ROOT}/test/util.h"
+            "${AOM_ROOT}/test/video_source.h")
+add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_SOURCES)
+
+list(APPEND AOM_UNIT_TEST_DECODER_SOURCES "${AOM_ROOT}/test/decode_api_test.cc"
+            "${AOM_ROOT}/test/decode_scalability_test.cc"
+            "${AOM_ROOT}/test/external_frame_buffer_test.cc"
+            "${AOM_ROOT}/test/invalid_file_test.cc"
+            "${AOM_ROOT}/test/test_vector_test.cc"
+            "${AOM_ROOT}/test/ivf_video_source.h")
+add_to_libaom_test_srcs(AOM_UNIT_TEST_DECODER_SOURCES)
+
+list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+            "${AOM_ROOT}/test/active_map_test.cc"
+            "${AOM_ROOT}/test/aq_segment_test.cc"
+            "${AOM_ROOT}/test/av1_external_partition_test.cc"
+            "${AOM_ROOT}/test/avif_progressive_test.cc"
+            "${AOM_ROOT}/test/borders_test.cc"
+            "${AOM_ROOT}/test/cpu_speed_test.cc"
+            "${AOM_ROOT}/test/cpu_used_firstpass_test.cc"
+            "${AOM_ROOT}/test/datarate_test.cc"
+            "${AOM_ROOT}/test/datarate_test.h"
+            "${AOM_ROOT}/test/deltaq_mode_test.cc"
+            "${AOM_ROOT}/test/dropframe_encode_test.cc"
+            "${AOM_ROOT}/test/svc_datarate_test.cc"
+            "${AOM_ROOT}/test/encode_api_test.cc"
+            "${AOM_ROOT}/test/encode_small_width_height_test.cc"
+            "${AOM_ROOT}/test/encode_test_driver.cc"
+            "${AOM_ROOT}/test/encode_test_driver.h"
+            "${AOM_ROOT}/test/end_to_end_psnr_test.cc"
+            "${AOM_ROOT}/test/forced_max_frame_width_height_test.cc"
+            "${AOM_ROOT}/test/force_key_frame_test.cc"
+            "${AOM_ROOT}/test/gf_pyr_height_test.cc"
+            "${AOM_ROOT}/test/rt_end_to_end_test.cc"
+            "${AOM_ROOT}/test/allintra_end_to_end_test.cc"
+            "${AOM_ROOT}/test/loopfilter_control_test.cc"
+            "${AOM_ROOT}/test/frame_size_tests.cc"
+            "${AOM_ROOT}/test/horz_superres_test.cc"
+            "${AOM_ROOT}/test/i420_video_source.h"
+            "${AOM_ROOT}/test/level_test.cc"
+            "${AOM_ROOT}/test/metadata_test.cc"
+            "${AOM_ROOT}/test/monochrome_test.cc"
+            "${AOM_ROOT}/test/postproc_filters_test.cc"
+            "${AOM_ROOT}/test/resize_test.cc"
+            "${AOM_ROOT}/test/scalability_test.cc"
+            "${AOM_ROOT}/test/sharpness_test.cc"
+            "${AOM_ROOT}/test/y4m_test.cc"
+            "${AOM_ROOT}/test/y4m_video_source.h"
+            "${AOM_ROOT}/test/yuv_video_source.h"
+            "${AOM_ROOT}/test/time_stamp_test.cc")
+add_to_libaom_test_srcs(AOM_UNIT_TEST_ENCODER_SOURCES)
+
+list(APPEND AOM_ENCODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/encode_perf_test.cc")
+list(APPEND AOM_UNIT_TEST_WEBM_SOURCES "${AOM_ROOT}/test/webm_video_source.h")
+add_to_libaom_test_srcs(AOM_UNIT_TEST_WEBM_SOURCES)
+list(APPEND AOM_TEST_INTRA_PRED_SPEED_SOURCES "${AOM_GEN_SRC_DIR}/usage_exit.c"
+            "${AOM_ROOT}/test/test_intra_pred_speed.cc")
+
+if(CONFIG_AV1_DECODER)
+  list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+              "${AOM_ROOT}/test/decode_test_driver.cc"
+              "${AOM_ROOT}/test/decode_test_driver.h")
+endif()
+
+if(CONFIG_INTERNAL_STATS AND CONFIG_AV1_HIGHBITDEPTH)
+  list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+              "${AOM_ROOT}/test/hbd_metrics_test.cc")
+endif()
+
+list(APPEND AOM_DECODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/decode_perf_test.cc")
+
+if(CONFIG_REALTIME_ONLY)
+  list(REMOVE_ITEM AOM_UNIT_TEST_ENCODER_SOURCES
+                   "${AOM_ROOT}/test/allintra_end_to_end_test.cc"
+                   "${AOM_ROOT}/test/av1_external_partition_test.cc"
+                   "${AOM_ROOT}/test/avif_progressive_test.cc"
+                   "${AOM_ROOT}/test/borders_test.cc"
+                   "${AOM_ROOT}/test/cpu_speed_test.cc"
+                   "${AOM_ROOT}/test/cpu_used_firstpass_test.cc"
+                   "${AOM_ROOT}/test/deltaq_mode_test.cc"
+                   "${AOM_ROOT}/test/dropframe_encode_test.cc"
+                   "${AOM_ROOT}/test/end_to_end_psnr_test.cc"
+                   "${AOM_ROOT}/test/force_key_frame_test.cc"
+                   "${AOM_ROOT}/test/gf_pyr_height_test.cc"
+                   "${AOM_ROOT}/test/horz_superres_test.cc"
+                   "${AOM_ROOT}/test/level_test.cc"
+                   "${AOM_ROOT}/test/metadata_test.cc"
+                   "${AOM_ROOT}/test/monochrome_test.cc"
+                   "${AOM_ROOT}/test/postproc_filters_test.cc"
+                   "${AOM_ROOT}/test/sharpness_test.cc")
+endif()
+
+if(NOT BUILD_SHARED_LIBS)
+  list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+              "${AOM_ROOT}/test/aom_mem_test.cc"
+              "${AOM_ROOT}/test/av1_common_int_test.cc"
+              "${AOM_ROOT}/test/cdef_test.cc"
+              "${AOM_ROOT}/test/cfl_test.cc"
+              "${AOM_ROOT}/test/convolve_test.cc"
+              "${AOM_ROOT}/test/hiprec_convolve_test.cc"
+              "${AOM_ROOT}/test/hiprec_convolve_test_util.cc"
+              "${AOM_ROOT}/test/hiprec_convolve_test_util.h"
+              "${AOM_ROOT}/test/intrabc_test.cc"
+              "${AOM_ROOT}/test/intrapred_test.cc"
+              "${AOM_ROOT}/test/lpf_test.cc"
+              "${AOM_ROOT}/test/scan_test.cc"
+              "${AOM_ROOT}/test/selfguided_filter_test.cc"
+              "${AOM_ROOT}/test/simd_cmp_impl.h"
+              "${AOM_ROOT}/test/simd_impl.h")
+
+  if(HAVE_SSE2)
+    list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSE2
+                "${AOM_ROOT}/test/simd_cmp_sse2.cc")
+    add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_INTRIN_SSE2)
+  endif()
+
+  if(HAVE_SSSE3)
+    list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSSE3
+                "${AOM_ROOT}/test/simd_cmp_ssse3.cc")
+    add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_INTRIN_SSSE3)
+  endif()
+
+  if(HAVE_SSE4_1)
+    list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1
+                "${AOM_ROOT}/test/simd_cmp_sse4.cc")
+    add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1)
+  endif()
+
+  if(HAVE_AVX2)
+    list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_AVX2
+                "${AOM_ROOT}/test/simd_cmp_avx2.cc")
+    add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_INTRIN_AVX2)
+  endif()
+
+  list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+              "${AOM_ROOT}/test/arf_freq_test.cc"
+              "${AOM_ROOT}/test/av1_convolve_test.cc"
+              "${AOM_ROOT}/test/av1_fwd_txfm1d_test.cc"
+              "${AOM_ROOT}/test/av1_fwd_txfm2d_test.cc"
+              "${AOM_ROOT}/test/av1_inv_txfm1d_test.cc"
+              "${AOM_ROOT}/test/av1_inv_txfm2d_test.cc"
+              "${AOM_ROOT}/test/av1_k_means_test.cc"
+              "${AOM_ROOT}/test/av1_nn_predict_test.cc"
+              "${AOM_ROOT}/test/av1_round_shift_array_test.cc"
+              "${AOM_ROOT}/test/av1_softmax_test.cc"
+              "${AOM_ROOT}/test/av1_txfm_test.cc"
+              "${AOM_ROOT}/test/av1_txfm_test.h"
+              "${AOM_ROOT}/test/av1_wedge_utils_test.cc"
+              "${AOM_ROOT}/test/avg_test.cc"
+              "${AOM_ROOT}/test/blend_a64_mask_1d_test.cc"
+              "${AOM_ROOT}/test/blend_a64_mask_test.cc"
+              "${AOM_ROOT}/test/comp_avg_pred_test.cc"
+              "${AOM_ROOT}/test/comp_avg_pred_test.h"
+              "${AOM_ROOT}/test/comp_mask_pred_test.cc"
+              "${AOM_ROOT}/test/disflow_test.cc"
+              "${AOM_ROOT}/test/encodemb_test.cc"
+              "${AOM_ROOT}/test/encodetxb_test.cc"
+              "${AOM_ROOT}/test/end_to_end_qmpsnr_test.cc"
+              "${AOM_ROOT}/test/end_to_end_ssim_test.cc"
+              "${AOM_ROOT}/test/error_block_test.cc"
+              "${AOM_ROOT}/test/fdct4x4_test.cc"
+              "${AOM_ROOT}/test/fft_test.cc"
+              "${AOM_ROOT}/test/firstpass_test.cc"
+              "${AOM_ROOT}/test/fwht4x4_test.cc"
+              "${AOM_ROOT}/test/hadamard_test.cc"
+              "${AOM_ROOT}/test/horver_correlation_test.cc"
+              "${AOM_ROOT}/test/masked_sad_test.cc"
+              "${AOM_ROOT}/test/masked_variance_test.cc"
+              "${AOM_ROOT}/test/minmax_test.cc"
+              "${AOM_ROOT}/test/motion_vector_test.cc"
+              "${AOM_ROOT}/test/mv_cost_test.cc"
+              "${AOM_ROOT}/test/noise_model_test.cc"
+              "${AOM_ROOT}/test/obmc_sad_test.cc"
+              "${AOM_ROOT}/test/obmc_variance_test.cc"
+              "${AOM_ROOT}/test/pickrst_test.cc"
+              "${AOM_ROOT}/test/reconinter_test.cc"
+              "${AOM_ROOT}/test/sad_test.cc"
+              "${AOM_ROOT}/test/subtract_test.cc"
+              "${AOM_ROOT}/test/sum_squares_test.cc"
+              "${AOM_ROOT}/test/sse_sum_test.cc"
+              "${AOM_ROOT}/test/variance_test.cc"
+              "${AOM_ROOT}/test/warp_filter_test.cc"
+              "${AOM_ROOT}/test/warp_filter_test_util.cc"
+              "${AOM_ROOT}/test/warp_filter_test_util.h"
+              "${AOM_ROOT}/test/webmenc_test.cc"
+              "${AOM_ROOT}/test/wiener_test.cc")
+
+  if(NOT CONFIG_REALTIME_ONLY)
+    list(APPEND AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1
+                "${AOM_ROOT}/test/corner_match_test.cc")
+  endif()
+
+  if(CONFIG_ACCOUNTING)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/accounting_test.cc")
+  endif()
+
+  if(CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/altref_test.cc"
+                "${AOM_ROOT}/test/av1_encoder_parms_get_to_decoder.cc"
+                "${AOM_ROOT}/test/av1_ext_tile_test.cc"
+                "${AOM_ROOT}/test/binary_codes_test.cc"
+                "${AOM_ROOT}/test/boolcoder_test.cc"
+                "${AOM_ROOT}/test/cnn_test.cc"
+                "${AOM_ROOT}/test/decode_multithreaded_test.cc"
+                "${AOM_ROOT}/test/divu_small_test.cc"
+                "${AOM_ROOT}/test/dr_prediction_test.cc"
+                "${AOM_ROOT}/test/ec_test.cc"
+                "${AOM_ROOT}/test/error_resilience_test.cc"
+                "${AOM_ROOT}/test/ethread_test.cc"
+                "${AOM_ROOT}/test/film_grain_table_test.cc"
+                "${AOM_ROOT}/test/kf_test.cc"
+                "${AOM_ROOT}/test/lossless_test.cc"
+                "${AOM_ROOT}/test/quant_test.cc"
+                "${AOM_ROOT}/test/ratectrl_test.cc"
+                "${AOM_ROOT}/test/rd_test.cc"
+                "${AOM_ROOT}/test/sb_multipass_test.cc"
+                "${AOM_ROOT}/test/sb_qp_sweep_test.cc"
+                "${AOM_ROOT}/test/screen_content_test.cc"
+                "${AOM_ROOT}/test/segment_binarization_sync.cc"
+                "${AOM_ROOT}/test/still_picture_test.cc"
+                "${AOM_ROOT}/test/temporal_filter_test.cc"
+                "${AOM_ROOT}/test/tile_config_test.cc"
+                "${AOM_ROOT}/test/tile_independence_test.cc"
+                "${AOM_ROOT}/test/tpl_model_test.cc")
+    if(CONFIG_AV1_HIGHBITDEPTH)
+      list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                  "${AOM_ROOT}/test/coding_path_sync.cc")
+    endif()
+    if(CONFIG_REALTIME_ONLY)
+      list(REMOVE_ITEM AOM_UNIT_TEST_COMMON_SOURCES
+                       "${AOM_ROOT}/test/altref_test.cc"
+                       "${AOM_ROOT}/test/av1_encoder_parms_get_to_decoder.cc"
+                       "${AOM_ROOT}/test/av1_ext_tile_test.cc"
+                       "${AOM_ROOT}/test/cnn_test.cc"
+                       "${AOM_ROOT}/test/decode_multithreaded_test.cc"
+                       "${AOM_ROOT}/test/error_resilience_test.cc"
+                       "${AOM_ROOT}/test/kf_test.cc"
+                       "${AOM_ROOT}/test/lossless_test.cc"
+                       "${AOM_ROOT}/test/sb_multipass_test.cc"
+                       "${AOM_ROOT}/test/sb_qp_sweep_test.cc"
+                       "${AOM_ROOT}/test/selfguided_filter_test.cc"
+                       "${AOM_ROOT}/test/screen_content_test.cc"
+                       "${AOM_ROOT}/test/still_picture_test.cc"
+                       "${AOM_ROOT}/test/tile_independence_test.cc"
+                       "${AOM_ROOT}/test/tpl_model_test.cc")
+    endif()
+  endif()
+
+  if(CONFIG_FPMT_TEST AND (NOT CONFIG_REALTIME_ONLY))
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/frame_parallel_enc_test.cc")
+  endif()
+
+  if(HAVE_SSE2)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/simd_sse2_test.cc")
+  endif()
+
+  if(HAVE_SSSE3)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/simd_ssse3_test.cc")
+  endif()
+
+  if(HAVE_SSE4_1)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/simd_sse4_test.cc")
+  endif()
+
+  if(HAVE_SSE4_1 OR HAVE_NEON)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/filterintra_test.cc")
+
+    list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+                "${AOM_ROOT}/test/av1_highbd_iht_test.cc")
+  endif()
+
+  if(HAVE_AVX2)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/simd_avx2_test.cc")
+  endif()
+
+  if(CONFIG_AV1_TEMPORAL_DENOISING AND (HAVE_SSE2 OR HAVE_NEON))
+    list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+                "${AOM_ROOT}/test/av1_temporal_denoiser_test.cc")
+  endif()
+
+  if(CONFIG_AV1_HIGHBITDEPTH)
+    list(APPEND AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1
+                "${AOM_ROOT}/test/av1_quantize_test.cc")
+  endif()
+
+  if(HAVE_SSE2 OR HAVE_NEON)
+    list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+                "${AOM_ROOT}/test/quantize_func_test.cc")
+  endif()
+
+  if(HAVE_SSE4_1)
+    list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+                "${AOM_ROOT}/test/av1_convolve_scale_test.cc"
+                "${AOM_ROOT}/test/av1_horz_only_frame_superres_test.cc"
+                "${AOM_ROOT}/test/intra_edge_test.cc")
+  endif()
+
+  if(HAVE_NEON)
+    list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+                "${AOM_ROOT}/test/av1_convolve_scale_test.cc"
+                "${AOM_ROOT}/test/av1_horz_only_frame_superres_test.cc"
+                "${AOM_ROOT}/test/intra_edge_test.cc")
+  endif()
+
+  if(HAVE_SSE4_2 OR HAVE_ARM_CRC32)
+    list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES "${AOM_ROOT}/test/hash_test.cc")
+  endif()
+
+  if(CONFIG_REALTIME_ONLY)
+    list(REMOVE_ITEM AOM_UNIT_TEST_ENCODER_SOURCES
+                     "${AOM_ROOT}/test/disflow_test.cc"
+                     "${AOM_ROOT}/test/end_to_end_qmpsnr_test.cc"
+                     "${AOM_ROOT}/test/end_to_end_ssim_test.cc"
+                     "${AOM_ROOT}/test/firstpass_test.cc"
+                     "${AOM_ROOT}/test/motion_vector_test.cc"
+                     "${AOM_ROOT}/test/obmc_sad_test.cc"
+                     "${AOM_ROOT}/test/obmc_variance_test.cc"
+                     "${AOM_ROOT}/test/pickrst_test.cc"
+                     "${AOM_ROOT}/test/warp_filter_test.cc"
+                     "${AOM_ROOT}/test/warp_filter_test_util.cc"
+                     "${AOM_ROOT}/test/warp_filter_test_util.h"
+                     "${AOM_ROOT}/test/wiener_test.cc")
+  endif()
+endif()
+
+if(CONFIG_AV1_ENCODER AND ENABLE_TESTS)
+  list(APPEND AOM_RC_TEST_SOURCES "${AOM_ROOT}/test/codec_factory.h"
+              "${AOM_ROOT}/test/decode_test_driver.cc"
+              "${AOM_ROOT}/test/decode_test_driver.h"
+              "${AOM_ROOT}/test/encode_test_driver.cc"
+              "${AOM_ROOT}/test/encode_test_driver.h"
+              "${AOM_ROOT}/test/i420_video_source.h"
+              "${AOM_ROOT}/test/ratectrl_rtc_test.cc"
+              "${AOM_ROOT}/test/test_aom_rc.cc" "${AOM_ROOT}/test/util.h")
+  if(CONFIG_THREE_PASS)
+    # Add the dependencies of "${AOM_ROOT}/common/ivfdec.c".
+    list(APPEND AOM_RC_TEST_SOURCES "${AOM_ROOT}/common/tools_common.c"
+                "${AOM_ROOT}/common/tools_common.h"
+                "${AOM_GEN_SRC_DIR}/usage_exit.c")
+  endif()
+endif()
+
+if(ENABLE_TESTS)
+  if(BUILD_SHARED_LIBS AND APPLE) # Silence an RPATH warning.
+    set(CMAKE_MACOSX_RPATH 1)
+  endif()
+
+  add_library(
+    aom_gtest STATIC
+    "${AOM_ROOT}/third_party/googletest/src/googletest/src/gtest-all.cc")
+  set_property(TARGET aom_gtest PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER})
+  target_include_directories(
+    aom_gtest
+    PUBLIC "${AOM_ROOT}/third_party/googletest/src/googletest/include"
+    PRIVATE "${AOM_ROOT}/third_party/googletest/src/googletest")
+
+  # The definition of GTEST_HAS_PTHREAD must be public, since it's checked by
+  # interface headers, not just by the implementation.
+  if(NOT (MSVC OR WIN32))
+    if(CONFIG_MULTITHREAD AND CMAKE_USE_PTHREADS_INIT)
+      target_compile_definitions(aom_gtest PUBLIC GTEST_HAS_PTHREAD=1)
+    else()
+      target_compile_definitions(aom_gtest PUBLIC GTEST_HAS_PTHREAD=0)
+    endif()
+  endif()
+
+  add_library(
+    aom_gmock STATIC
+    "${AOM_ROOT}/third_party/googletest/src/googlemock/src/gmock-all.cc")
+  set_property(TARGET aom_gmock PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER})
+  target_include_directories(
+    aom_gmock
+    PUBLIC "${AOM_ROOT}/third_party/googletest/src/googlemock/include"
+    PRIVATE "${AOM_ROOT}/third_party/googletest/src/googlemock")
+  target_link_libraries(aom_gmock ${AOM_LIB_LINK_TYPE} aom_gtest)
+endif()
+
+# Setup testdata download targets, test build targets, and test run targets. The
+# libaom and app util targets must exist before this function is called.
+function(setup_aom_test_targets)
+
+  # TODO(tomfinegan): Build speed optimization. $AOM_UNIT_TEST_COMMON_SOURCES
+  # and $AOM_UNIT_TEST_ENCODER_SOURCES are very large. The build of test targets
+  # could be sped up (on multicore build machines) by compiling sources in each
+  # list into separate object library targets, and then linking them into
+  # test_libaom.
+  add_library(test_aom_common OBJECT ${AOM_UNIT_TEST_COMMON_SOURCES})
+  set_property(TARGET test_aom_common PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER})
+  add_dependencies(test_aom_common aom)
+  target_link_libraries(test_aom_common ${AOM_LIB_LINK_TYPE} aom_gtest)
+
+  if(CONFIG_AV1_DECODER)
+    add_library(test_aom_decoder OBJECT ${AOM_UNIT_TEST_DECODER_SOURCES})
+    set_property(TARGET test_aom_decoder PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER})
+    add_dependencies(test_aom_decoder aom)
+    target_link_libraries(test_aom_decoder ${AOM_LIB_LINK_TYPE} aom_gtest)
+  endif()
+
+  if(CONFIG_AV1_ENCODER)
+    add_library(test_aom_encoder OBJECT ${AOM_UNIT_TEST_ENCODER_SOURCES})
+    set_property(TARGET test_aom_encoder PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER})
+    add_dependencies(test_aom_encoder aom)
+    target_link_libraries(test_aom_encoder ${AOM_LIB_LINK_TYPE} aom_gtest)
+  endif()
+
+  add_executable(test_libaom ${AOM_UNIT_TEST_WRAPPER_SOURCES}
+                             $<TARGET_OBJECTS:aom_common_app_util>
+                             $<TARGET_OBJECTS:test_aom_common>)
+  set_property(TARGET test_libaom PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER})
+  list(APPEND AOM_APP_TARGETS test_libaom)
+
+  if(CONFIG_AV1_DECODER)
+    target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:aom_decoder_app_util>
+                   $<TARGET_OBJECTS:test_aom_decoder>)
+
+    if(ENABLE_DECODE_PERF_TESTS AND CONFIG_WEBM_IO)
+      target_sources(test_libaom PRIVATE ${AOM_DECODE_PERF_TEST_SOURCES})
+    endif()
+  endif()
+
+  if(CONFIG_AV1_ENCODER)
+    target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:test_aom_encoder>
+                   $<TARGET_OBJECTS:aom_encoder_app_util>)
+
+    if(ENABLE_ENCODE_PERF_TESTS)
+      target_sources(test_libaom PRIVATE ${AOM_ENCODE_PERF_TEST_SOURCES})
+    endif()
+
+    if(NOT BUILD_SHARED_LIBS)
+      add_executable(test_intra_pred_speed
+                     ${AOM_TEST_INTRA_PRED_SPEED_SOURCES}
+                     $<TARGET_OBJECTS:aom_common_app_util>)
+      set_property(TARGET test_intra_pred_speed
+                   PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER})
+      target_link_libraries(test_intra_pred_speed ${AOM_LIB_LINK_TYPE} aom
+                            aom_gtest)
+      list(APPEND AOM_APP_TARGETS test_intra_pred_speed)
+    endif()
+  endif()
+
+  target_link_libraries(test_libaom ${AOM_LIB_LINK_TYPE} aom aom_gtest)
+
+  if(CONFIG_WEBM_IO)
+    target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:webm>)
+  endif()
+  if(HAVE_SSE2)
+    add_intrinsics_source_to_target("-msse2" "test_libaom"
+                                    "AOM_UNIT_TEST_COMMON_INTRIN_SSE2")
+  endif()
+  if(HAVE_SSSE3)
+    add_intrinsics_source_to_target("-mssse3" "test_libaom"
+                                    "AOM_UNIT_TEST_COMMON_INTRIN_SSSE3")
+  endif()
+  if(HAVE_SSE4_1)
+    add_intrinsics_source_to_target("-msse4.1" "test_libaom"
+                                    "AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1")
+    if(CONFIG_AV1_ENCODER)
+      if(AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1)
+        add_intrinsics_source_to_target("-msse4.1" "test_libaom"
+                                        "AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1")
+      endif()
+    endif()
+  endif()
+  if(HAVE_AVX2)
+    add_intrinsics_source_to_target("-mavx2" "test_libaom"
+                                    "AOM_UNIT_TEST_COMMON_INTRIN_AVX2")
+  endif()
+  if(HAVE_NEON)
+    add_intrinsics_source_to_target("${AOM_NEON_INTRIN_FLAG}" "test_libaom"
+                                    "AOM_UNIT_TEST_COMMON_INTRIN_NEON")
+  endif()
+
+  if(ENABLE_TESTDATA)
+    make_test_data_lists("${AOM_UNIT_TEST_DATA_LIST_FILE}" test_files
+                         test_file_checksums)
+    list(LENGTH test_files num_test_files)
+    list(LENGTH test_file_checksums num_test_file_checksums)
+
+    math(EXPR max_file_index "${num_test_files} - 1")
+    foreach(test_index RANGE ${max_file_index})
+      list(GET test_files ${test_index} test_file)
+      list(GET test_file_checksums ${test_index} test_file_checksum)
+      add_custom_target(
+        testdata_${test_index}
+        COMMAND ${CMAKE_COMMAND}
+                -DAOM_CONFIG_DIR="${AOM_CONFIG_DIR}" -DAOM_ROOT="${AOM_ROOT}"
+                -DAOM_TEST_FILE="${test_file}"
+                -DAOM_TEST_CHECKSUM=${test_file_checksum} -P
+                "${AOM_ROOT}/test/test_data_download_worker.cmake")
+      set_property(TARGET testdata_${test_index}
+                   PROPERTY FOLDER ${AOM_IDE_TESTDATA_FOLDER})
+      list(APPEND testdata_targets testdata_${test_index})
+    endforeach()
+
+    # Create a custom build target for running each test data download target.
+    add_custom_target(testdata)
+    add_dependencies(testdata ${testdata_targets})
+    set_property(TARGET testdata PROPERTY FOLDER ${AOM_IDE_TESTDATA_FOLDER})
+
+    # Skip creation of test run targets when generating for Visual Studio and
+    # Xcode unless the user explicitly requests IDE test hosting. This is done
+    # to make build cycles in the IDE tolerable when the IDE command for build
+    # project is used to build AOM. Default behavior in IDEs is to build all
+    # targets, and the test run takes hours.
+    if(((NOT MSVC) AND (NOT XCODE)) OR ENABLE_IDE_TEST_HOSTING)
+
+      # Pick a reasonable number of targets (this controls parallelization).
+      processorcount(num_test_targets)
+      if(num_test_targets EQUAL 0) # Just default to 10 targets when there's no
+                                   # processor count available.
+        set(num_test_targets 10)
+      endif()
+
+      math(EXPR max_shard_index "${num_test_targets} - 1")
+      foreach(shard_index RANGE ${max_shard_index})
+        set(test_name "test_${shard_index}")
+        add_custom_target(${test_name}
+                          COMMAND ${CMAKE_COMMAND}
+                                  -DGTEST_SHARD_INDEX=${shard_index}
+                                  -DGTEST_TOTAL_SHARDS=${num_test_targets}
+                                  -DTEST_LIBAOM=$<TARGET_FILE:test_libaom> -P
+                                  "${AOM_ROOT}/test/test_runner.cmake"
+                          DEPENDS testdata test_libaom)
+        set_property(TARGET ${test_name} PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER})
+        list(APPEND test_targets ${test_name})
+      endforeach()
+      add_custom_target(runtests)
+      set_property(TARGET runtests PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER})
+      add_dependencies(runtests ${test_targets})
+    endif()
+  endif()
+
+  # Libaom_test_srcs.txt generation.
+  set(libaom_test_srcs_txt_file "${AOM_CONFIG_DIR}/libaom_test_srcs.txt")
+  file(WRITE "${libaom_test_srcs_txt_file}"
+       "# This file is generated. DO NOT EDIT.\n")
+
+  # Static source file list first.
+  list(SORT AOM_TEST_SOURCE_VARS)
+  foreach(aom_test_source_var ${AOM_TEST_SOURCE_VARS})
+    if("${aom_test_source_var}" STREQUAL "${last_aom_test_source_var}")
+      message(
+        FATAL_ERROR
+          "Duplicate AOM_TEST_SOURCE_VARS entry: ${aom_test_source_var}")
+    endif()
+    foreach(file ${${aom_test_source_var}})
+      if(NOT "${file}" MATCHES "${AOM_CONFIG_DIR}")
+        string(REPLACE "${AOM_ROOT}/" "" file "${file}")
+        file(APPEND "${libaom_test_srcs_txt_file}" "${file}\n")
+      endif()
+    endforeach()
+    set(last_aom_test_source_var ${aom_test_source_var})
+  endforeach()
+
+  # libaom_test_srcs.gni generation
+  set(libaom_test_srcs_gni_file "${AOM_CONFIG_DIR}/libaom_test_srcs.gni")
+  file(WRITE "${libaom_test_srcs_gni_file}"
+       "# This file is generated. DO NOT EDIT.\n")
+
+  foreach(aom_test_source_var ${AOM_TEST_SOURCE_VARS})
+    string(TOLOWER "${aom_test_source_var}" aom_test_source_var_lowercase)
+    file(APPEND "${libaom_test_srcs_gni_file}"
+         "\n${aom_test_source_var_lowercase} = [\n")
+
+    foreach(file ${${aom_test_source_var}})
+      if(NOT "${file}" MATCHES "${AOM_CONFIG_DIR}")
+        string(REPLACE "${AOM_ROOT}/" "//third_party/libaom/source/libaom/" file
+                       "${file}")
+        file(APPEND "${libaom_test_srcs_gni_file}" "  \"${file}\",\n")
+      endif()
+    endforeach()
+
+    file(APPEND "${libaom_test_srcs_gni_file}" "]\n")
+  endforeach()
+
+  # Set up test for rc interface
+  if(CONFIG_AV1_ENCODER
+     AND ENABLE_TESTS
+     AND CONFIG_WEBM_IO
+     AND NOT BUILD_SHARED_LIBS
+     AND NOT CONFIG_REALTIME_ONLY)
+    add_executable(test_aom_rc ${AOM_RC_TEST_SOURCES})
+    target_link_libraries(test_aom_rc ${AOM_LIB_LINK_TYPE} aom aom_av1_rc
+                          aom_gtest aom_gmock webm)
+    set_property(TARGET test_aom_rc PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER})
+    list(APPEND AOM_APP_TARGETS test_aom_rc)
+  endif()
+
+  set(AOM_APP_TARGETS ${AOM_APP_TARGETS} PARENT_SCOPE)
+endfunction()
diff --git a/third_party/aom/test/test_aom_rc.cc b/third_party/aom/test/test_aom_rc.cc
new file mode 100644
index 0000000000..0182b62ec8
--- /dev/null
+++ b/third_party/aom/test/test_aom_rc.cc
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/third_party/aom/test/test_data_download_worker.cmake b/third_party/aom/test/test_data_download_worker.cmake
new file mode 100644
index 0000000000..a49038888d
--- /dev/null
+++ b/third_party/aom/test/test_data_download_worker.cmake
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2017, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and the
+# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
+# not distributed with this source code in the LICENSE file, you can obtain it
+# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
+# License 1.0 was not distributed with this source code in the PATENTS file, you
+# can obtain it at www.aomedia.org/license/patent.
+#
+include("${AOM_ROOT}/test/test_data_util.cmake")
+
+# https://github.com/cheshirekow/cmake_format/issues/34
+# cmake-format: off
+if (NOT AOM_ROOT OR NOT AOM_CONFIG_DIR OR NOT AOM_TEST_FILE
+    OR NOT AOM_TEST_CHECKSUM)
+  message(FATAL_ERROR
+          "AOM_ROOT, AOM_CONFIG_DIR, AOM_TEST_FILE and AOM_TEST_CHECKSUM must be
+          defined.")
+endif ()
+# cmake-format: on
+
+set(AOM_TEST_DATA_URL "https://storage.googleapis.com/aom-test-data")
+
+if(NOT AOM_TEST_DATA_PATH)
+  set(AOM_TEST_DATA_PATH "$ENV{LIBAOM_TEST_DATA_PATH}")
+endif()
+
+if("${AOM_TEST_DATA_PATH}" STREQUAL "")
+  message(
+    WARNING "Writing test data to ${AOM_CONFIG_DIR}, set "
+            "$LIBAOM_TEST_DATA_PATH in your environment to avoid this warning.")
+  set(AOM_TEST_DATA_PATH "${AOM_CONFIG_DIR}")
+endif()
+
+if(NOT EXISTS "${AOM_TEST_DATA_PATH}")
+  file(MAKE_DIRECTORY "${AOM_TEST_DATA_PATH}")
+endif()
+
+expand_test_file_paths("AOM_TEST_FILE" "${AOM_TEST_DATA_PATH}" "filepath")
+expand_test_file_paths("AOM_TEST_FILE" "${AOM_TEST_DATA_URL}" "url")
+
+check_file("${filepath}" "${AOM_TEST_CHECKSUM}" "needs_download")
+if(needs_download)
+  download_test_file("${url}" "${AOM_TEST_CHECKSUM}" "${filepath}")
+endif()
diff --git a/third_party/aom/test/test_data_util.cmake b/third_party/aom/test/test_data_util.cmake
new file mode 100644
index 0000000000..069e1ad526
--- /dev/null
+++ b/third_party/aom/test/test_data_util.cmake
@@ -0,0 +1,665 @@
+#
+# Copyright (c) 2017, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and the
+# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
+# not distributed with this source code in the LICENSE file, you can obtain it
+# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
+# License 1.0 was not distributed with this source code in the PATENTS file, you
+# can obtain it at www.aomedia.org/license/patent.
+#
+
+list(APPEND AOM_TEST_DATA_FILE_NAMES
+            "desktopqvga2.320_240.yuv"
+            "desktop1.320_180.yuv"
+            "hantro_collage_w176h144.yuv"
+            "hantro_collage_w352h288.yuv"
+            "hantro_collage_w352h288_nv12.yuv"
+            "hantro_odd.yuv"
+            "paris_352_288_30.y4m"
+            "park_joy_90p_10_420.y4m"
+            "park_joy_90p_10_422.y4m"
+            "park_joy_90p_10_444.y4m"
+            "park_joy_90p_12_420.y4m"
+            "park_joy_90p_12_422.y4m"
+            "park_joy_90p_12_444.y4m"
+            "park_joy_90p_8_420_a10-1.y4m"
+            "park_joy_90p_8_420.y4m"
+            "park_joy_90p_8_420_monochrome.y4m"
+            "park_joy_90p_8_420_vertical_csp.y4m"
+            "park_joy_90p_8_422.y4m"
+            "park_joy_90p_8_444.y4m"
+            "pixel_capture_w320h240.yuv"
+            "desktop_credits.y4m"
+            "rand_noise_w1280h720.yuv"
+            "niklas_1280_720_30.y4m"
+            "rush_hour_444.y4m"
+            "screendata.y4m"
+            "niklas_640_480_30.yuv"
+            "vase10x10.yuv"
+            "vase10x10_tiles.txt"
+            "bus_352x288_420_f20_b8.yuv"
+            "test_input_w1h1.yuv"
+            "crowd_run_360p_10_150f.y4m"
+            "wikipedia_420_360p_60f.y4m")
+
+if(ENABLE_DECODE_PERF_TESTS AND CONFIG_AV1_ENCODER)
+  list(APPEND AOM_TEST_DATA_FILE_NAMES "niklas_1280_720_30.yuv")
+endif()
+
+if(CONFIG_AV1_DECODER)
+  list(APPEND AOM_TEST_DATA_FILE_NAMES
+              "av1-1-b8-00-quantizer-00.ivf"
+              "av1-1-b8-00-quantizer-00.ivf.md5"
+              "av1-1-b8-00-quantizer-01.ivf"
+              "av1-1-b8-00-quantizer-01.ivf.md5"
+              "av1-1-b8-00-quantizer-02.ivf"
+              "av1-1-b8-00-quantizer-02.ivf.md5"
+              "av1-1-b8-00-quantizer-03.ivf"
+              "av1-1-b8-00-quantizer-03.ivf.md5"
+              "av1-1-b8-00-quantizer-04.ivf"
+              "av1-1-b8-00-quantizer-04.ivf.md5"
+              "av1-1-b8-00-quantizer-05.ivf"
+              "av1-1-b8-00-quantizer-05.ivf.md5"
+              "av1-1-b8-00-quantizer-06.ivf"
+              "av1-1-b8-00-quantizer-06.ivf.md5"
+              "av1-1-b8-00-quantizer-07.ivf"
+              "av1-1-b8-00-quantizer-07.ivf.md5"
+              "av1-1-b8-00-quantizer-08.ivf"
+              "av1-1-b8-00-quantizer-08.ivf.md5"
+              "av1-1-b8-00-quantizer-09.ivf"
+              "av1-1-b8-00-quantizer-09.ivf.md5"
+              "av1-1-b8-00-quantizer-10.ivf"
+              "av1-1-b8-00-quantizer-10.ivf.md5"
+              "av1-1-b8-00-quantizer-11.ivf"
+              "av1-1-b8-00-quantizer-11.ivf.md5"
+              "av1-1-b8-00-quantizer-12.ivf"
+              "av1-1-b8-00-quantizer-12.ivf.md5"
+              "av1-1-b8-00-quantizer-13.ivf"
+              "av1-1-b8-00-quantizer-13.ivf.md5"
+              "av1-1-b8-00-quantizer-14.ivf"
+              "av1-1-b8-00-quantizer-14.ivf.md5"
+              "av1-1-b8-00-quantizer-15.ivf"
+              "av1-1-b8-00-quantizer-15.ivf.md5"
+              "av1-1-b8-00-quantizer-16.ivf"
+              "av1-1-b8-00-quantizer-16.ivf.md5"
+              "av1-1-b8-00-quantizer-17.ivf"
+              "av1-1-b8-00-quantizer-17.ivf.md5"
+              "av1-1-b8-00-quantizer-18.ivf"
+              "av1-1-b8-00-quantizer-18.ivf.md5"
+              "av1-1-b8-00-quantizer-19.ivf"
+              "av1-1-b8-00-quantizer-19.ivf.md5"
+              "av1-1-b8-00-quantizer-20.ivf"
+              "av1-1-b8-00-quantizer-20.ivf.md5"
+              "av1-1-b8-00-quantizer-21.ivf"
+              "av1-1-b8-00-quantizer-21.ivf.md5"
+              "av1-1-b8-00-quantizer-22.ivf"
+              "av1-1-b8-00-quantizer-22.ivf.md5"
+              "av1-1-b8-00-quantizer-23.ivf"
+              "av1-1-b8-00-quantizer-23.ivf.md5"
+              "av1-1-b8-00-quantizer-24.ivf"
+              "av1-1-b8-00-quantizer-24.ivf.md5"
+              "av1-1-b8-00-quantizer-25.ivf"
+              "av1-1-b8-00-quantizer-25.ivf.md5"
+              "av1-1-b8-00-quantizer-26.ivf"
+              "av1-1-b8-00-quantizer-26.ivf.md5"
+              "av1-1-b8-00-quantizer-27.ivf"
+              "av1-1-b8-00-quantizer-27.ivf.md5"
+              "av1-1-b8-00-quantizer-28.ivf"
+              "av1-1-b8-00-quantizer-28.ivf.md5"
+              "av1-1-b8-00-quantizer-29.ivf"
+              "av1-1-b8-00-quantizer-29.ivf.md5"
+              "av1-1-b8-00-quantizer-30.ivf"
+              "av1-1-b8-00-quantizer-30.ivf.md5"
+              "av1-1-b8-00-quantizer-31.ivf"
+              "av1-1-b8-00-quantizer-31.ivf.md5"
+              "av1-1-b8-00-quantizer-32.ivf"
+              "av1-1-b8-00-quantizer-32.ivf.md5"
+              "av1-1-b8-00-quantizer-33.ivf"
+              "av1-1-b8-00-quantizer-33.ivf.md5"
+              "av1-1-b8-00-quantizer-34.ivf"
+              "av1-1-b8-00-quantizer-34.ivf.md5"
+              "av1-1-b8-00-quantizer-35.ivf"
+              "av1-1-b8-00-quantizer-35.ivf.md5"
+              "av1-1-b8-00-quantizer-36.ivf"
+              "av1-1-b8-00-quantizer-36.ivf.md5"
+              "av1-1-b8-00-quantizer-37.ivf"
+              "av1-1-b8-00-quantizer-37.ivf.md5"
+              "av1-1-b8-00-quantizer-38.ivf"
+              "av1-1-b8-00-quantizer-38.ivf.md5"
+              "av1-1-b8-00-quantizer-39.ivf"
+              "av1-1-b8-00-quantizer-39.ivf.md5"
+              "av1-1-b8-00-quantizer-40.ivf"
+              "av1-1-b8-00-quantizer-40.ivf.md5"
+              "av1-1-b8-00-quantizer-41.ivf"
+              "av1-1-b8-00-quantizer-41.ivf.md5"
+              "av1-1-b8-00-quantizer-42.ivf"
+              "av1-1-b8-00-quantizer-42.ivf.md5"
+              "av1-1-b8-00-quantizer-43.ivf"
+              "av1-1-b8-00-quantizer-43.ivf.md5"
+              "av1-1-b8-00-quantizer-44.ivf"
+              "av1-1-b8-00-quantizer-44.ivf.md5"
+              "av1-1-b8-00-quantizer-45.ivf"
+              "av1-1-b8-00-quantizer-45.ivf.md5"
+              "av1-1-b8-00-quantizer-46.ivf"
+              "av1-1-b8-00-quantizer-46.ivf.md5"
+              "av1-1-b8-00-quantizer-47.ivf"
+              "av1-1-b8-00-quantizer-47.ivf.md5"
+              "av1-1-b8-00-quantizer-48.ivf"
+              "av1-1-b8-00-quantizer-48.ivf.md5"
+              "av1-1-b8-00-quantizer-49.ivf"
+              "av1-1-b8-00-quantizer-49.ivf.md5"
+              "av1-1-b8-00-quantizer-50.ivf"
+              "av1-1-b8-00-quantizer-50.ivf.md5"
+              "av1-1-b8-00-quantizer-51.ivf"
+              "av1-1-b8-00-quantizer-51.ivf.md5"
+              "av1-1-b8-00-quantizer-52.ivf"
+              "av1-1-b8-00-quantizer-52.ivf.md5"
+              "av1-1-b8-00-quantizer-53.ivf"
+              "av1-1-b8-00-quantizer-53.ivf.md5"
+              "av1-1-b8-00-quantizer-54.ivf"
+              "av1-1-b8-00-quantizer-54.ivf.md5"
+              "av1-1-b8-00-quantizer-55.ivf"
+              "av1-1-b8-00-quantizer-55.ivf.md5"
+              "av1-1-b8-00-quantizer-56.ivf"
+              "av1-1-b8-00-quantizer-56.ivf.md5"
+              "av1-1-b8-00-quantizer-57.ivf"
+              "av1-1-b8-00-quantizer-57.ivf.md5"
+              "av1-1-b8-00-quantizer-58.ivf"
+              "av1-1-b8-00-quantizer-58.ivf.md5"
+              "av1-1-b8-00-quantizer-59.ivf"
+              "av1-1-b8-00-quantizer-59.ivf.md5"
+              "av1-1-b8-00-quantizer-60.ivf"
+              "av1-1-b8-00-quantizer-60.ivf.md5"
+              "av1-1-b8-00-quantizer-61.ivf"
+              "av1-1-b8-00-quantizer-61.ivf.md5"
+              "av1-1-b8-00-quantizer-62.ivf"
+              "av1-1-b8-00-quantizer-62.ivf.md5"
+              "av1-1-b8-00-quantizer-63.ivf"
+              "av1-1-b8-00-quantizer-63.ivf.md5"
+              "av1-1-b10-00-quantizer-00.ivf"
+              "av1-1-b10-00-quantizer-00.ivf.md5"
+              "av1-1-b10-00-quantizer-01.ivf"
+              "av1-1-b10-00-quantizer-01.ivf.md5"
+              "av1-1-b10-00-quantizer-02.ivf"
+              "av1-1-b10-00-quantizer-02.ivf.md5"
+              "av1-1-b10-00-quantizer-03.ivf"
+              "av1-1-b10-00-quantizer-03.ivf.md5"
+              "av1-1-b10-00-quantizer-04.ivf"
+              "av1-1-b10-00-quantizer-04.ivf.md5"
+              "av1-1-b10-00-quantizer-05.ivf"
+              "av1-1-b10-00-quantizer-05.ivf.md5"
+              "av1-1-b10-00-quantizer-06.ivf"
+              "av1-1-b10-00-quantizer-06.ivf.md5"
+              "av1-1-b10-00-quantizer-07.ivf"
+              "av1-1-b10-00-quantizer-07.ivf.md5"
+              "av1-1-b10-00-quantizer-08.ivf"
+              "av1-1-b10-00-quantizer-08.ivf.md5"
+              "av1-1-b10-00-quantizer-09.ivf"
+              "av1-1-b10-00-quantizer-09.ivf.md5"
+              "av1-1-b10-00-quantizer-10.ivf"
+              "av1-1-b10-00-quantizer-10.ivf.md5"
+              "av1-1-b10-00-quantizer-11.ivf"
+              "av1-1-b10-00-quantizer-11.ivf.md5"
+              "av1-1-b10-00-quantizer-12.ivf"
+              "av1-1-b10-00-quantizer-12.ivf.md5"
+              "av1-1-b10-00-quantizer-13.ivf"
+              "av1-1-b10-00-quantizer-13.ivf.md5"
+              "av1-1-b10-00-quantizer-14.ivf"
+              "av1-1-b10-00-quantizer-14.ivf.md5"
+              "av1-1-b10-00-quantizer-15.ivf"
+              "av1-1-b10-00-quantizer-15.ivf.md5"
+              "av1-1-b10-00-quantizer-16.ivf"
+              "av1-1-b10-00-quantizer-16.ivf.md5"
+              "av1-1-b10-00-quantizer-17.ivf"
+              "av1-1-b10-00-quantizer-17.ivf.md5"
+              "av1-1-b10-00-quantizer-18.ivf"
+              "av1-1-b10-00-quantizer-18.ivf.md5"
+              "av1-1-b10-00-quantizer-19.ivf"
+              "av1-1-b10-00-quantizer-19.ivf.md5"
+              "av1-1-b10-00-quantizer-20.ivf"
+              "av1-1-b10-00-quantizer-20.ivf.md5"
+              "av1-1-b10-00-quantizer-21.ivf"
+              "av1-1-b10-00-quantizer-21.ivf.md5"
+              "av1-1-b10-00-quantizer-22.ivf"
+              "av1-1-b10-00-quantizer-22.ivf.md5"
+              "av1-1-b10-00-quantizer-23.ivf"
+              "av1-1-b10-00-quantizer-23.ivf.md5"
+              "av1-1-b10-00-quantizer-24.ivf"
+              "av1-1-b10-00-quantizer-24.ivf.md5"
+              "av1-1-b10-00-quantizer-25.ivf"
+              "av1-1-b10-00-quantizer-25.ivf.md5"
+              "av1-1-b10-00-quantizer-26.ivf"
+              "av1-1-b10-00-quantizer-26.ivf.md5"
+              "av1-1-b10-00-quantizer-27.ivf"
+              "av1-1-b10-00-quantizer-27.ivf.md5"
+              "av1-1-b10-00-quantizer-28.ivf"
+              "av1-1-b10-00-quantizer-28.ivf.md5"
+              "av1-1-b10-00-quantizer-29.ivf"
+              "av1-1-b10-00-quantizer-29.ivf.md5"
+              "av1-1-b10-00-quantizer-30.ivf"
+              "av1-1-b10-00-quantizer-30.ivf.md5"
+              "av1-1-b10-00-quantizer-31.ivf"
+              "av1-1-b10-00-quantizer-31.ivf.md5"
+              "av1-1-b10-00-quantizer-32.ivf"
+              "av1-1-b10-00-quantizer-32.ivf.md5"
+              "av1-1-b10-00-quantizer-33.ivf"
+              "av1-1-b10-00-quantizer-33.ivf.md5"
+              "av1-1-b10-00-quantizer-34.ivf"
+              "av1-1-b10-00-quantizer-34.ivf.md5"
+              "av1-1-b10-00-quantizer-35.ivf"
+              "av1-1-b10-00-quantizer-35.ivf.md5"
+              "av1-1-b10-00-quantizer-36.ivf"
+              "av1-1-b10-00-quantizer-36.ivf.md5"
+              "av1-1-b10-00-quantizer-37.ivf"
+              "av1-1-b10-00-quantizer-37.ivf.md5"
+              "av1-1-b10-00-quantizer-38.ivf"
+              "av1-1-b10-00-quantizer-38.ivf.md5"
+              "av1-1-b10-00-quantizer-39.ivf"
+              "av1-1-b10-00-quantizer-39.ivf.md5"
+              "av1-1-b10-00-quantizer-40.ivf"
+              "av1-1-b10-00-quantizer-40.ivf.md5"
+              "av1-1-b10-00-quantizer-41.ivf"
+              "av1-1-b10-00-quantizer-41.ivf.md5"
+              "av1-1-b10-00-quantizer-42.ivf"
+              "av1-1-b10-00-quantizer-42.ivf.md5"
+              "av1-1-b10-00-quantizer-43.ivf"
+              "av1-1-b10-00-quantizer-43.ivf.md5"
+              "av1-1-b10-00-quantizer-44.ivf"
+              "av1-1-b10-00-quantizer-44.ivf.md5"
+              "av1-1-b10-00-quantizer-45.ivf"
+              "av1-1-b10-00-quantizer-45.ivf.md5"
+              "av1-1-b10-00-quantizer-46.ivf"
+              "av1-1-b10-00-quantizer-46.ivf.md5"
+              "av1-1-b10-00-quantizer-47.ivf"
+              "av1-1-b10-00-quantizer-47.ivf.md5"
+              "av1-1-b10-00-quantizer-48.ivf"
+              "av1-1-b10-00-quantizer-48.ivf.md5"
+              "av1-1-b10-00-quantizer-49.ivf"
+              "av1-1-b10-00-quantizer-49.ivf.md5"
+              "av1-1-b10-00-quantizer-50.ivf"
+              "av1-1-b10-00-quantizer-50.ivf.md5"
+              "av1-1-b10-00-quantizer-51.ivf"
+              "av1-1-b10-00-quantizer-51.ivf.md5"
+              "av1-1-b10-00-quantizer-52.ivf"
+              "av1-1-b10-00-quantizer-52.ivf.md5"
+              "av1-1-b10-00-quantizer-53.ivf"
+              "av1-1-b10-00-quantizer-53.ivf.md5"
+              "av1-1-b10-00-quantizer-54.ivf"
+              "av1-1-b10-00-quantizer-54.ivf.md5"
+              "av1-1-b10-00-quantizer-55.ivf"
+              "av1-1-b10-00-quantizer-55.ivf.md5"
+              "av1-1-b10-00-quantizer-56.ivf"
+              "av1-1-b10-00-quantizer-56.ivf.md5"
+              "av1-1-b10-00-quantizer-57.ivf"
+              "av1-1-b10-00-quantizer-57.ivf.md5"
+              "av1-1-b10-00-quantizer-58.ivf"
+              "av1-1-b10-00-quantizer-58.ivf.md5"
+              "av1-1-b10-00-quantizer-59.ivf"
+              "av1-1-b10-00-quantizer-59.ivf.md5"
+              "av1-1-b10-00-quantizer-60.ivf"
+              "av1-1-b10-00-quantizer-60.ivf.md5"
+              "av1-1-b10-00-quantizer-61.ivf"
+              "av1-1-b10-00-quantizer-61.ivf.md5"
+              "av1-1-b10-00-quantizer-62.ivf"
+              "av1-1-b10-00-quantizer-62.ivf.md5"
+              "av1-1-b10-00-quantizer-63.ivf"
+              "av1-1-b10-00-quantizer-63.ivf.md5"
+              "av1-1-b10-23-film_grain-50.ivf"
+              "av1-1-b10-23-film_grain-50.ivf.md5"
+              "av1-1-b10-24-monochrome.ivf"
+              "av1-1-b10-24-monochrome.ivf.md5"
+              "av1-1-b8-01-size-16x16.ivf"
+              "av1-1-b8-01-size-16x16.ivf.md5"
+              "av1-1-b8-01-size-16x18.ivf"
+              "av1-1-b8-01-size-16x18.ivf.md5"
+              "av1-1-b8-01-size-16x32.ivf"
+              "av1-1-b8-01-size-16x32.ivf.md5"
+              "av1-1-b8-01-size-16x34.ivf"
+              "av1-1-b8-01-size-16x34.ivf.md5"
+              "av1-1-b8-01-size-16x64.ivf"
+              "av1-1-b8-01-size-16x64.ivf.md5"
+              "av1-1-b8-01-size-16x66.ivf"
+              "av1-1-b8-01-size-16x66.ivf.md5"
+              "av1-1-b8-01-size-18x16.ivf"
+              "av1-1-b8-01-size-18x16.ivf.md5"
+              "av1-1-b8-01-size-18x18.ivf"
+              "av1-1-b8-01-size-18x18.ivf.md5"
+              "av1-1-b8-01-size-18x32.ivf"
+              "av1-1-b8-01-size-18x32.ivf.md5"
+              "av1-1-b8-01-size-18x34.ivf"
+              "av1-1-b8-01-size-18x34.ivf.md5"
+              "av1-1-b8-01-size-18x64.ivf"
+              "av1-1-b8-01-size-18x64.ivf.md5"
+              "av1-1-b8-01-size-18x66.ivf"
+              "av1-1-b8-01-size-18x66.ivf.md5"
+              "av1-1-b8-01-size-196x196.ivf"
+              "av1-1-b8-01-size-196x196.ivf.md5"
+              "av1-1-b8-01-size-196x198.ivf"
+              "av1-1-b8-01-size-196x198.ivf.md5"
+              "av1-1-b8-01-size-196x200.ivf"
+              "av1-1-b8-01-size-196x200.ivf.md5"
+              "av1-1-b8-01-size-196x202.ivf"
+              "av1-1-b8-01-size-196x202.ivf.md5"
+              "av1-1-b8-01-size-196x208.ivf"
+              "av1-1-b8-01-size-196x208.ivf.md5"
+              "av1-1-b8-01-size-196x210.ivf"
+              "av1-1-b8-01-size-196x210.ivf.md5"
+              "av1-1-b8-01-size-196x224.ivf"
+              "av1-1-b8-01-size-196x224.ivf.md5"
+              "av1-1-b8-01-size-196x226.ivf"
+              "av1-1-b8-01-size-196x226.ivf.md5"
+              "av1-1-b8-01-size-198x196.ivf"
+              "av1-1-b8-01-size-198x196.ivf.md5"
+              "av1-1-b8-01-size-198x198.ivf"
+              "av1-1-b8-01-size-198x198.ivf.md5"
+              "av1-1-b8-01-size-198x200.ivf"
+              "av1-1-b8-01-size-198x200.ivf.md5"
+              "av1-1-b8-01-size-198x202.ivf"
+              "av1-1-b8-01-size-198x202.ivf.md5"
+              "av1-1-b8-01-size-198x208.ivf"
+              "av1-1-b8-01-size-198x208.ivf.md5"
+              "av1-1-b8-01-size-198x210.ivf"
+              "av1-1-b8-01-size-198x210.ivf.md5"
+              "av1-1-b8-01-size-198x224.ivf"
+              "av1-1-b8-01-size-198x224.ivf.md5"
+              "av1-1-b8-01-size-198x226.ivf"
+              "av1-1-b8-01-size-198x226.ivf.md5"
+              "av1-1-b8-01-size-200x196.ivf"
+              "av1-1-b8-01-size-200x196.ivf.md5"
+              "av1-1-b8-01-size-200x198.ivf"
+              "av1-1-b8-01-size-200x198.ivf.md5"
+              "av1-1-b8-01-size-200x200.ivf"
+              "av1-1-b8-01-size-200x200.ivf.md5"
+              "av1-1-b8-01-size-200x202.ivf"
+              "av1-1-b8-01-size-200x202.ivf.md5"
+              "av1-1-b8-01-size-200x208.ivf"
+              "av1-1-b8-01-size-200x208.ivf.md5"
+              "av1-1-b8-01-size-200x210.ivf"
+              "av1-1-b8-01-size-200x210.ivf.md5"
+              "av1-1-b8-01-size-200x224.ivf"
+              "av1-1-b8-01-size-200x224.ivf.md5"
+              "av1-1-b8-01-size-200x226.ivf"
+              "av1-1-b8-01-size-200x226.ivf.md5"
+              "av1-1-b8-01-size-202x196.ivf"
+              "av1-1-b8-01-size-202x196.ivf.md5"
+              "av1-1-b8-01-size-202x198.ivf"
+              "av1-1-b8-01-size-202x198.ivf.md5"
+              "av1-1-b8-01-size-202x200.ivf"
+              "av1-1-b8-01-size-202x200.ivf.md5"
+              "av1-1-b8-01-size-202x202.ivf"
+              "av1-1-b8-01-size-202x202.ivf.md5"
+              "av1-1-b8-01-size-202x208.ivf"
+              "av1-1-b8-01-size-202x208.ivf.md5"
+              "av1-1-b8-01-size-202x210.ivf"
+              "av1-1-b8-01-size-202x210.ivf.md5"
+              "av1-1-b8-01-size-202x224.ivf"
+              "av1-1-b8-01-size-202x224.ivf.md5"
+              "av1-1-b8-01-size-202x226.ivf"
+              "av1-1-b8-01-size-202x226.ivf.md5"
+              "av1-1-b8-01-size-208x196.ivf"
+              "av1-1-b8-01-size-208x196.ivf.md5"
+              "av1-1-b8-01-size-208x198.ivf"
+              "av1-1-b8-01-size-208x198.ivf.md5"
+              "av1-1-b8-01-size-208x200.ivf"
+              "av1-1-b8-01-size-208x200.ivf.md5"
+              "av1-1-b8-01-size-208x202.ivf"
+              "av1-1-b8-01-size-208x202.ivf.md5"
+              "av1-1-b8-01-size-208x208.ivf"
+              "av1-1-b8-01-size-208x208.ivf.md5"
+              "av1-1-b8-01-size-208x210.ivf"
+              "av1-1-b8-01-size-208x210.ivf.md5"
+              "av1-1-b8-01-size-208x224.ivf"
+              "av1-1-b8-01-size-208x224.ivf.md5"
+              "av1-1-b8-01-size-208x226.ivf"
+              "av1-1-b8-01-size-208x226.ivf.md5"
+              "av1-1-b8-01-size-210x196.ivf"
+              "av1-1-b8-01-size-210x196.ivf.md5"
+              "av1-1-b8-01-size-210x198.ivf"
+              "av1-1-b8-01-size-210x198.ivf.md5"
+              "av1-1-b8-01-size-210x200.ivf"
+              "av1-1-b8-01-size-210x200.ivf.md5"
+              "av1-1-b8-01-size-210x202.ivf"
+              "av1-1-b8-01-size-210x202.ivf.md5"
+              "av1-1-b8-01-size-210x208.ivf"
+              "av1-1-b8-01-size-210x208.ivf.md5"
+              "av1-1-b8-01-size-210x210.ivf"
+              "av1-1-b8-01-size-210x210.ivf.md5"
+              "av1-1-b8-01-size-210x224.ivf"
+              "av1-1-b8-01-size-210x224.ivf.md5"
+              "av1-1-b8-01-size-210x226.ivf"
+              "av1-1-b8-01-size-210x226.ivf.md5"
+              "av1-1-b8-01-size-224x196.ivf"
+              "av1-1-b8-01-size-224x196.ivf.md5"
+              "av1-1-b8-01-size-224x198.ivf"
+              "av1-1-b8-01-size-224x198.ivf.md5"
+              "av1-1-b8-01-size-224x200.ivf"
+              "av1-1-b8-01-size-224x200.ivf.md5"
+              "av1-1-b8-01-size-224x202.ivf"
+              "av1-1-b8-01-size-224x202.ivf.md5"
+              "av1-1-b8-01-size-224x208.ivf"
+              "av1-1-b8-01-size-224x208.ivf.md5"
+              "av1-1-b8-01-size-224x210.ivf"
+              "av1-1-b8-01-size-224x210.ivf.md5"
+              "av1-1-b8-01-size-224x224.ivf"
+              "av1-1-b8-01-size-224x224.ivf.md5"
+              "av1-1-b8-01-size-224x226.ivf"
+              "av1-1-b8-01-size-224x226.ivf.md5"
+              "av1-1-b8-01-size-226x196.ivf"
+              "av1-1-b8-01-size-226x196.ivf.md5"
+              "av1-1-b8-01-size-226x198.ivf"
+              "av1-1-b8-01-size-226x198.ivf.md5"
+              "av1-1-b8-01-size-226x200.ivf"
+              "av1-1-b8-01-size-226x200.ivf.md5"
+              "av1-1-b8-01-size-226x202.ivf"
+              "av1-1-b8-01-size-226x202.ivf.md5"
+              "av1-1-b8-01-size-226x208.ivf"
+              "av1-1-b8-01-size-226x208.ivf.md5"
+              "av1-1-b8-01-size-226x210.ivf"
+              "av1-1-b8-01-size-226x210.ivf.md5"
+              "av1-1-b8-01-size-226x224.ivf"
+              "av1-1-b8-01-size-226x224.ivf.md5"
+              "av1-1-b8-01-size-226x226.ivf"
+              "av1-1-b8-01-size-226x226.ivf.md5"
+              "av1-1-b8-01-size-32x16.ivf"
+              "av1-1-b8-01-size-32x16.ivf.md5"
+              "av1-1-b8-01-size-32x18.ivf"
+              "av1-1-b8-01-size-32x18.ivf.md5"
+              "av1-1-b8-01-size-32x32.ivf"
+              "av1-1-b8-01-size-32x32.ivf.md5"
+              "av1-1-b8-01-size-32x34.ivf"
+              "av1-1-b8-01-size-32x34.ivf.md5"
+              "av1-1-b8-01-size-32x64.ivf"
+              "av1-1-b8-01-size-32x64.ivf.md5"
+              "av1-1-b8-01-size-32x66.ivf"
+              "av1-1-b8-01-size-32x66.ivf.md5"
+              "av1-1-b8-01-size-34x16.ivf"
+              "av1-1-b8-01-size-34x16.ivf.md5"
+              "av1-1-b8-01-size-34x18.ivf"
+              "av1-1-b8-01-size-34x18.ivf.md5"
+              "av1-1-b8-01-size-34x32.ivf"
+              "av1-1-b8-01-size-34x32.ivf.md5"
+              "av1-1-b8-01-size-34x34.ivf"
+              "av1-1-b8-01-size-34x34.ivf.md5"
+              "av1-1-b8-01-size-34x64.ivf"
+              "av1-1-b8-01-size-34x64.ivf.md5"
+              "av1-1-b8-01-size-34x66.ivf"
+              "av1-1-b8-01-size-34x66.ivf.md5"
+              "av1-1-b8-01-size-64x16.ivf"
+              "av1-1-b8-01-size-64x16.ivf.md5"
+              "av1-1-b8-01-size-64x18.ivf"
+              "av1-1-b8-01-size-64x18.ivf.md5"
+              "av1-1-b8-01-size-64x32.ivf"
+              "av1-1-b8-01-size-64x32.ivf.md5"
+              "av1-1-b8-01-size-64x34.ivf"
+              "av1-1-b8-01-size-64x34.ivf.md5"
+              "av1-1-b8-01-size-64x64.ivf"
+              "av1-1-b8-01-size-64x64.ivf.md5"
+              "av1-1-b8-01-size-64x66.ivf"
+              "av1-1-b8-01-size-64x66.ivf.md5"
+              "av1-1-b8-01-size-66x16.ivf"
+              "av1-1-b8-01-size-66x16.ivf.md5"
+              "av1-1-b8-01-size-66x18.ivf"
+              "av1-1-b8-01-size-66x18.ivf.md5"
+              "av1-1-b8-01-size-66x32.ivf"
+              "av1-1-b8-01-size-66x32.ivf.md5"
+              "av1-1-b8-01-size-66x34.ivf"
+              "av1-1-b8-01-size-66x34.ivf.md5"
+              "av1-1-b8-01-size-66x64.ivf"
+              "av1-1-b8-01-size-66x64.ivf.md5"
+              "av1-1-b8-01-size-66x66.ivf"
+              "av1-1-b8-01-size-66x66.ivf.md5"
+              "av1-1-b8-02-allintra.ivf"
+              "av1-1-b8-02-allintra.ivf.md5"
+              "av1-1-b8-03-sizeup.mkv"
+              "av1-1-b8-03-sizeup.mkv.md5"
+              "av1-1-b8-03-sizedown.mkv"
+              "av1-1-b8-03-sizedown.mkv.md5"
+              "av1-1-b8-04-cdfupdate.ivf"
+              "av1-1-b8-04-cdfupdate.ivf.md5"
+              "av1-1-b8-05-mv.ivf"
+              "av1-1-b8-05-mv.ivf.md5"
+              "av1-1-b8-06-mfmv.ivf"
+              "av1-1-b8-06-mfmv.ivf.md5"
+              "av1-1-b8-16-intra_only-intrabc-extreme-dv.ivf"
+              "av1-1-b8-16-intra_only-intrabc-extreme-dv.ivf.md5"
+              "av1-1-b8-22-svc-L2T1.ivf"
+              "av1-1-b8-22-svc-L2T1.ivf.md5"
+              "av1-1-b8-22-svc-L1T2.ivf"
+              "av1-1-b8-22-svc-L1T2.ivf.md5"
+              "av1-1-b8-22-svc-L2T2.ivf"
+              "av1-1-b8-22-svc-L2T2.ivf.md5"
+              "av1-1-b8-23-film_grain-50.ivf"
+              "av1-1-b8-23-film_grain-50.ivf.md5"
+              "av1-1-b8-24-monochrome.ivf"
+              "av1-1-b8-24-monochrome.ivf.md5"
+              "invalid-bug-1814.ivf"
+              "invalid-bug-1814.ivf.res"
+              "invalid-chromium-906381.ivf"
+              "invalid-chromium-906381.ivf.res"
+              "invalid-google-142530197-1.ivf"
+              "invalid-google-142530197-1.ivf.res"
+              "invalid-google-142530197.ivf"
+              "invalid-google-142530197.ivf.res"
+              "invalid-oss-fuzz-10061.ivf"
+              "invalid-oss-fuzz-10061.ivf.res"
+              "invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf"
+              "invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf.res"
+              "invalid-oss-fuzz-10227.ivf"
+              "invalid-oss-fuzz-10227.ivf.res"
+              "invalid-oss-fuzz-10389.ivf"
+              "invalid-oss-fuzz-10389.ivf.res.4"
+              "invalid-oss-fuzz-10555.ivf"
+              "invalid-oss-fuzz-10555.ivf.res"
+              "invalid-oss-fuzz-10705.ivf"
+              "invalid-oss-fuzz-10705.ivf.res"
+              "invalid-oss-fuzz-10723.ivf"
+              "invalid-oss-fuzz-10723.ivf.res.2"
+              "invalid-oss-fuzz-10723.ivf.res.3"
+              "invalid-oss-fuzz-10779.ivf"
+              "invalid-oss-fuzz-10779.ivf.res"
+              "invalid-oss-fuzz-11477.ivf"
+              "invalid-oss-fuzz-11477.ivf.res"
+              "invalid-oss-fuzz-11479.ivf"
+              "invalid-oss-fuzz-11479.ivf.res.2"
+              "invalid-oss-fuzz-11523.ivf"
+              "invalid-oss-fuzz-11523.ivf.res.2"
+              "invalid-oss-fuzz-15363.ivf"
+              "invalid-oss-fuzz-15363.ivf.res"
+              "invalid-oss-fuzz-16437.ivf"
+              "invalid-oss-fuzz-16437.ivf.res.2"
+              "invalid-oss-fuzz-24706.ivf"
+              "invalid-oss-fuzz-24706.ivf.res"
+              "invalid-oss-fuzz-33030.ivf"
+              "invalid-oss-fuzz-33030.ivf.res"
+              "invalid-oss-fuzz-9288.ivf"
+              "invalid-oss-fuzz-9288.ivf.res"
+              "invalid-oss-fuzz-9463.ivf"
+              "invalid-oss-fuzz-9463.ivf.res.2"
+              "invalid-oss-fuzz-9482.ivf"
+              "invalid-oss-fuzz-9482.ivf.res"
+              "invalid-oss-fuzz-9720.ivf"
+              "invalid-oss-fuzz-9720.ivf.res")
+endif()
+
+if(ENABLE_ENCODE_PERF_TESTS AND CONFIG_AV1_ENCODER)
+  list(APPEND AOM_TEST_DATA_FILE_NAMES "desktop_640_360_30.yuv"
+              "kirland_640_480_30.yuv" "macmarcomoving_640_480_30.yuv"
+              "macmarcostationary_640_480_30.yuv" "niklas_1280_720_30.yuv"
+              "tacomanarrows_640_480_30.yuv"
+              "tacomasmallcameramovement_640_480_30.yuv"
+              "thaloundeskmtg_640_480_30.yuv")
+endif()
+
+# Parses test/test-data.sha1 and writes captured file names and checksums to
+# $out_files and $out_checksums as lists.
+function(make_test_data_lists test_data_file out_files out_checksums)
+  if(NOT test_data_file OR NOT EXISTS "${test_data_file}")
+    message(FATAL_ERROR "Test info file missing or empty (${test_data_file})")
+  endif()
+
+  # Read $test_data_file into $files_and_checksums. $files_and_checksums becomes
+  # a list with an entry for each line from $test_data_file.
+  file(STRINGS "${test_data_file}" files_and_checksums)
+
+  # Iterate over the list of lines and split it into $checksums and $filenames.
+  foreach(line ${files_and_checksums})
+    string(FIND "${line}" " *" delim_pos)
+
+    math(EXPR filename_pos "${delim_pos} + 2")
+    string(SUBSTRING "${line}" 0 ${delim_pos} checksum)
+    string(SUBSTRING "${line}" ${filename_pos} -1 filename)
+
+    list(FIND AOM_TEST_DATA_FILE_NAMES ${filename} list_index)
+    if(NOT ${list_index} EQUAL -1)
+
+      # Include the name and checksum in output only when the file is needed.
+      set(checksums ${checksums} ${checksum})
+      set(filenames ${filenames} ${filename})
+    endif()
+  endforeach()
+
+  list(LENGTH filenames num_files)
+  list(LENGTH checksums num_checksums)
+  if(NOT checksums OR NOT filenames OR NOT num_files EQUAL num_checksums)
+    message(FATAL_ERROR "Parsing of ${test_data_file} failed.")
+  endif()
+
+  set(${out_checksums} ${checksums} PARENT_SCOPE)
+  set(${out_files} ${filenames} PARENT_SCOPE)
+endfunction()
+
+# Appends each file name in $test_files to $test_dir and adds the result path to
+# $out_path_list.
+function(expand_test_file_paths test_files test_dir out_path_list)
+  foreach(filename ${${test_files}})
+    set(path_list ${path_list} "${test_dir}/${filename}")
+  endforeach()
+  set(${out_path_list} ${path_list} PARENT_SCOPE)
+endfunction()
+
+function(check_file local_path expected_checksum out_needs_update)
+  if(EXISTS "${local_path}")
+    file(SHA1 "${local_path}" file_checksum)
+  else()
+    set(${out_needs_update} 1 PARENT_SCOPE)
+    return()
+  endif()
+
+  if("${file_checksum}" STREQUAL "${expected_checksum}")
+    unset(${out_needs_update} PARENT_SCOPE)
+  else()
+    set(${out_needs_update} 1 PARENT_SCOPE)
+    return()
+  endif()
+  message("${local_path} up to date.")
+endfunction()
+
+# Downloads data from $file_url, confirms that $file_checksum matches, and
+# writes it to $local_path.
+function(download_test_file file_url file_checksum local_path)
+  message("Downloading ${file_url} ...")
+  file(DOWNLOAD "${file_url}" "${local_path}" SHOW_PROGRESS EXPECTED_HASH
+                                              SHA1=${file_checksum})
+  message("Download of ${file_url} complete.")
+endfunction()
diff --git a/third_party/aom/test/test_intra_pred_speed.cc b/third_party/aom/test/test_intra_pred_speed.cc
new file mode 100644
index 0000000000..d5c94be092
--- /dev/null
+++ b/third_party/aom/test/test_intra_pred_speed.cc
@@ -0,0 +1,1742 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+//  Test and time AOM intra-predictor functions
+
+#include <stdio.h>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/md5_helper.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/common/common_data.h"
+
+// -----------------------------------------------------------------------------
+
+namespace {
+
+// Note:
+// APPLY_UNIT_TESTS
+// 1: Do unit tests
+// 0: Generate MD5 array as required
+#define APPLY_UNIT_TESTS 1
+
+typedef void (*AvxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
+                            const uint8_t *above, const uint8_t *left);
+
+const int kBPS = 64;
+const int kTotalPixels = kBPS * kBPS;
+// 4 DC variants, V, H, PAETH, SMOOTH, SMOOTH_V, SMOOTH_H
+const int kNumAv1IntraFuncs = 10;
+
+#if APPLY_UNIT_TESTS
+const char *kAv1IntraPredNames[kNumAv1IntraFuncs] = {
+  "DC_PRED", "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED",   "V_PRED",
+  "H_PRED",  "PAETH_PRED",   "SMOOTH_PRED", "SMOOTH_V_PRED", "SMOOTH_H_PRED",
+};
+#endif  // APPLY_UNIT_TESTS
+
+template <typename Pixel>
+struct IntraPredTestMem {
+  void Init(int block_width, int block_height, int bd) {
+    ASSERT_LE(block_width, kBPS);
+    ASSERT_LE(block_height, kBPS);
+    // Note: for blocks having width <= 32 and height <= 32, we generate 32x32
+    // random pixels as before to avoid having to recalculate all hashes again.
+    const int block_size_upto_32 = (block_width <= 32) && (block_height <= 32);
+    stride = block_size_upto_32 ? 32 : kBPS;
+    num_pixels = stride * stride;
+    libaom_test::ACMRandom rnd(libaom_test::ACMRandom::DeterministicSeed());
+    above = above_mem + 16;
+    const int mask = (1 << bd) - 1;
+    for (int i = 0; i < num_pixels; ++i) ref_src[i] = rnd.Rand16() & mask;
+    for (int i = 0; i < stride; ++i) left[i] = rnd.Rand16() & mask;
+    for (int i = -1; i < stride; ++i) above[i] = rnd.Rand16() & mask;
+
+    for (int i = stride; i < 2 * stride; ++i) {
+      left[i] = rnd.Rand16() & mask;
+      above[i] = rnd.Rand16() & mask;
+    }
+  }
+
+  DECLARE_ALIGNED(16, Pixel, src[kTotalPixels]);
+  DECLARE_ALIGNED(16, Pixel, ref_src[kTotalPixels]);
+  DECLARE_ALIGNED(16, Pixel, left[2 * kBPS]);
+  Pixel *above;
+  int stride;
+  int num_pixels;
+
+ private:
+  DECLARE_ALIGNED(16, Pixel, above_mem[2 * kBPS + 16]);
+};
+
+// -----------------------------------------------------------------------------
+// Low Bittdepth
+
+typedef IntraPredTestMem<uint8_t> Av1IntraPredTestMem;
+
+static const char *const kTxSizeStrings[TX_SIZES_ALL] = {
+  "4X4",  "8X8",  "16X16", "32X32", "64X64", "4X8",   "8X4",
+  "8X16", "16X8", "16X32", "32X16", "32X64", "64X32", "4X16",
+  "16X4", "8X32", "32X8",  "16X64", "64X16",
+};
+
+void CheckMd5Signature(TX_SIZE tx_size, bool is_hbd,
+                       const char *const signatures[], const void *data,
+                       size_t data_size, int elapsed_time, int idx) {
+  const std::string hbd_str = is_hbd ? "Hbd " : "";
+  const std::string name_str = hbd_str + "Intra" + kTxSizeStrings[tx_size];
+  libaom_test::MD5 md5;
+  md5.Add(reinterpret_cast<const uint8_t *>(data), data_size);
+#if APPLY_UNIT_TESTS
+  printf("Mode %s[%13s]: %5d ms     MD5: %s\n", name_str.c_str(),
+         kAv1IntraPredNames[idx], elapsed_time, md5.Get());
+  EXPECT_STREQ(signatures[idx], md5.Get());
+#else
+  (void)signatures;
+  (void)elapsed_time;
+  (void)idx;
+  printf("\"%s\",\n", md5.Get());
+#endif
+}
+
+void TestIntraPred(TX_SIZE tx_size, AvxPredFunc const *pred_funcs,
+                   const char *const signatures[]) {
+  const int block_width = tx_size_wide[tx_size];
+  const int block_height = tx_size_high[tx_size];
+  const int num_pixels_per_test =
+      block_width * block_height * kNumAv1IntraFuncs;
+  const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
+  Av1IntraPredTestMem intra_pred_test_mem;
+  intra_pred_test_mem.Init(block_width, block_height, 8);
+
+  for (int k = 0; k < kNumAv1IntraFuncs; ++k) {
+    if (pred_funcs[k] == nullptr) continue;
+    memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src,
+           sizeof(intra_pred_test_mem.src));
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
+      pred_funcs[k](intra_pred_test_mem.src, intra_pred_test_mem.stride,
+                    intra_pred_test_mem.above, intra_pred_test_mem.left);
+    }
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time =
+        static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
+    CheckMd5Signature(
+        tx_size, false, signatures, intra_pred_test_mem.src,
+        intra_pred_test_mem.num_pixels * sizeof(*intra_pred_test_mem.src),
+        elapsed_time, k);
+  }
+}
+
+static const char *const kSignatures[TX_SIZES_ALL][kNumAv1IntraFuncs] = {
+  {
+      // 4X4
+      "e7ed7353c3383fff942e500e9bfe82fe",
+      "2a4a26fcc6ce005eadc08354d196c8a9",
+      "269d92eff86f315d9c38fe7640d85b15",
+      "ae2960eea9f71ee3dabe08b282ec1773",
+      "6c1abcc44e90148998b51acd11144e9c",
+      "f7bb3186e1ef8a2b326037ff898cad8e",
+      "59fc0e923a08cfac0a493fb38988e2bb",
+      "9ff8bb37d9c830e6ab8ecb0c435d3c91",
+      "de6937fca02354f2874dbc5dbec5d5b3",
+      "723cf948137f7d8c7860d814e55ae67d",
+  },
+  {
+      // 8X8
+      "d8bbae5d6547cfc17e4f5f44c8730e88",
+      "373bab6d931868d41a601d9d88ce9ac3",
+      "6fdd5ff4ff79656c14747598ca9e3706",
+      "d9661c2811d6a73674f40ffb2b841847",
+      "7c722d10b19ccff0b8c171868e747385",
+      "f81dd986eb2b50f750d3a7da716b7e27",
+      "064404361748dd111a890a1470d7f0ea",
+      "dc29b7e1f78cc8e7525d5ea4c0ab9b78",
+      "97111eb1bc26bade6272015df829f1ae",
+      "d19a8a73cc46b807f2c5e817576cc1e1",
+  },
+  {
+      // 16X16
+      "50971c07ce26977d30298538fffec619",
+      "527a6b9e0dc5b21b98cf276305432bef",
+      "7eff2868f80ebc2c43a4f367281d80f7",
+      "67cd60512b54964ef6aff1bd4816d922",
+      "48371c87dc95c08a33b2048f89cf6468",
+      "b0acf2872ee411d7530af6d2625a7084",
+      "93d6b5352b571805ab16a55e1bbed86a",
+      "03764e4c0aebbc180e4e2c68fb06df2b",
+      "bb6c74c9076c9f266ab11fb57060d8e6",
+      "0c5162bc28489756ddb847b5678e6f07",
+  },
+  {
+      // 32X32
+      "a0a618c900e65ae521ccc8af789729f2",
+      "985aaa7c72b4a6c2fb431d32100cf13a",
+      "10662d09febc3ca13ee4e700120daeb5",
+      "b3b01379ba08916ef6b1b35f7d9ad51c",
+      "9f4261755795af97e34679c333ec7004",
+      "bc2c9da91ad97ef0d1610fb0a9041657",
+      "ef1653982b69e1f64bee3759f3e1ec45",
+      "1a51a675deba2c83282142eb48d3dc3d",
+      "866c224746dc260cda861a7b1b383fb3",
+      "cea23799fc3526e1b6a6ff02b42b82af",
+  },
+  {
+      // 64X64
+      "6e1094fa7b50bc813aa2ba29f5df8755",
+      "afe020786b83b793c2bbd9468097ff6e",
+      "be91585259bc37bf4dc1651936e90b3e",
+      "a1650dbcd56e10288c3e269eca37967d",
+      "9e5c34f3797e0cdd3cd9d4c05b0d8950",
+      "bc87be7ac899cc6a28f399d7516c49fe",
+      "9811fd0d2dd515f06122f5d1bd18b784",
+      "3c140e466f2c2c0d9cb7d2157ab8dc27",
+      "9543de76c925a8f6adc884cc7f98dc91",
+      "df1df0376cc944afe7e74e94f53e575a",
+  },
+  {
+      // 4X8
+      "d9fbebdc85f71ab1e18461b2db4a2adc",
+      "5ccb2a68284bc9714d94b8a06ccadbb2",
+      "735d059abc2744f3ff3f9590f7191b37",
+      "d9fbebdc85f71ab1e18461b2db4a2adc",
+      "6819497c44cd0ace120add83672996ee",
+      "7e3244f5a2d3edf81c7e962a842b97f9",
+      "809350f164cd4d1650850bb0f59c3260",
+      "1b60a394331eeab6927a6f8aaff57040",
+      "5307de1bd7329ba6b281d2c1b0b457f9",
+      "24c58a8138339846d95568efb91751db",
+  },
+  {
+      // 8X4
+      "23f9fc11344426c9bee2e06d57dfd628",
+      "2d71a26d1bae1fb34734de7b42fc5eb7",
+      "5af9c1b2fd9d5721fad67b67b3f7c816",
+      "00d71b17be662753813d515f197d145e",
+      "bef10ec984427e28f4390f43809d10af",
+      "77773cdfb7ed6bc882ab202a64b0a470",
+      "2cc48bd66d6b0121b5221d52ccd732af",
+      "b302155e1c9eeeafe2ba2bf68e807a46",
+      "561bc8d0e76d5041ebd5168fc6a115e1",
+      "81d0113fb1d0a9a24ffd6f1987b77948",
+  },
+  {
+      // 8X16
+      "c849de88b24f773dfcdd1d48d1209796",
+      "6cb807c1897b94866a0f3d3c56ed8695",
+      "d56db05a8ac7981762f5b877f486c4ef",
+      "b4bc01eb6e59a40922ad17715cafb04b",
+      "09d178439534f4062ae687c351f66d64",
+      "644501399cf73080ac606e5cef7ca09b",
+      "278076495180e17c065a95ab7278539a",
+      "9dd7f324816f242be408ffeb0c673732",
+      "f520c4a20acfa0bea1d253c6f0f040fd",
+      "85f38df809df2c2d7c8b4a157a65cd44",
+  },
+  {
+      // 16X8
+      "b4cbdbdf10ce13300b4063a3daf99e04",
+      "3731e1e6202064a9d0604d7c293ecee4",
+      "6c856188c4256a06452f0d5d70cac436",
+      "1f2192b4c8c497589484ea7bf9c944e8",
+      "84011bd4b7f565119d06787840e333a0",
+      "0e48949f7a6aa36f0d76b5d01f91124a",
+      "60eff8064634b6c73b10681356baeee9",
+      "1559aeb081a9c0c71111d6093c2ff9fd",
+      "c15479b739713773e5cabb748451987b",
+      "72e33ec12c9b67aea26d8d005fb82de2",
+  },
+  {
+      // 16X32
+      "abe5233d189cdbf79424721571bbaa7b",
+      "282759f81e3cfb2e2d396fe406b72a8b",
+      "e2224926c264f6f174cbc3167a233168",
+      "6814e85c2b33f8c9415d62e80394b47b",
+      "99cbbb60459c08a3061d72c4e4f6276a",
+      "1d1567d40b8e816f8c1f71e576fe0f87",
+      "36fdd371b624a075814d497c4832ec85",
+      "8ab8da61b727442b6ff692b40d0df018",
+      "e35a10ad7fdf2327e821504a90f6a6eb",
+      "1f7211e727dc1de7d6a55d082fbdd821",
+  },
+  {
+      // 32X16
+      "d1aeb8d5fdcfd3307922af01a798a4dc",
+      "b0bcb514ebfbee065faea9d34c12ae75",
+      "d6a18c63b4e909871c0137ca652fad23",
+      "fd047f2fc1b8ffb95d0eeef3e8796a45",
+      "645ab60779ea348fd93c81561c31bab9",
+      "4409633c9db8dff41ade4292a3a56e7f",
+      "5e36a11e069b31c2a739f3a9c7b37c24",
+      "e83b9483d702cfae496991c3c7fa92c0",
+      "12f6ddf98c7f30a277307f1ea935b030",
+      "354321d6c32bbdb0739e4fa2acbf41e1",
+  },
+  {
+      // 32X64
+      "0ce332b343934b34cd4417725faa85cb",
+      "4e2a2cfd8f56f15939bdfc753145b303",
+      "0f46d124ba9f48cdd5d5290acf786d6d",
+      "e1e8ed803236367821981500a3d9eebe",
+      "1d2f8e48e3adb7c448be05d9f66f4954",
+      "9fb2e176636a5689b26f73ca73fcc512",
+      "e720ebccae7e25e36f23da53ae5b5d6a",
+      "86fe4364734169aaa4520d799890d530",
+      "b1870290764bb1b100d1974e2bd70f1d",
+      "ce5b238e19d85ef69d85badfab4e63ae",
+  },
+  {
+      // 64X32
+      "a6c5aeb722615089efbca80b02951ceb",
+      "538424b24bd0830f21788e7238ca762f",
+      "80c15b303235f9bc2259027bb92dfdc4",
+      "e48e1ac15e97191a8fda08d62fff343e",
+      "12604b37875533665078405ef4582e35",
+      "0048afa17bd3e1632d68b96048836530",
+      "07a0cfcb56a5eed50c4bd6c26814336b",
+      "529d8a070de5bc6531fa3ee8f450c233",
+      "33c50a11c7d78f72434064f634305e95",
+      "e0ef7f0559c1a50ec5a8c12011b962f7",
+  },
+  {
+      // 4X16
+      "750491056568eb8fe15387b86bdf06b8",
+      "3a52dae9f599f08cfb3bd1b910dc0e11",
+      "af79f71e3e03dbeca44e2e13561f70c7",
+      "ca7dfd7624afc0c06fb5552f44398535",
+      "b591af115444bf43140c29c269f68fb2",
+      "483d942ae36e69e62f31eb215331416f",
+      "f14b58525e81870bc5d95c7ac71a347f",
+      "371208bb4027d9badb04095d1590bbc4",
+      "c7049c21b2924d70c7c12784d6b6b796",
+      "7d87233f4b5b0f12086045e5d7b2d4c2",
+  },
+  {
+      // 16X4
+      "7c6e325a65e77e732b3adbe237e045e4",
+      "24478f93ffcec47852e004d0fe948464",
+      "258d042c67d4ba3ecfa667f0adc9aebf",
+      "b2cd21d06959f159a1f3c4d9768ee7fb",
+      "b4e1f38157bf8410e7c3da02f687a343",
+      "869e703729eb0fc0711c254944ff5d5a",
+      "9638dd77105a640b146a8201ea7a0801",
+      "919d932c6af8a1cc7486e8ce996dd487",
+      "e1c9be493b6714c7ae48f30044c43140",
+      "bf0fe3889d654b2f6eb98c8fc751f9e4",
+  },
+  {
+      // 8X32
+      "8dfac4319fe0bd40013ffb3102da8c72",
+      "feb46b6dc4e2ca0a09533bfc51d4dcb0",
+      "850837ec714c37262216527aaf4cbbe9",
+      "4603c7800fb08361f163daca876e8bda",
+      "1ff95e7d2debc27b05806fb25abfd624",
+      "d81b9a51a062b23ca7823804cb7bec22",
+      "f1d8978158766f46335203608cb807e7",
+      "f3527096256258c0878d644a9d7d53ca",
+      "cbde98ac8b009953eb112807ad2ea29e",
+      "654fb1153415747feae599f538122af5",
+  },
+  {
+      // 32X8
+      "3d4ee16fab374357474f60b845327bc7",
+      "bc17c5059473a476df4e85f56395ad55",
+      "3d4ee16fab374357474f60b845327bc7",
+      "c14b8db34dc2355b84e3735c9ba16c7f",
+      "a71d25b5d47a92a8b9223c98f18458ee",
+      "6c1cfe2b1893f4576a80675687cb6426",
+      "92d11bbef8b85bb48d799bb055de3514",
+      "bcf81d1db8ae5cc03360467f44f498ec",
+      "79f8c564163555592e808e145eaf5c60",
+      "46fff139cef2ef773938bcc8b0e5abb8",
+  },
+  {
+      // 16X64
+      "3b2a053ee8b05a8ac35ad23b0422a151",
+      "12b0c69595328c465e0b25e0c9e3e9fc",
+      "f77c544ac8035e01920deae40cee7b07",
+      "727797ef15ccd8d325476fe8f12006a3",
+      "f3be77c0fe67eb5d9d515e92bec21eb7",
+      "f1ece6409e01e9dd98b800d49628247d",
+      "efd2ec9bfbbd4fd1f6604ea369df1894",
+      "ec703de918422b9e03197ba0ed60a199",
+      "739418efb89c07f700895deaa5d0b3e3",
+      "9943ae1bbeeebfe1d3a92dc39e049d63",
+  },
+  {
+      // 64X16
+      "821b76b1494d4f84d20817840f719a1a",
+      "69e462c3338a9aaf993c3f7cfbc15649",
+      "516d8f6eb054d74d150e7b444185b6b9",
+      "de1b736e9d99129609d6ef3a491507a0",
+      "fd9b4276e7affe1e0e4ce4f428058994",
+      "cd82fd361a4767ac29a9f406b480b8f3",
+      "2792c2f810157a4a6cb13c28529ff779",
+      "1220442d90c4255ba0969d28b91e93a6",
+      "c7253e10b45f7f67dfee3256c9b94825",
+      "879792198071c7e0b50b9b5010d8c18f",
+  },
+};
+
+}  // namespace
+
+// Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors
+// to TestIntraPred. The test name is 'arch.TestIntraPred_tx_size', e.g.,
+// C.TestIntraPred.0
+#define INTRA_PRED_TEST(arch, tx_size, dc, dc_left, dc_top, dc_128, v, h,  \
+                        paeth, smooth, smooth_v, smooth_h)                 \
+  TEST(arch, DISABLED_##TestIntraPred_##tx_size) {                         \
+    static const AvxPredFunc aom_intra_pred[] = {                          \
+      dc, dc_left, dc_top, dc_128, v, h, paeth, smooth, smooth_v, smooth_h \
+    };                                                                     \
+    TestIntraPred(tx_size, aom_intra_pred, kSignatures[tx_size]);          \
+  }
+
+// -----------------------------------------------------------------------------
+// 4x4, 4x8, 4x16
+
+INTRA_PRED_TEST(C, TX_4X4, aom_dc_predictor_4x4_c, aom_dc_left_predictor_4x4_c,
+                aom_dc_top_predictor_4x4_c, aom_dc_128_predictor_4x4_c,
+                aom_v_predictor_4x4_c, aom_h_predictor_4x4_c,
+                aom_paeth_predictor_4x4_c, aom_smooth_predictor_4x4_c,
+                aom_smooth_v_predictor_4x4_c, aom_smooth_h_predictor_4x4_c)
+INTRA_PRED_TEST(C, TX_4X8, aom_dc_predictor_4x8_c, aom_dc_left_predictor_4x8_c,
+                aom_dc_top_predictor_4x8_c, aom_dc_128_predictor_4x8_c,
+                aom_v_predictor_4x8_c, aom_h_predictor_4x8_c,
+                aom_paeth_predictor_4x8_c, aom_smooth_predictor_4x8_c,
+                aom_smooth_v_predictor_4x8_c, aom_smooth_h_predictor_4x8_c)
+INTRA_PRED_TEST(C, TX_4X16, aom_dc_predictor_4x16_c,
+                aom_dc_left_predictor_4x16_c, aom_dc_top_predictor_4x16_c,
+                aom_dc_128_predictor_4x16_c, aom_v_predictor_4x16_c,
+                aom_h_predictor_4x16_c, aom_paeth_predictor_4x16_c,
+                aom_smooth_predictor_4x16_c, aom_smooth_v_predictor_4x16_c,
+                aom_smooth_h_predictor_4x16_c)
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2, TX_4X4, aom_dc_predictor_4x4_sse2,
+                aom_dc_left_predictor_4x4_sse2, aom_dc_top_predictor_4x4_sse2,
+                aom_dc_128_predictor_4x4_sse2, aom_v_predictor_4x4_sse2,
+                aom_h_predictor_4x4_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_4X8, aom_dc_predictor_4x8_sse2,
+                aom_dc_left_predictor_4x8_sse2, aom_dc_top_predictor_4x8_sse2,
+                aom_dc_128_predictor_4x8_sse2, aom_v_predictor_4x8_sse2,
+                aom_h_predictor_4x8_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_4X16, aom_dc_predictor_4x16_sse2,
+                aom_dc_left_predictor_4x16_sse2, aom_dc_top_predictor_4x16_sse2,
+                aom_dc_128_predictor_4x16_sse2, aom_v_predictor_4x16_sse2,
+                aom_h_predictor_4x16_sse2, nullptr, nullptr, nullptr, nullptr)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TX_4X4, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_4x4_ssse3,
+                aom_smooth_predictor_4x4_ssse3,
+                aom_smooth_v_predictor_4x4_ssse3,
+                aom_smooth_h_predictor_4x4_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_4X8, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_4x8_ssse3,
+                aom_smooth_predictor_4x8_ssse3,
+                aom_smooth_v_predictor_4x8_ssse3,
+                aom_smooth_h_predictor_4x8_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_4X16, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_4x16_ssse3,
+                aom_smooth_predictor_4x16_ssse3,
+                aom_smooth_v_predictor_4x16_ssse3,
+                aom_smooth_h_predictor_4x16_ssse3)
+#endif  // HAVE_SSSE3
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TX_4X4, aom_dc_predictor_4x4_neon,
+                aom_dc_left_predictor_4x4_neon, aom_dc_top_predictor_4x4_neon,
+                aom_dc_128_predictor_4x4_neon, aom_v_predictor_4x4_neon,
+                aom_h_predictor_4x4_neon, aom_paeth_predictor_4x4_neon,
+                aom_smooth_predictor_4x4_neon, aom_smooth_v_predictor_4x4_neon,
+                aom_smooth_h_predictor_4x4_neon)
+INTRA_PRED_TEST(NEON, TX_4X8, aom_dc_predictor_4x8_neon,
+                aom_dc_left_predictor_4x8_neon, aom_dc_top_predictor_4x8_neon,
+                aom_dc_128_predictor_4x8_neon, aom_v_predictor_4x8_neon,
+                aom_h_predictor_4x8_neon, aom_paeth_predictor_4x8_neon,
+                aom_smooth_predictor_4x8_neon, aom_smooth_v_predictor_4x8_neon,
+                aom_smooth_h_predictor_4x8_neon)
+INTRA_PRED_TEST(NEON, TX_4X16, aom_dc_predictor_4x16_neon,
+                aom_dc_left_predictor_4x16_neon, aom_dc_top_predictor_4x16_neon,
+                aom_dc_128_predictor_4x16_neon, aom_v_predictor_4x16_neon,
+                aom_h_predictor_4x16_neon, aom_paeth_predictor_4x16_neon,
+                aom_smooth_predictor_4x16_neon,
+                aom_smooth_v_predictor_4x16_neon,
+                aom_smooth_h_predictor_4x16_neon)
+#endif  // HAVE_NEON
+
+// -----------------------------------------------------------------------------
+// 8x8, 8x4, 8x16, 8x32
+
+INTRA_PRED_TEST(C, TX_8X8, aom_dc_predictor_8x8_c, aom_dc_left_predictor_8x8_c,
+                aom_dc_top_predictor_8x8_c, aom_dc_128_predictor_8x8_c,
+                aom_v_predictor_8x8_c, aom_h_predictor_8x8_c,
+                aom_paeth_predictor_8x8_c, aom_smooth_predictor_8x8_c,
+                aom_smooth_v_predictor_8x8_c, aom_smooth_h_predictor_8x8_c)
+
+INTRA_PRED_TEST(C, TX_8X4, aom_dc_predictor_8x4_c, aom_dc_left_predictor_8x4_c,
+                aom_dc_top_predictor_8x4_c, aom_dc_128_predictor_8x4_c,
+                aom_v_predictor_8x4_c, aom_h_predictor_8x4_c,
+                aom_paeth_predictor_8x4_c, aom_smooth_predictor_8x4_c,
+                aom_smooth_v_predictor_8x4_c, aom_smooth_h_predictor_8x4_c)
+INTRA_PRED_TEST(C, TX_8X16, aom_dc_predictor_8x16_c,
+                aom_dc_left_predictor_8x16_c, aom_dc_top_predictor_8x16_c,
+                aom_dc_128_predictor_8x16_c, aom_v_predictor_8x16_c,
+                aom_h_predictor_8x16_c, aom_paeth_predictor_8x16_c,
+                aom_smooth_predictor_8x16_c, aom_smooth_v_predictor_8x16_c,
+                aom_smooth_h_predictor_8x16_c)
+INTRA_PRED_TEST(C, TX_8X32, aom_dc_predictor_8x32_c,
+                aom_dc_left_predictor_8x32_c, aom_dc_top_predictor_8x32_c,
+                aom_dc_128_predictor_8x32_c, aom_v_predictor_8x32_c,
+                aom_h_predictor_8x32_c, aom_paeth_predictor_8x32_c,
+                aom_smooth_predictor_8x32_c, aom_smooth_v_predictor_8x32_c,
+                aom_smooth_h_predictor_8x32_c)
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2, TX_8X8, aom_dc_predictor_8x8_sse2,
+                aom_dc_left_predictor_8x8_sse2, aom_dc_top_predictor_8x8_sse2,
+                aom_dc_128_predictor_8x8_sse2, aom_v_predictor_8x8_sse2,
+                aom_h_predictor_8x8_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_8X4, aom_dc_predictor_8x4_sse2,
+                aom_dc_left_predictor_8x4_sse2, aom_dc_top_predictor_8x4_sse2,
+                aom_dc_128_predictor_8x4_sse2, aom_v_predictor_8x4_sse2,
+                aom_h_predictor_8x4_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_8X16, aom_dc_predictor_8x16_sse2,
+                aom_dc_left_predictor_8x16_sse2, aom_dc_top_predictor_8x16_sse2,
+                aom_dc_128_predictor_8x16_sse2, aom_v_predictor_8x16_sse2,
+                aom_h_predictor_8x16_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_8X32, aom_dc_predictor_8x32_sse2,
+                aom_dc_left_predictor_8x32_sse2, aom_dc_top_predictor_8x32_sse2,
+                aom_dc_128_predictor_8x32_sse2, aom_v_predictor_8x32_sse2,
+                aom_h_predictor_8x32_sse2, nullptr, nullptr, nullptr, nullptr)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TX_8X8, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_8x8_ssse3,
+                aom_smooth_predictor_8x8_ssse3,
+                aom_smooth_v_predictor_8x8_ssse3,
+                aom_smooth_h_predictor_8x8_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_8X4, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_8x4_ssse3,
+                aom_smooth_predictor_8x4_ssse3,
+                aom_smooth_v_predictor_8x4_ssse3,
+                aom_smooth_h_predictor_8x4_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_8X16, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_8x16_ssse3,
+                aom_smooth_predictor_8x16_ssse3,
+                aom_smooth_v_predictor_8x16_ssse3,
+                aom_smooth_h_predictor_8x16_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_8X32, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_8x32_ssse3,
+                aom_smooth_predictor_8x32_ssse3,
+                aom_smooth_v_predictor_8x32_ssse3,
+                aom_smooth_h_predictor_8x32_ssse3)
+#endif  // HAVE_SSSE3
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TX_8X8, aom_dc_predictor_8x8_neon,
+                aom_dc_left_predictor_8x8_neon, aom_dc_top_predictor_8x8_neon,
+                aom_dc_128_predictor_8x8_neon, aom_v_predictor_8x8_neon,
+                aom_h_predictor_8x8_neon, aom_paeth_predictor_8x8_neon,
+                aom_smooth_predictor_8x8_neon, aom_smooth_v_predictor_8x8_neon,
+                aom_smooth_h_predictor_8x8_neon)
+INTRA_PRED_TEST(NEON, TX_8X4, aom_dc_predictor_8x4_neon,
+                aom_dc_left_predictor_8x4_neon, aom_dc_top_predictor_8x4_neon,
+                aom_dc_128_predictor_8x4_neon, aom_v_predictor_8x4_neon,
+                aom_h_predictor_8x4_neon, aom_paeth_predictor_8x4_neon,
+                aom_smooth_predictor_8x4_neon, aom_smooth_v_predictor_8x4_neon,
+                aom_smooth_h_predictor_8x4_neon)
+INTRA_PRED_TEST(NEON, TX_8X16, aom_dc_predictor_8x16_neon,
+                aom_dc_left_predictor_8x16_neon, aom_dc_top_predictor_8x16_neon,
+                aom_dc_128_predictor_8x16_neon, aom_v_predictor_8x16_neon,
+                aom_h_predictor_8x16_neon, aom_paeth_predictor_8x16_neon,
+                aom_smooth_predictor_8x16_neon,
+                aom_smooth_v_predictor_8x16_neon,
+                aom_smooth_h_predictor_8x16_neon)
+INTRA_PRED_TEST(NEON, TX_8X32, aom_dc_predictor_8x32_neon,
+                aom_dc_left_predictor_8x32_neon, aom_dc_top_predictor_8x32_neon,
+                aom_dc_128_predictor_8x32_neon, aom_v_predictor_8x32_neon,
+                aom_h_predictor_8x32_neon, aom_paeth_predictor_8x32_neon,
+                aom_smooth_predictor_8x32_neon,
+                aom_smooth_v_predictor_8x32_neon,
+                aom_smooth_h_predictor_8x32_neon)
+#endif  // HAVE_NEON
+
+// -----------------------------------------------------------------------------
+// 16x16, 16x8, 16x32, 16x4, 16x64
+
+INTRA_PRED_TEST(C, TX_16X16, aom_dc_predictor_16x16_c,
+                aom_dc_left_predictor_16x16_c, aom_dc_top_predictor_16x16_c,
+                aom_dc_128_predictor_16x16_c, aom_v_predictor_16x16_c,
+                aom_h_predictor_16x16_c, aom_paeth_predictor_16x16_c,
+                aom_smooth_predictor_16x16_c, aom_smooth_v_predictor_16x16_c,
+                aom_smooth_h_predictor_16x16_c)
+INTRA_PRED_TEST(C, TX_16X8, aom_dc_predictor_16x8_c,
+                aom_dc_left_predictor_16x8_c, aom_dc_top_predictor_16x8_c,
+                aom_dc_128_predictor_16x8_c, aom_v_predictor_16x8_c,
+                aom_h_predictor_16x8_c, aom_paeth_predictor_16x8_c,
+                aom_smooth_predictor_16x8_c, aom_smooth_v_predictor_16x8_c,
+                aom_smooth_h_predictor_16x8_c)
+INTRA_PRED_TEST(C, TX_16X32, aom_dc_predictor_16x32_c,
+                aom_dc_left_predictor_16x32_c, aom_dc_top_predictor_16x32_c,
+                aom_dc_128_predictor_16x32_c, aom_v_predictor_16x32_c,
+                aom_h_predictor_16x32_c, aom_paeth_predictor_16x32_c,
+                aom_smooth_predictor_16x32_c, aom_smooth_v_predictor_16x32_c,
+                aom_smooth_h_predictor_16x32_c)
+INTRA_PRED_TEST(C, TX_16X4, aom_dc_predictor_16x4_c,
+                aom_dc_left_predictor_16x4_c, aom_dc_top_predictor_16x4_c,
+                aom_dc_128_predictor_16x4_c, aom_v_predictor_16x4_c,
+                aom_h_predictor_16x4_c, aom_paeth_predictor_16x4_c,
+                aom_smooth_predictor_16x4_c, aom_smooth_v_predictor_16x4_c,
+                aom_smooth_h_predictor_16x4_c)
+INTRA_PRED_TEST(C, TX_16X64, aom_dc_predictor_16x64_c,
+                aom_dc_left_predictor_16x64_c, aom_dc_top_predictor_16x64_c,
+                aom_dc_128_predictor_16x64_c, aom_v_predictor_16x64_c,
+                aom_h_predictor_16x64_c, aom_paeth_predictor_16x64_c,
+                aom_smooth_predictor_16x64_c, aom_smooth_v_predictor_16x64_c,
+                aom_smooth_h_predictor_16x64_c)
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2, TX_16X16, aom_dc_predictor_16x16_sse2,
+                aom_dc_left_predictor_16x16_sse2,
+                aom_dc_top_predictor_16x16_sse2,
+                aom_dc_128_predictor_16x16_sse2, aom_v_predictor_16x16_sse2,
+                aom_h_predictor_16x16_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_16X8, aom_dc_predictor_16x8_sse2,
+                aom_dc_left_predictor_16x8_sse2, aom_dc_top_predictor_16x8_sse2,
+                aom_dc_128_predictor_16x8_sse2, aom_v_predictor_16x8_sse2,
+                aom_h_predictor_16x8_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_16X32, aom_dc_predictor_16x32_sse2,
+                aom_dc_left_predictor_16x32_sse2,
+                aom_dc_top_predictor_16x32_sse2,
+                aom_dc_128_predictor_16x32_sse2, aom_v_predictor_16x32_sse2,
+                aom_h_predictor_16x32_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_16X64, aom_dc_predictor_16x64_sse2,
+                aom_dc_left_predictor_16x64_sse2,
+                aom_dc_top_predictor_16x64_sse2,
+                aom_dc_128_predictor_16x64_sse2, aom_v_predictor_16x64_sse2,
+                aom_h_predictor_16x64_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_16X4, aom_dc_predictor_16x4_sse2,
+                aom_dc_left_predictor_16x4_sse2, aom_dc_top_predictor_16x4_sse2,
+                aom_dc_128_predictor_16x4_sse2, aom_v_predictor_16x4_sse2,
+                aom_h_predictor_16x4_sse2, nullptr, nullptr, nullptr, nullptr)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TX_16X16, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_16x16_ssse3,
+                aom_smooth_predictor_16x16_ssse3,
+                aom_smooth_v_predictor_16x16_ssse3,
+                aom_smooth_h_predictor_16x16_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_16X8, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_16x8_ssse3,
+                aom_smooth_predictor_16x8_ssse3,
+                aom_smooth_v_predictor_16x8_ssse3,
+                aom_smooth_h_predictor_16x8_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_16X32, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_16x32_ssse3,
+                aom_smooth_predictor_16x32_ssse3,
+                aom_smooth_v_predictor_16x32_ssse3,
+                aom_smooth_h_predictor_16x32_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_16X64, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_16x64_ssse3,
+                aom_smooth_predictor_16x64_ssse3,
+                aom_smooth_v_predictor_16x64_ssse3,
+                aom_smooth_h_predictor_16x64_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_16X4, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_16x4_ssse3,
+                aom_smooth_predictor_16x4_ssse3,
+                aom_smooth_v_predictor_16x4_ssse3,
+                aom_smooth_h_predictor_16x4_ssse3)
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX2
+INTRA_PRED_TEST(AVX2, TX_16X16, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_16x16_avx2, nullptr, nullptr,
+                nullptr)
+INTRA_PRED_TEST(AVX2, TX_16X8, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_16x8_avx2, nullptr, nullptr,
+                nullptr)
+INTRA_PRED_TEST(AVX2, TX_16X32, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_16x32_avx2, nullptr, nullptr,
+                nullptr)
+INTRA_PRED_TEST(AVX2, TX_16X64, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_16x64_avx2, nullptr, nullptr,
+                nullptr)
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TX_16X16, aom_dc_predictor_16x16_neon,
+                aom_dc_left_predictor_16x16_neon,
+                aom_dc_top_predictor_16x16_neon,
+                aom_dc_128_predictor_16x16_neon, aom_v_predictor_16x16_neon,
+                aom_h_predictor_16x16_neon, aom_paeth_predictor_16x16_neon,
+                aom_smooth_predictor_16x16_neon,
+                aom_smooth_v_predictor_16x16_neon,
+                aom_smooth_h_predictor_16x16_neon)
+INTRA_PRED_TEST(NEON, TX_16X8, aom_dc_predictor_16x8_neon,
+                aom_dc_left_predictor_16x8_neon, aom_dc_top_predictor_16x8_neon,
+                aom_dc_128_predictor_16x8_neon, aom_v_predictor_16x8_neon,
+                aom_h_predictor_16x8_neon, aom_paeth_predictor_16x8_neon,
+                aom_smooth_predictor_16x8_neon,
+                aom_smooth_v_predictor_16x8_neon,
+                aom_smooth_h_predictor_16x8_neon)
+INTRA_PRED_TEST(NEON, TX_16X32, aom_dc_predictor_16x32_neon,
+                aom_dc_left_predictor_16x32_neon,
+                aom_dc_top_predictor_16x32_neon,
+                aom_dc_128_predictor_16x32_neon, aom_v_predictor_16x32_neon,
+                aom_h_predictor_16x32_neon, aom_paeth_predictor_16x32_neon,
+                aom_smooth_predictor_16x32_neon,
+                aom_smooth_v_predictor_16x32_neon,
+                aom_smooth_h_predictor_16x32_neon)
+INTRA_PRED_TEST(NEON, TX_16X4, aom_dc_predictor_16x4_neon,
+                aom_dc_left_predictor_16x4_neon, aom_dc_top_predictor_16x4_neon,
+                aom_dc_128_predictor_16x4_neon, aom_v_predictor_16x4_neon,
+                aom_h_predictor_16x4_neon, aom_paeth_predictor_16x4_neon,
+                aom_smooth_predictor_16x4_neon,
+                aom_smooth_v_predictor_16x4_neon,
+                aom_smooth_h_predictor_16x4_neon)
+INTRA_PRED_TEST(NEON, TX_16X64, aom_dc_predictor_16x64_neon,
+                aom_dc_left_predictor_16x64_neon,
+                aom_dc_top_predictor_16x64_neon,
+                aom_dc_128_predictor_16x64_neon, aom_v_predictor_16x64_neon,
+                aom_h_predictor_16x64_neon, aom_paeth_predictor_16x64_neon,
+                aom_smooth_predictor_16x64_neon,
+                aom_smooth_v_predictor_16x64_neon,
+                aom_smooth_h_predictor_16x64_neon)
+#endif  // HAVE_NEON
+
+// -----------------------------------------------------------------------------
+// 32x32, 32x16, 32x64, 32x8
+
+INTRA_PRED_TEST(C, TX_32X32, aom_dc_predictor_32x32_c,
+                aom_dc_left_predictor_32x32_c, aom_dc_top_predictor_32x32_c,
+                aom_dc_128_predictor_32x32_c, aom_v_predictor_32x32_c,
+                aom_h_predictor_32x32_c, aom_paeth_predictor_32x32_c,
+                aom_smooth_predictor_32x32_c, aom_smooth_v_predictor_32x32_c,
+                aom_smooth_h_predictor_32x32_c)
+INTRA_PRED_TEST(C, TX_32X16, aom_dc_predictor_32x16_c,
+                aom_dc_left_predictor_32x16_c, aom_dc_top_predictor_32x16_c,
+                aom_dc_128_predictor_32x16_c, aom_v_predictor_32x16_c,
+                aom_h_predictor_32x16_c, aom_paeth_predictor_32x16_c,
+                aom_smooth_predictor_32x16_c, aom_smooth_v_predictor_32x16_c,
+                aom_smooth_h_predictor_32x16_c)
+INTRA_PRED_TEST(C, TX_32X64, aom_dc_predictor_32x64_c,
+                aom_dc_left_predictor_32x64_c, aom_dc_top_predictor_32x64_c,
+                aom_dc_128_predictor_32x64_c, aom_v_predictor_32x64_c,
+                aom_h_predictor_32x64_c, aom_paeth_predictor_32x64_c,
+                aom_smooth_predictor_32x64_c, aom_smooth_v_predictor_32x64_c,
+                aom_smooth_h_predictor_32x64_c)
+INTRA_PRED_TEST(C, TX_32X8, aom_dc_predictor_32x8_c,
+                aom_dc_left_predictor_32x8_c, aom_dc_top_predictor_32x8_c,
+                aom_dc_128_predictor_32x8_c, aom_v_predictor_32x8_c,
+                aom_h_predictor_32x8_c, aom_paeth_predictor_32x8_c,
+                aom_smooth_predictor_32x8_c, aom_smooth_v_predictor_32x8_c,
+                aom_smooth_h_predictor_32x8_c)
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2, TX_32X32, aom_dc_predictor_32x32_sse2,
+                aom_dc_left_predictor_32x32_sse2,
+                aom_dc_top_predictor_32x32_sse2,
+                aom_dc_128_predictor_32x32_sse2, aom_v_predictor_32x32_sse2,
+                aom_h_predictor_32x32_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_32X16, aom_dc_predictor_32x16_sse2,
+                aom_dc_left_predictor_32x16_sse2,
+                aom_dc_top_predictor_32x16_sse2,
+                aom_dc_128_predictor_32x16_sse2, aom_v_predictor_32x16_sse2,
+                aom_h_predictor_32x16_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_32X64, aom_dc_predictor_32x64_sse2,
+                aom_dc_left_predictor_32x64_sse2,
+                aom_dc_top_predictor_32x64_sse2,
+                aom_dc_128_predictor_32x64_sse2, aom_v_predictor_32x64_sse2,
+                aom_h_predictor_32x64_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_32X8, aom_dc_predictor_32x8_sse2,
+                aom_dc_left_predictor_32x8_sse2, aom_dc_top_predictor_32x8_sse2,
+                aom_dc_128_predictor_32x8_sse2, aom_v_predictor_32x8_sse2,
+                aom_h_predictor_32x8_sse2, nullptr, nullptr, nullptr, nullptr)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TX_32X32, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_32x32_ssse3,
+                aom_smooth_predictor_32x32_ssse3,
+                aom_smooth_v_predictor_32x32_ssse3,
+                aom_smooth_h_predictor_32x32_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_32X16, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_32x16_ssse3,
+                aom_smooth_predictor_32x16_ssse3,
+                aom_smooth_v_predictor_32x16_ssse3,
+                aom_smooth_h_predictor_32x16_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_32X64, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_32x64_ssse3,
+                aom_smooth_predictor_32x64_ssse3,
+                aom_smooth_v_predictor_32x64_ssse3,
+                aom_smooth_h_predictor_32x64_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_32X8, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_32x8_ssse3,
+                aom_smooth_predictor_32x8_ssse3,
+                aom_smooth_v_predictor_32x8_ssse3,
+                aom_smooth_h_predictor_32x8_ssse3)
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX2
+INTRA_PRED_TEST(AVX2, TX_32X32, aom_dc_predictor_32x32_avx2,
+                aom_dc_left_predictor_32x32_avx2,
+                aom_dc_top_predictor_32x32_avx2,
+                aom_dc_128_predictor_32x32_avx2, aom_v_predictor_32x32_avx2,
+                aom_h_predictor_32x32_avx2, aom_paeth_predictor_32x32_avx2,
+                nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(AVX2, TX_32X16, aom_dc_predictor_32x16_avx2,
+                aom_dc_left_predictor_32x16_avx2,
+                aom_dc_top_predictor_32x16_avx2,
+                aom_dc_128_predictor_32x16_avx2, aom_v_predictor_32x16_avx2,
+                nullptr, aom_paeth_predictor_32x16_avx2, nullptr, nullptr,
+                nullptr)
+INTRA_PRED_TEST(AVX2, TX_32X64, aom_dc_predictor_32x64_avx2,
+                aom_dc_left_predictor_32x64_avx2,
+                aom_dc_top_predictor_32x64_avx2,
+                aom_dc_128_predictor_32x64_avx2, aom_v_predictor_32x64_avx2,
+                nullptr, aom_paeth_predictor_32x64_avx2, nullptr, nullptr,
+                nullptr)
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TX_32X32, aom_dc_predictor_32x32_neon,
+                aom_dc_left_predictor_32x32_neon,
+                aom_dc_top_predictor_32x32_neon,
+                aom_dc_128_predictor_32x32_neon, aom_v_predictor_32x32_neon,
+                aom_h_predictor_32x32_neon, aom_paeth_predictor_32x32_neon,
+                aom_smooth_predictor_32x32_neon,
+                aom_smooth_v_predictor_32x32_neon,
+                aom_smooth_h_predictor_32x32_neon)
+INTRA_PRED_TEST(NEON, TX_32X16, aom_dc_predictor_32x16_neon,
+                aom_dc_left_predictor_32x16_neon,
+                aom_dc_top_predictor_32x16_neon,
+                aom_dc_128_predictor_32x16_neon, aom_v_predictor_32x16_neon,
+                aom_h_predictor_32x16_neon, aom_paeth_predictor_32x16_neon,
+                aom_smooth_predictor_32x16_neon,
+                aom_smooth_v_predictor_32x16_neon,
+                aom_smooth_h_predictor_32x16_neon)
+INTRA_PRED_TEST(NEON, TX_32X64, aom_dc_predictor_32x64_neon,
+                aom_dc_left_predictor_32x64_neon,
+                aom_dc_top_predictor_32x64_neon,
+                aom_dc_128_predictor_32x64_neon, aom_v_predictor_32x64_neon,
+                aom_h_predictor_32x64_neon, aom_paeth_predictor_32x64_neon,
+                aom_smooth_predictor_32x64_neon,
+                aom_smooth_v_predictor_32x64_neon,
+                aom_smooth_h_predictor_32x64_neon)
+INTRA_PRED_TEST(NEON, TX_32X8, aom_dc_predictor_32x8_neon,
+                aom_dc_left_predictor_32x8_neon, aom_dc_top_predictor_32x8_neon,
+                aom_dc_128_predictor_32x8_neon, aom_v_predictor_32x8_neon,
+                aom_h_predictor_32x8_neon, aom_paeth_predictor_32x8_neon,
+                aom_smooth_predictor_32x8_neon,
+                aom_smooth_v_predictor_32x8_neon,
+                aom_smooth_h_predictor_32x8_neon)
+#endif  // HAVE_NEON
+
+// -----------------------------------------------------------------------------
+// 64x64, 64x32, 64x16
+
+INTRA_PRED_TEST(C, TX_64X64, aom_dc_predictor_64x64_c,
+                aom_dc_left_predictor_64x64_c, aom_dc_top_predictor_64x64_c,
+                aom_dc_128_predictor_64x64_c, aom_v_predictor_64x64_c,
+                aom_h_predictor_64x64_c, aom_paeth_predictor_64x64_c,
+                aom_smooth_predictor_64x64_c, aom_smooth_v_predictor_64x64_c,
+                aom_smooth_h_predictor_64x64_c)
+INTRA_PRED_TEST(C, TX_64X32, aom_dc_predictor_64x32_c,
+                aom_dc_left_predictor_64x32_c, aom_dc_top_predictor_64x32_c,
+                aom_dc_128_predictor_64x32_c, aom_v_predictor_64x32_c,
+                aom_h_predictor_64x32_c, aom_paeth_predictor_64x32_c,
+                aom_smooth_predictor_64x32_c, aom_smooth_v_predictor_64x32_c,
+                aom_smooth_h_predictor_64x32_c)
+INTRA_PRED_TEST(C, TX_64X16, aom_dc_predictor_64x16_c,
+                aom_dc_left_predictor_64x16_c, aom_dc_top_predictor_64x16_c,
+                aom_dc_128_predictor_64x16_c, aom_v_predictor_64x16_c,
+                aom_h_predictor_64x16_c, aom_paeth_predictor_64x16_c,
+                aom_smooth_predictor_64x16_c, aom_smooth_v_predictor_64x16_c,
+                aom_smooth_h_predictor_64x16_c)
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2, TX_64X64, aom_dc_predictor_64x64_sse2,
+                aom_dc_left_predictor_64x64_sse2,
+                aom_dc_top_predictor_64x64_sse2,
+                aom_dc_128_predictor_64x64_sse2, aom_v_predictor_64x64_sse2,
+                aom_h_predictor_64x64_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_64X32, aom_dc_predictor_64x32_sse2,
+                aom_dc_left_predictor_64x32_sse2,
+                aom_dc_top_predictor_64x32_sse2,
+                aom_dc_128_predictor_64x32_sse2, aom_v_predictor_64x32_sse2,
+                aom_h_predictor_64x32_sse2, nullptr, nullptr, nullptr, nullptr)
+INTRA_PRED_TEST(SSE2, TX_64X16, aom_dc_predictor_64x16_sse2,
+                aom_dc_left_predictor_64x16_sse2,
+                aom_dc_top_predictor_64x16_sse2,
+                aom_dc_128_predictor_64x16_sse2, aom_v_predictor_64x16_sse2,
+                aom_h_predictor_64x16_sse2, nullptr, nullptr, nullptr, nullptr)
+#endif
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TX_64X64, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_64x64_ssse3,
+                aom_smooth_predictor_64x64_ssse3,
+                aom_smooth_v_predictor_64x64_ssse3,
+                aom_smooth_h_predictor_64x64_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_64X32, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_64x32_ssse3,
+                aom_smooth_predictor_64x32_ssse3,
+                aom_smooth_v_predictor_64x32_ssse3,
+                aom_smooth_h_predictor_64x32_ssse3)
+INTRA_PRED_TEST(SSSE3, TX_64X16, nullptr, nullptr, nullptr, nullptr, nullptr,
+                nullptr, aom_paeth_predictor_64x16_ssse3,
+                aom_smooth_predictor_64x16_ssse3,
+                aom_smooth_v_predictor_64x16_ssse3,
+                aom_smooth_h_predictor_64x16_ssse3)
+#endif
+
+#if HAVE_AVX2
+INTRA_PRED_TEST(AVX2, TX_64X64, aom_dc_predictor_64x64_avx2,
+                aom_dc_left_predictor_64x64_avx2,
+                aom_dc_top_predictor_64x64_avx2,
+                aom_dc_128_predictor_64x64_avx2, aom_v_predictor_64x64_avx2,
+                nullptr, aom_paeth_predictor_64x64_avx2, nullptr, nullptr,
+                nullptr)
+INTRA_PRED_TEST(AVX2, TX_64X32, aom_dc_predictor_64x32_avx2,
+                aom_dc_left_predictor_64x32_avx2,
+                aom_dc_top_predictor_64x32_avx2,
+                aom_dc_128_predictor_64x32_avx2, aom_v_predictor_64x32_avx2,
+                nullptr, aom_paeth_predictor_64x32_avx2, nullptr, nullptr,
+                nullptr)
+INTRA_PRED_TEST(AVX2, TX_64X16, aom_dc_predictor_64x16_avx2,
+                aom_dc_left_predictor_64x16_avx2,
+                aom_dc_top_predictor_64x16_avx2,
+                aom_dc_128_predictor_64x16_avx2, aom_v_predictor_64x16_avx2,
+                nullptr, aom_paeth_predictor_64x16_avx2, nullptr, nullptr,
+                nullptr)
+#endif
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TX_64X64, aom_dc_predictor_64x64_neon,
+                aom_dc_left_predictor_64x64_neon,
+                aom_dc_top_predictor_64x64_neon,
+                aom_dc_128_predictor_64x64_neon, aom_v_predictor_64x64_neon,
+                aom_h_predictor_64x64_neon, aom_paeth_predictor_64x64_neon,
+                aom_smooth_predictor_64x64_neon,
+                aom_smooth_v_predictor_64x64_neon,
+                aom_smooth_h_predictor_64x64_neon)
+INTRA_PRED_TEST(NEON, TX_64X32, aom_dc_predictor_64x32_neon,
+                aom_dc_left_predictor_64x32_neon,
+                aom_dc_top_predictor_64x32_neon,
+                aom_dc_128_predictor_64x32_neon, aom_v_predictor_64x32_neon,
+                aom_h_predictor_64x32_neon, aom_paeth_predictor_64x32_neon,
+                aom_smooth_predictor_64x32_neon,
+                aom_smooth_v_predictor_64x32_neon,
+                aom_smooth_h_predictor_64x32_neon)
+INTRA_PRED_TEST(NEON, TX_64X16, aom_dc_predictor_64x16_neon,
+                aom_dc_left_predictor_64x16_neon,
+                aom_dc_top_predictor_64x16_neon,
+                aom_dc_128_predictor_64x16_neon, aom_v_predictor_64x16_neon,
+                aom_h_predictor_64x16_neon, aom_paeth_predictor_64x16_neon,
+                aom_smooth_predictor_64x16_neon,
+                aom_smooth_v_predictor_64x16_neon,
+                aom_smooth_h_predictor_64x16_neon)
+#endif  // HAVE_NEON
+
+#if CONFIG_AV1_HIGHBITDEPTH
+// -----------------------------------------------------------------------------
+// High Bitdepth
+namespace {
+
+typedef void (*AvxHighbdPredFunc)(uint16_t *dst, ptrdiff_t y_stride,
+                                  const uint16_t *above, const uint16_t *left,
+                                  int bd);
+
+typedef IntraPredTestMem<uint16_t> Av1HighbdIntraPredTestMem;
+
+void TestHighbdIntraPred(TX_SIZE tx_size, AvxHighbdPredFunc const *pred_funcs,
+                         const char *const signatures[]) {
+  const int block_width = tx_size_wide[tx_size];
+  const int block_height = tx_size_high[tx_size];
+  const int num_pixels_per_test =
+      block_width * block_height * kNumAv1IntraFuncs;
+  const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
+  Av1HighbdIntraPredTestMem intra_pred_test_mem;
+  const int bd = 12;
+  intra_pred_test_mem.Init(block_width, block_height, bd);
+
+  for (int k = 0; k < kNumAv1IntraFuncs; ++k) {
+    if (pred_funcs[k] == nullptr) continue;
+    memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src,
+           sizeof(intra_pred_test_mem.src));
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
+      pred_funcs[k](intra_pred_test_mem.src, intra_pred_test_mem.stride,
+                    intra_pred_test_mem.above, intra_pred_test_mem.left, bd);
+    }
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time =
+        static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
+    CheckMd5Signature(
+        tx_size, true, signatures, intra_pred_test_mem.src,
+        intra_pred_test_mem.num_pixels * sizeof(*intra_pred_test_mem.src),
+        elapsed_time, k);
+  }
+}
+
+static const char *const kHighbdSignatures[TX_SIZES_ALL][kNumAv1IntraFuncs] = {
+  {
+      // 4X4
+      "11f74af6c5737df472f3275cbde062fa",
+      "51bea056b6447c93f6eb8f6b7e8f6f71",
+      "27e97f946766331795886f4de04c5594",
+      "53ab15974b049111fb596c5168ec7e3f",
+      "f0b640bb176fbe4584cf3d32a9b0320a",
+      "729783ca909e03afd4b47111c80d967b",
+      "6e30009c45474a22032678b1bd579c8f",
+      "e57cba016d808aa8a35619df2a65f049",
+      "55a6c37f39afcbbf5abca4a985b96459",
+      "a623d45b37dafec1f8a75c4c5218913d",
+  },
+  {
+      // 8X8
+      "03da8829fe94663047fd108c5fcaa71d",
+      "ecdb37b8120a2d3a4c706b016bd1bfd7",
+      "1d4543ed8d2b9368cb96898095fe8a75",
+      "f791c9a67b913cbd82d9da8ecede30e2",
+      "065c70646f4dbaff913282f55a45a441",
+      "51f87123616662ef7c35691497dfd0ba",
+      "85c01ba03df68f9ece7bd3fa0f8980e6",
+      "ad19b7dac092f56df6d054e1f67f21e7",
+      "0edc415b5dd7299f7a34fb9f71d31d78",
+      "2bc8ec19e9f4b77a64b8a0a1f6aec7e7",
+  },
+  {
+      // 16X16
+      "e33cb3f56a878e2fddb1b2fc51cdd275",
+      "c7bff6f04b6052c8ab335d726dbbd52d",
+      "d0b0b47b654a9bcc5c6008110a44589b",
+      "78f5da7b10b2b9ab39f114a33b6254e9",
+      "c78e31d23831abb40d6271a318fdd6f3",
+      "90d1347f4ec9198a0320daecb6ff90b8",
+      "e63ded54ab3d0e8728b6f24d4f01e53f",
+      "35ce21fbe0ea114c089fc3489a78155d",
+      "f277f6ef8e4d717f1f0dfe2706ac197d",
+      "e8014d3f41256976c02e0f1e622ba2b9",
+  },
+  {
+      // 32X32
+      "a3e8056ba7e36628cce4917cd956fedd",
+      "cc7d3024fe8748b512407edee045377e",
+      "2aab0a0f330a1d3e19b8ecb8f06387a3",
+      "a547bc3fb7b06910bf3973122a426661",
+      "26f712514da95042f93d6e8dc8e431dc",
+      "bb08c6e16177081daa3d936538dbc2e3",
+      "84bf83f94a51b33654ca940c6f8bc057",
+      "7168b03fc31bf29596a344d6a35d007c",
+      "b073a70d3672f1282236994f5d12e94b",
+      "c51607aebad5dcb3c1e3b58ef9e5b84e",
+  },
+  {
+      // 64X64
+      "a6baa0d4bfb2269a94c7a38f86a4bccf",
+      "3f1ef5f473a49eba743f17a3324adf9d",
+      "12ac11889ae5f55b7781454efd706a6a",
+      "d9a906c0e692b22e1b4414e71a704b7e",
+      "47d4cadd56f70c11ff8f3e5d8df81161",
+      "de997744cf24c16c5ac2a36b02b351cc",
+      "23781211ae178ddeb6c4bb97a6bd7d83",
+      "a79d2e28340ca34b9e37daabbf030f63",
+      "0372bd3ddfc258750a6ac106b70587f4",
+      "228ef625d9460cbf6fa253a16a730976",
+  },
+  {
+      // 4X8
+      "22d519b796d59644043466320e4ccd14",
+      "09513a738c49b3f9542d27f34abbe1d5",
+      "807ae5e8813443ff01e71be6efacfb69",
+      "cbfa18d0293430b6e9708b0be1fd2394",
+      "346c354c34ec7fa780b576db355dab88",
+      "f97dae85c35359632380b09ca98d611e",
+      "698ae351d8896d89ed9e4e67b6e53eda",
+      "dcc197034a9c45a3d8238bf085835f4e",
+      "7a35e2c42ffdc2efc2d6d1d75a100fc7",
+      "41ab6cebd4516c87a91b2a593e2c2506",
+  },
+  {
+      // 8X4
+      "d58cd4c4bf3b7bbaa5db5e1a5622ec78",
+      "6e572c35aa782d00cafcb99e9ea047ea",
+      "e8c22a3702b416dc9ab974505afbed09",
+      "aaa4e4762a795aad7ad74de0c662c4e4",
+      "a19f9101967383c3dcbd516dc317a291",
+      "9ab8cb91f1a595b9ebe3fe8de58031aa",
+      "2cf9021d5f1169268699807ee118b65f",
+      "ee9605fcbd6fb871f1c5cd81a6989327",
+      "b4871af8316089e3e23522175df7e93f",
+      "d33301e1c2cb173be46792a22d19881a",
+  },
+  {
+      // 8X16
+      "4562de1d0336610880fdd5685498a9ec",
+      "16310fa7076394f16fc85c4b149d89c9",
+      "0e94af88e1dc573b6f0f499cddd1f530",
+      "dfd245ee20d091c67809160340365aa9",
+      "d3562504327f70c096c5be23fd8a3747",
+      "601b853558502acbb5135eadd2da117a",
+      "3c624345a723a1b2b1bea05a6a08bc99",
+      "2a9c781de609e0184cc7ab442050f4e5",
+      "0ddc5035c22252747126b61fc238c74d",
+      "e43f5d83bab759af69c7b6773fc8f9b2",
+  },
+  {
+      // 16X8
+      "a57d6b5a9bfd30c29591d8717ace9c51",
+      "f5907ba97ee6c53e339e953fc8d845ee",
+      "ea3aa727913ce45af06f89dd1808db5f",
+      "408af4f23e48d14b48ee35ae094fcd18",
+      "85c41cbcb5d744f7961e8950026fbffe",
+      "8a4e588a837638887ba671f8d4910485",
+      "b792d8826b67a21757ea7097cff9e05b",
+      "f94ce7101bb87fd3bb9312112527dbf4",
+      "688c6660a6dc6fa61fa1aa38e708c209",
+      "0cdf641b4f81d69509c92ae0b93ef5ff",
+  },
+  {
+      // 16X32
+      "aee4b3b0e3cc02d48e2c40d77f807927",
+      "8baef2b2e789f79c8df9d90ad10f34a4",
+      "038c38ee3c4f090bb8d736eab136aafc",
+      "1a3de2aaeaffd68a9fd6c7f6557b83f3",
+      "385c6e0ea29421dd81011a2934641e26",
+      "6cf96c285d1a2d4787f955dad715b08c",
+      "2d7f75dcd73b9528c8396279ff09ff3a",
+      "5a63cd1841e4ed470e4ca5ef845f2281",
+      "610d899ca945fbead33287d4335a8b32",
+      "6bafaad81fce37be46730187e78d8b11",
+  },
+  {
+      // 32X16
+      "290b23c9f5a1de7905bfa71a942da29b",
+      "701e7b82593c66da5052fc4b6afd79ce",
+      "4da828c5455cd246735a663fbb204989",
+      "e3fbeaf234efece8dbd752b77226200c",
+      "4d1d8c969f05155a7e7e84cf7aad021b",
+      "c22e4877c2c946d5bdc0d542e29e70cf",
+      "8ac1ce815e7780500f842b0beb0bb980",
+      "9fee2e2502b507f25bfad30a55b0b610",
+      "4ced9c212ec6f9956e27f68a91b59fef",
+      "4a7a0b93f138bb0863e4e465b01ec0b1",
+  },
+  {
+      // 32X64
+      "ad9cfc395a5c5644a21d958c7274ac14",
+      "f29d6d03c143ddf96fef04c19f2c8333",
+      "a8bdc852ef704dd4975c61893e8fbc3f",
+      "7d0bd7dea26226741dbca9a97f27fa74",
+      "45c27c5cca9a91b6ae8379feb0881c9f",
+      "8a0b78df1e001b85c874d686eac4aa1b",
+      "ce9fa75fac54a3f6c0cc3f2083b938f1",
+      "c0dca10d88762c954af18dc9e3791a39",
+      "61df229eddfccab913b8fda4bb02f9ac",
+      "4f4df6bc8d50a5600b573f0e44d70e66",
+  },
+  {
+      // 64X32
+      "db9d82921fd88b24fdff6f849f2f9c87",
+      "5ecc7fdc52d2f575ad4f2d0e9e6b1e11",
+      "b4581311a0a73d95dfac7f8f44591032",
+      "68bd283cfd1a125f6b2ee47cee874d36",
+      "804179f05c032908a5e36077bb87c994",
+      "fc5fd041a8ee779015394d0c066ee43c",
+      "68f5579ccadfe9a1baafb158334a3db2",
+      "fe237e45e215ab06d79046da9ad71e84",
+      "9a8a938a6824551bf7d21b8fd1d70ea1",
+      "eb7332f2017cd96882c76e7136aeaf53",
+  },
+  {
+      // 4X16
+      "7bafa307d507747b8132e7735b7f1c73",
+      "e58bc2d8213a97d1fea9cfb73d7a9633",
+      "435f8a8e8bbf14dbf2fe16b2be9e97aa",
+      "1d0e767b68d84acbfb50b7a04e633836",
+      "5f713bd7b324fe73bb7063e35ee14e5e",
+      "0dac4e1fa3d59814202715468c01ed56",
+      "47709d1db4a330c7a8900f450e6fddd1",
+      "258e0b930bb27db28f05da9cf7d1ee7c",
+      "36cf030fbae767912593efea045bfff5",
+      "248d7aceabb7499febae663fae41a920",
+  },
+  {
+      // 16X4
+      "04dde98e632670e393704742c89f9067",
+      "8c72543f1664651ae1fa08e2ac0adb9b",
+      "2354a2cdc2773aa2df8ab4010db1be39",
+      "6300ad3221c26da39b10e0e6d87ee3be",
+      "8ea30b661c6ba60b28d3167f19e449b8",
+      "fb6c1e4ff101a371cede63c2955cdb7e",
+      "a517c06433d6d7927b16a72184a23e92",
+      "393828be5d62ab6c48668bea5e2f801a",
+      "b1e510c542013eb9d6fb188dea2ce90a",
+      "569a8f2fe01679ca216535ecbcdccb62",
+  },
+  {
+      // 8X32
+      "9d541865c185ca7607852852613ac1fc",
+      "b96be67f08c6b5fa5ebd3411299c2f7c",
+      "75a2dcf50004b9d188849b048239767e",
+      "429492ff415c9fd9b050d73b2ad500f8",
+      "64b3606c1ccd036bd766bd5711392cf4",
+      "cb59844a0f01660ac955bae3511f1100",
+      "3e076155b7a70e8828618e3f33b51e3d",
+      "ed2d1f597ab7c50beff690f737cf9726",
+      "7909c6a26aaf20c59d996d3e5b5f9c29",
+      "965798807240c98c6f7cc9b457ed0773",
+  },
+  {
+      // 32X8
+      "36f391aa31619eec1f4d9ee95ea454cc",
+      "b82648f14eeba2527357cb50bc3223cb",
+      "7a7b2adf429125e8bee9d1d00a66e13f",
+      "4198e4d6ba503b7cc2d7e96bb845f661",
+      "96c160d2ec1be9fe0cdea9682f14d257",
+      "19a450bcebaa75afb4fc6bd1fd6434af",
+      "2bd2e35967d43d0ec1c6587a36f204d5",
+      "49799a99aa4ccfbd989bee92a99422f1",
+      "955530e99813812a74659edeac3f5475",
+      "f0316b84e378a19cd11b19a6e40b2914",
+  },
+  {
+      // 16X64
+      "8cba1b70a0bde29e8ef235cedc5faa7d",
+      "96d00ddc7537bf7f196006591b733b4e",
+      "cbf69d5d157c9f3355a4757b1d6e3414",
+      "3ac1f642019493dec1b737d7a3a1b4e5",
+      "35f9ee300d7fa3c97338e81a6f21dcd4",
+      "aae335442e77c8ebc280f16ea50ba9c7",
+      "a6140fdac2278644328be094d88731db",
+      "2df93621b6ff100f7008432d509f4161",
+      "c77bf5aee39e7ed4a3dd715f816f452a",
+      "02109bd63557d90225c32a8f1338258e",
+  },
+  {
+      // 64X16
+      "a5e2f9fb685d5f4a048e9a96affd25a4",
+      "1348f249690d9eefe09d9ad7ead2c801",
+      "525da4b187acd81b1ff1116b60461141",
+      "e99d072de858094c98b01bd4a6772634",
+      "873bfa9dc24693f19721f7c8d527f7d3",
+      "0acfc6507bd3468e9679efc127d6e4b9",
+      "57d03f8d079c7264854e22ac1157cfae",
+      "6c2c4036f70c7d957a9399b5436c0774",
+      "42b8e4a97b7f8416c72a5148c031c0b1",
+      "a38a2c5f79993dfae8530e9e25800893",
+  },
+};
+
+}  // namespace
+
+#define HIGHBD_INTRA_PRED_TEST(arch, tx_size, dc, dc_left, dc_top, dc_128, v, \
+                               h, paeth, smooth, smooth_v, smooth_h)          \
+  TEST(arch, DISABLED_##TestHighbdIntraPred_##tx_size) {                      \
+    static const AvxHighbdPredFunc aom_intra_pred[] = {                       \
+      dc, dc_left, dc_top, dc_128, v, h, paeth, smooth, smooth_v, smooth_h    \
+    };                                                                        \
+    TestHighbdIntraPred(tx_size, aom_intra_pred, kHighbdSignatures[tx_size]); \
+  }
+
+// -----------------------------------------------------------------------------
+// 4x4, 4x8, 4x16
+
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_4X4, aom_highbd_dc_predictor_4x4_c,
+    aom_highbd_dc_left_predictor_4x4_c, aom_highbd_dc_top_predictor_4x4_c,
+    aom_highbd_dc_128_predictor_4x4_c, aom_highbd_v_predictor_4x4_c,
+    aom_highbd_h_predictor_4x4_c, aom_highbd_paeth_predictor_4x4_c,
+    aom_highbd_smooth_predictor_4x4_c, aom_highbd_smooth_v_predictor_4x4_c,
+    aom_highbd_smooth_h_predictor_4x4_c)
+
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_4X8, aom_highbd_dc_predictor_4x8_c,
+    aom_highbd_dc_left_predictor_4x8_c, aom_highbd_dc_top_predictor_4x8_c,
+    aom_highbd_dc_128_predictor_4x8_c, aom_highbd_v_predictor_4x8_c,
+    aom_highbd_h_predictor_4x8_c, aom_highbd_paeth_predictor_4x8_c,
+    aom_highbd_smooth_predictor_4x8_c, aom_highbd_smooth_v_predictor_4x8_c,
+    aom_highbd_smooth_h_predictor_4x8_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_4X16, aom_highbd_dc_predictor_4x16_c,
+    aom_highbd_dc_left_predictor_4x16_c, aom_highbd_dc_top_predictor_4x16_c,
+    aom_highbd_dc_128_predictor_4x16_c, aom_highbd_v_predictor_4x16_c,
+    aom_highbd_h_predictor_4x16_c, aom_highbd_paeth_predictor_4x16_c,
+    aom_highbd_smooth_predictor_4x16_c, aom_highbd_smooth_v_predictor_4x16_c,
+    aom_highbd_smooth_h_predictor_4x16_c)
+#if HAVE_SSE2
+HIGHBD_INTRA_PRED_TEST(SSE2, TX_4X4, aom_highbd_dc_predictor_4x4_sse2,
+                       aom_highbd_dc_left_predictor_4x4_sse2,
+                       aom_highbd_dc_top_predictor_4x4_sse2,
+                       aom_highbd_dc_128_predictor_4x4_sse2,
+                       aom_highbd_v_predictor_4x4_sse2,
+                       aom_highbd_h_predictor_4x4_sse2, nullptr, nullptr,
+                       nullptr, nullptr)
+
+HIGHBD_INTRA_PRED_TEST(SSE2, TX_4X8, aom_highbd_dc_predictor_4x8_sse2,
+                       aom_highbd_dc_left_predictor_4x8_sse2,
+                       aom_highbd_dc_top_predictor_4x8_sse2,
+                       aom_highbd_dc_128_predictor_4x8_sse2,
+                       aom_highbd_v_predictor_4x8_sse2,
+                       aom_highbd_h_predictor_4x8_sse2, nullptr, nullptr,
+                       nullptr, nullptr)
+#endif
+#if HAVE_NEON
+HIGHBD_INTRA_PRED_TEST(NEON, TX_4X4, aom_highbd_dc_predictor_4x4_neon,
+                       aom_highbd_dc_left_predictor_4x4_neon,
+                       aom_highbd_dc_top_predictor_4x4_neon,
+                       aom_highbd_dc_128_predictor_4x4_neon,
+                       aom_highbd_v_predictor_4x4_neon,
+                       aom_highbd_h_predictor_4x4_neon,
+                       aom_highbd_paeth_predictor_4x4_neon,
+                       aom_highbd_smooth_predictor_4x4_neon,
+                       aom_highbd_smooth_v_predictor_4x4_neon,
+                       aom_highbd_smooth_h_predictor_4x4_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_4X8, aom_highbd_dc_predictor_4x8_neon,
+                       aom_highbd_dc_left_predictor_4x8_neon,
+                       aom_highbd_dc_top_predictor_4x8_neon,
+                       aom_highbd_dc_128_predictor_4x8_neon,
+                       aom_highbd_v_predictor_4x8_neon,
+                       aom_highbd_h_predictor_4x8_neon,
+                       aom_highbd_paeth_predictor_4x8_neon,
+                       aom_highbd_smooth_predictor_4x8_neon,
+                       aom_highbd_smooth_v_predictor_4x8_neon,
+                       aom_highbd_smooth_h_predictor_4x8_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_4X16, aom_highbd_dc_predictor_4x16_neon,
+                       aom_highbd_dc_left_predictor_4x16_neon,
+                       aom_highbd_dc_top_predictor_4x16_neon,
+                       aom_highbd_dc_128_predictor_4x16_neon,
+                       aom_highbd_v_predictor_4x16_neon,
+                       aom_highbd_h_predictor_4x16_neon,
+                       aom_highbd_paeth_predictor_4x16_neon,
+                       aom_highbd_smooth_predictor_4x16_neon,
+                       aom_highbd_smooth_v_predictor_4x16_neon,
+                       aom_highbd_smooth_h_predictor_4x16_neon)
+#endif  // HAVE_NEON
+
+// -----------------------------------------------------------------------------
+// 8x8, 8x4, 8x16, 8x32
+
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_8X8, aom_highbd_dc_predictor_8x8_c,
+    aom_highbd_dc_left_predictor_8x8_c, aom_highbd_dc_top_predictor_8x8_c,
+    aom_highbd_dc_128_predictor_8x8_c, aom_highbd_v_predictor_8x8_c,
+    aom_highbd_h_predictor_8x8_c, aom_highbd_paeth_predictor_8x8_c,
+    aom_highbd_smooth_predictor_8x8_c, aom_highbd_smooth_v_predictor_8x8_c,
+    aom_highbd_smooth_h_predictor_8x8_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_8X4, aom_highbd_dc_predictor_8x4_c,
+    aom_highbd_dc_left_predictor_8x4_c, aom_highbd_dc_top_predictor_8x4_c,
+    aom_highbd_dc_128_predictor_8x4_c, aom_highbd_v_predictor_8x4_c,
+    aom_highbd_h_predictor_8x4_c, aom_highbd_paeth_predictor_8x4_c,
+    aom_highbd_smooth_predictor_8x4_c, aom_highbd_smooth_v_predictor_8x4_c,
+    aom_highbd_smooth_h_predictor_8x4_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_8X16, aom_highbd_dc_predictor_8x16_c,
+    aom_highbd_dc_left_predictor_8x16_c, aom_highbd_dc_top_predictor_8x16_c,
+    aom_highbd_dc_128_predictor_8x16_c, aom_highbd_v_predictor_8x16_c,
+    aom_highbd_h_predictor_8x16_c, aom_highbd_paeth_predictor_8x16_c,
+    aom_highbd_smooth_predictor_8x16_c, aom_highbd_smooth_v_predictor_8x16_c,
+    aom_highbd_smooth_h_predictor_8x16_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_8X32, aom_highbd_dc_predictor_8x32_c,
+    aom_highbd_dc_left_predictor_8x32_c, aom_highbd_dc_top_predictor_8x32_c,
+    aom_highbd_dc_128_predictor_8x32_c, aom_highbd_v_predictor_8x32_c,
+    aom_highbd_h_predictor_8x32_c, aom_highbd_paeth_predictor_8x32_c,
+    aom_highbd_smooth_predictor_8x32_c, aom_highbd_smooth_v_predictor_8x32_c,
+    aom_highbd_smooth_h_predictor_8x32_c)
+
+#if HAVE_SSE2
+HIGHBD_INTRA_PRED_TEST(SSE2, TX_8X8, aom_highbd_dc_predictor_8x8_sse2,
+                       aom_highbd_dc_left_predictor_8x8_sse2,
+                       aom_highbd_dc_top_predictor_8x8_sse2,
+                       aom_highbd_dc_128_predictor_8x8_sse2,
+                       aom_highbd_v_predictor_8x8_sse2,
+                       aom_highbd_h_predictor_8x8_sse2, nullptr, nullptr,
+                       nullptr, nullptr)
+HIGHBD_INTRA_PRED_TEST(SSE2, TX_8X4, aom_highbd_dc_predictor_8x4_sse2,
+                       aom_highbd_dc_left_predictor_8x4_sse2,
+                       aom_highbd_dc_top_predictor_8x4_sse2,
+                       aom_highbd_dc_128_predictor_8x4_sse2,
+                       aom_highbd_v_predictor_8x4_sse2,
+                       aom_highbd_h_predictor_8x4_sse2, nullptr, nullptr,
+                       nullptr, nullptr)
+HIGHBD_INTRA_PRED_TEST(SSE2, TX_8X16, aom_highbd_dc_predictor_8x16_sse2,
+                       aom_highbd_dc_left_predictor_8x16_sse2,
+                       aom_highbd_dc_top_predictor_8x16_sse2,
+                       aom_highbd_dc_128_predictor_8x16_sse2,
+                       aom_highbd_v_predictor_8x16_sse2,
+                       aom_highbd_h_predictor_8x16_sse2, nullptr, nullptr,
+                       nullptr, nullptr)
+#endif
+
+#if HAVE_SSSE3
+HIGHBD_INTRA_PRED_TEST(SSSE3, TX_8X8, nullptr, nullptr, nullptr, nullptr,
+                       nullptr, nullptr, nullptr, nullptr, nullptr, nullptr)
+#endif
+
+#if HAVE_NEON
+HIGHBD_INTRA_PRED_TEST(NEON, TX_8X8, aom_highbd_dc_predictor_8x8_neon,
+                       aom_highbd_dc_left_predictor_8x8_neon,
+                       aom_highbd_dc_top_predictor_8x8_neon,
+                       aom_highbd_dc_128_predictor_8x8_neon,
+                       aom_highbd_v_predictor_8x8_neon,
+                       aom_highbd_h_predictor_8x8_neon,
+                       aom_highbd_paeth_predictor_8x8_neon,
+                       aom_highbd_smooth_predictor_8x8_neon,
+                       aom_highbd_smooth_v_predictor_8x8_neon,
+                       aom_highbd_smooth_h_predictor_8x8_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_8X4, aom_highbd_dc_predictor_8x4_neon,
+                       aom_highbd_dc_left_predictor_8x4_neon,
+                       aom_highbd_dc_top_predictor_8x4_neon,
+                       aom_highbd_dc_128_predictor_8x4_neon,
+                       aom_highbd_v_predictor_8x4_neon,
+                       aom_highbd_h_predictor_8x4_neon,
+                       aom_highbd_paeth_predictor_8x4_neon,
+                       aom_highbd_smooth_predictor_8x4_neon,
+                       aom_highbd_smooth_v_predictor_8x4_neon,
+                       aom_highbd_smooth_h_predictor_8x4_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_8X16, aom_highbd_dc_predictor_8x16_neon,
+                       aom_highbd_dc_left_predictor_8x16_neon,
+                       aom_highbd_dc_top_predictor_8x16_neon,
+                       aom_highbd_dc_128_predictor_8x16_neon,
+                       aom_highbd_v_predictor_8x16_neon,
+                       aom_highbd_h_predictor_8x16_neon,
+                       aom_highbd_paeth_predictor_8x16_neon,
+                       aom_highbd_smooth_predictor_8x16_neon,
+                       aom_highbd_smooth_v_predictor_8x16_neon,
+                       aom_highbd_smooth_h_predictor_8x16_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_8X32, aom_highbd_dc_predictor_8x32_neon,
+                       aom_highbd_dc_left_predictor_8x32_neon,
+                       aom_highbd_dc_top_predictor_8x32_neon,
+                       aom_highbd_dc_128_predictor_8x32_neon,
+                       aom_highbd_v_predictor_8x32_neon,
+                       aom_highbd_h_predictor_8x32_neon,
+                       aom_highbd_paeth_predictor_8x32_neon,
+                       aom_highbd_smooth_predictor_8x32_neon,
+                       aom_highbd_smooth_v_predictor_8x32_neon,
+                       aom_highbd_smooth_h_predictor_8x32_neon)
+#endif  // HAVE_NEON
+
+// -----------------------------------------------------------------------------
+// 16x16, 16x8, 16x32, 16x4, 16x64
+
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_16X16, aom_highbd_dc_predictor_16x16_c,
+    aom_highbd_dc_left_predictor_16x16_c, aom_highbd_dc_top_predictor_16x16_c,
+    aom_highbd_dc_128_predictor_16x16_c, aom_highbd_v_predictor_16x16_c,
+    aom_highbd_h_predictor_16x16_c, aom_highbd_paeth_predictor_16x16_c,
+    aom_highbd_smooth_predictor_16x16_c, aom_highbd_smooth_v_predictor_16x16_c,
+    aom_highbd_smooth_h_predictor_16x16_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_16X8, aom_highbd_dc_predictor_16x8_c,
+    aom_highbd_dc_left_predictor_16x8_c, aom_highbd_dc_top_predictor_16x8_c,
+    aom_highbd_dc_128_predictor_16x8_c, aom_highbd_v_predictor_16x8_c,
+    aom_highbd_h_predictor_16x8_c, aom_highbd_paeth_predictor_16x8_c,
+    aom_highbd_smooth_predictor_16x8_c, aom_highbd_smooth_v_predictor_16x8_c,
+    aom_highbd_smooth_h_predictor_16x8_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_16X32, aom_highbd_dc_predictor_16x32_c,
+    aom_highbd_dc_left_predictor_16x32_c, aom_highbd_dc_top_predictor_16x32_c,
+    aom_highbd_dc_128_predictor_16x32_c, aom_highbd_v_predictor_16x32_c,
+    aom_highbd_h_predictor_16x32_c, aom_highbd_paeth_predictor_16x32_c,
+    aom_highbd_smooth_predictor_16x32_c, aom_highbd_smooth_v_predictor_16x32_c,
+    aom_highbd_smooth_h_predictor_16x32_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_16X4, aom_highbd_dc_predictor_16x4_c,
+    aom_highbd_dc_left_predictor_16x4_c, aom_highbd_dc_top_predictor_16x4_c,
+    aom_highbd_dc_128_predictor_16x4_c, aom_highbd_v_predictor_16x4_c,
+    aom_highbd_h_predictor_16x4_c, aom_highbd_paeth_predictor_16x4_c,
+    aom_highbd_smooth_predictor_16x4_c, aom_highbd_smooth_v_predictor_16x4_c,
+    aom_highbd_smooth_h_predictor_16x4_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_16X64, aom_highbd_dc_predictor_16x64_c,
+    aom_highbd_dc_left_predictor_16x64_c, aom_highbd_dc_top_predictor_16x64_c,
+    aom_highbd_dc_128_predictor_16x64_c, aom_highbd_v_predictor_16x64_c,
+    aom_highbd_h_predictor_16x64_c, aom_highbd_paeth_predictor_16x64_c,
+    aom_highbd_smooth_predictor_16x64_c, aom_highbd_smooth_v_predictor_16x64_c,
+    aom_highbd_smooth_h_predictor_16x64_c)
+
+#if HAVE_SSE2
+HIGHBD_INTRA_PRED_TEST(SSE2, TX_16X16, aom_highbd_dc_predictor_16x16_sse2,
+                       aom_highbd_dc_left_predictor_16x16_sse2,
+                       aom_highbd_dc_top_predictor_16x16_sse2,
+                       aom_highbd_dc_128_predictor_16x16_sse2,
+                       aom_highbd_v_predictor_16x16_sse2,
+                       aom_highbd_h_predictor_16x16_sse2, nullptr, nullptr,
+                       nullptr, nullptr)
+HIGHBD_INTRA_PRED_TEST(SSE2, TX_16X8, aom_highbd_dc_predictor_16x8_sse2,
+                       aom_highbd_dc_left_predictor_16x8_sse2,
+                       aom_highbd_dc_top_predictor_16x8_sse2,
+                       aom_highbd_dc_128_predictor_16x8_sse2,
+                       aom_highbd_v_predictor_16x8_sse2,
+                       aom_highbd_h_predictor_16x8_sse2, nullptr, nullptr,
+                       nullptr, nullptr)
+HIGHBD_INTRA_PRED_TEST(SSE2, TX_16X32, aom_highbd_dc_predictor_16x32_sse2,
+                       aom_highbd_dc_left_predictor_16x32_sse2,
+                       aom_highbd_dc_top_predictor_16x32_sse2,
+                       aom_highbd_dc_128_predictor_16x32_sse2,
+                       aom_highbd_v_predictor_16x32_sse2,
+                       aom_highbd_h_predictor_16x32_sse2, nullptr, nullptr,
+                       nullptr, nullptr)
+#endif
+
+#if HAVE_SSSE3
+HIGHBD_INTRA_PRED_TEST(SSSE3, TX_16X16, nullptr, nullptr, nullptr, nullptr,
+                       nullptr, nullptr, nullptr, nullptr, nullptr, nullptr)
+#endif
+
+#if HAVE_AVX2
+HIGHBD_INTRA_PRED_TEST(AVX2, TX_16X16, nullptr, nullptr, nullptr, nullptr,
+                       nullptr, nullptr, nullptr, nullptr, nullptr, nullptr)
+
+HIGHBD_INTRA_PRED_TEST(AVX2, TX_16X8, nullptr, nullptr, nullptr, nullptr,
+                       nullptr, nullptr, nullptr, nullptr, nullptr, nullptr)
+
+HIGHBD_INTRA_PRED_TEST(AVX2, TX_16X32, nullptr, nullptr, nullptr, nullptr,
+                       nullptr, nullptr, nullptr, nullptr, nullptr, nullptr)
+#endif
+
+#if HAVE_NEON
+HIGHBD_INTRA_PRED_TEST(NEON, TX_16X16, aom_highbd_dc_predictor_16x16_neon,
+                       aom_highbd_dc_left_predictor_16x16_neon,
+                       aom_highbd_dc_top_predictor_16x16_neon,
+                       aom_highbd_dc_128_predictor_16x16_neon,
+                       aom_highbd_v_predictor_16x16_neon,
+                       aom_highbd_h_predictor_16x16_neon,
+                       aom_highbd_paeth_predictor_16x16_neon,
+                       aom_highbd_smooth_predictor_16x16_neon,
+                       aom_highbd_smooth_v_predictor_16x16_neon,
+                       aom_highbd_smooth_h_predictor_16x16_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_16X8, aom_highbd_dc_predictor_16x8_neon,
+                       aom_highbd_dc_left_predictor_16x8_neon,
+                       aom_highbd_dc_top_predictor_16x8_neon,
+                       aom_highbd_dc_128_predictor_16x8_neon,
+                       aom_highbd_v_predictor_16x8_neon,
+                       aom_highbd_h_predictor_16x8_neon,
+                       aom_highbd_paeth_predictor_16x8_neon,
+                       aom_highbd_smooth_predictor_16x8_neon,
+                       aom_highbd_smooth_v_predictor_16x8_neon,
+                       aom_highbd_smooth_h_predictor_16x8_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_16X32, aom_highbd_dc_predictor_16x32_neon,
+                       aom_highbd_dc_left_predictor_16x32_neon,
+                       aom_highbd_dc_top_predictor_16x32_neon,
+                       aom_highbd_dc_128_predictor_16x32_neon,
+                       aom_highbd_v_predictor_16x32_neon,
+                       aom_highbd_h_predictor_16x32_neon,
+                       aom_highbd_paeth_predictor_16x32_neon,
+                       aom_highbd_smooth_predictor_16x32_neon,
+                       aom_highbd_smooth_v_predictor_16x32_neon,
+                       aom_highbd_smooth_h_predictor_16x32_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_16X4, aom_highbd_dc_predictor_16x4_neon,
+                       aom_highbd_dc_left_predictor_16x4_neon,
+                       aom_highbd_dc_top_predictor_16x4_neon,
+                       aom_highbd_dc_128_predictor_16x4_neon,
+                       aom_highbd_v_predictor_16x4_neon,
+                       aom_highbd_h_predictor_16x4_neon,
+                       aom_highbd_paeth_predictor_16x4_neon,
+                       aom_highbd_smooth_predictor_16x4_neon,
+                       aom_highbd_smooth_v_predictor_16x4_neon,
+                       aom_highbd_smooth_h_predictor_16x4_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_16X64, aom_highbd_dc_predictor_16x64_neon,
+                       aom_highbd_dc_left_predictor_16x64_neon,
+                       aom_highbd_dc_top_predictor_16x64_neon,
+                       aom_highbd_dc_128_predictor_16x64_neon,
+                       aom_highbd_v_predictor_16x64_neon,
+                       aom_highbd_h_predictor_16x64_neon,
+                       aom_highbd_paeth_predictor_16x64_neon,
+                       aom_highbd_smooth_predictor_16x64_neon,
+                       aom_highbd_smooth_v_predictor_16x64_neon,
+                       aom_highbd_smooth_h_predictor_16x64_neon)
+#endif  // HAVE_NEON
+
+// -----------------------------------------------------------------------------
+// 32x32, 32x16, 32x64, 32x8
+
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_32X32, aom_highbd_dc_predictor_32x32_c,
+    aom_highbd_dc_left_predictor_32x32_c, aom_highbd_dc_top_predictor_32x32_c,
+    aom_highbd_dc_128_predictor_32x32_c, aom_highbd_v_predictor_32x32_c,
+    aom_highbd_h_predictor_32x32_c, aom_highbd_paeth_predictor_32x32_c,
+    aom_highbd_smooth_predictor_32x32_c, aom_highbd_smooth_v_predictor_32x32_c,
+    aom_highbd_smooth_h_predictor_32x32_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_32X16, aom_highbd_dc_predictor_32x16_c,
+    aom_highbd_dc_left_predictor_32x16_c, aom_highbd_dc_top_predictor_32x16_c,
+    aom_highbd_dc_128_predictor_32x16_c, aom_highbd_v_predictor_32x16_c,
+    aom_highbd_h_predictor_32x16_c, aom_highbd_paeth_predictor_32x16_c,
+    aom_highbd_smooth_predictor_32x16_c, aom_highbd_smooth_v_predictor_32x16_c,
+    aom_highbd_smooth_h_predictor_32x16_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_32X64, aom_highbd_dc_predictor_32x64_c,
+    aom_highbd_dc_left_predictor_32x64_c, aom_highbd_dc_top_predictor_32x64_c,
+    aom_highbd_dc_128_predictor_32x64_c, aom_highbd_v_predictor_32x64_c,
+    aom_highbd_h_predictor_32x64_c, aom_highbd_paeth_predictor_32x64_c,
+    aom_highbd_smooth_predictor_32x64_c, aom_highbd_smooth_v_predictor_32x64_c,
+    aom_highbd_smooth_h_predictor_32x64_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_32X8, aom_highbd_dc_predictor_32x8_c,
+    aom_highbd_dc_left_predictor_32x8_c, aom_highbd_dc_top_predictor_32x8_c,
+    aom_highbd_dc_128_predictor_32x8_c, aom_highbd_v_predictor_32x8_c,
+    aom_highbd_h_predictor_32x8_c, aom_highbd_paeth_predictor_32x8_c,
+    aom_highbd_smooth_predictor_32x8_c, aom_highbd_smooth_v_predictor_32x8_c,
+    aom_highbd_smooth_h_predictor_32x8_c)
+
+#if HAVE_SSE2
+HIGHBD_INTRA_PRED_TEST(SSE2, TX_32X32, aom_highbd_dc_predictor_32x32_sse2,
+                       aom_highbd_dc_left_predictor_32x32_sse2,
+                       aom_highbd_dc_top_predictor_32x32_sse2,
+                       aom_highbd_dc_128_predictor_32x32_sse2,
+                       aom_highbd_v_predictor_32x32_sse2,
+                       aom_highbd_h_predictor_32x32_sse2, nullptr, nullptr,
+                       nullptr, nullptr)
+HIGHBD_INTRA_PRED_TEST(SSE2, TX_32X16, aom_highbd_dc_predictor_32x16_sse2,
+                       aom_highbd_dc_left_predictor_32x16_sse2,
+                       aom_highbd_dc_top_predictor_32x16_sse2,
+                       aom_highbd_dc_128_predictor_32x16_sse2,
+                       aom_highbd_v_predictor_32x16_sse2,
+                       aom_highbd_h_predictor_32x16_sse2, nullptr, nullptr,
+                       nullptr, nullptr)
+#endif
+
+#if HAVE_SSSE3
+HIGHBD_INTRA_PRED_TEST(SSSE3, TX_32X32, nullptr, nullptr, nullptr, nullptr,
+                       nullptr, nullptr, nullptr, nullptr, nullptr, nullptr)
+#endif
+
+#if HAVE_AVX2
+HIGHBD_INTRA_PRED_TEST(AVX2, TX_32X32, nullptr, nullptr, nullptr, nullptr,
+                       nullptr, nullptr, nullptr, nullptr, nullptr, nullptr)
+
+HIGHBD_INTRA_PRED_TEST(AVX2, TX_32X16, nullptr, nullptr, nullptr, nullptr,
+                       nullptr, nullptr, nullptr, nullptr, nullptr, nullptr)
+#endif
+
+#if HAVE_NEON
+HIGHBD_INTRA_PRED_TEST(NEON, TX_32X32, aom_highbd_dc_predictor_32x32_neon,
+                       aom_highbd_dc_left_predictor_32x32_neon,
+                       aom_highbd_dc_top_predictor_32x32_neon,
+                       aom_highbd_dc_128_predictor_32x32_neon,
+                       aom_highbd_v_predictor_32x32_neon,
+                       aom_highbd_h_predictor_32x32_neon,
+                       aom_highbd_paeth_predictor_32x32_neon,
+                       aom_highbd_smooth_predictor_32x32_neon,
+                       aom_highbd_smooth_v_predictor_32x32_neon,
+                       aom_highbd_smooth_h_predictor_32x32_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_32X16, aom_highbd_dc_predictor_32x16_neon,
+                       aom_highbd_dc_left_predictor_32x16_neon,
+                       aom_highbd_dc_top_predictor_32x16_neon,
+                       aom_highbd_dc_128_predictor_32x16_neon,
+                       aom_highbd_v_predictor_32x16_neon,
+                       aom_highbd_h_predictor_32x16_neon,
+                       aom_highbd_paeth_predictor_32x16_neon,
+                       aom_highbd_smooth_predictor_32x16_neon,
+                       aom_highbd_smooth_v_predictor_32x16_neon,
+                       aom_highbd_smooth_h_predictor_32x16_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_32X64, aom_highbd_dc_predictor_32x64_neon,
+                       aom_highbd_dc_left_predictor_32x64_neon,
+                       aom_highbd_dc_top_predictor_32x64_neon,
+                       aom_highbd_dc_128_predictor_32x64_neon,
+                       aom_highbd_v_predictor_32x64_neon,
+                       aom_highbd_h_predictor_32x64_neon,
+                       aom_highbd_paeth_predictor_32x64_neon,
+                       aom_highbd_smooth_predictor_32x64_neon,
+                       aom_highbd_smooth_v_predictor_32x64_neon,
+                       aom_highbd_smooth_h_predictor_32x64_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_32X8, aom_highbd_dc_predictor_32x8_neon,
+                       aom_highbd_dc_left_predictor_32x8_neon,
+                       aom_highbd_dc_top_predictor_32x8_neon,
+                       aom_highbd_dc_128_predictor_32x8_neon,
+                       aom_highbd_v_predictor_32x8_neon,
+                       aom_highbd_h_predictor_32x8_neon,
+                       aom_highbd_paeth_predictor_32x8_neon,
+                       aom_highbd_smooth_predictor_32x8_neon,
+                       aom_highbd_smooth_v_predictor_32x8_neon,
+                       aom_highbd_smooth_h_predictor_32x8_neon)
+#endif  // HAVE_NEON
+
+// -----------------------------------------------------------------------------
+// 64x64, 64x32, 64x16
+
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_64X64, aom_highbd_dc_predictor_64x64_c,
+    aom_highbd_dc_left_predictor_64x64_c, aom_highbd_dc_top_predictor_64x64_c,
+    aom_highbd_dc_128_predictor_64x64_c, aom_highbd_v_predictor_64x64_c,
+    aom_highbd_h_predictor_64x64_c, aom_highbd_paeth_predictor_64x64_c,
+    aom_highbd_smooth_predictor_64x64_c, aom_highbd_smooth_v_predictor_64x64_c,
+    aom_highbd_smooth_h_predictor_64x64_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_64X32, aom_highbd_dc_predictor_64x32_c,
+    aom_highbd_dc_left_predictor_64x32_c, aom_highbd_dc_top_predictor_64x32_c,
+    aom_highbd_dc_128_predictor_64x32_c, aom_highbd_v_predictor_64x32_c,
+    aom_highbd_h_predictor_64x32_c, aom_highbd_paeth_predictor_64x32_c,
+    aom_highbd_smooth_predictor_64x32_c, aom_highbd_smooth_v_predictor_64x32_c,
+    aom_highbd_smooth_h_predictor_64x32_c)
+HIGHBD_INTRA_PRED_TEST(
+    C, TX_64X16, aom_highbd_dc_predictor_64x16_c,
+    aom_highbd_dc_left_predictor_64x16_c, aom_highbd_dc_top_predictor_64x16_c,
+    aom_highbd_dc_128_predictor_64x16_c, aom_highbd_v_predictor_64x16_c,
+    aom_highbd_h_predictor_64x16_c, aom_highbd_paeth_predictor_64x16_c,
+    aom_highbd_smooth_predictor_64x16_c, aom_highbd_smooth_v_predictor_64x16_c,
+    aom_highbd_smooth_h_predictor_64x16_c)
+
+#if HAVE_NEON
+HIGHBD_INTRA_PRED_TEST(NEON, TX_64X64, aom_highbd_dc_predictor_64x64_neon,
+                       aom_highbd_dc_left_predictor_64x64_neon,
+                       aom_highbd_dc_top_predictor_64x64_neon,
+                       aom_highbd_dc_128_predictor_64x64_neon,
+                       aom_highbd_v_predictor_64x64_neon,
+                       aom_highbd_h_predictor_64x64_neon,
+                       aom_highbd_paeth_predictor_64x64_neon,
+                       aom_highbd_smooth_predictor_64x64_neon,
+                       aom_highbd_smooth_v_predictor_64x64_neon,
+                       aom_highbd_smooth_h_predictor_64x64_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_64X32, aom_highbd_dc_predictor_64x32_neon,
+                       aom_highbd_dc_left_predictor_64x32_neon,
+                       aom_highbd_dc_top_predictor_64x32_neon,
+                       aom_highbd_dc_128_predictor_64x32_neon,
+                       aom_highbd_v_predictor_64x32_neon,
+                       aom_highbd_h_predictor_64x32_neon,
+                       aom_highbd_paeth_predictor_64x32_neon,
+                       aom_highbd_smooth_predictor_64x32_neon,
+                       aom_highbd_smooth_v_predictor_64x32_neon,
+                       aom_highbd_smooth_h_predictor_64x32_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TX_64X16, aom_highbd_dc_predictor_64x16_neon,
+                       aom_highbd_dc_left_predictor_64x16_neon,
+                       aom_highbd_dc_top_predictor_64x16_neon,
+                       aom_highbd_dc_128_predictor_64x16_neon,
+                       aom_highbd_v_predictor_64x16_neon,
+                       aom_highbd_h_predictor_64x16_neon,
+                       aom_highbd_paeth_predictor_64x16_neon,
+                       aom_highbd_smooth_predictor_64x16_neon,
+                       aom_highbd_smooth_v_predictor_64x16_neon,
+                       aom_highbd_smooth_h_predictor_64x16_neon)
+#endif  // HAVE_NEON
+
+// -----------------------------------------------------------------------------
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#include "test/test_libaom.cc"
diff --git a/third_party/aom/test/test_libaom.cc b/third_party/aom/test/test_libaom.cc
new file mode 100644
index 0000000000..fbd7f2e380
--- /dev/null
+++ b/third_party/aom/test/test_libaom.cc
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+
+#if !CONFIG_SHARED
+#include <string.h>
+
+#include <string>
+
+#if AOM_ARCH_ARM
+#include "aom_ports/arm.h"
+#endif
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
+#include "aom_ports/x86.h"
+#endif
+
+extern "C" {
+extern void av1_rtcd();
+extern void aom_dsp_rtcd();
+extern void aom_scale_rtcd();
+}
+
+#if AOM_ARCH_ARM || AOM_ARCH_X86 || AOM_ARCH_X86_64
+static void append_negative_gtest_filter(const char *str) {
+  std::string flag_value = GTEST_FLAG_GET(filter);
+  // Negative patterns begin with one '-' followed by a ':' separated list.
+  if (flag_value.find('-') == std::string::npos) flag_value += '-';
+  // OPT.* matches TEST() functions
+  // OPT/* matches TEST_P() functions
+  // OPT_* matches tests which have been manually sharded.
+  // We do not match OPT* because of SSE/SSE2 collisions.
+  const char *search_terminators = "./_";
+  for (size_t pos = 0; pos < strlen(search_terminators); ++pos) {
+    flag_value += ":";
+    flag_value += str;
+    flag_value += search_terminators[pos];
+    flag_value += "*";
+  }
+  GTEST_FLAG_SET(filter, flag_value);
+}
+#endif  // AOM_ARCH_ARM || AOM_ARCH_X86 || AOM_ARCH_X86_64
+#endif  // !CONFIG_SHARED
+
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+
+#if !CONFIG_SHARED
+#if AOM_ARCH_AARCH64
+  const int caps = aom_arm_cpu_caps();
+  if (!(caps & HAS_ARM_CRC32)) append_negative_gtest_filter("ARM_CRC32");
+  if (!(caps & HAS_NEON_DOTPROD)) append_negative_gtest_filter("NEON_DOTPROD");
+  if (!(caps & HAS_NEON_I8MM)) append_negative_gtest_filter("NEON_I8MM");
+  if (!(caps & HAS_SVE)) append_negative_gtest_filter("SVE");
+#elif AOM_ARCH_ARM
+  const int caps = aom_arm_cpu_caps();
+  if (!(caps & HAS_NEON)) append_negative_gtest_filter("NEON");
+#endif  // AOM_ARCH_ARM
+
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
+  const int simd_caps = x86_simd_caps();
+  if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter("MMX");
+  if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter("SSE");
+  if (!(simd_caps & HAS_SSE2)) append_negative_gtest_filter("SSE2");
+  if (!(simd_caps & HAS_SSE3)) append_negative_gtest_filter("SSE3");
+  if (!(simd_caps & HAS_SSSE3)) append_negative_gtest_filter("SSSE3");
+  if (!(simd_caps & HAS_SSE4_1)) append_negative_gtest_filter("SSE4_1");
+  if (!(simd_caps & HAS_SSE4_2)) append_negative_gtest_filter("SSE4_2");
+  if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter("AVX");
+  if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter("AVX2");
+#endif  // AOM_ARCH_X86 || AOM_ARCH_X86_64
+
+  // Shared library builds don't support whitebox tests that exercise internal
+  // symbols.
+  av1_rtcd();
+  aom_dsp_rtcd();
+  aom_scale_rtcd();
+#endif  // !CONFIG_SHARED
+
+  return RUN_ALL_TESTS();
+}
diff --git a/third_party/aom/test/test_runner.cmake b/third_party/aom/test/test_runner.cmake
new file mode 100644
index 0000000000..f0648d16be
--- /dev/null
+++ b/third_party/aom/test/test_runner.cmake
@@ -0,0 +1,28 @@
+#
+# Copyright (c) 2017, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and the
+# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
+# not distributed with this source code in the LICENSE file, you can obtain it
+# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
+# License 1.0 was not distributed with this source code in the PATENTS file, you
+# can obtain it at www.aomedia.org/license/patent.
+#
+if(NOT GTEST_TOTAL_SHARDS
+   OR "${GTEST_SHARD_INDEX}" STREQUAL ""
+   OR NOT TEST_LIBAOM)
+  message(
+    FATAL_ERROR
+      "The variables GTEST_SHARD_INDEX, GTEST_TOTAL_SHARDS and TEST_LIBAOM
+          must be defined.")
+endif()
+
+set($ENV{GTEST_SHARD_INDEX} ${GTEST_SHARD_INDEX})
+set($ENV{GTEST_TOTAL_SHARDS} ${GTEST_TOTAL_SHARDS})
+execute_process(COMMAND ${TEST_LIBAOM} RESULT_VARIABLE test_result)
+set(test_message "Test shard ${GTEST_SHARD_INDEX}/${GTEST_TOTAL_SHARDS} result")
+message("${test_message}: ${test_result}")
+
+if(NOT "${test_result}" STREQUAL "0")
+  message(FATAL_ERROR "${test_message}: FAILED, non-zero exit code.")
+endif()
diff --git a/third_party/aom/test/test_vector_test.cc b/third_party/aom/test/test_vector_test.cc
new file mode 100644
index 0000000000..39414e32e4
--- /dev/null
+++ b/third_party/aom/test/test_vector_test.cc
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <memory>
+#include <set>
+#include <string>
+#include <tuple>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "common/tools_common.h"
+#include "config/aom_config.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/md5_helper.h"
+#include "test/test_vectors.h"
+#include "test/util.h"
+#if CONFIG_WEBM_IO
+#include "test/webm_video_source.h"
+#endif
+
+namespace {
+
+const int kThreads = 0;
+const int kFileName = 1;
+const int kRowMT = 2;
+
+typedef std::tuple<int, const char *, int> DecodeParam;
+
+class TestVectorTest : public ::libaom_test::DecoderTest,
+                       public ::libaom_test::CodecTestWithParam<DecodeParam> {
+ protected:
+  TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(nullptr) {}
+
+  ~TestVectorTest() override {
+    if (md5_file_) fclose(md5_file_);
+  }
+
+  void OpenMD5File(const std::string &md5_file_name_) {
+    md5_file_ = libaom_test::OpenTestDataFile(md5_file_name_);
+    ASSERT_NE(md5_file_, nullptr)
+        << "Md5 file open failed. Filename: " << md5_file_name_;
+  }
+
+  void PreDecodeFrameHook(const libaom_test::CompressedVideoSource &video,
+                          libaom_test::Decoder *decoder) override {
+    if (video.frame_number() == 0) decoder->Control(AV1D_SET_ROW_MT, row_mt_);
+  }
+
+  void DecompressedFrameHook(const aom_image_t &img,
+                             const unsigned int frame_number) override {
+    ASSERT_NE(md5_file_, nullptr);
+    char expected_md5[33];
+    char junk[128];
+
+    // Read correct md5 checksums.
+    const int res = fscanf(md5_file_, "%s  %s", expected_md5, junk);
+    ASSERT_NE(res, EOF) << "Read md5 data failed";
+    expected_md5[32] = '\0';
+
+    ::libaom_test::MD5 md5_res;
+#if FORCE_HIGHBITDEPTH_DECODING
+    const aom_img_fmt_t shifted_fmt =
+        (aom_img_fmt)(img.fmt & ~AOM_IMG_FMT_HIGHBITDEPTH);
+    if (img.bit_depth == 8 && shifted_fmt != img.fmt) {
+      aom_image_t *img_shifted =
+          aom_img_alloc(nullptr, shifted_fmt, img.d_w, img.d_h, 16);
+      img_shifted->bit_depth = img.bit_depth;
+      img_shifted->monochrome = img.monochrome;
+      aom_img_downshift(img_shifted, &img, 0);
+      md5_res.Add(img_shifted);
+      aom_img_free(img_shifted);
+    } else {
+#endif
+      md5_res.Add(&img);
+#if FORCE_HIGHBITDEPTH_DECODING
+    }
+#endif
+
+    const char *actual_md5 = md5_res.Get();
+    // Check md5 match.
+    ASSERT_STREQ(expected_md5, actual_md5)
+        << "Md5 checksums don't match: frame number = " << frame_number;
+  }
+
+  unsigned int row_mt_;
+
+ private:
+  FILE *md5_file_;
+};
+
+// This test runs through the whole set of test vectors, and decodes them.
+// The md5 checksums are computed for each frame in the video file. If md5
+// checksums match the correct md5 data, then the test is passed. Otherwise,
+// the test failed.
+TEST_P(TestVectorTest, MD5Match) {
+  const DecodeParam input = GET_PARAM(1);
+  const std::string filename = std::get<kFileName>(input);
+  aom_codec_flags_t flags = 0;
+  aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+  char str[256];
+
+  cfg.threads = std::get<kThreads>(input);
+  row_mt_ = std::get<kRowMT>(input);
+
+  snprintf(str, sizeof(str) / sizeof(str[0]) - 1, "file: %s threads: %d",
+           filename.c_str(), cfg.threads);
+  SCOPED_TRACE(str);
+
+  // Open compressed video file.
+  std::unique_ptr<libaom_test::CompressedVideoSource> video;
+  if (filename.substr(filename.length() - 3, 3) == "ivf") {
+    video.reset(new libaom_test::IVFVideoSource(filename));
+  } else if (filename.substr(filename.length() - 4, 4) == "webm" ||
+             filename.substr(filename.length() - 3, 3) == "mkv") {
+#if CONFIG_WEBM_IO
+    video.reset(new libaom_test::WebMVideoSource(filename));
+#else
+    fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n",
+            filename.c_str());
+    return;
+#endif
+  }
+  ASSERT_NE(video, nullptr);
+  video->Init();
+
+  // Construct md5 file name.
+  const std::string md5_filename = filename + ".md5";
+  OpenMD5File(md5_filename);
+
+  // Set decode config and flags.
+  cfg.allow_lowbitdepth = !FORCE_HIGHBITDEPTH_DECODING;
+  set_cfg(cfg);
+  set_flags(flags);
+
+  // Decode frame, and check the md5 matching.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get(), cfg));
+}
+
+#if CONFIG_AV1_DECODER
+AV1_INSTANTIATE_TEST_SUITE(
+    TestVectorTest,
+    ::testing::Combine(::testing::Values(1),  // Single thread.
+                       ::testing::ValuesIn(libaom_test::kAV1TestVectors,
+                                           libaom_test::kAV1TestVectors +
+                                               libaom_test::kNumAV1TestVectors),
+                       ::testing::Values(0)));
+
+// Test AV1 decode in with different numbers of threads.
+INSTANTIATE_TEST_SUITE_P(
+    AV1MultiThreaded, TestVectorTest,
+    ::testing::Combine(
+        ::testing::Values(
+            static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)),
+        ::testing::Combine(
+            ::testing::Range(2, 9),  // With 2 ~ 8 threads.
+            ::testing::ValuesIn(libaom_test::kAV1TestVectors,
+                                libaom_test::kAV1TestVectors +
+                                    libaom_test::kNumAV1TestVectors),
+            ::testing::Range(0, 2))));
+
+#endif  // CONFIG_AV1_DECODER
+
+}  // namespace
diff --git a/third_party/aom/test/test_vectors.cc b/third_party/aom/test/test_vectors.cc
new file mode 100644
index 0000000000..09736d1ed8
--- /dev/null
+++ b/third_party/aom/test/test_vectors.cc
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/test_vectors.h"
+
+namespace libaom_test {
+
+#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
+
+#if CONFIG_AV1_DECODER
+const char *const kAV1TestVectors[] = {
+  "av1-1-b8-00-quantizer-00.ivf",
+  "av1-1-b8-00-quantizer-01.ivf",
+  "av1-1-b8-00-quantizer-02.ivf",
+  "av1-1-b8-00-quantizer-03.ivf",
+  "av1-1-b8-00-quantizer-04.ivf",
+  "av1-1-b8-00-quantizer-05.ivf",
+  "av1-1-b8-00-quantizer-06.ivf",
+  "av1-1-b8-00-quantizer-07.ivf",
+  "av1-1-b8-00-quantizer-08.ivf",
+  "av1-1-b8-00-quantizer-09.ivf",
+  "av1-1-b8-00-quantizer-10.ivf",
+  "av1-1-b8-00-quantizer-11.ivf",
+  "av1-1-b8-00-quantizer-12.ivf",
+  "av1-1-b8-00-quantizer-13.ivf",
+  "av1-1-b8-00-quantizer-14.ivf",
+  "av1-1-b8-00-quantizer-15.ivf",
+  "av1-1-b8-00-quantizer-16.ivf",
+  "av1-1-b8-00-quantizer-17.ivf",
+  "av1-1-b8-00-quantizer-18.ivf",
+  "av1-1-b8-00-quantizer-19.ivf",
+  "av1-1-b8-00-quantizer-20.ivf",
+  "av1-1-b8-00-quantizer-21.ivf",
+  "av1-1-b8-00-quantizer-22.ivf",
+  "av1-1-b8-00-quantizer-23.ivf",
+  "av1-1-b8-00-quantizer-24.ivf",
+  "av1-1-b8-00-quantizer-25.ivf",
+  "av1-1-b8-00-quantizer-26.ivf",
+  "av1-1-b8-00-quantizer-27.ivf",
+  "av1-1-b8-00-quantizer-28.ivf",
+  "av1-1-b8-00-quantizer-29.ivf",
+  "av1-1-b8-00-quantizer-30.ivf",
+  "av1-1-b8-00-quantizer-31.ivf",
+  "av1-1-b8-00-quantizer-32.ivf",
+  "av1-1-b8-00-quantizer-33.ivf",
+  "av1-1-b8-00-quantizer-34.ivf",
+  "av1-1-b8-00-quantizer-35.ivf",
+  "av1-1-b8-00-quantizer-36.ivf",
+  "av1-1-b8-00-quantizer-37.ivf",
+  "av1-1-b8-00-quantizer-38.ivf",
+  "av1-1-b8-00-quantizer-39.ivf",
+  "av1-1-b8-00-quantizer-40.ivf",
+  "av1-1-b8-00-quantizer-41.ivf",
+  "av1-1-b8-00-quantizer-42.ivf",
+  "av1-1-b8-00-quantizer-43.ivf",
+  "av1-1-b8-00-quantizer-44.ivf",
+  "av1-1-b8-00-quantizer-45.ivf",
+  "av1-1-b8-00-quantizer-46.ivf",
+  "av1-1-b8-00-quantizer-47.ivf",
+  "av1-1-b8-00-quantizer-48.ivf",
+  "av1-1-b8-00-quantizer-49.ivf",
+  "av1-1-b8-00-quantizer-50.ivf",
+  "av1-1-b8-00-quantizer-51.ivf",
+  "av1-1-b8-00-quantizer-52.ivf",
+  "av1-1-b8-00-quantizer-53.ivf",
+  "av1-1-b8-00-quantizer-54.ivf",
+  "av1-1-b8-00-quantizer-55.ivf",
+  "av1-1-b8-00-quantizer-56.ivf",
+  "av1-1-b8-00-quantizer-57.ivf",
+  "av1-1-b8-00-quantizer-58.ivf",
+  "av1-1-b8-00-quantizer-59.ivf",
+  "av1-1-b8-00-quantizer-60.ivf",
+  "av1-1-b8-00-quantizer-61.ivf",
+  "av1-1-b8-00-quantizer-62.ivf",
+  "av1-1-b8-00-quantizer-63.ivf",
+#if CONFIG_AV1_HIGHBITDEPTH
+  "av1-1-b10-00-quantizer-00.ivf",
+  "av1-1-b10-00-quantizer-01.ivf",
+  "av1-1-b10-00-quantizer-02.ivf",
+  "av1-1-b10-00-quantizer-03.ivf",
+  "av1-1-b10-00-quantizer-04.ivf",
+  "av1-1-b10-00-quantizer-05.ivf",
+  "av1-1-b10-00-quantizer-06.ivf",
+  "av1-1-b10-00-quantizer-07.ivf",
+  "av1-1-b10-00-quantizer-08.ivf",
+  "av1-1-b10-00-quantizer-09.ivf",
+  "av1-1-b10-00-quantizer-10.ivf",
+  "av1-1-b10-00-quantizer-11.ivf",
+  "av1-1-b10-00-quantizer-12.ivf",
+  "av1-1-b10-00-quantizer-13.ivf",
+  "av1-1-b10-00-quantizer-14.ivf",
+  "av1-1-b10-00-quantizer-15.ivf",
+  "av1-1-b10-00-quantizer-16.ivf",
+  "av1-1-b10-00-quantizer-17.ivf",
+  "av1-1-b10-00-quantizer-18.ivf",
+  "av1-1-b10-00-quantizer-19.ivf",
+  "av1-1-b10-00-quantizer-20.ivf",
+  "av1-1-b10-00-quantizer-21.ivf",
+  "av1-1-b10-00-quantizer-22.ivf",
+  "av1-1-b10-00-quantizer-23.ivf",
+  "av1-1-b10-00-quantizer-24.ivf",
+  "av1-1-b10-00-quantizer-25.ivf",
+  "av1-1-b10-00-quantizer-26.ivf",
+  "av1-1-b10-00-quantizer-27.ivf",
+  "av1-1-b10-00-quantizer-28.ivf",
+  "av1-1-b10-00-quantizer-29.ivf",
+  "av1-1-b10-00-quantizer-30.ivf",
+  "av1-1-b10-00-quantizer-31.ivf",
+  "av1-1-b10-00-quantizer-32.ivf",
+  "av1-1-b10-00-quantizer-33.ivf",
+  "av1-1-b10-00-quantizer-34.ivf",
+  "av1-1-b10-00-quantizer-35.ivf",
+  "av1-1-b10-00-quantizer-36.ivf",
+  "av1-1-b10-00-quantizer-37.ivf",
+  "av1-1-b10-00-quantizer-38.ivf",
+  "av1-1-b10-00-quantizer-39.ivf",
+  "av1-1-b10-00-quantizer-40.ivf",
+  "av1-1-b10-00-quantizer-41.ivf",
+  "av1-1-b10-00-quantizer-42.ivf",
+  "av1-1-b10-00-quantizer-43.ivf",
+  "av1-1-b10-00-quantizer-44.ivf",
+  "av1-1-b10-00-quantizer-45.ivf",
+  "av1-1-b10-00-quantizer-46.ivf",
+  "av1-1-b10-00-quantizer-47.ivf",
+  "av1-1-b10-00-quantizer-48.ivf",
+  "av1-1-b10-00-quantizer-49.ivf",
+  "av1-1-b10-00-quantizer-50.ivf",
+  "av1-1-b10-00-quantizer-51.ivf",
+  "av1-1-b10-00-quantizer-52.ivf",
+  "av1-1-b10-00-quantizer-53.ivf",
+  "av1-1-b10-00-quantizer-54.ivf",
+  "av1-1-b10-00-quantizer-55.ivf",
+  "av1-1-b10-00-quantizer-56.ivf",
+  "av1-1-b10-00-quantizer-57.ivf",
+  "av1-1-b10-00-quantizer-58.ivf",
+  "av1-1-b10-00-quantizer-59.ivf",
+  "av1-1-b10-00-quantizer-60.ivf",
+  "av1-1-b10-00-quantizer-61.ivf",
+  "av1-1-b10-00-quantizer-62.ivf",
+  "av1-1-b10-00-quantizer-63.ivf",
+  "av1-1-b10-23-film_grain-50.ivf",
+  "av1-1-b10-24-monochrome.ivf",
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+  "av1-1-b8-01-size-16x16.ivf",
+  "av1-1-b8-01-size-16x18.ivf",
+  "av1-1-b8-01-size-16x32.ivf",
+  "av1-1-b8-01-size-16x34.ivf",
+  "av1-1-b8-01-size-16x64.ivf",
+  "av1-1-b8-01-size-16x66.ivf",
+  "av1-1-b8-01-size-18x16.ivf",
+  "av1-1-b8-01-size-18x18.ivf",
+  "av1-1-b8-01-size-18x32.ivf",
+  "av1-1-b8-01-size-18x34.ivf",
+  "av1-1-b8-01-size-18x64.ivf",
+  "av1-1-b8-01-size-18x66.ivf",
+  "av1-1-b8-01-size-196x196.ivf",
+  "av1-1-b8-01-size-196x198.ivf",
+  "av1-1-b8-01-size-196x200.ivf",
+  "av1-1-b8-01-size-196x202.ivf",
+  "av1-1-b8-01-size-196x208.ivf",
+  "av1-1-b8-01-size-196x210.ivf",
+  "av1-1-b8-01-size-196x224.ivf",
+  "av1-1-b8-01-size-196x226.ivf",
+  "av1-1-b8-01-size-198x196.ivf",
+  "av1-1-b8-01-size-198x198.ivf",
+  "av1-1-b8-01-size-198x200.ivf",
+  "av1-1-b8-01-size-198x202.ivf",
+  "av1-1-b8-01-size-198x208.ivf",
+  "av1-1-b8-01-size-198x210.ivf",
+  "av1-1-b8-01-size-198x224.ivf",
+  "av1-1-b8-01-size-198x226.ivf",
+  "av1-1-b8-01-size-200x196.ivf",
+  "av1-1-b8-01-size-200x198.ivf",
+  "av1-1-b8-01-size-200x200.ivf",
+  "av1-1-b8-01-size-200x202.ivf",
+  "av1-1-b8-01-size-200x208.ivf",
+  "av1-1-b8-01-size-200x210.ivf",
+  "av1-1-b8-01-size-200x224.ivf",
+  "av1-1-b8-01-size-200x226.ivf",
+  "av1-1-b8-01-size-202x196.ivf",
+  "av1-1-b8-01-size-202x198.ivf",
+  "av1-1-b8-01-size-202x200.ivf",
+  "av1-1-b8-01-size-202x202.ivf",
+  "av1-1-b8-01-size-202x208.ivf",
+  "av1-1-b8-01-size-202x210.ivf",
+  "av1-1-b8-01-size-202x224.ivf",
+  "av1-1-b8-01-size-202x226.ivf",
+  "av1-1-b8-01-size-208x196.ivf",
+  "av1-1-b8-01-size-208x198.ivf",
+  "av1-1-b8-01-size-208x200.ivf",
+  "av1-1-b8-01-size-208x202.ivf",
+  "av1-1-b8-01-size-208x208.ivf",
+  "av1-1-b8-01-size-208x210.ivf",
+  "av1-1-b8-01-size-208x224.ivf",
+  "av1-1-b8-01-size-208x226.ivf",
+  "av1-1-b8-01-size-210x196.ivf",
+  "av1-1-b8-01-size-210x198.ivf",
+  "av1-1-b8-01-size-210x200.ivf",
+  "av1-1-b8-01-size-210x202.ivf",
+  "av1-1-b8-01-size-210x208.ivf",
+  "av1-1-b8-01-size-210x210.ivf",
+  "av1-1-b8-01-size-210x224.ivf",
+  "av1-1-b8-01-size-210x226.ivf",
+  "av1-1-b8-01-size-224x196.ivf",
+  "av1-1-b8-01-size-224x198.ivf",
+  "av1-1-b8-01-size-224x200.ivf",
+  "av1-1-b8-01-size-224x202.ivf",
+  "av1-1-b8-01-size-224x208.ivf",
+  "av1-1-b8-01-size-224x210.ivf",
+  "av1-1-b8-01-size-224x224.ivf",
+  "av1-1-b8-01-size-224x226.ivf",
+  "av1-1-b8-01-size-226x196.ivf",
+  "av1-1-b8-01-size-226x198.ivf",
+  "av1-1-b8-01-size-226x200.ivf",
+  "av1-1-b8-01-size-226x202.ivf",
+  "av1-1-b8-01-size-226x208.ivf",
+  "av1-1-b8-01-size-226x210.ivf",
+  "av1-1-b8-01-size-226x224.ivf",
+  "av1-1-b8-01-size-226x226.ivf",
+  "av1-1-b8-01-size-32x16.ivf",
+  "av1-1-b8-01-size-32x18.ivf",
+  "av1-1-b8-01-size-32x32.ivf",
+  "av1-1-b8-01-size-32x34.ivf",
+  "av1-1-b8-01-size-32x64.ivf",
+  "av1-1-b8-01-size-32x66.ivf",
+  "av1-1-b8-01-size-34x16.ivf",
+  "av1-1-b8-01-size-34x18.ivf",
+  "av1-1-b8-01-size-34x32.ivf",
+  "av1-1-b8-01-size-34x34.ivf",
+  "av1-1-b8-01-size-34x64.ivf",
+  "av1-1-b8-01-size-34x66.ivf",
+  "av1-1-b8-01-size-64x16.ivf",
+  "av1-1-b8-01-size-64x18.ivf",
+  "av1-1-b8-01-size-64x32.ivf",
+  "av1-1-b8-01-size-64x34.ivf",
+  "av1-1-b8-01-size-64x64.ivf",
+  "av1-1-b8-01-size-64x66.ivf",
+  "av1-1-b8-01-size-66x16.ivf",
+  "av1-1-b8-01-size-66x18.ivf",
+  "av1-1-b8-01-size-66x32.ivf",
+  "av1-1-b8-01-size-66x34.ivf",
+  "av1-1-b8-01-size-66x64.ivf",
+  "av1-1-b8-01-size-66x66.ivf",
+  "av1-1-b8-02-allintra.ivf",
+  "av1-1-b8-03-sizedown.mkv",
+  "av1-1-b8-03-sizeup.mkv",
+  "av1-1-b8-04-cdfupdate.ivf",
+  "av1-1-b8-05-mv.ivf",
+  "av1-1-b8-06-mfmv.ivf",
+  "av1-1-b8-16-intra_only-intrabc-extreme-dv.ivf",
+  "av1-1-b8-22-svc-L1T2.ivf",
+  "av1-1-b8-22-svc-L2T1.ivf",
+  "av1-1-b8-22-svc-L2T2.ivf",
+  "av1-1-b8-23-film_grain-50.ivf",
+  "av1-1-b8-24-monochrome.ivf"
+};
+const int kNumAV1TestVectors = NELEMENTS(kAV1TestVectors);
+#endif  // CONFIG_AV1_DECODER
+
+}  // namespace libaom_test
diff --git a/third_party/aom/test/test_vectors.h b/third_party/aom/test/test_vectors.h
new file mode 100644
index 0000000000..be37f6e377
--- /dev/null
+++ b/third_party/aom/test/test_vectors.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_TEST_VECTORS_H_
+#define AOM_TEST_TEST_VECTORS_H_
+
+#include "config/aom_config.h"
+
+namespace libaom_test {
+
+#if CONFIG_AV1_DECODER
+extern const int kNumAV1TestVectors;
+extern const char *const kAV1TestVectors[];
+#endif
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_TEST_VECTORS_H_
diff --git a/third_party/aom/test/tile_config_test.cc b/third_party/aom/test/tile_config_test.cc
new file mode 100644
index 0000000000..e2ac59284b
--- /dev/null
+++ b/third_party/aom/test/tile_config_test.cc
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "aom/aom_codec.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/y4m_video_source.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+typedef struct {
+  // Superblock size
+  const unsigned int sb_size;
+  // log2(number of tile rows)
+  const unsigned int tile_rows;
+  // log2(number of tile columns)
+  const unsigned int tile_cols;
+} uniformTileConfigParam;
+
+const libaom_test::TestMode kTestModeParams[] =
+#if CONFIG_REALTIME_ONLY
+    { ::libaom_test::kRealTime };
+#else
+    { ::libaom_test::kRealTime, ::libaom_test::kOnePassGood,
+      ::libaom_test::kTwoPassGood };
+#endif
+
+static const uniformTileConfigParam uniformTileConfigParams[] = {
+  { 128, 0, 0 }, { 128, 0, 2 }, { 128, 2, 0 }, { 128, 1, 2 }, { 128, 2, 2 },
+  { 128, 3, 2 }, { 64, 0, 0 },  { 64, 0, 2 },  { 64, 2, 0 },  { 64, 1, 2 },
+  { 64, 2, 2 },  { 64, 3, 3 },  { 64, 4, 4 }
+};
+
+typedef struct {
+  // Superblock size
+  const unsigned int sb_size;
+  // number of tile widths
+  const unsigned int tile_width_count;
+  // list of tile widths
+  int tile_widths[AOM_MAX_TILE_COLS];
+  // number of tile heights
+  const unsigned int tile_height_count;
+  // list of tile heights
+  int tile_heights[AOM_MAX_TILE_ROWS];
+} nonUniformTileConfigParam;
+
+const nonUniformTileConfigParam nonUniformTileConfigParams[] = {
+  { 64, 1, { 3 }, 1, { 3 } },          { 64, 2, { 1, 2 }, 2, { 1, 2 } },
+  { 64, 3, { 2, 3, 4 }, 2, { 2, 3 } }, { 128, 1, { 3 }, 1, { 3 } },
+  { 128, 2, { 1, 2 }, 2, { 1, 2 } },   { 128, 3, { 2, 3, 4 }, 2, { 2, 3 } },
+};
+
+// Find smallest k>=0 such that (blk_size << k) >= target
+static INLINE int tile_log2(int blk_size, int target) {
+  int k;
+  for (k = 0; (blk_size << k) < target; k++) {
+  }
+  return k;
+}
+
+// This class is used to validate tile configuration for uniform spacing.
+class UniformTileConfigTestLarge
+    : public ::libaom_test::CodecTestWith3Params<
+          libaom_test::TestMode, uniformTileConfigParam, aom_rc_mode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  UniformTileConfigTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        tile_config_param_(GET_PARAM(2)), end_usage_check_(GET_PARAM(3)) {
+    tile_config_violated_ = false;
+    max_tile_cols_log2_ = tile_log2(1, AOM_MAX_TILE_COLS);
+    max_tile_rows_log2_ = tile_log2(1, AOM_MAX_TILE_ROWS);
+  }
+  ~UniformTileConfigTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = end_usage_check_;
+    cfg_.g_threads = 1;
+    cfg_.g_lag_in_frames = 19;
+  }
+
+  bool DoDecode() const override { return true; }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_TILE_COLUMNS, tile_config_param_.tile_cols);
+      encoder->Control(AV1E_SET_TILE_ROWS, tile_config_param_.tile_rows);
+      encoder->Control(AOME_SET_CPUUSED, 5);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AV1E_SET_SUPERBLOCK_SIZE,
+                       tile_config_param_.sb_size == 64
+                           ? AOM_SUPERBLOCK_SIZE_64X64
+                           : AOM_SUPERBLOCK_SIZE_128X128);
+    }
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (AOM_CODEC_OK == res_dec) {
+      aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
+      aom_tile_info tile_info;
+      int config_tile_columns = AOMMIN(1 << (int)tile_config_param_.tile_cols,
+                                       1 << max_tile_cols_log2_);
+      int config_tile_rows = AOMMIN(1 << (int)tile_config_param_.tile_rows,
+                                    1 << max_tile_rows_log2_);
+
+      AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_TILE_INFO, &tile_info);
+      if (tile_info.tile_columns != config_tile_columns ||
+          tile_info.tile_rows != config_tile_rows) {
+        tile_config_violated_ = true;
+      }
+    }
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  const uniformTileConfigParam tile_config_param_;
+  int max_tile_cols_log2_;
+  int max_tile_rows_log2_;
+  bool tile_config_violated_;
+  aom_rc_mode end_usage_check_;
+};
+
+// This class is used to validate tile configuration for non uniform spacing.
+class NonUniformTileConfigTestLarge
+    : public ::libaom_test::CodecTestWith3Params<
+          libaom_test::TestMode, nonUniformTileConfigParam, aom_rc_mode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  NonUniformTileConfigTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        tile_config_param_(GET_PARAM(2)), rc_end_usage_(GET_PARAM(3)) {
+    tile_config_violated_ = false;
+  }
+  ~NonUniformTileConfigTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = rc_end_usage_;
+    cfg_.g_threads = 1;
+    cfg_.g_lag_in_frames = 35;
+    cfg_.rc_target_bitrate = 1000;
+    cfg_.tile_width_count = tile_config_param_.tile_width_count;
+    memcpy(cfg_.tile_widths, tile_config_param_.tile_widths,
+           sizeof(tile_config_param_.tile_widths[0]) *
+               tile_config_param_.tile_width_count);
+    cfg_.tile_height_count = tile_config_param_.tile_height_count;
+    memcpy(cfg_.tile_heights, tile_config_param_.tile_heights,
+           sizeof(tile_config_param_.tile_heights[0]) *
+               tile_config_param_.tile_height_count);
+  }
+
+  bool DoDecode() const override { return true; }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 5);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AV1E_SET_SUPERBLOCK_SIZE,
+                       tile_config_param_.sb_size == 64
+                           ? AOM_SUPERBLOCK_SIZE_64X64
+                           : AOM_SUPERBLOCK_SIZE_128X128);
+    }
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (AOM_CODEC_OK == res_dec) {
+      aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
+      aom_tile_info tile_info;
+      AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_TILE_INFO, &tile_info);
+
+      // check validity of tile cols
+      int tile_col_idx, tile_col = 0;
+      for (tile_col_idx = 0; tile_col_idx < tile_info.tile_columns - 1;
+           tile_col_idx++) {
+        if (tile_config_param_.tile_widths[tile_col] !=
+            tile_info.tile_widths[tile_col_idx])
+          tile_config_violated_ = true;
+        tile_col = (tile_col + 1) % (int)tile_config_param_.tile_width_count;
+      }
+      // last column may not be able to accommodate config, but if it is
+      // greater than what is configured, there is a violation.
+      if (tile_config_param_.tile_widths[tile_col] <
+          tile_info.tile_widths[tile_col_idx])
+        tile_config_violated_ = true;
+
+      // check validity of tile rows
+      int tile_row_idx, tile_row = 0;
+      for (tile_row_idx = 0; tile_row_idx < tile_info.tile_rows - 1;
+           tile_row_idx++) {
+        if (tile_config_param_.tile_heights[tile_row] !=
+            tile_info.tile_heights[tile_row_idx])
+          tile_config_violated_ = true;
+        tile_row = (tile_row + 1) % (int)tile_config_param_.tile_height_count;
+      }
+      // last row may not be able to accommodate config, but if it is
+      // greater than what is configured, there is a violation.
+      if (tile_config_param_.tile_heights[tile_row] <
+          tile_info.tile_heights[tile_row_idx])
+        tile_config_violated_ = true;
+    }
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  const nonUniformTileConfigParam tile_config_param_;
+  bool tile_config_violated_;
+  aom_rc_mode rc_end_usage_;
+};
+
+TEST_P(UniformTileConfigTestLarge, UniformTileConfigTest) {
+  ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 1);
+  ASSERT_NO_FATAL_FAILURE(video.Begin());
+
+  int max_tiles_cols = video.img()->w / (int)tile_config_param_.sb_size;
+  int max_tiles_rows = video.img()->h / (int)tile_config_param_.sb_size;
+  max_tile_cols_log2_ = tile_log2(1, AOMMIN(max_tiles_cols, AOM_MAX_TILE_COLS));
+  max_tile_rows_log2_ = tile_log2(1, AOMMIN(max_tiles_rows, AOM_MAX_TILE_ROWS));
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(tile_config_violated_, false);
+}
+
+TEST_P(UniformTileConfigTestLarge, UniformTileConfigTestLowRes) {
+  ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 1);
+  ASSERT_NO_FATAL_FAILURE(video.Begin());
+
+  int max_tiles_cols = video.img()->w / (int)tile_config_param_.sb_size;
+  int max_tiles_rows = video.img()->h / (int)tile_config_param_.sb_size;
+  max_tile_cols_log2_ = tile_log2(1, AOMMIN(max_tiles_cols, AOM_MAX_TILE_COLS));
+  max_tile_rows_log2_ = tile_log2(1, AOMMIN(max_tiles_rows, AOM_MAX_TILE_ROWS));
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(tile_config_violated_, false);
+}
+
+TEST_P(NonUniformTileConfigTestLarge, NonUniformTileConfigTest) {
+  ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 1);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(tile_config_violated_, false);
+}
+
+AV1_INSTANTIATE_TEST_SUITE(UniformTileConfigTestLarge,
+                           ::testing::ValuesIn(kTestModeParams),
+                           ::testing::ValuesIn(uniformTileConfigParams),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ));
+
+AV1_INSTANTIATE_TEST_SUITE(NonUniformTileConfigTestLarge,
+                           ::testing::ValuesIn(kTestModeParams),
+                           ::testing::ValuesIn(nonUniformTileConfigParams),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ));
+
+typedef struct {
+  // Number of tile groups to set.
+  const int num_tg;
+  // Number of tile rows to set
+  const int num_tile_rows;
+  // Number of tile columns to set
+  const int num_tile_cols;
+} TileGroupConfigParams;
+
+static const TileGroupConfigParams tileGroupTestParams[] = {
+  { 5, 4, 4 }, { 3, 3, 3 }, { 5, 3, 3 }, { 7, 5, 5 }, { 7, 3, 3 }, { 7, 4, 4 }
+};
+
+std::ostream &operator<<(std::ostream &os,
+                         const TileGroupConfigParams &test_arg) {
+  return os << "TileGroupConfigParams { num_tg:" << test_arg.num_tg
+            << " num_tile_rows:" << test_arg.num_tile_rows
+            << " num_tile_cols:" << test_arg.num_tile_cols << " }";
+}
+
+// This class is used to test number of tile groups present in header.
+class TileGroupTestLarge
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode,
+                                                 TileGroupConfigParams>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  TileGroupTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        tile_group_config_params_(GET_PARAM(2)) {
+    tile_group_config_violated_ = false;
+  }
+  ~TileGroupTestLarge() override = default;
+
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = AOM_Q;
+    cfg_.g_threads = 1;
+  }
+
+  bool DoDecode() const override { return true; }
+
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 5);
+      encoder->Control(AV1E_SET_NUM_TG, tile_group_config_params_.num_tg);
+      encoder->Control(AV1E_SET_TILE_COLUMNS,
+                       tile_group_config_params_.num_tile_cols);
+      encoder->Control(AV1E_SET_TILE_ROWS,
+                       tile_group_config_params_.num_tile_rows);
+    }
+  }
+
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (AOM_CODEC_OK == res_dec) {
+      aom_tile_info tile_info;
+      aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
+      AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_TILE_INFO, &tile_info);
+      AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_SHOW_EXISTING_FRAME_FLAG,
+                                    &show_existing_frame_);
+      if (tile_info.num_tile_groups != tile_group_config_params_.num_tg &&
+          !show_existing_frame_)
+        tile_group_config_violated_ = true;
+      EXPECT_EQ(tile_group_config_violated_, false);
+    }
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  int show_existing_frame_;
+  bool tile_group_config_violated_;
+  aom_rc_mode end_usage_check_;
+  ::libaom_test::TestMode encoding_mode_;
+  const TileGroupConfigParams tile_group_config_params_;
+};
+
+TEST_P(TileGroupTestLarge, TileGroupCountTest) {
+  libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 5);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+AV1_INSTANTIATE_TEST_SUITE(TileGroupTestLarge,
+                           ::testing::ValuesIn(kTestModeParams),
+                           ::testing::ValuesIn(tileGroupTestParams));
+}  // namespace
diff --git a/third_party/aom/test/tile_independence_test.cc b/third_party/aom/test/tile_independence_test.cc
new file mode 100644
index 0000000000..84406dd3fb
--- /dev/null
+++ b/third_party/aom/test/tile_independence_test.cc
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/md5_helper.h"
+#include "aom_mem/aom_mem.h"
+
+namespace {
+class TileIndependenceTest
+    : public ::libaom_test::CodecTestWith3Params<int, int, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  TileIndependenceTest()
+      : EncoderTest(GET_PARAM(0)), md5_fw_order_(), md5_inv_order_(),
+        n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)),
+        n_tile_groups_(GET_PARAM(3)) {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.w = 704;
+    cfg.h = 576;
+    cfg.threads = 1;
+    cfg.allow_lowbitdepth = 1;
+    fw_dec_ = codec_->CreateDecoder(cfg, 0);
+    inv_dec_ = codec_->CreateDecoder(cfg, 0);
+    inv_dec_->Control(AV1_INVERT_TILE_DECODE_ORDER, 1);
+
+    if (fw_dec_->IsAV1() && inv_dec_->IsAV1()) {
+      fw_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      fw_dec_->Control(AV1_SET_DECODE_TILE_COL, -1);
+      inv_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      inv_dec_->Control(AV1_SET_DECODE_TILE_COL, -1);
+    }
+  }
+
+  ~TileIndependenceTest() override {
+    delete fw_dec_;
+    delete inv_dec_;
+  }
+
+  void SetUp() override { InitializeConfig(libaom_test::kTwoPassGood); }
+
+  void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                          libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
+      encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
+      SetCpuUsed(encoder);
+    } else if (video->frame() == 3) {
+      encoder->Control(AV1E_SET_NUM_TG, n_tile_groups_);
+    }
+  }
+
+  virtual void SetCpuUsed(libaom_test::Encoder *encoder) {
+    static const int kCpuUsed = 3;
+    encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
+  }
+
+  void UpdateMD5(::libaom_test::Decoder *dec, const aom_codec_cx_pkt_t *pkt,
+                 ::libaom_test::MD5 *md5) {
+    const aom_codec_err_t res = dec->DecodeFrame(
+        reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz);
+    if (res != AOM_CODEC_OK) {
+      abort_ = true;
+      ASSERT_EQ(AOM_CODEC_OK, res);
+    }
+    const aom_image_t *img = dec->GetDxData().Next();
+    md5->Add(img);
+  }
+
+  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
+    UpdateMD5(fw_dec_, pkt, &md5_fw_order_);
+    UpdateMD5(inv_dec_, pkt, &md5_inv_order_);
+  }
+
+  void DoTest() {
+    const aom_rational timebase = { 33333333, 1000000000 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_target_bitrate = 500;
+    cfg_.g_lag_in_frames = 12;
+    cfg_.rc_end_usage = AOM_VBR;
+
+    libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 576,
+                                       timebase.den, timebase.num, 0, 5);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+    const char *md5_fw_str = md5_fw_order_.Get();
+    const char *md5_inv_str = md5_inv_order_.Get();
+    ASSERT_STREQ(md5_fw_str, md5_inv_str);
+  }
+
+  ::libaom_test::MD5 md5_fw_order_, md5_inv_order_;
+  ::libaom_test::Decoder *fw_dec_, *inv_dec_;
+
+ private:
+  int n_tile_cols_;
+  int n_tile_rows_;
+  int n_tile_groups_;
+};
+
+// run an encode with 2 or 4 tiles, and do the decode both in normal and
+// inverted tile ordering. Ensure that the MD5 of the output in both cases
+// is identical. If so, tiles are considered independent and the test passes.
+TEST_P(TileIndependenceTest, MD5Match) {
+  cfg_.large_scale_tile = 0;
+  fw_dec_->Control(AV1_SET_TILE_MODE, 0);
+  inv_dec_->Control(AV1_SET_TILE_MODE, 0);
+  DoTest();
+}
+
+class TileIndependenceTestLarge : public TileIndependenceTest {
+  void SetCpuUsed(libaom_test::Encoder *encoder) override {
+    static const int kCpuUsed = 0;
+    encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
+  }
+};
+
+TEST_P(TileIndependenceTestLarge, MD5Match) {
+  cfg_.large_scale_tile = 0;
+  fw_dec_->Control(AV1_SET_TILE_MODE, 0);
+  inv_dec_->Control(AV1_SET_TILE_MODE, 0);
+  DoTest();
+}
+
+AV1_INSTANTIATE_TEST_SUITE(TileIndependenceTest, ::testing::Values(0, 1),
+                           ::testing::Values(0, 1), ::testing::Values(1, 2, 4));
+AV1_INSTANTIATE_TEST_SUITE(TileIndependenceTestLarge, ::testing::Values(0, 1),
+                           ::testing::Values(0, 1), ::testing::Values(1, 2, 4));
+
+class TileIndependenceLSTest : public TileIndependenceTest {};
+
+TEST_P(TileIndependenceLSTest, MD5Match) {
+  cfg_.large_scale_tile = 1;
+  fw_dec_->Control(AV1_SET_TILE_MODE, 1);
+  fw_dec_->Control(AV1D_EXT_TILE_DEBUG, 1);
+  inv_dec_->Control(AV1_SET_TILE_MODE, 1);
+  inv_dec_->Control(AV1D_EXT_TILE_DEBUG, 1);
+  DoTest();
+}
+
+class TileIndependenceLSTestLarge : public TileIndependenceTestLarge {};
+
+TEST_P(TileIndependenceLSTestLarge, MD5Match) {
+  cfg_.large_scale_tile = 1;
+  fw_dec_->Control(AV1_SET_TILE_MODE, 1);
+  fw_dec_->Control(AV1D_EXT_TILE_DEBUG, 1);
+  inv_dec_->Control(AV1_SET_TILE_MODE, 1);
+  inv_dec_->Control(AV1D_EXT_TILE_DEBUG, 1);
+  DoTest();
+}
+
+AV1_INSTANTIATE_TEST_SUITE(TileIndependenceLSTest, ::testing::Values(6),
+                           ::testing::Values(6), ::testing::Values(1));
+AV1_INSTANTIATE_TEST_SUITE(TileIndependenceLSTestLarge, ::testing::Values(6),
+                           ::testing::Values(6), ::testing::Values(1));
+}  // namespace
diff --git a/third_party/aom/test/time_stamp_test.cc b/third_party/aom/test/time_stamp_test.cc
new file mode 100644
index 0000000000..5de98b719e
--- /dev/null
+++ b/third_party/aom/test/time_stamp_test.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+//  Test AOM timestamp handling
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/video_source.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+const int kVideoSourceWidth = 320;
+const int kVideoSourceHeight = 240;
+const int kFramesToEncode = 3;
+
+// A video source that exposes functions to set the timebase, framerate and
+// starting pts.
+class DummyTimebaseVideoSource : public ::libaom_test::DummyVideoSource {
+ public:
+  // Parameters num and den set the timebase for the video source.
+  DummyTimebaseVideoSource(int num, int den)
+      : framerate_numerator_(30), framerate_denominator_(1), starting_pts_(0) {
+    SetSize(kVideoSourceWidth, kVideoSourceHeight);
+    set_limit(kFramesToEncode);
+    timebase_.num = num;
+    timebase_.den = den;
+  }
+
+  void SetFramerate(int numerator, int denominator) {
+    framerate_numerator_ = numerator;
+    framerate_denominator_ = denominator;
+  }
+
+  // Returns one frames duration in timebase units as a double.
+  double FrameDuration() const {
+    return (static_cast<double>(timebase_.den) / timebase_.num) /
+           (static_cast<double>(framerate_numerator_) / framerate_denominator_);
+  }
+
+  aom_codec_pts_t pts() const override {
+    return static_cast<aom_codec_pts_t>(frame_ * FrameDuration() +
+                                        starting_pts_ + 0.5);
+  }
+
+  unsigned long duration() const override {
+    return static_cast<unsigned long>(FrameDuration() + 0.5);
+  }
+
+  aom_rational_t timebase() const override { return timebase_; }
+
+  void set_starting_pts(int64_t starting_pts) { starting_pts_ = starting_pts; }
+
+ private:
+  aom_rational_t timebase_;
+  int framerate_numerator_;
+  int framerate_denominator_;
+  int64_t starting_pts_;
+};
+
+class TimestampTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> {
+ protected:
+  TimestampTest() : EncoderTest(GET_PARAM(0)) {}
+  ~TimestampTest() override = default;
+
+  void SetUp() override { InitializeConfig(GET_PARAM(1)); }
+};
+
+// Tests encoding in millisecond timebase.
+TEST_P(TimestampTest, EncodeFrames) {
+  DummyTimebaseVideoSource video(1, 1000);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+TEST_P(TimestampTest, TestMicrosecondTimebase) {
+  // Set the timebase to microseconds.
+  DummyTimebaseVideoSource video(1, 1000000);
+  video.set_limit(1);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+TEST_P(TimestampTest, TestAv1Rollover) {
+  DummyTimebaseVideoSource video(1, 1000);
+  video.set_starting_pts(922337170351ll);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+#if CONFIG_REALTIME_ONLY
+AV1_INSTANTIATE_TEST_SUITE(TimestampTest,
+                           ::testing::Values(::libaom_test::kRealTime));
+#else
+AV1_INSTANTIATE_TEST_SUITE(TimestampTest,
+                           ::testing::Values(::libaom_test::kRealTime,
+                                             ::libaom_test::kTwoPassGood));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/tools_common.sh b/third_party/aom/test/tools_common.sh
new file mode 100755
index 0000000000..cb9eba1727
--- /dev/null
+++ b/third_party/aom/test/tools_common.sh
@@ -0,0 +1,520 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+##  This file contains shell code shared by test scripts for libaom tools.
+
+# Use $AOM_TEST_TOOLS_COMMON_SH as a pseudo include guard.
+if [ -z "${AOM_TEST_TOOLS_COMMON_SH}" ]; then
+AOM_TEST_TOOLS_COMMON_SH=included
+
+set -e
+devnull='> /dev/null 2>&1'
+AOM_TEST_PREFIX=""
+
+elog() {
+  echo "$@" 1>&2
+}
+
+vlog() {
+  if [ "${AOM_TEST_VERBOSE_OUTPUT}" = "yes" ]; then
+    echo "$@"
+  fi
+}
+
+# Sets $AOM_TOOL_TEST to the name specified by positional parameter one.
+test_begin() {
+  AOM_TOOL_TEST="${1}"
+}
+
+# Clears the AOM_TOOL_TEST variable after confirming that $AOM_TOOL_TEST matches
+# positional parameter one.
+test_end() {
+  if [ "$1" != "${AOM_TOOL_TEST}" ]; then
+    echo "FAIL completed test mismatch!."
+    echo "  completed test: ${1}"
+    echo "  active test: ${AOM_TOOL_TEST}."
+    return 1
+  fi
+  AOM_TOOL_TEST='<unset>'
+}
+
+# Echoes the target configuration being tested.
+test_configuration_target() {
+  aom_config_c="${LIBAOM_CONFIG_PATH}/config/aom_config.c"
+  # Clean up the cfg pointer line from aom_config.c for easier re-use by
+  # someone examining a failure in the example tests.
+  # 1. Run grep on aom_config.c for cfg and limit the results to 1.
+  # 2. Split the line using ' = ' as separator.
+  # 3. Abuse sed to consume the leading " and trailing "; from the assignment
+  #    to the cfg pointer.
+  cmake_config=$(awk -F ' = ' '/cfg/ { print $NF; exit }' "${aom_config_c}" \
+    | sed -e s/\"// -e s/\"\;//)
+  echo cmake generated via command: cmake path/to/aom ${cmake_config}
+}
+
+# Trap function used for failure reports and tool output directory removal.
+# When the contents of $AOM_TOOL_TEST do not match the string '<unset>', reports
+# failure of test stored in $AOM_TOOL_TEST.
+cleanup() {
+  if [ -n "${AOM_TOOL_TEST}" ] && [ "${AOM_TOOL_TEST}" != '<unset>' ]; then
+    echo "FAIL: $AOM_TOOL_TEST"
+  fi
+  if [ "${AOM_TEST_PRESERVE_OUTPUT}" = "yes" ]; then
+    return
+  fi
+  if [ -n "${AOM_TEST_OUTPUT_DIR}" ] && [ -d "${AOM_TEST_OUTPUT_DIR}" ]; then
+    rm -rf "${AOM_TEST_OUTPUT_DIR}"
+  fi
+}
+
+# Echoes the version string assigned to the VERSION_STRING_NOSP variable defined
+# in $LIBAOM_CONFIG_PATH/config/aom_version.h to stdout.
+cmake_version() {
+  aom_version_h="${LIBAOM_CONFIG_PATH}/config/aom_version.h"
+
+  # Find VERSION_STRING_NOSP line, split it with '"' and print the next to last
+  # field to output the version string to stdout.
+  aom_version=$(awk -F \" '/VERSION_STRING_NOSP/ {print $(NF-1)}' \
+    "${aom_version_h}")
+  echo "v${aom_version}"
+}
+
+# Echoes current git version as reported by running 'git describe', or the
+# version used by the cmake build when git is unavailable.
+source_version() {
+  if git --version > /dev/null 2>&1; then
+    (cd "$(dirname "${0}")"
+    git describe)
+  else
+    cmake_version
+  fi
+}
+
+# Echoes warnings to stdout when source version and CMake build generated
+# version are out of sync.
+check_version_strings() {
+  cmake_version=$(cmake_version)
+  source_version=$(source_version)
+
+  if [ "${cmake_version}" != "${source_version}" ]; then
+    echo "Warning: version has changed since last cmake run."
+    vlog "  cmake version: ${cmake_version} version now: ${source_version}"
+  fi
+}
+
+# $1 is the name of an environment variable containing a directory name to
+# test.
+test_env_var_dir() {
+  local dir=$(eval echo "\${$1}")
+  if [ ! -d "${dir}" ]; then
+    elog "'${dir}': No such directory"
+    elog "The $1 environment variable must be set to a valid directory."
+    return 1
+  fi
+}
+
+# This script requires that the LIBAOM_BIN_PATH, LIBAOM_CONFIG_PATH, and
+# LIBAOM_TEST_DATA_PATH variables are in the environment: Confirm that
+# the variables are set and that they all evaluate to directory paths.
+verify_aom_test_environment() {
+  test_env_var_dir "LIBAOM_BIN_PATH" \
+    && test_env_var_dir "LIBAOM_CONFIG_PATH" \
+    && test_env_var_dir "LIBAOM_TEST_DATA_PATH"
+}
+
+# Greps aom_config.h in LIBAOM_CONFIG_PATH for positional parameter one, which
+# should be a LIBAOM preprocessor flag. Echoes yes to stdout when the feature
+# is available.
+aom_config_option_enabled() {
+  aom_config_option="${1}"
+  aom_config_file="${LIBAOM_CONFIG_PATH}/config/aom_config.h"
+  config_line=$(grep "${aom_config_option}" "${aom_config_file}")
+  if echo "${config_line}" | egrep -q '1$'; then
+    echo yes
+  fi
+}
+
+# Echoes yes when output of test_configuration_target() contains win32 or win64.
+is_windows_target() {
+  if test_configuration_target \
+     | grep -q -e win32 -e win64 > /dev/null 2>&1; then
+    echo yes
+  fi
+}
+
+# Echoes path to $1 when it's executable and exists in one of the directories
+# included in $tool_paths, or an empty string. Caller is responsible for testing
+# the string once the function returns.
+aom_tool_path() {
+  local tool_name="$1"
+  local root_path="${LIBAOM_BIN_PATH}"
+  local suffix="${AOM_TEST_EXE_SUFFIX}"
+  local tool_paths="\
+    ${root_path}/${tool_name}${suffix} \
+    ${root_path}/../${tool_name}${suffix} \
+    ${root_path}/tools/${tool_name}${suffix} \
+    ${root_path}/../tools/${tool_name}${suffix}"
+
+  local toolpath=""
+
+  for tool_path in ${tool_paths}; do
+    if [ -x "${tool_path}" ] && [ -f "${tool_path}" ]; then
+      echo "${tool_path}"
+      return 0
+    fi
+  done
+
+  return 1
+}
+
+# Echoes yes to stdout when the file named by positional parameter one exists
+# in LIBAOM_BIN_PATH, and is executable.
+aom_tool_available() {
+  local tool_name="$1"
+  local tool="${LIBAOM_BIN_PATH}/${tool_name}${AOM_TEST_EXE_SUFFIX}"
+  [ -x "${tool}" ] && echo yes
+}
+
+# Echoes yes to stdout when aom_config_option_enabled() reports yes for
+# CONFIG_AV1_DECODER.
+av1_decode_available() {
+  [ "$(aom_config_option_enabled CONFIG_AV1_DECODER)" = "yes" ] && echo yes
+}
+
+# Echoes yes to stdout when aom_config_option_enabled() reports yes for
+# CONFIG_AV1_ENCODER.
+av1_encode_available() {
+  [ "$(aom_config_option_enabled CONFIG_AV1_ENCODER)" = "yes" ] && echo yes
+}
+
+# Echoes "fast" encode params for use with aomenc.
+aomenc_encode_test_fast_params() {
+  echo "--cpu-used=2
+        --limit=${AV1_ENCODE_TEST_FRAME_LIMIT}
+        --lag-in-frames=0
+        --test-decode=fatal"
+}
+
+# Echoes realtime encode params for use with aomenc.
+aomenc_encode_test_rt_params() {
+  echo "--limit=${AV1_ENCODE_TEST_FRAME_LIMIT}
+        --test-decode=fatal
+        --enable-tpl-model=0
+        --deltaq-mode=0
+        --enable-order-hint=0
+        --profile=0
+        --static-thresh=0
+        --end-usage=cbr
+        --cpu-used=7
+        --passes=1
+        --usage=1
+        --lag-in-frames=0
+        --aq-mode=3
+        --enable-obmc=0
+        --enable-warped-motion=0
+        --enable-ref-frame-mvs=0
+        --enable-cdef=1
+        --enable-order-hint=0
+        --coeff-cost-upd-freq=3
+        --mode-cost-upd-freq=3
+        --mv-cost-upd-freq=3"
+}
+
+# Echoes yes to stdout when aom_config_option_enabled() reports yes for
+# CONFIG_AV1_HIGHBITDEPTH.
+highbitdepth_available() {
+  [ "$(aom_config_option_enabled CONFIG_AV1_HIGHBITDEPTH)" = "yes" ] && echo yes
+}
+
+# Echoes yes to stdout when aom_config_option_enabled() reports yes for
+# CONFIG_WEBM_IO.
+webm_io_available() {
+  [ "$(aom_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes
+}
+
+# Echoes yes to stdout when aom_config_option_enabled() reports yes for
+# CONFIG_REALTIME_ONLY.
+realtime_only_build() {
+  [ "$(aom_config_option_enabled CONFIG_REALTIME_ONLY)" = "yes" ] && echo yes
+}
+
+# Filters strings from $1 using the filter specified by $2. Filter behavior
+# depends on the presence of $3. When $3 is present, strings that match the
+# filter are excluded. When $3 is omitted, strings matching the filter are
+# included.
+# The filtered result is echoed to stdout.
+filter_strings() {
+  strings=${1}
+  filter=${2}
+  exclude=${3}
+
+  if [ -n "${exclude}" ]; then
+    # When positional parameter three exists the caller wants to remove strings.
+    # Tell grep to invert matches using the -v argument.
+    exclude='-v'
+  else
+    unset exclude
+  fi
+
+  if [ -n "${filter}" ]; then
+    for s in ${strings}; do
+      if echo "${s}" | egrep -q ${exclude} "${filter}" > /dev/null 2>&1; then
+        filtered_strings="${filtered_strings} ${s}"
+      fi
+    done
+  else
+    filtered_strings="${strings}"
+  fi
+  echo "${filtered_strings}"
+}
+
+# Runs user test functions passed via positional parameters one and two.
+# Functions in positional parameter one are treated as environment verification
+# functions and are run unconditionally. Functions in positional parameter two
+# are run according to the rules specified in aom_test_usage().
+run_tests() {
+  local env_tests="verify_aom_test_environment $1"
+  local tests_to_filter="$2"
+  local test_name="${AOM_TEST_NAME}"
+
+  if [ -z "${test_name}" ]; then
+    test_name="$(basename "${0%.*}")"
+  fi
+
+  if [ "${AOM_TEST_RUN_DISABLED_TESTS}" != "yes" ]; then
+    # Filter out DISABLED tests.
+    tests_to_filter=$(filter_strings "${tests_to_filter}" ^DISABLED exclude)
+  fi
+
+  if [ -n "${AOM_TEST_FILTER}" ]; then
+    # Remove tests not matching the user's filter.
+    tests_to_filter=$(filter_strings "${tests_to_filter}" ${AOM_TEST_FILTER})
+  fi
+
+  # User requested test listing: Dump test names and return.
+  if [ "${AOM_TEST_LIST_TESTS}" = "yes" ]; then
+    for test_name in $tests_to_filter; do
+      echo ${test_name}
+    done
+    return
+  fi
+
+  # Don't bother with the environment tests if everything else was disabled.
+  [ -z "${tests_to_filter}" ] && return
+
+  # Combine environment and actual tests.
+  local tests_to_run="${env_tests} ${tests_to_filter}"
+
+  # av1_c_vs_simd_encode is a standalone test, and it doesn't need to check the
+  # version string.
+  if [ "${test_name}" != "av1_c_vs_simd_encode" ]; then
+    check_version_strings
+  fi
+
+  # Run tests.
+  for test in ${tests_to_run}; do
+    test_begin "${test}"
+    vlog "  RUN  ${test}"
+    "${test}"
+    vlog "  PASS ${test}"
+    test_end "${test}"
+  done
+
+  local tested_config="$(test_configuration_target) @ $(source_version)"
+  echo "${test_name}: Done, all tests pass for ${tested_config}."
+}
+
+aom_test_usage() {
+cat << EOF
+  Usage: ${0##*/} [arguments]
+    --bin-path <path to libaom binaries directory>
+    --config-path <path to libaom config directory>
+    --filter <filter>: User test filter. Only tests matching filter are run.
+    --run-disabled-tests: Run disabled tests.
+    --help: Display this message and exit.
+    --test-data-path <path to libaom test data directory>
+    --show-program-output: Shows output from all programs being tested.
+    --prefix: Allows for a user specified prefix to be inserted before all test
+              programs. Grants the ability, for example, to run test programs
+              within valgrind.
+    --list-tests: List all test names and exit without actually running tests.
+    --verbose: Verbose output.
+
+    When the --bin-path option is not specified the script attempts to use
+    \$LIBAOM_BIN_PATH and then the current directory.
+
+    When the --config-path option is not specified the script attempts to use
+    \$LIBAOM_CONFIG_PATH and then the current directory.
+
+    When the -test-data-path option is not specified the script attempts to use
+    \$LIBAOM_TEST_DATA_PATH and then the current directory.
+EOF
+}
+
+# Returns non-zero (failure) when required environment variables are empty
+# strings.
+aom_test_check_environment() {
+  if [ -z "${LIBAOM_BIN_PATH}" ] || \
+     [ -z "${LIBAOM_CONFIG_PATH}" ] || \
+     [ -z "${LIBAOM_TEST_DATA_PATH}" ]; then
+    return 1
+  fi
+}
+
+# Echo aomenc command line parameters allowing use of a raw yuv file as
+# input to aomenc.
+yuv_raw_input() {
+  echo ""${YUV_RAW_INPUT}"
+       --width="${YUV_RAW_INPUT_WIDTH}"
+       --height="${YUV_RAW_INPUT_HEIGHT}""
+}
+
+# Do a small encode for testing decoders.
+encode_yuv_raw_input_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    local output="$1"
+    local encoder="$(aom_tool_path aomenc)"
+    shift
+    eval "${encoder}" $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --output="${output}" \
+      $@ \
+      ${devnull}
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+# Parse the command line.
+while [ -n "$1" ]; do
+  case "$1" in
+    --bin-path)
+      LIBAOM_BIN_PATH="$2"
+      shift
+      ;;
+    --config-path)
+      LIBAOM_CONFIG_PATH="$2"
+      shift
+      ;;
+    --filter)
+      AOM_TEST_FILTER="$2"
+      shift
+      ;;
+    --run-disabled-tests)
+      AOM_TEST_RUN_DISABLED_TESTS=yes
+      ;;
+    --help)
+      aom_test_usage
+      exit
+      ;;
+    --test-data-path)
+      LIBAOM_TEST_DATA_PATH="$2"
+      shift
+      ;;
+    --prefix)
+      AOM_TEST_PREFIX="$2"
+      shift
+      ;;
+    --verbose)
+      AOM_TEST_VERBOSE_OUTPUT=yes
+      ;;
+    --show-program-output)
+      devnull=
+      ;;
+    --list-tests)
+      AOM_TEST_LIST_TESTS=yes
+      ;;
+    *)
+      aom_test_usage
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+# Handle running the tests from a build directory without arguments when running
+# the tests on *nix/macosx.
+LIBAOM_BIN_PATH="${LIBAOM_BIN_PATH:-.}"
+LIBAOM_CONFIG_PATH="${LIBAOM_CONFIG_PATH:-.}"
+LIBAOM_TEST_DATA_PATH="${LIBAOM_TEST_DATA_PATH:-.}"
+
+# Create a temporary directory for output files, and a trap to clean it up.
+if [ -n "${TMPDIR}" ]; then
+  AOM_TEST_TEMP_ROOT="${TMPDIR}"
+elif [ -n "${TEMPDIR}" ]; then
+  AOM_TEST_TEMP_ROOT="${TEMPDIR}"
+else
+  AOM_TEST_TEMP_ROOT=/tmp
+fi
+
+AOM_TEST_OUTPUT_DIR="${AOM_TEST_OUTPUT_DIR:-${AOM_TEST_TEMP_ROOT}/aom_test_$$}"
+
+if ! mkdir -p "${AOM_TEST_OUTPUT_DIR}" || \
+   [ ! -d "${AOM_TEST_OUTPUT_DIR}" ]; then
+  echo "${0##*/}: Cannot create output directory, giving up."
+  echo "${0##*/}:   AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}"
+  exit 1
+fi
+
+AOM_TEST_PRESERVE_OUTPUT=${AOM_TEST_PRESERVE_OUTPUT:-no}
+
+# This checking requires config/aom_config.c that is available in Jenkins
+# testing.
+if [ "$(is_windows_target)" = "yes" ]; then
+  AOM_TEST_EXE_SUFFIX=".exe"
+fi
+
+# Variables shared by tests.
+AV1_ENCODE_CPU_USED=${AV1_ENCODE_CPU_USED:-1}
+AV1_ENCODE_TEST_FRAME_LIMIT=${AV1_ENCODE_TEST_FRAME_LIMIT:-5}
+AV1_IVF_FILE="${AV1_IVF_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.ivf}"
+AV1_OBU_ANNEXB_FILE="${AV1_OBU_ANNEXB_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.annexb.obu}"
+AV1_OBU_SEC5_FILE="${AV1_OBU_SEC5_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.section5.obu}"
+AV1_WEBM_FILE="${AV1_WEBM_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.webm}"
+
+YUV_RAW_INPUT="${LIBAOM_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
+YUV_RAW_INPUT_WIDTH=352
+YUV_RAW_INPUT_HEIGHT=288
+
+Y4M_NOSQ_PAR_INPUT="${LIBAOM_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
+Y4M_720P_INPUT="${LIBAOM_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
+
+# Setup a trap function to clean up after tests complete.
+trap cleanup EXIT
+
+vlog "$(basename "${0%.*}") test configuration:
+  LIBAOM_BIN_PATH=${LIBAOM_BIN_PATH}
+  LIBAOM_CONFIG_PATH=${LIBAOM_CONFIG_PATH}
+  LIBAOM_TEST_DATA_PATH=${LIBAOM_TEST_DATA_PATH}
+  AOM_TEST_EXE_SUFFIX=${AOM_TEST_EXE_SUFFIX}
+  AOM_TEST_FILTER=${AOM_TEST_FILTER}
+  AOM_TEST_LIST_TESTS=${AOM_TEST_LIST_TESTS}
+  AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}
+  AOM_TEST_PREFIX=${AOM_TEST_PREFIX}
+  AOM_TEST_PRESERVE_OUTPUT=${AOM_TEST_PRESERVE_OUTPUT}
+  AOM_TEST_RUN_DISABLED_TESTS=${AOM_TEST_RUN_DISABLED_TESTS}
+  AOM_TEST_SHOW_PROGRAM_OUTPUT=${AOM_TEST_SHOW_PROGRAM_OUTPUT}
+  AOM_TEST_TEMP_ROOT=${AOM_TEST_TEMP_ROOT}
+  AOM_TEST_VERBOSE_OUTPUT=${AOM_TEST_VERBOSE_OUTPUT}
+  AV1_ENCODE_CPU_USED=${AV1_ENCODE_CPU_USED}
+  AV1_ENCODE_TEST_FRAME_LIMIT=${AV1_ENCODE_TEST_FRAME_LIMIT}
+  AV1_IVF_FILE=${AV1_IVF_FILE}
+  AV1_OBU_ANNEXB_FILE=${AV1_OBU_ANNEXB_FILE}
+  AV1_OBU_SEC5_FILE=${AV1_OBU_SEC5_FILE}
+  AV1_WEBM_FILE=${AV1_WEBM_FILE}
+  YUV_RAW_INPUT=${YUV_RAW_INPUT}
+  YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
+  YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}
+  Y4M_NOSQ_PAR_INPUT=${Y4M_NOSQ_PAR_INPUT}"
+
+fi  # End $AOM_TEST_TOOLS_COMMON_SH pseudo include guard.
diff --git a/third_party/aom/test/tpl_model_test.cc b/third_party/aom/test/tpl_model_test.cc
new file mode 100644
index 0000000000..91eb5e94d3
--- /dev/null
+++ b/third_party/aom/test/tpl_model_test.cc
@@ -0,0 +1,529 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdlib>
+#include <memory>
+#include <new>
+#include <vector>
+
+#include "av1/encoder/cost.h"
+#include "av1/encoder/tpl_model.h"
+#include "av1/encoder/encoder.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+#if CONFIG_BITRATE_ACCURACY
+constexpr double epsilon = 0.0000001;
+#endif
+
+double laplace_prob(double q_step, double b, double zero_bin_ratio,
+                    int qcoeff) {
+  int abs_qcoeff = abs(qcoeff);
+  double z0 = fmax(exp(-zero_bin_ratio / 2 * q_step / b), TPL_EPSILON);
+  if (abs_qcoeff == 0) {
+    double p0 = 1 - z0;
+    return p0;
+  } else {
+    assert(abs_qcoeff > 0);
+    double z = fmax(exp(-q_step / b), TPL_EPSILON);
+    double p = z0 / 2 * (1 - z) * pow(z, abs_qcoeff - 1);
+    return p;
+  }
+}
+TEST(TplModelTest, ExponentialEntropyBoundaryTest1) {
+  double b = 0;
+  double q_step = 1;
+  double entropy = av1_exponential_entropy(q_step, b);
+  EXPECT_NEAR(entropy, 0, 0.00001);
+}
+
+TEST(TplModelTest, TransformCoeffEntropyTest1) {
+  // Check the consistency between av1_estimate_coeff_entropy() and
+  // laplace_prob()
+  double b = 1;
+  double q_step = 1;
+  double zero_bin_ratio = 2;
+  for (int qcoeff = -256; qcoeff < 256; ++qcoeff) {
+    double rate = av1_estimate_coeff_entropy(q_step, b, zero_bin_ratio, qcoeff);
+    double prob = laplace_prob(q_step, b, zero_bin_ratio, qcoeff);
+    double ref_rate = -log2(prob);
+    EXPECT_DOUBLE_EQ(rate, ref_rate);
+  }
+}
+
+TEST(TplModelTest, TransformCoeffEntropyTest2) {
+  // Check the consistency between av1_estimate_coeff_entropy(), laplace_prob()
+  // and av1_laplace_entropy()
+  double b = 1;
+  double q_step = 1;
+  double zero_bin_ratio = 2;
+  double est_expected_rate = 0;
+  for (int qcoeff = -20; qcoeff < 20; ++qcoeff) {
+    double rate = av1_estimate_coeff_entropy(q_step, b, zero_bin_ratio, qcoeff);
+    double prob = laplace_prob(q_step, b, zero_bin_ratio, qcoeff);
+    est_expected_rate += prob * rate;
+  }
+  double expected_rate = av1_laplace_entropy(q_step, b, zero_bin_ratio);
+  EXPECT_NEAR(expected_rate, est_expected_rate, 0.001);
+}
+
+TEST(TplModelTest, InitTplStats1) {
+  // We use heap allocation instead of stack allocation here to avoid
+  // -Wstack-usage warning.
+  std::unique_ptr<TplParams> tpl_data(new (std::nothrow) TplParams);
+  ASSERT_NE(tpl_data, nullptr);
+  av1_zero(*tpl_data);
+  tpl_data->ready = 1;
+  EXPECT_EQ(sizeof(tpl_data->tpl_stats_buffer),
+            MAX_LENGTH_TPL_FRAME_STATS * sizeof(tpl_data->tpl_stats_buffer[0]));
+  for (int i = 0; i < MAX_LENGTH_TPL_FRAME_STATS; ++i) {
+    // Set it to a random non-zero number
+    tpl_data->tpl_stats_buffer[i].is_valid = i + 1;
+  }
+  av1_init_tpl_stats(tpl_data.get());
+  EXPECT_EQ(tpl_data->ready, 0);
+  for (int i = 0; i < MAX_LENGTH_TPL_FRAME_STATS; ++i) {
+    EXPECT_EQ(tpl_data->tpl_stats_buffer[i].is_valid, 0);
+  }
+}
+
+TEST(TplModelTest, DeltaRateCostZeroFlow) {
+  // When srcrf_dist equal to recrf_dist, av1_delta_rate_cost should return 0
+  int64_t srcrf_dist = 256;
+  int64_t recrf_dist = 256;
+  int64_t delta_rate = 512;
+  int pixel_num = 256;
+  int64_t rate_cost =
+      av1_delta_rate_cost(delta_rate, recrf_dist, srcrf_dist, pixel_num);
+  EXPECT_EQ(rate_cost, 0);
+}
+
+// a reference function of av1_delta_rate_cost() with delta_rate using bit as
+// basic unit
+double ref_delta_rate_cost(int64_t delta_rate, double src_rec_ratio,
+                           int pixel_count) {
+  assert(src_rec_ratio <= 1 && src_rec_ratio >= 0);
+  double bits_per_pixel = (double)delta_rate / pixel_count;
+  double p = pow(2, bits_per_pixel);
+  double flow_rate_per_pixel =
+      sqrt(p * p / (src_rec_ratio * p * p + (1 - src_rec_ratio)));
+  double rate_cost = pixel_count * log2(flow_rate_per_pixel);
+  return rate_cost;
+}
+
+TEST(TplModelTest, DeltaRateCostReference) {
+  const int64_t scale = TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT;
+  std::vector<int64_t> srcrf_dist_arr = { 256, 257, 312 };
+  std::vector<int64_t> recrf_dist_arr = { 512, 288, 620 };
+  std::vector<int64_t> delta_rate_arr = { 10, 278, 100 };
+  for (size_t t = 0; t < srcrf_dist_arr.size(); ++t) {
+    int64_t srcrf_dist = srcrf_dist_arr[t];
+    int64_t recrf_dist = recrf_dist_arr[t];
+    int64_t delta_rate = delta_rate_arr[t];
+    int64_t scaled_delta_rate = delta_rate << scale;
+    int pixel_count = 256;
+    int64_t rate_cost = av1_delta_rate_cost(scaled_delta_rate, recrf_dist,
+                                            srcrf_dist, pixel_count);
+    rate_cost >>= scale;
+    double src_rec_ratio = (double)srcrf_dist / recrf_dist;
+    double ref_rate_cost =
+        ref_delta_rate_cost(delta_rate, src_rec_ratio, pixel_count);
+    EXPECT_NEAR((double)rate_cost, ref_rate_cost, 1);
+  }
+}
+
+TEST(TplModelTest, GetOverlapAreaHasOverlap) {
+  // The block a's area is [10, 17) x [18, 24).
+  // The block b's area is [8, 15) x [17, 23).
+  // The overlapping area between block a and block b is [10, 15) x [18, 23).
+  // Therefore, the size of the area is (15 - 10) * (23 - 18) = 25.
+  int row_a = 10;
+  int col_a = 18;
+  int row_b = 8;
+  int col_b = 17;
+  int height = 7;
+  int width = 6;
+  int overlap_area =
+      av1_get_overlap_area(row_a, col_a, row_b, col_b, width, height);
+  EXPECT_EQ(overlap_area, 25);
+}
+
+TEST(TplModelTest, GetOverlapAreaNoOverlap) {
+  // The block a's area is [10, 14) x [18, 22).
+  // The block b's area is [5, 9) x [5, 9).
+  // Threre is no overlapping area between block a and block b.
+  // Therefore, the return value should be zero.
+  int row_a = 10;
+  int col_a = 18;
+  int row_b = 5;
+  int col_b = 5;
+  int height = 4;
+  int width = 4;
+  int overlap_area =
+      av1_get_overlap_area(row_a, col_a, row_b, col_b, width, height);
+  EXPECT_EQ(overlap_area, 0);
+}
+
+TEST(TplModelTest, GetQIndexFromQstepRatio) {
+  const aom_bit_depth_t bit_depth = AOM_BITS_8;
+  // When qstep_ratio is 1, the output q_index should be equal to leaf_qindex.
+  double qstep_ratio = 1.0;
+  for (int leaf_qindex = 1; leaf_qindex <= 255; ++leaf_qindex) {
+    const int q_index =
+        av1_get_q_index_from_qstep_ratio(leaf_qindex, qstep_ratio, bit_depth);
+    EXPECT_EQ(q_index, leaf_qindex);
+  }
+
+  // When qstep_ratio is very low, the output q_index should be 1.
+  qstep_ratio = 0.0001;
+  for (int leaf_qindex = 1; leaf_qindex <= 255; ++leaf_qindex) {
+    const int q_index =
+        av1_get_q_index_from_qstep_ratio(leaf_qindex, qstep_ratio, bit_depth);
+    EXPECT_EQ(q_index, 0);
+  }
+}
+
+TEST(TplModelTest, TxfmStatsInitTest) {
+  TplTxfmStats tpl_txfm_stats;
+  av1_init_tpl_txfm_stats(&tpl_txfm_stats);
+  EXPECT_EQ(tpl_txfm_stats.coeff_num, 256);
+  EXPECT_EQ(tpl_txfm_stats.txfm_block_count, 0);
+  for (int i = 0; i < tpl_txfm_stats.coeff_num; ++i) {
+    EXPECT_DOUBLE_EQ(tpl_txfm_stats.abs_coeff_sum[i], 0);
+  }
+}
+
+#if CONFIG_BITRATE_ACCURACY
+TEST(TplModelTest, TxfmStatsAccumulateTest) {
+  TplTxfmStats sub_stats;
+  av1_init_tpl_txfm_stats(&sub_stats);
+  sub_stats.txfm_block_count = 17;
+  for (int i = 0; i < sub_stats.coeff_num; ++i) {
+    sub_stats.abs_coeff_sum[i] = i;
+  }
+
+  TplTxfmStats accumulated_stats;
+  av1_init_tpl_txfm_stats(&accumulated_stats);
+  accumulated_stats.txfm_block_count = 13;
+  for (int i = 0; i < accumulated_stats.coeff_num; ++i) {
+    accumulated_stats.abs_coeff_sum[i] = 5 * i;
+  }
+
+  av1_accumulate_tpl_txfm_stats(&sub_stats, &accumulated_stats);
+  EXPECT_DOUBLE_EQ(accumulated_stats.txfm_block_count, 30);
+  for (int i = 0; i < accumulated_stats.coeff_num; ++i) {
+    EXPECT_DOUBLE_EQ(accumulated_stats.abs_coeff_sum[i], 6 * i);
+  }
+}
+
+TEST(TplModelTest, TxfmStatsRecordTest) {
+  TplTxfmStats stats1;
+  TplTxfmStats stats2;
+  av1_init_tpl_txfm_stats(&stats1);
+  av1_init_tpl_txfm_stats(&stats2);
+
+  tran_low_t coeff[256];
+  for (int i = 0; i < 256; ++i) {
+    coeff[i] = i;
+  }
+  av1_record_tpl_txfm_block(&stats1, coeff);
+  EXPECT_EQ(stats1.txfm_block_count, 1);
+
+  // we record the same transform block twice for testing purpose
+  av1_record_tpl_txfm_block(&stats2, coeff);
+  av1_record_tpl_txfm_block(&stats2, coeff);
+  EXPECT_EQ(stats2.txfm_block_count, 2);
+
+  EXPECT_EQ(stats1.coeff_num, 256);
+  EXPECT_EQ(stats2.coeff_num, 256);
+  for (int i = 0; i < 256; ++i) {
+    EXPECT_DOUBLE_EQ(stats2.abs_coeff_sum[i], 2 * stats1.abs_coeff_sum[i]);
+  }
+}
+#endif  // CONFIG_BITRATE_ACCURACY
+
+TEST(TplModelTest, ComputeMVDifferenceTest) {
+  TplDepFrame tpl_frame_small;
+  tpl_frame_small.is_valid = true;
+  tpl_frame_small.mi_rows = 4;
+  tpl_frame_small.mi_cols = 4;
+  tpl_frame_small.stride = 1;
+  uint8_t right_shift_small = 1;
+  int step_small = 1 << right_shift_small;
+
+  // Test values for motion vectors.
+  int mv_vals_small[4] = { 1, 2, 3, 4 };
+  int index = 0;
+
+  // 4x4 blocks means we need to allocate a 4 size array.
+  // According to av1_tpl_ptr_pos:
+  // (row >> right_shift) * stride + (col >> right_shift)
+  // (4 >> 1) * 1 + (4 >> 1) = 4
+  TplDepStats stats_buf_small[4];
+  tpl_frame_small.tpl_stats_ptr = stats_buf_small;
+
+  for (int row = 0; row < tpl_frame_small.mi_rows; row += step_small) {
+    for (int col = 0; col < tpl_frame_small.mi_cols; col += step_small) {
+      TplDepStats tpl_stats;
+      tpl_stats.ref_frame_index[0] = 0;
+      int_mv mv;
+      mv.as_mv.row = mv_vals_small[index];
+      mv.as_mv.col = mv_vals_small[index];
+      index++;
+      tpl_stats.mv[0] = mv;
+      tpl_frame_small.tpl_stats_ptr[av1_tpl_ptr_pos(
+          row, col, tpl_frame_small.stride, right_shift_small)] = tpl_stats;
+    }
+  }
+
+  int_mv result_mv =
+      av1_compute_mv_difference(&tpl_frame_small, 1, 1, step_small,
+                                tpl_frame_small.stride, right_shift_small);
+
+  // Expect the result to be exactly equal to 1 because this is the difference
+  // between neighboring motion vectors in this instance.
+  EXPECT_EQ(result_mv.as_mv.row, 1);
+  EXPECT_EQ(result_mv.as_mv.col, 1);
+}
+
+TEST(TplModelTest, ComputeMVBitsTest) {
+  TplDepFrame tpl_frame;
+  tpl_frame.is_valid = true;
+  tpl_frame.mi_rows = 16;
+  tpl_frame.mi_cols = 16;
+  tpl_frame.stride = 24;
+  uint8_t right_shift = 2;
+  int step = 1 << right_shift;
+  // Test values for motion vectors.
+  int mv_vals_ordered[16] = { 1, 2,  3,  4,  5,  6,  7,  8,
+                              9, 10, 11, 12, 13, 14, 15, 16 };
+  int mv_vals[16] = { 1, 16, 2, 15, 3, 14, 4, 13, 5, 12, 6, 11, 7, 10, 8, 9 };
+  int index = 0;
+
+  // 16x16 blocks means we need to allocate a 100 size array.
+  // According to av1_tpl_ptr_pos:
+  // (row >> right_shift) * stride + (col >> right_shift)
+  // (16 >> 2) * 24 + (16 >> 2) = 100
+  TplDepStats stats_buf[100];
+  tpl_frame.tpl_stats_ptr = stats_buf;
+
+  for (int row = 0; row < tpl_frame.mi_rows; row += step) {
+    for (int col = 0; col < tpl_frame.mi_cols; col += step) {
+      TplDepStats tpl_stats;
+      tpl_stats.ref_frame_index[0] = 0;
+      int_mv mv;
+      mv.as_mv.row = mv_vals_ordered[index];
+      mv.as_mv.col = mv_vals_ordered[index];
+      index++;
+      tpl_stats.mv[0] = mv;
+      tpl_frame.tpl_stats_ptr[av1_tpl_ptr_pos(row, col, tpl_frame.stride,
+                                              right_shift)] = tpl_stats;
+    }
+  }
+
+  double result = av1_tpl_compute_frame_mv_entropy(&tpl_frame, right_shift);
+
+  // Expect the result to be low because the motion vectors are ordered.
+  // The estimation algorithm takes this into account and reduces the cost.
+  EXPECT_NEAR(result, 20, 5);
+
+  index = 0;
+  for (int row = 0; row < tpl_frame.mi_rows; row += step) {
+    for (int col = 0; col < tpl_frame.mi_cols; col += step) {
+      TplDepStats tpl_stats;
+      tpl_stats.ref_frame_index[0] = 0;
+      int_mv mv;
+      mv.as_mv.row = mv_vals[index];
+      mv.as_mv.col = mv_vals[index];
+      index++;
+      tpl_stats.mv[0] = mv;
+      tpl_frame.tpl_stats_ptr[av1_tpl_ptr_pos(row, col, tpl_frame.stride,
+                                              right_shift)] = tpl_stats;
+    }
+  }
+
+  result = av1_tpl_compute_frame_mv_entropy(&tpl_frame, right_shift);
+
+  // Expect the result to be higher because the vectors are not ordered.
+  // Neighboring vectors will have different values, increasing the cost.
+  EXPECT_NEAR(result, 70, 5);
+}
+#if CONFIG_BITRATE_ACCURACY
+
+TEST(TplModelTest, VbrRcInfoSetGopBitBudget) {
+  VBR_RATECTRL_INFO vbr_rc_info;
+  const double total_bit_budget = 2000;
+  const int show_frame_count = 8;
+  const int gop_show_frame_count = 4;
+  av1_vbr_rc_init(&vbr_rc_info, total_bit_budget, show_frame_count);
+  av1_vbr_rc_set_gop_bit_budget(&vbr_rc_info, gop_show_frame_count);
+  EXPECT_NEAR(vbr_rc_info.gop_bit_budget, 1000, epsilon);
+}
+
+void init_toy_gf_group(GF_GROUP *gf_group) {
+  av1_zero(*gf_group);
+  gf_group->size = 4;
+  const FRAME_UPDATE_TYPE update_type[4] = { KF_UPDATE, ARF_UPDATE,
+                                             INTNL_ARF_UPDATE, LF_UPDATE };
+  for (int i = 0; i < gf_group->size; ++i) {
+    gf_group->update_type[i] = update_type[i];
+  }
+}
+
+void init_toy_vbr_rc_info(VBR_RATECTRL_INFO *vbr_rc_info, int gop_size) {
+  int total_bit_budget = 2000;
+  int show_frame_count = 8;
+  av1_vbr_rc_init(vbr_rc_info, total_bit_budget, show_frame_count);
+
+  for (int i = 0; i < gop_size; ++i) {
+    vbr_rc_info->qstep_ratio_list[i] = 1;
+  }
+}
+
+void init_toy_tpl_txfm_stats(std::vector<TplTxfmStats> *stats_list) {
+  for (size_t i = 0; i < stats_list->size(); i++) {
+    TplTxfmStats *txfm_stats = &stats_list->at(i);
+    av1_init_tpl_txfm_stats(txfm_stats);
+    txfm_stats->txfm_block_count = 8;
+    for (int j = 0; j < txfm_stats->coeff_num; j++) {
+      txfm_stats->abs_coeff_sum[j] = 1000 + j;
+    }
+    av1_tpl_txfm_stats_update_abs_coeff_mean(txfm_stats);
+  }
+}
+
+/*
+ * Helper method to brute-force search for the closest q_index
+ * that achieves the specified bit budget.
+ */
+int find_gop_q_iterative(double bit_budget, aom_bit_depth_t bit_depth,
+                         const double *update_type_scale_factors,
+                         int frame_count,
+                         const FRAME_UPDATE_TYPE *update_type_list,
+                         const double *qstep_ratio_list,
+                         const TplTxfmStats *stats_list, int *q_index_list,
+                         double *estimated_bitrate_byframe) {
+  int best_q = 255;
+  double curr_estimate = av1_vbr_rc_info_estimate_gop_bitrate(
+      best_q, bit_depth, update_type_scale_factors, frame_count,
+      update_type_list, qstep_ratio_list, stats_list, q_index_list,
+      estimated_bitrate_byframe);
+  double min_bits_diff = fabs(curr_estimate - bit_budget);
+  // Start at q = 254 because we already have an estimate for q = 255.
+  for (int q = 254; q >= 0; q--) {
+    curr_estimate = av1_vbr_rc_info_estimate_gop_bitrate(
+        q, bit_depth, update_type_scale_factors, frame_count, update_type_list,
+        qstep_ratio_list, stats_list, q_index_list, estimated_bitrate_byframe);
+    double bits_diff = fabs(curr_estimate - bit_budget);
+    if (bits_diff <= min_bits_diff) {
+      min_bits_diff = bits_diff;
+      best_q = q;
+    }
+  }
+  return best_q;
+}
+
+TEST(TplModelTest, EstimateFrameRateTest) {
+  GF_GROUP gf_group;
+  init_toy_gf_group(&gf_group);
+
+  VBR_RATECTRL_INFO vbr_rc_info;
+  init_toy_vbr_rc_info(&vbr_rc_info, gf_group.size);
+
+  std::vector<TplTxfmStats> stats_list(gf_group.size);
+  init_toy_tpl_txfm_stats(&stats_list);
+
+  std::vector<double> est_bitrate_list(gf_group.size);
+  init_toy_tpl_txfm_stats(&stats_list);
+  const aom_bit_depth_t bit_depth = AOM_BITS_8;
+
+  const int q = 125;
+
+  // Case1: all scale factors are 0
+  double scale_factors[FRAME_UPDATE_TYPES] = { 0 };
+  double estimate = av1_vbr_rc_info_estimate_gop_bitrate(
+      q, bit_depth, scale_factors, gf_group.size, gf_group.update_type,
+      vbr_rc_info.qstep_ratio_list, stats_list.data(), vbr_rc_info.q_index_list,
+      est_bitrate_list.data());
+  EXPECT_NEAR(estimate, 0, epsilon);
+
+  // Case2: all scale factors are 1
+  for (int i = 0; i < FRAME_UPDATE_TYPES; i++) {
+    scale_factors[i] = 1;
+  }
+  estimate = av1_vbr_rc_info_estimate_gop_bitrate(
+      q, bit_depth, scale_factors, gf_group.size, gf_group.update_type,
+      vbr_rc_info.qstep_ratio_list, stats_list.data(), vbr_rc_info.q_index_list,
+      est_bitrate_list.data());
+  double ref_estimate = 0;
+  for (int i = 0; i < gf_group.size; i++) {
+    ref_estimate += est_bitrate_list[i];
+  }
+  EXPECT_NEAR(estimate, ref_estimate, epsilon);
+
+  // Case3: Key frame scale factor is 0 and others are 1
+  for (int i = 0; i < FRAME_UPDATE_TYPES; i++) {
+    if (i == KF_UPDATE) {
+      scale_factors[i] = 0;
+    } else {
+      scale_factors[i] = 1;
+    }
+  }
+  estimate = av1_vbr_rc_info_estimate_gop_bitrate(
+      q, bit_depth, scale_factors, gf_group.size, gf_group.update_type,
+      vbr_rc_info.qstep_ratio_list, stats_list.data(), vbr_rc_info.q_index_list,
+      est_bitrate_list.data());
+  ref_estimate = 0;
+  for (int i = 0; i < gf_group.size; i++) {
+    if (gf_group.update_type[i] != KF_UPDATE) {
+      ref_estimate += est_bitrate_list[i];
+    }
+  }
+  EXPECT_NEAR(estimate, ref_estimate, epsilon);
+}
+
+TEST(TplModelTest, VbrRcInfoEstimateBaseQTest) {
+  GF_GROUP gf_group;
+  init_toy_gf_group(&gf_group);
+
+  VBR_RATECTRL_INFO vbr_rc_info;
+  init_toy_vbr_rc_info(&vbr_rc_info, gf_group.size);
+
+  std::vector<TplTxfmStats> stats_list(gf_group.size);
+  init_toy_tpl_txfm_stats(&stats_list);
+  const aom_bit_depth_t bit_depth = AOM_BITS_8;
+
+  // Test multiple bit budgets.
+  const std::vector<double> bit_budgets = { 0,     2470,  19200,  30750,
+                                            41315, 65017, DBL_MAX };
+
+  for (double bit_budget : bit_budgets) {
+    // Binary search method to find the optimal q.
+    const int base_q = av1_vbr_rc_info_estimate_base_q(
+        bit_budget, bit_depth, vbr_rc_info.scale_factors, gf_group.size,
+        gf_group.update_type, vbr_rc_info.qstep_ratio_list, stats_list.data(),
+        vbr_rc_info.q_index_list, nullptr);
+    const int ref_base_q = find_gop_q_iterative(
+        bit_budget, bit_depth, vbr_rc_info.scale_factors, gf_group.size,
+        gf_group.update_type, vbr_rc_info.qstep_ratio_list, stats_list.data(),
+        vbr_rc_info.q_index_list, nullptr);
+    if (bit_budget == 0) {
+      EXPECT_EQ(base_q, 255);
+    } else if (bit_budget == DBL_MAX) {
+      EXPECT_EQ(base_q, 0);
+    }
+    EXPECT_EQ(base_q, ref_base_q);
+  }
+}
+#endif  // CONFIG_BITRATE_ACCURACY
+
+}  // namespace
diff --git a/third_party/aom/test/transform_test_base.h b/third_party/aom/test/transform_test_base.h
new file mode 100644
index 0000000000..55e78fef48
--- /dev/null
+++ b/third_party/aom/test/transform_test_base.h
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_TRANSFORM_TEST_BASE_H_
+#define AOM_TEST_TRANSFORM_TEST_BASE_H_
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "aom/aom_codec.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_mem/aom_mem.h"
+#include "test/acm_random.h"
+
+namespace libaom_test {
+
+//  Note:
+//   Same constant are defined in av1/common/av1_entropy.h and
+//   av1/common/entropy.h.  Goal is to make this base class
+//   to use for future codec transform testing.  But including
+//   either of them would lead to compiling error when we do
+//   unit test for another codec. Suggest to move the definition
+//   to a aom header file.
+const int kDctMaxValue = 16384;
+
+template <typename OutputType>
+using FhtFunc = void (*)(const int16_t *in, OutputType *out, int stride,
+                         TxfmParam *txfm_param);
+
+template <typename OutputType>
+using IhtFunc = void (*)(const tran_low_t *in, uint8_t *out, int stride,
+                         const TxfmParam *txfm_param);
+
+template <typename OutType>
+class TransformTestBase {
+ public:
+  virtual ~TransformTestBase() = default;
+
+ protected:
+  virtual void RunFwdTxfm(const int16_t *in, OutType *out, int stride) = 0;
+
+  virtual void RunInvTxfm(const OutType *out, uint8_t *dst, int stride) = 0;
+
+  void RunAccuracyCheck(uint32_t ref_max_error, double ref_avg_error) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    uint32_t max_error = 0;
+    int64_t total_error = 0;
+    const int count_test_block = 10000;
+
+    int16_t *test_input_block = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
+    ASSERT_NE(test_input_block, nullptr);
+    OutType *test_temp_block = reinterpret_cast<OutType *>(
+        aom_memalign(16, sizeof(test_temp_block[0]) * num_coeffs_));
+    ASSERT_NE(test_temp_block, nullptr);
+    uint8_t *dst = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
+    ASSERT_NE(dst, nullptr);
+    uint8_t *src = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
+    ASSERT_NE(src, nullptr);
+    uint16_t *dst16 = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
+    ASSERT_NE(dst16, nullptr);
+    uint16_t *src16 = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
+    ASSERT_NE(src16, nullptr);
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-255, 255].
+      for (int j = 0; j < num_coeffs_; ++j) {
+        if (bit_depth_ == AOM_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          test_input_block[j] = src[j] - dst[j];
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          test_input_block[j] = src16[j] - dst16[j];
+        }
+      }
+
+      API_REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
+      if (bit_depth_ == AOM_BITS_8) {
+        API_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+      } else {
+        API_REGISTER_STATE_CHECK(
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+      }
+
+      for (int j = 0; j < num_coeffs_; ++j) {
+        const int diff =
+            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+        const uint32_t error = diff * diff;
+        if (max_error < error) max_error = error;
+        total_error += error;
+      }
+    }
+
+    double avg_error = total_error * 1. / count_test_block / num_coeffs_;
+
+    EXPECT_GE(ref_max_error, max_error)
+        << "Error: FHT/IHT has an individual round trip error > "
+        << ref_max_error;
+
+    EXPECT_GE(ref_avg_error, avg_error)
+        << "Error: FHT/IHT has average round trip error > " << ref_avg_error
+        << " per block";
+
+    aom_free(test_input_block);
+    aom_free(test_temp_block);
+    aom_free(dst);
+    aom_free(src);
+    aom_free(dst16);
+    aom_free(src16);
+  }
+
+  void RunCoeffCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+
+    // Use a stride value which is not the width of any transform, to catch
+    // cases where the transforms use the stride incorrectly.
+    int stride = 96;
+
+    int16_t *input_block = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * stride * height_));
+    ASSERT_NE(input_block, nullptr);
+    OutType *output_ref_block = reinterpret_cast<OutType *>(
+        aom_memalign(16, sizeof(output_ref_block[0]) * num_coeffs_));
+    ASSERT_NE(output_ref_block, nullptr);
+    OutType *output_block = reinterpret_cast<OutType *>(
+        aom_memalign(16, sizeof(output_block[0]) * num_coeffs_));
+    ASSERT_NE(output_block, nullptr);
+
+    for (int i = 0; i < count_test_block; ++i) {
+      int j, k;
+      for (j = 0; j < height_; ++j) {
+        for (k = 0; k < pitch_; ++k) {
+          int in_idx = j * stride + k;
+          int out_idx = j * pitch_ + k;
+          input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+          if (bit_depth_ == AOM_BITS_8) {
+            output_block[out_idx] = output_ref_block[out_idx] = rnd.Rand8();
+          } else {
+            output_block[out_idx] = output_ref_block[out_idx] =
+                rnd.Rand16() & mask_;
+          }
+        }
+      }
+
+      fwd_txfm_ref(input_block, output_ref_block, stride, &txfm_param_);
+      API_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, stride));
+
+      // The minimum quant value is 4.
+      for (j = 0; j < height_; ++j) {
+        for (k = 0; k < pitch_; ++k) {
+          int out_idx = j * pitch_ + k;
+          ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx])
+              << "Error: not bit-exact result at index: " << out_idx
+              << " at test block: " << i;
+        }
+      }
+    }
+    aom_free(input_block);
+    aom_free(output_ref_block);
+    aom_free(output_block);
+  }
+
+  void RunInvCoeffCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+
+    // Use a stride value which is not the width of any transform, to catch
+    // cases where the transforms use the stride incorrectly.
+    int stride = 96;
+
+    int16_t *input_block = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
+    ASSERT_NE(input_block, nullptr);
+    OutType *trans_block = reinterpret_cast<OutType *>(
+        aom_memalign(16, sizeof(trans_block[0]) * num_coeffs_));
+    ASSERT_NE(trans_block, nullptr);
+    uint8_t *output_block = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * stride * height_));
+    ASSERT_NE(output_block, nullptr);
+    uint8_t *output_ref_block = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * stride * height_));
+    ASSERT_NE(output_ref_block, nullptr);
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-mask_, mask_].
+      int j, k;
+      for (j = 0; j < height_; ++j) {
+        for (k = 0; k < pitch_; ++k) {
+          int in_idx = j * pitch_ + k;
+          int out_idx = j * stride + k;
+          input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+          output_ref_block[out_idx] = rnd.Rand16() & mask_;
+          output_block[out_idx] = output_ref_block[out_idx];
+        }
+      }
+
+      fwd_txfm_ref(input_block, trans_block, pitch_, &txfm_param_);
+
+      inv_txfm_ref(trans_block, output_ref_block, stride, &txfm_param_);
+      API_REGISTER_STATE_CHECK(RunInvTxfm(trans_block, output_block, stride));
+
+      for (j = 0; j < height_; ++j) {
+        for (k = 0; k < pitch_; ++k) {
+          int out_idx = j * stride + k;
+          ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx])
+              << "Error: not bit-exact result at index: " << out_idx
+              << " j = " << j << " k = " << k << " at test block: " << i;
+        }
+      }
+    }
+    aom_free(input_block);
+    aom_free(trans_block);
+    aom_free(output_ref_block);
+    aom_free(output_block);
+  }
+
+  void RunMemCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+
+    int16_t *input_extreme_block = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
+    ASSERT_NE(input_extreme_block, nullptr);
+    OutType *output_ref_block = reinterpret_cast<OutType *>(
+        aom_memalign(16, sizeof(output_ref_block[0]) * num_coeffs_));
+    ASSERT_NE(output_ref_block, nullptr);
+    OutType *output_block = reinterpret_cast<OutType *>(
+        aom_memalign(16, sizeof(output_block[0]) * num_coeffs_));
+    ASSERT_NE(output_block, nullptr);
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-mask_, mask_].
+      for (int j = 0; j < num_coeffs_; ++j) {
+        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
+      }
+      if (i == 0) {
+        for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = mask_;
+      } else if (i == 1) {
+        for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = -mask_;
+      }
+
+      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, &txfm_param_);
+      API_REGISTER_STATE_CHECK(
+          RunFwdTxfm(input_extreme_block, output_block, pitch_));
+
+      int row_length = FindRowLength();
+      // The minimum quant value is 4.
+      for (int j = 0; j < num_coeffs_; ++j) {
+        ASSERT_EQ(output_block[j], output_ref_block[j])
+            << "Not bit-exact at test index: " << i << ", "
+            << "j = " << j << std::endl;
+        EXPECT_GE(row_length * kDctMaxValue << (bit_depth_ - 8),
+                  abs(output_block[j]))
+            << "Error: NxN FDCT has coefficient larger than N*DCT_MAX_VALUE";
+      }
+    }
+    aom_free(input_extreme_block);
+    aom_free(output_ref_block);
+    aom_free(output_block);
+  }
+
+  void RunInvAccuracyCheck(int limit) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 1000;
+
+    int16_t *in = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
+    ASSERT_NE(in, nullptr);
+    OutType *coeff = reinterpret_cast<OutType *>(
+        aom_memalign(16, sizeof(coeff[0]) * num_coeffs_));
+    ASSERT_NE(coeff, nullptr);
+    uint8_t *dst = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
+    ASSERT_NE(dst, nullptr);
+    uint8_t *src = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
+    ASSERT_NE(src, nullptr);
+
+    uint16_t *dst16 = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
+    ASSERT_NE(dst16, nullptr);
+    uint16_t *src16 = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
+    ASSERT_NE(src16, nullptr);
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-mask_, mask_].
+      for (int j = 0; j < num_coeffs_; ++j) {
+        if (bit_depth_ == AOM_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          in[j] = src[j] - dst[j];
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          in[j] = src16[j] - dst16[j];
+        }
+      }
+
+      fwd_txfm_ref(in, coeff, pitch_, &txfm_param_);
+
+      if (bit_depth_ == AOM_BITS_8) {
+        API_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+      } else {
+        API_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
+      }
+
+      for (int j = 0; j < num_coeffs_; ++j) {
+        const int diff =
+            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+        const uint32_t error = diff * diff;
+        ASSERT_GE(static_cast<uint32_t>(limit), error)
+            << "Error: 4x4 IDCT has error " << error << " at index " << j;
+      }
+    }
+    aom_free(in);
+    aom_free(coeff);
+    aom_free(dst);
+    aom_free(src);
+    aom_free(src16);
+    aom_free(dst16);
+  }
+
+  int pitch_;
+  int height_;
+  FhtFunc<OutType> fwd_txfm_ref;
+  IhtFunc<OutType> inv_txfm_ref;
+  aom_bit_depth_t bit_depth_;
+  int mask_;
+  int num_coeffs_;
+  TxfmParam txfm_param_;
+
+ private:
+  //  Assume transform size is 4x4, 8x8, 16x16,...
+  int FindRowLength() const {
+    int row = 4;
+    if (16 == num_coeffs_) {
+      row = 4;
+    } else if (64 == num_coeffs_) {
+      row = 8;
+    } else if (256 == num_coeffs_) {
+      row = 16;
+    } else if (1024 == num_coeffs_) {
+      row = 32;
+    }
+    return row;
+  }
+};
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_TRANSFORM_TEST_BASE_H_
diff --git a/third_party/aom/test/twopass_encoder.sh b/third_party/aom/test/twopass_encoder.sh
new file mode 100755
index 0000000000..44e7327b8f
--- /dev/null
+++ b/third_party/aom/test/twopass_encoder.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom twopass_encoder example. To add new tests to this
+## file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to twopass_encoder_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required.
+twopass_encoder_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs twopass_encoder using the codec specified by $1 with a frame limit of
+# 100.
+twopass_encoder() {
+  local encoder="$(aom_tool_path twopass_encoder)"
+  local codec="$1"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf"
+  local limit=7
+
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" "${limit}" \
+      ${devnull} || return 1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+twopass_encoder_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    twopass_encoder av1 || return 1
+  fi
+}
+
+twopass_encoder_tests="twopass_encoder_av1"
+
+run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"
diff --git a/third_party/aom/test/util.h b/third_party/aom/test/util.h
new file mode 100644
index 0000000000..29df709c4f
--- /dev/null
+++ b/third_party/aom/test/util.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_UTIL_H_
+#define AOM_TEST_UTIL_H_
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "aom/aom_image.h"
+#include "aom_ports/aom_timer.h"
+
+// Macros
+#define GET_PARAM(k) std::get<k>(GetParam())
+
+inline int is_extension_y4m(const char *filename) {
+  const char *dot = strrchr(filename, '.');
+  if (!dot || dot == filename) return 0;
+
+  return !strcmp(dot, ".y4m");
+}
+
+inline double compute_psnr(const aom_image_t *img1, const aom_image_t *img2) {
+  assert((img1->fmt == img2->fmt) && (img1->d_w == img2->d_w) &&
+         (img1->d_h == img2->d_h));
+
+  const unsigned int width_y = img1->d_w;
+  const unsigned int height_y = img1->d_h;
+  unsigned int i, j;
+
+  int64_t sqrerr = 0;
+  for (i = 0; i < height_y; ++i)
+    for (j = 0; j < width_y; ++j) {
+      int64_t d = img1->planes[AOM_PLANE_Y][i * img1->stride[AOM_PLANE_Y] + j] -
+                  img2->planes[AOM_PLANE_Y][i * img2->stride[AOM_PLANE_Y] + j];
+      sqrerr += d * d;
+    }
+  double mse = static_cast<double>(sqrerr) / (width_y * height_y);
+  double psnr = 100.0;
+  if (mse > 0.0) {
+    psnr = 10 * log10(255.0 * 255.0 / mse);
+  }
+  return psnr;
+}
+
+static INLINE double get_time_mark(aom_usec_timer *t) {
+  aom_usec_timer_mark(t);
+  return static_cast<double>(aom_usec_timer_elapsed(t));
+}
+
+#endif  // AOM_TEST_UTIL_H_
diff --git a/third_party/aom/test/variance_test.cc b/third_party/aom/test/variance_test.cc
new file mode 100644
index 0000000000..a493a1f4cb
--- /dev/null
+++ b/third_party/aom/test/variance_test.cc
@@ -0,0 +1,4370 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdlib>
+#include <new>
+#include <ostream>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
+#include "av1/common/cdef_block.h"
+
+namespace {
+
+typedef uint64_t (*MseWxH16bitFunc)(uint8_t *dst, int dstride, uint16_t *src,
+                                    int sstride, int w, int h);
+typedef uint64_t (*Mse16xH16bitFunc)(uint8_t *dst, int dstride, uint16_t *src,
+                                     int w, int h);
+typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride,
+                                        const uint8_t *b, int b_stride,
+                                        unsigned int *sse);
+typedef void (*GetSseSum8x8QuadFunc)(const uint8_t *a, int a_stride,
+                                     const uint8_t *b, int b_stride,
+                                     uint32_t *sse8x8, int *sum8x8,
+                                     unsigned int *tot_sse, int *tot_sum,
+                                     uint32_t *var8x8);
+typedef void (*GetSseSum16x16DualFunc)(const uint8_t *a, int a_stride,
+                                       const uint8_t *b, int b_stride,
+                                       uint32_t *sse16x16,
+                                       unsigned int *tot_sse, int *tot_sum,
+                                       uint32_t *var16x16);
+typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride,
+                                         int xoffset, int yoffset,
+                                         const uint8_t *b, int b_stride,
+                                         unsigned int *sse);
+typedef unsigned int (*SubpixAvgVarMxNFunc)(const uint8_t *a, int a_stride,
+                                            int xoffset, int yoffset,
+                                            const uint8_t *b, int b_stride,
+                                            uint32_t *sse,
+                                            const uint8_t *second_pred);
+typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src);
+typedef unsigned int (*DistWtdSubpixAvgVarMxNFunc)(
+    const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
+    int b_stride, uint32_t *sse, const uint8_t *second_pred,
+    const DIST_WTD_COMP_PARAMS *jcp_param);
+
+#if !CONFIG_REALTIME_ONLY
+typedef uint32_t (*ObmcSubpelVarFunc)(const uint8_t *pre, int pre_stride,
+                                      int xoffset, int yoffset,
+                                      const int32_t *wsrc, const int32_t *mask,
+                                      unsigned int *sse);
+#endif
+
+using libaom_test::ACMRandom;
+
+// Truncate high bit depth results by downshifting (with rounding) by:
+// 2 * (bit_depth - 8) for sse
+// (bit_depth - 8) for se
+static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) {
+  switch (bit_depth) {
+    case AOM_BITS_12:
+      *sse = (*sse + 128) >> 8;
+      *se = (*se + 8) >> 4;
+      break;
+    case AOM_BITS_10:
+      *sse = (*sse + 8) >> 4;
+      *se = (*se + 2) >> 2;
+      break;
+    case AOM_BITS_8:
+    default: break;
+  }
+}
+
+static unsigned int mb_ss_ref(const int16_t *src) {
+  unsigned int res = 0;
+  for (int i = 0; i < 256; ++i) {
+    res += src[i] * src[i];
+  }
+  return res;
+}
+
+/* Note:
+ *  Our codebase calculates the "diff" value in the variance algorithm by
+ *  (src - ref).
+ */
+static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
+                             int l2h, int src_stride, int ref_stride,
+                             uint32_t *sse_ptr, bool use_high_bit_depth_,
+                             aom_bit_depth_t bit_depth) {
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      int diff;
+      if (!use_high_bit_depth_) {
+        diff = src[y * src_stride + x] - ref[y * ref_stride + x];
+        se += diff;
+        sse += diff * diff;
+      } else {
+        diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] -
+               CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x];
+        se += diff;
+        sse += diff * diff;
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
+}
+
+/* The subpel reference functions differ from the codec version in one aspect:
+ * they calculate the bilinear factors directly instead of using a lookup table
+ * and therefore upshift xoff and yoff by 1. Only every other calculated value
+ * is used so the codec version shrinks the table to save space.
+ */
+static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
+                                    int l2w, int l2h, int xoff, int yoff,
+                                    uint32_t *sse_ptr, bool use_high_bit_depth_,
+                                    aom_bit_depth_t bit_depth) {
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // Bilinear interpolation at a 16th pel step.
+      if (!use_high_bit_depth_) {
+        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff = r - src[w * y + x];
+        se += diff;
+        sse += diff * diff;
+      } else {
+        uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
+        uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+        const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff = r - src16[w * y + x];
+        se += diff;
+        sse += diff * diff;
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
+}
+
+static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
+                                        const uint8_t *second_pred, int l2w,
+                                        int l2h, int xoff, int yoff,
+                                        uint32_t *sse_ptr,
+                                        bool use_high_bit_depth,
+                                        aom_bit_depth_t bit_depth) {
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // bilinear interpolation at a 16th pel step
+      if (!use_high_bit_depth) {
+        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff =
+            ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+        se += diff;
+        sse += diff * diff;
+      } else {
+        const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
+        const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+        const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
+        const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
+        se += diff;
+        sse += diff * diff;
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
+}
+
+static uint32_t dist_wtd_subpel_avg_variance_ref(
+    const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w,
+    int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth,
+    aom_bit_depth_t bit_depth, DIST_WTD_COMP_PARAMS *jcp_param) {
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // bilinear interpolation at a 16th pel step
+      if (!use_high_bit_depth) {
+        const int a1 = ref[(w + 0) * (y + 0) + x + 0];
+        const int a2 = ref[(w + 0) * (y + 0) + x + 1];
+        const int b1 = ref[(w + 0) * (y + 1) + x + 0];
+        const int b2 = ref[(w + 0) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int avg = ROUND_POWER_OF_TWO(
+            r * jcp_param->fwd_offset +
+                second_pred[w * y + x] * jcp_param->bck_offset,
+            DIST_PRECISION_BITS);
+        const int diff = avg - src[w * y + x];
+
+        se += diff;
+        sse += diff * diff;
+      } else {
+        const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
+        const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+        const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
+        const int a1 = ref16[(w + 0) * (y + 0) + x + 0];
+        const int a2 = ref16[(w + 0) * (y + 0) + x + 1];
+        const int b1 = ref16[(w + 0) * (y + 1) + x + 0];
+        const int b2 = ref16[(w + 0) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int avg =
+            ROUND_POWER_OF_TWO(r * jcp_param->fwd_offset +
+                                   sec16[w * y + x] * jcp_param->bck_offset,
+                               DIST_PRECISION_BITS);
+        const int diff = avg - src16[w * y + x];
+
+        se += diff;
+        sse += diff * diff;
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
+}
+
+#if !CONFIG_REALTIME_ONLY
+static uint32_t obmc_subpel_variance_ref(const uint8_t *pre, int l2w, int l2h,
+                                         int xoff, int yoff,
+                                         const int32_t *wsrc,
+                                         const int32_t *mask, uint32_t *sse_ptr,
+                                         bool use_high_bit_depth_,
+                                         aom_bit_depth_t bit_depth) {
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // Bilinear interpolation at a 16th pel step.
+      if (!use_high_bit_depth_) {
+        const int a1 = pre[(w + 1) * (y + 0) + x + 0];
+        const int a2 = pre[(w + 1) * (y + 0) + x + 1];
+        const int b1 = pre[(w + 1) * (y + 1) + x + 0];
+        const int b2 = pre[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff = ROUND_POWER_OF_TWO_SIGNED(
+            wsrc[w * y + x] - r * mask[w * y + x], 12);
+        se += diff;
+        sse += diff * diff;
+      } else {
+        uint16_t *pre16 = CONVERT_TO_SHORTPTR(pre);
+        const int a1 = pre16[(w + 1) * (y + 0) + x + 0];
+        const int a2 = pre16[(w + 1) * (y + 0) + x + 1];
+        const int b1 = pre16[(w + 1) * (y + 1) + x + 0];
+        const int b2 = pre16[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff = ROUND_POWER_OF_TWO_SIGNED(
+            wsrc[w * y + x] - r * mask[w * y + x], 12);
+        se += diff;
+        sse += diff * diff;
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
+}
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+
+class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
+ public:
+  SumOfSquaresTest() : func_(GetParam()) {}
+
+  ~SumOfSquaresTest() override = default;
+
+ protected:
+  void ConstTest();
+  void RefTest();
+
+  SumOfSquaresFunction func_;
+  ACMRandom rnd_;
+};
+
+void SumOfSquaresTest::ConstTest() {
+  int16_t mem[256];
+  unsigned int res;
+  for (int v = 0; v < 256; ++v) {
+    for (int i = 0; i < 256; ++i) {
+      mem[i] = v;
+    }
+    API_REGISTER_STATE_CHECK(res = func_(mem));
+    EXPECT_EQ(256u * (v * v), res);
+  }
+}
+
+void SumOfSquaresTest::RefTest() {
+  int16_t mem[256];
+  for (int i = 0; i < 100; ++i) {
+    for (int j = 0; j < 256; ++j) {
+      mem[j] = rnd_.Rand8() - rnd_.Rand8();
+    }
+
+    const unsigned int expected = mb_ss_ref(mem);
+    unsigned int res;
+    API_REGISTER_STATE_CHECK(res = func_(mem));
+    EXPECT_EQ(expected, res);
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Encapsulating struct to store the function to test along with
+// some testing context.
+// Can be used for MSE, SSE, Variance, etc.
+
+template <typename Func>
+struct TestParams {
+  TestParams(int log2w = 0, int log2h = 0, Func function = nullptr,
+             int bit_depth_value = 0)
+      : log2width(log2w), log2height(log2h), func(function) {
+    use_high_bit_depth = (bit_depth_value > 0);
+    if (use_high_bit_depth) {
+      bit_depth = static_cast<aom_bit_depth_t>(bit_depth_value);
+    } else {
+      bit_depth = AOM_BITS_8;
+    }
+    width = 1 << log2width;
+    height = 1 << log2height;
+    block_size = width * height;
+    mask = (1u << bit_depth) - 1;
+  }
+
+  int log2width, log2height;
+  int width, height;
+  int block_size;
+  Func func;
+  aom_bit_depth_t bit_depth;
+  bool use_high_bit_depth;
+  uint32_t mask;
+};
+
+template <typename Func>
+std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
+  return os << "width/height:" << p.width << "/" << p.height
+            << " function:" << reinterpret_cast<const void *>(p.func)
+            << " bit-depth:" << p.bit_depth;
+}
+
+// Main class for testing a function type
+template <typename FunctionType>
+class MseWxHTestClass
+    : public ::testing::TestWithParam<TestParams<FunctionType> > {
+ public:
+  void SetUp() override {
+    params_ = this->GetParam();
+
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, block_size() * sizeof(src_)));
+    dst_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, block_size() * sizeof(dst_)));
+    ASSERT_NE(src_, nullptr);
+    ASSERT_NE(dst_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(src_);
+    aom_free(dst_);
+    src_ = nullptr;
+    dst_ = nullptr;
+  }
+
+ protected:
+  void RefMatchTestMse();
+  void SpeedTest();
+
+ protected:
+  ACMRandom rnd_;
+  uint8_t *dst_;
+  uint16_t *src_;
+  TestParams<FunctionType> params_;
+
+  // some relay helpers
+  int block_size() const { return params_.block_size; }
+  int width() const { return params_.width; }
+  int height() const { return params_.height; }
+  int d_stride() const { return params_.width; }  // stride is same as width
+  int s_stride() const { return params_.width; }  // stride is same as width
+};
+
+template <typename MseWxHFunctionType>
+void MseWxHTestClass<MseWxHFunctionType>::SpeedTest() {
+  aom_usec_timer ref_timer, test_timer;
+  double elapsed_time_c = 0;
+  double elapsed_time_simd = 0;
+  int run_time = 10000000;
+  int w = width();
+  int h = height();
+  int dstride = d_stride();
+  int sstride = s_stride();
+
+  for (int k = 0; k < block_size(); ++k) {
+    dst_[k] = rnd_.Rand8();
+    src_[k] = rnd_.Rand8();
+  }
+  aom_usec_timer_start(&ref_timer);
+  for (int i = 0; i < run_time; i++) {
+    aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h);
+  }
+  aom_usec_timer_mark(&ref_timer);
+  elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
+
+  aom_usec_timer_start(&test_timer);
+  for (int i = 0; i < run_time; i++) {
+    params_.func(dst_, dstride, src_, sstride, w, h);
+  }
+  aom_usec_timer_mark(&test_timer);
+  elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
+
+  printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
+         elapsed_time_c, elapsed_time_simd,
+         (elapsed_time_c / elapsed_time_simd));
+}
+
+template <typename MseWxHFunctionType>
+void MseWxHTestClass<MseWxHFunctionType>::RefMatchTestMse() {
+  uint64_t mse_ref = 0;
+  uint64_t mse_mod = 0;
+  int w = width();
+  int h = height();
+  int dstride = d_stride();
+  int sstride = s_stride();
+
+  for (int i = 0; i < 10; i++) {
+    for (int k = 0; k < block_size(); ++k) {
+      dst_[k] = rnd_.Rand8();
+      src_[k] = rnd_.Rand8();
+    }
+    API_REGISTER_STATE_CHECK(
+        mse_ref = aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h));
+    API_REGISTER_STATE_CHECK(
+        mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
+    EXPECT_EQ(mse_ref, mse_mod)
+        << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
+  }
+}
+
+template <typename FunctionType>
+class Mse16xHTestClass
+    : public ::testing::TestWithParam<TestParams<FunctionType> > {
+ public:
+  // Memory required to compute mse of two 8x8 and four 4x4 blocks assigned for
+  // maximum width 16 and maximum height 8.
+  int mem_size = 16 * 8;
+  void SetUp() override {
+    params_ = this->GetParam();
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, mem_size * sizeof(*src_)));
+    dst_ =
+        reinterpret_cast<uint8_t *>(aom_memalign(16, mem_size * sizeof(*dst_)));
+    ASSERT_NE(src_, nullptr);
+    ASSERT_NE(dst_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(src_);
+    aom_free(dst_);
+    src_ = nullptr;
+    dst_ = nullptr;
+  }
+
+  uint8_t RandBool() {
+    const uint32_t value = rnd_.Rand8();
+    return (value & 0x1);
+  }
+
+ protected:
+  void RefMatchExtremeTestMse();
+  void RefMatchTestMse();
+  void SpeedTest();
+
+ protected:
+  ACMRandom rnd_;
+  uint8_t *dst_;
+  uint16_t *src_;
+  TestParams<FunctionType> params_;
+
+  // some relay helpers
+  int width() const { return params_.width; }
+  int height() const { return params_.height; }
+  int d_stride() const { return params_.width; }
+};
+
+template <typename Mse16xHFunctionType>
+void Mse16xHTestClass<Mse16xHFunctionType>::SpeedTest() {
+  aom_usec_timer ref_timer, test_timer;
+  double elapsed_time_c = 0.0;
+  double elapsed_time_simd = 0.0;
+  const int loop_count = 10000000;
+  const int w = width();
+  const int h = height();
+  const int dstride = d_stride();
+
+  for (int k = 0; k < mem_size; ++k) {
+    dst_[k] = rnd_.Rand8();
+    // Right shift by 6 is done to generate more input in range of [0,255] than
+    // CDEF_VERY_LARGE
+    int rnd_i10 = rnd_.Rand16() >> 6;
+    src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
+  }
+
+  aom_usec_timer_start(&ref_timer);
+  for (int i = 0; i < loop_count; i++) {
+    aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h);
+  }
+  aom_usec_timer_mark(&ref_timer);
+  elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
+
+  aom_usec_timer_start(&test_timer);
+  for (int i = 0; i < loop_count; i++) {
+    params_.func(dst_, dstride, src_, w, h);
+  }
+  aom_usec_timer_mark(&test_timer);
+  elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
+
+  printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%.31f\n", width(),
+         height(), elapsed_time_c, elapsed_time_simd,
+         (elapsed_time_c / elapsed_time_simd));
+}
+
+template <typename Mse16xHFunctionType>
+void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchTestMse() {
+  uint64_t mse_ref = 0;
+  uint64_t mse_mod = 0;
+  const int w = width();
+  const int h = height();
+  const int dstride = d_stride();
+
+  for (int i = 0; i < 10; i++) {
+    for (int k = 0; k < mem_size; ++k) {
+      dst_[k] = rnd_.Rand8();
+      // Right shift by 6 is done to generate more input in range of [0,255]
+      // than CDEF_VERY_LARGE
+      int rnd_i10 = rnd_.Rand16() >> 6;
+      src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
+    }
+
+    API_REGISTER_STATE_CHECK(
+        mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
+    API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
+    EXPECT_EQ(mse_ref, mse_mod)
+        << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
+  }
+}
+
+template <typename Mse16xHFunctionType>
+void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchExtremeTestMse() {
+  uint64_t mse_ref = 0;
+  uint64_t mse_mod = 0;
+  const int w = width();
+  const int h = height();
+  const int dstride = d_stride();
+  const int iter = 10;
+
+  // Fill the buffers with extreme values
+  for (int i = 0; i < iter; i++) {
+    for (int k = 0; k < mem_size; ++k) {
+      dst_[k] = static_cast<uint8_t>(RandBool() ? 0 : 255);
+      src_[k] = static_cast<uint16_t>(RandBool() ? 0 : CDEF_VERY_LARGE);
+    }
+
+    API_REGISTER_STATE_CHECK(
+        mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
+    API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
+    EXPECT_EQ(mse_ref, mse_mod)
+        << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
+  }
+}
+
+// Main class for testing a function type
+template <typename FunctionType>
+class MainTestClass
+    : public ::testing::TestWithParam<TestParams<FunctionType> > {
+ public:
+  void SetUp() override {
+    params_ = this->GetParam();
+
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    const size_t unit =
+        use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t);
+    src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size() * unit));
+    ref_ = new uint8_t[block_size() * unit];
+    ASSERT_NE(src_, nullptr);
+    ASSERT_NE(ref_, nullptr);
+    memset(src_, 0, block_size() * sizeof(src_[0]));
+    memset(ref_, 0, block_size() * sizeof(ref_[0]));
+    if (use_high_bit_depth()) {
+      // TODO(skal): remove!
+      src_ = CONVERT_TO_BYTEPTR(src_);
+      ref_ = CONVERT_TO_BYTEPTR(ref_);
+    }
+  }
+
+  void TearDown() override {
+    if (use_high_bit_depth()) {
+      // TODO(skal): remove!
+      src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_));
+      ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_));
+    }
+
+    aom_free(src_);
+    delete[] ref_;
+    src_ = nullptr;
+    ref_ = nullptr;
+  }
+
+ protected:
+  // We could sub-class MainTestClass into dedicated class for Variance
+  // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing
+  // to access top class fields xxx. That's cumbersome, so for now we'll just
+  // implement the testing methods here:
+
+  // Variance tests
+  void ZeroTest();
+  void RefTest();
+  void RefStrideTest();
+  void OneQuarterTest();
+  void SpeedTest();
+
+  // SSE&SUM tests
+  void RefTestSseSum();
+  void MinTestSseSum();
+  void MaxTestSseSum();
+  void SseSum_SpeedTest();
+
+  // SSE&SUM dual tests
+  void RefTestSseSumDual();
+  void MinTestSseSumDual();
+  void MaxTestSseSumDual();
+  void SseSum_SpeedTestDual();
+
+  // MSE/SSE tests
+  void RefTestMse();
+  void RefTestSse();
+  void MaxTestMse();
+  void MaxTestSse();
+
+ protected:
+  ACMRandom rnd_;
+  uint8_t *src_;
+  uint8_t *ref_;
+  TestParams<FunctionType> params_;
+
+  // some relay helpers
+  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
+  int byte_shift() const { return params_.bit_depth - 8; }
+  int block_size() const { return params_.block_size; }
+  int width() const { return params_.width; }
+  int height() const { return params_.height; }
+  uint32_t mask() const { return params_.mask; }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Tests related to variance.
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::ZeroTest() {
+  for (int i = 0; i <= 255; ++i) {
+    if (!use_high_bit_depth()) {
+      memset(src_, i, block_size());
+    } else {
+      uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
+      for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift();
+    }
+    for (int j = 0; j <= 255; ++j) {
+      if (!use_high_bit_depth()) {
+        memset(ref_, j, block_size());
+      } else {
+        uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_);
+        for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift();
+      }
+      unsigned int sse, var;
+      API_REGISTER_STATE_CHECK(
+          var = params_.func(src_, width(), ref_, width(), &sse));
+      EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j;
+    }
+  }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::RefTest() {
+  for (int i = 0; i < 10; ++i) {
+    for (int j = 0; j < block_size(); j++) {
+      if (!use_high_bit_depth()) {
+        src_[j] = rnd_.Rand8();
+        ref_[j] = rnd_.Rand8();
+      } else {
+        CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
+        CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
+      }
+    }
+    unsigned int sse1, sse2, var1, var2;
+    const int stride = width();
+    API_REGISTER_STATE_CHECK(
+        var1 = params_.func(src_, stride, ref_, stride, &sse1));
+    var2 =
+        variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
+                     stride, &sse2, use_high_bit_depth(), params_.bit_depth);
+    EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
+    EXPECT_EQ(var1, var2) << "Error at test index: " << i;
+  }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::RefStrideTest() {
+  for (int i = 0; i < 10; ++i) {
+    const int ref_stride = (i & 1) * width();
+    const int src_stride = ((i >> 1) & 1) * width();
+    for (int j = 0; j < block_size(); j++) {
+      const int ref_ind = (j / width()) * ref_stride + j % width();
+      const int src_ind = (j / width()) * src_stride + j % width();
+      if (!use_high_bit_depth()) {
+        src_[src_ind] = rnd_.Rand8();
+        ref_[ref_ind] = rnd_.Rand8();
+      } else {
+        CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask();
+        CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask();
+      }
+    }
+    unsigned int sse1, sse2;
+    unsigned int var1, var2;
+
+    API_REGISTER_STATE_CHECK(
+        var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1));
+    var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height,
+                        src_stride, ref_stride, &sse2, use_high_bit_depth(),
+                        params_.bit_depth);
+    EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
+    EXPECT_EQ(var1, var2) << "Error at test index: " << i;
+  }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
+  const int half = block_size() / 2;
+  if (!use_high_bit_depth()) {
+    memset(src_, 255, block_size());
+    memset(ref_, 255, half);
+    memset(ref_ + half, 0, half);
+  } else {
+    aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size());
+    aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half);
+    aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half);
+  }
+  unsigned int sse, var, expected;
+  API_REGISTER_STATE_CHECK(
+      var = params_.func(src_, width(), ref_, width(), &sse));
+  expected = block_size() * 255 * 255 / 4;
+  EXPECT_EQ(expected, var);
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::SpeedTest() {
+  for (int j = 0; j < block_size(); j++) {
+    if (!use_high_bit_depth()) {
+      src_[j] = rnd_.Rand8();
+      ref_[j] = rnd_.Rand8();
+#if CONFIG_AV1_HIGHBITDEPTH
+    } else {
+      CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
+      CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+    }
+  }
+  unsigned int sse;
+  const int stride = width();
+  int run_time = 1000000000 / block_size();
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_time; ++i) {
+    params_.func(src_, stride, ref_, stride, &sse);
+  }
+
+  aom_usec_timer_mark(&timer);
+  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+  printf("Variance %dx%d : %d us\n", width(), height(), elapsed_time);
+}
+
+template <typename GetSseSum8x8QuadFuncType>
+void MainTestClass<GetSseSum8x8QuadFuncType>::RefTestSseSum() {
+  for (int i = 0; i < 10; ++i) {
+    for (int j = 0; j < block_size(); ++j) {
+      src_[j] = rnd_.Rand8();
+      ref_[j] = rnd_.Rand8();
+    }
+    unsigned int sse1[256] = { 0 };
+    unsigned int sse2[256] = { 0 };
+    unsigned int var1[256] = { 0 };
+    unsigned int var2[256] = { 0 };
+    int sum1[256] = { 0 };
+    int sum2[256] = { 0 };
+    unsigned int sse_tot_c = 0;
+    unsigned int sse_tot_simd = 0;
+    int sum_tot_c = 0;
+    int sum_tot_simd = 0;
+    const int stride = width();
+    int k = 0;
+
+    for (int row = 0; row < height(); row += 8) {
+      for (int col = 0; col < width(); col += 32) {
+        API_REGISTER_STATE_CHECK(params_.func(src_ + stride * row + col, stride,
+                                              ref_ + stride * row + col, stride,
+                                              &sse1[k], &sum1[k], &sse_tot_simd,
+                                              &sum_tot_simd, &var1[k]));
+        aom_get_var_sse_sum_8x8_quad_c(
+            src_ + stride * row + col, stride, ref_ + stride * row + col,
+            stride, &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
+        k += 4;
+      }
+    }
+    EXPECT_EQ(sse_tot_c, sse_tot_simd);
+    EXPECT_EQ(sum_tot_c, sum_tot_simd);
+    for (int p = 0; p < 256; p++) {
+      EXPECT_EQ(sse1[p], sse2[p]);
+      EXPECT_EQ(sum1[p], sum2[p]);
+      EXPECT_EQ(var1[p], var2[p]);
+    }
+  }
+}
+
+template <typename GetSseSum8x8QuadFuncType>
+void MainTestClass<GetSseSum8x8QuadFuncType>::MinTestSseSum() {
+  memset(src_, 0, block_size());
+  memset(ref_, 255, block_size());
+  unsigned int sse1[256] = { 0 };
+  unsigned int sse2[256] = { 0 };
+  unsigned int var1[256] = { 0 };
+  unsigned int var2[256] = { 0 };
+  int sum1[256] = { 0 };
+  int sum2[256] = { 0 };
+  unsigned int sse_tot_c = 0;
+  unsigned int sse_tot_simd = 0;
+  int sum_tot_c = 0;
+  int sum_tot_simd = 0;
+  const int stride = width();
+  int k = 0;
+
+  for (int i = 0; i < height(); i += 8) {
+    for (int j = 0; j < width(); j += 32) {
+      API_REGISTER_STATE_CHECK(params_.func(
+          src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
+          &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
+      aom_get_var_sse_sum_8x8_quad_c(
+          src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
+          &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
+      k += 4;
+    }
+  }
+  EXPECT_EQ(sse_tot_simd, sse_tot_c);
+  EXPECT_EQ(sum_tot_simd, sum_tot_c);
+  for (int p = 0; p < 256; p++) {
+    EXPECT_EQ(sse1[p], sse2[p]);
+    EXPECT_EQ(sum1[p], sum2[p]);
+    EXPECT_EQ(var1[p], var2[p]);
+  }
+}
+
+template <typename GetSseSum8x8QuadFuncType>
+void MainTestClass<GetSseSum8x8QuadFuncType>::MaxTestSseSum() {
+  memset(src_, 255, block_size());
+  memset(ref_, 0, block_size());
+  unsigned int sse1[256] = { 0 };
+  unsigned int sse2[256] = { 0 };
+  unsigned int var1[256] = { 0 };
+  unsigned int var2[256] = { 0 };
+  int sum1[256] = { 0 };
+  int sum2[256] = { 0 };
+  unsigned int sse_tot_c = 0;
+  unsigned int sse_tot_simd = 0;
+  int sum_tot_c = 0;
+  int sum_tot_simd = 0;
+  const int stride = width();
+  int k = 0;
+
+  for (int i = 0; i < height(); i += 8) {
+    for (int j = 0; j < width(); j += 32) {
+      API_REGISTER_STATE_CHECK(params_.func(
+          src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
+          &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
+      aom_get_var_sse_sum_8x8_quad_c(
+          src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
+          &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
+      k += 4;
+    }
+  }
+  EXPECT_EQ(sse_tot_c, sse_tot_simd);
+  EXPECT_EQ(sum_tot_c, sum_tot_simd);
+
+  for (int p = 0; p < 256; p++) {
+    EXPECT_EQ(sse1[p], sse2[p]);
+    EXPECT_EQ(sum1[p], sum2[p]);
+    EXPECT_EQ(var1[p], var2[p]);
+  }
+}
+
+template <typename GetSseSum8x8QuadFuncType>
+void MainTestClass<GetSseSum8x8QuadFuncType>::SseSum_SpeedTest() {
+  const int loop_count = 1000000000 / block_size();
+  for (int j = 0; j < block_size(); ++j) {
+    src_[j] = rnd_.Rand8();
+    ref_[j] = rnd_.Rand8();
+  }
+
+  unsigned int sse1[4] = { 0 };
+  unsigned int sse2[4] = { 0 };
+  unsigned int var1[4] = { 0 };
+  unsigned int var2[4] = { 0 };
+  int sum1[4] = { 0 };
+  int sum2[4] = { 0 };
+  unsigned int sse_tot_c = 0;
+  unsigned int sse_tot_simd = 0;
+  int sum_tot_c = 0;
+  int sum_tot_simd = 0;
+  const int stride = width();
+
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int r = 0; r < loop_count; ++r) {
+    for (int i = 0; i < height(); i += 8) {
+      for (int j = 0; j < width(); j += 32) {
+        aom_get_var_sse_sum_8x8_quad_c(src_ + stride * i + j, stride,
+                                       ref_ + stride * i + j, stride, sse2,
+                                       sum2, &sse_tot_c, &sum_tot_c, var2);
+      }
+    }
+  }
+  aom_usec_timer_mark(&timer);
+  const double elapsed_time_ref =
+      static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+  aom_usec_timer_start(&timer);
+  for (int r = 0; r < loop_count; ++r) {
+    for (int i = 0; i < height(); i += 8) {
+      for (int j = 0; j < width(); j += 32) {
+        params_.func(src_ + stride * i + j, stride, ref_ + stride * i + j,
+                     stride, sse1, sum1, &sse_tot_simd, &sum_tot_simd, var1);
+      }
+    }
+  }
+  aom_usec_timer_mark(&timer);
+  const double elapsed_time_simd =
+      static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+  printf(
+      "aom_getvar_8x8_quad for block=%dx%d : ref_time=%lf \t simd_time=%lf \t "
+      "gain=%lf \n",
+      width(), height(), elapsed_time_ref, elapsed_time_simd,
+      elapsed_time_ref / elapsed_time_simd);
+}
+
+template <typename GetSseSum16x16DualFuncType>
+void MainTestClass<GetSseSum16x16DualFuncType>::RefTestSseSumDual() {
+  for (int iter = 0; iter < 10; ++iter) {
+    for (int idx = 0; idx < block_size(); ++idx) {
+      src_[idx] = rnd_.Rand8();
+      ref_[idx] = rnd_.Rand8();
+    }
+    unsigned int sse1[64] = { 0 };
+    unsigned int sse2[64] = { 0 };
+    unsigned int var1[64] = { 0 };
+    unsigned int var2[64] = { 0 };
+    unsigned int sse_tot_c = 0;
+    unsigned int sse_tot_simd = 0;
+    int sum_tot_c = 0;
+    int sum_tot_simd = 0;
+    const int stride = width();
+    int k = 0;
+
+    for (int row = 0; row < height(); row += 16) {
+      for (int col = 0; col < width(); col += 32) {
+        API_REGISTER_STATE_CHECK(params_.func(
+            src_ + stride * row + col, stride, ref_ + stride * row + col,
+            stride, &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
+        aom_get_var_sse_sum_16x16_dual_c(
+            src_ + stride * row + col, stride, ref_ + stride * row + col,
+            stride, &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
+        k += 2;
+      }
+    }
+    EXPECT_EQ(sse_tot_c, sse_tot_simd);
+    EXPECT_EQ(sum_tot_c, sum_tot_simd);
+    for (int p = 0; p < 64; p++) {
+      EXPECT_EQ(sse1[p], sse2[p]);
+      EXPECT_EQ(sse_tot_simd, sse_tot_c);
+      EXPECT_EQ(sum_tot_simd, sum_tot_c);
+      EXPECT_EQ(var1[p], var2[p]);
+    }
+  }
+}
+
+template <typename GetSseSum16x16DualFuncType>
+void MainTestClass<GetSseSum16x16DualFuncType>::MinTestSseSumDual() {
+  memset(src_, 0, block_size());
+  memset(ref_, 255, block_size());
+  unsigned int sse1[64] = { 0 };
+  unsigned int sse2[64] = { 0 };
+  unsigned int var1[64] = { 0 };
+  unsigned int var2[64] = { 0 };
+  unsigned int sse_tot_c = 0;
+  unsigned int sse_tot_simd = 0;
+  int sum_tot_c = 0;
+  int sum_tot_simd = 0;
+  const int stride = width();
+  int k = 0;
+
+  for (int row = 0; row < height(); row += 16) {
+    for (int col = 0; col < width(); col += 32) {
+      API_REGISTER_STATE_CHECK(params_.func(
+          src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
+          &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
+      aom_get_var_sse_sum_16x16_dual_c(
+          src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
+          &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
+      k += 2;
+    }
+  }
+  EXPECT_EQ(sse_tot_simd, sse_tot_c);
+  EXPECT_EQ(sum_tot_simd, sum_tot_c);
+  for (int p = 0; p < 64; p++) {
+    EXPECT_EQ(sse1[p], sse2[p]);
+    EXPECT_EQ(var1[p], var2[p]);
+  }
+}
+
+template <typename GetSseSum16x16DualFuncType>
+void MainTestClass<GetSseSum16x16DualFuncType>::MaxTestSseSumDual() {
+  memset(src_, 255, block_size());
+  memset(ref_, 0, block_size());
+  unsigned int sse1[64] = { 0 };
+  unsigned int sse2[64] = { 0 };
+  unsigned int var1[64] = { 0 };
+  unsigned int var2[64] = { 0 };
+  unsigned int sse_tot_c = 0;
+  unsigned int sse_tot_simd = 0;
+  int sum_tot_c = 0;
+  int sum_tot_simd = 0;
+  const int stride = width();
+  int k = 0;
+
+  for (int row = 0; row < height(); row += 16) {
+    for (int col = 0; col < width(); col += 32) {
+      API_REGISTER_STATE_CHECK(params_.func(
+          src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
+          &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
+      aom_get_var_sse_sum_16x16_dual_c(
+          src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
+          &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
+      k += 2;
+    }
+  }
+  EXPECT_EQ(sse_tot_c, sse_tot_simd);
+  EXPECT_EQ(sum_tot_c, sum_tot_simd);
+
+  for (int p = 0; p < 64; p++) {
+    EXPECT_EQ(sse1[p], sse2[p]);
+    EXPECT_EQ(var1[p], var2[p]);
+  }
+}
+
+template <typename GetSseSum16x16DualFuncType>
+void MainTestClass<GetSseSum16x16DualFuncType>::SseSum_SpeedTestDual() {
+  const int loop_count = 1000000000 / block_size();
+  for (int idx = 0; idx < block_size(); ++idx) {
+    src_[idx] = rnd_.Rand8();
+    ref_[idx] = rnd_.Rand8();
+  }
+
+  unsigned int sse1[2] = { 0 };
+  unsigned int sse2[2] = { 0 };
+  unsigned int var1[2] = { 0 };
+  unsigned int var2[2] = { 0 };
+  unsigned int sse_tot_c = 0;
+  unsigned int sse_tot_simd = 0;
+  int sum_tot_c = 0;
+  int sum_tot_simd = 0;
+  const int stride = width();
+
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int r = 0; r < loop_count; ++r) {
+    for (int row = 0; row < height(); row += 16) {
+      for (int col = 0; col < width(); col += 32) {
+        aom_get_var_sse_sum_16x16_dual_c(src_ + stride * row + col, stride,
+                                         ref_ + stride * row + col, stride,
+                                         sse2, &sse_tot_c, &sum_tot_c, var2);
+      }
+    }
+  }
+  aom_usec_timer_mark(&timer);
+  const double elapsed_time_ref =
+      static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+  aom_usec_timer_start(&timer);
+  for (int r = 0; r < loop_count; ++r) {
+    for (int row = 0; row < height(); row += 16) {
+      for (int col = 0; col < width(); col += 32) {
+        params_.func(src_ + stride * row + col, stride,
+                     ref_ + stride * row + col, stride, sse1, &sse_tot_simd,
+                     &sum_tot_simd, var1);
+      }
+    }
+  }
+  aom_usec_timer_mark(&timer);
+  const double elapsed_time_simd =
+      static_cast<double>(aom_usec_timer_elapsed(&timer));
+
+  printf(
+      "aom_getvar_16x16_dual for block=%dx%d : ref_time=%lf \t simd_time=%lf "
+      "\t "
+      "gain=%lf \n",
+      width(), height(), elapsed_time_ref, elapsed_time_simd,
+      elapsed_time_ref / elapsed_time_simd);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Tests related to MSE / SSE.
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::RefTestMse() {
+  for (int i = 0; i < 10; ++i) {
+    for (int j = 0; j < block_size(); ++j) {
+      if (!use_high_bit_depth()) {
+        src_[j] = rnd_.Rand8();
+        ref_[j] = rnd_.Rand8();
+#if CONFIG_AV1_HIGHBITDEPTH
+      } else {
+        CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
+        CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+      }
+    }
+    unsigned int sse1, sse2;
+    const int stride = width();
+    API_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1));
+    variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
+                 stride, &sse2, use_high_bit_depth(), params_.bit_depth);
+    EXPECT_EQ(sse1, sse2);
+  }
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::RefTestSse() {
+  for (int i = 0; i < 10; ++i) {
+    for (int j = 0; j < block_size(); ++j) {
+      src_[j] = rnd_.Rand8();
+      ref_[j] = rnd_.Rand8();
+    }
+    unsigned int sse2;
+    unsigned int var1;
+    const int stride = width();
+    API_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride));
+    variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
+                 stride, &sse2, false, AOM_BITS_8);
+    EXPECT_EQ(var1, sse2);
+  }
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::MaxTestMse() {
+  int max_value = (1 << params_.bit_depth) - 1;
+  if (!use_high_bit_depth()) {
+    memset(src_, max_value, block_size());
+    memset(ref_, 0, block_size());
+#if CONFIG_AV1_HIGHBITDEPTH
+  } else {
+    aom_memset16(CONVERT_TO_SHORTPTR(src_), max_value, block_size());
+    aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, block_size());
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+  }
+  unsigned int sse;
+  API_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse));
+  unsigned int expected = (unsigned int)block_size() * max_value * max_value;
+  switch (params_.bit_depth) {
+    case AOM_BITS_12: expected = ROUND_POWER_OF_TWO(expected, 8); break;
+    case AOM_BITS_10: expected = ROUND_POWER_OF_TWO(expected, 4); break;
+    case AOM_BITS_8:
+    default: break;
+  }
+  EXPECT_EQ(expected, sse);
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::MaxTestSse() {
+  memset(src_, 255, block_size());
+  memset(ref_, 0, block_size());
+  unsigned int var;
+  API_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width()));
+  const unsigned int expected = block_size() * 255 * 255;
+  EXPECT_EQ(expected, var);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+using std::get;
+using std::make_tuple;
+using std::tuple;
+
+template <typename FunctionType>
+class SubpelVarianceTest
+    : public ::testing::TestWithParam<TestParams<FunctionType> > {
+ public:
+  void SetUp() override {
+    params_ = this->GetParam();
+
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    if (!use_high_bit_depth()) {
+      src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
+      sec_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
+      ref_ = reinterpret_cast<uint8_t *>(
+          aom_memalign(32, block_size() + width() + height() + 1));
+    } else {
+      src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
+          aom_memalign(32, block_size() * sizeof(uint16_t))));
+      sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
+          aom_memalign(32, block_size() * sizeof(uint16_t))));
+      ref_ = CONVERT_TO_BYTEPTR(aom_memalign(
+          32, (block_size() + width() + height() + 1) * sizeof(uint16_t)));
+    }
+    ASSERT_NE(src_, nullptr);
+    ASSERT_NE(sec_, nullptr);
+    ASSERT_NE(ref_, nullptr);
+  }
+
+  void TearDown() override {
+    if (!use_high_bit_depth()) {
+      aom_free(src_);
+      aom_free(ref_);
+      aom_free(sec_);
+    } else {
+      aom_free(CONVERT_TO_SHORTPTR(src_));
+      aom_free(CONVERT_TO_SHORTPTR(ref_));
+      aom_free(CONVERT_TO_SHORTPTR(sec_));
+    }
+  }
+
+ protected:
+  void RefTest();
+  void ExtremeRefTest();
+  void SpeedTest();
+
+  ACMRandom rnd_;
+  uint8_t *src_;
+  uint8_t *ref_;
+  uint8_t *sec_;
+  TestParams<FunctionType> params_;
+  DIST_WTD_COMP_PARAMS jcp_param_;
+
+  // some relay helpers
+  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
+  int byte_shift() const { return params_.bit_depth - 8; }
+  int block_size() const { return params_.block_size; }
+  int width() const { return params_.width; }
+  int height() const { return params_.height; }
+  uint32_t mask() const { return params_.mask; }
+};
+
+template <typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
+      if (!use_high_bit_depth()) {
+        for (int j = 0; j < block_size(); j++) {
+          src_[j] = rnd_.Rand8();
+        }
+        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+          ref_[j] = rnd_.Rand8();
+        }
+      } else {
+        for (int j = 0; j < block_size(); j++) {
+          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
+        }
+        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
+        }
+      }
+      unsigned int sse1, sse2;
+      unsigned int var1;
+      API_REGISTER_STATE_CHECK(
+          var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
+      const unsigned int var2 = subpel_variance_ref(
+          ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
+          use_high_bit_depth(), params_.bit_depth);
+      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+    }
+  }
+}
+
+template <typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
+  // Compare against reference.
+  // Src: Set the first half of values to 0, the second half to the maximum.
+  // Ref: Set the first half of values to the maximum, the second half to 0.
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
+      const int half = block_size() / 2;
+      if (!use_high_bit_depth()) {
+        memset(src_, 0, half);
+        memset(src_ + half, 255, half);
+        memset(ref_, 255, half);
+        memset(ref_ + half, 0, half + width() + height() + 1);
+      } else {
+        aom_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half);
+        aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half);
+        aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half);
+        aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(),
+                     half + width() + height() + 1);
+      }
+      unsigned int sse1, sse2;
+      unsigned int var1;
+      API_REGISTER_STATE_CHECK(
+          var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
+      const unsigned int var2 = subpel_variance_ref(
+          ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
+          use_high_bit_depth(), params_.bit_depth);
+      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
+      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
+    }
+  }
+}
+
+template <typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::SpeedTest() {
+  if (!use_high_bit_depth()) {
+    for (int j = 0; j < block_size(); j++) {
+      src_[j] = rnd_.Rand8();
+    }
+    for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+      ref_[j] = rnd_.Rand8();
+    }
+  } else {
+    for (int j = 0; j < block_size(); j++) {
+      CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
+    }
+    for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+      CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
+    }
+  }
+
+  unsigned int sse1, sse2;
+  int run_time = 1000000000 / block_size();
+  aom_usec_timer timer;
+
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_time; ++i) {
+    int x = rnd_(8);
+    int y = rnd_(8);
+    params_.func(ref_, width() + 1, x, y, src_, width(), &sse1);
+  }
+  aom_usec_timer_mark(&timer);
+
+  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+  aom_usec_timer timer_c;
+
+  aom_usec_timer_start(&timer_c);
+  for (int i = 0; i < run_time; ++i) {
+    int x = rnd_(8);
+    int y = rnd_(8);
+    subpel_variance_ref(ref_, src_, params_.log2width, params_.log2height, x, y,
+                        &sse2, use_high_bit_depth(), params_.bit_depth);
+  }
+  aom_usec_timer_mark(&timer_c);
+
+  const int elapsed_time_c = static_cast<int>(aom_usec_timer_elapsed(&timer_c));
+
+  printf(
+      "sub_pixel_variance_%dx%d_%d: ref_time=%d us opt_time=%d us gain=%d \n",
+      width(), height(), params_.bit_depth, elapsed_time_c, elapsed_time,
+      elapsed_time_c / elapsed_time);
+}
+
+template <>
+void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
+      if (!use_high_bit_depth()) {
+        for (int j = 0; j < block_size(); j++) {
+          src_[j] = rnd_.Rand8();
+          sec_[j] = rnd_.Rand8();
+        }
+        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+          ref_[j] = rnd_.Rand8();
+        }
+      } else {
+        for (int j = 0; j < block_size(); j++) {
+          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
+          CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
+        }
+        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
+        }
+      }
+      uint32_t sse1, sse2;
+      uint32_t var1, var2;
+      API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y,
+                                                   src_, width(), &sse1, sec_));
+      var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width,
+                                     params_.log2height, x, y, &sse2,
+                                     use_high_bit_depth(), params_.bit_depth);
+      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+    }
+  }
+}
+
+template <>
+void SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>::RefTest() {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
+      if (!use_high_bit_depth()) {
+        for (int j = 0; j < block_size(); j++) {
+          src_[j] = rnd_.Rand8();
+          sec_[j] = rnd_.Rand8();
+        }
+        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+          ref_[j] = rnd_.Rand8();
+        }
+      } else {
+        for (int j = 0; j < block_size(); j++) {
+          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
+          CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
+        }
+        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
+        }
+      }
+      for (int x0 = 0; x0 < 2; ++x0) {
+        for (int y0 = 0; y0 < 4; ++y0) {
+          uint32_t sse1, sse2;
+          uint32_t var1, var2;
+          jcp_param_.fwd_offset = quant_dist_lookup_table[y0][x0];
+          jcp_param_.bck_offset = quant_dist_lookup_table[y0][1 - x0];
+          API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 0, x, y,
+                                                       src_, width(), &sse1,
+                                                       sec_, &jcp_param_));
+          var2 = dist_wtd_subpel_avg_variance_ref(
+              ref_, src_, sec_, params_.log2width, params_.log2height, x, y,
+              &sse2, use_high_bit_depth(), params_.bit_depth, &jcp_param_);
+          EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+          EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+        }
+      }
+    }
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+#if !CONFIG_REALTIME_ONLY
+
+static const int kMaskMax = 64;
+
+typedef TestParams<ObmcSubpelVarFunc> ObmcSubpelVarianceParams;
+
+template <typename FunctionType>
+class ObmcVarianceTest
+    : public ::testing::TestWithParam<TestParams<FunctionType> > {
+ public:
+  void SetUp() override {
+    params_ = this->GetParam();
+
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    if (!use_high_bit_depth()) {
+      pre_ = reinterpret_cast<uint8_t *>(
+          aom_memalign(32, block_size() + width() + height() + 1));
+    } else {
+      pre_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(aom_memalign(
+          32, (block_size() + width() + height() + 1) * sizeof(uint16_t))));
+    }
+    wsrc_ = reinterpret_cast<int32_t *>(
+        aom_memalign(32, block_size() * sizeof(uint32_t)));
+    mask_ = reinterpret_cast<int32_t *>(
+        aom_memalign(32, block_size() * sizeof(uint32_t)));
+    ASSERT_NE(pre_, nullptr);
+    ASSERT_NE(wsrc_, nullptr);
+    ASSERT_NE(mask_, nullptr);
+  }
+
+  void TearDown() override {
+    if (!use_high_bit_depth()) {
+      aom_free(pre_);
+    } else {
+      aom_free(CONVERT_TO_SHORTPTR(pre_));
+    }
+    aom_free(wsrc_);
+    aom_free(mask_);
+  }
+
+ protected:
+  void RefTest();
+  void ExtremeRefTest();
+  void SpeedTest();
+
+  ACMRandom rnd_;
+  uint8_t *pre_;
+  int32_t *wsrc_;
+  int32_t *mask_;
+  TestParams<FunctionType> params_;
+
+  // some relay helpers
+  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
+  int byte_shift() const { return params_.bit_depth - 8; }
+  int block_size() const { return params_.block_size; }
+  int width() const { return params_.width; }
+  int height() const { return params_.height; }
+  uint32_t bd_mask() const { return params_.mask; }
+};
+
+template <>
+void ObmcVarianceTest<ObmcSubpelVarFunc>::RefTest() {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
+      if (!use_high_bit_depth())
+        for (int j = 0; j < block_size() + width() + height() + 1; j++)
+          pre_[j] = rnd_.Rand8();
+      else
+        for (int j = 0; j < block_size() + width() + height() + 1; j++)
+          CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
+      for (int j = 0; j < block_size(); j++) {
+        wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
+        mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
+      }
+
+      uint32_t sse1, sse2;
+      uint32_t var1, var2;
+      API_REGISTER_STATE_CHECK(
+          var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
+      var2 = obmc_subpel_variance_ref(
+          pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
+          &sse2, use_high_bit_depth(), params_.bit_depth);
+      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
+      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
+    }
+  }
+}
+
+template <>
+void ObmcVarianceTest<ObmcSubpelVarFunc>::ExtremeRefTest() {
+  // Pre: Set the first half of values to the maximum, the second half to 0.
+  // Mask: same as above
+  // WSrc: Set the first half of values to 0, the second half to the maximum.
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
+      const int half = block_size() / 2;
+      if (!use_high_bit_depth()) {
+        memset(pre_, 255, half);
+        memset(pre_ + half, 0, half + width() + height() + 1);
+      } else {
+        aom_memset16(CONVERT_TO_SHORTPTR(pre_), bd_mask(), half);
+        aom_memset16(CONVERT_TO_SHORTPTR(pre_) + half, 0,
+                     half + width() + height() + 1);
+      }
+      for (int j = 0; j < half; j++) {
+        wsrc_[j] = bd_mask() * kMaskMax * kMaskMax;
+        mask_[j] = 0;
+      }
+      for (int j = half; j < block_size(); j++) {
+        wsrc_[j] = 0;
+        mask_[j] = kMaskMax * kMaskMax;
+      }
+
+      uint32_t sse1, sse2;
+      uint32_t var1, var2;
+      API_REGISTER_STATE_CHECK(
+          var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
+      var2 = obmc_subpel_variance_ref(
+          pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
+          &sse2, use_high_bit_depth(), params_.bit_depth);
+      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
+      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
+    }
+  }
+}
+
+template <>
+void ObmcVarianceTest<ObmcSubpelVarFunc>::SpeedTest() {
+  if (!use_high_bit_depth())
+    for (int j = 0; j < block_size() + width() + height() + 1; j++)
+      pre_[j] = rnd_.Rand8();
+  else
+    for (int j = 0; j < block_size() + width() + height() + 1; j++)
+      CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
+  for (int j = 0; j < block_size(); j++) {
+    wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
+    mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
+  }
+  unsigned int sse1;
+  const int stride = width() + 1;
+  int run_time = 1000000000 / block_size();
+  aom_usec_timer timer;
+
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_time; ++i) {
+    int x = rnd_(8);
+    int y = rnd_(8);
+    API_REGISTER_STATE_CHECK(
+        params_.func(pre_, stride, x, y, wsrc_, mask_, &sse1));
+  }
+  aom_usec_timer_mark(&timer);
+
+  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+  printf("obmc_sub_pixel_variance_%dx%d_%d: %d us\n", width(), height(),
+         params_.bit_depth, elapsed_time);
+}
+
+#endif  // !CONFIG_REALTIME_ONLY
+
+typedef MseWxHTestClass<MseWxH16bitFunc> MseWxHTest;
+typedef Mse16xHTestClass<Mse16xH16bitFunc> Mse16xHTest;
+typedef MainTestClass<VarianceMxNFunc> AvxMseTest;
+typedef MainTestClass<VarianceMxNFunc> AvxVarianceTest;
+typedef MainTestClass<GetSseSum8x8QuadFunc> GetSseSum8x8QuadTest;
+typedef MainTestClass<GetSseSum16x16DualFunc> GetSseSum16x16DualTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxSubpelVarianceTest;
+typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxSubpelAvgVarianceTest;
+typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>
+    AvxDistWtdSubpelAvgVarianceTest;
+#if !CONFIG_REALTIME_ONLY
+typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxObmcSubpelVarianceTest;
+#endif
+typedef TestParams<MseWxH16bitFunc> MseWxHParams;
+typedef TestParams<Mse16xH16bitFunc> Mse16xHParams;
+
+TEST_P(MseWxHTest, RefMse) { RefMatchTestMse(); }
+TEST_P(MseWxHTest, DISABLED_SpeedMse) { SpeedTest(); }
+TEST_P(Mse16xHTest, RefMse) { RefMatchTestMse(); }
+TEST_P(Mse16xHTest, RefMseExtreme) { RefMatchExtremeTestMse(); }
+TEST_P(Mse16xHTest, DISABLED_SpeedMse) { SpeedTest(); }
+TEST_P(AvxMseTest, RefMse) { RefTestMse(); }
+TEST_P(AvxMseTest, MaxMse) { MaxTestMse(); }
+TEST_P(AvxVarianceTest, Zero) { ZeroTest(); }
+TEST_P(AvxVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); }
+TEST_P(AvxVarianceTest, OneQuarter) { OneQuarterTest(); }
+TEST_P(AvxVarianceTest, DISABLED_Speed) { SpeedTest(); }
+TEST_P(GetSseSum8x8QuadTest, RefMseSum) { RefTestSseSum(); }
+TEST_P(GetSseSum8x8QuadTest, MinSseSum) { MinTestSseSum(); }
+TEST_P(GetSseSum8x8QuadTest, MaxMseSum) { MaxTestSseSum(); }
+TEST_P(GetSseSum8x8QuadTest, DISABLED_Speed) { SseSum_SpeedTest(); }
+TEST_P(GetSseSum16x16DualTest, RefMseSum) { RefTestSseSumDual(); }
+TEST_P(GetSseSum16x16DualTest, MinSseSum) { MinTestSseSumDual(); }
+TEST_P(GetSseSum16x16DualTest, MaxMseSum) { MaxTestSseSumDual(); }
+TEST_P(GetSseSum16x16DualTest, DISABLED_Speed) { SseSum_SpeedTestDual(); }
+TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
+TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
+TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(AvxSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
+TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxDistWtdSubpelAvgVarianceTest, Ref) { RefTest(); }
+#if !CONFIG_REALTIME_ONLY
+TEST_P(AvxObmcSubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(AvxObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
+#endif
+
+INSTANTIATE_TEST_SUITE_P(
+    C, MseWxHTest,
+    ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_c, 8),
+                      MseWxHParams(3, 2, &aom_mse_wxh_16bit_c, 8),
+                      MseWxHParams(2, 3, &aom_mse_wxh_16bit_c, 8),
+                      MseWxHParams(2, 2, &aom_mse_wxh_16bit_c, 8)));
+
+INSTANTIATE_TEST_SUITE_P(
+    C, Mse16xHTest,
+    ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_c, 8),
+                      Mse16xHParams(3, 2, &aom_mse_16xh_16bit_c, 8),
+                      Mse16xHParams(2, 3, &aom_mse_16xh_16bit_c, 8),
+                      Mse16xHParams(2, 2, &aom_mse_16xh_16bit_c, 8)));
+
+INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest,
+                         ::testing::Values(aom_get_mb_ss_c));
+
+typedef TestParams<VarianceMxNFunc> MseParams;
+INSTANTIATE_TEST_SUITE_P(C, AvxMseTest,
+                         ::testing::Values(MseParams(4, 4, &aom_mse16x16_c),
+                                           MseParams(4, 3, &aom_mse16x8_c),
+                                           MseParams(3, 4, &aom_mse8x16_c),
+                                           MseParams(3, 3, &aom_mse8x8_c)));
+
+typedef TestParams<VarianceMxNFunc> VarianceParams;
+const VarianceParams kArrayVariance_c[] = {
+  VarianceParams(7, 7, &aom_variance128x128_c),
+  VarianceParams(7, 6, &aom_variance128x64_c),
+  VarianceParams(6, 7, &aom_variance64x128_c),
+  VarianceParams(6, 6, &aom_variance64x64_c),
+  VarianceParams(6, 5, &aom_variance64x32_c),
+  VarianceParams(5, 6, &aom_variance32x64_c),
+  VarianceParams(5, 5, &aom_variance32x32_c),
+  VarianceParams(5, 4, &aom_variance32x16_c),
+  VarianceParams(4, 5, &aom_variance16x32_c),
+  VarianceParams(4, 4, &aom_variance16x16_c),
+  VarianceParams(4, 3, &aom_variance16x8_c),
+  VarianceParams(3, 4, &aom_variance8x16_c),
+  VarianceParams(3, 3, &aom_variance8x8_c),
+  VarianceParams(3, 2, &aom_variance8x4_c),
+  VarianceParams(2, 3, &aom_variance4x8_c),
+  VarianceParams(2, 2, &aom_variance4x4_c),
+#if !CONFIG_REALTIME_ONLY
+  VarianceParams(6, 4, &aom_variance64x16_c),
+  VarianceParams(4, 6, &aom_variance16x64_c),
+  VarianceParams(5, 3, &aom_variance32x8_c),
+  VarianceParams(3, 5, &aom_variance8x32_c),
+  VarianceParams(4, 2, &aom_variance16x4_c),
+  VarianceParams(2, 4, &aom_variance4x16_c),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(C, AvxVarianceTest,
+                         ::testing::ValuesIn(kArrayVariance_c));
+
+typedef TestParams<GetSseSum8x8QuadFunc> GetSseSumParams;
+const GetSseSumParams kArrayGetSseSum8x8Quad_c[] = {
+  GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_c, 0),
+  GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_c, 0),
+  GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_c, 0),
+  GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_c, 0)
+};
+INSTANTIATE_TEST_SUITE_P(C, GetSseSum8x8QuadTest,
+                         ::testing::ValuesIn(kArrayGetSseSum8x8Quad_c));
+
+typedef TestParams<GetSseSum16x16DualFunc> GetSseSumParamsDual;
+const GetSseSumParamsDual kArrayGetSseSum16x16Dual_c[] = {
+  GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_c, 0),
+  GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_c, 0),
+  GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_c, 0),
+  GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_c, 0)
+};
+
+INSTANTIATE_TEST_SUITE_P(C, GetSseSum16x16DualTest,
+                         ::testing::ValuesIn(kArrayGetSseSum16x16Dual_c));
+
+typedef TestParams<SubpixVarMxNFunc> SubpelVarianceParams;
+const SubpelVarianceParams kArraySubpelVariance_c[] = {
+  SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_c, 0),
+  SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_c, 0),
+  SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_c, 0),
+  SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_c, 0),
+  SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_c, 0),
+  SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_c, 0),
+  SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_c, 0),
+  SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_c, 0),
+  SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_c, 0),
+  SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_c, 0),
+  SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_c, 0),
+  SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_c, 0),
+  SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_c, 0),
+  SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_c, 0),
+  SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_c, 0),
+  SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0),
+#if !CONFIG_REALTIME_ONLY
+  SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_c, 0),
+  SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_c, 0),
+  SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_c, 0),
+  SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_c, 0),
+  SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_c, 0),
+  SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_c, 0),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(C, AvxSubpelVarianceTest,
+                         ::testing::ValuesIn(kArraySubpelVariance_c));
+
+typedef TestParams<SubpixAvgVarMxNFunc> SubpelAvgVarianceParams;
+const SubpelAvgVarianceParams kArraySubpelAvgVariance_c[] = {
+  SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_c, 0),
+  SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_c, 0),
+  SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_c, 0),
+  SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0),
+  SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0),
+  SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0),
+  SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0),
+  SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0),
+  SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0),
+  SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0),
+  SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0),
+  SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0),
+  SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0),
+  SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0),
+  SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0),
+  SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0),
+#if !CONFIG_REALTIME_ONLY
+  SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_c, 0),
+  SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_c, 0),
+  SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_c, 0),
+  SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_c, 0),
+  SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_c, 0),
+  SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_c, 0),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(C, AvxSubpelAvgVarianceTest,
+                         ::testing::ValuesIn(kArraySubpelAvgVariance_c));
+
+typedef TestParams<DistWtdSubpixAvgVarMxNFunc> DistWtdSubpelAvgVarianceParams;
+const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_c[] = {
+  DistWtdSubpelAvgVarianceParams(
+      6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_c, 0),
+  DistWtdSubpelAvgVarianceParams(
+      6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_c, 0),
+  DistWtdSubpelAvgVarianceParams(
+      5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_c, 0),
+  DistWtdSubpelAvgVarianceParams(
+      5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_c, 0),
+  DistWtdSubpelAvgVarianceParams(
+      5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_c, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_c, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_c, 0),
+  DistWtdSubpelAvgVarianceParams(4, 3,
+                                 &aom_dist_wtd_sub_pixel_avg_variance16x8_c, 0),
+  DistWtdSubpelAvgVarianceParams(3, 4,
+                                 &aom_dist_wtd_sub_pixel_avg_variance8x16_c, 0),
+  DistWtdSubpelAvgVarianceParams(3, 3,
+                                 &aom_dist_wtd_sub_pixel_avg_variance8x8_c, 0),
+  DistWtdSubpelAvgVarianceParams(3, 2,
+                                 &aom_dist_wtd_sub_pixel_avg_variance8x4_c, 0),
+  DistWtdSubpelAvgVarianceParams(2, 3,
+                                 &aom_dist_wtd_sub_pixel_avg_variance4x8_c, 0),
+  DistWtdSubpelAvgVarianceParams(2, 2,
+                                 &aom_dist_wtd_sub_pixel_avg_variance4x4_c, 0),
+#if !CONFIG_REALTIME_ONLY
+
+  DistWtdSubpelAvgVarianceParams(
+      6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_c, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_c, 0),
+  DistWtdSubpelAvgVarianceParams(5, 3,
+                                 &aom_dist_wtd_sub_pixel_avg_variance32x8_c, 0),
+  DistWtdSubpelAvgVarianceParams(3, 5,
+                                 &aom_dist_wtd_sub_pixel_avg_variance8x32_c, 0),
+  DistWtdSubpelAvgVarianceParams(4, 2,
+                                 &aom_dist_wtd_sub_pixel_avg_variance16x4_c, 0),
+  DistWtdSubpelAvgVarianceParams(2, 4,
+                                 &aom_dist_wtd_sub_pixel_avg_variance4x16_c, 0),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(C, AvxDistWtdSubpelAvgVarianceTest,
+                         ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_c));
+
+#if !CONFIG_REALTIME_ONLY
+INSTANTIATE_TEST_SUITE_P(
+    C, AvxObmcSubpelVarianceTest,
+    ::testing::Values(
+        ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_c,
+                                 0),
+        ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_c, 0),
+        ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_c, 0),
+        ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_c, 0),
+        ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_c, 0),
+        ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_c, 0),
+        ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_c, 0),
+        ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_c, 0),
+        ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_c, 0),
+        ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_c, 0),
+        ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_c, 0),
+        ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_c, 0),
+        ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_c, 0),
+        ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_c, 0),
+        ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_c, 0),
+        ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0),
+
+        ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_c, 0),
+        ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_c, 0),
+        ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_c, 0),
+        ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_c, 0),
+        ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_c, 0),
+        ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_c, 0)));
+#endif
+
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef uint64_t (*MseHBDWxH16bitFunc)(uint16_t *dst, int dstride,
+                                       uint16_t *src, int sstride, int w,
+                                       int h);
+
+template <typename FunctionType>
+class MseHBDWxHTestClass
+    : public ::testing::TestWithParam<TestParams<FunctionType> > {
+ public:
+  void SetUp() override {
+    params_ = this->GetParam();
+
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, block_size() * sizeof(src_)));
+    dst_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, block_size() * sizeof(dst_)));
+    ASSERT_NE(src_, nullptr);
+    ASSERT_NE(dst_, nullptr);
+  }
+
+  void TearDown() override {
+    aom_free(src_);
+    aom_free(dst_);
+    src_ = nullptr;
+    dst_ = nullptr;
+  }
+
+ protected:
+  void RefMatchTestMse();
+  void SpeedTest();
+
+ protected:
+  ACMRandom rnd_;
+  uint16_t *dst_;
+  uint16_t *src_;
+  TestParams<FunctionType> params_;
+
+  // some relay helpers
+  int block_size() const { return params_.block_size; }
+  int width() const { return params_.width; }
+  int d_stride() const { return params_.width; }  // stride is same as width
+  int s_stride() const { return params_.width; }  // stride is same as width
+  int height() const { return params_.height; }
+  int mask() const { return params_.mask; }
+};
+
+template <typename MseHBDWxHFunctionType>
+void MseHBDWxHTestClass<MseHBDWxHFunctionType>::SpeedTest() {
+  aom_usec_timer ref_timer, test_timer;
+  double elapsed_time_c = 0;
+  double elapsed_time_simd = 0;
+  int run_time = 10000000;
+  int w = width();
+  int h = height();
+  int dstride = d_stride();
+  int sstride = s_stride();
+  for (int k = 0; k < block_size(); ++k) {
+    dst_[k] = rnd_.Rand16() & mask();
+    src_[k] = rnd_.Rand16() & mask();
+  }
+  aom_usec_timer_start(&ref_timer);
+  for (int i = 0; i < run_time; i++) {
+    aom_mse_wxh_16bit_highbd_c(dst_, dstride, src_, sstride, w, h);
+  }
+  aom_usec_timer_mark(&ref_timer);
+  elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
+
+  aom_usec_timer_start(&test_timer);
+  for (int i = 0; i < run_time; i++) {
+    params_.func(dst_, dstride, src_, sstride, w, h);
+  }
+  aom_usec_timer_mark(&test_timer);
+  elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
+
+  printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
+         elapsed_time_c, elapsed_time_simd,
+         (elapsed_time_c / elapsed_time_simd));
+}
+
+template <typename MseHBDWxHFunctionType>
+void MseHBDWxHTestClass<MseHBDWxHFunctionType>::RefMatchTestMse() {
+  uint64_t mse_ref = 0;
+  uint64_t mse_mod = 0;
+  int w = width();
+  int h = height();
+  int dstride = d_stride();
+  int sstride = s_stride();
+  for (int i = 0; i < 10; i++) {
+    for (int k = 0; k < block_size(); ++k) {
+      dst_[k] = rnd_.Rand16() & mask();
+      src_[k] = rnd_.Rand16() & mask();
+    }
+    API_REGISTER_STATE_CHECK(mse_ref = aom_mse_wxh_16bit_highbd_c(
+                                 dst_, dstride, src_, sstride, w, h));
+    API_REGISTER_STATE_CHECK(
+        mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
+    EXPECT_EQ(mse_ref, mse_mod)
+        << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
+  }
+}
+
+typedef TestParams<MseHBDWxH16bitFunc> MseHBDWxHParams;
+typedef MseHBDWxHTestClass<MseHBDWxH16bitFunc> MseHBDWxHTest;
+typedef MainTestClass<VarianceMxNFunc> AvxHBDMseTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDMseTest);
+typedef MainTestClass<VarianceMxNFunc> AvxHBDVarianceTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxHBDSubpelVarianceTest;
+typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxHBDSubpelAvgVarianceTest;
+typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>
+    AvxHBDDistWtdSubpelAvgVarianceTest;
+#if !CONFIG_REALTIME_ONLY
+typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxHBDObmcSubpelVarianceTest;
+#endif
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDObmcSubpelVarianceTest);
+
+TEST_P(MseHBDWxHTest, RefMse) { RefMatchTestMse(); }
+TEST_P(MseHBDWxHTest, DISABLED_SpeedMse) { SpeedTest(); }
+TEST_P(AvxHBDMseTest, RefMse) { RefTestMse(); }
+TEST_P(AvxHBDMseTest, MaxMse) { MaxTestMse(); }
+TEST_P(AvxHBDMseTest, DISABLED_SpeedMse) { SpeedTest(); }
+TEST_P(AvxHBDVarianceTest, Zero) { ZeroTest(); }
+TEST_P(AvxHBDVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxHBDVarianceTest, RefStride) { RefStrideTest(); }
+TEST_P(AvxHBDVarianceTest, OneQuarter) { OneQuarterTest(); }
+TEST_P(AvxHBDVarianceTest, DISABLED_Speed) { SpeedTest(); }
+TEST_P(AvxHBDSubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(AvxHBDSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
+TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxHBDDistWtdSubpelAvgVarianceTest, Ref) { RefTest(); }
+#if !CONFIG_REALTIME_ONLY
+TEST_P(AvxHBDObmcSubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxHBDObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(AvxHBDObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
+#endif
+
+INSTANTIATE_TEST_SUITE_P(
+    C, MseHBDWxHTest,
+    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_c, 10),
+                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_c, 10),
+                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_c, 10),
+                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_c, 10)));
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_c, 12),
+                      MseParams(4, 3, &aom_highbd_12_mse16x8_c, 12),
+                      MseParams(3, 4, &aom_highbd_12_mse8x16_c, 12),
+                      MseParams(3, 3, &aom_highbd_12_mse8x8_c, 12),
+                      MseParams(4, 4, &aom_highbd_10_mse16x16_c, 10),
+                      MseParams(4, 3, &aom_highbd_10_mse16x8_c, 10),
+                      MseParams(3, 4, &aom_highbd_10_mse8x16_c, 10),
+                      MseParams(3, 3, &aom_highbd_10_mse8x8_c, 10),
+                      MseParams(4, 4, &aom_highbd_8_mse16x16_c, 8),
+                      MseParams(4, 3, &aom_highbd_8_mse16x8_c, 8),
+                      MseParams(3, 4, &aom_highbd_8_mse8x16_c, 8),
+                      MseParams(3, 3, &aom_highbd_8_mse8x8_c, 8)));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, MseHBDWxHTest,
+    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_neon, 10),
+                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_neon, 10),
+                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_neon, 10),
+                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_neon,
+                                      10)));
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_neon, 12),
+                      MseParams(4, 3, &aom_highbd_12_mse16x8_neon, 12),
+                      MseParams(3, 4, &aom_highbd_12_mse8x16_neon, 12),
+                      MseParams(3, 3, &aom_highbd_12_mse8x8_neon, 12),
+                      MseParams(4, 4, &aom_highbd_10_mse16x16_neon, 10),
+                      MseParams(4, 3, &aom_highbd_10_mse16x8_neon, 10),
+                      MseParams(3, 4, &aom_highbd_10_mse8x16_neon, 10),
+                      MseParams(3, 3, &aom_highbd_10_mse8x8_neon, 10),
+                      MseParams(4, 4, &aom_highbd_8_mse16x16_neon, 8),
+                      MseParams(4, 3, &aom_highbd_8_mse16x8_neon, 8),
+                      MseParams(3, 4, &aom_highbd_8_mse8x16_neon, 8),
+                      MseParams(3, 3, &aom_highbd_8_mse8x8_neon, 8)));
+#endif  // HAVE_NEON
+
+#if HAVE_NEON_DOTPROD
+INSTANTIATE_TEST_SUITE_P(
+    NEON_DOTPROD, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_8_mse16x16_neon_dotprod, 8),
+                      MseParams(4, 3, &aom_highbd_8_mse16x8_neon_dotprod, 8),
+                      MseParams(3, 4, &aom_highbd_8_mse8x16_neon_dotprod, 8),
+                      MseParams(3, 3, &aom_highbd_8_mse8x8_neon_dotprod, 8)));
+#endif  // HAVE_NEON_DOTPROD
+
+#if HAVE_SVE
+INSTANTIATE_TEST_SUITE_P(
+    SVE, MseHBDWxHTest,
+    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sve, 10),
+                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sve, 10),
+                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sve, 10),
+                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sve,
+                                      10)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SVE, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sve, 12),
+                      MseParams(4, 3, &aom_highbd_12_mse16x8_sve, 12),
+                      MseParams(3, 4, &aom_highbd_12_mse8x16_sve, 12),
+                      MseParams(3, 3, &aom_highbd_12_mse8x8_sve, 12),
+                      MseParams(4, 4, &aom_highbd_10_mse16x16_sve, 10),
+                      MseParams(4, 3, &aom_highbd_10_mse16x8_sve, 10),
+                      MseParams(3, 4, &aom_highbd_10_mse8x16_sve, 10),
+                      MseParams(3, 3, &aom_highbd_10_mse8x8_sve, 10),
+                      MseParams(4, 4, &aom_highbd_8_mse16x16_sve, 8),
+                      MseParams(4, 3, &aom_highbd_8_mse16x8_sve, 8),
+                      MseParams(3, 4, &aom_highbd_8_mse8x16_sve, 8),
+                      MseParams(3, 3, &aom_highbd_8_mse8x8_sve, 8)));
+#endif  // HAVE_SVE
+
+const VarianceParams kArrayHBDVariance_c[] = {
+  VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12),
+  VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12),
+  VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12),
+  VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12),
+  VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12),
+  VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12),
+  VarianceParams(5, 5, &aom_highbd_12_variance32x32_c, 12),
+  VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12),
+  VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12),
+  VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12),
+  VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12),
+  VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12),
+  VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12),
+  VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12),
+  VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12),
+  VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12),
+  VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10),
+  VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10),
+  VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10),
+  VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10),
+  VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10),
+  VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10),
+  VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10),
+  VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10),
+  VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10),
+  VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10),
+  VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10),
+  VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10),
+  VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10),
+  VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10),
+  VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10),
+  VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10),
+  VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8),
+  VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8),
+  VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8),
+  VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8),
+  VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8),
+  VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8),
+  VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8),
+  VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8),
+  VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8),
+  VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8),
+  VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8),
+  VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8),
+  VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8),
+  VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8),
+  VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8),
+  VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8),
+#if !CONFIG_REALTIME_ONLY
+  VarianceParams(6, 4, &aom_highbd_12_variance64x16_c, 12),
+  VarianceParams(4, 6, &aom_highbd_12_variance16x64_c, 12),
+  VarianceParams(5, 3, &aom_highbd_12_variance32x8_c, 12),
+  VarianceParams(3, 5, &aom_highbd_12_variance8x32_c, 12),
+  VarianceParams(4, 2, &aom_highbd_12_variance16x4_c, 12),
+  VarianceParams(2, 4, &aom_highbd_12_variance4x16_c, 12),
+  VarianceParams(6, 4, &aom_highbd_10_variance64x16_c, 10),
+  VarianceParams(4, 6, &aom_highbd_10_variance16x64_c, 10),
+  VarianceParams(5, 3, &aom_highbd_10_variance32x8_c, 10),
+  VarianceParams(3, 5, &aom_highbd_10_variance8x32_c, 10),
+  VarianceParams(4, 2, &aom_highbd_10_variance16x4_c, 10),
+  VarianceParams(2, 4, &aom_highbd_10_variance4x16_c, 10),
+  VarianceParams(6, 4, &aom_highbd_8_variance64x16_c, 8),
+  VarianceParams(4, 6, &aom_highbd_8_variance16x64_c, 8),
+  VarianceParams(5, 3, &aom_highbd_8_variance32x8_c, 8),
+  VarianceParams(3, 5, &aom_highbd_8_variance8x32_c, 8),
+  VarianceParams(4, 2, &aom_highbd_8_variance16x4_c, 8),
+  VarianceParams(2, 4, &aom_highbd_8_variance4x16_c, 8),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(C, AvxHBDVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDVariance_c));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AvxHBDVarianceTest,
+    ::testing::Values(
+        VarianceParams(2, 2, &aom_highbd_8_variance4x4_sse4_1, 8),
+        VarianceParams(2, 2, &aom_highbd_10_variance4x4_sse4_1, 10),
+        VarianceParams(2, 2, &aom_highbd_12_variance4x4_sse4_1, 12)));
+#endif  // HAVE_SSE4_1
+
+const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = {
+  SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8),
+  SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8),
+  SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8),
+  SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8),
+  SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8),
+  SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8),
+  SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8),
+  SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8),
+  SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8),
+  SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8),
+  SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8),
+  SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_c, 8),
+  SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8),
+  SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8),
+  SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8),
+  SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8),
+  SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10),
+  SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10),
+  SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10),
+  SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10),
+  SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10),
+  SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10),
+  SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10),
+  SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10),
+  SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10),
+  SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10),
+  SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10),
+  SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10),
+  SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10),
+  SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10),
+  SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10),
+  SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10),
+  SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12),
+  SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12),
+  SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12),
+  SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12),
+  SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12),
+  SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12),
+  SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12),
+  SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_c, 12),
+  SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12),
+  SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12),
+  SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12),
+  SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12),
+  SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12),
+  SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12),
+  SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12),
+  SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12),
+#if !CONFIG_REALTIME_ONLY
+  SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_c, 8),
+  SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_c, 8),
+  SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_c, 8),
+  SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_c, 8),
+  SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_c, 8),
+  SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_c, 8),
+  SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_c, 10),
+  SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_c, 10),
+  SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_c, 10),
+  SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_c, 10),
+  SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_c, 10),
+  SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_c, 10),
+  SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_c, 12),
+  SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_c, 12),
+  SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_c, 12),
+  SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_c, 12),
+  SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_c, 12),
+  SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_c, 12),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
+
+const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
+  SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c,
+                          8),
+  SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c,
+                          8),
+  SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c,
+                          8),
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8),
+  SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8),
+  SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8),
+  SubpelAvgVarianceParams(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c,
+                          10),
+  SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c,
+                          10),
+  SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c,
+                          10),
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c,
+                          10),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c,
+                          10),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c,
+                          10),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c,
+                          10),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c,
+                          10),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c,
+                          10),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c,
+                          10),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c,
+                          10),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c,
+                          10),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10),
+  SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10),
+  SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10),
+  SubpelAvgVarianceParams(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c,
+                          12),
+  SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c,
+                          12),
+  SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c,
+                          12),
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c,
+                          12),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c,
+                          12),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c,
+                          12),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_c,
+                          12),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c,
+                          12),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c,
+                          12),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c,
+                          12),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c,
+                          12),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c,
+                          12),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12),
+  SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12),
+  SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12),
+
+#if !CONFIG_REALTIME_ONLY
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_c, 8),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_c, 8),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_c, 8),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_c, 8),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_c, 8),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_c, 8),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_c,
+                          10),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_c,
+                          10),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_c,
+                          10),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_c,
+                          10),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_c,
+                          10),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_c,
+                          10),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_c,
+                          12),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_c,
+                          12),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_c,
+                          12),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_c,
+                          12),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_c,
+                          12),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_c,
+                          12),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelAvgVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
+
+const DistWtdSubpelAvgVarianceParams kArrayHBDDistWtdSubpelAvgVariance_c[] = {
+  DistWtdSubpelAvgVarianceParams(
+      7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_c, 12),
+
+#if !CONFIG_REALTIME_ONLY
+  DistWtdSubpelAvgVarianceParams(
+      6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_c, 12),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(
+    C, AvxHBDDistWtdSubpelAvgVarianceTest,
+    ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_c));
+
+#if !CONFIG_REALTIME_ONLY
+const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_c[] = {
+  ObmcSubpelVarianceParams(7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_c,
+                           8),
+  ObmcSubpelVarianceParams(7, 6, &aom_highbd_8_obmc_sub_pixel_variance128x64_c,
+                           8),
+  ObmcSubpelVarianceParams(6, 7, &aom_highbd_8_obmc_sub_pixel_variance64x128_c,
+                           8),
+  ObmcSubpelVarianceParams(6, 6, &aom_highbd_8_obmc_sub_pixel_variance64x64_c,
+                           8),
+  ObmcSubpelVarianceParams(6, 5, &aom_highbd_8_obmc_sub_pixel_variance64x32_c,
+                           8),
+  ObmcSubpelVarianceParams(5, 6, &aom_highbd_8_obmc_sub_pixel_variance32x64_c,
+                           8),
+  ObmcSubpelVarianceParams(5, 5, &aom_highbd_8_obmc_sub_pixel_variance32x32_c,
+                           8),
+  ObmcSubpelVarianceParams(5, 4, &aom_highbd_8_obmc_sub_pixel_variance32x16_c,
+                           8),
+  ObmcSubpelVarianceParams(4, 5, &aom_highbd_8_obmc_sub_pixel_variance16x32_c,
+                           8),
+  ObmcSubpelVarianceParams(4, 4, &aom_highbd_8_obmc_sub_pixel_variance16x16_c,
+                           8),
+  ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_c,
+                           8),
+  ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_c,
+                           8),
+  ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_c, 8),
+  ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_c, 8),
+  ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_c, 8),
+  ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_c, 8),
+  ObmcSubpelVarianceParams(7, 7,
+                           &aom_highbd_10_obmc_sub_pixel_variance128x128_c, 10),
+  ObmcSubpelVarianceParams(7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_c,
+                           10),
+  ObmcSubpelVarianceParams(6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_c,
+                           10),
+  ObmcSubpelVarianceParams(6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_c,
+                           10),
+  ObmcSubpelVarianceParams(6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_c,
+                           10),
+  ObmcSubpelVarianceParams(5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_c,
+                           10),
+  ObmcSubpelVarianceParams(5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_c,
+                           10),
+  ObmcSubpelVarianceParams(5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_c,
+                           10),
+  ObmcSubpelVarianceParams(4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_c,
+                           10),
+  ObmcSubpelVarianceParams(4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_c,
+                           10),
+  ObmcSubpelVarianceParams(4, 3, &aom_highbd_10_obmc_sub_pixel_variance16x8_c,
+                           10),
+  ObmcSubpelVarianceParams(3, 4, &aom_highbd_10_obmc_sub_pixel_variance8x16_c,
+                           10),
+  ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_c,
+                           10),
+  ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_c,
+                           10),
+  ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_c,
+                           10),
+  ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_c,
+                           10),
+  ObmcSubpelVarianceParams(7, 7,
+                           &aom_highbd_12_obmc_sub_pixel_variance128x128_c, 12),
+  ObmcSubpelVarianceParams(7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_c,
+                           12),
+  ObmcSubpelVarianceParams(6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_c,
+                           12),
+  ObmcSubpelVarianceParams(6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_c,
+                           12),
+  ObmcSubpelVarianceParams(6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_c,
+                           12),
+  ObmcSubpelVarianceParams(5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_c,
+                           12),
+  ObmcSubpelVarianceParams(5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_c,
+                           12),
+  ObmcSubpelVarianceParams(5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_c,
+                           12),
+  ObmcSubpelVarianceParams(4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_c,
+                           12),
+  ObmcSubpelVarianceParams(4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_c,
+                           12),
+  ObmcSubpelVarianceParams(4, 3, &aom_highbd_12_obmc_sub_pixel_variance16x8_c,
+                           12),
+  ObmcSubpelVarianceParams(3, 4, &aom_highbd_12_obmc_sub_pixel_variance8x16_c,
+                           12),
+  ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_c,
+                           12),
+  ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_c,
+                           12),
+  ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_c,
+                           12),
+  ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_c,
+                           12),
+
+  ObmcSubpelVarianceParams(6, 4, &aom_highbd_8_obmc_sub_pixel_variance64x16_c,
+                           8),
+  ObmcSubpelVarianceParams(4, 6, &aom_highbd_8_obmc_sub_pixel_variance16x64_c,
+                           8),
+  ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_c,
+                           8),
+  ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_c,
+                           8),
+  ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_c,
+                           8),
+  ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_c,
+                           8),
+  ObmcSubpelVarianceParams(6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_c,
+                           10),
+  ObmcSubpelVarianceParams(4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_c,
+                           10),
+  ObmcSubpelVarianceParams(5, 3, &aom_highbd_10_obmc_sub_pixel_variance32x8_c,
+                           10),
+  ObmcSubpelVarianceParams(3, 5, &aom_highbd_10_obmc_sub_pixel_variance8x32_c,
+                           10),
+  ObmcSubpelVarianceParams(4, 2, &aom_highbd_10_obmc_sub_pixel_variance16x4_c,
+                           10),
+  ObmcSubpelVarianceParams(2, 4, &aom_highbd_10_obmc_sub_pixel_variance4x16_c,
+                           10),
+  ObmcSubpelVarianceParams(6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_c,
+                           12),
+  ObmcSubpelVarianceParams(4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_c,
+                           12),
+  ObmcSubpelVarianceParams(5, 3, &aom_highbd_12_obmc_sub_pixel_variance32x8_c,
+                           12),
+  ObmcSubpelVarianceParams(3, 5, &aom_highbd_12_obmc_sub_pixel_variance8x32_c,
+                           12),
+  ObmcSubpelVarianceParams(4, 2, &aom_highbd_12_obmc_sub_pixel_variance16x4_c,
+                           12),
+  ObmcSubpelVarianceParams(2, 4, &aom_highbd_12_obmc_sub_pixel_variance4x16_c,
+                           12),
+};
+INSTANTIATE_TEST_SUITE_P(C, AvxHBDObmcSubpelVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_c));
+#endif  // !CONFIG_REALTIME_ONLY
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, MseWxHTest,
+    ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_sse2, 8),
+                      MseWxHParams(3, 2, &aom_mse_wxh_16bit_sse2, 8),
+                      MseWxHParams(2, 3, &aom_mse_wxh_16bit_sse2, 8),
+                      MseWxHParams(2, 2, &aom_mse_wxh_16bit_sse2, 8)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, Mse16xHTest,
+    ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_sse2, 8),
+                      Mse16xHParams(3, 2, &aom_mse_16xh_16bit_sse2, 8),
+                      Mse16xHParams(2, 3, &aom_mse_16xh_16bit_sse2, 8),
+                      Mse16xHParams(2, 2, &aom_mse_16xh_16bit_sse2, 8)));
+
+INSTANTIATE_TEST_SUITE_P(SSE2, SumOfSquaresTest,
+                         ::testing::Values(aom_get_mb_ss_sse2));
+
+INSTANTIATE_TEST_SUITE_P(SSE2, AvxMseTest,
+                         ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2),
+                                           MseParams(4, 3, &aom_mse16x8_sse2),
+                                           MseParams(3, 4, &aom_mse8x16_sse2),
+                                           MseParams(3, 3, &aom_mse8x8_sse2)));
+
+const VarianceParams kArrayVariance_sse2[] = {
+  VarianceParams(7, 7, &aom_variance128x128_sse2),
+  VarianceParams(7, 6, &aom_variance128x64_sse2),
+  VarianceParams(6, 7, &aom_variance64x128_sse2),
+  VarianceParams(6, 6, &aom_variance64x64_sse2),
+  VarianceParams(6, 5, &aom_variance64x32_sse2),
+  VarianceParams(5, 6, &aom_variance32x64_sse2),
+  VarianceParams(5, 5, &aom_variance32x32_sse2),
+  VarianceParams(5, 4, &aom_variance32x16_sse2),
+  VarianceParams(4, 5, &aom_variance16x32_sse2),
+  VarianceParams(4, 4, &aom_variance16x16_sse2),
+  VarianceParams(4, 3, &aom_variance16x8_sse2),
+  VarianceParams(3, 4, &aom_variance8x16_sse2),
+  VarianceParams(3, 3, &aom_variance8x8_sse2),
+  VarianceParams(3, 2, &aom_variance8x4_sse2),
+  VarianceParams(2, 3, &aom_variance4x8_sse2),
+  VarianceParams(2, 2, &aom_variance4x4_sse2),
+#if !CONFIG_REALTIME_ONLY
+  VarianceParams(6, 4, &aom_variance64x16_sse2),
+  VarianceParams(5, 3, &aom_variance32x8_sse2),
+  VarianceParams(4, 6, &aom_variance16x64_sse2),
+  VarianceParams(4, 2, &aom_variance16x4_sse2),
+  VarianceParams(3, 5, &aom_variance8x32_sse2),
+  VarianceParams(2, 4, &aom_variance4x16_sse2),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, AvxVarianceTest,
+                         ::testing::ValuesIn(kArrayVariance_sse2));
+
+const GetSseSumParams kArrayGetSseSum8x8Quad_sse2[] = {
+  GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
+  GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
+  GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
+  GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_sse2, 0)
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum8x8QuadTest,
+                         ::testing::ValuesIn(kArrayGetSseSum8x8Quad_sse2));
+
+const GetSseSumParamsDual kArrayGetSseSum16x16Dual_sse2[] = {
+  GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
+  GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
+  GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
+  GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_sse2, 0)
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum16x16DualTest,
+                         ::testing::ValuesIn(kArrayGetSseSum16x16Dual_sse2));
+
+const SubpelVarianceParams kArraySubpelVariance_sse2[] = {
+  SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_sse2, 0),
+  SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_sse2, 0),
+  SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_sse2, 0),
+  SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_sse2, 0),
+  SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_sse2, 0),
+  SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_sse2, 0),
+  SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_sse2, 0),
+  SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_sse2, 0),
+  SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_sse2, 0),
+  SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_sse2, 0),
+  SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_sse2, 0),
+  SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_sse2, 0),
+  SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_sse2, 0),
+  SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_sse2, 0),
+  SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_sse2, 0),
+  SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_sse2, 0),
+#if !CONFIG_REALTIME_ONLY
+  SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_sse2, 0),
+  SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_sse2, 0),
+  SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_sse2, 0),
+  SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_sse2, 0),
+  SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_sse2, 0),
+  SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_sse2, 0),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, AvxSubpelVarianceTest,
+                         ::testing::ValuesIn(kArraySubpelVariance_sse2));
+
+const SubpelAvgVarianceParams kArraySubpelAvgVariance_sse2[] = {
+  SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_sse2, 0),
+  SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_sse2, 0),
+  SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_sse2, 0),
+  SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_sse2, 0),
+  SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_sse2, 0),
+  SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_sse2, 0),
+  SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_sse2, 0),
+  SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_sse2, 0),
+  SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_sse2, 0),
+  SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_sse2, 0),
+  SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_sse2, 0),
+  SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_sse2, 0),
+  SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_sse2, 0),
+  SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_sse2, 0),
+  SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_sse2, 0),
+  SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_sse2, 0),
+#if !CONFIG_REALTIME_ONLY
+  SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_sse2, 0),
+  SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_sse2, 0),
+  SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_sse2, 0),
+  SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_sse2, 0),
+  SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_sse2, 0),
+  SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_sse2, 0),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, AvxSubpelAvgVarianceTest,
+                         ::testing::ValuesIn(kArraySubpelAvgVariance_sse2));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, MseHBDWxHTest,
+    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
+                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sse2, 10),
+                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
+                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sse2,
+                                      10)));
+#endif  // HAVE_SSE2
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AvxSubpelVarianceTest,
+    ::testing::Values(
+        SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1,
+                             8),
+        SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1,
+                             10),
+        SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1,
+                             12)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AvxSubpelAvgVarianceTest,
+    ::testing::Values(
+        SubpelAvgVarianceParams(2, 2,
+                                &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1,
+                                8),
+        SubpelAvgVarianceParams(2, 2,
+                                &aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1,
+                                10),
+        SubpelAvgVarianceParams(2, 2,
+                                &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1,
+                                12)));
+#endif  // HAVE_SSE4_1
+
+INSTANTIATE_TEST_SUITE_P(
+    SSE2, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2, 12),
+                      MseParams(3, 3, &aom_highbd_12_mse8x8_sse2, 12),
+                      MseParams(4, 4, &aom_highbd_10_mse16x16_sse2, 10),
+                      MseParams(3, 3, &aom_highbd_10_mse8x8_sse2, 10),
+                      MseParams(4, 4, &aom_highbd_8_mse16x16_sse2, 8),
+                      MseParams(3, 3, &aom_highbd_8_mse8x8_sse2, 8)));
+
+const VarianceParams kArrayHBDVariance_sse2[] = {
+  VarianceParams(7, 7, &aom_highbd_12_variance128x128_sse2, 12),
+  VarianceParams(7, 6, &aom_highbd_12_variance128x64_sse2, 12),
+  VarianceParams(6, 7, &aom_highbd_12_variance64x128_sse2, 12),
+  VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12),
+  VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12),
+  VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12),
+  VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12),
+  VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12),
+  VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12),
+  VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12),
+  VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12),
+  VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12),
+  VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12),
+  VarianceParams(7, 7, &aom_highbd_10_variance128x128_sse2, 10),
+  VarianceParams(7, 6, &aom_highbd_10_variance128x64_sse2, 10),
+  VarianceParams(6, 7, &aom_highbd_10_variance64x128_sse2, 10),
+  VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10),
+  VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10),
+  VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10),
+  VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10),
+  VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10),
+  VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10),
+  VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10),
+  VarianceParams(4, 3, &aom_highbd_10_variance16x8_sse2, 10),
+  VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10),
+  VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10),
+  VarianceParams(7, 7, &aom_highbd_8_variance128x128_sse2, 8),
+  VarianceParams(7, 6, &aom_highbd_8_variance128x64_sse2, 8),
+  VarianceParams(6, 7, &aom_highbd_8_variance64x128_sse2, 8),
+  VarianceParams(6, 6, &aom_highbd_8_variance64x64_sse2, 8),
+  VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8),
+  VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8),
+  VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8),
+  VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8),
+  VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8),
+  VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8),
+  VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8),
+  VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8),
+  VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8),
+#if !CONFIG_REALTIME_ONLY
+  VarianceParams(6, 4, &aom_highbd_12_variance64x16_sse2, 12),
+  VarianceParams(4, 6, &aom_highbd_12_variance16x64_sse2, 12),
+  VarianceParams(5, 3, &aom_highbd_12_variance32x8_sse2, 12),
+  VarianceParams(3, 5, &aom_highbd_12_variance8x32_sse2, 12),
+  // VarianceParams(4, 2, &aom_highbd_12_variance16x4_sse2, 12),
+  // VarianceParams(2, 4, &aom_highbd_12_variance4x16_sse2, 12),
+  VarianceParams(6, 4, &aom_highbd_10_variance64x16_sse2, 10),
+  VarianceParams(4, 6, &aom_highbd_10_variance16x64_sse2, 10),
+  VarianceParams(5, 3, &aom_highbd_10_variance32x8_sse2, 10),
+  VarianceParams(3, 5, &aom_highbd_10_variance8x32_sse2, 10),
+  // VarianceParams(4, 2, &aom_highbd_10_variance16x4_sse2, 10),
+  // VarianceParams(2, 4, &aom_highbd_10_variance4x16_sse2, 10),
+  VarianceParams(6, 4, &aom_highbd_8_variance64x16_sse2, 8),
+  VarianceParams(4, 6, &aom_highbd_8_variance16x64_sse2, 8),
+  VarianceParams(5, 3, &aom_highbd_8_variance32x8_sse2, 8),
+  VarianceParams(3, 5, &aom_highbd_8_variance8x32_sse2, 8),
+// VarianceParams(4, 2, &aom_highbd_8_variance16x4_sse2, 8),
+// VarianceParams(2, 4, &aom_highbd_8_variance4x16_sse2, 8),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDVariance_sse2));
+
+#if HAVE_AVX2
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, MseHBDWxHTest,
+    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
+                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_avx2, 10),
+                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
+                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_avx2,
+                                      10)));
+
+const VarianceParams kArrayHBDVariance_avx2[] = {
+  VarianceParams(7, 7, &aom_highbd_10_variance128x128_avx2, 10),
+  VarianceParams(7, 6, &aom_highbd_10_variance128x64_avx2, 10),
+  VarianceParams(6, 7, &aom_highbd_10_variance64x128_avx2, 10),
+  VarianceParams(6, 6, &aom_highbd_10_variance64x64_avx2, 10),
+  VarianceParams(6, 5, &aom_highbd_10_variance64x32_avx2, 10),
+  VarianceParams(5, 6, &aom_highbd_10_variance32x64_avx2, 10),
+  VarianceParams(5, 5, &aom_highbd_10_variance32x32_avx2, 10),
+  VarianceParams(5, 4, &aom_highbd_10_variance32x16_avx2, 10),
+  VarianceParams(4, 5, &aom_highbd_10_variance16x32_avx2, 10),
+  VarianceParams(4, 4, &aom_highbd_10_variance16x16_avx2, 10),
+  VarianceParams(4, 3, &aom_highbd_10_variance16x8_avx2, 10),
+  VarianceParams(3, 4, &aom_highbd_10_variance8x16_avx2, 10),
+  VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10),
+#if !CONFIG_REALTIME_ONLY
+  VarianceParams(6, 4, &aom_highbd_10_variance64x16_avx2, 10),
+  VarianceParams(5, 3, &aom_highbd_10_variance32x8_avx2, 10),
+  VarianceParams(4, 6, &aom_highbd_10_variance16x64_avx2, 10),
+  VarianceParams(3, 5, &aom_highbd_10_variance8x32_avx2, 10),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDVariance_avx2));
+
+const SubpelVarianceParams kArrayHBDSubpelVariance_avx2[] = {
+  SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_avx2, 10),
+  SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_avx2, 10),
+  SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_avx2, 10),
+  SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_avx2, 10),
+  SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_avx2, 10),
+  SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_avx2, 10),
+  SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_avx2, 10),
+  SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_avx2, 10),
+  SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_avx2, 10),
+  SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_avx2, 10),
+  SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_avx2, 10),
+  SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_avx2, 10),
+  SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_avx2, 10),
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDSubpelVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDSubpelVariance_avx2));
+#endif  // HAVE_AVX2
+
+const SubpelVarianceParams kArrayHBDSubpelVariance_sse2[] = {
+  SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_sse2, 12),
+  SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_sse2, 12),
+  SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_sse2, 12),
+  SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_sse2, 12),
+  SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12),
+  SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12),
+  SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12),
+  SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12),
+  SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12),
+  SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12),
+  SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12),
+  SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12),
+  SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12),
+  SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12),
+  SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_sse2, 10),
+  SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_sse2, 10),
+  SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_sse2, 10),
+  SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10),
+  SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10),
+  SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10),
+  SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10),
+  SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10),
+  SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10),
+  SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10),
+  SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10),
+  SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10),
+  SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10),
+  SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10),
+  SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_sse2, 8),
+  SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_sse2, 8),
+  SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_sse2, 8),
+  SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8),
+  SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8),
+  SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8),
+  SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8),
+  SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8),
+  SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8),
+  SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8),
+  SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8),
+  SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8),
+  SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8),
+  SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8),
+#if !CONFIG_REALTIME_ONLY
+  SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_sse2, 12),
+  SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12),
+  SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12),
+  SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12),
+  SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12),
+  // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12),
+  SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10),
+  SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10),
+  SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10),
+  SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10),
+  SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10),
+  // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10),
+  SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_sse2, 8),
+  SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_sse2, 8),
+  SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8),
+  SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8),
+  SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8),
+// SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDSubpelVariance_sse2));
+
+const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_sse2[] = {
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2,
+                          12),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2,
+                          12),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2,
+                          12),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2,
+                          12),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2,
+                          12),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2,
+                          12),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2,
+                          12),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2,
+                          12),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2,
+                          12),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2,
+                          12),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2,
+                          12),
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2,
+                          10),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2,
+                          10),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_sse2,
+                          10),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2,
+                          10),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2,
+                          10),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2,
+                          10),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2,
+                          10),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2,
+                          10),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2,
+                          10),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2,
+                          10),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2,
+                          10),
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2,
+                          8),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2,
+                          8),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2,
+                          8),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2,
+                          8),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2,
+                          8),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2,
+                          8),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2,
+                          8),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2,
+                          8),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2,
+                          8),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2,
+                          8),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2,
+                          8),
+
+#if !CONFIG_REALTIME_ONLY
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_sse2,
+                          12),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_sse2,
+                          12),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_sse2,
+                          12),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2,
+                          12),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2,
+                          12),
+  // SubpelAvgVarianceParams(2, 4,
+  // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2,
+                          10),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_sse2,
+                          10),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_sse2,
+                          10),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2,
+                          10),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_sse2,
+                          10),
+  // SubpelAvgVarianceParams(2, 4,
+  // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2,
+                          8),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_sse2,
+                          8),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_sse2,
+                          8),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2,
+                          8),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2,
+                          8),
+// SubpelAvgVarianceParams(2, 4,
+// &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelAvgVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_sse2));
+#endif  // HAVE_SSE2
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#if HAVE_SSSE3
+const SubpelVarianceParams kArraySubpelVariance_ssse3[] = {
+  SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_ssse3, 0),
+  SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_ssse3, 0),
+  SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_ssse3, 0),
+  SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0),
+  SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0),
+  SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0),
+  SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0),
+  SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0),
+  SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0),
+  SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0),
+  SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_ssse3, 0),
+  SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0),
+  SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0),
+  SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0),
+  SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0),
+  SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0),
+#if !CONFIG_REALTIME_ONLY
+  SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_ssse3, 0),
+  SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_ssse3, 0),
+  SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_ssse3, 0),
+  SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_ssse3, 0),
+  SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_ssse3, 0),
+  SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_ssse3, 0),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelVarianceTest,
+                         ::testing::ValuesIn(kArraySubpelVariance_ssse3));
+
+const SubpelAvgVarianceParams kArraySubpelAvgVariance_ssse3[] = {
+  SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_ssse3, 0),
+  SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_ssse3, 0),
+  SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_ssse3, 0),
+  SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3, 0),
+  SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3, 0),
+  SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3, 0),
+  SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3, 0),
+  SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3, 0),
+  SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3, 0),
+  SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3, 0),
+  SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0),
+  SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0),
+  SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0),
+  SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0),
+  SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0),
+  SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0),
+#if !CONFIG_REALTIME_ONLY
+  SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_ssse3, 0),
+  SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_ssse3, 0),
+  SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_ssse3, 0),
+  SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_ssse3, 0),
+  SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_ssse3, 0),
+  SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_ssse3, 0),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelAvgVarianceTest,
+                         ::testing::ValuesIn(kArraySubpelAvgVariance_ssse3));
+
+const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_ssse3[] = {
+  DistWtdSubpelAvgVarianceParams(
+      7, 7, &aom_dist_wtd_sub_pixel_avg_variance128x128_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      7, 6, &aom_dist_wtd_sub_pixel_avg_variance128x64_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      6, 7, &aom_dist_wtd_sub_pixel_avg_variance64x128_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_ssse3, 0),
+#if !CONFIG_REALTIME_ONLY
+  DistWtdSubpelAvgVarianceParams(
+      6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_ssse3, 0),
+  DistWtdSubpelAvgVarianceParams(
+      2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_ssse3, 0),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(
+    SSSE3, AvxDistWtdSubpelAvgVarianceTest,
+    ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_ssse3));
+#endif  // HAVE_SSSE3
+
+#if HAVE_SSE4_1
+#if !CONFIG_REALTIME_ONLY
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AvxObmcSubpelVarianceTest,
+    ::testing::Values(
+        ObmcSubpelVarianceParams(7, 7,
+                                 &aom_obmc_sub_pixel_variance128x128_sse4_1, 0),
+        ObmcSubpelVarianceParams(7, 6,
+                                 &aom_obmc_sub_pixel_variance128x64_sse4_1, 0),
+        ObmcSubpelVarianceParams(6, 7,
+                                 &aom_obmc_sub_pixel_variance64x128_sse4_1, 0),
+        ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_sse4_1,
+                                 0)));
+#endif
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, MseWxHTest,
+    ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_avx2, 8),
+                      MseWxHParams(3, 2, &aom_mse_wxh_16bit_avx2, 8),
+                      MseWxHParams(2, 3, &aom_mse_wxh_16bit_avx2, 8),
+                      MseWxHParams(2, 2, &aom_mse_wxh_16bit_avx2, 8)));
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, Mse16xHTest,
+    ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_avx2, 8),
+                      Mse16xHParams(3, 2, &aom_mse_16xh_16bit_avx2, 8),
+                      Mse16xHParams(2, 3, &aom_mse_16xh_16bit_avx2, 8),
+                      Mse16xHParams(2, 2, &aom_mse_16xh_16bit_avx2, 8)));
+
+INSTANTIATE_TEST_SUITE_P(AVX2, AvxMseTest,
+                         ::testing::Values(MseParams(4, 4,
+                                                     &aom_mse16x16_avx2)));
+
+const VarianceParams kArrayVariance_avx2[] = {
+  VarianceParams(7, 7, &aom_variance128x128_avx2),
+  VarianceParams(7, 6, &aom_variance128x64_avx2),
+  VarianceParams(6, 7, &aom_variance64x128_avx2),
+  VarianceParams(6, 6, &aom_variance64x64_avx2),
+  VarianceParams(6, 5, &aom_variance64x32_avx2),
+  VarianceParams(5, 6, &aom_variance32x64_avx2),
+  VarianceParams(5, 5, &aom_variance32x32_avx2),
+  VarianceParams(5, 4, &aom_variance32x16_avx2),
+  VarianceParams(4, 5, &aom_variance16x32_avx2),
+  VarianceParams(4, 4, &aom_variance16x16_avx2),
+  VarianceParams(4, 3, &aom_variance16x8_avx2),
+#if !CONFIG_REALTIME_ONLY
+  VarianceParams(6, 4, &aom_variance64x16_avx2),
+  VarianceParams(4, 6, &aom_variance16x64_avx2),
+  VarianceParams(5, 3, &aom_variance32x8_avx2),
+  VarianceParams(4, 2, &aom_variance16x4_avx2),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, AvxVarianceTest,
+                         ::testing::ValuesIn(kArrayVariance_avx2));
+
+const GetSseSumParams kArrayGetSseSum8x8Quad_avx2[] = {
+  GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
+  GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
+  GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
+  GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_avx2, 0)
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum8x8QuadTest,
+                         ::testing::ValuesIn(kArrayGetSseSum8x8Quad_avx2));
+
+const GetSseSumParamsDual kArrayGetSseSum16x16Dual_avx2[] = {
+  GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
+  GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
+  GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
+  GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_avx2, 0)
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum16x16DualTest,
+                         ::testing::ValuesIn(kArrayGetSseSum16x16Dual_avx2));
+
+const SubpelVarianceParams kArraySubpelVariance_avx2[] = {
+  SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_avx2, 0),
+  SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_avx2, 0),
+  SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_avx2, 0),
+  SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_avx2, 0),
+  SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_avx2, 0),
+  SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_avx2, 0),
+  SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0),
+  SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_avx2, 0),
+
+  SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_avx2, 0),
+  SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_avx2, 0),
+  SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_avx2, 0),
+#if !CONFIG_REALTIME_ONLY
+  SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_avx2, 0),
+  SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_avx2, 0),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, AvxSubpelVarianceTest,
+                         ::testing::ValuesIn(kArraySubpelVariance_avx2));
+
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AvxSubpelAvgVarianceTest,
+    ::testing::Values(
+        SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_avx2,
+                                0),
+        SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_avx2,
+                                0),
+        SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_avx2,
+                                0),
+        SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0),
+        SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_avx2, 0),
+        SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0),
+        SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0),
+        SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2,
+                                0)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, MseWxHTest,
+    ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_neon, 8),
+                      MseWxHParams(3, 2, &aom_mse_wxh_16bit_neon, 8),
+                      MseWxHParams(2, 3, &aom_mse_wxh_16bit_neon, 8),
+                      MseWxHParams(2, 2, &aom_mse_wxh_16bit_neon, 8)));
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, Mse16xHTest,
+    ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_neon, 8),
+                      Mse16xHParams(3, 2, &aom_mse_16xh_16bit_neon, 8),
+                      Mse16xHParams(2, 3, &aom_mse_16xh_16bit_neon, 8),
+                      Mse16xHParams(2, 2, &aom_mse_16xh_16bit_neon, 8)));
+
+INSTANTIATE_TEST_SUITE_P(NEON, SumOfSquaresTest,
+                         ::testing::Values(aom_get_mb_ss_neon));
+
+INSTANTIATE_TEST_SUITE_P(NEON, AvxMseTest,
+                         ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon),
+                                           MseParams(3, 4, &aom_mse8x16_neon),
+                                           MseParams(4, 4, &aom_mse16x16_neon),
+                                           MseParams(4, 3, &aom_mse16x8_neon)));
+
+const VarianceParams kArrayVariance_neon[] = {
+  VarianceParams(7, 7, &aom_variance128x128_neon),
+  VarianceParams(6, 6, &aom_variance64x64_neon),
+  VarianceParams(7, 6, &aom_variance128x64_neon),
+  VarianceParams(6, 7, &aom_variance64x128_neon),
+  VarianceParams(6, 6, &aom_variance64x64_neon),
+  VarianceParams(6, 5, &aom_variance64x32_neon),
+  VarianceParams(5, 6, &aom_variance32x64_neon),
+  VarianceParams(5, 5, &aom_variance32x32_neon),
+  VarianceParams(5, 4, &aom_variance32x16_neon),
+  VarianceParams(4, 5, &aom_variance16x32_neon),
+  VarianceParams(4, 4, &aom_variance16x16_neon),
+  VarianceParams(4, 3, &aom_variance16x8_neon),
+  VarianceParams(3, 4, &aom_variance8x16_neon),
+  VarianceParams(3, 3, &aom_variance8x8_neon),
+  VarianceParams(3, 2, &aom_variance8x4_neon),
+  VarianceParams(2, 3, &aom_variance4x8_neon),
+  VarianceParams(2, 2, &aom_variance4x4_neon),
+#if !CONFIG_REALTIME_ONLY
+  VarianceParams(2, 4, &aom_variance4x16_neon),
+  VarianceParams(4, 2, &aom_variance16x4_neon),
+  VarianceParams(3, 5, &aom_variance8x32_neon),
+  VarianceParams(5, 3, &aom_variance32x8_neon),
+  VarianceParams(4, 6, &aom_variance16x64_neon),
+  VarianceParams(6, 4, &aom_variance64x16_neon),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, AvxVarianceTest,
+                         ::testing::ValuesIn(kArrayVariance_neon));
+
+const SubpelVarianceParams kArraySubpelVariance_neon[] = {
+  SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_neon, 0),
+  SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_neon, 0),
+  SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_neon, 0),
+  SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_neon, 0),
+  SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_neon, 0),
+  SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_neon, 0),
+  SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_neon, 0),
+  SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_neon, 0),
+  SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_neon, 0),
+  SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_neon, 0),
+  SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_neon, 0),
+  SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_neon, 0),
+  SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_neon, 0),
+  SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_neon, 0),
+  SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_neon, 0),
+  SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_neon, 0),
+#if !CONFIG_REALTIME_ONLY
+  SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_neon, 0),
+  SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_neon, 0),
+  SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_neon, 0),
+  SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_neon, 0),
+  SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_neon, 0),
+  SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_neon, 0),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelVarianceTest,
+                         ::testing::ValuesIn(kArraySubpelVariance_neon));
+
+const SubpelAvgVarianceParams kArraySubpelAvgVariance_neon[] = {
+  SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_neon, 0),
+  SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_neon, 0),
+  SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_neon, 0),
+  SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_neon, 0),
+  SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_neon, 0),
+  SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_neon, 0),
+  SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_neon, 0),
+  SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_neon, 0),
+  SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_neon, 0),
+  SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_neon, 0),
+  SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_neon, 0),
+  SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_neon, 0),
+  SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_neon, 0),
+  SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_neon, 0),
+  SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_neon, 0),
+  SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_neon, 0),
+#if !CONFIG_REALTIME_ONLY
+  SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_neon, 0),
+  SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_neon, 0),
+  SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_neon, 0),
+  SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_neon, 0),
+  SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_neon, 0),
+  SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_neon, 0),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelAvgVarianceTest,
+                         ::testing::ValuesIn(kArraySubpelAvgVariance_neon));
+
+const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_neon[] = {
+  DistWtdSubpelAvgVarianceParams(
+      6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_neon, 0),
+#if !CONFIG_REALTIME_ONLY
+  DistWtdSubpelAvgVarianceParams(
+      6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_neon, 0),
+  DistWtdSubpelAvgVarianceParams(
+      2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_neon, 0),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AvxDistWtdSubpelAvgVarianceTest,
+    ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_neon));
+
+#if !CONFIG_REALTIME_ONLY
+const ObmcSubpelVarianceParams kArrayObmcSubpelVariance_neon[] = {
+  ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_neon, 0),
+  ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_neon, 0),
+  ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_neon, 0),
+  ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_neon, 0),
+  ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_neon, 0),
+  ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_neon, 0),
+  ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_neon, 0),
+  ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_neon, 0),
+  ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_neon, 0),
+  ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_neon, 0),
+  ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_neon, 0),
+  ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_neon, 0),
+  ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_neon, 0),
+  ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_neon, 0),
+  ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_neon, 0),
+  ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_neon, 0),
+  ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_neon, 0),
+  ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_neon, 0),
+  ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_neon, 0),
+  ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_neon, 0),
+  ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_neon, 0),
+  ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_neon, 0),
+};
+INSTANTIATE_TEST_SUITE_P(NEON, AvxObmcSubpelVarianceTest,
+                         ::testing::ValuesIn(kArrayObmcSubpelVariance_neon));
+#endif
+
+const GetSseSumParams kArrayGetSseSum8x8Quad_neon[] = {
+  GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon, 0),
+  GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon, 0),
+  GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon, 0),
+  GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon, 0)
+};
+INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum8x8QuadTest,
+                         ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon));
+
+const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon[] = {
+  GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon, 0),
+  GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon, 0),
+  GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon, 0),
+  GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon, 0)
+};
+INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum16x16DualTest,
+                         ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+const VarianceParams kArrayHBDVariance_neon[] = {
+  VarianceParams(7, 7, &aom_highbd_12_variance128x128_neon, 12),
+  VarianceParams(7, 6, &aom_highbd_12_variance128x64_neon, 12),
+  VarianceParams(6, 7, &aom_highbd_12_variance64x128_neon, 12),
+  VarianceParams(6, 6, &aom_highbd_12_variance64x64_neon, 12),
+  VarianceParams(6, 5, &aom_highbd_12_variance64x32_neon, 12),
+  VarianceParams(5, 6, &aom_highbd_12_variance32x64_neon, 12),
+  VarianceParams(5, 5, &aom_highbd_12_variance32x32_neon, 12),
+  VarianceParams(5, 4, &aom_highbd_12_variance32x16_neon, 12),
+  VarianceParams(4, 5, &aom_highbd_12_variance16x32_neon, 12),
+  VarianceParams(4, 4, &aom_highbd_12_variance16x16_neon, 12),
+  VarianceParams(4, 3, &aom_highbd_12_variance16x8_neon, 12),
+  VarianceParams(3, 4, &aom_highbd_12_variance8x16_neon, 12),
+  VarianceParams(3, 3, &aom_highbd_12_variance8x8_neon, 12),
+  VarianceParams(3, 2, &aom_highbd_12_variance8x4_neon, 12),
+  VarianceParams(2, 3, &aom_highbd_12_variance4x8_neon, 12),
+  VarianceParams(2, 2, &aom_highbd_12_variance4x4_neon, 12),
+  VarianceParams(7, 7, &aom_highbd_10_variance128x128_neon, 10),
+  VarianceParams(7, 6, &aom_highbd_10_variance128x64_neon, 10),
+  VarianceParams(6, 7, &aom_highbd_10_variance64x128_neon, 10),
+  VarianceParams(6, 6, &aom_highbd_10_variance64x64_neon, 10),
+  VarianceParams(6, 5, &aom_highbd_10_variance64x32_neon, 10),
+  VarianceParams(5, 6, &aom_highbd_10_variance32x64_neon, 10),
+  VarianceParams(5, 5, &aom_highbd_10_variance32x32_neon, 10),
+  VarianceParams(5, 4, &aom_highbd_10_variance32x16_neon, 10),
+  VarianceParams(4, 5, &aom_highbd_10_variance16x32_neon, 10),
+  VarianceParams(4, 4, &aom_highbd_10_variance16x16_neon, 10),
+  VarianceParams(4, 3, &aom_highbd_10_variance16x8_neon, 10),
+  VarianceParams(3, 4, &aom_highbd_10_variance8x16_neon, 10),
+  VarianceParams(3, 3, &aom_highbd_10_variance8x8_neon, 10),
+  VarianceParams(3, 2, &aom_highbd_10_variance8x4_neon, 10),
+  VarianceParams(2, 3, &aom_highbd_10_variance4x8_neon, 10),
+  VarianceParams(2, 2, &aom_highbd_10_variance4x4_neon, 10),
+  VarianceParams(7, 7, &aom_highbd_8_variance128x128_neon, 8),
+  VarianceParams(7, 6, &aom_highbd_8_variance128x64_neon, 8),
+  VarianceParams(6, 7, &aom_highbd_8_variance64x128_neon, 8),
+  VarianceParams(6, 6, &aom_highbd_8_variance64x64_neon, 8),
+  VarianceParams(6, 5, &aom_highbd_8_variance64x32_neon, 8),
+  VarianceParams(5, 6, &aom_highbd_8_variance32x64_neon, 8),
+  VarianceParams(5, 5, &aom_highbd_8_variance32x32_neon, 8),
+  VarianceParams(5, 4, &aom_highbd_8_variance32x16_neon, 8),
+  VarianceParams(4, 5, &aom_highbd_8_variance16x32_neon, 8),
+  VarianceParams(4, 4, &aom_highbd_8_variance16x16_neon, 8),
+  VarianceParams(4, 3, &aom_highbd_8_variance16x8_neon, 8),
+  VarianceParams(3, 4, &aom_highbd_8_variance8x16_neon, 8),
+  VarianceParams(3, 3, &aom_highbd_8_variance8x8_neon, 8),
+  VarianceParams(3, 2, &aom_highbd_8_variance8x4_neon, 8),
+  VarianceParams(2, 3, &aom_highbd_8_variance4x8_neon, 8),
+  VarianceParams(2, 2, &aom_highbd_8_variance4x4_neon, 8),
+#if !CONFIG_REALTIME_ONLY
+  VarianceParams(6, 4, &aom_highbd_12_variance64x16_neon, 12),
+  VarianceParams(4, 6, &aom_highbd_12_variance16x64_neon, 12),
+  VarianceParams(5, 3, &aom_highbd_12_variance32x8_neon, 12),
+  VarianceParams(3, 5, &aom_highbd_12_variance8x32_neon, 12),
+  VarianceParams(4, 2, &aom_highbd_12_variance16x4_neon, 12),
+  VarianceParams(2, 4, &aom_highbd_12_variance4x16_neon, 12),
+  VarianceParams(6, 4, &aom_highbd_10_variance64x16_neon, 10),
+  VarianceParams(4, 6, &aom_highbd_10_variance16x64_neon, 10),
+  VarianceParams(5, 3, &aom_highbd_10_variance32x8_neon, 10),
+  VarianceParams(3, 5, &aom_highbd_10_variance8x32_neon, 10),
+  VarianceParams(4, 2, &aom_highbd_10_variance16x4_neon, 10),
+  VarianceParams(2, 4, &aom_highbd_10_variance4x16_neon, 10),
+  VarianceParams(6, 4, &aom_highbd_8_variance64x16_neon, 8),
+  VarianceParams(4, 6, &aom_highbd_8_variance16x64_neon, 8),
+  VarianceParams(5, 3, &aom_highbd_8_variance32x8_neon, 8),
+  VarianceParams(3, 5, &aom_highbd_8_variance8x32_neon, 8),
+  VarianceParams(4, 2, &aom_highbd_8_variance16x4_neon, 8),
+  VarianceParams(2, 4, &aom_highbd_8_variance4x16_neon, 8),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDVariance_neon));
+
+const SubpelVarianceParams kArrayHBDSubpelVariance_neon[] = {
+  SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_neon, 12),
+  SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_neon, 12),
+  SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_neon, 12),
+  SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_neon, 12),
+  SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_neon, 12),
+  SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_neon, 12),
+  SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_neon, 12),
+  SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_neon, 12),
+  SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_neon, 12),
+  SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_neon, 12),
+  SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_neon, 12),
+  SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_neon, 12),
+  SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_neon, 12),
+  SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_neon, 10),
+  SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_neon, 10),
+  SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_neon, 10),
+  SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_neon, 10),
+  SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_neon, 10),
+  SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_neon, 10),
+  SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_neon, 10),
+  SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_neon, 10),
+  SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_neon, 10),
+  SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_neon, 10),
+  SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_neon, 10),
+  SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_neon, 10),
+  SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_neon, 10),
+  SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_neon, 8),
+  SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_neon, 8),
+  SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_neon, 8),
+  SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_neon, 8),
+  SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_neon, 8),
+  SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_neon, 8),
+  SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_neon, 8),
+  SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_neon, 8),
+  SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_neon, 8),
+  SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_neon, 8),
+  SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_neon, 8),
+  SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_neon, 8),
+  SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_neon, 8),
+#if !CONFIG_REALTIME_ONLY
+  SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_neon, 8),
+  SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_neon, 8),
+  SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_neon, 8),
+  SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_neon, 8),
+  SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_neon, 8),
+  SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_neon, 8),
+  SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_neon, 10),
+  SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_neon, 10),
+  SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_neon, 10),
+  SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_neon, 10),
+  SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_neon, 10),
+  SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_neon, 10),
+  SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_neon, 12),
+  SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_neon, 12),
+  SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_neon, 12),
+  SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_neon, 12),
+  SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_neon, 12),
+  SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_neon, 12),
+#endif  //! CONFIG_REALTIME_ONLY
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDSubpelVariance_neon));
+
+const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_neon[] = {
+  SubpelAvgVarianceParams(7, 7,
+                          &aom_highbd_8_sub_pixel_avg_variance128x128_neon, 8),
+  SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_neon,
+                          8),
+  SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_neon,
+                          8),
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_neon,
+                          8),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_neon,
+                          8),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_neon,
+                          8),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_neon,
+                          8),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_neon,
+                          8),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_neon,
+                          8),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_neon,
+                          8),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_neon,
+                          8),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_neon,
+                          8),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_neon,
+                          8),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_neon,
+                          8),
+  SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_neon,
+                          8),
+  SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_neon,
+                          8),
+  SubpelAvgVarianceParams(
+      7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_neon, 10),
+  SubpelAvgVarianceParams(7, 6,
+                          &aom_highbd_10_sub_pixel_avg_variance128x64_neon, 10),
+  SubpelAvgVarianceParams(6, 7,
+                          &aom_highbd_10_sub_pixel_avg_variance64x128_neon, 10),
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_neon,
+                          10),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_neon,
+                          10),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_neon,
+                          10),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_neon,
+                          10),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_neon,
+                          10),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_neon,
+                          10),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_neon,
+                          10),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_neon,
+                          10),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_neon,
+                          10),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_neon,
+                          10),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_neon,
+                          10),
+  SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_neon,
+                          10),
+  SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_neon,
+                          10),
+  SubpelAvgVarianceParams(
+      7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_neon, 12),
+  SubpelAvgVarianceParams(7, 6,
+                          &aom_highbd_12_sub_pixel_avg_variance128x64_neon, 12),
+  SubpelAvgVarianceParams(6, 7,
+                          &aom_highbd_12_sub_pixel_avg_variance64x128_neon, 12),
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_neon,
+                          12),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_neon,
+                          12),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_neon,
+                          12),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_neon,
+                          12),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_neon,
+                          12),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_neon,
+                          12),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_neon,
+                          12),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_neon,
+                          12),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_neon,
+                          12),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_neon,
+                          12),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_neon,
+                          12),
+  SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_neon,
+                          12),
+  SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_neon,
+                          12),
+
+#if !CONFIG_REALTIME_ONLY
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_neon,
+                          8),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_neon,
+                          8),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_neon,
+                          8),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_neon,
+                          8),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_neon,
+                          8),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_neon,
+                          8),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_neon,
+                          10),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_neon,
+                          10),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_neon,
+                          10),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_neon,
+                          10),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_neon,
+                          10),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_neon,
+                          10),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_neon,
+                          12),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_neon,
+                          12),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_neon,
+                          12),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_neon,
+                          12),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_neon,
+                          12),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_neon,
+                          12),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelAvgVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_neon));
+
+const DistWtdSubpelAvgVarianceParams
+    kArrayHBDDistWtdSubpelAvgVariance_neon[] = {
+      DistWtdSubpelAvgVarianceParams(
+          7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_neon, 12),
+
+#if !CONFIG_REALTIME_ONLY
+      DistWtdSubpelAvgVarianceParams(
+          6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_neon, 8),
+      DistWtdSubpelAvgVarianceParams(
+          6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_neon, 10),
+      DistWtdSubpelAvgVarianceParams(
+          6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_neon, 12),
+      DistWtdSubpelAvgVarianceParams(
+          2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_neon, 12),
+#endif  // !CONFIG_REALTIME_ONLY
+    };
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AvxHBDDistWtdSubpelAvgVarianceTest,
+    ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_neon));
+
+#if !CONFIG_REALTIME_ONLY
+const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_neon[] = {
+  ObmcSubpelVarianceParams(
+      7, 7, &aom_highbd_12_obmc_sub_pixel_variance128x128_neon, 12),
+  ObmcSubpelVarianceParams(
+      7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_neon, 12),
+  ObmcSubpelVarianceParams(
+      6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_neon, 12),
+  ObmcSubpelVarianceParams(
+      6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_neon, 12),
+  ObmcSubpelVarianceParams(
+      6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_neon, 12),
+  ObmcSubpelVarianceParams(
+      5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_neon, 12),
+  ObmcSubpelVarianceParams(
+      5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_neon, 12),
+  ObmcSubpelVarianceParams(
+      5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_neon, 12),
+  ObmcSubpelVarianceParams(
+      4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_neon, 12),
+  ObmcSubpelVarianceParams(
+      4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_neon, 12),
+  ObmcSubpelVarianceParams(4, 3,
+                           &aom_highbd_12_obmc_sub_pixel_variance16x8_neon, 12),
+  ObmcSubpelVarianceParams(3, 4,
+                           &aom_highbd_12_obmc_sub_pixel_variance8x16_neon, 12),
+  ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_neon,
+                           12),
+  ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_neon,
+                           12),
+  ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_neon,
+                           12),
+  ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_neon,
+                           12),
+  ObmcSubpelVarianceParams(
+      6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_neon, 12),
+  ObmcSubpelVarianceParams(
+      4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_neon, 12),
+  ObmcSubpelVarianceParams(5, 3,
+                           &aom_highbd_12_obmc_sub_pixel_variance32x8_neon, 12),
+  ObmcSubpelVarianceParams(3, 5,
+                           &aom_highbd_12_obmc_sub_pixel_variance8x32_neon, 12),
+  ObmcSubpelVarianceParams(4, 2,
+                           &aom_highbd_12_obmc_sub_pixel_variance16x4_neon, 12),
+  ObmcSubpelVarianceParams(2, 4,
+                           &aom_highbd_12_obmc_sub_pixel_variance4x16_neon, 12),
+  ObmcSubpelVarianceParams(
+      7, 7, &aom_highbd_10_obmc_sub_pixel_variance128x128_neon, 10),
+  ObmcSubpelVarianceParams(
+      7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_neon, 10),
+  ObmcSubpelVarianceParams(
+      6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_neon, 10),
+  ObmcSubpelVarianceParams(
+      6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_neon, 10),
+  ObmcSubpelVarianceParams(
+      6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_neon, 10),
+  ObmcSubpelVarianceParams(
+      5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_neon, 10),
+  ObmcSubpelVarianceParams(
+      5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_neon, 10),
+  ObmcSubpelVarianceParams(
+      5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_neon, 10),
+  ObmcSubpelVarianceParams(
+      4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_neon, 10),
+  ObmcSubpelVarianceParams(
+      4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_neon, 10),
+  ObmcSubpelVarianceParams(4, 3,
+                           &aom_highbd_10_obmc_sub_pixel_variance16x8_neon, 10),
+  ObmcSubpelVarianceParams(3, 4,
+                           &aom_highbd_10_obmc_sub_pixel_variance8x16_neon, 10),
+  ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_neon,
+                           10),
+  ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_neon,
+                           10),
+  ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_neon,
+                           10),
+  ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_neon,
+                           10),
+  ObmcSubpelVarianceParams(
+      6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_neon, 10),
+  ObmcSubpelVarianceParams(
+      4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_neon, 10),
+  ObmcSubpelVarianceParams(5, 3,
+                           &aom_highbd_10_obmc_sub_pixel_variance32x8_neon, 10),
+  ObmcSubpelVarianceParams(3, 5,
+                           &aom_highbd_10_obmc_sub_pixel_variance8x32_neon, 10),
+  ObmcSubpelVarianceParams(4, 2,
+                           &aom_highbd_10_obmc_sub_pixel_variance16x4_neon, 10),
+  ObmcSubpelVarianceParams(2, 4,
+                           &aom_highbd_10_obmc_sub_pixel_variance4x16_neon, 10),
+  ObmcSubpelVarianceParams(
+      7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_neon, 8),
+  ObmcSubpelVarianceParams(7, 6,
+                           &aom_highbd_8_obmc_sub_pixel_variance128x64_neon, 8),
+  ObmcSubpelVarianceParams(6, 7,
+                           &aom_highbd_8_obmc_sub_pixel_variance64x128_neon, 8),
+  ObmcSubpelVarianceParams(6, 6,
+                           &aom_highbd_8_obmc_sub_pixel_variance64x64_neon, 8),
+  ObmcSubpelVarianceParams(6, 5,
+                           &aom_highbd_8_obmc_sub_pixel_variance64x32_neon, 8),
+  ObmcSubpelVarianceParams(5, 6,
+                           &aom_highbd_8_obmc_sub_pixel_variance32x64_neon, 8),
+  ObmcSubpelVarianceParams(5, 5,
+                           &aom_highbd_8_obmc_sub_pixel_variance32x32_neon, 8),
+  ObmcSubpelVarianceParams(5, 4,
+                           &aom_highbd_8_obmc_sub_pixel_variance32x16_neon, 8),
+  ObmcSubpelVarianceParams(4, 5,
+                           &aom_highbd_8_obmc_sub_pixel_variance16x32_neon, 8),
+  ObmcSubpelVarianceParams(4, 4,
+                           &aom_highbd_8_obmc_sub_pixel_variance16x16_neon, 8),
+  ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_neon,
+                           8),
+  ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_neon,
+                           8),
+  ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_neon,
+                           8),
+  ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_neon,
+                           8),
+  ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_neon,
+                           8),
+  ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_neon,
+                           8),
+  ObmcSubpelVarianceParams(6, 4,
+                           &aom_highbd_8_obmc_sub_pixel_variance64x16_neon, 8),
+  ObmcSubpelVarianceParams(4, 6,
+                           &aom_highbd_8_obmc_sub_pixel_variance16x64_neon, 8),
+  ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_neon,
+                           8),
+  ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_neon,
+                           8),
+  ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_neon,
+                           8),
+  ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_neon,
+                           8),
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDObmcSubpelVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_neon));
+#endif  // !CONFIG_REALTIME_ONLY
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+#endif  // HAVE_NEON
+
+#if HAVE_NEON_DOTPROD
+
+const VarianceParams kArrayVariance_neon_dotprod[] = {
+  VarianceParams(7, 7, &aom_variance128x128_neon_dotprod),
+  VarianceParams(6, 6, &aom_variance64x64_neon_dotprod),
+  VarianceParams(7, 6, &aom_variance128x64_neon_dotprod),
+  VarianceParams(6, 7, &aom_variance64x128_neon_dotprod),
+  VarianceParams(6, 6, &aom_variance64x64_neon_dotprod),
+  VarianceParams(6, 5, &aom_variance64x32_neon_dotprod),
+  VarianceParams(5, 6, &aom_variance32x64_neon_dotprod),
+  VarianceParams(5, 5, &aom_variance32x32_neon_dotprod),
+  VarianceParams(5, 4, &aom_variance32x16_neon_dotprod),
+  VarianceParams(4, 5, &aom_variance16x32_neon_dotprod),
+  VarianceParams(4, 4, &aom_variance16x16_neon_dotprod),
+  VarianceParams(4, 3, &aom_variance16x8_neon_dotprod),
+  VarianceParams(3, 4, &aom_variance8x16_neon_dotprod),
+  VarianceParams(3, 3, &aom_variance8x8_neon_dotprod),
+  VarianceParams(3, 2, &aom_variance8x4_neon_dotprod),
+  VarianceParams(2, 3, &aom_variance4x8_neon_dotprod),
+  VarianceParams(2, 2, &aom_variance4x4_neon_dotprod),
+#if !CONFIG_REALTIME_ONLY
+  VarianceParams(2, 4, &aom_variance4x16_neon_dotprod),
+  VarianceParams(4, 2, &aom_variance16x4_neon_dotprod),
+  VarianceParams(3, 5, &aom_variance8x32_neon_dotprod),
+  VarianceParams(5, 3, &aom_variance32x8_neon_dotprod),
+  VarianceParams(4, 6, &aom_variance16x64_neon_dotprod),
+  VarianceParams(6, 4, &aom_variance64x16_neon_dotprod),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AvxVarianceTest,
+                         ::testing::ValuesIn(kArrayVariance_neon_dotprod));
+
+const GetSseSumParams kArrayGetSseSum8x8Quad_neon_dotprod[] = {
+  GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
+  GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
+  GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
+  GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0)
+};
+INSTANTIATE_TEST_SUITE_P(
+    NEON_DOTPROD, GetSseSum8x8QuadTest,
+    ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon_dotprod));
+
+const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon_dotprod[] = {
+  GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
+  GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
+  GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
+  GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0)
+};
+INSTANTIATE_TEST_SUITE_P(
+    NEON_DOTPROD, GetSseSum16x16DualTest,
+    ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon_dotprod));
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON_DOTPROD, AvxMseTest,
+    ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon_dotprod),
+                      MseParams(3, 4, &aom_mse8x16_neon_dotprod),
+                      MseParams(4, 4, &aom_mse16x16_neon_dotprod),
+                      MseParams(4, 3, &aom_mse16x8_neon_dotprod)));
+
+#endif  // HAVE_NEON_DOTPROD
+
+#if HAVE_SVE
+
+#if CONFIG_AV1_HIGHBITDEPTH
+const VarianceParams kArrayHBDVariance_sve[] = {
+  VarianceParams(7, 7, &aom_highbd_12_variance128x128_sve, 12),
+  VarianceParams(7, 6, &aom_highbd_12_variance128x64_sve, 12),
+  VarianceParams(6, 7, &aom_highbd_12_variance64x128_sve, 12),
+  VarianceParams(6, 6, &aom_highbd_12_variance64x64_sve, 12),
+  VarianceParams(6, 5, &aom_highbd_12_variance64x32_sve, 12),
+  VarianceParams(5, 6, &aom_highbd_12_variance32x64_sve, 12),
+  VarianceParams(5, 5, &aom_highbd_12_variance32x32_sve, 12),
+  VarianceParams(5, 4, &aom_highbd_12_variance32x16_sve, 12),
+  VarianceParams(4, 5, &aom_highbd_12_variance16x32_sve, 12),
+  VarianceParams(4, 4, &aom_highbd_12_variance16x16_sve, 12),
+  VarianceParams(4, 3, &aom_highbd_12_variance16x8_sve, 12),
+  VarianceParams(3, 4, &aom_highbd_12_variance8x16_sve, 12),
+  VarianceParams(3, 3, &aom_highbd_12_variance8x8_sve, 12),
+  VarianceParams(3, 2, &aom_highbd_12_variance8x4_sve, 12),
+  VarianceParams(2, 3, &aom_highbd_12_variance4x8_sve, 12),
+  VarianceParams(2, 2, &aom_highbd_12_variance4x4_sve, 12),
+  VarianceParams(7, 7, &aom_highbd_10_variance128x128_sve, 10),
+  VarianceParams(7, 6, &aom_highbd_10_variance128x64_sve, 10),
+  VarianceParams(6, 7, &aom_highbd_10_variance64x128_sve, 10),
+  VarianceParams(6, 6, &aom_highbd_10_variance64x64_sve, 10),
+  VarianceParams(6, 5, &aom_highbd_10_variance64x32_sve, 10),
+  VarianceParams(5, 6, &aom_highbd_10_variance32x64_sve, 10),
+  VarianceParams(5, 5, &aom_highbd_10_variance32x32_sve, 10),
+  VarianceParams(5, 4, &aom_highbd_10_variance32x16_sve, 10),
+  VarianceParams(4, 5, &aom_highbd_10_variance16x32_sve, 10),
+  VarianceParams(4, 4, &aom_highbd_10_variance16x16_sve, 10),
+  VarianceParams(4, 3, &aom_highbd_10_variance16x8_sve, 10),
+  VarianceParams(3, 4, &aom_highbd_10_variance8x16_sve, 10),
+  VarianceParams(3, 3, &aom_highbd_10_variance8x8_sve, 10),
+  VarianceParams(3, 2, &aom_highbd_10_variance8x4_sve, 10),
+  VarianceParams(2, 3, &aom_highbd_10_variance4x8_sve, 10),
+  VarianceParams(2, 2, &aom_highbd_10_variance4x4_sve, 10),
+  VarianceParams(7, 7, &aom_highbd_8_variance128x128_sve, 8),
+  VarianceParams(7, 6, &aom_highbd_8_variance128x64_sve, 8),
+  VarianceParams(6, 7, &aom_highbd_8_variance64x128_sve, 8),
+  VarianceParams(6, 6, &aom_highbd_8_variance64x64_sve, 8),
+  VarianceParams(6, 5, &aom_highbd_8_variance64x32_sve, 8),
+  VarianceParams(5, 6, &aom_highbd_8_variance32x64_sve, 8),
+  VarianceParams(5, 5, &aom_highbd_8_variance32x32_sve, 8),
+  VarianceParams(5, 4, &aom_highbd_8_variance32x16_sve, 8),
+  VarianceParams(4, 5, &aom_highbd_8_variance16x32_sve, 8),
+  VarianceParams(4, 4, &aom_highbd_8_variance16x16_sve, 8),
+  VarianceParams(4, 3, &aom_highbd_8_variance16x8_sve, 8),
+  VarianceParams(3, 4, &aom_highbd_8_variance8x16_sve, 8),
+  VarianceParams(3, 3, &aom_highbd_8_variance8x8_sve, 8),
+  VarianceParams(3, 2, &aom_highbd_8_variance8x4_sve, 8),
+  VarianceParams(2, 3, &aom_highbd_8_variance4x8_sve, 8),
+  VarianceParams(2, 2, &aom_highbd_8_variance4x4_sve, 8),
+#if !CONFIG_REALTIME_ONLY
+  VarianceParams(6, 4, &aom_highbd_12_variance64x16_sve, 12),
+  VarianceParams(4, 6, &aom_highbd_12_variance16x64_sve, 12),
+  VarianceParams(5, 3, &aom_highbd_12_variance32x8_sve, 12),
+  VarianceParams(3, 5, &aom_highbd_12_variance8x32_sve, 12),
+  VarianceParams(4, 2, &aom_highbd_12_variance16x4_sve, 12),
+  VarianceParams(2, 4, &aom_highbd_12_variance4x16_sve, 12),
+  VarianceParams(6, 4, &aom_highbd_10_variance64x16_sve, 10),
+  VarianceParams(4, 6, &aom_highbd_10_variance16x64_sve, 10),
+  VarianceParams(5, 3, &aom_highbd_10_variance32x8_sve, 10),
+  VarianceParams(3, 5, &aom_highbd_10_variance8x32_sve, 10),
+  VarianceParams(4, 2, &aom_highbd_10_variance16x4_sve, 10),
+  VarianceParams(2, 4, &aom_highbd_10_variance4x16_sve, 10),
+  VarianceParams(6, 4, &aom_highbd_8_variance64x16_sve, 8),
+  VarianceParams(4, 6, &aom_highbd_8_variance16x64_sve, 8),
+  VarianceParams(5, 3, &aom_highbd_8_variance32x8_sve, 8),
+  VarianceParams(3, 5, &aom_highbd_8_variance8x32_sve, 8),
+  VarianceParams(4, 2, &aom_highbd_8_variance16x4_sve, 8),
+  VarianceParams(2, 4, &aom_highbd_8_variance4x16_sve, 8),
+#endif
+};
+
+INSTANTIATE_TEST_SUITE_P(SVE, AvxHBDVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDVariance_sve));
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_SVE
+
+}  // namespace
diff --git a/third_party/aom/test/video_source.h b/third_party/aom/test/video_source.h
new file mode 100644
index 0000000000..9d73d7b253
--- /dev/null
+++ b/third_party/aom/test/video_source.h
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AOM_TEST_VIDEO_SOURCE_H_
+#define AOM_TEST_VIDEO_SOURCE_H_
+
+#if defined(_WIN32)
+#undef NOMINMAX
+#define NOMINMAX
+#undef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <string>
+
+#include "aom/aom_encoder.h"
+#include "test/acm_random.h"
+#if !defined(_WIN32)
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#endif
+
+namespace libaom_test {
+
+// Helper macros to ensure LIBAOM_TEST_DATA_PATH is a quoted string.
+// These are undefined right below GetDataPath
+// NOTE: LIBAOM_TEST_DATA_PATH MUST NOT be a quoted string before
+// Stringification or the GetDataPath will fail at runtime
+#define TO_STRING(S) #S
+#define STRINGIFY(S) TO_STRING(S)
+
+// A simple function to encapsulate cross platform retrieval of test data path
+static std::string GetDataPath() {
+  const char *const data_path = getenv("LIBAOM_TEST_DATA_PATH");
+  if (data_path == nullptr) {
+#ifdef LIBAOM_TEST_DATA_PATH
+    // In some environments, we cannot set environment variables
+    // Instead, we set the data path by using a preprocessor symbol
+    // which can be set from make files
+    return STRINGIFY(LIBAOM_TEST_DATA_PATH);
+#else
+    return ".";
+#endif
+  }
+  return data_path;
+}
+
+// Undefining stringification macros because they are not used elsewhere
+#undef TO_STRING
+#undef STRINGIFY
+
+inline FILE *OpenTestDataFile(const std::string &file_name) {
+  const std::string path_to_source = GetDataPath() + "/" + file_name;
+  return fopen(path_to_source.c_str(), "rb");
+}
+
+static FILE *GetTempOutFile(std::string *file_name) {
+  file_name->clear();
+#if defined(_WIN32)
+  char fname[MAX_PATH];
+  char tmppath[MAX_PATH];
+  if (GetTempPathA(MAX_PATH, tmppath)) {
+    // Assume for now that the filename generated is unique per process
+    if (GetTempFileNameA(tmppath, "lvx", 0, fname)) {
+      file_name->assign(fname);
+      return fopen(fname, "wb+");
+    }
+  }
+  return nullptr;
+#else
+  std::string temp_dir = testing::TempDir();
+  if (temp_dir.empty()) return nullptr;
+  // Versions of testing::TempDir() prior to release-1.11.0-214-g5e6a5336 may
+  // use the value of an environment variable without checking for a trailing
+  // path delimiter.
+  if (temp_dir[temp_dir.size() - 1] != '/') temp_dir += '/';
+  const char name_template[] = "libaomtest.XXXXXX";
+  std::unique_ptr<char[]> temp_file_name(
+      new char[temp_dir.size() + sizeof(name_template)]);
+  if (temp_file_name == nullptr) return nullptr;
+  memcpy(temp_file_name.get(), temp_dir.data(), temp_dir.size());
+  memcpy(temp_file_name.get() + temp_dir.size(), name_template,
+         sizeof(name_template));
+  const int fd = mkstemp(temp_file_name.get());
+  if (fd == -1) return nullptr;
+  *file_name = temp_file_name.get();
+  return fdopen(fd, "wb+");
+#endif
+}
+
+class TempOutFile {
+ public:
+  TempOutFile() { file_ = GetTempOutFile(&file_name_); }
+  ~TempOutFile() {
+    CloseFile();
+    if (!file_name_.empty()) {
+      EXPECT_EQ(0, remove(file_name_.c_str()));
+    }
+  }
+  FILE *file() { return file_; }
+  const std::string &file_name() { return file_name_; }
+
+ protected:
+  void CloseFile() {
+    if (file_) {
+      fclose(file_);
+      file_ = nullptr;
+    }
+  }
+  FILE *file_;
+  std::string file_name_;
+};
+
+// Abstract base class for test video sources, which provide a stream of
+// aom_image_t images with associated timestamps and duration.
+class VideoSource {
+ public:
+  virtual ~VideoSource() = default;
+
+  // Prepare the stream for reading, rewind/open as necessary.
+  virtual void Begin() = 0;
+
+  // Advance the cursor to the next frame
+  virtual void Next() = 0;
+
+  // Get the current video frame, or nullptr on End-Of-Stream.
+  virtual aom_image_t *img() const = 0;
+
+  // Get the presentation timestamp of the current frame.
+  virtual aom_codec_pts_t pts() const = 0;
+
+  // Get the current frame's duration
+  virtual unsigned long duration() const = 0;
+
+  // Get the timebase for the stream
+  virtual aom_rational_t timebase() const = 0;
+
+  // Get the current frame counter, starting at 0.
+  virtual unsigned int frame() const = 0;
+
+  // Get the current file limit.
+  virtual unsigned int limit() const = 0;
+};
+
+class DummyVideoSource : public VideoSource {
+ public:
+  DummyVideoSource()
+      : img_(nullptr), limit_(100), width_(80), height_(64),
+        format_(AOM_IMG_FMT_I420) {
+    ReallocImage();
+  }
+
+  ~DummyVideoSource() override { aom_img_free(img_); }
+
+  void Begin() override {
+    frame_ = 0;
+    FillFrame();
+  }
+
+  void Next() override {
+    ++frame_;
+    FillFrame();
+  }
+
+  aom_image_t *img() const override {
+    return (frame_ < limit_) ? img_ : nullptr;
+  }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  aom_codec_pts_t pts() const override { return frame_; }
+
+  unsigned long duration() const override { return 1; }
+
+  aom_rational_t timebase() const override {
+    const aom_rational_t t = { 1, 30 };
+    return t;
+  }
+
+  unsigned int frame() const override { return frame_; }
+
+  unsigned int limit() const override { return limit_; }
+
+  void set_limit(unsigned int limit) { limit_ = limit; }
+
+  void SetSize(unsigned int width, unsigned int height) {
+    if (width != width_ || height != height_) {
+      width_ = width;
+      height_ = height;
+      ReallocImage();
+    }
+  }
+
+  void SetImageFormat(aom_img_fmt_t format) {
+    if (format_ != format) {
+      format_ = format;
+      ReallocImage();
+    }
+  }
+
+ protected:
+  virtual void FillFrame() {
+    if (img_) memset(img_->img_data, 0, raw_sz_);
+  }
+
+  void ReallocImage() {
+    aom_img_free(img_);
+    img_ = aom_img_alloc(nullptr, format_, width_, height_, 32);
+    ASSERT_NE(img_, nullptr);
+    raw_sz_ = ((img_->w + 31) & ~31u) * img_->h * img_->bps / 8;
+  }
+
+  aom_image_t *img_;
+  size_t raw_sz_;
+  unsigned int limit_;
+  unsigned int frame_;
+  unsigned int width_;
+  unsigned int height_;
+  aom_img_fmt_t format_;
+};
+
+class RandomVideoSource : public DummyVideoSource {
+ public:
+  RandomVideoSource(int seed = ACMRandom::DeterministicSeed())
+      : rnd_(seed), seed_(seed) {}
+
+  // Reset the RNG to get a matching stream for the second pass
+  void Begin() override {
+    frame_ = 0;
+    rnd_.Reset(seed_);
+    FillFrame();
+  }
+
+ protected:
+  // 15 frames of noise, followed by 15 static frames. Reset to 0 rather
+  // than holding previous frames to encourage keyframes to be thrown.
+  void FillFrame() override {
+    if (img_) {
+      if (frame_ % 30 < 15)
+        for (size_t i = 0; i < raw_sz_; ++i) img_->img_data[i] = rnd_.Rand8();
+      else
+        memset(img_->img_data, 0, raw_sz_);
+    }
+  }
+
+  ACMRandom rnd_;
+  int seed_;
+};
+
+// Abstract base class for test video sources, which provide a stream of
+// decompressed images to the decoder.
+class CompressedVideoSource {
+ public:
+  virtual ~CompressedVideoSource() = default;
+
+  virtual void Init() = 0;
+
+  // Prepare the stream for reading, rewind/open as necessary.
+  virtual void Begin() = 0;
+
+  // Advance the cursor to the next frame
+  virtual void Next() = 0;
+
+  virtual const uint8_t *cxdata() const = 0;
+
+  virtual size_t frame_size() const = 0;
+
+  virtual unsigned int frame_number() const = 0;
+};
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/visual_metrics.py b/third_party/aom/test/visual_metrics.py
new file mode 100755
index 0000000000..9055feb334
--- /dev/null
+++ b/third_party/aom/test/visual_metrics.py
@@ -0,0 +1,466 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2016, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and
+# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+# was not distributed with this source code in the LICENSE file, you can
+# obtain it at www.aomedia.org/license/software. If the Alliance for Open
+# Media Patent License 1.0 was not distributed with this source code in the
+# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+#
+
+"""Converts video encoding result data from text files to visualization
+data source."""
+
+__author__ = "jzern@google.com (James Zern),"
+__author__ += "jimbankoski@google.com (Jim Bankoski)"
+
+import fnmatch
+import numpy as np
+import scipy as sp
+import scipy.interpolate
+import os
+import re
+import string
+import sys
+import math
+import warnings
+
+import gviz_api
+
+from os.path import basename
+from os.path import splitext
+
+warnings.simplefilter('ignore', np.RankWarning)
+warnings.simplefilter('ignore', RuntimeWarning)
+
+def bdsnr2(metric_set1, metric_set2):
+  """
+  BJONTEGAARD    Bjontegaard metric calculation adapted
+  Bjontegaard's snr metric allows to compute the average % saving in decibels
+  between two rate-distortion curves [1].  This is an adaptation of that
+  method that fixes inconsistencies when the curve fit operation goes awry
+  by replacing the curve fit function with a Piecewise Cubic Hermite
+  Interpolating Polynomial and then integrating that by evaluating that
+  function at small intervals using the trapezoid method to calculate
+  the integral.
+
+  metric_set1 - list of tuples ( bitrate,  metric ) for first graph
+  metric_set2 - list of tuples ( bitrate,  metric ) for second graph
+  """
+
+  if not metric_set1 or not metric_set2:
+    return 0.0
+
+  try:
+
+    # pchip_interlopate requires keys sorted by x axis. x-axis will
+    # be our metric not the bitrate so sort by metric.
+    metric_set1.sort()
+    metric_set2.sort()
+
+    # Pull the log of the rate and clamped psnr from metric_sets.
+    log_rate1 = [math.log(x[0]) for x in metric_set1]
+    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
+    log_rate2 = [math.log(x[0]) for x in metric_set2]
+    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]
+
+    # Integration interval.  This metric only works on the area that's
+    # overlapping.   Extrapolation of these things is sketchy so we avoid.
+    min_int = max([min(log_rate1), min(log_rate2)])
+    max_int = min([max(log_rate1), max(log_rate2)])
+
+    # No overlap means no sensible metric possible.
+    if max_int <= min_int:
+      return 0.0
+
+    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
+    # create 100 new samples points separated by interval.
+    lin = np.linspace(min_int, max_int, num=100, retstep=True)
+    interval = lin[1]
+    samples = lin[0]
+    v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples)
+    v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples)
+
+    # Calculate the integral using the trapezoid method on the samples.
+    int_v1 = np.trapz(v1, dx=interval)
+    int_v2 = np.trapz(v2, dx=interval)
+
+    # Calculate the average improvement.
+    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)
+
+  except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e:
+    return 0
+
+  return avg_exp_diff
+
+def bdrate2(metric_set1, metric_set2):
+  """
+  BJONTEGAARD    Bjontegaard metric calculation adapted
+  Bjontegaard's metric allows to compute the average % saving in bitrate
+  between two rate-distortion curves [1].  This is an adaptation of that
+  method that fixes inconsistencies when the curve fit operation goes awry
+  by replacing the curve fit function with a Piecewise Cubic Hermite
+  Interpolating Polynomial and then integrating that by evaluating that
+  function at small intervals using the trapezoid method to calculate
+  the integral.
+
+  metric_set1 - list of tuples ( bitrate,  metric ) for first graph
+  metric_set2 - list of tuples ( bitrate,  metric ) for second graph
+  """
+
+  if not metric_set1 or not metric_set2:
+    return 0.0
+
+  try:
+
+    # pchip_interlopate requires keys sorted by x axis. x-axis will
+    # be our metric not the bitrate so sort by metric.
+    metric_set1.sort(key=lambda tup: tup[1])
+    metric_set2.sort(key=lambda tup: tup[1])
+
+    # Pull the log of the rate and clamped psnr from metric_sets.
+    log_rate1 = [math.log(x[0]) for x in metric_set1]
+    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
+    log_rate2 = [math.log(x[0]) for x in metric_set2]
+    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]
+
+    # Integration interval.  This metric only works on the area that's
+    # overlapping.   Extrapolation of these things is sketchy so we avoid.
+    min_int = max([min(metric1), min(metric2)])
+    max_int = min([max(metric1), max(metric2)])
+
+    # No overlap means no sensible metric possible.
+    if max_int <= min_int:
+      return 0.0
+
+    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
+    # create 100 new samples points separated by interval.
+    lin = np.linspace(min_int, max_int, num=100, retstep=True)
+    interval = lin[1]
+    samples = lin[0]
+    v1 = scipy.interpolate.pchip_interpolate(metric1, log_rate1, samples)
+    v2 = scipy.interpolate.pchip_interpolate(metric2, log_rate2, samples)
+
+    # Calculate the integral using the trapezoid method on the samples.
+    int_v1 = np.trapz(v1, dx=interval)
+    int_v2 = np.trapz(v2, dx=interval)
+
+    # Calculate the average improvement.
+    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)
+
+  except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e:
+    return 0
+
+  # Convert to a percentage.
+  avg_diff = (math.exp(avg_exp_diff) - 1) * 100
+
+  return avg_diff
+
+
+
+def FillForm(string_for_substitution, dictionary_of_vars):
+  """
+  This function substitutes all matches of the command string //%% ... %%//
+  with the variable represented by ...  .
+  """
+  return_string = string_for_substitution
+  for i in re.findall("//%%(.*)%%//", string_for_substitution):
+    return_string = re.sub("//%%" + i + "%%//", dictionary_of_vars[i],
+                           return_string)
+  return return_string
+
+
+def HasMetrics(line):
+  """
+  The metrics files produced by aomenc are started with a B for headers.
+  """
+  # If the first char of the first word on the line is a digit
+  if len(line) == 0:
+    return False
+  if len(line.split()) == 0:
+    return False
+  if line.split()[0][0:1].isdigit():
+    return True
+  return False
+
+def GetMetrics(file_name):
+  metric_file = open(file_name, "r")
+  return metric_file.readline().split();
+
+def ParseMetricFile(file_name, metric_column):
+  metric_set1 = set([])
+  metric_file = open(file_name, "r")
+  for line in metric_file:
+    metrics = string.split(line)
+    if HasMetrics(line):
+      if metric_column < len(metrics):
+        try:
+          tuple = float(metrics[0]), float(metrics[metric_column])
+        except:
+          tuple = float(metrics[0]), 0
+      else:
+        tuple = float(metrics[0]), 0
+      metric_set1.add(tuple)
+  metric_set1_sorted = sorted(metric_set1)
+  return metric_set1_sorted
+
+
+def FileBetter(file_name_1, file_name_2, metric_column, method):
+  """
+  Compares two data files and determines which is better and by how
+  much. Also produces a histogram of how much better, by PSNR.
+  metric_column is the metric.
+  """
+  # Store and parse our two files into lists of unique tuples.
+
+  # Read the two files, parsing out lines starting with bitrate.
+  metric_set1_sorted = ParseMetricFile(file_name_1, metric_column)
+  metric_set2_sorted = ParseMetricFile(file_name_2, metric_column)
+
+
+  def GraphBetter(metric_set1_sorted, metric_set2_sorted, base_is_set_2):
+    """
+    Search through the sorted metric file for metrics on either side of
+    the metric from file 1.  Since both lists are sorted we really
+    should not have to search through the entire range, but these
+    are small files."""
+    total_bitrate_difference_ratio = 0.0
+    count = 0
+    for bitrate, metric in metric_set1_sorted:
+      if bitrate == 0:
+        continue
+      for i in range(len(metric_set2_sorted) - 1):
+        s2_bitrate_0, s2_metric_0 = metric_set2_sorted[i]
+        s2_bitrate_1, s2_metric_1 = metric_set2_sorted[i + 1]
+        # We have a point on either side of our metric range.
+        if metric > s2_metric_0 and metric <= s2_metric_1:
+
+          # Calculate a slope.
+          if s2_metric_1 - s2_metric_0 != 0:
+            metric_slope = ((s2_bitrate_1 - s2_bitrate_0) /
+                            (s2_metric_1 - s2_metric_0))
+          else:
+            metric_slope = 0
+
+          estimated_s2_bitrate = (s2_bitrate_0 + (metric - s2_metric_0) *
+                                  metric_slope)
+
+          if estimated_s2_bitrate == 0:
+            continue
+          # Calculate percentage difference as given by base.
+          if base_is_set_2 == 0:
+            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
+                                        bitrate)
+          else:
+            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
+                                        estimated_s2_bitrate)
+
+          total_bitrate_difference_ratio += bitrate_difference_ratio
+          count += 1
+          break
+
+    # Calculate the average improvement between graphs.
+    if count != 0:
+      avg = total_bitrate_difference_ratio / count
+
+    else:
+      avg = 0.0
+
+    return avg
+
+  # Be fair to both graphs by testing all the points in each.
+  if method == 'avg':
+    avg_improvement = 50 * (
+                       GraphBetter(metric_set1_sorted, metric_set2_sorted, 1) -
+                       GraphBetter(metric_set2_sorted, metric_set1_sorted, 0))
+  elif method == 'dsnr':
+      avg_improvement = bdsnr2(metric_set1_sorted, metric_set2_sorted)
+  else:
+      avg_improvement = bdrate2(metric_set2_sorted, metric_set1_sorted)
+
+  return avg_improvement
+
+
+def HandleFiles(variables):
+  """
+  This script creates html for displaying metric data produced from data
+  in a video stats file,  as created by the AOM project when enable_psnr
+  is turned on:
+
+  Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..]
+
+  The script parses each metrics file [see below] that matches the
+  statfile_pattern  in the baseline directory and looks for the file that
+  matches that same file in each of the sub_dirs, and compares the resultant
+  metrics bitrate, avg psnr, glb psnr, and ssim. "
+
+  It provides a table in which each row is a file in the line directory,
+  and a column for each subdir, with the cells representing how that clip
+  compares to baseline for that subdir.   A graph is given for each which
+  compares filesize to that metric.  If you click on a point in the graph it
+  zooms in on that point.
+
+  a SAMPLE metrics file:
+
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+   25.911   38.242   38.104   38.258   38.121   75.790    14103
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+   49.982   41.264   41.129   41.255   41.122   83.993    19817
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+   74.967   42.911   42.767   42.899   42.756   87.928    17332
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  100.012   43.983   43.838   43.881   43.738   89.695    25389
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  149.980   45.338   45.203   45.184   45.043   91.591    25438
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  199.852   46.225   46.123   46.113   45.999   92.679    28302
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  249.922   46.864   46.773   46.777   46.673   93.334    27244
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  299.998   47.366   47.281   47.317   47.220   93.844    27137
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  349.769   47.746   47.677   47.722   47.648   94.178    32226
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  399.773   48.032   47.971   48.013   47.946   94.362    36203
+
+  sample use:
+  visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html
+  """
+
+  # The template file is the html file into which we will write the
+  # data from the stats file, formatted correctly for the gviz_api.
+  template_file = open(variables[1], "r")
+  page_template = template_file.read()
+  template_file.close()
+
+  # This is the path match pattern for finding stats files amongst
+  # all the other files it could be.  eg: *.stt
+  file_pattern = variables[2]
+
+  # This is the directory with files that we will use to do the comparison
+  # against.
+  baseline_dir = variables[3]
+  snrs = ''
+  filestable = {}
+
+  filestable['dsnr'] = ''
+  filestable['drate'] = ''
+  filestable['avg'] = ''
+
+  # Dirs is directories after the baseline to compare to the base.
+  dirs = variables[4:len(variables)]
+
+  # Find the metric files in the baseline directory.
+  dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern))
+
+  metrics = GetMetrics(baseline_dir + "/" + dir_list[0])
+
+  metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];'
+
+  for column in range(1, len(metrics)):
+
+    for metric in ['avg','dsnr','drate']:
+      description = {"file": ("string", "File")}
+
+      # Go through each directory and add a column header to our description.
+      countoverall = {}
+      sumoverall = {}
+
+      for directory in dirs:
+        description[directory] = ("number", directory)
+        countoverall[directory] = 0
+        sumoverall[directory] = 0
+
+      # Data holds the data for the visualization, name given comes from
+      # gviz_api sample code.
+      data = []
+      for filename in dir_list:
+        row = {'file': splitext(basename(filename))[0] }
+        baseline_file_name = baseline_dir + "/" + filename
+
+        # Read the metric file from each of the directories in our list.
+        for directory in dirs:
+          metric_file_name = directory + "/" + filename
+
+          # If there is a metric file in the current directory, open it
+          # and calculate its overall difference between it and the baseline
+          # directory's metric file.
+          if os.path.isfile(metric_file_name):
+            overall = FileBetter(baseline_file_name, metric_file_name,
+                                 column, metric)
+            row[directory] = overall
+
+            sumoverall[directory] += overall
+            countoverall[directory] += 1
+
+        data.append(row)
+
+      # Add the overall numbers.
+      row = {"file": "OVERALL" }
+      for directory in dirs:
+        row[directory] = sumoverall[directory] / countoverall[directory]
+      data.append(row)
+
+      # write the tables out
+      data_table = gviz_api.DataTable(description)
+      data_table.LoadData(data)
+
+      filestable[metric] = ( filestable[metric] + "filestable_" + metric +
+                             "[" + str(column) + "]=" +
+                             data_table.ToJSon(columns_order=["file"]+dirs) + "\n" )
+
+    filestable_avg = filestable['avg']
+    filestable_dpsnr = filestable['dsnr']
+    filestable_drate = filestable['drate']
+
+    # Now we collect all the data for all the graphs.  First the column
+    # headers which will be Datarate and then each directory.
+    columns = ("datarate",baseline_dir)
+    description = {"datarate":("number", "Datarate")}
+    for directory in dirs:
+      description[directory] = ("number", directory)
+
+    description[baseline_dir] = ("number", baseline_dir)
+
+    snrs = snrs + "snrs[" + str(column) + "] = ["
+
+    # Now collect the data for the graphs, file by file.
+    for filename in dir_list:
+
+      data = []
+
+      # Collect the file in each directory and store all of its metrics
+      # in the associated gviz metrics table.
+      all_dirs = dirs + [baseline_dir]
+      for directory in all_dirs:
+
+        metric_file_name = directory + "/" + filename
+        if not os.path.isfile(metric_file_name):
+          continue
+
+        # Read and parse the metrics file storing it to the data we'll
+        # use for the gviz_api.Datatable.
+        metrics = ParseMetricFile(metric_file_name, column)
+        for bitrate, metric in metrics:
+          data.append({"datarate": bitrate, directory: metric})
+
+      data_table = gviz_api.DataTable(description)
+      data_table.LoadData(data)
+      snrs = snrs + "'" + data_table.ToJSon(
+         columns_order=tuple(["datarate",baseline_dir]+dirs)) + "',"
+
+    snrs = snrs + "]\n"
+
+    formatters = ""
+    for i in range(len(dirs)):
+      formatters = "%s   formatter.format(better, %d);" % (formatters, i+1)
+
+  print FillForm(page_template, vars())
+  return
+
+if len(sys.argv) < 3:
+  print HandleFiles.__doc__
+else:
+  HandleFiles(sys.argv)
diff --git a/third_party/aom/test/warp_filter_test.cc b/third_party/aom/test/warp_filter_test.cc
new file mode 100644
index 0000000000..f0be7d226b
--- /dev/null
+++ b/third_party/aom/test/warp_filter_test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/warp_filter_test_util.h"
+using libaom_test::ACMRandom;
+#if CONFIG_AV1_HIGHBITDEPTH
+using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
+#endif
+using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
+using std::make_tuple;
+using std::tuple;
+
+namespace {
+
+TEST_P(AV1WarpFilterTest, CheckOutput) {
+  RunCheckOutput(std::get<3>(GET_PARAM(0)));
+}
+TEST_P(AV1WarpFilterTest, DISABLED_Speed) {
+  RunSpeedTest(std::get<3>(GET_PARAM(0)));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    C, AV1WarpFilterTest,
+    libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_c));
+
+#if CONFIG_AV1_HIGHBITDEPTH && (HAVE_SSE4_1 || HAVE_NEON)
+TEST_P(AV1HighbdWarpFilterTest, CheckOutput) {
+  RunCheckOutput(std::get<4>(GET_PARAM(0)));
+}
+TEST_P(AV1HighbdWarpFilterTest, DISABLED_Speed) {
+  RunSpeedTest(std::get<4>(GET_PARAM(0)));
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH && (HAVE_SSE4_1 || HAVE_NEON)
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AV1WarpFilterTest,
+    libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_sse4_1));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdWarpFilterTest,
+                         libaom_test::AV1HighbdWarpFilter::BuildParams(
+                             av1_highbd_warp_affine_sse4_1));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1WarpFilterTest,
+    libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_avx2));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AV1HighbdWarpFilterTest,
+    libaom_test::AV1HighbdWarpFilter::BuildParams(av1_highbd_warp_affine_avx2));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1WarpFilterTest,
+    libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_neon));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AV1HighbdWarpFilterTest,
+    libaom_test::AV1HighbdWarpFilter::BuildParams(av1_highbd_warp_affine_neon));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // HAVE_NEON
+
+#if HAVE_NEON_I8MM
+INSTANTIATE_TEST_SUITE_P(
+    NEON_I8MM, AV1WarpFilterTest,
+    libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_neon_i8mm));
+#endif  // HAVE_NEON_I8MM
+
+#if HAVE_SVE
+INSTANTIATE_TEST_SUITE_P(
+    SVE, AV1WarpFilterTest,
+    libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_sve));
+#endif  // HAVE_SVE
+
+}  // namespace
diff --git a/third_party/aom/test/warp_filter_test_util.cc b/third_party/aom/test/warp_filter_test_util.cc
new file mode 100644
index 0000000000..470c980777
--- /dev/null
+++ b/third_party/aom/test/warp_filter_test_util.cc
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <memory>
+#include <new>
+
+#include "aom_ports/aom_timer.h"
+#include "test/warp_filter_test_util.h"
+
+using std::make_tuple;
+using std::tuple;
+
+namespace libaom_test {
+
+int32_t random_warped_param(libaom_test::ACMRandom *rnd, int bits,
+                            int rnd_gen_zeros) {
+  // Avoid accidentally generating a zero in speed tests, they are set by the
+  // is_*_zero parameters instead.
+  if (rnd_gen_zeros) {
+    // 1 in 8 chance of generating zero (arbitrarily chosen)
+    if (((rnd->Rand8()) & 7) == 0) return 0;
+  }
+  // Otherwise, enerate uniform values in the range
+  // [-(1 << bits), 1] U [1, 1<<bits]
+  int32_t v = 1 + (rnd->Rand16() & ((1 << bits) - 1));
+  if ((rnd->Rand8()) & 1) return -v;
+  return v;
+}
+
+void generate_warped_model(libaom_test::ACMRandom *rnd, int32_t *mat,
+                           int16_t *alpha, int16_t *beta, int16_t *gamma,
+                           int16_t *delta, const int is_alpha_zero,
+                           const int is_beta_zero, const int is_gamma_zero,
+                           const int is_delta_zero, const int rnd_gen_zeros) {
+  while (true) {
+    int rnd8 = rnd->Rand8() & 3;
+    mat[0] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS + 6, rnd_gen_zeros);
+    mat[1] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS + 6, rnd_gen_zeros);
+    mat[2] =
+        (random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros)) +
+        (1 << WARPEDMODEL_PREC_BITS);
+    mat[3] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros);
+
+    if (rnd8 <= 1) {
+      // AFFINE
+      mat[4] =
+          random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros);
+      mat[5] =
+          (random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros)) +
+          (1 << WARPEDMODEL_PREC_BITS);
+    } else if (rnd8 == 2) {
+      mat[4] = -mat[3];
+      mat[5] = mat[2];
+    } else {
+      mat[4] =
+          random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros);
+      mat[5] =
+          (random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros)) +
+          (1 << WARPEDMODEL_PREC_BITS);
+    }
+
+    if (is_alpha_zero == 1) {
+      mat[2] = 1 << WARPEDMODEL_PREC_BITS;
+    }
+    if (is_beta_zero == 1) {
+      mat[3] = 0;
+    }
+    if (is_gamma_zero == 1) {
+      mat[4] = 0;
+    }
+    if (is_delta_zero == 1) {
+      mat[5] = static_cast<int32_t>(
+          ((static_cast<int64_t>(mat[3]) * mat[4] + (mat[2] / 2)) / mat[2]) +
+          (1 << WARPEDMODEL_PREC_BITS));
+    }
+
+    // Calculate the derived parameters and check that they are suitable
+    // for the warp filter.
+    assert(mat[2] != 0);
+
+    *alpha = clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
+    *beta = clamp(mat[3], INT16_MIN, INT16_MAX);
+    *gamma = static_cast<int16_t>(clamp64(
+        (static_cast<int64_t>(mat[4]) * (1 << WARPEDMODEL_PREC_BITS)) / mat[2],
+        INT16_MIN, INT16_MAX));
+    *delta = static_cast<int16_t>(clamp64(
+        mat[5] -
+            ((static_cast<int64_t>(mat[3]) * mat[4] + (mat[2] / 2)) / mat[2]) -
+            (1 << WARPEDMODEL_PREC_BITS),
+        INT16_MIN, INT16_MAX));
+
+    if ((4 * abs(*alpha) + 7 * abs(*beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
+        (4 * abs(*gamma) + 4 * abs(*delta) >= (1 << WARPEDMODEL_PREC_BITS)))
+      continue;
+
+    *alpha = ROUND_POWER_OF_TWO_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS) *
+             (1 << WARP_PARAM_REDUCE_BITS);
+    *beta = ROUND_POWER_OF_TWO_SIGNED(*beta, WARP_PARAM_REDUCE_BITS) *
+            (1 << WARP_PARAM_REDUCE_BITS);
+    *gamma = ROUND_POWER_OF_TWO_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS) *
+             (1 << WARP_PARAM_REDUCE_BITS);
+    *delta = ROUND_POWER_OF_TWO_SIGNED(*delta, WARP_PARAM_REDUCE_BITS) *
+             (1 << WARP_PARAM_REDUCE_BITS);
+
+    // We have a valid model, so finish
+    return;
+  }
+}
+
+namespace AV1WarpFilter {
+::testing::internal::ParamGenerator<WarpTestParams> BuildParams(
+    warp_affine_func filter) {
+  WarpTestParam params[] = {
+    make_tuple(4, 4, 5000, filter),  make_tuple(8, 8, 5000, filter),
+    make_tuple(64, 64, 100, filter), make_tuple(4, 16, 2000, filter),
+    make_tuple(32, 8, 1000, filter),
+  };
+  return ::testing::Combine(::testing::ValuesIn(params),
+                            ::testing::Values(0, 1), ::testing::Values(0, 1),
+                            ::testing::Values(0, 1), ::testing::Values(0, 1));
+}
+
+AV1WarpFilterTest::~AV1WarpFilterTest() = default;
+void AV1WarpFilterTest::SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+void AV1WarpFilterTest::RunSpeedTest(warp_affine_func test_impl) {
+  const int w = 128, h = 128;
+  const int border = 16;
+  const int stride = w + 2 * border;
+  WarpTestParam params = GET_PARAM(0);
+  const int out_w = std::get<0>(params), out_h = std::get<1>(params);
+  const int is_alpha_zero = GET_PARAM(1);
+  const int is_beta_zero = GET_PARAM(2);
+  const int is_gamma_zero = GET_PARAM(3);
+  const int is_delta_zero = GET_PARAM(4);
+  int sub_x, sub_y;
+  const int bd = 8;
+
+  std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * stride]);
+  ASSERT_NE(input_, nullptr);
+  uint8_t *input = input_.get() + border;
+
+  // The warp functions always write rows with widths that are multiples of 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
+  ASSERT_NE(output, nullptr);
+  int32_t mat[8];
+  int16_t alpha, beta, gamma, delta;
+  ConvolveParams conv_params = get_conv_params(0, 0, bd);
+  std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
+                                            CONV_BUF_TYPE[output_n]);
+  ASSERT_NE(dsta, nullptr);
+  generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
+                        is_alpha_zero, is_beta_zero, is_gamma_zero,
+                        is_delta_zero, 0);
+
+  for (int r = 0; r < h; ++r)
+    for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand8();
+  for (int r = 0; r < h; ++r) {
+    memset(input + r * stride - border, input[r * stride], border);
+    memset(input + r * stride + w, input[r * stride + (w - 1)], border);
+  }
+
+  sub_x = 0;
+  sub_y = 0;
+  int do_average = 0;
+
+  conv_params =
+      get_conv_params_no_round(do_average, 0, dsta.get(), out_w, 1, bd);
+  conv_params.use_dist_wtd_comp_avg = 0;
+
+  const int num_loops = 1000000000 / (out_w + out_h);
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < num_loops; ++i)
+    test_impl(mat, input, w, h, stride, output.get(), 32, 32, out_w, out_h,
+              out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma, delta);
+
+  aom_usec_timer_mark(&timer);
+  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+  printf("warp %3dx%-3d alpha=%d beta=%d gamma=%d delta=%d: %7.2f ns \n", out_w,
+         out_h, alpha, beta, gamma, delta, 1000.0 * elapsed_time / num_loops);
+}
+
+void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
+  const int w = 128, h = 128;
+  const int border = 16;
+  const int stride = w + 2 * border;
+  WarpTestParam params = GET_PARAM(0);
+  const int is_alpha_zero = GET_PARAM(1);
+  const int is_beta_zero = GET_PARAM(2);
+  const int is_gamma_zero = GET_PARAM(3);
+  const int is_delta_zero = GET_PARAM(4);
+  const int out_w = std::get<0>(params), out_h = std::get<1>(params);
+  const int num_iters = std::get<2>(params);
+  const int bd = 8;
+
+  // The warp functions always write rows with widths that are multiples of 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * stride]);
+  ASSERT_NE(input_, nullptr);
+  uint8_t *input = input_.get() + border;
+  std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
+  ASSERT_NE(output, nullptr);
+  std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
+  ASSERT_NE(output2, nullptr);
+  int32_t mat[8];
+  int16_t alpha, beta, gamma, delta;
+  ConvolveParams conv_params = get_conv_params(0, 0, bd);
+  std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
+                                            CONV_BUF_TYPE[output_n]);
+  ASSERT_NE(dsta, nullptr);
+  std::unique_ptr<CONV_BUF_TYPE[]> dstb(new (std::nothrow)
+                                            CONV_BUF_TYPE[output_n]);
+  ASSERT_NE(dstb, nullptr);
+  for (int i = 0; i < output_n; ++i) output[i] = output2[i] = rnd_.Rand8();
+
+  for (int i = 0; i < num_iters; ++i) {
+    // Generate an input block and extend its borders horizontally
+    for (int r = 0; r < h; ++r)
+      for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand8();
+    for (int r = 0; r < h; ++r) {
+      memset(input + r * stride - border, input[r * stride], border);
+      memset(input + r * stride + w, input[r * stride + (w - 1)], border);
+    }
+    const int use_no_round = rnd_.Rand8() & 1;
+    for (int sub_x = 0; sub_x < 2; ++sub_x)
+      for (int sub_y = 0; sub_y < 2; ++sub_y) {
+        generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
+                              is_alpha_zero, is_beta_zero, is_gamma_zero,
+                              is_delta_zero, 1);
+
+        for (int ii = 0; ii < 2; ++ii) {
+          for (int jj = 0; jj < 5; ++jj) {
+            for (int do_average = 0; do_average <= 1; ++do_average) {
+              if (use_no_round) {
+                conv_params = get_conv_params_no_round(
+                    do_average, 0, dsta.get(), out_w, 1, bd);
+              } else {
+                conv_params = get_conv_params(0, 0, bd);
+              }
+              if (jj >= 4) {
+                conv_params.use_dist_wtd_comp_avg = 0;
+              } else {
+                conv_params.use_dist_wtd_comp_avg = 1;
+                conv_params.fwd_offset = quant_dist_lookup_table[jj][ii];
+                conv_params.bck_offset = quant_dist_lookup_table[jj][1 - ii];
+              }
+              av1_warp_affine_c(mat, input, w, h, stride, output.get(), 32, 32,
+                                out_w, out_h, out_w, sub_x, sub_y, &conv_params,
+                                alpha, beta, gamma, delta);
+              if (use_no_round) {
+                conv_params = get_conv_params_no_round(
+                    do_average, 0, dstb.get(), out_w, 1, bd);
+              }
+              if (jj >= 4) {
+                conv_params.use_dist_wtd_comp_avg = 0;
+              } else {
+                conv_params.use_dist_wtd_comp_avg = 1;
+                conv_params.fwd_offset = quant_dist_lookup_table[jj][ii];
+                conv_params.bck_offset = quant_dist_lookup_table[jj][1 - ii];
+              }
+              test_impl(mat, input, w, h, stride, output2.get(), 32, 32, out_w,
+                        out_h, out_w, sub_x, sub_y, &conv_params, alpha, beta,
+                        gamma, delta);
+              if (use_no_round) {
+                for (int j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(dsta[j], dstb[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+                for (int j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(output[j], output2[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+              } else {
+                for (int j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(output[j], output2[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+              }
+            }
+          }
+        }
+      }
+  }
+}
+}  // namespace AV1WarpFilter
+
+#if CONFIG_AV1_HIGHBITDEPTH
+namespace AV1HighbdWarpFilter {
+::testing::internal::ParamGenerator<HighbdWarpTestParams> BuildParams(
+    highbd_warp_affine_func filter) {
+  const HighbdWarpTestParam params[] = {
+    make_tuple(4, 4, 100, 8, filter),    make_tuple(8, 8, 100, 8, filter),
+    make_tuple(64, 64, 100, 8, filter),  make_tuple(4, 16, 100, 8, filter),
+    make_tuple(32, 8, 100, 8, filter),   make_tuple(4, 4, 100, 10, filter),
+    make_tuple(8, 8, 100, 10, filter),   make_tuple(64, 64, 100, 10, filter),
+    make_tuple(4, 16, 100, 10, filter),  make_tuple(32, 8, 100, 10, filter),
+    make_tuple(4, 4, 100, 12, filter),   make_tuple(8, 8, 100, 12, filter),
+    make_tuple(64, 64, 100, 12, filter), make_tuple(4, 16, 100, 12, filter),
+    make_tuple(32, 8, 100, 12, filter),
+  };
+  return ::testing::Combine(::testing::ValuesIn(params),
+                            ::testing::Values(0, 1), ::testing::Values(0, 1),
+                            ::testing::Values(0, 1), ::testing::Values(0, 1));
+}
+
+AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() = default;
+void AV1HighbdWarpFilterTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+
+void AV1HighbdWarpFilterTest::RunSpeedTest(highbd_warp_affine_func test_impl) {
+  const int w = 128, h = 128;
+  const int border = 16;
+  const int stride = w + 2 * border;
+  HighbdWarpTestParam param = GET_PARAM(0);
+  const int is_alpha_zero = GET_PARAM(1);
+  const int is_beta_zero = GET_PARAM(2);
+  const int is_gamma_zero = GET_PARAM(3);
+  const int is_delta_zero = GET_PARAM(4);
+  const int out_w = std::get<0>(param), out_h = std::get<1>(param);
+  const int bd = std::get<3>(param);
+  const int mask = (1 << bd) - 1;
+  int sub_x, sub_y;
+
+  // The warp functions always write rows with widths that are multiples of 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  std::unique_ptr<uint16_t[]> input_(new (std::nothrow) uint16_t[h * stride]);
+  ASSERT_NE(input_, nullptr);
+  uint16_t *input = input_.get() + border;
+  std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
+  ASSERT_NE(output, nullptr);
+  int32_t mat[8];
+  int16_t alpha, beta, gamma, delta;
+  ConvolveParams conv_params = get_conv_params(0, 0, bd);
+  std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
+                                            CONV_BUF_TYPE[output_n]);
+  ASSERT_NE(dsta, nullptr);
+
+  generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
+                        is_alpha_zero, is_beta_zero, is_gamma_zero,
+                        is_delta_zero, 0);
+  // Generate an input block and extend its borders horizontally
+  for (int r = 0; r < h; ++r)
+    for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask;
+  for (int r = 0; r < h; ++r) {
+    for (int c = 0; c < border; ++c) {
+      input[r * stride - border + c] = input[r * stride];
+      input[r * stride + w + c] = input[r * stride + (w - 1)];
+    }
+  }
+
+  sub_x = 0;
+  sub_y = 0;
+  int do_average = 0;
+  conv_params.use_dist_wtd_comp_avg = 0;
+  conv_params =
+      get_conv_params_no_round(do_average, 0, dsta.get(), out_w, 1, bd);
+
+  const int num_loops = 1000000000 / (out_w + out_h);
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+
+  for (int i = 0; i < num_loops; ++i)
+    test_impl(mat, input, w, h, stride, output.get(), 32, 32, out_w, out_h,
+              out_w, sub_x, sub_y, bd, &conv_params, alpha, beta, gamma, delta);
+
+  aom_usec_timer_mark(&timer);
+  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+  printf("highbd warp %3dx%-3d alpha=%d beta=%d gamma=%d delta=%d: %7.2f ns \n",
+         out_w, out_h, alpha, beta, gamma, delta,
+         1000.0 * elapsed_time / num_loops);
+}
+
+void AV1HighbdWarpFilterTest::RunCheckOutput(
+    highbd_warp_affine_func test_impl) {
+  const int w = 128, h = 128;
+  const int border = 16;
+  const int stride = w + 2 * border;
+  HighbdWarpTestParam param = GET_PARAM(0);
+  const int is_alpha_zero = GET_PARAM(1);
+  const int is_beta_zero = GET_PARAM(2);
+  const int is_gamma_zero = GET_PARAM(3);
+  const int is_delta_zero = GET_PARAM(4);
+  const int out_w = std::get<0>(param), out_h = std::get<1>(param);
+  const int bd = std::get<3>(param);
+  const int num_iters = std::get<2>(param);
+  const int mask = (1 << bd) - 1;
+
+  // The warp functions always write rows with widths that are multiples of 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  std::unique_ptr<uint16_t[]> input_(new (std::nothrow) uint16_t[h * stride]);
+  ASSERT_NE(input_, nullptr);
+  uint16_t *input = input_.get() + border;
+  std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
+  ASSERT_NE(output, nullptr);
+  std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
+  ASSERT_NE(output2, nullptr);
+  int32_t mat[8];
+  int16_t alpha, beta, gamma, delta;
+  ConvolveParams conv_params = get_conv_params(0, 0, bd);
+  std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
+                                            CONV_BUF_TYPE[output_n]);
+  ASSERT_NE(dsta, nullptr);
+  std::unique_ptr<CONV_BUF_TYPE[]> dstb(new (std::nothrow)
+                                            CONV_BUF_TYPE[output_n]);
+  ASSERT_NE(dstb, nullptr);
+  for (int i = 0; i < output_n; ++i) output[i] = output2[i] = rnd_.Rand16();
+
+  for (int i = 0; i < num_iters; ++i) {
+    // Generate an input block and extend its borders horizontally
+    for (int r = 0; r < h; ++r)
+      for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask;
+    for (int r = 0; r < h; ++r) {
+      for (int c = 0; c < border; ++c) {
+        input[r * stride - border + c] = input[r * stride];
+        input[r * stride + w + c] = input[r * stride + (w - 1)];
+      }
+    }
+    const int use_no_round = rnd_.Rand8() & 1;
+    for (int sub_x = 0; sub_x < 2; ++sub_x)
+      for (int sub_y = 0; sub_y < 2; ++sub_y) {
+        generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
+                              is_alpha_zero, is_beta_zero, is_gamma_zero,
+                              is_delta_zero, 1);
+        for (int ii = 0; ii < 2; ++ii) {
+          for (int jj = 0; jj < 5; ++jj) {
+            for (int do_average = 0; do_average <= 1; ++do_average) {
+              if (use_no_round) {
+                conv_params = get_conv_params_no_round(
+                    do_average, 0, dsta.get(), out_w, 1, bd);
+              } else {
+                conv_params = get_conv_params(0, 0, bd);
+              }
+              if (jj >= 4) {
+                conv_params.use_dist_wtd_comp_avg = 0;
+              } else {
+                conv_params.use_dist_wtd_comp_avg = 1;
+                conv_params.fwd_offset = quant_dist_lookup_table[jj][ii];
+                conv_params.bck_offset = quant_dist_lookup_table[jj][1 - ii];
+              }
+
+              av1_highbd_warp_affine_c(mat, input, w, h, stride, output.get(),
+                                       32, 32, out_w, out_h, out_w, sub_x,
+                                       sub_y, bd, &conv_params, alpha, beta,
+                                       gamma, delta);
+              if (use_no_round) {
+                // TODO(angiebird): Change this to test_impl once we have SIMD
+                // implementation
+                conv_params = get_conv_params_no_round(
+                    do_average, 0, dstb.get(), out_w, 1, bd);
+              }
+              if (jj >= 4) {
+                conv_params.use_dist_wtd_comp_avg = 0;
+              } else {
+                conv_params.use_dist_wtd_comp_avg = 1;
+                conv_params.fwd_offset = quant_dist_lookup_table[jj][ii];
+                conv_params.bck_offset = quant_dist_lookup_table[jj][1 - ii];
+              }
+              test_impl(mat, input, w, h, stride, output2.get(), 32, 32, out_w,
+                        out_h, out_w, sub_x, sub_y, bd, &conv_params, alpha,
+                        beta, gamma, delta);
+
+              if (use_no_round) {
+                for (int j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(dsta[j], dstb[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+                for (int j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(output[j], output2[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+              } else {
+                for (int j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(output[j], output2[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+              }
+            }
+          }
+        }
+      }
+  }
+}
+}  // namespace AV1HighbdWarpFilter
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace libaom_test
diff --git a/third_party/aom/test/warp_filter_test_util.h b/third_party/aom/test/warp_filter_test_util.h
new file mode 100644
index 0000000000..364368ac0c
--- /dev/null
+++ b/third_party/aom/test/warp_filter_test_util.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TEST_WARP_FILTER_TEST_UTIL_H_
+#define AOM_TEST_WARP_FILTER_TEST_UTIL_H_
+
+#include <tuple>
+
+#include "config/av1_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "test/register_state_check.h"
+
+#include "av1/common/mv.h"
+#include "av1/common/common_data.h"
+
+namespace libaom_test {
+
+void generate_warped_model(libaom_test::ACMRandom *rnd, int32_t *mat,
+                           int16_t *alpha, int16_t *beta, int16_t *gamma,
+                           int16_t *delta, int is_alpha_zero, int is_beta_zero,
+                           int is_gamma_zero, int is_delta_zero);
+
+namespace AV1WarpFilter {
+
+typedef void (*warp_affine_func)(const int32_t *mat, const uint8_t *ref,
+                                 int width, int height, int stride,
+                                 uint8_t *pred, int p_col, int p_row,
+                                 int p_width, int p_height, int p_stride,
+                                 int subsampling_x, int subsampling_y,
+                                 ConvolveParams *conv_params, int16_t alpha,
+                                 int16_t beta, int16_t gamma, int16_t delta);
+
+typedef std::tuple<int, int, int, warp_affine_func> WarpTestParam;
+typedef std::tuple<WarpTestParam, int, int, int, int> WarpTestParams;
+
+::testing::internal::ParamGenerator<WarpTestParams> BuildParams(
+    warp_affine_func filter);
+
+class AV1WarpFilterTest : public ::testing::TestWithParam<WarpTestParams> {
+ public:
+  ~AV1WarpFilterTest() override;
+  void SetUp() override;
+
+ protected:
+  void RunCheckOutput(warp_affine_func test_impl);
+  void RunSpeedTest(warp_affine_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+};
+
+}  // namespace AV1WarpFilter
+
+#if CONFIG_AV1_HIGHBITDEPTH
+namespace AV1HighbdWarpFilter {
+typedef void (*highbd_warp_affine_func)(const int32_t *mat, const uint16_t *ref,
+                                        int width, int height, int stride,
+                                        uint16_t *pred, int p_col, int p_row,
+                                        int p_width, int p_height, int p_stride,
+                                        int subsampling_x, int subsampling_y,
+                                        int bd, ConvolveParams *conv_params,
+                                        int16_t alpha, int16_t beta,
+                                        int16_t gamma, int16_t delta);
+
+typedef std::tuple<int, int, int, int, highbd_warp_affine_func>
+    HighbdWarpTestParam;
+typedef std::tuple<HighbdWarpTestParam, int, int, int, int>
+    HighbdWarpTestParams;
+
+::testing::internal::ParamGenerator<HighbdWarpTestParams> BuildParams(
+    highbd_warp_affine_func filter);
+
+class AV1HighbdWarpFilterTest
+    : public ::testing::TestWithParam<HighbdWarpTestParams> {
+ public:
+  ~AV1HighbdWarpFilterTest() override;
+  void SetUp() override;
+
+ protected:
+  void RunCheckOutput(highbd_warp_affine_func test_impl);
+  void RunSpeedTest(highbd_warp_affine_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+};
+
+}  // namespace AV1HighbdWarpFilter
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_WARP_FILTER_TEST_UTIL_H_
diff --git a/third_party/aom/test/webm_video_source.h b/third_party/aom/test/webm_video_source.h
new file mode 100644
index 0000000000..845abd6dce
--- /dev/null
+++ b/third_party/aom/test/webm_video_source.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AOM_TEST_WEBM_VIDEO_SOURCE_H_
+#define AOM_TEST_WEBM_VIDEO_SOURCE_H_
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+#include <new>
+#include <string>
+#include "common/tools_common.h"
+#include "common/webmdec.h"
+#include "test/video_source.h"
+
+namespace libaom_test {
+
+// This class extends VideoSource to allow parsing of WebM files,
+// so that we can do actual file decodes.
+class WebMVideoSource : public CompressedVideoSource {
+ public:
+  explicit WebMVideoSource(const std::string &file_name)
+      : file_name_(file_name), aom_ctx_(new AvxInputContext()),
+        webm_ctx_(new WebmInputContext()), buf_(nullptr), buf_sz_(0),
+        frame_sz_(0), frame_number_(0), end_of_file_(false) {}
+
+  ~WebMVideoSource() override {
+    if (aom_ctx_->file != nullptr) fclose(aom_ctx_->file);
+    webm_free(webm_ctx_);
+    delete aom_ctx_;
+    delete webm_ctx_;
+  }
+
+  void Init() override {
+    ASSERT_NE(aom_ctx_, nullptr);
+    ASSERT_NE(webm_ctx_, nullptr);
+  }
+
+  void Begin() override {
+    ASSERT_NE(aom_ctx_, nullptr);
+    ASSERT_NE(webm_ctx_, nullptr);
+    aom_ctx_->file = OpenTestDataFile(file_name_);
+    ASSERT_NE(aom_ctx_->file, nullptr)
+        << "Input file open failed. Filename: " << file_name_;
+
+    ASSERT_EQ(file_is_webm(webm_ctx_, aom_ctx_), 1) << "file is not WebM";
+
+    FillFrame();
+  }
+
+  void Next() override {
+    ++frame_number_;
+    FillFrame();
+  }
+
+  void FillFrame() {
+    ASSERT_NE(aom_ctx_, nullptr);
+    ASSERT_NE(webm_ctx_, nullptr);
+    ASSERT_NE(aom_ctx_->file, nullptr);
+    const int status = webm_read_frame(webm_ctx_, &buf_, &frame_sz_, &buf_sz_);
+    ASSERT_GE(status, 0) << "webm_read_frame failed";
+    if (status == 1) {
+      end_of_file_ = true;
+    }
+  }
+
+  void SeekToNextKeyFrame() {
+    ASSERT_NE(aom_ctx_, nullptr);
+    ASSERT_NE(webm_ctx_, nullptr);
+    ASSERT_NE(aom_ctx_->file, nullptr);
+    do {
+      const int status =
+          webm_read_frame(webm_ctx_, &buf_, &frame_sz_, &buf_sz_);
+      ASSERT_GE(status, 0) << "webm_read_frame failed";
+      ++frame_number_;
+      if (status == 1) {
+        end_of_file_ = true;
+      }
+    } while (!webm_ctx_->is_key_frame && !end_of_file_);
+  }
+
+  const uint8_t *cxdata() const override {
+    return end_of_file_ ? nullptr : buf_;
+  }
+  size_t frame_size() const override { return frame_sz_; }
+  unsigned int frame_number() const override { return frame_number_; }
+
+ protected:
+  std::string file_name_;
+  AvxInputContext *aom_ctx_;
+  WebmInputContext *webm_ctx_;
+  uint8_t *buf_;  // Owned by webm_ctx_ and freed when webm_ctx_ is freed.
+  size_t buf_sz_;
+  size_t frame_sz_;
+  unsigned int frame_number_;
+  bool end_of_file_;
+};
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_WEBM_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/webmenc_test.cc b/third_party/aom/test/webmenc_test.cc
new file mode 100644
index 0000000000..acd795f2ec
--- /dev/null
+++ b/third_party/aom/test/webmenc_test.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <string>
+#include "common/webmenc.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+#if CONFIG_WEBM_IO
+
+class WebmencTest : public ::testing::Test {};
+
+// All of these variations on output should be identical.
+TEST(WebmencTest, ExtractEncoderSettingsOutput1) {
+  const char *argv[] = { "aomenc", "-o", "output", "input",
+                         "--target-bitrate=300" };
+  int argc = 5;
+  const std::string expected("version:1.2.3 --target-bitrate=300");
+  char *result = extract_encoder_settings("1.2.3", argv, argc, "input");
+  ASSERT_EQ(expected, std::string(result));
+  free(result);
+}
+
+TEST(WebmencTest, ExtractEncoderSettingsOutput2) {
+  const char *argv[] = { "aomenc", "--output", "bar", "foo", "--cpu-used=3" };
+  int argc = 5;
+  const std::string expected("version:abc --cpu-used=3");
+  char *result = extract_encoder_settings("abc", argv, argc, "foo");
+  ASSERT_EQ(expected, std::string(result));
+  free(result);
+}
+
+TEST(WebmencTest, ExtractEncoderSettingsOutput3) {
+  const char *argv[] = { "aomenc", "--cq-level=63", "--end-usage=q",
+                         "--output=foo", "baz" };
+  int argc = 5;
+  const std::string expected("version:23 --cq-level=63 --end-usage=q");
+  char *result = extract_encoder_settings("23", argv, argc, "baz");
+  ASSERT_EQ(expected, std::string(result));
+  free(result);
+}
+
+TEST(WebmencTest, ExtractEncoderSettingsInput) {
+  // Check that input filename is filtered regardless of position.
+  const char *argv[] = { "aomenc", "-o", "out", "input", "-p", "2" };
+  int argc = 6;
+  const char version[] = "1.0.0";
+  const std::string expected("version:1.0.0 -p 2");
+  char *result = extract_encoder_settings(version, argv, argc, "input");
+  ASSERT_EQ(expected, std::string(result));
+  free(result);
+
+  const char *argv2[] = { "aomenc", "input", "-o", "out", "-p", "2" };
+  result = extract_encoder_settings(version, argv2, argc, "input");
+  ASSERT_EQ(expected, std::string(result));
+  free(result);
+}
+
+#endif  // CONFIG_WEBM_IO
+}  // namespace
diff --git a/third_party/aom/test/wiener_test.cc b/third_party/aom/test/wiener_test.cc
new file mode 100644
index 0000000000..7eb6372aaa
--- /dev/null
+++ b/third_party/aom/test/wiener_test.cc
@@ -0,0 +1,1390 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/register_state_check.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom/aom_integer.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/encoder/pickrst.h"
+
+#define MAX_WIENER_BLOCK 384
+#define MAX_DATA_BLOCK (MAX_WIENER_BLOCK + WIENER_WIN)
+
+// 8-bit-depth tests
+namespace wiener_lowbd {
+
+// C implementation of the algorithm implmented by the SIMD code.
+// This is a little more efficient than the version in av1_compute_stats_c().
+static void compute_stats_win_opt_c(int wiener_win, const uint8_t *dgd,
+                                    const uint8_t *src, int16_t *d, int16_t *s,
+                                    int h_start, int h_end, int v_start,
+                                    int v_end, int dgd_stride, int src_stride,
+                                    int64_t *M, int64_t *H,
+                                    int use_downsampled_wiener_stats) {
+  ASSERT_TRUE(wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA);
+  (void)d;
+  (void)s;
+  int i, j, k, l, m, n;
+  const int pixel_count = (h_end - h_start) * (v_end - v_start);
+  const int wiener_win2 = wiener_win * wiener_win;
+  const int wiener_halfwin = (wiener_win >> 1);
+  uint8_t avg = find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
+  int downsample_factor =
+      use_downsampled_wiener_stats ? WIENER_STATS_DOWNSAMPLE_FACTOR : 1;
+
+  std::vector<std::vector<int64_t> > M_int(wiener_win,
+                                           std::vector<int64_t>(wiener_win, 0));
+  std::vector<std::vector<int64_t> > H_int(
+      wiener_win * wiener_win, std::vector<int64_t>(wiener_win * 8, 0));
+  std::vector<std::vector<int32_t> > sumY(wiener_win,
+                                          std::vector<int32_t>(wiener_win, 0));
+  int32_t sumX = 0;
+  const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
+
+  // Main loop handles two pixels at a time
+  // We can assume that h_start is even, since it will always be aligned to
+  // a tile edge + some number of restoration units, and both of those will
+  // be 64-pixel aligned.
+  // However, at the edge of the image, h_end may be odd, so we need to handle
+  // that case correctly.
+  assert(h_start % 2 == 0);
+  for (i = v_start; i < v_end; i = i + downsample_factor) {
+    if (use_downsampled_wiener_stats &&
+        (v_end - i < WIENER_STATS_DOWNSAMPLE_FACTOR)) {
+      downsample_factor = v_end - i;
+    }
+    int32_t sumX_row_i32 = 0;
+    std::vector<std::vector<int32_t> > sumY_row(
+        wiener_win, std::vector<int32_t>(wiener_win, 0));
+    std::vector<std::vector<int32_t> > M_row_i32(
+        wiener_win, std::vector<int32_t>(wiener_win, 0));
+    std::vector<std::vector<int32_t> > H_row_i32(
+        wiener_win * wiener_win, std::vector<int32_t>(wiener_win * 8, 0));
+    const int h_end_even = h_end & ~1;
+    const int has_odd_pixel = h_end & 1;
+    for (j = h_start; j < h_end_even; j += 2) {
+      const uint8_t X1 = src[i * src_stride + j];
+      const uint8_t X2 = src[i * src_stride + j + 1];
+      sumX_row_i32 += X1 + X2;
+
+      const uint8_t *dgd_ij = dgd_win + i * dgd_stride + j;
+      for (k = 0; k < wiener_win; k++) {
+        for (l = 0; l < wiener_win; l++) {
+          const uint8_t *dgd_ijkl = dgd_ij + k * dgd_stride + l;
+          int32_t *H_int_temp = &H_row_i32[(l * wiener_win + k)][0];
+          const uint8_t D1 = dgd_ijkl[0];
+          const uint8_t D2 = dgd_ijkl[1];
+          sumY_row[k][l] += D1 + D2;
+          M_row_i32[l][k] += D1 * X1 + D2 * X2;
+          for (m = 0; m < wiener_win; m++) {
+            for (n = 0; n < wiener_win; n++) {
+              H_int_temp[m * 8 + n] += D1 * dgd_ij[n + dgd_stride * m] +
+                                       D2 * dgd_ij[n + dgd_stride * m + 1];
+            }
+          }
+        }
+      }
+    }
+    // If the width is odd, add in the final pixel
+    if (has_odd_pixel) {
+      const uint8_t X1 = src[i * src_stride + j];
+      sumX_row_i32 += X1;
+
+      const uint8_t *dgd_ij = dgd_win + i * dgd_stride + j;
+      for (k = 0; k < wiener_win; k++) {
+        for (l = 0; l < wiener_win; l++) {
+          const uint8_t *dgd_ijkl = dgd_ij + k * dgd_stride + l;
+          int32_t *H_int_temp = &H_row_i32[(l * wiener_win + k)][0];
+          const uint8_t D1 = dgd_ijkl[0];
+          sumY_row[k][l] += D1;
+          M_row_i32[l][k] += D1 * X1;
+          for (m = 0; m < wiener_win; m++) {
+            for (n = 0; n < wiener_win; n++) {
+              H_int_temp[m * 8 + n] += D1 * dgd_ij[n + dgd_stride * m];
+            }
+          }
+        }
+      }
+    }
+
+    sumX += sumX_row_i32 * downsample_factor;
+    // Scale M matrix based on the downsampling factor
+    for (k = 0; k < wiener_win; ++k) {
+      for (l = 0; l < wiener_win; ++l) {
+        sumY[k][l] += sumY_row[k][l] * downsample_factor;
+        M_int[k][l] += (int64_t)M_row_i32[k][l] * downsample_factor;
+      }
+    }
+    // Scale H matrix based on the downsampling factor
+    for (k = 0; k < wiener_win * wiener_win; ++k) {
+      for (l = 0; l < wiener_win * 8; ++l) {
+        H_int[k][l] += (int64_t)H_row_i32[k][l] * downsample_factor;
+      }
+    }
+  }
+
+  const int64_t avg_square_sum = (int64_t)avg * (int64_t)avg * pixel_count;
+  for (k = 0; k < wiener_win; k++) {
+    for (l = 0; l < wiener_win; l++) {
+      M[l * wiener_win + k] =
+          M_int[l][k] + avg_square_sum - (int64_t)avg * (sumX + sumY[k][l]);
+      for (m = 0; m < wiener_win; m++) {
+        for (n = 0; n < wiener_win; n++) {
+          H[(l * wiener_win + k) * wiener_win2 + m * wiener_win + n] =
+              H_int[(l * wiener_win + k)][n * 8 + m] + avg_square_sum -
+              (int64_t)avg * (sumY[k][l] + sumY[n][m]);
+        }
+      }
+    }
+  }
+}
+
+void compute_stats_opt_c(int wiener_win, const uint8_t *dgd, const uint8_t *src,
+                         int16_t *d, int16_t *s, int h_start, int h_end,
+                         int v_start, int v_end, int dgd_stride, int src_stride,
+                         int64_t *M, int64_t *H,
+                         int use_downsampled_wiener_stats) {
+  if (wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA) {
+    compute_stats_win_opt_c(wiener_win, dgd, src, d, s, h_start, h_end, v_start,
+                            v_end, dgd_stride, src_stride, M, H,
+                            use_downsampled_wiener_stats);
+  } else {
+    av1_compute_stats_c(wiener_win, dgd, src, d, s, h_start, h_end, v_start,
+                        v_end, dgd_stride, src_stride, M, H,
+                        use_downsampled_wiener_stats);
+  }
+}
+
+static const int kIterations = 100;
+typedef void (*compute_stats_Func)(int wiener_win, const uint8_t *dgd,
+                                   const uint8_t *src, int16_t *dgd_avg,
+                                   int16_t *src_avg, int h_start, int h_end,
+                                   int v_start, int v_end, int dgd_stride,
+                                   int src_stride, int64_t *M, int64_t *H,
+                                   int use_downsampled_wiener_stats);
+
+////////////////////////////////////////////////////////////////////////////////
+// 8 bit
+////////////////////////////////////////////////////////////////////////////////
+
+typedef std::tuple<const compute_stats_Func> WienerTestParam;
+
+class WienerTest : public ::testing::TestWithParam<WienerTestParam> {
+ public:
+  void SetUp() override {
+    src_buf = (uint8_t *)aom_memalign(
+        32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*src_buf));
+    ASSERT_NE(src_buf, nullptr);
+    dgd_buf = (uint8_t *)aom_memalign(
+        32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*dgd_buf));
+    ASSERT_NE(dgd_buf, nullptr);
+    const int buf_size =
+        sizeof(*buf) * 6 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX;
+    buf = (int16_t *)aom_memalign(32, buf_size);
+    ASSERT_NE(buf, nullptr);
+    memset(buf, 0, buf_size);
+    target_func_ = GET_PARAM(0);
+  }
+  void TearDown() override {
+    aom_free(src_buf);
+    aom_free(dgd_buf);
+    aom_free(buf);
+  }
+  void RunWienerTest(const int32_t wiener_win, int32_t run_times);
+  void RunWienerTest_ExtremeValues(const int32_t wiener_win);
+
+ private:
+  compute_stats_Func target_func_;
+  libaom_test::ACMRandom rng_;
+  uint8_t *src_buf;
+  uint8_t *dgd_buf;
+  int16_t *buf;
+};
+
+void WienerTest::RunWienerTest(const int32_t wiener_win, int32_t run_times) {
+  const int32_t wiener_halfwin = wiener_win >> 1;
+  const int32_t wiener_win2 = wiener_win * wiener_win;
+  DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]);
+  // Note(rachelbarker):
+  // The SIMD code requires `h_start` to be even, but can otherwise
+  // deal with any values of `h_end`, `v_start`, `v_end`. We cover this
+  // entire range, even though (at the time of writing) `h_start` and `v_start`
+  // will always be multiples of 64 when called from non-test code.
+  // If in future any new requirements are added, these lines will
+  // need changing.
+  int h_start = (rng_.Rand16() % (MAX_WIENER_BLOCK / 2)) & ~1;
+  int h_end = run_times != 1 ? 256 : (rng_.Rand16() % MAX_WIENER_BLOCK);
+  if (h_start > h_end) std::swap(h_start, h_end);
+  int v_start = rng_.Rand16() % (MAX_WIENER_BLOCK / 2);
+  int v_end = run_times != 1 ? 256 : (rng_.Rand16() % MAX_WIENER_BLOCK);
+  if (v_start > v_end) std::swap(v_start, v_end);
+  const int dgd_stride = h_end;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int iters = run_times == 1 ? kIterations : 2;
+  const int max_value_downsample_stats = 1;
+  int16_t *dgd_avg = buf;
+  int16_t *src_avg =
+      buf + (3 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX);
+
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_buf[i] = rng_.Rand8();
+      src_buf[i] = rng_.Rand8();
+    }
+    uint8_t *dgd = dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin;
+    uint8_t *src = src_buf;
+    for (int use_downsampled_stats = 0;
+         use_downsampled_stats <= max_value_downsample_stats;
+         use_downsampled_stats++) {
+      aom_usec_timer timer;
+      aom_usec_timer_start(&timer);
+      for (int i = 0; i < run_times; ++i) {
+        av1_compute_stats_c(wiener_win, dgd, src, dgd_avg, src_avg, h_start,
+                            h_end, v_start, v_end, dgd_stride, src_stride,
+                            M_ref, H_ref, use_downsampled_stats);
+      }
+      aom_usec_timer_mark(&timer);
+      const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+      aom_usec_timer_start(&timer);
+      for (int i = 0; i < run_times; ++i) {
+        target_func_(wiener_win, dgd, src, dgd_avg, src_avg, h_start, h_end,
+                     v_start, v_end, dgd_stride, src_stride, M_test, H_test,
+                     use_downsampled_stats);
+      }
+      aom_usec_timer_mark(&timer);
+      const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+      if (run_times > 10) {
+        printf("win %d %3dx%-3d:%7.2f/%7.2fns", wiener_win, h_end, v_end, time1,
+               time2);
+        printf("(%3.2f)\n", time1 / time2);
+      }
+      int failed = 0;
+      for (int i = 0; i < wiener_win2; ++i) {
+        if (M_ref[i] != M_test[i]) {
+          failed = 1;
+          printf("win %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n",
+                 wiener_win, iter, i, M_ref[i], M_test[i]);
+          break;
+        }
+      }
+      for (int i = 0; i < wiener_win2 * wiener_win2; ++i) {
+        if (H_ref[i] != H_test[i]) {
+          failed = 1;
+          printf("win %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n",
+                 wiener_win, iter, i, H_ref[i], H_test[i]);
+          break;
+        }
+      }
+      ASSERT_EQ(failed, 0);
+    }
+  }
+}
+
+void WienerTest::RunWienerTest_ExtremeValues(const int32_t wiener_win) {
+  const int32_t wiener_halfwin = wiener_win >> 1;
+  const int32_t wiener_win2 = wiener_win * wiener_win;
+  DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]);
+  const int h_start = 16;
+  const int h_end = MAX_WIENER_BLOCK;
+  const int v_start = 16;
+  const int v_end = MAX_WIENER_BLOCK;
+  const int dgd_stride = h_end;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int iters = 1;
+  const int max_value_downsample_stats = 1;
+  int16_t *dgd_avg = buf;
+  int16_t *src_avg =
+      buf + (3 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX);
+
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    // Fill with alternating extreme values to maximize difference with
+    // the average.
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_buf[i] = i & 1 ? 255 : 0;
+      src_buf[i] = i & 1 ? 255 : 0;
+    }
+    uint8_t *dgd = dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin;
+    uint8_t *src = src_buf;
+    for (int use_downsampled_stats = 0;
+         use_downsampled_stats <= max_value_downsample_stats;
+         use_downsampled_stats++) {
+      av1_compute_stats_c(wiener_win, dgd, src, dgd_avg, src_avg, h_start,
+                          h_end, v_start, v_end, dgd_stride, src_stride, M_ref,
+                          H_ref, use_downsampled_stats);
+
+      target_func_(wiener_win, dgd, src, dgd_avg, src_avg, h_start, h_end,
+                   v_start, v_end, dgd_stride, src_stride, M_test, H_test,
+                   use_downsampled_stats);
+
+      int failed = 0;
+      for (int i = 0; i < wiener_win2; ++i) {
+        if (M_ref[i] != M_test[i]) {
+          failed = 1;
+          printf("win %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n",
+                 wiener_win, iter, i, M_ref[i], M_test[i]);
+          break;
+        }
+      }
+      for (int i = 0; i < wiener_win2 * wiener_win2; ++i) {
+        if (H_ref[i] != H_test[i]) {
+          failed = 1;
+          printf("win %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n",
+                 wiener_win, iter, i, H_ref[i], H_test[i]);
+          break;
+        }
+      }
+      ASSERT_EQ(failed, 0);
+    }
+  }
+}
+
+TEST_P(WienerTest, RandomValues) {
+  RunWienerTest(WIENER_WIN, 1);
+  RunWienerTest(WIENER_WIN_CHROMA, 1);
+}
+
+TEST_P(WienerTest, ExtremeValues) {
+  RunWienerTest_ExtremeValues(WIENER_WIN);
+  RunWienerTest_ExtremeValues(WIENER_WIN_CHROMA);
+}
+
+TEST_P(WienerTest, DISABLED_Speed) {
+  RunWienerTest(WIENER_WIN, 200);
+  RunWienerTest(WIENER_WIN_CHROMA, 200);
+}
+
+INSTANTIATE_TEST_SUITE_P(C, WienerTest, ::testing::Values(compute_stats_opt_c));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE4_1, WienerTest,
+                         ::testing::Values(av1_compute_stats_sse4_1));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+
+INSTANTIATE_TEST_SUITE_P(AVX2, WienerTest,
+                         ::testing::Values(av1_compute_stats_avx2));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+
+INSTANTIATE_TEST_SUITE_P(NEON, WienerTest,
+                         ::testing::Values(av1_compute_stats_neon));
+#endif  // HAVE_NEON
+
+}  // namespace wiener_lowbd
+
+#if CONFIG_AV1_HIGHBITDEPTH
+// High bit-depth tests:
+namespace wiener_highbd {
+
+static void compute_stats_highbd_win_opt_c(int wiener_win, const uint8_t *dgd8,
+                                           const uint8_t *src8, int h_start,
+                                           int h_end, int v_start, int v_end,
+                                           int dgd_stride, int src_stride,
+                                           int64_t *M, int64_t *H,
+                                           aom_bit_depth_t bit_depth) {
+  ASSERT_TRUE(wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA);
+  int i, j, k, l, m, n;
+  const int pixel_count = (h_end - h_start) * (v_end - v_start);
+  const int wiener_win2 = wiener_win * wiener_win;
+  const int wiener_halfwin = (wiener_win >> 1);
+  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+  const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
+  const uint16_t avg =
+      find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride);
+
+  std::vector<std::vector<int64_t> > M_int(wiener_win,
+                                           std::vector<int64_t>(wiener_win, 0));
+  std::vector<std::vector<int64_t> > H_int(
+      wiener_win * wiener_win, std::vector<int64_t>(wiener_win * 8, 0));
+  std::vector<std::vector<int32_t> > sumY(wiener_win,
+                                          std::vector<int32_t>(wiener_win, 0));
+
+  memset(M, 0, sizeof(*M) * wiener_win2);
+  memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
+
+  int64_t sumX = 0;
+  const uint16_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
+
+  // Main loop handles two pixels at a time
+  // We can assume that h_start is even, since it will always be aligned to
+  // a tile edge + some number of restoration units, and both of those will
+  // be 64-pixel aligned.
+  // However, at the edge of the image, h_end may be odd, so we need to handle
+  // that case correctly.
+  assert(h_start % 2 == 0);
+  for (i = v_start; i < v_end; i++) {
+    const int h_end_even = h_end & ~1;
+    const int has_odd_pixel = h_end & 1;
+    for (j = h_start; j < h_end_even; j += 2) {
+      const uint16_t X1 = src[i * src_stride + j];
+      const uint16_t X2 = src[i * src_stride + j + 1];
+      sumX += X1 + X2;
+
+      const uint16_t *dgd_ij = dgd_win + i * dgd_stride + j;
+      for (k = 0; k < wiener_win; k++) {
+        for (l = 0; l < wiener_win; l++) {
+          const uint16_t *dgd_ijkl = dgd_ij + k * dgd_stride + l;
+          int64_t *H_int_temp = &H_int[(l * wiener_win + k)][0];
+          const uint16_t D1 = dgd_ijkl[0];
+          const uint16_t D2 = dgd_ijkl[1];
+          sumY[k][l] += D1 + D2;
+          M_int[l][k] += D1 * X1 + D2 * X2;
+          for (m = 0; m < wiener_win; m++) {
+            for (n = 0; n < wiener_win; n++) {
+              H_int_temp[m * 8 + n] += D1 * dgd_ij[n + dgd_stride * m] +
+                                       D2 * dgd_ij[n + dgd_stride * m + 1];
+            }
+          }
+        }
+      }
+    }
+    // If the width is odd, add in the final pixel
+    if (has_odd_pixel) {
+      const uint16_t X1 = src[i * src_stride + j];
+      sumX += X1;
+
+      const uint16_t *dgd_ij = dgd_win + i * dgd_stride + j;
+      for (k = 0; k < wiener_win; k++) {
+        for (l = 0; l < wiener_win; l++) {
+          const uint16_t *dgd_ijkl = dgd_ij + k * dgd_stride + l;
+          int64_t *H_int_temp = &H_int[(l * wiener_win + k)][0];
+          const uint16_t D1 = dgd_ijkl[0];
+          sumY[k][l] += D1;
+          M_int[l][k] += D1 * X1;
+          for (m = 0; m < wiener_win; m++) {
+            for (n = 0; n < wiener_win; n++) {
+              H_int_temp[m * 8 + n] += D1 * dgd_ij[n + dgd_stride * m];
+            }
+          }
+        }
+      }
+    }
+  }
+
+  uint8_t bit_depth_divider = 1;
+  if (bit_depth == AOM_BITS_12)
+    bit_depth_divider = 16;
+  else if (bit_depth == AOM_BITS_10)
+    bit_depth_divider = 4;
+
+  const int64_t avg_square_sum = (int64_t)avg * (int64_t)avg * pixel_count;
+  for (k = 0; k < wiener_win; k++) {
+    for (l = 0; l < wiener_win; l++) {
+      M[l * wiener_win + k] =
+          (M_int[l][k] +
+           (avg_square_sum - (int64_t)avg * (sumX + sumY[k][l]))) /
+          bit_depth_divider;
+      for (m = 0; m < wiener_win; m++) {
+        for (n = 0; n < wiener_win; n++) {
+          H[(l * wiener_win + k) * wiener_win2 + m * wiener_win + n] =
+              (H_int[(l * wiener_win + k)][n * 8 + m] +
+               (avg_square_sum - (int64_t)avg * (sumY[k][l] + sumY[n][m]))) /
+              bit_depth_divider;
+        }
+      }
+    }
+  }
+}
+
+void compute_stats_highbd_opt_c(int wiener_win, const uint8_t *dgd,
+                                const uint8_t *src, int h_start, int h_end,
+                                int v_start, int v_end, int dgd_stride,
+                                int src_stride, int64_t *M, int64_t *H,
+                                aom_bit_depth_t bit_depth) {
+  if (wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA) {
+    compute_stats_highbd_win_opt_c(wiener_win, dgd, src, h_start, h_end,
+                                   v_start, v_end, dgd_stride, src_stride, M, H,
+                                   bit_depth);
+  } else {
+    av1_compute_stats_highbd_c(wiener_win, dgd, src, h_start, h_end, v_start,
+                               v_end, dgd_stride, src_stride, M, H, bit_depth);
+  }
+}
+
+static const int kIterations = 100;
+typedef void (*compute_stats_Func)(int wiener_win, const uint8_t *dgd,
+                                   const uint8_t *src, int h_start, int h_end,
+                                   int v_start, int v_end, int dgd_stride,
+                                   int src_stride, int64_t *M, int64_t *H,
+                                   aom_bit_depth_t bit_depth);
+
+typedef std::tuple<const compute_stats_Func> WienerTestParam;
+
+class WienerTestHighbd : public ::testing::TestWithParam<WienerTestParam> {
+ public:
+  void SetUp() override {
+    src_buf = (uint16_t *)aom_memalign(
+        32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*src_buf));
+    ASSERT_NE(src_buf, nullptr);
+    dgd_buf = (uint16_t *)aom_memalign(
+        32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*dgd_buf));
+    ASSERT_NE(dgd_buf, nullptr);
+    target_func_ = GET_PARAM(0);
+  }
+  void TearDown() override {
+    aom_free(src_buf);
+    aom_free(dgd_buf);
+  }
+  void RunWienerTest(const int32_t wiener_win, int32_t run_times,
+                     aom_bit_depth_t bit_depth);
+  void RunWienerTest_ExtremeValues(const int32_t wiener_win,
+                                   aom_bit_depth_t bit_depth);
+
+ private:
+  compute_stats_Func target_func_;
+  libaom_test::ACMRandom rng_;
+  uint16_t *src_buf;
+  uint16_t *dgd_buf;
+};
+
+void WienerTestHighbd::RunWienerTest(const int32_t wiener_win,
+                                     int32_t run_times,
+                                     aom_bit_depth_t bit_depth) {
+  const int32_t wiener_halfwin = wiener_win >> 1;
+  const int32_t wiener_win2 = wiener_win * wiener_win;
+  DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]);
+  // Note(rachelbarker):
+  // The SIMD code requires `h_start` to be even, but can otherwise
+  // deal with any values of `h_end`, `v_start`, `v_end`. We cover this
+  // entire range, even though (at the time of writing) `h_start` and `v_start`
+  // will always be multiples of 64 when called from non-test code.
+  // If in future any new requirements are added, these lines will
+  // need changing.
+  int h_start = (rng_.Rand16() % (MAX_WIENER_BLOCK / 2)) & ~1;
+  int h_end = run_times != 1 ? 256 : (rng_.Rand16() % MAX_WIENER_BLOCK);
+  if (h_start > h_end) std::swap(h_start, h_end);
+  int v_start = rng_.Rand16() % (MAX_WIENER_BLOCK / 2);
+  int v_end = run_times != 1 ? 256 : (rng_.Rand16() % MAX_WIENER_BLOCK);
+  if (v_start > v_end) std::swap(v_start, v_end);
+  const int dgd_stride = h_end;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int iters = run_times == 1 ? kIterations : 2;
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_buf[i] = rng_.Rand16() % (1 << bit_depth);
+      src_buf[i] = rng_.Rand16() % (1 << bit_depth);
+    }
+    const uint8_t *dgd8 = CONVERT_TO_BYTEPTR(
+        dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin);
+    const uint8_t *src8 = CONVERT_TO_BYTEPTR(src_buf);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      av1_compute_stats_highbd_c(wiener_win, dgd8, src8, h_start, h_end,
+                                 v_start, v_end, dgd_stride, src_stride, M_ref,
+                                 H_ref, bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      target_func_(wiener_win, dgd8, src8, h_start, h_end, v_start, v_end,
+                   dgd_stride, src_stride, M_test, H_test, bit_depth);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 10) {
+      printf("win %d bd %d %3dx%-3d:%7.2f/%7.2fns", wiener_win, bit_depth,
+             h_end, v_end, time1, time2);
+      printf("(%3.2f)\n", time1 / time2);
+    }
+    int failed = 0;
+    for (int i = 0; i < wiener_win2; ++i) {
+      if (M_ref[i] != M_test[i]) {
+        failed = 1;
+        printf("win %d bd %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64
+               " \n",
+               wiener_win, bit_depth, iter, i, M_ref[i], M_test[i]);
+        break;
+      }
+    }
+    for (int i = 0; i < wiener_win2 * wiener_win2; ++i) {
+      if (H_ref[i] != H_test[i]) {
+        failed = 1;
+        printf("win %d bd %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64
+               " \n",
+               wiener_win, bit_depth, iter, i, H_ref[i], H_test[i]);
+        break;
+      }
+    }
+    ASSERT_EQ(failed, 0);
+  }
+}
+
+void WienerTestHighbd::RunWienerTest_ExtremeValues(const int32_t wiener_win,
+                                                   aom_bit_depth_t bit_depth) {
+  const int32_t wiener_halfwin = wiener_win >> 1;
+  const int32_t wiener_win2 = wiener_win * wiener_win;
+  DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]);
+  DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]);
+  const int h_start = 16;
+  const int h_end = MAX_WIENER_BLOCK;
+  const int v_start = 16;
+  const int v_end = MAX_WIENER_BLOCK;
+  const int dgd_stride = h_end;
+  const int src_stride = MAX_DATA_BLOCK;
+  const int iters = 1;
+  for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
+    // Fill with alternating extreme values to maximize difference with
+    // the average.
+    for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
+      dgd_buf[i] = i & 1 ? ((uint16_t)1 << bit_depth) - 1 : 0;
+      src_buf[i] = i & 1 ? ((uint16_t)1 << bit_depth) - 1 : 0;
+    }
+    const uint8_t *dgd8 = CONVERT_TO_BYTEPTR(
+        dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin);
+    const uint8_t *src8 = CONVERT_TO_BYTEPTR(src_buf);
+
+    av1_compute_stats_highbd_c(wiener_win, dgd8, src8, h_start, h_end, v_start,
+                               v_end, dgd_stride, src_stride, M_ref, H_ref,
+                               bit_depth);
+
+    target_func_(wiener_win, dgd8, src8, h_start, h_end, v_start, v_end,
+                 dgd_stride, src_stride, M_test, H_test, bit_depth);
+
+    int failed = 0;
+    for (int i = 0; i < wiener_win2; ++i) {
+      if (M_ref[i] != M_test[i]) {
+        failed = 1;
+        printf("win %d bd %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64
+               " \n",
+               wiener_win, bit_depth, iter, i, M_ref[i], M_test[i]);
+        break;
+      }
+    }
+    for (int i = 0; i < wiener_win2 * wiener_win2; ++i) {
+      if (H_ref[i] != H_test[i]) {
+        failed = 1;
+        printf("win %d bd %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64
+               " \n",
+               wiener_win, bit_depth, iter, i, H_ref[i], H_test[i]);
+        break;
+      }
+    }
+    ASSERT_EQ(failed, 0);
+  }
+}
+
+TEST_P(WienerTestHighbd, RandomValues) {
+  RunWienerTest(WIENER_WIN, 1, AOM_BITS_8);
+  RunWienerTest(WIENER_WIN_CHROMA, 1, AOM_BITS_8);
+  RunWienerTest(WIENER_WIN, 1, AOM_BITS_10);
+  RunWienerTest(WIENER_WIN_CHROMA, 1, AOM_BITS_10);
+  RunWienerTest(WIENER_WIN, 1, AOM_BITS_12);
+  RunWienerTest(WIENER_WIN_CHROMA, 1, AOM_BITS_12);
+}
+
+TEST_P(WienerTestHighbd, ExtremeValues) {
+  RunWienerTest_ExtremeValues(WIENER_WIN, AOM_BITS_8);
+  RunWienerTest_ExtremeValues(WIENER_WIN_CHROMA, AOM_BITS_8);
+  RunWienerTest_ExtremeValues(WIENER_WIN, AOM_BITS_10);
+  RunWienerTest_ExtremeValues(WIENER_WIN_CHROMA, AOM_BITS_10);
+  RunWienerTest_ExtremeValues(WIENER_WIN, AOM_BITS_12);
+  RunWienerTest_ExtremeValues(WIENER_WIN_CHROMA, AOM_BITS_12);
+}
+
+TEST_P(WienerTestHighbd, DISABLED_Speed) {
+  RunWienerTest(WIENER_WIN, 200, AOM_BITS_8);
+  RunWienerTest(WIENER_WIN_CHROMA, 200, AOM_BITS_8);
+  RunWienerTest(WIENER_WIN, 200, AOM_BITS_10);
+  RunWienerTest(WIENER_WIN_CHROMA, 200, AOM_BITS_10);
+  RunWienerTest(WIENER_WIN, 200, AOM_BITS_12);
+  RunWienerTest(WIENER_WIN_CHROMA, 200, AOM_BITS_12);
+}
+
+INSTANTIATE_TEST_SUITE_P(C, WienerTestHighbd,
+                         ::testing::Values(compute_stats_highbd_opt_c));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE4_1, WienerTestHighbd,
+                         ::testing::Values(av1_compute_stats_highbd_sse4_1));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, WienerTestHighbd,
+                         ::testing::Values(av1_compute_stats_highbd_avx2));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, WienerTestHighbd,
+                         ::testing::Values(av1_compute_stats_highbd_neon));
+#endif  // HAVE_NEON
+
+// A test that reproduces b/274668506: signed integer overflow in
+// update_a_sep_sym().
+TEST(SearchWienerTest, 10bitSignedIntegerOverflowInUpdateASepSym) {
+  constexpr int kWidth = 427;
+  constexpr int kHeight = 1;
+  std::vector<uint16_t> buffer(3 * kWidth * kHeight);
+  // The values in the buffer alternate between 0 and 1023.
+  uint16_t value = 0;
+  for (size_t i = 0; i < buffer.size(); ++i) {
+    buffer[i] = value;
+    value = 1023 - value;
+  }
+  unsigned char *img_data = reinterpret_cast<unsigned char *>(buffer.data());
+
+  aom_image_t img;
+  EXPECT_EQ(
+      aom_img_wrap(&img, AOM_IMG_FMT_I44416, kWidth, kHeight, 1, img_data),
+      &img);
+  img.cp = AOM_CICP_CP_UNSPECIFIED;
+  img.tc = AOM_CICP_TC_UNSPECIFIED;
+  img.mc = AOM_CICP_MC_UNSPECIFIED;
+  img.range = AOM_CR_FULL_RANGE;
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA),
+            AOM_CODEC_OK);
+  cfg.rc_end_usage = AOM_Q;
+  cfg.g_profile = 1;
+  cfg.g_bit_depth = AOM_BITS_10;
+  cfg.g_input_bit_depth = 10;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_limit = 1;
+  cfg.g_lag_in_frames = 0;
+  cfg.kf_mode = AOM_KF_DISABLED;
+  cfg.kf_max_dist = 0;
+  cfg.g_threads = 61;
+  cfg.rc_min_quantizer = 2;
+  cfg.rc_max_quantizer = 20;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 11), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_ROW_MT, 1), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_ROWS, 4), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 3), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_SKIP_POSTPROC_FILTERING, 1),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM),
+            AOM_CODEC_OK);
+
+  // Encode frame
+  EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK);
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(aom_codec_encode(&enc, nullptr, 0, 1, 0), AOM_CODEC_OK);
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
+
+// A test that reproduces b/281219978: signed integer overflow in
+// update_b_sep_sym().
+TEST(SearchWienerTest, 12bitSignedIntegerOverflowInUpdateBSepSym) {
+  constexpr int kWidth = 311;
+  constexpr int kHeight = 3;
+  static const uint16_t buffer[3 * kWidth * kHeight] = {
+    // Y plane:
+    0, 0, 0, 2156, 2513, 2211, 4095, 4095, 0, 2538, 0, 0, 0, 0, 4095, 0, 258,
+    941, 4095, 907, 0, 0, 2325, 2485, 2408, 4095, 1513, 0, 3644, 2080, 4095,
+    4095, 0, 2135, 0, 2461, 4095, 0, 4095, 4095, 0, 1987, 0, 3629, 0, 4095,
+    3918, 4095, 0, 4095, 4095, 4095, 0, 1065, 0, 2072, 3597, 102, 0, 534, 0, 0,
+    0, 4095, 0, 0, 4095, 0, 4095, 0, 4095, 0, 3611, 0, 1139, 4095, 0, 0, 0, 0,
+    0, 4095, 0, 0, 0, 0, 4095, 4095, 4095, 0, 0, 0, 3070, 3224, 0, 0, 4095,
+    4051, 4095, 0, 4095, 3712, 0, 1465, 4095, 1699, 4095, 4095, 0, 0, 0, 3885,
+    0, 4095, 0, 0, 4095, 1686, 4095, 4095, 4095, 4095, 1330, 0, 0, 0, 4095, 0,
+    4095, 4095, 3919, 4095, 781, 2371, 2055, 4095, 912, 3710, 0, 2045, 0, 4095,
+    4095, 4095, 1811, 0, 1298, 1115, 0, 3327, 0, 0, 4095, 0, 253, 2386, 4095,
+    1791, 3657, 1444, 0, 4095, 1918, 4095, 4095, 0, 4095, 305, 1587, 0, 4095, 0,
+    3759, 0, 0, 4095, 2387, 4095, 4095, 0, 0, 4095, 4095, 0, 1015, 4095, 0, 768,
+    2598, 1667, 130, 4095, 0, 0, 435, 4095, 3683, 4095, 0, 4095, 4095, 1888,
+    2828, 4095, 3349, 0, 4095, 4095, 4095, 4095, 0, 4095, 0, 0, 4095, 0, 2491,
+    1598, 0, 0, 383, 3712, 4095, 0, 0, 4095, 760, 4095, 4095, 4095, 2030, 4095,
+    0, 0, 3236, 0, 1040, 0, 0, 4095, 0, 0, 4095, 4095, 4095, 0, 0, 1043, 3897,
+    2446, 233, 1589, 427, 4095, 4095, 4095, 4095, 0, 1656, 3786, 4095, 0, 840,
+    4095, 4095, 1429, 4095, 0, 4095, 2734, 4095, 0, 2431, 1801, 278, 0, 4095, 0,
+    4095, 0, 0, 420, 0, 0, 746, 0, 0, 3281, 3006, 4095, 4095, 0, 0, 0, 3605,
+    4095, 4095, 0, 4095, 4095, 4095, 4095, 2660, 496, 4095, 0, 0, 0, 0, 4095, 0,
+    1317, 4095, 4095, 510, 1919, 0, 3893, 0, 4095, 4095, 4095, 4095, 4095, 2071,
+    2006, 0, 3316, 4095, 0, 0, 4095, 852, 2982, 0, 2073, 0, 2728, 1499, 4095,
+    852, 361, 3137, 4095, 4095, 1502, 1575, 0, 4095, 0, 0, 0, 0, 1585, 4095, 0,
+    4095, 0, 3188, 3244, 4095, 2958, 4095, 4095, 0, 4095, 4095, 4095, 1706,
+    2896, 4095, 1788, 730, 1146, 4095, 0, 0, 4095, 0, 0, 0, 2791, 3613, 2175,
+    2925, 0, 0, 0, 0, 0, 1279, 4095, 4095, 0, 4095, 0, 0, 2336, 0, 3462, 4095,
+    0, 4095, 1997, 2328, 2860, 0, 4095, 4095, 3241, 4095, 4095, 4095, 4095,
+    4095, 4095, 118, 0, 4095, 4095, 4095, 0, 3734, 0, 0, 0, 4095, 1952, 4095,
+    413, 4095, 1183, 4095, 0, 4095, 0, 0, 4095, 4095, 4095, 3805, 0, 1398, 0,
+    4095, 0, 0, 0, 4095, 4095, 4095, 2802, 3658, 4095, 4095, 0, 0, 0, 4095, 0,
+    897, 0, 4095, 2163, 0, 0, 0, 4095, 1440, 2487, 4095, 4095, 0, 4095, 4095,
+    4095, 2808, 0, 1999, 0, 0, 4095, 4095, 4095, 1563, 124, 2179, 754, 0, 0,
+    2407, 2798, 0, 4095, 4095, 0, 0, 1929, 0, 0, 0, 1387, 4095, 4095, 0, 0,
+    3911, 562, 4095, 0, 4095, 2639, 2673, 4095, 4095, 0, 0, 4095, 4095, 0, 4095,
+    4095, 901, 0, 321, 3961, 4095, 0, 4095, 4095, 4095, 0, 0, 0, 0, 3035, 3713,
+    3441, 0, 4095, 0, 0, 854, 1544, 3963, 1968, 4095, 0, 0, 0, 0, 2897, 4095, 0,
+    4095, 4095, 0, 235, 1011, 4095, 0, 3452, 4095, 4095, 0, 0, 4095, 4095, 4095,
+    4095, 4095, 3312, 0, 3064, 4095, 3981, 4095, 4095, 4095, 4095, 4095, 0, 791,
+    3243, 4095, 799, 0, 0, 0, 523, 2117, 3776, 0, 4095, 3311, 0, 543, 4095,
+    4095, 4095, 0, 0, 4095, 4095, 4095, 4095, 0, 0, 4095, 4095, 225, 0, 1195,
+    3070, 1210, 4095, 0, 4095, 498, 782, 0, 0, 4095, 4095, 4095, 4095, 4095,
+    1456, 4095, 3898, 1472, 4095, 4095, 0, 4095, 4026, 0, 0, 2354, 1554, 0,
+    4095, 0, 2986, 0, 1053, 1228, 0, 0, 4095, 4095, 0, 0, 4095, 0, 0, 4095, 0,
+    0, 0, 606, 0, 4095, 3563, 4095, 2016, 4095, 0, 0, 4095, 0, 4095, 4095, 4095,
+    0, 0, 0, 929, 0, 0, 4095, 0, 3069, 4095, 0, 2687, 4095, 4095, 4095, 2015,
+    4095, 4095, 4095, 0, 4095, 0, 0, 2860, 3668, 0, 0, 4095, 2523, 2104, 0, 0,
+    3063, 4095, 3674, 4095, 0, 2762, 0, 4095, 2582, 3473, 930, 0, 1012, 108, 38,
+    4095, 1148, 3568, 4036, 4095, 4095, 0, 1120, 1873, 3028, 4095, 515, 1902,
+    4095, 0, 815, 4095, 1548, 0, 1073, 3919, 4095, 2374, 0, 3126, 4095, 2268, 0,
+    0, 0, 4095, 425, 4095, 0, 0, 4095, 4095, 2710, 4095, 2067, 4095, 4095, 2201,
+    4095, 4095, 0, 4095, 4095, 2933, 0, 417, 2801, 4095, 4095, 3274, 0, 2870,
+    4095, 4095, 0, 0, 973, 0, 0, 3129, 4095, 0, 0, 0, 4095, 4095, 4095, 0, 242,
+    4095, 0, 4095, 0, 0, 0, 0, 987, 0, 2426, 4045, 2780, 0, 4095, 3762, 3361,
+    3095, 4095, 596, 1072, 4071, 4095, 4095, 0, 0, 81, 0, 1001, 1683, 4095,
+    4095, 3105, 2673, 0, 3300, 104, 4030, 0, 2615, 4095, 4095, 0, 4095, 1830,
+    3917, 4095, 4095, 4095, 0, 4095, 3637, 0, 4095, 4095, 3677, 4095, 4095, 0,
+    880, 4095, 4095, 0, 2797, 0, 0, 0, 0, 3225, 4095, 4095, 1925, 2885, 1879, 0,
+    0, 4095, 0, 0, 0, 2974, 559, 0, 0, 0, 699, 997, 1491, 423, 4012, 0, 2315,
+    4095, 0, 0, 4095, 0, 836, 4095, 0, 4095, 0, 1752, 0, 0, 0, 4095, 4095, 0, 0,
+    51, 4095, 350, 0, 2143, 2588, 0, 4095, 0, 4095, 0, 2757, 2370, 4095, 668,
+    4095, 0, 4095, 0, 3652, 3890, 0, 4095, 0, 4095, 4095, 4095, 4095, 4095,
+    // U plane:
+    4095, 4095, 1465, 0, 588, 4095, 0, 4095, 4095, 4095, 0, 2167, 4095, 4095,
+    918, 3223, 4095, 4095, 0, 696, 4095, 4095, 0, 0, 594, 4095, 2935, 0, 0, 0,
+    2036, 4095, 0, 2492, 4095, 4095, 0, 0, 0, 3883, 0, 4095, 483, 4095, 4095,
+    324, 923, 0, 3079, 0, 4095, 4095, 810, 0, 3371, 4095, 4095, 0, 4095, 2756,
+    0, 723, 0, 3338, 1084, 0, 4095, 4095, 3764, 0, 4095, 4095, 4095, 2323, 0,
+    3693, 682, 0, 0, 909, 4095, 2348, 4095, 4095, 4095, 1509, 4095, 0, 4095,
+    4095, 4095, 4095, 3977, 3652, 1580, 637, 4095, 0, 593, 4095, 1199, 1773,
+    4095, 4095, 4095, 0, 3447, 0, 0, 4095, 3873, 0, 0, 2094, 0, 1195, 0, 3892,
+    4095, 4095, 729, 4095, 0, 0, 4095, 449, 4095, 4095, 2900, 0, 4095, 0, 2114,
+    4095, 4095, 4095, 1174, 995, 2933, 360, 0, 1970, 0, 4095, 1208, 0, 4095, 0,
+    4095, 0, 4095, 4095, 0, 4095, 0, 0, 0, 1976, 0, 0, 921, 4095, 4095, 192,
+    1006, 0, 0, 2725, 4095, 0, 2813, 0, 0, 2375, 4095, 1982, 0, 2725, 4095,
+    1225, 3566, 4095, 0, 344, 863, 2747, 0, 4095, 4095, 1928, 4095, 4095, 0,
+    3640, 0, 1744, 3191, 4095, 4095, 0, 4095, 4095, 4095, 0, 0, 748, 4095, 0,
+    2609, 0, 0, 0, 0, 0, 3508, 4095, 4095, 2463, 0, 4095, 0, 4095, 4095, 4095,
+    3175, 419, 2193, 0, 0, 4095, 0, 0, 4095, 4051, 2159, 4095, 4095, 2262, 379,
+    4095, 0, 0, 3399, 4095, 4095, 4095, 3769, 2510, 4054, 3336, 730, 3968, 0, 0,
+    3354, 0, 1822, 0, 4095, 0, 3847, 3823, 3262, 0, 0, 2936, 0, 4095, 4095,
+    2120, 0, 3147, 0, 2838, 3480, 474, 1194, 4095, 4095, 2820, 4095, 0, 4095,
+    1882, 4095, 1085, 0, 4095, 2234, 3371, 4095, 0, 4095, 0, 0, 0, 2586, 4095,
+    4095, 4095, 4095, 0, 3818, 1401, 2273, 4095, 0, 4095, 0, 3907, 4095, 4095,
+    694, 0, 4066, 4095, 0, 0, 4095, 2116, 4095, 4095, 4095, 4095, 4095, 0, 2821,
+    29, 0, 0, 663, 1711, 652, 1271, 4095, 4095, 2401, 3726, 4095, 3453, 1803,
+    3614, 0, 4095, 3439, 4095, 0, 4095, 0, 816, 0, 0, 4095, 4095, 2635, 0, 1918,
+    0, 2663, 381, 0, 0, 3670, 0, 4095, 3065, 965, 4095, 4095, 4095, 2993, 4095,
+    4095, 0, 4095, 973, 4095, 0, 4095, 4095, 0, 3071, 0, 2777, 4095, 4095, 0,
+    3996, 4095, 1637, 0, 4095, 67, 3784, 0, 0, 4095, 2603, 579, 4095, 4095,
+    2854, 4095, 3016, 0, 4095, 0, 0, 4095, 4095, 4095, 4095, 3998, 3023, 4095,
+    4095, 0, 0, 0, 4095, 4095, 4095, 4095, 0, 0, 2623, 1308, 55, 4095, 0, 0,
+    2554, 2311, 0, 4095, 4095, 4095, 1134, 2112, 0, 4095, 4095, 0, 4095, 0, 645,
+    0, 0, 4095, 0, 909, 0, 0, 1719, 4095, 0, 3542, 0, 575, 0, 4095, 4095, 4095,
+    3428, 1172, 481, 1521, 4095, 3199, 1265, 4095, 3518, 4017, 4095, 760, 2042,
+    3986, 0, 4095, 42, 4095, 0, 4095, 4095, 4095, 4095, 2235, 346, 3865, 0,
+    4095, 4095, 4095, 4095, 4095, 4095, 845, 4095, 0, 2826, 4095, 4095, 0, 0,
+    335, 1614, 1465, 0, 4095, 4095, 0, 2771, 4095, 0, 2810, 4095, 4095, 0, 1254,
+    4095, 2589, 4095, 4095, 2252, 0, 0, 0, 4095, 0, 73, 4095, 4095, 0, 1341, 0,
+    0, 0, 0, 4095, 0, 0, 2645, 1985, 492, 914, 3996, 4095, 4095, 4095, 0, 2383,
+    2556, 433, 0, 4095, 1094, 4095, 4095, 642, 4095, 1722, 0, 3460, 4095, 4095,
+    4095, 4095, 4095, 0, 154, 4095, 92, 4095, 0, 0, 0, 4095, 0, 4095, 4095, 444,
+    0, 2925, 0, 0, 0, 0, 1628, 0, 4095, 1731, 2418, 697, 4095, 0, 2513, 4095, 0,
+    4095, 4095, 4095, 4095, 4095, 0, 2510, 4095, 3850, 0, 0, 4095, 2480, 4095,
+    4095, 2661, 4095, 0, 4095, 0, 0, 4095, 4095, 847, 4095, 4095, 3257, 443, 0,
+    67, 0, 0, 0, 4095, 0, 0, 3073, 4095, 0, 4095, 0, 4095, 0, 4095, 1224, 4095,
+    4095, 4095, 0, 4095, 958, 0, 4095, 0, 2327, 684, 0, 0, 0, 0, 4095, 4095, 0,
+    3693, 795, 4095, 0, 621, 1592, 2314, 4095, 0, 928, 1897, 4095, 4095, 0,
+    4095, 0, 0, 4095, 2619, 4095, 0, 4095, 0, 0, 4095, 2485, 4095, 4095, 0, 435,
+    4095, 1818, 4095, 4095, 0, 0, 0, 4095, 4095, 4095, 4095, 0, 1671, 4095,
+    4095, 0, 2617, 0, 2572, 0, 0, 4095, 3471, 0, 0, 4095, 2719, 3979, 1307, 0,
+    0, 0, 0, 1794, 642, 447, 913, 4095, 3927, 0, 2686, 0, 0, 4095, 0, 857, 0,
+    4095, 4095, 567, 2385, 0, 0, 4095, 893, 0, 289, 0, 0, 0, 4095, 4095, 2566,
+    0, 1913, 0, 2350, 1033, 2764, 0, 4095, 0, 4095, 0, 0, 0, 0, 4095, 3952,
+    3969, 0, 3476, 0, 4095, 4095, 393, 0, 2613, 0, 0, 1422, 0, 3359, 491, 3263,
+    4095, 4095, 0, 0, 4095, 697, 3601, 4095, 0, 4095, 4095, 0, 4095, 0, 0, 4095,
+    0, 4095, 4095, 4095, 2506, 0, 0, 1403, 0, 3836, 3976, 0, 4095, 4095, 4095,
+    2497, 4095, 4095, 4095, 4095, 0, 4095, 3317, 4095, 4095, 4095, 0, 0, 1131,
+    0, 0, 0, 4095, 0, 0, 4095, 0, 0, 2988, 4095, 4095, 2711, 2487, 1335, 0, 0,
+    0, 4095, 261, 4095, 86, 0, 0, 1138, 4095, 0, 0, 4095, 4095, 0, 0, 0, 334, 0,
+    2395, 3297, 4095, 1698, 4095, 1791, 1341, 0, 3559, 0, 4095, 0, 2056, 3238,
+    3310, 4095, 4095, 779, 2129, 2849, 4095, 2622, 1051, 0, 0, 1282, 4095, 1246,
+    0, 0, 3696, 4095, 556, 0, 0, 3463, 2658, 3572, 4095, 3982, 4095, 4095, 0, 0,
+    4053, 4095, 4095, 4095, 2162, 2567, 1621, 4095, 4095, 1522, 293, 4095, 0, 0,
+    1976, 4095, 3089, 4095, 0, 0, 0, 0, 3650,
+    // V plane:
+    0, 1892, 4095, 1995, 0, 0, 0, 2208, 1152, 1794, 4095, 4095, 89, 3333, 4095,
+    2478, 4095, 2505, 4095, 0, 2664, 4095, 1984, 0, 1144, 4095, 0, 4095, 0,
+    4095, 0, 0, 0, 2404, 1727, 4095, 4095, 0, 1326, 2033, 0, 4095, 0, 4095,
+    3022, 0, 4095, 0, 1980, 4095, 0, 2284, 4095, 0, 3422, 0, 4095, 2171, 3155,
+    4095, 0, 4095, 0, 636, 0, 0, 4095, 3264, 3862, 0, 2164, 0, 0, 3879, 3886, 0,
+    225, 0, 0, 4095, 0, 1956, 523, 464, 738, 0, 1545, 0, 2829, 4095, 4095, 4095,
+    799, 4095, 358, 4095, 0, 0, 953, 0, 0, 2081, 4095, 1604, 4095, 2086, 0, 954,
+    0, 0, 2393, 2413, 4095, 4095, 0, 3583, 4095, 4095, 2995, 4095, 0, 4095,
+    4095, 3501, 4095, 247, 4095, 0, 0, 0, 4095, 1303, 3382, 1059, 4095, 0, 543,
+    1276, 1801, 0, 0, 0, 2928, 0, 4095, 3931, 70, 0, 0, 3992, 4095, 1278, 1930,
+    4095, 0, 4095, 4095, 3894, 0, 0, 0, 0, 4095, 0, 0, 0, 0, 0, 0, 4095, 4095,
+    4095, 1098, 4095, 2059, 0, 380, 3166, 0, 4095, 2215, 0, 0, 2846, 0, 0, 2614,
+    528, 4095, 0, 4095, 2371, 0, 4095, 0, 0, 0, 0, 4095, 3133, 4095, 4095, 0,
+    4095, 1283, 3821, 1772, 0, 0, 4095, 4095, 4095, 890, 3475, 4095, 4095, 133,
+    3292, 1819, 4095, 4095, 4095, 0, 0, 4095, 702, 4095, 0, 0, 0, 4095, 0, 2137,
+    4095, 4095, 4095, 0, 0, 0, 4095, 4095, 1555, 2435, 2778, 4095, 0, 4095,
+    3825, 0, 3736, 3054, 0, 0, 4095, 4095, 4095, 0, 0, 0, 0, 371, 4095, 4095, 0,
+    0, 1565, 4095, 2731, 4095, 0, 756, 925, 0, 0, 0, 4095, 775, 1379, 4095,
+    1439, 0, 0, 0, 2680, 0, 0, 4095, 1280, 4095, 0, 0, 4095, 4095, 0, 3088, 0,
+    4095, 4095, 4095, 0, 0, 1526, 4095, 2314, 4095, 4095, 0, 4095, 288, 0, 205,
+    4095, 4095, 4095, 0, 1247, 2014, 0, 1530, 1985, 0, 0, 4095, 3195, 0, 4095,
+    4, 2397, 4095, 4095, 4095, 0, 4095, 4095, 4095, 0, 0, 0, 0, 0, 4031, 928,
+    4095, 0, 0, 4095, 4095, 4095, 1966, 4095, 2299, 1215, 4095, 0, 4095, 1335,
+    0, 4095, 1991, 4095, 0, 4095, 114, 0, 0, 0, 2123, 2639, 4095, 3323, 4095,
+    4095, 418, 209, 0, 0, 4095, 4095, 4095, 4095, 963, 0, 0, 0, 4095, 2505, 0,
+    3627, 0, 311, 3748, 2047, 4095, 2791, 0, 3643, 1852, 0, 0, 4095, 0, 2179, 0,
+    4095, 2678, 0, 0, 0, 2342, 4095, 4095, 0, 0, 4095, 0, 0, 0, 0, 1076, 0, 0,
+    4095, 0, 2370, 0, 3530, 0, 0, 0, 0, 0, 4095, 0, 0, 0, 3474, 1201, 0, 379,
+    699, 4095, 777, 4095, 0, 4095, 4095, 0, 1213, 1762, 4095, 4095, 4095, 0,
+    4095, 1090, 1233, 0, 4095, 0, 4095, 0, 0, 0, 2845, 3385, 2718, 0, 0, 2975,
+    3630, 0, 4095, 4095, 4095, 4095, 3261, 243, 0, 4095, 0, 0, 3836, 4095, 4095,
+    4095, 963, 0, 0, 2526, 0, 4095, 4000, 4095, 2069, 0, 0, 4095, 0, 4095, 1421,
+    0, 4095, 0, 4095, 4095, 0, 4095, 0, 4095, 4095, 1537, 4095, 3201, 0, 0,
+    4095, 2719, 4095, 0, 4095, 4095, 4095, 0, 4095, 0, 4095, 2300, 0, 2876, 0,
+    4095, 4095, 4095, 3235, 497, 635, 0, 1480, 4095, 0, 3067, 3979, 3741, 0,
+    3059, 1214, 4095, 4095, 2197, 0, 4095, 4095, 2734, 0, 4095, 4095, 3364,
+    2369, 4095, 303, 4095, 0, 4095, 4095, 3472, 1733, 4095, 4095, 4095, 0, 55,
+    0, 10, 1378, 1169, 4095, 0, 0, 688, 3613, 0, 4095, 2832, 867, 4095, 4095,
+    3514, 4095, 0, 4095, 4095, 2458, 3506, 0, 1920, 0, 1762, 1178, 2549, 4095,
+    3967, 4095, 0, 2975, 1282, 0, 377, 846, 3434, 97, 0, 0, 1616, 3526, 136,
+    1888, 0, 147, 334, 4095, 0, 4095, 0, 4095, 1106, 4095, 0, 4095, 3280, 4095,
+    4095, 0, 2849, 3528, 0, 4095, 4095, 0, 2306, 0, 3412, 0, 4095, 4095, 4095,
+    4048, 2273, 0, 4095, 4095, 4095, 0, 4095, 3031, 4095, 4095, 4095, 0, 3382,
+    3812, 2315, 4095, 0, 0, 0, 432, 4095, 3606, 0, 4, 2847, 4095, 0, 4095, 0, 0,
+    2616, 4095, 4095, 0, 4095, 0, 3394, 4095, 3976, 3119, 0, 0, 0, 0, 4046,
+    4095, 4095, 3331, 4095, 2127, 0, 4095, 0, 0, 0, 4095, 4095, 4095, 0, 4095,
+    4095, 4095, 0, 2068, 0, 0, 3882, 2967, 0, 1745, 4095, 2112, 478, 0, 4095, 0,
+    199, 4095, 4095, 3542, 4095, 2634, 4095, 4095, 1235, 4095, 4095, 167, 1553,
+    0, 4095, 2649, 0, 3383, 0, 4095, 2803, 4095, 0, 4095, 0, 785, 4095, 0, 4095,
+    1743, 4095, 0, 3945, 0, 4095, 1894, 4095, 3973, 4095, 0, 0, 4095, 0, 0,
+    4095, 318, 4095, 4095, 4095, 0, 261, 4095, 4095, 2125, 2690, 4095, 0, 4095,
+    3863, 1740, 4095, 0, 2899, 1509, 0, 0, 0, 2780, 4095, 1897, 2104, 4095,
+    1708, 284, 4095, 0, 4095, 3382, 4095, 4095, 483, 0, 0, 0, 3099, 0, 4095, 0,
+    926, 4095, 2062, 1931, 2121, 0, 4095, 0, 2485, 1535, 4095, 4095, 3662, 4095,
+    2419, 2487, 0, 4095, 4095, 4095, 0, 0, 4095, 0, 0, 2029, 0, 3008, 2338, 0,
+    4095, 0, 3854, 0, 4095, 0, 0, 1315, 0, 0, 0, 0, 3492, 0, 1445, 0, 11, 4095,
+    0, 0, 873, 0, 4095, 0, 4095, 2654, 3040, 0, 0, 0, 4095, 0, 68, 4095, 0, 0,
+    990, 0, 828, 1015, 88, 3606, 0, 2875, 4095, 0, 3117, 411, 0, 0, 2859, 0, 0,
+    4095, 3480, 25, 4095, 4095, 4095, 0, 0, 0, 4095, 4095, 4095, 4095, 1724, 0,
+    0, 0, 3635, 1063, 3728, 4095, 4095, 2025, 3715, 0, 0, 0, 3722, 0, 1648, 0,
+    4095, 3579, 0, 0, 0, 4095, 4095, 0, 4095
+  };
+  unsigned char *img_data =
+      reinterpret_cast<unsigned char *>(const_cast<uint16_t *>(buffer));
+
+  aom_image_t img;
+  EXPECT_EQ(
+      aom_img_wrap(&img, AOM_IMG_FMT_I44416, kWidth, kHeight, 1, img_data),
+      &img);
+  img.cp = AOM_CICP_CP_UNSPECIFIED;
+  img.tc = AOM_CICP_TC_UNSPECIFIED;
+  img.mc = AOM_CICP_MC_UNSPECIFIED;
+  img.range = AOM_CR_FULL_RANGE;
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA),
+            AOM_CODEC_OK);
+  cfg.rc_end_usage = AOM_Q;
+  cfg.g_profile = 2;
+  cfg.g_bit_depth = AOM_BITS_12;
+  cfg.g_input_bit_depth = 12;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_limit = 1;
+  cfg.g_lag_in_frames = 0;
+  cfg.kf_mode = AOM_KF_DISABLED;
+  cfg.kf_max_dist = 0;
+  cfg.g_threads = 34;
+  cfg.rc_min_quantizer = 8;
+  cfg.rc_max_quantizer = 20;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 14), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_ROW_MT, 1), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_ROWS, 4), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_COLUMNS, 4), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 0), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_SKIP_POSTPROC_FILTERING, 1),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM),
+            AOM_CODEC_OK);
+
+  // Encode frame
+  EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK);
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(aom_codec_encode(&enc, nullptr, 0, 1, 0), AOM_CODEC_OK);
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
+
+// A test that reproduces b/272139363: signed integer overflow in
+// update_b_sep_sym().
+TEST(SearchWienerTest, 10bitSignedIntegerOverflowInUpdateBSepSym) {
+  constexpr int kWidth = 34;
+  constexpr int kHeight = 3;
+  static const uint16_t buffer[3 * kWidth * kHeight] = {
+    // Y plane:
+    61, 765, 674, 188, 367, 944, 153, 275, 906, 433, 154, 51, 8, 855, 186, 154,
+    392, 0, 634, 3, 690, 1023, 1023, 1023, 1023, 1023, 1023, 8, 1, 64, 426, 0,
+    100, 344, 944, 816, 816, 33, 1023, 1023, 1023, 1023, 295, 1023, 1023, 1023,
+    1023, 1023, 1023, 1015, 1023, 231, 1020, 254, 439, 439, 894, 439, 150, 1019,
+    1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 385, 320, 575,
+    682, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 511, 699, 987, 3, 140,
+    661, 120, 33, 143, 0, 0, 0, 3, 40, 625, 585, 16, 579, 160, 867,
+    // U plane:
+    739, 646, 13, 603, 7, 328, 91, 32, 488, 870, 330, 330, 330, 330, 330, 330,
+    109, 330, 330, 330, 3, 545, 945, 249, 35, 561, 801, 32, 931, 639, 801, 91,
+    1023, 827, 844, 948, 631, 894, 854, 601, 432, 504, 85, 1, 0, 0, 89, 89, 0,
+    0, 0, 0, 0, 0, 432, 801, 382, 4, 0, 0, 2, 89, 89, 89, 89, 89, 89, 384, 0, 0,
+    0, 0, 0, 0, 0, 1023, 1019, 1, 3, 691, 575, 691, 691, 691, 691, 691, 691,
+    691, 691, 691, 691, 691, 84, 527, 4, 485, 8, 682, 698, 340, 1015, 706,
+    // V plane:
+    49, 10, 28, 1023, 1023, 1023, 0, 32, 32, 872, 114, 1003, 1023, 57, 477, 999,
+    1023, 309, 309, 309, 309, 309, 309, 309, 309, 309, 309, 309, 309, 309, 309,
+    9, 418, 418, 418, 418, 418, 418, 0, 0, 0, 1023, 4, 5, 0, 0, 1023, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 64, 0, 155, 709, 3, 331, 807, 633, 1023,
+    1018, 646, 886, 991, 692, 915, 294, 0, 35, 2, 0, 471, 643, 770, 346, 176,
+    32, 329, 322, 302, 61, 765, 674, 188, 367, 944, 153, 275, 906, 433, 154
+  };
+  unsigned char *img_data =
+      reinterpret_cast<unsigned char *>(const_cast<uint16_t *>(buffer));
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I44416, kWidth, kHeight, 1,
+                               img_data));
+  img.cp = AOM_CICP_CP_UNSPECIFIED;
+  img.tc = AOM_CICP_TC_UNSPECIFIED;
+  img.mc = AOM_CICP_MC_UNSPECIFIED;
+  img.range = AOM_CR_FULL_RANGE;
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA));
+  cfg.rc_end_usage = AOM_Q;
+  cfg.g_profile = 1;
+  cfg.g_bit_depth = AOM_BITS_10;
+  cfg.g_input_bit_depth = 10;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_limit = 1;
+  cfg.g_lag_in_frames = 0;
+  cfg.kf_mode = AOM_KF_DISABLED;
+  cfg.kf_max_dist = 0;
+  cfg.rc_min_quantizer = 3;
+  cfg.rc_max_quantizer = 54;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 28));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AV1E_SET_TILE_COLUMNS, 3));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 0));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_SKIP_POSTPROC_FILTERING, 1));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM));
+
+  // Encode frame
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0));
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0));
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+// A test that reproduces b/277121724: signed integer overflow in
+// update_b_sep_sym().
+TEST(SearchWienerTest, 8bitSignedIntegerOverflowInUpdateBSepSym) {
+  constexpr int kWidth = 198;
+  constexpr int kHeight = 3;
+  // 8-bit YUV 4:2:2
+  static const unsigned char buffer[2 * kWidth * kHeight] = {
+    // Y plane:
+    35, 225, 56, 91, 8, 142, 137, 143, 224, 49, 217, 57, 202, 163, 159, 246,
+    232, 134, 135, 14, 76, 101, 239, 88, 186, 159, 118, 23, 114, 20, 108, 41,
+    72, 17, 58, 242, 45, 146, 230, 14, 135, 140, 34, 61, 189, 181, 222, 71, 98,
+    221, 5, 199, 244, 85, 229, 163, 105, 87, 144, 105, 64, 150, 36, 233, 235, 1,
+    179, 190, 50, 222, 176, 109, 166, 18, 80, 129, 45, 9, 218, 144, 234, 10,
+    148, 117, 37, 10, 232, 139, 206, 92, 208, 247, 128, 79, 202, 79, 212, 89,
+    185, 152, 206, 182, 83, 105, 21, 86, 150, 84, 21, 165, 34, 251, 174, 240,
+    172, 155, 254, 85, 98, 25, 96, 78, 230, 253, 36, 19, 247, 155, 112, 216,
+    166, 114, 229, 118, 197, 149, 186, 194, 128, 45, 219, 26, 36, 77, 110, 45,
+    252, 238, 183, 161, 171, 96, 232, 108, 73, 61, 243, 58, 155, 38, 91, 209,
+    187, 206, 16, 165, 236, 145, 69, 126, 102, 10, 4, 43, 191, 106, 193, 240,
+    132, 226, 38, 78, 7, 152, 101, 255, 254, 39, 33, 86, 35, 247, 199, 179, 239,
+    198, 165, 58, 190, 171, 226, 94, 158, 21, 190, 151, 75, 176, 11, 53, 199,
+    87, 91, 1, 226, 20, 117, 96, 75, 192, 101, 200, 125, 106, 233, 176, 63, 204,
+    114, 16, 31, 222, 15, 14, 71, 2, 25, 47, 100, 174, 26, 209, 138, 138, 211,
+    147, 164, 204, 9, 104, 135, 250, 9, 201, 88, 218, 71, 251, 61, 199, 0, 34,
+    59, 115, 228, 161, 100, 132, 50, 4, 117, 100, 191, 126, 53, 28, 193, 42,
+    155, 206, 79, 80, 117, 11, 3, 253, 181, 181, 138, 239, 107, 142, 216, 57,
+    202, 126, 229, 250, 60, 62, 150, 128, 95, 32, 251, 207, 236, 208, 247, 183,
+    59, 19, 117, 40, 106, 87, 140, 57, 109, 190, 51, 105, 226, 116, 156, 3, 35,
+    86, 255, 138, 52, 211, 245, 76, 83, 109, 113, 77, 106, 77, 18, 56, 235, 158,
+    24, 53, 151, 104, 152, 21, 15, 46, 163, 144, 217, 168, 154, 44, 80, 25, 11,
+    37, 100, 235, 145, 154, 113, 0, 140, 153, 80, 64, 19, 121, 185, 144, 43,
+    206, 16, 16, 72, 189, 175, 231, 177, 40, 177, 206, 116, 4, 82, 43, 244, 237,
+    22, 252, 71, 194, 106, 4, 112, 0, 108, 137, 126, 80, 122, 142, 43, 205, 22,
+    209, 217, 165, 32, 208, 100, 70, 3, 120, 159, 203, 7, 233, 152, 37, 96, 212,
+    177, 1, 133, 218, 161, 172, 202, 192, 186, 114, 150, 121, 177, 227, 175, 64,
+    127, 153, 113, 91, 198, 0, 111, 227, 226, 218, 71, 62, 5, 43, 128, 27, 3,
+    82, 5, 10, 68, 153, 215, 181, 138, 246, 224, 170, 1, 241, 191, 181, 151,
+    167, 14, 80, 45, 4, 252, 29, 66, 125, 58, 225, 253, 255, 248, 224, 40, 24,
+    236, 46, 11, 219, 154, 134, 12, 76, 72, 97, 239, 50, 39, 85, 182, 55, 219,
+    19, 109, 81, 119, 125, 206, 159, 239, 67, 193, 180, 132, 80, 127, 2, 169,
+    99, 53, 47, 5, 100, 174, 151, 124, 246, 202, 93, 82, 65, 53, 214, 238, 32,
+    218, 15, 254, 153, 95, 79, 189, 67, 233, 47, 83, 48, 125, 144, 206, 82, 69,
+    186, 112, 134, 244, 96, 21, 143, 187, 248, 8, 224, 161, 227, 185, 236, 6,
+    175, 237, 169, 154, 89, 143, 106, 205, 26, 47, 155, 42, 28, 162, 7, 8, 45,
+    // U plane:
+    55, 165, 203, 139, 152, 208, 36, 177, 61, 49, 129, 211, 140, 71, 253, 250,
+    120, 167, 238, 67, 255, 223, 104, 32, 240, 179, 28, 41, 86, 84, 61, 243,
+    169, 212, 201, 0, 9, 236, 89, 194, 204, 75, 228, 250, 27, 81, 137, 29, 255,
+    131, 194, 241, 76, 133, 186, 135, 212, 197, 150, 145, 203, 96, 86, 231, 91,
+    119, 197, 67, 226, 2, 118, 66, 181, 86, 219, 86, 132, 137, 156, 161, 221,
+    18, 55, 170, 35, 206, 201, 193, 38, 63, 229, 29, 110, 96, 14, 135, 229, 99,
+    106, 108, 167, 110, 50, 32, 144, 113, 48, 29, 57, 29, 20, 199, 145, 245, 9,
+    183, 88, 174, 114, 237, 29, 40, 99, 117, 233, 6, 51, 227, 2, 28, 76, 149,
+    190, 23, 240, 73, 113, 10, 73, 240, 105, 220, 129, 26, 144, 214, 34, 4, 24,
+    219, 24, 156, 198, 214, 244, 143, 106, 255, 204, 93, 2, 88, 107, 211, 241,
+    242, 86, 189, 219, 164, 132, 149, 32, 228, 219, 60, 202, 218, 189, 34, 250,
+    160, 158, 36, 212, 212, 41, 233, 61, 92, 121, 170, 220, 192, 232, 255, 124,
+    249, 231, 55, 196, 219, 196, 62, 238, 187, 76, 33, 138, 67, 82, 159, 169,
+    196, 66, 196, 110, 194, 64, 35, 205, 64, 218, 12, 41, 188, 195, 244, 178,
+    17, 80, 8, 149, 39, 110, 146, 164, 162, 215, 227, 107, 103, 47, 52, 95, 3,
+    181, 90, 255, 80, 83, 206, 66, 153, 112, 72, 109, 235, 69, 105, 57, 75, 145,
+    186, 16, 87, 73, 61, 98, 197, 237, 17, 32, 207, 220, 246, 188, 46, 73, 121,
+    84, 252, 164, 111, 21, 98, 13, 170, 174, 170, 231, 77, 10, 113, 9, 217, 11,
+    // V plane:
+    124, 94, 69, 212, 107, 223, 228, 96, 56, 2, 158, 49, 251, 217, 143, 107,
+    113, 17, 84, 169, 208, 43, 28, 37, 176, 54, 235, 150, 135, 135, 221, 94, 50,
+    131, 251, 78, 38, 254, 129, 200, 207, 55, 111, 110, 144, 109, 228, 65, 70,
+    39, 170, 5, 208, 151, 87, 86, 255, 74, 155, 153, 250, 15, 35, 33, 201, 226,
+    117, 119, 220, 238, 133, 229, 69, 122, 160, 114, 245, 182, 13, 65, 2, 228,
+    205, 174, 128, 248, 4, 139, 178, 227, 204, 243, 249, 253, 119, 253, 107,
+    234, 39, 15, 173, 47, 93, 12, 222, 238, 30, 121, 124, 167, 27, 40, 215, 84,
+    172, 130, 66, 43, 165, 55, 225, 79, 84, 153, 59, 110, 64, 176, 54, 123, 82,
+    128, 189, 150, 52, 202, 102, 133, 199, 197, 253, 180, 221, 127, 144, 124,
+    255, 224, 52, 149, 88, 166, 39, 38, 78, 114, 44, 242, 233, 40, 132, 142,
+    152, 213, 112, 244, 221, 7, 52, 206, 246, 51, 182, 160, 247, 154, 183, 209,
+    81, 70, 56, 186, 63, 182, 2, 82, 202, 178, 233, 52, 198, 241, 175, 38, 165,
+    9, 231, 150, 114, 43, 159, 200, 42, 173, 217, 25, 233, 214, 210, 50, 43,
+    159, 231, 102, 241, 246, 77, 76, 115, 77, 81, 114, 194, 182, 236, 0, 236,
+    198, 197, 180, 176, 148, 48, 177, 106, 180, 150, 158, 237, 130, 242, 109,
+    174, 247, 57, 230, 184, 64, 245, 251, 123, 169, 122, 156, 125, 123, 104,
+    238, 1, 235, 187, 53, 67, 38, 50, 139, 123, 149, 111, 72, 80, 17, 175, 186,
+    98, 153, 247, 97, 218, 141, 38, 0, 171, 254, 180, 81, 233, 71, 156, 48, 14,
+    62, 210, 161, 124, 203, 92
+  };
+  unsigned char *img_data = const_cast<unsigned char *>(buffer);
+
+  aom_image_t img;
+  EXPECT_EQ(aom_img_wrap(&img, AOM_IMG_FMT_I422, kWidth, kHeight, 1, img_data),
+            &img);
+  img.cp = AOM_CICP_CP_UNSPECIFIED;
+  img.tc = AOM_CICP_TC_UNSPECIFIED;
+  img.mc = AOM_CICP_MC_UNSPECIFIED;
+  img.range = AOM_CR_FULL_RANGE;
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA),
+            AOM_CODEC_OK);
+  cfg.rc_end_usage = AOM_Q;
+  cfg.g_profile = 2;
+  cfg.g_bit_depth = AOM_BITS_8;
+  cfg.g_input_bit_depth = 8;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_limit = 1;
+  cfg.g_lag_in_frames = 0;
+  cfg.kf_mode = AOM_KF_DISABLED;
+  cfg.kf_max_dist = 0;
+  cfg.g_threads = 43;
+  cfg.rc_min_quantizer = 30;
+  cfg.rc_max_quantizer = 50;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 40), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_ROW_MT, 1), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_ROWS, 4), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_COLUMNS, 1), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 2), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_SKIP_POSTPROC_FILTERING, 1),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM),
+            AOM_CODEC_OK);
+
+  // Encode frame
+  EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK);
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(aom_codec_encode(&enc, nullptr, 0, 1, 0), AOM_CODEC_OK);
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
+
+// A test that reproduces b/259173819: signed integer overflow in
+// linsolve_wiener().
+TEST(SearchWienerTest, 10bitSignedIntegerOverflowInLinsolveWiener) {
+  constexpr int kWidth = 3;
+  constexpr int kHeight = 3;
+  static const uint16_t buffer[3 * kWidth * kHeight] = {
+    // Y plane:
+    81, 81, 1023, 1020, 81, 1023, 81, 128, 0,
+    // U plane:
+    273, 273, 273, 273, 273, 273, 273, 273, 273,
+    // V plane:
+    273, 273, 273, 273, 273, 273, 516, 81, 81
+  };
+  unsigned char *img_data =
+      reinterpret_cast<unsigned char *>(const_cast<uint16_t *>(buffer));
+
+  aom_image_t img;
+  EXPECT_EQ(
+      aom_img_wrap(&img, AOM_IMG_FMT_I44416, kWidth, kHeight, 1, img_data),
+      &img);
+  img.cp = AOM_CICP_CP_UNSPECIFIED;
+  img.tc = AOM_CICP_TC_UNSPECIFIED;
+  img.mc = AOM_CICP_MC_UNSPECIFIED;
+  img.range = AOM_CR_FULL_RANGE;
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA),
+            AOM_CODEC_OK);
+  cfg.rc_end_usage = AOM_Q;
+  cfg.g_profile = 1;
+  cfg.g_bit_depth = AOM_BITS_10;
+  cfg.g_input_bit_depth = 10;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_limit = 1;
+  cfg.g_lag_in_frames = 0;
+  cfg.kf_mode = AOM_KF_DISABLED;
+  cfg.kf_max_dist = 0;
+  cfg.g_threads = 21;
+  cfg.rc_min_quantizer = 16;
+  cfg.rc_max_quantizer = 54;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 35), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_ROW_MT, 1), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_ROWS, 2), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_COLUMNS, 5), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 1), AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_SKIP_POSTPROC_FILTERING, 1),
+            AOM_CODEC_OK);
+  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM),
+            AOM_CODEC_OK);
+
+  // Encode frame
+  EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK);
+  aom_codec_iter_t iter = nullptr;
+  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
+  ASSERT_NE(pkt, nullptr);
+  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
+  // pkt->data.frame.flags is 0x1f0011.
+  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  // Flush encoder
+  EXPECT_EQ(aom_codec_encode(&enc, nullptr, 0, 1, 0), AOM_CODEC_OK);
+  iter = nullptr;
+  pkt = aom_codec_get_cx_data(&enc, &iter);
+  EXPECT_EQ(pkt, nullptr);
+
+  EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
+}
+
+}  // namespace wiener_highbd
+#endif  // CONFIG_AV1_HIGHBITDEPTH
diff --git a/third_party/aom/test/y4m_test.cc b/third_party/aom/test/y4m_test.cc
new file mode 100644
index 0000000000..a4ed13f7c5
--- /dev/null
+++ b/third_party/aom/test/y4m_test.cc
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <string>
+
+#include "config/aom_config.h"
+
+#include "common/y4menc.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+using std::string;
+
+static const unsigned int kWidth = 160;
+static const unsigned int kHeight = 90;
+static const unsigned int kFrames = 10;
+
+struct Y4mTestParam {
+  const char *filename;
+  unsigned int bit_depth;
+  aom_img_fmt format;
+  const char *md5raw;
+};
+
+const Y4mTestParam kY4mTestVectors[] = {
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420,
+    "e5406275b9fc6bb3436c31d4a05c1cab" },
+  { "park_joy_90p_8_420_monochrome.y4m", 8, AOM_IMG_FMT_I420,
+    "95ef5bf6218580588be24a5271bb6a7f" },
+  { "park_joy_90p_8_420_vertical_csp.y4m", 8, AOM_IMG_FMT_I420,
+    "e5406275b9fc6bb3436c31d4a05c1cab" },
+  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422,
+    "284a47a47133b12884ec3a14e959a0b6" },
+  { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444,
+    "90517ff33843d85de712fd4fe60dbed0" },
+  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016,
+    "63f21f9f717d8b8631bd2288ee87137b" },
+  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216,
+    "48ab51fb540aed07f7ff5af130c9b605" },
+  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416,
+    "067bfd75aa85ff9bae91fa3e0edd1e3e" },
+  { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016,
+    "9e6d8f6508c6e55625f6b697bc461cef" },
+  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216,
+    "b239c6b301c0b835485be349ca83a7e3" },
+  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416,
+    "5a6481a550821dab6d0192f5c63845e9" },
+};
+
+static const int PLANES_YUV[] = { AOM_PLANE_Y, AOM_PLANE_U, AOM_PLANE_V };
+
+class Y4mVideoSourceTest : public ::testing::TestWithParam<Y4mTestParam>,
+                           public ::libaom_test::Y4mVideoSource {
+ protected:
+  Y4mVideoSourceTest() : Y4mVideoSource("", 0, 0) {}
+
+  ~Y4mVideoSourceTest() override { CloseSource(); }
+
+  virtual void Init(const std::string &file_name, int limit) {
+    file_name_ = file_name;
+    start_ = 0;
+    limit_ = limit;
+    frame_ = 0;
+    Begin();
+  }
+
+  // Checks y4m header information
+  void HeaderChecks(unsigned int bit_depth, aom_img_fmt_t fmt) {
+    ASSERT_NE(input_file_, nullptr);
+    ASSERT_EQ(y4m_.pic_w, (int)kWidth);
+    ASSERT_EQ(y4m_.pic_h, (int)kHeight);
+    ASSERT_EQ(img()->d_w, kWidth);
+    ASSERT_EQ(img()->d_h, kHeight);
+    ASSERT_EQ(y4m_.bit_depth, bit_depth);
+    ASSERT_EQ(y4m_.aom_fmt, fmt);
+    if (fmt == AOM_IMG_FMT_I420 || fmt == AOM_IMG_FMT_I42016) {
+      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3 / 2);
+      ASSERT_EQ(img()->x_chroma_shift, 1U);
+      ASSERT_EQ(img()->y_chroma_shift, 1U);
+    }
+    if (fmt == AOM_IMG_FMT_I422 || fmt == AOM_IMG_FMT_I42216) {
+      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 2);
+      ASSERT_EQ(img()->x_chroma_shift, 1U);
+      ASSERT_EQ(img()->y_chroma_shift, 0U);
+    }
+    if (fmt == AOM_IMG_FMT_I444 || fmt == AOM_IMG_FMT_I44416) {
+      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3);
+      ASSERT_EQ(img()->x_chroma_shift, 0U);
+      ASSERT_EQ(img()->y_chroma_shift, 0U);
+    }
+  }
+
+  // Checks MD5 of the raw frame data
+  void Md5Check(const string &expected_md5) {
+    ASSERT_NE(input_file_, nullptr);
+    libaom_test::MD5 md5;
+    for (unsigned int i = start_; i < limit_; i++) {
+      md5.Add(img());
+      Next();
+    }
+    ASSERT_EQ(string(md5.Get()), expected_md5);
+  }
+};
+
+TEST_P(Y4mVideoSourceTest, SourceTest) {
+  const Y4mTestParam t = GetParam();
+  Init(t.filename, kFrames);
+  HeaderChecks(t.bit_depth, t.format);
+  Md5Check(t.md5raw);
+}
+
+INSTANTIATE_TEST_SUITE_P(C, Y4mVideoSourceTest,
+                         ::testing::ValuesIn(kY4mTestVectors));
+
+class Y4mVideoWriteTest : public Y4mVideoSourceTest {
+ protected:
+  Y4mVideoWriteTest() : tmpfile_(nullptr) {}
+
+  ~Y4mVideoWriteTest() override {
+    delete tmpfile_;
+    input_file_ = nullptr;
+  }
+
+  void ReplaceInputFile(FILE *input_file) {
+    CloseSource();
+    frame_ = 0;
+    input_file_ = input_file;
+    rewind(input_file_);
+    ReadSourceToStart();
+  }
+
+  // Writes out a y4m file and then reads it back
+  void WriteY4mAndReadBack() {
+    ASSERT_NE(input_file_, nullptr);
+    char buf[Y4M_BUFFER_SIZE] = { 0 };
+    const struct AvxRational framerate = { y4m_.fps_n, y4m_.fps_d };
+    tmpfile_ = new libaom_test::TempOutFile;
+    ASSERT_NE(tmpfile_, nullptr);
+    ASSERT_NE(tmpfile_->file(), nullptr);
+    y4m_write_file_header(buf, sizeof(buf), kWidth, kHeight, &framerate,
+                          img()->monochrome, img()->csp, y4m_.aom_fmt,
+                          y4m_.bit_depth, AOM_CR_STUDIO_RANGE);
+    fputs(buf, tmpfile_->file());
+    for (unsigned int i = start_; i < limit_; i++) {
+      y4m_write_frame_header(buf, sizeof(buf));
+      fputs(buf, tmpfile_->file());
+      y4m_write_image_file(img(), PLANES_YUV, tmpfile_->file());
+      Next();
+    }
+    ReplaceInputFile(tmpfile_->file());
+  }
+
+  void Init(const std::string &file_name, int limit) override {
+    Y4mVideoSourceTest::Init(file_name, limit);
+    WriteY4mAndReadBack();
+  }
+  libaom_test::TempOutFile *tmpfile_;
+};
+
+TEST_P(Y4mVideoWriteTest, WriteTest) {
+  const Y4mTestParam t = GetParam();
+  Init(t.filename, kFrames);
+  HeaderChecks(t.bit_depth, t.format);
+  Md5Check(t.md5raw);
+}
+
+INSTANTIATE_TEST_SUITE_P(C, Y4mVideoWriteTest,
+                         ::testing::ValuesIn(kY4mTestVectors));
+
+static const char kY4MRegularHeader[] =
+    "YUV4MPEG2 W4 H4 F30:1 Ip A0:0 C420jpeg XYSCSS=420JPEG\n"
+    "FRAME\n"
+    "012345678912345601230123";
+
+TEST(Y4MHeaderTest, RegularHeader) {
+  libaom_test::TempOutFile f;
+  ASSERT_NE(f.file(), nullptr);
+  fwrite(kY4MRegularHeader, 1, sizeof(kY4MRegularHeader), f.file());
+  fflush(f.file());
+  EXPECT_EQ(0, fseek(f.file(), 0, 0));
+
+  y4m_input y4m;
+  EXPECT_EQ(y4m_input_open(&y4m, f.file(), nullptr, 0, AOM_CSP_UNKNOWN,
+                           /*only_420=*/0),
+            0);
+  EXPECT_EQ(y4m.pic_w, 4);
+  EXPECT_EQ(y4m.pic_h, 4);
+  EXPECT_EQ(y4m.fps_n, 30);
+  EXPECT_EQ(y4m.fps_d, 1);
+  EXPECT_EQ(y4m.interlace, 'p');
+  EXPECT_EQ(y4m.color_range, AOM_CR_STUDIO_RANGE);
+  EXPECT_EQ(strcmp("420jpeg", y4m.chroma_type), 0);
+  y4m_input_close(&y4m);
+}
+
+// Testing that headers over 100 characters can be parsed.
+static const char kY4MLongHeader[] =
+    "YUV4MPEG2 W4 H4 F30:1 Ip A0:0 C420jpeg XYSCSS=420JPEG "
+    "XCOLORRANGE=LIMITED XSOME_UNKNOWN_METADATA XOTHER_UNKNOWN_METADATA\n"
+    "FRAME\n"
+    "012345678912345601230123";
+
+TEST(Y4MHeaderTest, LongHeader) {
+  libaom_test::TempOutFile tmpfile;
+  FILE *f = tmpfile.file();
+  ASSERT_NE(f, nullptr);
+  fwrite(kY4MLongHeader, 1, sizeof(kY4MLongHeader), f);
+  fflush(f);
+  EXPECT_EQ(fseek(f, 0, 0), 0);
+
+  y4m_input y4m;
+  EXPECT_EQ(y4m_input_open(&y4m, f, nullptr, 0, AOM_CSP_UNKNOWN,
+                           /*only_420=*/0),
+            0);
+  EXPECT_EQ(y4m.pic_w, 4);
+  EXPECT_EQ(y4m.pic_h, 4);
+  EXPECT_EQ(y4m.fps_n, 30);
+  EXPECT_EQ(y4m.fps_d, 1);
+  EXPECT_EQ(y4m.interlace, 'p');
+  EXPECT_EQ(y4m.color_range, AOM_CR_STUDIO_RANGE);
+  EXPECT_EQ(strcmp("420jpeg", y4m.chroma_type), 0);
+  y4m_input_close(&y4m);
+}
+
+static const char kY4MFullRangeHeader[] =
+    "YUV4MPEG2 W4 H4 F30:1 Ip A0:0 C420jpeg XYSCSS=420JPEG XCOLORRANGE=FULL\n"
+    "FRAME\n"
+    "012345678912345601230123";
+
+TEST(Y4MHeaderTest, FullRangeHeader) {
+  libaom_test::TempOutFile tmpfile;
+  FILE *f = tmpfile.file();
+  ASSERT_NE(f, nullptr);
+  fwrite(kY4MFullRangeHeader, 1, sizeof(kY4MFullRangeHeader), f);
+  fflush(f);
+  EXPECT_EQ(fseek(f, 0, 0), 0);
+
+  y4m_input y4m;
+  EXPECT_EQ(y4m_input_open(&y4m, f, nullptr, 0, AOM_CSP_UNKNOWN,
+                           /*only_420=*/0),
+            0);
+  EXPECT_EQ(y4m.pic_w, 4);
+  EXPECT_EQ(y4m.pic_h, 4);
+  EXPECT_EQ(y4m.fps_n, 30);
+  EXPECT_EQ(y4m.fps_d, 1);
+  EXPECT_EQ(y4m.interlace, 'p');
+  EXPECT_EQ(strcmp("420jpeg", y4m.chroma_type), 0);
+  EXPECT_EQ(y4m.color_range, AOM_CR_FULL_RANGE);
+  y4m_input_close(&y4m);
+}
+
+TEST(Y4MHeaderTest, WriteStudioColorRange) {
+  char buf[128];
+  struct AvxRational framerate = { /*numerator=*/30, /*denominator=*/1 };
+  EXPECT_GE(y4m_write_file_header(
+                buf, /*len=*/128, /*width=*/4, /*height=*/5, &framerate,
+                /*monochrome=*/0, AOM_CSP_UNKNOWN, AOM_IMG_FMT_I420,
+                /*bit_depth=*/8, AOM_CR_STUDIO_RANGE),
+            0);
+  EXPECT_EQ(strcmp("YUV4MPEG2 W4 H5 F30:1 Ip C420jpeg\n", buf), 0);
+}
+
+TEST(Y4MHeaderTest, WriteFullColorRange) {
+  char buf[128];
+  struct AvxRational framerate = { /*numerator=*/30, /*denominator=*/1 };
+  EXPECT_GE(y4m_write_file_header(
+                buf, /*len=*/128, /*width=*/4, /*height=*/5, &framerate,
+                /*monochrome=*/0, AOM_CSP_UNKNOWN, AOM_IMG_FMT_I420,
+                /*bit_depth=*/8, AOM_CR_FULL_RANGE),
+            0);
+  EXPECT_EQ(strcmp("YUV4MPEG2 W4 H5 F30:1 Ip C420jpeg XCOLORRANGE=FULL\n", buf),
+            0);
+}
+
+}  // namespace
diff --git a/third_party/aom/test/y4m_video_source.h b/third_party/aom/test/y4m_video_source.h
new file mode 100644
index 0000000000..1369e4e280
--- /dev/null
+++ b/third_party/aom/test/y4m_video_source.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AOM_TEST_Y4M_VIDEO_SOURCE_H_
+#define AOM_TEST_Y4M_VIDEO_SOURCE_H_
+#include <algorithm>
+#include <memory>
+#include <string>
+
+#include "common/y4minput.h"
+#include "test/video_source.h"
+
+namespace libaom_test {
+
+// This class extends VideoSource to allow parsing of raw yv12
+// so that we can do actual file encodes.
+class Y4mVideoSource : public VideoSource {
+ public:
+  Y4mVideoSource(const std::string &file_name, unsigned int start, int limit)
+      : file_name_(file_name), input_file_(nullptr), img_(new aom_image_t()),
+        start_(start), limit_(limit), frame_(0), framerate_numerator_(0),
+        framerate_denominator_(0), y4m_() {}
+
+  ~Y4mVideoSource() override {
+    aom_img_free(img_.get());
+    CloseSource();
+  }
+
+  virtual void OpenSource() {
+    CloseSource();
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_NE(input_file_, nullptr)
+        << "Input file open failed. Filename: " << file_name_;
+  }
+
+  virtual void ReadSourceToStart() {
+    ASSERT_NE(input_file_, nullptr);
+    ASSERT_FALSE(
+        y4m_input_open(&y4m_, input_file_, nullptr, 0, AOM_CSP_UNKNOWN, 0));
+    framerate_numerator_ = y4m_.fps_n;
+    framerate_denominator_ = y4m_.fps_d;
+    frame_ = 0;
+    for (unsigned int i = 0; i < start_; i++) {
+      Next();
+    }
+    FillFrame();
+  }
+
+  void Begin() override {
+    OpenSource();
+    ReadSourceToStart();
+  }
+
+  void Next() override {
+    ++frame_;
+    FillFrame();
+  }
+
+  aom_image_t *img() const override {
+    return (frame_ < limit_) ? img_.get() : nullptr;
+  }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  aom_codec_pts_t pts() const override { return frame_; }
+
+  unsigned long duration() const override { return 1; }
+
+  aom_rational_t timebase() const override {
+    const aom_rational_t t = { framerate_denominator_, framerate_numerator_ };
+    return t;
+  }
+
+  unsigned int frame() const override { return frame_; }
+
+  unsigned int limit() const override { return limit_; }
+
+  virtual void FillFrame() {
+    ASSERT_NE(input_file_, nullptr);
+    // Read a frame from input_file.
+    y4m_input_fetch_frame(&y4m_, input_file_, img_.get());
+  }
+
+  // Swap buffers with another y4m source. This allows reading a new frame
+  // while keeping the old frame around. A whole Y4mSource is required and
+  // not just a aom_image_t because of how the y4m reader manipulates
+  // aom_image_t internals,
+  void SwapBuffers(Y4mVideoSource *other) {
+    std::swap(other->y4m_.dst_buf, y4m_.dst_buf);
+    aom_image_t *tmp;
+    tmp = other->img_.release();
+    other->img_.reset(img_.release());
+    img_.reset(tmp);
+  }
+
+ protected:
+  void CloseSource() {
+    y4m_input_close(&y4m_);
+    y4m_ = y4m_input();
+    if (input_file_ != nullptr) {
+      fclose(input_file_);
+      input_file_ = nullptr;
+    }
+  }
+
+  std::string file_name_;
+  FILE *input_file_;
+  std::unique_ptr<aom_image_t> img_;
+  unsigned int start_;
+  unsigned int limit_;
+  unsigned int frame_;
+  int framerate_numerator_;
+  int framerate_denominator_;
+  y4m_input y4m_;
+};
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_Y4M_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/yuv_video_source.h b/third_party/aom/test/yuv_video_source.h
new file mode 100644
index 0000000000..77d5dfa73c
--- /dev/null
+++ b/third_party/aom/test/yuv_video_source.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AOM_TEST_YUV_VIDEO_SOURCE_H_
+#define AOM_TEST_YUV_VIDEO_SOURCE_H_
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include "test/video_source.h"
+#include "aom/aom_image.h"
+
+namespace libaom_test {
+
+// This class extends VideoSource to allow parsing of raw YUV
+// formats of various color sampling and bit-depths so that we can
+// do actual file encodes.
+class YUVVideoSource : public VideoSource {
+ public:
+  YUVVideoSource(const std::string &file_name, aom_img_fmt format,
+                 unsigned int width, unsigned int height, int rate_numerator,
+                 int rate_denominator, unsigned int start, int limit)
+      : file_name_(file_name), input_file_(nullptr), img_(nullptr),
+        start_(start), limit_(limit), frame_(0), width_(0), height_(0),
+        format_(AOM_IMG_FMT_NONE), framerate_numerator_(rate_numerator),
+        framerate_denominator_(rate_denominator) {
+    // This initializes format_, raw_size_, width_, height_ and allocates img.
+    SetSize(width, height, format);
+  }
+
+  ~YUVVideoSource() override {
+    aom_img_free(img_);
+    if (input_file_) fclose(input_file_);
+  }
+
+  void Begin() override {
+    if (input_file_) fclose(input_file_);
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_NE(input_file_, nullptr)
+        << "Input file open failed. Filename: " << file_name_;
+    if (start_)
+      fseek(input_file_, static_cast<unsigned>(raw_size_) * start_, SEEK_SET);
+
+    frame_ = start_;
+    FillFrame();
+  }
+
+  void Next() override {
+    ++frame_;
+    FillFrame();
+  }
+
+  aom_image_t *img() const override {
+    return (frame_ < limit_) ? img_ : nullptr;
+  }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  aom_codec_pts_t pts() const override { return frame_; }
+
+  unsigned long duration() const override { return 1; }
+
+  aom_rational_t timebase() const override {
+    const aom_rational_t t = { framerate_denominator_, framerate_numerator_ };
+    return t;
+  }
+
+  unsigned int frame() const override { return frame_; }
+
+  unsigned int limit() const override { return limit_; }
+
+  virtual void SetSize(unsigned int width, unsigned int height,
+                       aom_img_fmt format) {
+    if (width != width_ || height != height_ || format != format_) {
+      aom_img_free(img_);
+      img_ = aom_img_alloc(nullptr, format, width, height, 1);
+      ASSERT_NE(img_, nullptr);
+      width_ = width;
+      height_ = height;
+      format_ = format;
+      switch (format) {
+        case AOM_IMG_FMT_NV12:
+        case AOM_IMG_FMT_I420: raw_size_ = width * height * 3 / 2; break;
+        case AOM_IMG_FMT_I422: raw_size_ = width * height * 2; break;
+        case AOM_IMG_FMT_I444: raw_size_ = width * height * 3; break;
+        case AOM_IMG_FMT_I42016: raw_size_ = width * height * 3; break;
+        case AOM_IMG_FMT_I42216: raw_size_ = width * height * 4; break;
+        case AOM_IMG_FMT_I44416: raw_size_ = width * height * 6; break;
+        default: ASSERT_TRUE(0);
+      }
+    }
+  }
+
+  virtual void FillFrame() {
+    ASSERT_NE(input_file_, nullptr);
+    // Read a frame from input_file.
+    if (fread(img_->img_data, raw_size_, 1, input_file_) == 0) {
+      limit_ = frame_;
+    }
+  }
+
+ protected:
+  std::string file_name_;
+  FILE *input_file_;
+  aom_image_t *img_;
+  size_t raw_size_;
+  unsigned int start_;
+  unsigned int limit_;
+  unsigned int frame_;
+  unsigned int width_;
+  unsigned int height_;
+  aom_img_fmt format_;
+  int framerate_numerator_;
+  int framerate_denominator_;
+};
+
+}  // namespace libaom_test
+
+#endif  // AOM_TEST_YUV_VIDEO_SOURCE_H_