summaryrefslogtreecommitdiffstats
path: root/media/libvpx/libvpx/test/vp9_quantize_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'media/libvpx/libvpx/test/vp9_quantize_test.cc')
-rw-r--r-- media/libvpx/libvpx/test/vp9_quantize_test.cc 758
1 files changed, 758 insertions, 0 deletions
diff --git a/media/libvpx/libvpx/test/vp9_quantize_test.cc b/media/libvpx/libvpx/test/vp9_quantize_test.cc
new file mode 100644
index 0000000000..5e3a7c2701
--- /dev/null
+++ b/media/libvpx/libvpx/test/vp9_quantize_test.cc
@@ -0,0 +1,758 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <tuple>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/bench.h"
+#include "test/buffer.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_scan.h"
+#include "vp9/encoder/vp9_block.h"
+#include "vpx/vpx_codec.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/msvc.h"
+#include "vpx_ports/vpx_timer.h"
+
+using libvpx_test::ACMRandom;
+using libvpx_test::Buffer;
+
+namespace {
+// Number of randomized rounds executed by each correctness test.
+const int number_of_iterations = 100;
+
+// Common signature every quantizer is adapted to before it is handed to the
+// tests: the quantization tables arrive through a macroblock_plane and the
+// scan tables through a ScanOrder.
+using QuantizeFunc = void (*)(const tran_low_t *coeff, intptr_t count,
+                              const macroblock_plane *mb_plane,
+                              tran_low_t *qcoeff, tran_low_t *dqcoeff,
+                              const int16_t *dequant, uint16_t *eob,
+                              const struct ScanOrder *scan_order);
+
+// Test parameter: (function under test, reference function, bit depth,
+// max_size, is_fp).
+using QuantizeParam = std::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
+                                 int /*max_size*/, bool /*is_fp*/>;
+
+// Signature of quantizers that receive every table (zbin, round, quant,
+// quant_shift) and both scan arrays as individual pointers.
+using QuantizeBaseFunc = void (*)(const tran_low_t *coeff, intptr_t count,
+                                  const int16_t *zbin, const int16_t *round,
+                                  const int16_t *quant,
+                                  const int16_t *quant_shift,
+                                  tran_low_t *qcoeff, tran_low_t *dqcoeff,
+                                  const int16_t *dequant, uint16_t *eob,
+                                  const int16_t *scan, const int16_t *iscan);
+
+// Adapter that takes a macroblock_plane and a ScanOrder and forwards their
+// individual tables to |fn|.
+template <QuantizeBaseFunc fn>
+void QuantWrapper(const tran_low_t *coeff, intptr_t count,
+                  const macroblock_plane *const mb_plane, tran_low_t *qcoeff,
+                  tran_low_t *dqcoeff, const int16_t *dequant, uint16_t *eob,
+                  const struct ScanOrder *const scan_order) {
+  const int16_t *const scan = scan_order->scan;
+  const int16_t *const iscan = scan_order->iscan;
+  fn(coeff, count, mb_plane->zbin, mb_plane->round, mb_plane->quant,
+     mb_plane->quant_shift, qcoeff, dqcoeff, dequant, eob, scan, iscan);
+}
+
+// Signature of the 32x32 quantizers, which take no coefficient count.
+using Quantize32x32Func = void (*)(const tran_low_t *coeff,
+                                   const macroblock_plane *const mb_plane,
+                                   tran_low_t *qcoeff, tran_low_t *dqcoeff,
+                                   const int16_t *dequant, uint16_t *eob,
+                                   const struct ScanOrder *const scan_order);
+
+// Adapter for the 32x32 version: |count| is accepted for signature
+// compatibility and then dropped.
+template <Quantize32x32Func fn>
+void Quant32x32Wrapper(const tran_low_t *coeff, intptr_t count,
+                       const macroblock_plane *const mb_plane,
+                       tran_low_t *qcoeff, tran_low_t *dqcoeff,
+                       const int16_t *dequant, uint16_t *eob,
+                       const struct ScanOrder *const scan_order) {
+  static_cast<void>(count);
+  fn(coeff, mb_plane, qcoeff, dqcoeff, dequant, eob, scan_order);
+}
+
+// Signature of the FP quantizers, which use neither zbin nor quant_shift.
+using QuantizeFPFunc = void (*)(const tran_low_t *coeff, intptr_t count,
+                                const int16_t *round, const int16_t *quant,
+                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
+                                const int16_t *dequant, uint16_t *eob,
+                                const int16_t *scan, const int16_t *iscan);
+
+// Adapter for the FP version: forwards the round_fp and quant_fp tables of
+// |mb_plane| together with the scan arrays of |scan_order|.
+template <QuantizeFPFunc fn>
+void QuantFPWrapper(const tran_low_t *coeff, intptr_t count,
+                    const macroblock_plane *const mb_plane, tran_low_t *qcoeff,
+                    tran_low_t *dqcoeff, const int16_t *dequant, uint16_t *eob,
+                    const struct ScanOrder *const scan_order) {
+  const int16_t *const round_fp = mb_plane->round_fp;
+  const int16_t *const quant_fp = mb_plane->quant_fp;
+  fn(coeff, count, round_fp, quant_fp, qcoeff, dqcoeff, dequant, eob,
+     scan_order->scan, scan_order->iscan);
+}
+
+// Fills the seven quantizer tables with pseudo-random values drawn from
+// |rnd|.  Entries 0 and 1 of each table are drawn independently and entries
+// 2..7 repeat entry 1, so every array needs room for 8 values.
+void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
+                          int16_t *quant, int16_t *quant_shift,
+                          int16_t *dequant, int16_t *round_fp,
+                          int16_t *quant_fp) {
+  // Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
+  constexpr int kMaxQRoundingFactorFp = 64;
+  constexpr int kIndependentEntries = 2;
+  constexpr int kTableSize = 8;
+
+  // The sequence of RandRange() calls determines the generated data; keep the
+  // statements below in this order.
+  for (int idx = 0; idx < kIndependentEntries; ++idx) {
+    // The range is 4 to 1828 in the VP9 tables.
+    const int qlookup = rnd->RandRange(1825) + 4;
+    round_fp[idx] = (kMaxQRoundingFactorFp * qlookup) >> 7;
+    quant_fp[idx] = (1 << 16) / qlookup;
+
+    // Values determined by deconstructing vp9_init_quantizer().
+    // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
+    // values or U/V values of any bit depth. This is because y_delta is not
+    // factored into the vp9_ac_quant() call.
+    zbin[idx] = rnd->RandRange(1200);
+
+    // round may be up to 685 for Y values or 914 for U/V.
+    round[idx] = rnd->RandRange(914);
+    // quant: the expression below spans -32703..0, assuming RandRange(n)
+    // yields a value in [0, n) -- TODO confirm against ACMRandom.
+    quant[idx] = static_cast<int>(rnd->RandRange(32704)) - 32703;
+    // quant_shift is drawn below 1 << 14.  NOTE(review): confirm this bound
+    // against the values produced by vp9_init_quantizer().
+    quant_shift[idx] = rnd->RandRange(16384);
+    // dequant maxes out at 1828 for all cases.
+    dequant[idx] = rnd->RandRange(1828);
+  }
+
+  // Replicate entry 1 into the remaining slots of every table.
+  int16_t *const tables[] = { zbin,  round_fp,    quant_fp, round,
+                              quant, quant_shift, dequant };
+  for (int16_t *const table : tables) {
+    for (int idx = kIndependentEntries; idx < kTableSize; ++idx) {
+      table[idx] = table[1];
+    }
+  }
+}
+
+// Shared state for the quantizer tests and benchmarks: the input and output
+// coefficient buffers plus a macroblock_plane whose table pointers refer to
+// 16-byte aligned arrays of 8 int16_t each.  The object owns all of that
+// memory and releases it in the destructor, so copying is disabled.
+// Allocation results are not checked.
+class VP9QuantizeBase : public AbstractBench {
+ public:
+  // |bit_depth| bounds the generated coefficient magnitudes, |max_size| is
+  // the side length used for the coefficient buffers, and |is_fp| selects
+  // whether r_ptr_/q_ptr_ refer to the "fp" round/quant tables.
+  VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
+      : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp),
+        coeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),
+        qcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),
+        dqcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {
+    // TODO(jianj): SSSE3 and AVX2 tests fail on extreme values.
+#if HAVE_NEON
+    max_value_ = (1 << (7 + bit_depth_)) - 1;
+#else
+    max_value_ = (1 << bit_depth_) - 1;
+#endif
+
+    mb_plane_ = reinterpret_cast<macroblock_plane *>(
+        vpx_memalign(16, sizeof(macroblock_plane)));
+
+    // Each table is reachable both through mb_plane_ and through its own
+    // member pointer; dequant is kept outside of mb_plane_.
+    zbin_ptr_ = mb_plane_->zbin = AllocTable();
+    round_fp_ptr_ = mb_plane_->round_fp = AllocTable();
+    quant_fp_ptr_ = mb_plane_->quant_fp = AllocTable();
+    round_ptr_ = mb_plane_->round = AllocTable();
+    quant_ptr_ = mb_plane_->quant = AllocTable();
+    quant_shift_ptr_ = mb_plane_->quant_shift = AllocTable();
+    dequant_ptr_ = AllocTable();
+
+    r_ptr_ = is_fp_ ? round_fp_ptr_ : round_ptr_;
+    q_ptr_ = is_fp_ ? quant_fp_ptr_ : quant_ptr_;
+  }
+
+  // The raw allocations have a single owner; a copy would free them twice.
+  VP9QuantizeBase(const VP9QuantizeBase &) = delete;
+  VP9QuantizeBase &operator=(const VP9QuantizeBase &) = delete;
+
+  ~VP9QuantizeBase() {
+    vpx_free(mb_plane_);
+    vpx_free(zbin_ptr_);
+    vpx_free(round_fp_ptr_);
+    vpx_free(quant_fp_ptr_);
+    vpx_free(round_ptr_);
+    vpx_free(quant_ptr_);
+    vpx_free(quant_shift_ptr_);
+    vpx_free(dequant_ptr_);
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  macroblock_plane *mb_plane_ = nullptr;
+  int16_t *zbin_ptr_ = nullptr;
+  int16_t *quant_fp_ptr_ = nullptr;
+  int16_t *round_fp_ptr_ = nullptr;
+  int16_t *round_ptr_ = nullptr;
+  int16_t *quant_ptr_ = nullptr;
+  int16_t *quant_shift_ptr_ = nullptr;
+  int16_t *dequant_ptr_ = nullptr;
+  const vpx_bit_depth_t bit_depth_;
+  int max_value_ = 0;  // Largest coefficient magnitude the tests generate.
+  const int max_size_;
+  const bool is_fp_;
+  Buffer<tran_low_t> coeff_;
+  Buffer<tran_low_t> qcoeff_;
+  Buffer<tran_low_t> dqcoeff_;
+  int16_t *r_ptr_ = nullptr;  // round_fp_ptr_ when is_fp_, else round_ptr_.
+  int16_t *q_ptr_ = nullptr;  // quant_fp_ptr_ when is_fp_, else quant_ptr_.
+  int count_ = 0;             // Coefficient count of the current block.
+  const ScanOrder *scan_ = nullptr;
+  uint16_t eob_ = 0;
+
+ private:
+  // Number of int16_t entries in every quantizer table.
+  static constexpr int kTableEntries = 8;
+
+  // Returns a new 16-byte aligned table of kTableEntries int16_t values.
+  static int16_t *AllocTable() {
+    return reinterpret_cast<int16_t *>(
+        vpx_memalign(16, kTableEntries * sizeof(int16_t)));
+  }
+};
+
+class VP9QuantizeTest : public VP9QuantizeBase,  // Value-parameterized fixture; see QuantizeParam for the tuple layout.
+ public ::testing::TestWithParam<QuantizeParam> {
+ public:
+ VP9QuantizeTest()
+ : VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)),  // bit depth, max_size, is_fp
+ quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
+
+ protected:
+ virtual void Run();  // Calls quantize_op_ once on the current buffers and tables.
+ void Speed(bool is_median);  // Benchmark driver; true selects the RunNTimes()/PrintMedian() path.
+ const QuantizeFunc quantize_op_;  // Function under test.
+ const QuantizeFunc ref_quantize_op_;  // Reference implementation it is compared against.
+};
+
+// Single invocation of the function under test on the fixture's current
+// buffers, tables and scan order.
+void VP9QuantizeTest::Run() {
+  tran_low_t *const input = coeff_.TopLeftPixel();
+  tran_low_t *const quantized = qcoeff_.TopLeftPixel();
+  tran_low_t *const dequantized = dqcoeff_.TopLeftPixel();
+  quantize_op_(input, count_, mb_plane_, quantized, dequantized, dequant_ptr_,
+               &eob_, scan_);
+}
+
+// Times the quantizer on two kinds of input per transform size: one where
+// every coefficient magnitude is below zbin and one where zbin is small
+// compared to the coefficient range.  With |is_median| set, timing goes
+// through RunNTimes()/PrintMedian(); otherwise the total time of
+// ref_quantize_op_ and quantize_op_ over a fixed number of calls is printed
+// together with their ratio.
+void VP9QuantizeTest::Speed(bool is_median) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  ASSERT_TRUE(coeff_.Init());
+  ASSERT_TRUE(qcoeff_.Init());
+  ASSERT_TRUE(dqcoeff_.Init());
+
+  // A max_size_ of 16 covers 4x4 through 16x16; anything else runs 32x32 only.
+  const bool small_blocks = (max_size_ == 16);
+  const TX_SIZE first_sz = small_blocks ? TX_4X4 : TX_32X32;
+  const TX_SIZE last_sz = small_blocks ? TX_16X16 : TX_32X32;
+
+  for (TX_SIZE sz = first_sz; sz <= last_sz; ++sz) {
+    const int side = 4 << sz;
+    // Pass 0: zbin > coeff.  Pass 1: zbin < coeff.
+    for (int pass = 0; pass < 2; ++pass) {
+      const bool bypass = (pass == 0);
+      // TX_TYPE defines the scan order. That is not relevant to the speed
+      // test, so the first one (DCT_DCT) is used.
+      count_ = side * side;
+      scan_ = &vp9_scan_orders[sz][DCT_DCT];
+
+      GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
+                           quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
+                           quant_fp_ptr_);
+
+      // When |coeff values| are less than zbin the results are 0.  For 32x32
+      // the threshold is halved, so it is doubled here to keep the values
+      // from clearing it.
+      const int bypass_zbin = (max_size_ == 32) ? 200 : 100;
+      const int zbin_value = bypass ? bypass_zbin : 50;
+      for (int j = 0; j < 8; ++j) zbin_ptr_[j] = zbin_value;
+      if (bypass) {
+        coeff_.Set(&rnd, -99, 99);
+      } else {
+        coeff_.Set(&rnd, -500, 500);
+      }
+
+      char block_size[16];
+      snprintf(block_size, sizeof(block_size), "%dx%d", side, side);
+      char title[100];
+      snprintf(title, sizeof(title), "%25s %8s ",
+               bypass ? "Bypass calculations " : "Full calculations ",
+               block_size);
+
+      if (is_median) {
+        RunNTimes(10000000 / count_);
+        PrintMedian(title);
+        continue;
+      }
+
+      Buffer<tran_low_t> ref_qcoeff =
+          Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+      ASSERT_TRUE(ref_qcoeff.Init());
+      Buffer<tran_low_t> ref_dqcoeff =
+          Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+      ASSERT_TRUE(ref_dqcoeff.Init());
+      uint16_t ref_eob = 0;
+
+      const int kNumTests = 5000000;
+
+      // Reference implementation first ...
+      vpx_usec_timer ref_timer;
+      vpx_usec_timer_start(&ref_timer);
+      for (int n = 0; n < kNumTests; ++n) {
+        ref_quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_,
+                         ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
+                         dequant_ptr_, &ref_eob, scan_);
+      }
+      vpx_usec_timer_mark(&ref_timer);
+
+      // ... then the function under test.
+      vpx_usec_timer simd_timer;
+      vpx_usec_timer_start(&simd_timer);
+      for (int n = 0; n < kNumTests; ++n) {
+        quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_,
+                     qcoeff_.TopLeftPixel(), dqcoeff_.TopLeftPixel(),
+                     dequant_ptr_, &eob_, scan_);
+      }
+      vpx_usec_timer_mark(&simd_timer);
+
+      const int elapsed_time =
+          static_cast<int>(vpx_usec_timer_elapsed(&ref_timer));
+      const int simd_elapsed_time =
+          static_cast<int>(vpx_usec_timer_elapsed(&simd_timer));
+      const float gain = static_cast<float>(elapsed_time) / simd_elapsed_time;
+      printf("%s c_time = %d \t simd_time = %d \t Gain = %f \n", title,
+             elapsed_time, simd_elapsed_time, gain);
+    }
+  }
+}
+
+// This quantizer compares the AC coefficients to the quantization step size to
+// determine if further multiplication operations are needed.
+// Based on vp9_quantize_fp_sse2().
+//
+// Coefficients are handled in groups of 16.  When every coefficient of a
+// group other than the first has a magnitude of at most |thr|, the whole
+// group is written as zero; otherwise each coefficient is rounded, multiplied
+// by the quantizer and shifted.  Index 0 of the round/quant/dequant tables is
+// used for coefficient 0 and index 1 for all others.  |is_32x32| must be 0 or
+// 1 and selects the 32x32 flavour of the threshold, rounding, shift and
+// dequantization.  |n_coeffs| is consumed in steps of 16 and |iscan| is
+// unused.  The end-of-block position, in |scan| order, is stored in
+// |*eob_ptr|.
+inline void quant_fp_nz(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                        const int16_t *round_ptr, const int16_t *quant_ptr,
+                        tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                        const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                        const int16_t *scan, const int16_t *iscan,
+                        int is_32x32) {
+  // dequant[1] shifted right by 1, or by 2 for 32x32.
+  const int thr = dequant_ptr[1] >> (1 + is_32x32);
+  (void)iscan;
+
+  // Quantization pass, one group of 16 coefficients at a time.
+  for (int i = 0; i < n_coeffs; i += 16) {
+    int nzflag_cnt = 0;
+    int abs_coeff[16];
+    int coeff_sign[16];
+
+    // Count the coefficients of this group that do not exceed the threshold.
+    for (int y = 0; y < 16; ++y) {
+      const int coeff = coeff_ptr[i + y];
+      coeff_sign[y] = (coeff >> 31);
+      abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
+      // The first 16 are skipped in the sse2 code. Do the same here to match.
+      if (i >= 16 && (abs_coeff[y] <= thr)) {
+        nzflag_cnt++;
+      }
+    }
+
+    for (int y = 0; y < 16; ++y) {
+      const int rc = i + y;
+      // If all 16 coefficients of the group are at or below the threshold,
+      // quantize the whole group to zero.
+      if (nzflag_cnt < 16) {
+        const int tbl = (rc != 0);  // 0 for the first coefficient, else 1.
+        const int rounding = is_32x32
+                                 ? ROUND_POWER_OF_TWO(round_ptr[tbl], 1)
+                                 : round_ptr[tbl];
+        int tmp = clamp(abs_coeff[y] + rounding, INT16_MIN, INT16_MAX);
+        tmp = (tmp * quant_ptr[tbl]) >> (16 - is_32x32);
+        qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
+
+        // The 32x32 flavour halves the dequantized value.
+        if (is_32x32) {
+          dqcoeff_ptr[rc] = static_cast<tran_low_t>(qcoeff_ptr[rc] *
+                                                    dequant_ptr[tbl] / 2);
+        } else {
+          dqcoeff_ptr[rc] =
+              static_cast<tran_low_t>(qcoeff_ptr[rc] * dequant_ptr[tbl]);
+        }
+      } else {
+        qcoeff_ptr[rc] = 0;
+        dqcoeff_ptr[rc] = 0;
+      }
+    }
+  }
+
+  // Scan for eob: the last position, in scan order, with a nonzero quantized
+  // coefficient.
+  int eob = -1;
+  for (int i = 0; i < n_coeffs; i++) {
+    if (qcoeff_ptr[scan[i]]) {
+      eob = i;
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+
+// Reference "fp" quantizer for the non-32x32 case: quant_fp_nz() with the
+// 32x32 adjustments switched off.
+void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                      const int16_t *round_ptr, const int16_t *quant_ptr,
+                      tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                      const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                      const int16_t *scan, const int16_t *iscan) {
+  const int kIs32x32 = 0;
+  quant_fp_nz(coeff_ptr, n_coeffs, round_ptr, quant_ptr, qcoeff_ptr,
+              dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, kIs32x32);
+}
+
+// Reference "fp" quantizer for the 32x32 case: quant_fp_nz() with the 32x32
+// adjustments switched on.
+void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                            const int16_t *round_ptr, const int16_t *quant_ptr,
+                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                            const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                            const int16_t *scan, const int16_t *iscan) {
+  const int kIs32x32 = 1;
+  quant_fp_nz(coeff_ptr, n_coeffs, round_ptr, quant_ptr, qcoeff_ptr,
+              dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, kIs32x32);
+}
+
+// Feeds fully random coefficient blocks and random tables to both the
+// reference and the tested quantizer and requires identical qcoeff, dqcoeff
+// and eob.  Stops at the first failing iteration after printing the
+// differences.
+TEST_P(VP9QuantizeTest, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  ASSERT_TRUE(coeff_.Init());
+  ASSERT_TRUE(qcoeff_.Init());
+  ASSERT_TRUE(dqcoeff_.Init());
+  Buffer<tran_low_t> ref_qcoeff =
+      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  ASSERT_TRUE(ref_qcoeff.Init());
+  Buffer<tran_low_t> ref_dqcoeff =
+      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  ASSERT_TRUE(ref_dqcoeff.Init());
+  uint16_t ref_eob = 0;
+  eob_ = 0;
+
+  for (int iter = 0; iter < number_of_iterations; ++iter) {
+    // With max_size_ 16 the sizes cycle through TX_4X4, TX_8X8, TX_16X16.
+    const TX_SIZE sz =
+        (max_size_ == 16) ? static_cast<TX_SIZE>(iter % 3) : TX_32X32;
+    const TX_TYPE tx_type = static_cast<TX_TYPE>((iter >> 2) % 3);
+    const int side = 4 << sz;
+    scan_ = &vp9_scan_orders[sz][tx_type];
+    count_ = side * side;
+
+    // Random data: coefficients first, then the tables.
+    coeff_.Set(&rnd, -max_value_, max_value_);
+    GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
+                         quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
+                         quant_fp_ptr_);
+
+    tran_low_t *const input = coeff_.TopLeftPixel();
+    ref_quantize_op_(input, count_, mb_plane_, ref_qcoeff.TopLeftPixel(),
+                     ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
+                     scan_);
+
+    ASM_REGISTER_STATE_CHECK(quantize_op_(
+        input, count_, mb_plane_, qcoeff_.TopLeftPixel(),
+        dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_));
+
+    EXPECT_TRUE(qcoeff_.CheckValues(ref_qcoeff));
+    EXPECT_TRUE(dqcoeff_.CheckValues(ref_dqcoeff));
+
+    EXPECT_EQ(eob_, ref_eob);
+
+    if (!HasFailure()) continue;
+
+    printf("Failure on iteration %d.\n", iter);
+    qcoeff_.PrintDifference(ref_qcoeff);
+    dqcoeff_.PrintDifference(ref_dqcoeff);
+    return;
+  }
+}
+
+// Like OperationCheck, but the input block is all zero except for up to two
+// randomly placed coefficients, which exercises the end-of-block computation.
+// Stops at the first failing iteration after printing the differences.
+TEST_P(VP9QuantizeTest, EOBCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  ASSERT_TRUE(coeff_.Init());
+  ASSERT_TRUE(qcoeff_.Init());
+  ASSERT_TRUE(dqcoeff_.Init());
+  Buffer<tran_low_t> ref_qcoeff =
+      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  ASSERT_TRUE(ref_qcoeff.Init());
+  Buffer<tran_low_t> ref_dqcoeff =
+      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  ASSERT_TRUE(ref_dqcoeff.Init());
+  uint16_t ref_eob = 0;
+  eob_ = 0;
+  // Mask that keeps a coefficient index inside the max_size_ x max_size_ area.
+  const uint32_t max_index = max_size_ * max_size_ - 1;
+
+  for (int i = 0; i < number_of_iterations; ++i) {
+    TX_SIZE sz;
+    if (max_size_ == 16) {
+      sz = static_cast<TX_SIZE>(i % 3);  // TX_4X4, TX_8X8 TX_16X16
+    } else {
+      sz = TX_32X32;
+    }
+    const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
+    scan_ = &vp9_scan_orders[sz][tx_type];
+    count_ = (4 << sz) * (4 << sz);
+
+    // Two random entries.  The index and the value are drawn in separate
+    // statements so that the sequence of RandRange() calls, and with it the
+    // generated test data, does not depend on the order in which a compiler
+    // evaluates the operands of an assignment.
+    coeff_.Set(0);
+    tran_low_t *const coeffs = coeff_.TopLeftPixel();
+    for (int entry = 0; entry < 2; ++entry) {
+      const uint32_t index = rnd.RandRange(count_) & max_index;
+      const int value =
+          static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
+      coeffs[index] = value;
+    }
+
+    GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
+                         quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
+                         quant_fp_ptr_);
+    ref_quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_,
+                     ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
+                     dequant_ptr_, &ref_eob, scan_);
+
+    ASM_REGISTER_STATE_CHECK(quantize_op_(
+        coeff_.TopLeftPixel(), count_, mb_plane_, qcoeff_.TopLeftPixel(),
+        dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_));
+
+    EXPECT_TRUE(qcoeff_.CheckValues(ref_qcoeff));
+    EXPECT_TRUE(dqcoeff_.CheckValues(ref_dqcoeff));
+
+    EXPECT_EQ(eob_, ref_eob);
+
+    if (HasFailure()) {
+      printf("Failure on iteration %d.\n", i);
+      qcoeff_.PrintDifference(ref_qcoeff);
+      dqcoeff_.PrintDifference(ref_dqcoeff);
+      return;
+    }
+  }
+}
+
+TEST_P(VP9QuantizeTest, DISABLED_Speed) { Speed(false); }  // Prints reference vs. tested total time.
+
+TEST_P(VP9QuantizeTest, DISABLED_SpeedMedian) { Speed(true); }  // Uses the RunNTimes()/PrintMedian() path.
+
+using std::make_tuple;  // Each make_tuple() below is a QuantizeParam: (tested fn, reference fn, bit depth, max_size, is_fp).
+
+#if HAVE_SSE2
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(
+ SSE2, VP9QuantizeTest,
+ ::testing::Values(
+ make_tuple(&QuantWrapper<vpx_quantize_b_sse2>,
+ &QuantWrapper<vpx_quantize_b_c>, VPX_BITS_8, 16, false),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
+ &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_sse2>,
+ &QuantWrapper<vpx_highbd_quantize_b_c>, VPX_BITS_8, 16,
+ false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_sse2>,
+ &QuantWrapper<vpx_highbd_quantize_b_c>, VPX_BITS_10, 16,
+ false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_sse2>,
+ &QuantWrapper<vpx_highbd_quantize_b_c>, VPX_BITS_12, 16,
+ false),
+ make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
+ &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
+ VPX_BITS_8, 32, false),
+ make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
+ &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
+ VPX_BITS_10, 32, false),
+ make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
+ &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
+ VPX_BITS_12, 32, false)));
+
+#else
+INSTANTIATE_TEST_SUITE_P(
+ SSE2, VP9QuantizeTest,
+ ::testing::Values(make_tuple(&QuantWrapper<vpx_quantize_b_sse2>,
+ &QuantWrapper<vpx_quantize_b_c>, VPX_BITS_8,
+ 16, false),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
+ &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
+ 16, true)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // HAVE_SSE2
+
+#if HAVE_SSSE3  // 8-bit only: b, b_32x32, fp and fp_32x32; the fp entries use the *_nz_c references defined in this file.
+INSTANTIATE_TEST_SUITE_P(
+ SSSE3, VP9QuantizeTest,
+ ::testing::Values(make_tuple(&QuantWrapper<vpx_quantize_b_ssse3>,
+ &QuantWrapper<vpx_quantize_b_c>, VPX_BITS_8,
+ 16, false),
+ make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_ssse3>,
+ &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
+ VPX_BITS_8, 32, false),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
+ &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
+ 16, true),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
+ &QuantFPWrapper<quantize_fp_32x32_nz_c>,
+ VPX_BITS_8, 32, true)));
+#endif // HAVE_SSSE3
+
+#if HAVE_AVX  // 8-bit only: the b and b_32x32 quantizers against their C versions.
+INSTANTIATE_TEST_SUITE_P(
+ AVX, VP9QuantizeTest,
+ ::testing::Values(make_tuple(&QuantWrapper<vpx_quantize_b_avx>,
+ &QuantWrapper<vpx_quantize_b_c>, VPX_BITS_8,
+ 16, false),
+ make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_avx>,
+ &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
+ VPX_BITS_8, 32, false)));
+#endif // HAVE_AVX
+
+#if VPX_ARCH_X86_64 && HAVE_AVX2
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(
+ AVX2, VP9QuantizeTest,
+ ::testing::Values(
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
+ &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
+ make_tuple(&QuantFPWrapper<vp9_highbd_quantize_fp_avx2>,
+ &QuantFPWrapper<vp9_highbd_quantize_fp_c>, VPX_BITS_12, 16,
+ true),
+ make_tuple(&QuantFPWrapper<vp9_highbd_quantize_fp_32x32_avx2>,
+ &QuantFPWrapper<vp9_highbd_quantize_fp_32x32_c>, VPX_BITS_12,
+ 32, true),
+ make_tuple(&QuantWrapper<vpx_quantize_b_avx2>,
+ &QuantWrapper<vpx_quantize_b_c>, VPX_BITS_8, 16, false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_avx2>,
+ &QuantWrapper<vpx_highbd_quantize_b_c>, VPX_BITS_8, 16,
+ false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_avx2>,
+ &QuantWrapper<vpx_highbd_quantize_b_c>, VPX_BITS_10, 16,
+ false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_avx2>,
+ &QuantWrapper<vpx_highbd_quantize_b_c>, VPX_BITS_12, 16,
+ false),
+ make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_avx2>,
+ &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
+ false),
+ make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
+ &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
+ VPX_BITS_8, 32, false),
+ make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
+ &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
+ VPX_BITS_10, 32, false),
+ make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
+ &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
+ VPX_BITS_12, 32, false)));
+#else
+INSTANTIATE_TEST_SUITE_P(
+ AVX2, VP9QuantizeTest,
+ ::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
+ &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
+ 16, true),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_avx2>,
+ &QuantFPWrapper<quantize_fp_32x32_nz_c>,
+ VPX_BITS_8, 32, true),
+ make_tuple(&QuantWrapper<vpx_quantize_b_avx2>,
+ &QuantWrapper<vpx_quantize_b_c>, VPX_BITS_8,
+ 16, false),
+ make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_avx2>,
+ &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
+ VPX_BITS_8, 32, false)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // VPX_ARCH_X86_64 && HAVE_AVX2
+
+#if HAVE_NEON  // The fp entries here use vp9_quantize_fp*_c as reference rather than the *_nz_c helpers defined in this file.
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(
+ NEON, VP9QuantizeTest,
+ ::testing::Values(
+ make_tuple(&QuantWrapper<vpx_quantize_b_neon>,
+ &QuantWrapper<vpx_quantize_b_c>, VPX_BITS_8, 16, false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_neon>,
+ &QuantWrapper<vpx_highbd_quantize_b_c>, VPX_BITS_8, 16,
+ false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_neon>,
+ &QuantWrapper<vpx_highbd_quantize_b_c>, VPX_BITS_10, 16,
+ false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_neon>,
+ &QuantWrapper<vpx_highbd_quantize_b_c>, VPX_BITS_12, 16,
+ false),
+ make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_neon>,
+ &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
+ false),
+ make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
+ &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
+ VPX_BITS_8, 32, false),
+ make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
+ &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
+ VPX_BITS_10, 32, false),
+ make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
+ &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
+ VPX_BITS_12, 32, false),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
+ &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
+ &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
+ true)));
+#else
+INSTANTIATE_TEST_SUITE_P(
+ NEON, VP9QuantizeTest,
+ ::testing::Values(make_tuple(&QuantWrapper<vpx_quantize_b_neon>,
+ &QuantWrapper<vpx_quantize_b_c>, VPX_BITS_8,
+ 16, false),
+ make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_neon>,
+ &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
+ VPX_BITS_8, 32, false),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
+ &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
+ 16, true),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
+ &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
+ VPX_BITS_8, 32, true)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // HAVE_NEON
+
+#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
+// The b and b_32x32 entries go through QuantWrapper / Quant32x32Wrapper, like
+// every other instantiation in this file, so that each tuple element has the
+// QuantizeFunc signature required by QuantizeParam.
+INSTANTIATE_TEST_SUITE_P(
+    VSX, VP9QuantizeTest,
+    ::testing::Values(make_tuple(&QuantWrapper<vpx_quantize_b_vsx>,
+                                 &QuantWrapper<vpx_quantize_b_c>, VPX_BITS_8,
+                                 16, false),
+                      make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_vsx>,
+                                 &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
+                                 VPX_BITS_8, 32, false),
+                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_vsx>,
+                                 &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
+                                 16, true),
+                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_vsx>,
+                                 &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
+                                 VPX_BITS_8, 32, true)));
+#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
+
+#if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH
+// Both entries go through QuantWrapper / Quant32x32Wrapper, like every other
+// instantiation in this file, so that each tuple element has the QuantizeFunc
+// signature required by QuantizeParam.
+INSTANTIATE_TEST_SUITE_P(
+    LSX, VP9QuantizeTest,
+    ::testing::Values(make_tuple(&QuantWrapper<vpx_quantize_b_lsx>,
+                                 &QuantWrapper<vpx_quantize_b_c>, VPX_BITS_8,
+                                 16, false),
+                      make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_lsx>,
+                                 &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
+                                 VPX_BITS_8, 32, false)));
+#endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH
+
+// Runs each C implementation against itself. Only useful to compare "Speed" test results.
+INSTANTIATE_TEST_SUITE_P(
+ DISABLED_C, VP9QuantizeTest,
+ ::testing::Values(
+ make_tuple(&QuantWrapper<vpx_quantize_b_c>,
+ &QuantWrapper<vpx_quantize_b_c>, VPX_BITS_8, 16, false),
+ make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
+ &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
+ false),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
+ &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
+ make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
+ &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
+ make_tuple(&QuantFPWrapper<quantize_fp_32x32_nz_c>,
+ &QuantFPWrapper<quantize_fp_32x32_nz_c>, VPX_BITS_8, 32,
+ true),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
+ &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
+ true)));
+} // namespace