diff options
Diffstat (limited to 'media/libvpx/libvpx/vp9/common')
72 files changed, 15543 insertions, 0 deletions
diff --git a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c new file mode 100644 index 0000000000..aeb7e49c10 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> + +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_enums.h" +#include "vp9/common/arm/neon/vp9_iht_neon.h" +#include "vpx_dsp/arm/highbd_idct_neon.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/transpose_neon.h" +#include "vpx_dsp/inv_txfm.h" + +// Use macros to make sure argument lane is passed in as an constant integer. 
+ +#define vmull_lane_s32_dual(in, c, lane, out) \ + do { \ + out[0].val[0] = vmull_lane_s32(vget_low_s32(in.val[0]), c, lane); \ + out[0].val[1] = vmull_lane_s32(vget_low_s32(in.val[1]), c, lane); \ + out[1].val[0] = vmull_lane_s32(vget_high_s32(in.val[0]), c, lane); \ + out[1].val[1] = vmull_lane_s32(vget_high_s32(in.val[1]), c, lane); \ + } while (0) + +#define vmlal_lane_s32_dual(in, c, lane, out) \ + do { \ + out[0].val[0] = \ + vmlal_lane_s32(out[0].val[0], vget_low_s32(in.val[0]), c, lane); \ + out[0].val[1] = \ + vmlal_lane_s32(out[0].val[1], vget_low_s32(in.val[1]), c, lane); \ + out[1].val[0] = \ + vmlal_lane_s32(out[1].val[0], vget_high_s32(in.val[0]), c, lane); \ + out[1].val[1] = \ + vmlal_lane_s32(out[1].val[1], vget_high_s32(in.val[1]), c, lane); \ + } while (0) + +#define vmlsl_lane_s32_dual(in, c, lane, out) \ + do { \ + out[0].val[0] = \ + vmlsl_lane_s32(out[0].val[0], vget_low_s32(in.val[0]), c, lane); \ + out[0].val[1] = \ + vmlsl_lane_s32(out[0].val[1], vget_low_s32(in.val[1]), c, lane); \ + out[1].val[0] = \ + vmlsl_lane_s32(out[1].val[0], vget_high_s32(in.val[0]), c, lane); \ + out[1].val[1] = \ + vmlsl_lane_s32(out[1].val[1], vget_high_s32(in.val[1]), c, lane); \ + } while (0) + +static INLINE int32x4x2_t +highbd_dct_const_round_shift_low_8(const int64x2x2_t *const in) { + int32x4x2_t out; + out.val[0] = vcombine_s32(vrshrn_n_s64(in[0].val[0], DCT_CONST_BITS), + vrshrn_n_s64(in[1].val[0], DCT_CONST_BITS)); + out.val[1] = vcombine_s32(vrshrn_n_s64(in[0].val[1], DCT_CONST_BITS), + vrshrn_n_s64(in[1].val[1], DCT_CONST_BITS)); + return out; +} + +#define highbd_iadst_half_butterfly(in, c, lane, out) \ + do { \ + int64x2x2_t _t[2]; \ + vmull_lane_s32_dual(in, c, lane, _t); \ + out = highbd_dct_const_round_shift_low_8(_t); \ + } while (0) + +#define highbd_iadst_butterfly(in0, in1, c, lane0, lane1, s0, s1) \ + do { \ + vmull_lane_s32_dual(in0, c, lane0, s0); \ + vmull_lane_s32_dual(in0, c, lane1, s1); \ + vmlal_lane_s32_dual(in1, c, lane1, s0); \ 
+ vmlsl_lane_s32_dual(in1, c, lane0, s1); \ + } while (0) + +static INLINE int32x4x2_t vaddq_s32_dual(const int32x4x2_t in0, + const int32x4x2_t in1) { + int32x4x2_t out; + out.val[0] = vaddq_s32(in0.val[0], in1.val[0]); + out.val[1] = vaddq_s32(in0.val[1], in1.val[1]); + return out; +} + +static INLINE int64x2x2_t vaddq_s64_dual(const int64x2x2_t in0, + const int64x2x2_t in1) { + int64x2x2_t out; + out.val[0] = vaddq_s64(in0.val[0], in1.val[0]); + out.val[1] = vaddq_s64(in0.val[1], in1.val[1]); + return out; +} + +static INLINE int32x4x2_t vsubq_s32_dual(const int32x4x2_t in0, + const int32x4x2_t in1) { + int32x4x2_t out; + out.val[0] = vsubq_s32(in0.val[0], in1.val[0]); + out.val[1] = vsubq_s32(in0.val[1], in1.val[1]); + return out; +} + +static INLINE int64x2x2_t vsubq_s64_dual(const int64x2x2_t in0, + const int64x2x2_t in1) { + int64x2x2_t out; + out.val[0] = vsubq_s64(in0.val[0], in1.val[0]); + out.val[1] = vsubq_s64(in0.val[1], in1.val[1]); + return out; +} + +static INLINE int32x4x2_t vcombine_s32_dual(const int32x2x2_t in0, + const int32x2x2_t in1) { + int32x4x2_t out; + out.val[0] = vcombine_s32(in0.val[0], in1.val[0]); + out.val[1] = vcombine_s32(in0.val[1], in1.val[1]); + return out; +} + +static INLINE int32x4x2_t highbd_add_dct_const_round_shift_low_8( + const int64x2x2_t *const in0, const int64x2x2_t *const in1) { + const int64x2x2_t sum_lo = vaddq_s64_dual(in0[0], in1[0]); + const int64x2x2_t sum_hi = vaddq_s64_dual(in0[1], in1[1]); + int32x2x2_t out_lo, out_hi; + + out_lo.val[0] = vrshrn_n_s64(sum_lo.val[0], DCT_CONST_BITS); + out_lo.val[1] = vrshrn_n_s64(sum_lo.val[1], DCT_CONST_BITS); + out_hi.val[0] = vrshrn_n_s64(sum_hi.val[0], DCT_CONST_BITS); + out_hi.val[1] = vrshrn_n_s64(sum_hi.val[1], DCT_CONST_BITS); + return vcombine_s32_dual(out_lo, out_hi); +} + +static INLINE int32x4x2_t highbd_sub_dct_const_round_shift_low_8( + const int64x2x2_t *const in0, const int64x2x2_t *const in1) { + const int64x2x2_t sub_lo = vsubq_s64_dual(in0[0], in1[0]); + 
const int64x2x2_t sub_hi = vsubq_s64_dual(in0[1], in1[1]); + int32x2x2_t out_lo, out_hi; + + out_lo.val[0] = vrshrn_n_s64(sub_lo.val[0], DCT_CONST_BITS); + out_lo.val[1] = vrshrn_n_s64(sub_lo.val[1], DCT_CONST_BITS); + out_hi.val[0] = vrshrn_n_s64(sub_hi.val[0], DCT_CONST_BITS); + out_hi.val[1] = vrshrn_n_s64(sub_hi.val[1], DCT_CONST_BITS); + return vcombine_s32_dual(out_lo, out_hi); +} + +static INLINE int32x4x2_t vnegq_s32_dual(const int32x4x2_t in) { + int32x4x2_t out; + out.val[0] = vnegq_s32(in.val[0]); + out.val[1] = vnegq_s32(in.val[1]); + return out; +} + +static void highbd_iadst16_neon(const int32_t *input, int32_t *output, + uint16_t *dest, const int stride, + const int bd) { + const int32x4_t c_1_31_5_27 = + create_s32x4_neon(cospi_1_64, cospi_31_64, cospi_5_64, cospi_27_64); + const int32x4_t c_9_23_13_19 = + create_s32x4_neon(cospi_9_64, cospi_23_64, cospi_13_64, cospi_19_64); + const int32x4_t c_17_15_21_11 = + create_s32x4_neon(cospi_17_64, cospi_15_64, cospi_21_64, cospi_11_64); + const int32x4_t c_25_7_29_3 = + create_s32x4_neon(cospi_25_64, cospi_7_64, cospi_29_64, cospi_3_64); + const int32x4_t c_4_28_20_12 = + create_s32x4_neon(cospi_4_64, cospi_28_64, cospi_20_64, cospi_12_64); + const int32x4_t c_16_n16_8_24 = + create_s32x4_neon(cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64); + int32x4x2_t in[16], out[16]; + int32x4x2_t x[16], t[12]; + int64x2x2_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2]; + int64x2x2_t s8[2], s9[2], s10[2], s11[2], s12[2], s13[2], s14[2], s15[2]; + + // Load input (16x8) + in[0].val[0] = vld1q_s32(input); + in[0].val[1] = vld1q_s32(input + 4); + input += 8; + in[8].val[0] = vld1q_s32(input); + in[8].val[1] = vld1q_s32(input + 4); + input += 8; + in[1].val[0] = vld1q_s32(input); + in[1].val[1] = vld1q_s32(input + 4); + input += 8; + in[9].val[0] = vld1q_s32(input); + in[9].val[1] = vld1q_s32(input + 4); + input += 8; + in[2].val[0] = vld1q_s32(input); + in[2].val[1] = vld1q_s32(input + 4); + input += 8; + 
in[10].val[0] = vld1q_s32(input); + in[10].val[1] = vld1q_s32(input + 4); + input += 8; + in[3].val[0] = vld1q_s32(input); + in[3].val[1] = vld1q_s32(input + 4); + input += 8; + in[11].val[0] = vld1q_s32(input); + in[11].val[1] = vld1q_s32(input + 4); + input += 8; + in[4].val[0] = vld1q_s32(input); + in[4].val[1] = vld1q_s32(input + 4); + input += 8; + in[12].val[0] = vld1q_s32(input); + in[12].val[1] = vld1q_s32(input + 4); + input += 8; + in[5].val[0] = vld1q_s32(input); + in[5].val[1] = vld1q_s32(input + 4); + input += 8; + in[13].val[0] = vld1q_s32(input); + in[13].val[1] = vld1q_s32(input + 4); + input += 8; + in[6].val[0] = vld1q_s32(input); + in[6].val[1] = vld1q_s32(input + 4); + input += 8; + in[14].val[0] = vld1q_s32(input); + in[14].val[1] = vld1q_s32(input + 4); + input += 8; + in[7].val[0] = vld1q_s32(input); + in[7].val[1] = vld1q_s32(input + 4); + input += 8; + in[15].val[0] = vld1q_s32(input); + in[15].val[1] = vld1q_s32(input + 4); + + // Transpose + transpose_s32_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], + &in[7]); + transpose_s32_8x8(&in[8], &in[9], &in[10], &in[11], &in[12], &in[13], &in[14], + &in[15]); + + x[0] = in[15]; + x[1] = in[0]; + x[2] = in[13]; + x[3] = in[2]; + x[4] = in[11]; + x[5] = in[4]; + x[6] = in[9]; + x[7] = in[6]; + x[8] = in[7]; + x[9] = in[8]; + x[10] = in[5]; + x[11] = in[10]; + x[12] = in[3]; + x[13] = in[12]; + x[14] = in[1]; + x[15] = in[14]; + + // stage 1 + highbd_iadst_butterfly(x[0], x[1], vget_low_s32(c_1_31_5_27), 0, 1, s0, s1); + highbd_iadst_butterfly(x[2], x[3], vget_high_s32(c_1_31_5_27), 0, 1, s2, s3); + highbd_iadst_butterfly(x[4], x[5], vget_low_s32(c_9_23_13_19), 0, 1, s4, s5); + highbd_iadst_butterfly(x[6], x[7], vget_high_s32(c_9_23_13_19), 0, 1, s6, s7); + highbd_iadst_butterfly(x[8], x[9], vget_low_s32(c_17_15_21_11), 0, 1, s8, s9); + highbd_iadst_butterfly(x[10], x[11], vget_high_s32(c_17_15_21_11), 0, 1, s10, + s11); + highbd_iadst_butterfly(x[12], x[13], 
vget_low_s32(c_25_7_29_3), 0, 1, s12, + s13); + highbd_iadst_butterfly(x[14], x[15], vget_high_s32(c_25_7_29_3), 0, 1, s14, + s15); + + x[0] = highbd_add_dct_const_round_shift_low_8(s0, s8); + x[1] = highbd_add_dct_const_round_shift_low_8(s1, s9); + x[2] = highbd_add_dct_const_round_shift_low_8(s2, s10); + x[3] = highbd_add_dct_const_round_shift_low_8(s3, s11); + x[4] = highbd_add_dct_const_round_shift_low_8(s4, s12); + x[5] = highbd_add_dct_const_round_shift_low_8(s5, s13); + x[6] = highbd_add_dct_const_round_shift_low_8(s6, s14); + x[7] = highbd_add_dct_const_round_shift_low_8(s7, s15); + x[8] = highbd_sub_dct_const_round_shift_low_8(s0, s8); + x[9] = highbd_sub_dct_const_round_shift_low_8(s1, s9); + x[10] = highbd_sub_dct_const_round_shift_low_8(s2, s10); + x[11] = highbd_sub_dct_const_round_shift_low_8(s3, s11); + x[12] = highbd_sub_dct_const_round_shift_low_8(s4, s12); + x[13] = highbd_sub_dct_const_round_shift_low_8(s5, s13); + x[14] = highbd_sub_dct_const_round_shift_low_8(s6, s14); + x[15] = highbd_sub_dct_const_round_shift_low_8(s7, s15); + + // stage 2 + t[0] = x[0]; + t[1] = x[1]; + t[2] = x[2]; + t[3] = x[3]; + t[4] = x[4]; + t[5] = x[5]; + t[6] = x[6]; + t[7] = x[7]; + highbd_iadst_butterfly(x[8], x[9], vget_low_s32(c_4_28_20_12), 0, 1, s8, s9); + highbd_iadst_butterfly(x[10], x[11], vget_high_s32(c_4_28_20_12), 0, 1, s10, + s11); + highbd_iadst_butterfly(x[13], x[12], vget_low_s32(c_4_28_20_12), 1, 0, s13, + s12); + highbd_iadst_butterfly(x[15], x[14], vget_high_s32(c_4_28_20_12), 1, 0, s15, + s14); + + x[0] = vaddq_s32_dual(t[0], t[4]); + x[1] = vaddq_s32_dual(t[1], t[5]); + x[2] = vaddq_s32_dual(t[2], t[6]); + x[3] = vaddq_s32_dual(t[3], t[7]); + x[4] = vsubq_s32_dual(t[0], t[4]); + x[5] = vsubq_s32_dual(t[1], t[5]); + x[6] = vsubq_s32_dual(t[2], t[6]); + x[7] = vsubq_s32_dual(t[3], t[7]); + x[8] = highbd_add_dct_const_round_shift_low_8(s8, s12); + x[9] = highbd_add_dct_const_round_shift_low_8(s9, s13); + x[10] = 
highbd_add_dct_const_round_shift_low_8(s10, s14); + x[11] = highbd_add_dct_const_round_shift_low_8(s11, s15); + x[12] = highbd_sub_dct_const_round_shift_low_8(s8, s12); + x[13] = highbd_sub_dct_const_round_shift_low_8(s9, s13); + x[14] = highbd_sub_dct_const_round_shift_low_8(s10, s14); + x[15] = highbd_sub_dct_const_round_shift_low_8(s11, s15); + + // stage 3 + t[0] = x[0]; + t[1] = x[1]; + t[2] = x[2]; + t[3] = x[3]; + highbd_iadst_butterfly(x[4], x[5], vget_high_s32(c_16_n16_8_24), 0, 1, s4, + s5); + highbd_iadst_butterfly(x[7], x[6], vget_high_s32(c_16_n16_8_24), 1, 0, s7, + s6); + t[8] = x[8]; + t[9] = x[9]; + t[10] = x[10]; + t[11] = x[11]; + highbd_iadst_butterfly(x[12], x[13], vget_high_s32(c_16_n16_8_24), 0, 1, s12, + s13); + highbd_iadst_butterfly(x[15], x[14], vget_high_s32(c_16_n16_8_24), 1, 0, s15, + s14); + + x[0] = vaddq_s32_dual(t[0], t[2]); + x[1] = vaddq_s32_dual(t[1], t[3]); + x[2] = vsubq_s32_dual(t[0], t[2]); + x[3] = vsubq_s32_dual(t[1], t[3]); + x[4] = highbd_add_dct_const_round_shift_low_8(s4, s6); + x[5] = highbd_add_dct_const_round_shift_low_8(s5, s7); + x[6] = highbd_sub_dct_const_round_shift_low_8(s4, s6); + x[7] = highbd_sub_dct_const_round_shift_low_8(s5, s7); + x[8] = vaddq_s32_dual(t[8], t[10]); + x[9] = vaddq_s32_dual(t[9], t[11]); + x[10] = vsubq_s32_dual(t[8], t[10]); + x[11] = vsubq_s32_dual(t[9], t[11]); + x[12] = highbd_add_dct_const_round_shift_low_8(s12, s14); + x[13] = highbd_add_dct_const_round_shift_low_8(s13, s15); + x[14] = highbd_sub_dct_const_round_shift_low_8(s12, s14); + x[15] = highbd_sub_dct_const_round_shift_low_8(s13, s15); + + // stage 4 + { + const int32x4x2_t sum = vaddq_s32_dual(x[2], x[3]); + const int32x4x2_t sub = vsubq_s32_dual(x[2], x[3]); + highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 1, x[2]); + highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[3]); + } + { + const int32x4x2_t sum = vaddq_s32_dual(x[7], x[6]); + const int32x4x2_t sub = vsubq_s32_dual(x[7], x[6]); + 
highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 0, x[6]); + highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[7]); + } + { + const int32x4x2_t sum = vaddq_s32_dual(x[11], x[10]); + const int32x4x2_t sub = vsubq_s32_dual(x[11], x[10]); + highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 0, x[10]); + highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[11]); + } + { + const int32x4x2_t sum = vaddq_s32_dual(x[14], x[15]); + const int32x4x2_t sub = vsubq_s32_dual(x[14], x[15]); + highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 1, x[14]); + highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[15]); + } + + out[0] = x[0]; + out[1] = vnegq_s32_dual(x[8]); + out[2] = x[12]; + out[3] = vnegq_s32_dual(x[4]); + out[4] = x[6]; + out[5] = x[14]; + out[6] = x[10]; + out[7] = x[2]; + out[8] = x[3]; + out[9] = x[11]; + out[10] = x[15]; + out[11] = x[7]; + out[12] = x[5]; + out[13] = vnegq_s32_dual(x[13]); + out[14] = x[9]; + out[15] = vnegq_s32_dual(x[1]); + + if (output) { + highbd_idct16x16_store_pass1(out, output); + } else { + highbd_idct16x16_add_store(out, dest, stride, bd); + } +} + +typedef void (*highbd_iht_1d)(const int32_t *input, int32_t *output, + uint16_t *dest, const int stride, const int bd); + +typedef struct { + highbd_iht_1d cols, rows; // vertical and horizontal +} highbd_iht_2d; + +void vp9_highbd_iht16x16_256_add_neon(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + if (bd == 8) { + static const iht_2d IHT_16[] = { + { vpx_idct16x16_256_add_half1d, + vpx_idct16x16_256_add_half1d }, // DCT_DCT = 0 + { vpx_iadst16x16_256_add_half1d, + vpx_idct16x16_256_add_half1d }, // ADST_DCT = 1 + { vpx_idct16x16_256_add_half1d, + vpx_iadst16x16_256_add_half1d }, // DCT_ADST = 2 + { vpx_iadst16x16_256_add_half1d, + vpx_iadst16x16_256_add_half1d } // ADST_ADST = 3 + }; + const iht_2d ht = IHT_16[tx_type]; + int16_t row_output[16 * 16]; + + // pass 1 + 
ht.rows(input, row_output, dest, stride, 1); // upper 8 rows + ht.rows(input + 8 * 16, row_output + 8, dest, stride, 1); // lower 8 rows + + // pass 2 + ht.cols(row_output, NULL, dest, stride, 1); // left 8 columns + ht.cols(row_output + 16 * 8, NULL, dest + 8, stride, 1); // right 8 columns + } else { + static const highbd_iht_2d IHT_16[] = { + { vpx_highbd_idct16x16_256_add_half1d, + vpx_highbd_idct16x16_256_add_half1d }, // DCT_DCT = 0 + { highbd_iadst16_neon, + vpx_highbd_idct16x16_256_add_half1d }, // ADST_DCT = 1 + { vpx_highbd_idct16x16_256_add_half1d, + highbd_iadst16_neon }, // DCT_ADST = 2 + { highbd_iadst16_neon, highbd_iadst16_neon } // ADST_ADST = 3 + }; + const highbd_iht_2d ht = IHT_16[tx_type]; + int32_t row_output[16 * 16]; + + // pass 1 + ht.rows(input, row_output, dest, stride, bd); // upper 8 rows + ht.rows(input + 8 * 16, row_output + 8, dest, stride, bd); // lower 8 rows + + // pass 2 + ht.cols(row_output, NULL, dest, stride, bd); // left 8 columns + ht.cols(row_output + 8 * 16, NULL, dest + 8, stride, + bd); // right 8 columns + } +} diff --git a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c new file mode 100644 index 0000000000..52c4f1937d --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <arm_neon.h> +#include <assert.h> + +#include "./vp9_rtcd.h" +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/arm/neon/vp9_iht_neon.h" +#include "vpx_dsp/arm/highbd_idct_neon.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/mem_neon.h" +#include "vpx_dsp/txfm_common.h" + +static INLINE void highbd_iadst4(int32x4_t *const io) { + const int32_t sinpis[4] = { sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9 }; + const int32x4_t sinpi = vld1q_s32(sinpis); + int64x2x2_t s[7], t[4]; + int32x4_t s7; + + s[0].val[0] = vmull_lane_s32(vget_low_s32(io[0]), vget_low_s32(sinpi), 0); + s[0].val[1] = vmull_lane_s32(vget_high_s32(io[0]), vget_low_s32(sinpi), 0); + s[1].val[0] = vmull_lane_s32(vget_low_s32(io[0]), vget_low_s32(sinpi), 1); + s[1].val[1] = vmull_lane_s32(vget_high_s32(io[0]), vget_low_s32(sinpi), 1); + s[2].val[0] = vmull_lane_s32(vget_low_s32(io[1]), vget_high_s32(sinpi), 0); + s[2].val[1] = vmull_lane_s32(vget_high_s32(io[1]), vget_high_s32(sinpi), 0); + s[3].val[0] = vmull_lane_s32(vget_low_s32(io[2]), vget_high_s32(sinpi), 1); + s[3].val[1] = vmull_lane_s32(vget_high_s32(io[2]), vget_high_s32(sinpi), 1); + s[4].val[0] = vmull_lane_s32(vget_low_s32(io[2]), vget_low_s32(sinpi), 0); + s[4].val[1] = vmull_lane_s32(vget_high_s32(io[2]), vget_low_s32(sinpi), 0); + s[5].val[0] = vmull_lane_s32(vget_low_s32(io[3]), vget_low_s32(sinpi), 1); + s[5].val[1] = vmull_lane_s32(vget_high_s32(io[3]), vget_low_s32(sinpi), 1); + s[6].val[0] = vmull_lane_s32(vget_low_s32(io[3]), vget_high_s32(sinpi), 1); + s[6].val[1] = vmull_lane_s32(vget_high_s32(io[3]), vget_high_s32(sinpi), 1); + s7 = vsubq_s32(io[0], io[2]); + s7 = vaddq_s32(s7, io[3]); + + s[0].val[0] = vaddq_s64(s[0].val[0], s[3].val[0]); + s[0].val[1] = vaddq_s64(s[0].val[1], s[3].val[1]); + s[0].val[0] = vaddq_s64(s[0].val[0], s[5].val[0]); + s[0].val[1] = vaddq_s64(s[0].val[1], s[5].val[1]); + s[1].val[0] = vsubq_s64(s[1].val[0], s[4].val[0]); + s[1].val[1] = 
vsubq_s64(s[1].val[1], s[4].val[1]); + s[1].val[0] = vsubq_s64(s[1].val[0], s[6].val[0]); + s[1].val[1] = vsubq_s64(s[1].val[1], s[6].val[1]); + s[3] = s[2]; + s[2].val[0] = vmull_lane_s32(vget_low_s32(s7), vget_high_s32(sinpi), 0); + s[2].val[1] = vmull_lane_s32(vget_high_s32(s7), vget_high_s32(sinpi), 0); + + t[0].val[0] = vaddq_s64(s[0].val[0], s[3].val[0]); + t[0].val[1] = vaddq_s64(s[0].val[1], s[3].val[1]); + t[1].val[0] = vaddq_s64(s[1].val[0], s[3].val[0]); + t[1].val[1] = vaddq_s64(s[1].val[1], s[3].val[1]); + t[2] = s[2]; + t[3].val[0] = vaddq_s64(s[0].val[0], s[1].val[0]); + t[3].val[1] = vaddq_s64(s[0].val[1], s[1].val[1]); + t[3].val[0] = vsubq_s64(t[3].val[0], s[3].val[0]); + t[3].val[1] = vsubq_s64(t[3].val[1], s[3].val[1]); + io[0] = vcombine_s32(vrshrn_n_s64(t[0].val[0], DCT_CONST_BITS), + vrshrn_n_s64(t[0].val[1], DCT_CONST_BITS)); + io[1] = vcombine_s32(vrshrn_n_s64(t[1].val[0], DCT_CONST_BITS), + vrshrn_n_s64(t[1].val[1], DCT_CONST_BITS)); + io[2] = vcombine_s32(vrshrn_n_s64(t[2].val[0], DCT_CONST_BITS), + vrshrn_n_s64(t[2].val[1], DCT_CONST_BITS)); + io[3] = vcombine_s32(vrshrn_n_s64(t[3].val[0], DCT_CONST_BITS), + vrshrn_n_s64(t[3].val[1], DCT_CONST_BITS)); +} + +void vp9_highbd_iht4x4_16_add_neon(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + const int16x8_t max = vdupq_n_s16((1 << bd) - 1); + int16x8_t a[2]; + int32x4_t c[4]; + + c[0] = vld1q_s32(input); + c[1] = vld1q_s32(input + 4); + c[2] = vld1q_s32(input + 8); + c[3] = vld1q_s32(input + 12); + + if (bd == 8) { + a[0] = vcombine_s16(vmovn_s32(c[0]), vmovn_s32(c[1])); + a[1] = vcombine_s16(vmovn_s32(c[2]), vmovn_s32(c[3])); + transpose_s16_4x4q(&a[0], &a[1]); + + switch (tx_type) { + case DCT_DCT: + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + transpose_s16_4x4q(&a[0], &a[1]); + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + break; + + case ADST_DCT: + 
idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + transpose_s16_4x4q(&a[0], &a[1]); + iadst4(a); + break; + + case DCT_ADST: + iadst4(a); + transpose_s16_4x4q(&a[0], &a[1]); + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + break; + + default: + assert(tx_type == ADST_ADST); + iadst4(a); + transpose_s16_4x4q(&a[0], &a[1]); + iadst4(a); + break; + } + a[0] = vrshrq_n_s16(a[0], 4); + a[1] = vrshrq_n_s16(a[1], 4); + } else { + switch (tx_type) { + case DCT_DCT: { + const int32x4_t cospis = vld1q_s32(kCospi32); + + if (bd == 10) { + idct4x4_16_kernel_bd10(cospis, c); + idct4x4_16_kernel_bd10(cospis, c); + } else { + idct4x4_16_kernel_bd12(cospis, c); + idct4x4_16_kernel_bd12(cospis, c); + } + break; + } + + case ADST_DCT: { + const int32x4_t cospis = vld1q_s32(kCospi32); + + if (bd == 10) { + idct4x4_16_kernel_bd10(cospis, c); + } else { + idct4x4_16_kernel_bd12(cospis, c); + } + transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); + highbd_iadst4(c); + break; + } + + case DCT_ADST: { + const int32x4_t cospis = vld1q_s32(kCospi32); + + transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); + highbd_iadst4(c); + if (bd == 10) { + idct4x4_16_kernel_bd10(cospis, c); + } else { + idct4x4_16_kernel_bd12(cospis, c); + } + break; + } + + default: { + assert(tx_type == ADST_ADST); + transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); + highbd_iadst4(c); + transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); + highbd_iadst4(c); + break; + } + } + a[0] = vcombine_s16(vqrshrn_n_s32(c[0], 4), vqrshrn_n_s32(c[1], 4)); + a[1] = vcombine_s16(vqrshrn_n_s32(c[2], 4), vqrshrn_n_s32(c[3], 4)); + } + + highbd_idct4x4_1_add_kernel1(&dest, stride, a[0], max); + highbd_idct4x4_1_add_kernel1(&dest, stride, a[1], max); +} diff --git a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c new file mode 100644 index 0000000000..2232c6841c --- 
/dev/null +++ b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> + +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_enums.h" +#include "vp9/common/arm/neon/vp9_iht_neon.h" +#include "vpx_dsp/arm/highbd_idct_neon.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/transpose_neon.h" +#include "vpx_dsp/inv_txfm.h" + +static INLINE void highbd_iadst_half_butterfly_neon(int32x4_t *const x, + const int32x2_t c) { + const int32x4_t sum = vaddq_s32(x[0], x[1]); + const int32x4_t sub = vsubq_s32(x[0], x[1]); + const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(sum), c, 0); + const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(sub), c, 0); + const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(sum), c, 0); + const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(sub), c, 0); + const int32x2_t out0_lo = vrshrn_n_s64(t0_lo, DCT_CONST_BITS); + const int32x2_t out1_lo = vrshrn_n_s64(t1_lo, DCT_CONST_BITS); + const int32x2_t out0_hi = vrshrn_n_s64(t0_hi, DCT_CONST_BITS); + const int32x2_t out1_hi = vrshrn_n_s64(t1_hi, DCT_CONST_BITS); + + x[0] = vcombine_s32(out0_lo, out0_hi); + x[1] = vcombine_s32(out1_lo, out1_hi); +} + +static INLINE void highbd_iadst_butterfly_lane_0_1_neon(const int32x4_t in0, + const int32x4_t in1, + const int32x2_t c, + int64x2_t *const s0, + int64x2_t *const s1) { + const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(in0), c, 0); + const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(in0), c, 1); + const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(in0), c, 0); + const 
int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(in0), c, 1); + + s0[0] = vmlal_lane_s32(t0_lo, vget_low_s32(in1), c, 1); + s1[0] = vmlsl_lane_s32(t1_lo, vget_low_s32(in1), c, 0); + s0[1] = vmlal_lane_s32(t0_hi, vget_high_s32(in1), c, 1); + s1[1] = vmlsl_lane_s32(t1_hi, vget_high_s32(in1), c, 0); +} + +static INLINE void highbd_iadst_butterfly_lane_1_0_neon(const int32x4_t in0, + const int32x4_t in1, + const int32x2_t c, + int64x2_t *const s0, + int64x2_t *const s1) { + const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(in0), c, 1); + const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(in0), c, 0); + const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(in0), c, 1); + const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(in0), c, 0); + + s0[0] = vmlal_lane_s32(t0_lo, vget_low_s32(in1), c, 0); + s1[0] = vmlsl_lane_s32(t1_lo, vget_low_s32(in1), c, 1); + s0[1] = vmlal_lane_s32(t0_hi, vget_high_s32(in1), c, 0); + s1[1] = vmlsl_lane_s32(t1_hi, vget_high_s32(in1), c, 1); +} + +static INLINE int32x4_t highbd_add_dct_const_round_shift_low_8( + const int64x2_t *const in0, const int64x2_t *const in1) { + const int64x2_t sum_lo = vaddq_s64(in0[0], in1[0]); + const int64x2_t sum_hi = vaddq_s64(in0[1], in1[1]); + const int32x2_t out_lo = vrshrn_n_s64(sum_lo, DCT_CONST_BITS); + const int32x2_t out_hi = vrshrn_n_s64(sum_hi, DCT_CONST_BITS); + return vcombine_s32(out_lo, out_hi); +} + +static INLINE int32x4_t highbd_sub_dct_const_round_shift_low_8( + const int64x2_t *const in0, const int64x2_t *const in1) { + const int64x2_t sub_lo = vsubq_s64(in0[0], in1[0]); + const int64x2_t sub_hi = vsubq_s64(in0[1], in1[1]); + const int32x2_t out_lo = vrshrn_n_s64(sub_lo, DCT_CONST_BITS); + const int32x2_t out_hi = vrshrn_n_s64(sub_hi, DCT_CONST_BITS); + return vcombine_s32(out_lo, out_hi); +} + +static INLINE void highbd_iadst8(int32x4_t *const io0, int32x4_t *const io1, + int32x4_t *const io2, int32x4_t *const io3, + int32x4_t *const io4, int32x4_t *const io5, + int32x4_t *const io6, int32x4_t 
*const io7) { + const int32x4_t c0 = + create_s32x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64); + const int32x4_t c1 = + create_s32x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64); + const int32x4_t c2 = + create_s32x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64); + int32x4_t x[8], t[4]; + int64x2_t s[8][2]; + + x[0] = *io7; + x[1] = *io0; + x[2] = *io5; + x[3] = *io2; + x[4] = *io3; + x[5] = *io4; + x[6] = *io1; + x[7] = *io6; + + // stage 1 + highbd_iadst_butterfly_lane_0_1_neon(x[0], x[1], vget_low_s32(c0), s[0], + s[1]); + highbd_iadst_butterfly_lane_0_1_neon(x[2], x[3], vget_high_s32(c0), s[2], + s[3]); + highbd_iadst_butterfly_lane_0_1_neon(x[4], x[5], vget_low_s32(c1), s[4], + s[5]); + highbd_iadst_butterfly_lane_0_1_neon(x[6], x[7], vget_high_s32(c1), s[6], + s[7]); + + x[0] = highbd_add_dct_const_round_shift_low_8(s[0], s[4]); + x[1] = highbd_add_dct_const_round_shift_low_8(s[1], s[5]); + x[2] = highbd_add_dct_const_round_shift_low_8(s[2], s[6]); + x[3] = highbd_add_dct_const_round_shift_low_8(s[3], s[7]); + x[4] = highbd_sub_dct_const_round_shift_low_8(s[0], s[4]); + x[5] = highbd_sub_dct_const_round_shift_low_8(s[1], s[5]); + x[6] = highbd_sub_dct_const_round_shift_low_8(s[2], s[6]); + x[7] = highbd_sub_dct_const_round_shift_low_8(s[3], s[7]); + + // stage 2 + t[0] = x[0]; + t[1] = x[1]; + t[2] = x[2]; + t[3] = x[3]; + highbd_iadst_butterfly_lane_0_1_neon(x[4], x[5], vget_high_s32(c2), s[4], + s[5]); + highbd_iadst_butterfly_lane_1_0_neon(x[7], x[6], vget_high_s32(c2), s[7], + s[6]); + + x[0] = vaddq_s32(t[0], t[2]); + x[1] = vaddq_s32(t[1], t[3]); + x[2] = vsubq_s32(t[0], t[2]); + x[3] = vsubq_s32(t[1], t[3]); + x[4] = highbd_add_dct_const_round_shift_low_8(s[4], s[6]); + x[5] = highbd_add_dct_const_round_shift_low_8(s[5], s[7]); + x[6] = highbd_sub_dct_const_round_shift_low_8(s[4], s[6]); + x[7] = highbd_sub_dct_const_round_shift_low_8(s[5], s[7]); + + // stage 3 + highbd_iadst_half_butterfly_neon(x + 2, vget_low_s32(c2)); + 
// 8x8 inverse hybrid transform (any DCT/ADST row-column combination) for
// high-bitdepth input, NEON version.
//
// Loads 64 tran_low_t coefficients (32-bit in high-bitdepth builds, as shown
// by the vld1q_s32 loads), applies the two 1-D transforms selected by
// tx_type, rounds by 1/32 (shift of 5) and accumulates into the 16-bit
// destination via highbd_add8x8().
//
// bd == 8 narrows to 16-bit lanes and reuses the bd8 kernels; bd == 10/12
// stay in 32-bit lanes.  NOTE(review): for the mixed/ADST cases in the
// high-bitdepth path only the bd12 idct kernel is used regardless of
// bd == 10 — presumably the wider-range kernel is safe for both; confirm
// against upstream libvpx.
void vp9_highbd_iht8x8_64_add_neon(const tran_low_t *input, uint16_t *dest,
                                   int stride, int tx_type, int bd) {
  int32x4_t a[16];  // 8x8 coefficients, two 4-lane vectors per row
  int16x8_t c[8];   // packed 16-bit rows handed to highbd_add8x8()

  a[0] = vld1q_s32(input);
  a[1] = vld1q_s32(input + 4);
  a[2] = vld1q_s32(input + 8);
  a[3] = vld1q_s32(input + 12);
  a[4] = vld1q_s32(input + 16);
  a[5] = vld1q_s32(input + 20);
  a[6] = vld1q_s32(input + 24);
  a[7] = vld1q_s32(input + 28);
  a[8] = vld1q_s32(input + 32);
  a[9] = vld1q_s32(input + 36);
  a[10] = vld1q_s32(input + 40);
  a[11] = vld1q_s32(input + 44);
  a[12] = vld1q_s32(input + 48);
  a[13] = vld1q_s32(input + 52);
  a[14] = vld1q_s32(input + 56);
  a[15] = vld1q_s32(input + 60);

  if (bd == 8) {
    // 8-bit path: coefficients fit in 16 bits, so narrow and run the
    // regular (bd8) 16-bit kernels.
    c[0] = vcombine_s16(vmovn_s32(a[0]), vmovn_s32(a[1]));
    c[1] = vcombine_s16(vmovn_s32(a[2]), vmovn_s32(a[3]));
    c[2] = vcombine_s16(vmovn_s32(a[4]), vmovn_s32(a[5]));
    c[3] = vcombine_s16(vmovn_s32(a[6]), vmovn_s32(a[7]));
    c[4] = vcombine_s16(vmovn_s32(a[8]), vmovn_s32(a[9]));
    c[5] = vcombine_s16(vmovn_s32(a[10]), vmovn_s32(a[11]));
    c[6] = vcombine_s16(vmovn_s32(a[12]), vmovn_s32(a[13]));
    c[7] = vcombine_s16(vmovn_s32(a[14]), vmovn_s32(a[15]));

    switch (tx_type) {
      case DCT_DCT: {
        const int16x8_t cospis = vld1q_s16(kCospi);
        const int16x4_t cospis0 = vget_low_s16(cospis);   // cospi 0, 8, 16, 24
        const int16x4_t cospis1 = vget_high_s16(cospis);  // cospi 4, 12, 20, 28

        idct8x8_64_1d_bd8(cospis0, cospis1, c);
        idct8x8_64_1d_bd8(cospis0, cospis1, c);
        break;
      }

      case ADST_DCT: {
        const int16x8_t cospis = vld1q_s16(kCospi);
        const int16x4_t cospis0 = vget_low_s16(cospis);   // cospi 0, 8, 16, 24
        const int16x4_t cospis1 = vget_high_s16(cospis);  // cospi 4, 12, 20, 28

        idct8x8_64_1d_bd8(cospis0, cospis1, c);
        // Explicit transpose between the row idct and the column iadst.
        transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6],
                          &c[7]);
        iadst8(c);
        break;
      }

      case DCT_ADST: {
        const int16x8_t cospis = vld1q_s16(kCospi);
        const int16x4_t cospis0 = vget_low_s16(cospis);   // cospi 0, 8, 16, 24
        const int16x4_t cospis1 = vget_high_s16(cospis);  // cospi 4, 12, 20, 28

        transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6],
                          &c[7]);
        iadst8(c);
        idct8x8_64_1d_bd8(cospis0, cospis1, c);
        break;
      }

      default: {  // ADST_ADST
        transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6],
                          &c[7]);
        iadst8(c);
        transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6],
                          &c[7]);
        iadst8(c);
        break;
      }
    }

    // Final rounding: shift of 5 == divide by 32 with rounding.
    c[0] = vrshrq_n_s16(c[0], 5);
    c[1] = vrshrq_n_s16(c[1], 5);
    c[2] = vrshrq_n_s16(c[2], 5);
    c[3] = vrshrq_n_s16(c[3], 5);
    c[4] = vrshrq_n_s16(c[4], 5);
    c[5] = vrshrq_n_s16(c[5], 5);
    c[6] = vrshrq_n_s16(c[6], 5);
    c[7] = vrshrq_n_s16(c[7], 5);
  } else {
    // 10/12-bit path: stay in 32-bit lanes throughout.
    switch (tx_type) {
      case DCT_DCT: {
        const int32x4_t cospis0 = vld1q_s32(kCospi32);  // cospi 0, 8, 16, 24
        const int32x4_t cospis1 =
            vld1q_s32(kCospi32 + 4);  // cospi 4, 12, 20, 28

        if (bd == 10) {
          idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
                                 &a[4], &a[5], &a[6], &a[7]);
          idct8x8_64_half1d_bd10(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11],
                                 &a[12], &a[13], &a[14], &a[15]);
          idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9],
                                 &a[2], &a[10], &a[3], &a[11]);
          idct8x8_64_half1d_bd10(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13],
                                 &a[6], &a[14], &a[7], &a[15]);
        } else {
          idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
                                 &a[4], &a[5], &a[6], &a[7]);
          idct8x8_64_half1d_bd12(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11],
                                 &a[12], &a[13], &a[14], &a[15]);
          idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9],
                                 &a[2], &a[10], &a[3], &a[11]);
          idct8x8_64_half1d_bd12(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13],
                                 &a[6], &a[14], &a[7], &a[15]);
        }
        break;
      }

      case ADST_DCT: {
        const int32x4_t cospis0 = vld1q_s32(kCospi32);  // cospi 0, 8, 16, 24
        const int32x4_t cospis1 =
            vld1q_s32(kCospi32 + 4);  // cospi 4, 12, 20, 28

        idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
                               &a[4], &a[5], &a[6], &a[7]);
        idct8x8_64_half1d_bd12(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11],
                               &a[12], &a[13], &a[14], &a[15]);
        transpose_s32_8x4(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3],
                          &a[11]);
        highbd_iadst8(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]);
        transpose_s32_8x4(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7],
                          &a[15]);
        highbd_iadst8(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7],
                      &a[15]);
        break;
      }

      case DCT_ADST: {
        const int32x4_t cospis0 = vld1q_s32(kCospi32);  // cospi 0, 8, 16, 24
        const int32x4_t cospis1 =
            vld1q_s32(kCospi32 + 4);  // cospi 4, 12, 20, 28

        transpose_s32_8x4(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
                          &a[7]);
        highbd_iadst8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
        transpose_s32_8x4(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14],
                          &a[15]);
        highbd_iadst8(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14],
                      &a[15]);
        idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9],
                               &a[2], &a[10], &a[3], &a[11]);
        idct8x8_64_half1d_bd12(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13],
                               &a[6], &a[14], &a[7], &a[15]);
        break;
      }

      default: {
        assert(tx_type == ADST_ADST);
        transpose_s32_8x4(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
                          &a[7]);
        highbd_iadst8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
        transpose_s32_8x4(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14],
                          &a[15]);
        highbd_iadst8(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14],
                      &a[15]);
        transpose_s32_8x4(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3],
                          &a[11]);
        highbd_iadst8(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]);
        transpose_s32_8x4(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7],
                          &a[15]);
        highbd_iadst8(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7],
                      &a[15]);
        break;
      }
    }

    // Round (shift of 5), narrow to 16 bits, and pack the interleaved
    // column layout back into row order for highbd_add8x8().
    c[0] = vcombine_s16(vrshrn_n_s32(a[0], 5), vrshrn_n_s32(a[4], 5));
    c[1] = vcombine_s16(vrshrn_n_s32(a[8], 5), vrshrn_n_s32(a[12], 5));
    c[2] = vcombine_s16(vrshrn_n_s32(a[1], 5), vrshrn_n_s32(a[5], 5));
    c[3] = vcombine_s16(vrshrn_n_s32(a[9], 5), vrshrn_n_s32(a[13], 5));
    c[4] = vcombine_s16(vrshrn_n_s32(a[2], 5), vrshrn_n_s32(a[6], 5));
    c[5] = vcombine_s16(vrshrn_n_s32(a[10], 5), vrshrn_n_s32(a[14], 5));
    c[6] = vcombine_s16(vrshrn_n_s32(a[3], 5), vrshrn_n_s32(a[7], 5));
    c[7] = vcombine_s16(vrshrn_n_s32(a[11], 5), vrshrn_n_s32(a[15], 5));
  }
  highbd_add8x8(c, dest, stride, bd);
}
/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>
#include <assert.h>

#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/arm/neon/vp9_iht_neon.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"

// One 1-D 16-point inverse ADST over an 8-wide half of a 16x16 block.
//
// Dual-mode, matching vpx_idct16x16_256_add_half1d:
// - pass 1 (output != NULL): input is tran_low_t coefficients; the
//   transformed rows are written to the intermediate `output` buffer.
// - pass 2 (output == NULL): input is int16_t intermediate data; the result
//   is rounded and added into `dest` (8-bit, or 16-bit when highbd_flag
//   selects the bd8 high-bitdepth store).
void vpx_iadst16x16_256_add_half1d(const void *const input, int16_t *output,
                                   void *const dest, const int stride,
                                   const int highbd_flag) {
  int16x8_t in[16], out[16];
  // cospi constant pairs packed four to a vector for lane-indexed multiplies.
  const int16x4_t c_1_31_5_27 =
      create_s16x4_neon(cospi_1_64, cospi_31_64, cospi_5_64, cospi_27_64);
  const int16x4_t c_9_23_13_19 =
      create_s16x4_neon(cospi_9_64, cospi_23_64, cospi_13_64, cospi_19_64);
  const int16x4_t c_17_15_21_11 =
      create_s16x4_neon(cospi_17_64, cospi_15_64, cospi_21_64, cospi_11_64);
  const int16x4_t c_25_7_29_3 =
      create_s16x4_neon(cospi_25_64, cospi_7_64, cospi_29_64, cospi_3_64);
  const int16x4_t c_4_28_20_12 =
      create_s16x4_neon(cospi_4_64, cospi_28_64, cospi_20_64, cospi_12_64);
  const int16x4_t c_16_n16_8_24 =
      create_s16x4_neon(cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64);
  int16x8_t x[16], t[12];
  int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
  int32x4_t s8[2], s9[2], s10[2], s11[2], s12[2], s13[2], s14[2], s15[2];

  // Load input (16x8).  Rows are loaded in interleaved order
  // (0,8,1,9,...,7,15); the source type depends on the pass (see above).
  if (output) {
    const tran_low_t *inputT = (const tran_low_t *)input;
    in[0] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[8] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[1] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[9] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[2] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[10] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[3] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[11] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[4] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[12] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[5] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[13] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[6] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[14] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[7] = load_tran_low_to_s16q(inputT);
    inputT += 8;
    in[15] = load_tran_low_to_s16q(inputT);
  } else {
    const int16_t *inputT = (const int16_t *)input;
    in[0] = vld1q_s16(inputT);
    inputT += 8;
    in[8] = vld1q_s16(inputT);
    inputT += 8;
    in[1] = vld1q_s16(inputT);
    inputT += 8;
    in[9] = vld1q_s16(inputT);
    inputT += 8;
    in[2] = vld1q_s16(inputT);
    inputT += 8;
    in[10] = vld1q_s16(inputT);
    inputT += 8;
    in[3] = vld1q_s16(inputT);
    inputT += 8;
    in[11] = vld1q_s16(inputT);
    inputT += 8;
    in[4] = vld1q_s16(inputT);
    inputT += 8;
    in[12] = vld1q_s16(inputT);
    inputT += 8;
    in[5] = vld1q_s16(inputT);
    inputT += 8;
    in[13] = vld1q_s16(inputT);
    inputT += 8;
    in[6] = vld1q_s16(inputT);
    inputT += 8;
    in[14] = vld1q_s16(inputT);
    inputT += 8;
    in[7] = vld1q_s16(inputT);
    inputT += 8;
    in[15] = vld1q_s16(inputT);
  }

  // Transpose both 8x8 halves so the transform runs along registers.
  transpose_s16_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6],
                    &in[7]);
  transpose_s16_8x8(&in[8], &in[9], &in[10], &in[11], &in[12], &in[13], &in[14],
                    &in[15]);

  // iadst16 input permutation.
  x[0] = in[15];
  x[1] = in[0];
  x[2] = in[13];
  x[3] = in[2];
  x[4] = in[11];
  x[5] = in[4];
  x[6] = in[9];
  x[7] = in[6];
  x[8] = in[7];
  x[9] = in[8];
  x[10] = in[5];
  x[11] = in[10];
  x[12] = in[3];
  x[13] = in[12];
  x[14] = in[1];
  x[15] = in[14];

  // stage 1
  iadst_butterfly_lane_0_1_neon(x[0], x[1], c_1_31_5_27, s0, s1);
  iadst_butterfly_lane_2_3_neon(x[2], x[3], c_1_31_5_27, s2, s3);
  iadst_butterfly_lane_0_1_neon(x[4], x[5], c_9_23_13_19, s4, s5);
  iadst_butterfly_lane_2_3_neon(x[6], x[7], c_9_23_13_19, s6, s7);
  iadst_butterfly_lane_0_1_neon(x[8], x[9], c_17_15_21_11, s8, s9);
  iadst_butterfly_lane_2_3_neon(x[10], x[11], c_17_15_21_11, s10, s11);
  iadst_butterfly_lane_0_1_neon(x[12], x[13], c_25_7_29_3, s12, s13);
  iadst_butterfly_lane_2_3_neon(x[14], x[15], c_25_7_29_3, s14, s15);

  x[0] = add_dct_const_round_shift_low_8(s0, s8);
  x[1] = add_dct_const_round_shift_low_8(s1, s9);
  x[2] = add_dct_const_round_shift_low_8(s2, s10);
  x[3] = add_dct_const_round_shift_low_8(s3, s11);
  x[4] = add_dct_const_round_shift_low_8(s4, s12);
  x[5] = add_dct_const_round_shift_low_8(s5, s13);
  x[6] = add_dct_const_round_shift_low_8(s6, s14);
  x[7] = add_dct_const_round_shift_low_8(s7, s15);
  x[8] = sub_dct_const_round_shift_low_8(s0, s8);
  x[9] = sub_dct_const_round_shift_low_8(s1, s9);
  x[10] = sub_dct_const_round_shift_low_8(s2, s10);
  x[11] = sub_dct_const_round_shift_low_8(s3, s11);
  x[12] = sub_dct_const_round_shift_low_8(s4, s12);
  x[13] = sub_dct_const_round_shift_low_8(s5, s13);
  x[14] = sub_dct_const_round_shift_low_8(s6, s14);
  x[15] = sub_dct_const_round_shift_low_8(s7, s15);

  // stage 2
  t[0] = x[0];
  t[1] = x[1];
  t[2] = x[2];
  t[3] = x[3];
  t[4] = x[4];
  t[5] = x[5];
  t[6] = x[6];
  t[7] = x[7];
  iadst_butterfly_lane_0_1_neon(x[8], x[9], c_4_28_20_12, s8, s9);
  iadst_butterfly_lane_2_3_neon(x[10], x[11], c_4_28_20_12, s10, s11);
  iadst_butterfly_lane_1_0_neon(x[13], x[12], c_4_28_20_12, s13, s12);
  iadst_butterfly_lane_3_2_neon(x[15], x[14], c_4_28_20_12, s15, s14);

  x[0] = vaddq_s16(t[0], t[4]);
  x[1] = vaddq_s16(t[1], t[5]);
  x[2] = vaddq_s16(t[2], t[6]);
  x[3] = vaddq_s16(t[3], t[7]);
  x[4] = vsubq_s16(t[0], t[4]);
  x[5] = vsubq_s16(t[1], t[5]);
  x[6] = vsubq_s16(t[2], t[6]);
  x[7] = vsubq_s16(t[3], t[7]);
  x[8] = add_dct_const_round_shift_low_8(s8, s12);
  x[9] = add_dct_const_round_shift_low_8(s9, s13);
  x[10] = add_dct_const_round_shift_low_8(s10, s14);
  x[11] = add_dct_const_round_shift_low_8(s11, s15);
  x[12] = sub_dct_const_round_shift_low_8(s8, s12);
  x[13] = sub_dct_const_round_shift_low_8(s9, s13);
  x[14] = sub_dct_const_round_shift_low_8(s10, s14);
  x[15] = sub_dct_const_round_shift_low_8(s11, s15);

  // stage 3
  t[0] = x[0];
  t[1] = x[1];
  t[2] = x[2];
  t[3] = x[3];
  iadst_butterfly_lane_2_3_neon(x[4], x[5], c_16_n16_8_24, s4, s5);
  iadst_butterfly_lane_3_2_neon(x[7], x[6], c_16_n16_8_24, s7, s6);
  t[8] = x[8];
  t[9] = x[9];
  t[10] = x[10];
  t[11] = x[11];
  iadst_butterfly_lane_2_3_neon(x[12], x[13], c_16_n16_8_24, s12, s13);
  iadst_butterfly_lane_3_2_neon(x[15], x[14], c_16_n16_8_24, s15, s14);

  x[0] = vaddq_s16(t[0], t[2]);
  x[1] = vaddq_s16(t[1], t[3]);
  x[2] = vsubq_s16(t[0], t[2]);
  x[3] = vsubq_s16(t[1], t[3]);
  x[4] = add_dct_const_round_shift_low_8(s4, s6);
  x[5] = add_dct_const_round_shift_low_8(s5, s7);
  x[6] = sub_dct_const_round_shift_low_8(s4, s6);
  x[7] = sub_dct_const_round_shift_low_8(s5, s7);
  x[8] = vaddq_s16(t[8], t[10]);
  x[9] = vaddq_s16(t[9], t[11]);
  x[10] = vsubq_s16(t[8], t[10]);
  x[11] = vsubq_s16(t[9], t[11]);
  x[12] = add_dct_const_round_shift_low_8(s12, s14);
  x[13] = add_dct_const_round_shift_low_8(s13, s15);
  x[14] = sub_dct_const_round_shift_low_8(s12, s14);
  x[15] = sub_dct_const_round_shift_low_8(s13, s15);

  // stage 4
  iadst_half_butterfly_neg_neon(&x[3], &x[2], c_16_n16_8_24);
  iadst_half_butterfly_pos_neon(&x[7], &x[6], c_16_n16_8_24);
  iadst_half_butterfly_pos_neon(&x[11], &x[10], c_16_n16_8_24);
  iadst_half_butterfly_neg_neon(&x[15], &x[14], c_16_n16_8_24);

  // Output permutation with the iadst sign pattern.
  out[0] = x[0];
  out[1] = vnegq_s16(x[8]);
  out[2] = x[12];
  out[3] = vnegq_s16(x[4]);
  out[4] = x[6];
  out[5] = x[14];
  out[6] = x[10];
  out[7] = x[2];
  out[8] = x[3];
  out[9] = x[11];
  out[10] = x[15];
  out[11] = x[7];
  out[12] = x[5];
  out[13] = vnegq_s16(x[13]);
  out[14] = x[9];
  out[15] = vnegq_s16(x[1]);

  if (output) {
    idct16x16_store_pass1(out, output);  // pass 1: store intermediate rows
  } else {
    if (highbd_flag) {
      idct16x16_add_store_bd8(out, dest, stride);  // 16-bit dest, bd == 8
    } else {
      idct16x16_add_store(out, dest, stride);  // 8-bit dest
    }
  }
}

// 16x16 inverse hybrid transform dispatch, NEON version.  Selects the
// row/column 1-D transforms from tx_type and runs two half-width passes in
// each direction through a 16x16 int16_t intermediate buffer.
void vp9_iht16x16_256_add_neon(const tran_low_t *input, uint8_t *dest,
                               int stride, int tx_type) {
  static const iht_2d IHT_16[] = {
    { vpx_idct16x16_256_add_half1d,
      vpx_idct16x16_256_add_half1d },  // DCT_DCT  = 0
    { vpx_iadst16x16_256_add_half1d,
      vpx_idct16x16_256_add_half1d },  // ADST_DCT = 1
    { vpx_idct16x16_256_add_half1d,
      vpx_iadst16x16_256_add_half1d },  // DCT_ADST = 2
    { vpx_iadst16x16_256_add_half1d,
      vpx_iadst16x16_256_add_half1d }  // ADST_ADST = 3
  };
  const iht_2d ht = IHT_16[tx_type];
  int16_t row_output[16 * 16];

  // pass 1
  ht.rows(input, row_output, dest, stride, 0);               // upper 8 rows
  ht.rows(input + 8 * 16, row_output + 8, dest, stride, 0);  // lower 8 rows

  // pass 2
  ht.cols(row_output, NULL, dest, stride, 0);               // left 8 columns
  ht.cols(row_output + 16 * 8, NULL, dest + 8, stride, 0);  // right 8 columns
}
+ */ + +#include <arm_neon.h> +#include <assert.h> + +#include "./vp9_rtcd.h" +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/arm/neon/vp9_iht_neon.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/mem_neon.h" +#include "vpx_dsp/txfm_common.h" + +void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + int16x8_t a[2]; + uint8x8_t s[2], d[2]; + uint16x8_t sum[2]; + + assert(!((intptr_t)dest % sizeof(uint32_t))); + assert(!(stride % sizeof(uint32_t))); + + a[0] = load_tran_low_to_s16q(input); + a[1] = load_tran_low_to_s16q(input + 8); + transpose_s16_4x4q(&a[0], &a[1]); + + switch (tx_type) { + case DCT_DCT: + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + transpose_s16_4x4q(&a[0], &a[1]); + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + break; + + case ADST_DCT: + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + transpose_s16_4x4q(&a[0], &a[1]); + iadst4(a); + break; + + case DCT_ADST: + iadst4(a); + transpose_s16_4x4q(&a[0], &a[1]); + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + break; + + default: + assert(tx_type == ADST_ADST); + iadst4(a); + transpose_s16_4x4q(&a[0], &a[1]); + iadst4(a); + break; + } + + a[0] = vrshrq_n_s16(a[0], 4); + a[1] = vrshrq_n_s16(a[1], 4); + s[0] = load_u8(dest, stride); + s[1] = load_u8(dest + 2 * stride, stride); + sum[0] = vaddw_u8(vreinterpretq_u16_s16(a[0]), s[0]); + sum[1] = vaddw_u8(vreinterpretq_u16_s16(a[1]), s[1]); + d[0] = vqmovun_s16(vreinterpretq_s16_u16(sum[0])); + d[1] = vqmovun_s16(vreinterpretq_s16_u16(sum[1])); + store_u8(dest, stride, d[0]); + store_u8(dest + 2 * stride, stride, d[1]); +} diff --git a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c new file mode 100644 index 
/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>
#include <assert.h>

#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/arm/neon/vp9_iht_neon.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"

// 8x8 inverse hybrid transform (8-bit path), NEON version.  Loads 64
// coefficients, applies the row and column 1-D transforms selected by
// tx_type, then rounds and adds into dest via idct8x8_add8x8_neon().
void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  const int16x8_t cospis = vld1q_s16(kCospi);
  const int16x4_t cospis0 = vget_low_s16(cospis);   // cospi 0, 8, 16, 24
  const int16x4_t cospis1 = vget_high_s16(cospis);  // cospi 4, 12, 20, 28
  int16x8_t a[8];

  a[0] = load_tran_low_to_s16q(input + 0 * 8);
  a[1] = load_tran_low_to_s16q(input + 1 * 8);
  a[2] = load_tran_low_to_s16q(input + 2 * 8);
  a[3] = load_tran_low_to_s16q(input + 3 * 8);
  a[4] = load_tran_low_to_s16q(input + 4 * 8);
  a[5] = load_tran_low_to_s16q(input + 5 * 8);
  a[6] = load_tran_low_to_s16q(input + 6 * 8);
  a[7] = load_tran_low_to_s16q(input + 7 * 8);

  // Transpose once up front; each 1-D pass below is followed by another
  // transpose so both directions are covered.
  transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);

  switch (tx_type) {
    case DCT_DCT:
      idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
      transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
      idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
      break;

    case ADST_DCT:
      idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
      transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
      iadst8(a);
      break;

    case DCT_ADST:
      iadst8(a);
      transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
      idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
      break;

    default:
      assert(tx_type == ADST_ADST);
      iadst8(a);
      transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
      iadst8(a);
      break;
  }

  idct8x8_add8x8_neon(a, dest, stride);
}
/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
#define VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_

#include <arm_neon.h>

#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h"

// Shared NEON helpers for the vp9 inverse ADST (iadst) transforms.

// In-place 4-point inverse ADST over two int16x8_t registers (two rows per
// register).  Intermediate math is widened to 32 bits, then rounded back via
// dct_const_round_shift_low_8_dual().
static INLINE void iadst4(int16x8_t *const io) {
  const int32x4_t c3 = vdupq_n_s32(sinpi_3_9);
  int16x4_t x[4];
  int32x4_t s[8], output[4];
  const int16x4_t c =
      create_s16x4_neon(sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9);

  x[0] = vget_low_s16(io[0]);
  x[1] = vget_low_s16(io[1]);
  x[2] = vget_high_s16(io[0]);
  x[3] = vget_high_s16(io[1]);

  s[0] = vmull_lane_s16(x[0], c, 0);
  s[1] = vmull_lane_s16(x[0], c, 1);
  s[2] = vmull_lane_s16(x[1], c, 2);
  s[3] = vmull_lane_s16(x[2], c, 3);
  s[4] = vmull_lane_s16(x[2], c, 0);
  s[5] = vmull_lane_s16(x[3], c, 1);
  s[6] = vmull_lane_s16(x[3], c, 3);
  s[7] = vaddl_s16(x[0], x[3]);
  s[7] = vsubw_s16(s[7], x[2]);

  s[0] = vaddq_s32(s[0], s[3]);
  s[0] = vaddq_s32(s[0], s[5]);
  s[1] = vsubq_s32(s[1], s[4]);
  s[1] = vsubq_s32(s[1], s[6]);
  s[3] = s[2];
  s[2] = vmulq_s32(c3, s[7]);

  output[0] = vaddq_s32(s[0], s[3]);
  output[1] = vaddq_s32(s[1], s[3]);
  output[2] = s[2];
  output[3] = vaddq_s32(s[0], s[1]);
  output[3] = vsubq_s32(output[3], s[3]);
  dct_const_round_shift_low_8_dual(output, &io[0], &io[1]);
}

// In-place: x[0] = round((x[0]+x[1])*c[0]), x[1] = round((x[0]-x[1])*c[0]).
static INLINE void iadst_half_butterfly_neon(int16x8_t *const x,
                                             const int16x4_t c) {
  // Don't add/sub before multiply, which will overflow in iadst8.
  const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(x[0]), c, 0);
  const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(x[0]), c, 0);
  const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(x[1]), c, 0);
  const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(x[1]), c, 0);
  int32x4_t t0[2], t1[2];

  t0[0] = vaddq_s32(x0_lo, x1_lo);
  t0[1] = vaddq_s32(x0_hi, x1_hi);
  t1[0] = vsubq_s32(x0_lo, x1_lo);
  t1[1] = vsubq_s32(x0_hi, x1_hi);
  x[0] = dct_const_round_shift_low_8(t0);
  x[1] = dct_const_round_shift_low_8(t1);
}

// Like iadst_half_butterfly_neon, but multiplies by lane 1 of c (the
// negative constant -cospi_16_64 in c_16_n16_8_24) and swaps the outputs:
// *x1 gets the sum, *x0 the difference.
static INLINE void iadst_half_butterfly_neg_neon(int16x8_t *const x0,
                                                 int16x8_t *const x1,
                                                 const int16x4_t c) {
  // Don't add/sub before multiply, which will overflow in iadst8.
  const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 1);
  const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(*x0), c, 1);
  const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 1);
  const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(*x1), c, 1);
  int32x4_t t0[2], t1[2];

  t0[0] = vaddq_s32(x0_lo, x1_lo);
  t0[1] = vaddq_s32(x0_hi, x1_hi);
  t1[0] = vsubq_s32(x0_lo, x1_lo);
  t1[1] = vsubq_s32(x0_hi, x1_hi);
  *x1 = dct_const_round_shift_low_8(t0);
  *x0 = dct_const_round_shift_low_8(t1);
}

// Counterpart of iadst_half_butterfly_neg_neon using lane 0 of c (the
// positive cospi_16_64 constant); outputs are swapped the same way.
static INLINE void iadst_half_butterfly_pos_neon(int16x8_t *const x0,
                                                 int16x8_t *const x1,
                                                 const int16x4_t c) {
  // Don't add/sub before multiply, which will overflow in iadst8.
  const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 0);
  const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(*x0), c, 0);
  const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 0);
  const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(*x1), c, 0);
  int32x4_t t0[2], t1[2];

  t0[0] = vaddq_s32(x0_lo, x1_lo);
  t0[1] = vaddq_s32(x0_hi, x1_hi);
  t1[0] = vsubq_s32(x0_lo, x1_lo);
  t1[1] = vsubq_s32(x0_hi, x1_hi);
  *x1 = dct_const_round_shift_low_8(t0);
  *x0 = dct_const_round_shift_low_8(t1);
}

// ADST butterfly: s0 = in0*c[l0] + in1*c[l1], s1 = in0*c[l1] - in1*c[l0],
// where (l0, l1) is the lane pair named by the function.  Results are kept
// as widened 32-bit pairs (low half in [0], high half in [1]).
static INLINE void iadst_butterfly_lane_0_1_neon(const int16x8_t in0,
                                                 const int16x8_t in1,
                                                 const int16x4_t c,
                                                 int32x4_t *const s0,
                                                 int32x4_t *const s1) {
  s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 0);
  s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 0);
  s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 1);
  s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 1);

  s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 1);
  s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 1);
  s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 0);
  s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 0);
}

// Same butterfly with lanes (2, 3) of c.
static INLINE void iadst_butterfly_lane_2_3_neon(const int16x8_t in0,
                                                 const int16x8_t in1,
                                                 const int16x4_t c,
                                                 int32x4_t *const s0,
                                                 int32x4_t *const s1) {
  s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
  s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);
  s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
  s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);

  s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 3);
  s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 3);
  s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 2);
  s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 2);
}

// Same butterfly with lanes (1, 0) of c (lane order reversed).
static INLINE void iadst_butterfly_lane_1_0_neon(const int16x8_t in0,
                                                 const int16x8_t in1,
                                                 const int16x4_t c,
                                                 int32x4_t *const s0,
                                                 int32x4_t *const s1) {
  s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 1);
  s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 1);
  s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 0);
  s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 0);

  s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 0);
  s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 0);
  s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 1);
  s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 1);
}

// Same butterfly with lanes (3, 2) of c (lane order reversed).
static INLINE void iadst_butterfly_lane_3_2_neon(const int16x8_t in0,
                                                 const int16x8_t in1,
                                                 const int16x4_t c,
                                                 int32x4_t *const s0,
                                                 int32x4_t *const s1) {
  s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
  s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);
  s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
  s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);

  s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 2);
  s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 2);
  s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 3);
  s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 3);
}

// round_shift(in0 + in1), narrowing the widened pairs back to 16 bits.
static INLINE int16x8_t add_dct_const_round_shift_low_8(
    const int32x4_t *const in0, const int32x4_t *const in1) {
  int32x4_t sum[2];

  sum[0] = vaddq_s32(in0[0], in1[0]);
  sum[1] = vaddq_s32(in0[1], in1[1]);
  return dct_const_round_shift_low_8(sum);
}

// round_shift(in0 - in1), narrowing the widened pairs back to 16 bits.
static INLINE int16x8_t sub_dct_const_round_shift_low_8(
    const int32x4_t *const in0, const int32x4_t *const in1) {
  int32x4_t sum[2];

  sum[0] = vsubq_s32(in0[0], in1[0]);
  sum[1] = vsubq_s32(in0[1], in1[1]);
  return dct_const_round_shift_low_8(sum);
}

// In-place 8-point inverse ADST over eight int16x8_t registers.
static INLINE void iadst8(int16x8_t *const io) {
  const int16x4_t c0 =
      create_s16x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64);
  const int16x4_t c1 =
      create_s16x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64);
  const int16x4_t c2 =
      create_s16x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64);
  int16x8_t x[8], t[4];
  int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];

  // iadst8 input permutation.
  x[0] = io[7];
  x[1] = io[0];
  x[2] = io[5];
  x[3] = io[2];
  x[4] = io[3];
  x[5] = io[4];
  x[6] = io[1];
  x[7] = io[6];

  // stage 1
  iadst_butterfly_lane_0_1_neon(x[0], x[1], c0, s0, s1);
  iadst_butterfly_lane_2_3_neon(x[2], x[3], c0, s2, s3);
  iadst_butterfly_lane_0_1_neon(x[4], x[5], c1, s4, s5);
  iadst_butterfly_lane_2_3_neon(x[6], x[7], c1, s6, s7);

  x[0] = add_dct_const_round_shift_low_8(s0, s4);
  x[1] = add_dct_const_round_shift_low_8(s1, s5);
  x[2] = add_dct_const_round_shift_low_8(s2, s6);
  x[3] = add_dct_const_round_shift_low_8(s3, s7);
  x[4] = sub_dct_const_round_shift_low_8(s0, s4);
  x[5] = sub_dct_const_round_shift_low_8(s1, s5);
  x[6] = sub_dct_const_round_shift_low_8(s2, s6);
  x[7] = sub_dct_const_round_shift_low_8(s3, s7);

  // stage 2
  t[0] = x[0];
  t[1] = x[1];
  t[2] = x[2];
  t[3] = x[3];
  iadst_butterfly_lane_2_3_neon(x[4], x[5], c2, s4, s5);
  iadst_butterfly_lane_3_2_neon(x[7], x[6], c2, s7, s6);

  x[0] = vaddq_s16(t[0], t[2]);
  x[1] = vaddq_s16(t[1], t[3]);
  x[2] = vsubq_s16(t[0], t[2]);
  x[3] = vsubq_s16(t[1], t[3]);
  x[4] = add_dct_const_round_shift_low_8(s4, s6);
  x[5] = add_dct_const_round_shift_low_8(s5, s7);
  x[6] = sub_dct_const_round_shift_low_8(s4, s6);
  x[7] = sub_dct_const_round_shift_low_8(s5, s7);

  // stage 3
  iadst_half_butterfly_neon(x + 2, c2);
  iadst_half_butterfly_neon(x + 6, c2);

  // Output permutation with alternating sign flips.
  io[0] = x[0];
  io[1] = vnegq_s16(x[4]);
  io[2] = x[6];
  io[3] = vnegq_s16(x[2]);
  io[4] = x[3];
  io[5] = vnegq_s16(x[7]);
  io[6] = x[5];
  io[7] = vnegq_s16(x[1]);
}

void vpx_iadst16x16_256_add_half1d(const void *const input, int16_t *output,
                                   void *const dest, const int stride,
                                   const int highbd_flag);

// One-dimensional transform pass: pass 1 writes to `output`, pass 2
// (output == NULL) adds into `dest`.
typedef void (*iht_1d)(const void *const input, int16_t *output,
                       void *const dest, const int stride,
                       const int highbd_flag);

typedef struct {
  iht_1d cols, rows;  // vertical and horizontal
} iht_2d;

#endif  // VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <stdio.h>

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
#include "vpx_dsp/mips/inv_txfm_dspr2.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_ports/mem.h"

#if HAVE_DSPR2
// 16x16 inverse hybrid transform for MIPS DSPr2.  Rows first into `out`,
// then the column transform adds into dest with a final rounding shift of 6.
void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, int pitch,
                                int tx_type) {
  int i, j;
  DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
  int16_t *outptr = out;
  int16_t temp_out[16];
  uint32_t pos = 45;

  /* bit position for extract from acc */
  __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos));

  switch (tx_type) {
    case DCT_DCT:  // DCT in both horizontal and vertical
      idct16_rows_dspr2(input, outptr, 16);
      idct16_cols_add_blk_dspr2(out, dest, pitch);
      break;
    case ADST_DCT:  // ADST in vertical, DCT in horizontal
      idct16_rows_dspr2(input, outptr, 16);

      outptr = out;

      // Column iadst; clip_pixel saturates the reconstructed value to 8 bits.
      for (i = 0; i < 16; ++i) {
        iadst16_dspr2(outptr, temp_out);

        for (j = 0; j < 16; ++j)
          dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) +
                                           dest[j * pitch + i]);
        outptr += 16;
      }
      break;
    case DCT_ADST:  // DCT in vertical, ADST in horizontal
    {
      int16_t temp_in[16 * 16];

      for (i = 0; i < 16; ++i) {
        /* prefetch row */
        prefetch_load((const uint8_t *)(input + 16));

        iadst16_dspr2(input, outptr);
        input += 16;
        outptr += 16;
      }

      // Transpose before the column idct.
      for (i = 0; i < 16; ++i)
        for (j = 0; j < 16; ++j) temp_in[j * 16 + i] = out[i * 16 + j];

      idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
      break;
    }
    case ADST_ADST:  // ADST in both directions
    {
      int16_t temp_in[16];

      for (i = 0; i < 16; ++i) {
        /* prefetch row */
        prefetch_load((const uint8_t *)(input + 16));

        iadst16_dspr2(input, outptr);
        input += 16;
        outptr += 16;
      }

      for (i = 0; i < 16; ++i) {
        for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
        iadst16_dspr2(temp_in, temp_out);
        for (j = 0; j < 16; ++j)
          dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) +
                                           dest[j * pitch + i]);
      }
      break;
    }
    default: printf("vp9_short_iht16x16_add_dspr2 : Invalid tx_type\n"); break;
  }
}
#endif  // #if HAVE_DSPR2
/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <stdio.h>

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
#include "vpx_dsp/mips/inv_txfm_dspr2.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_ports/mem.h"

#if HAVE_DSPR2
// 4x4 inverse hybrid transform for MIPS DSPr2.  Rows first into `out`,
// then the column transform adds into dest with a final rounding shift of 4.
void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, int stride,
                             int tx_type) {
  int i, j;
  DECLARE_ALIGNED(32, int16_t, out[4 * 4]);
  int16_t *outptr = out;
  int16_t temp_in[4 * 4], temp_out[4];
  uint32_t pos = 45;

  /* bit position for extract from acc */
  __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
                       :
                       : [pos] "r"(pos));

  switch (tx_type) {
    case DCT_DCT:  // DCT in both horizontal and vertical
      vpx_idct4_rows_dspr2(input, outptr);
      vpx_idct4_columns_add_blk_dspr2(&out[0], dest, stride);
      break;
    case ADST_DCT:  // ADST in vertical, DCT in horizontal
      vpx_idct4_rows_dspr2(input, outptr);

      outptr = out;

      // Column iadst; clip_pixel saturates the reconstructed value to 8 bits.
      for (i = 0; i < 4; ++i) {
        iadst4_dspr2(outptr, temp_out);

        for (j = 0; j < 4; ++j)
          dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) +
                                            dest[j * stride + i]);

        outptr += 4;
      }
      break;
    case DCT_ADST:  // DCT in vertical, ADST in horizontal
      for (i = 0; i < 4; ++i) {
        iadst4_dspr2(input, outptr);
        input += 4;
        outptr += 4;
      }

      // Transpose before the column idct.
      for (i = 0; i < 4; ++i) {
        for (j = 0; j < 4; ++j) {
          temp_in[i * 4 + j] = out[j * 4 + i];
        }
      }
      vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, stride);
      break;
    case ADST_ADST:  // ADST in both directions
      for (i = 0; i < 4; ++i) {
        iadst4_dspr2(input, outptr);
        input += 4;
        outptr += 4;
      }

      for (i = 0; i < 4; ++i) {
        for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
        iadst4_dspr2(temp_in, temp_out);

        for (j = 0; j < 4; ++j)
          dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) +
                                            dest[j * stride + i]);
      }
      break;
    default: printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); break;
  }
}
#endif  // #if HAVE_DSPR2
+ for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) { + temp_in[i * 8 + j] = out[j * 8 + i]; + } + } + idct8_columns_add_blk_dspr2(&temp_in[0], dest, stride); + break; + case ADST_ADST: // ADST in both directions + for (i = 0; i < 8; ++i) { + iadst8_dspr2(input, outptr); + input += 8; + outptr += 8; + } + + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; + + iadst8_dspr2(temp_in, temp_out); + + for (j = 0; j < 8; ++j) + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + + dest[j * stride + i]); + } + break; + default: printf("vp9_short_iht8x8_add_dspr2 : Invalid tx_type\n"); break; + } +} +#endif // #if HAVE_DSPR2 diff --git a/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c b/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c new file mode 100644 index 0000000000..c031322806 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <assert.h> + +#include "./vp9_rtcd.h" +#include "vp9/common/vp9_enums.h" +#include "vpx_dsp/mips/inv_txfm_msa.h" + +void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride, int32_t tx_type) { + int32_t i; + DECLARE_ALIGNED(32, int16_t, out[16 * 16]); + int16_t *out_ptr = &out[0]; + + switch (tx_type) { + case DCT_DCT: + /* transform rows */ + for (i = 0; i < 2; ++i) { + /* process 16 * 8 block */ + vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); + } + + /* transform columns */ + for (i = 0; i < 2; ++i) { + /* process 8 * 16 block */ + vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), + dst_stride); + } + break; + case ADST_DCT: + /* transform rows */ + for (i = 0; i < 2; ++i) { + /* process 16 * 8 block */ + vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); + } + + /* transform columns */ + for (i = 0; i < 2; ++i) { + vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)), + (dst + (i << 3)), dst_stride); + } + break; + case DCT_ADST: + /* transform rows */ + for (i = 0; i < 2; ++i) { + /* process 16 * 8 block */ + vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); + } + + /* transform columns */ + for (i = 0; i < 2; ++i) { + /* process 8 * 16 block */ + vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), + dst_stride); + } + break; + case ADST_ADST: + /* transform rows */ + for (i = 0; i < 2; ++i) { + /* process 16 * 8 block */ + vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); + } + + /* transform columns */ + for (i = 0; i < 2; ++i) { + vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)), + (dst + (i << 3)), dst_stride); + } + break; + default: assert(0); break; + } +} diff --git a/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c b/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c new file mode 100644 index 0000000000..aaccd5ca7b --- /dev/null +++ 
/* === media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c === */
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vp9_rtcd.h"
#include "vp9/common/vp9_enums.h"
#include "vpx_dsp/mips/inv_txfm_msa.h"

// 4x4 inverse hybrid transform + reconstruction (MIPS MSA). The block is
// held entirely in four v8i16 registers; a transpose between the two 1-D
// passes switches between row and column orientation.
void vp9_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
                           int32_t dst_stride, int32_t tx_type) {
  v8i16 in0, in1, in2, in3;

  /* load vector elements of 4x4 block */
  LD4x4_SH(input, in0, in1, in2, in3);
  TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);

  switch (tx_type) {
    case DCT_DCT:
      /* DCT in horizontal */
      VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      /* DCT in vertical */
      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
      VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      break;
    case ADST_DCT:
      /* DCT in horizontal */
      VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      /* ADST in vertical */
      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
      VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      break;
    case DCT_ADST:
      /* ADST in horizontal */
      VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      /* DCT in vertical */
      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
      VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      break;
    case ADST_ADST:
      /* ADST in horizontal */
      VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      /* ADST in vertical */
      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
      VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      break;
    default: assert(0); break;
  }

  /* final rounding (add 2^3, divide by 2^4) and shift */
  SRARI_H4_SH(in0, in1, in2, in3, 4);
  /* add block and store 4x4 */
  ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
}

/* === media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c === */
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vp9_rtcd.h"
#include "vp9/common/vp9_enums.h"
#include "vpx_dsp/mips/inv_txfm_msa.h"

// 8x8 inverse hybrid transform + reconstruction (MIPS MSA). Same pattern as
// the 4x4 version but with eight v8i16 rows and a 2^5 rounding shift.
void vp9_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
                           int32_t dst_stride, int32_t tx_type) {
  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;

  /* load vector elements of 8x8 block */
  LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);

  TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
                     in4, in5, in6, in7);

  switch (tx_type) {
    case DCT_DCT:
      /* DCT in horizontal */
      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
                     in4, in5, in6, in7);
      /* DCT in vertical */
      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                         in3, in4, in5, in6, in7);
      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
                     in4, in5, in6, in7);
      break;
    case ADST_DCT:
      /* DCT in horizontal */
      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
                     in4, in5, in6, in7);
      /* ADST in vertical */
      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                         in3, in4, in5, in6, in7);
      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
                in5, in6, in7);
      break;
    case DCT_ADST:
      /* ADST in horizontal */
      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
                in5, in6, in7);
      /* DCT in vertical */
      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                         in3, in4, in5, in6, in7);
      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
                     in4, in5, in6, in7);
      break;
    case ADST_ADST:
      /* ADST in horizontal */
      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
                in5, in6, in7);
      /* ADST in vertical */
      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                         in3, in4, in5, in6, in7);
      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
                in5, in6, in7);
      break;
    default: assert(0); break;
  }

  /* final rounding (add 2^4, divide by 2^5) and shift */
  SRARI_H4_SH(in0, in1, in2, in3, 5);
  SRARI_H4_SH(in4, in5, in6, in7, 5);

  /* add block and store 8x8 */
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
  dst += (4 * dst_stride);
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
}

/* === media/libvpx/libvpx/vp9/common/mips/msa/vp9_mfqe_msa.c === */
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp9_rtcd.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vpx_dsp/mips/macros_msa.h"

// Weighted blend of an 8x8 |src_ptr| block into |dst_ptr| for the
// multi-frame quality enhancement (MFQE) filter:
//   dst = round((src * src_weight + dst * dst_weight) >> MFQE_PRECISION)
// where dst_weight is the complement of src_weight.
static void filter_by_weight8x8_msa(const uint8_t *src_ptr, int32_t src_stride,
                                    uint8_t *dst_ptr, int32_t dst_stride,
                                    int32_t src_weight) {
  int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
  int32_t row;
  uint64_t src0_d, src1_d, dst0_d, dst1_d;
  v16i8 src0 = { 0 };
  v16i8 src1 = { 0 };
  v16i8 dst0 = { 0 };
  v16i8 dst1 = { 0 };
  v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;

  src_wt = __msa_fill_h(src_weight);
  dst_wt = __msa_fill_h(dst_weight);

  // Two iterations of four rows each (8-byte rows packed two per vector).
  for (row = 2; row--;) {
    LD2(src_ptr, src_stride, src0_d, src1_d);
    src_ptr += (2 * src_stride);
    LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
    INSERT_D2_SB(src0_d, src1_d, src0);
    INSERT_D2_SB(dst0_d, dst1_d, dst0);

    LD2(src_ptr, src_stride, src0_d, src1_d);
    src_ptr += (2 * src_stride);
    LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
    INSERT_D2_SB(src0_d, src1_d, src1);
    INSERT_D2_SB(dst0_d, dst1_d, dst1);

    // Blend in 16-bit precision, then round-shift and pack back to bytes.
    UNPCK_UB_SH(src0, src_r, src_l);
    UNPCK_UB_SH(dst0, dst_r, dst_l);
    res_h_r = (src_r * src_wt);
    res_h_r += (dst_r * dst_wt);
    res_h_l = (src_l * src_wt);
    res_h_l += (dst_l * dst_wt);
    SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
    dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
    ST8x2_UB(dst0, dst_ptr, dst_stride);
    dst_ptr += (2 * dst_stride);

    UNPCK_UB_SH(src1, src_r, src_l);
    UNPCK_UB_SH(dst1, dst_r, dst_l);
    res_h_r = (src_r * src_wt);
    res_h_r += (dst_r * dst_wt);
    res_h_l = (src_l * src_wt);
    res_h_l += (dst_l * dst_wt);
    SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
    dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
    ST8x2_UB(dst1, dst_ptr, dst_stride);
    dst_ptr += (2 * dst_stride);
  }
}

// 16x16 variant of filter_by_weight8x8_msa: same blend formula, one full
// 16-byte row per vector, four rows per loop iteration.
static void filter_by_weight16x16_msa(const uint8_t *src_ptr,
                                      int32_t src_stride, uint8_t *dst_ptr,
                                      int32_t dst_stride, int32_t src_weight) {
  int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
  int32_t row;
  v16i8 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
  v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;

  src_wt = __msa_fill_h(src_weight);
  dst_wt = __msa_fill_h(dst_weight);

  for (row = 4; row--;) {
    LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);

    UNPCK_UB_SH(src0, src_r, src_l);
    UNPCK_UB_SH(dst0, dst_r, dst_l);
    res_h_r = (src_r * src_wt);
    res_h_r += (dst_r * dst_wt);
    res_h_l = (src_l * src_wt);
    res_h_l += (dst_l * dst_wt);
    SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
    PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
    dst_ptr += dst_stride;

    UNPCK_UB_SH(src1, src_r, src_l);
    UNPCK_UB_SH(dst1, dst_r, dst_l);
    res_h_r = (src_r * src_wt);
    res_h_r += (dst_r * dst_wt);
    res_h_l = (src_l * src_wt);
    res_h_l += (dst_l * dst_wt);
    SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
    PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
    dst_ptr += dst_stride;

    UNPCK_UB_SH(src2, src_r, src_l);
    UNPCK_UB_SH(dst2, dst_r, dst_l);
    res_h_r = (src_r * src_wt);
    res_h_r += (dst_r * dst_wt);
    res_h_l = (src_l * src_wt);
    res_h_l += (dst_l * dst_wt);
    SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
    PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
    dst_ptr += dst_stride;

    UNPCK_UB_SH(src3, src_r, src_l);
    UNPCK_UB_SH(dst3, dst_r, dst_l);
    res_h_r = (src_r * src_wt);
    res_h_r += (dst_r * dst_wt);
    res_h_l = (src_l * src_wt);
    res_h_l += (dst_l * dst_wt);
    SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
    PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
    dst_ptr += dst_stride;
  }
}

// Public RTCD entry point; thin wrapper over the static implementation.
void vp9_filter_by_weight8x8_msa(const uint8_t *src, int src_stride,
                                 uint8_t *dst, int dst_stride, int src_weight) {
  filter_by_weight8x8_msa(src, src_stride, dst, dst_stride, src_weight);
}

// Public RTCD entry point; thin wrapper over the static implementation.
void vp9_filter_by_weight16x16_msa(const uint8_t *src, int src_stride,
                                   uint8_t *dst, int dst_stride,
                                   int src_weight) {
  filter_by_weight16x16_msa(src, src_stride, dst, dst_stride, src_weight);
}

/* === media/libvpx/libvpx/vp9/common/ppc/vp9_idct_vsx.c === */
/*
 * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vp9_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/ppc/inv_txfm_vsx.h"
#include "vpx_dsp/ppc/bitdepth_conversion_vsx.h"

#include "vp9/common/vp9_enums.h"

// 4x4 inverse hybrid transform + reconstruction (PowerPC VSX). The two 1-D
// passes ping-pong between in[] and out[]; the final rounded result is added
// to |dest| by vpx_round_store4x4_vsx.
void vp9_iht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  int16x8_t in[2], out[2];

  in[0] = load_tran_low(0, input);
  in[1] = load_tran_low(8 * sizeof(*input), input);

  switch (tx_type) {
    case DCT_DCT:
      vpx_idct4_vsx(in, out);
      vpx_idct4_vsx(out, in);
      break;
    case ADST_DCT:
      vpx_idct4_vsx(in, out);
      vp9_iadst4_vsx(out, in);
      break;
    case DCT_ADST:
      vp9_iadst4_vsx(in, out);
      vpx_idct4_vsx(out, in);
      break;
    default:
      assert(tx_type == ADST_ADST);
      vp9_iadst4_vsx(in, out);
      vp9_iadst4_vsx(out, in);
      break;
  }

  vpx_round_store4x4_vsx(in, out, dest, stride);
}

// 8x8 inverse hybrid transform + reconstruction (PowerPC VSX).
void vp9_iht8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  int16x8_t in[8], out[8];

  // load input data
  in[0] = load_tran_low(0, input);
  in[1] = load_tran_low(8 * sizeof(*input), input);
  in[2] = load_tran_low(2 * 8 * sizeof(*input), input);
  in[3] = load_tran_low(3 * 8 * sizeof(*input), input);
  in[4] = load_tran_low(4 * 8 * sizeof(*input), input);
  in[5] = load_tran_low(5 * 8 * sizeof(*input), input);
  in[6] = load_tran_low(6 * 8 * sizeof(*input), input);
  in[7] = load_tran_low(7 * 8 * sizeof(*input), input);

  switch (tx_type) {
    case DCT_DCT:
      vpx_idct8_vsx(in, out);
      vpx_idct8_vsx(out, in);
      break;
    case ADST_DCT:
      vpx_idct8_vsx(in, out);
      vp9_iadst8_vsx(out, in);
      break;
    case DCT_ADST:
      vp9_iadst8_vsx(in, out);
      vpx_idct8_vsx(out, in);
      break;
    default:
      assert(tx_type == ADST_ADST);
      vp9_iadst8_vsx(in, out);
      vp9_iadst8_vsx(out, in);
      break;
  }

  vpx_round_store8x8_vsx(in, dest, stride);
}

// 16x16 inverse hybrid transform + reconstruction (PowerPC VSX). The block
// is split into two halves (in0/in1); the 1-D helpers transform in place.
void vp9_iht16x16_256_add_vsx(const tran_low_t *input, uint8_t *dest,
                              int stride, int tx_type) {
  int16x8_t in0[16], in1[16];

  LOAD_INPUT16(load_tran_low, input, 0, 8 * sizeof(*input), in0);
  LOAD_INPUT16(load_tran_low, input, 8 * 8 * 2 * sizeof(*input),
               8 * sizeof(*input), in1);

  switch (tx_type) {
    case DCT_DCT:
      vpx_idct16_vsx(in0, in1);
      vpx_idct16_vsx(in0, in1);
      break;
    case ADST_DCT:
      vpx_idct16_vsx(in0, in1);
      vpx_iadst16_vsx(in0, in1);
      break;
    case DCT_ADST:
      vpx_iadst16_vsx(in0, in1);
      vpx_idct16_vsx(in0, in1);
      break;
    default:
      assert(tx_type == ADST_ADST);
      vpx_iadst16_vsx(in0, in1);
      vpx_iadst16_vsx(in0, in1);
      break;
  }

  vpx_round_store16x16_vsx(in0, in1, dest, stride);
}

/* === media/libvpx/libvpx/vp9/common/vp9_alloccommon.c === */
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_config.h"
#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_onyxc_int.h"

// Derive mode-info grid dimensions (8x8 units) and stride from a frame size
// in pixels. Width/height are rounded up to whole MI units first.
void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width,
                     int height) {
  const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
  const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
  *mi_cols = aligned_width >> MI_SIZE_LOG2;
  *mi_rows = aligned_height >> MI_SIZE_LOG2;
  *mi_stride = calc_mi_size(*mi_cols);
}

// Derive 16x16 macroblock dimensions from MI dimensions (two MIs per MB in
// each direction, rounding up).
void vp9_set_mb_size(int *mb_rows, int *mb_cols, int *mb_num, int mi_rows,
                     int mi_cols) {
  *mb_cols = (mi_cols + 1) >> 1;
  *mb_rows = (mi_rows + 1) >> 1;
  *mb_num = (*mb_rows) * (*mb_cols);
}

// Convenience wrapper: populate all MI and MB size fields of |cm| from a
// frame size in pixels.
void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) {
  vp9_set_mi_size(&cm->mi_rows, &cm->mi_cols, &cm->mi_stride, width, height);
  vp9_set_mb_size(&cm->mb_rows, &cm->mb_cols, &cm->MBs, cm->mi_rows,
                  cm->mi_cols);
}

// Allocate the ping-pong segmentation maps. Returns 1 on allocation failure
// (caller is expected to free via free_seg_map on the failure path).
static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) {
  int i;

  for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
    cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1);
    if (cm->seg_map_array[i] == NULL) return 1;
  }
  cm->seg_map_alloc_size = seg_map_size;

  // Init the index.
  cm->seg_map_idx = 0;
  cm->prev_seg_map_idx = 1;

  cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
  cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];

  return 0;
}

// Release the ping-pong segmentation maps and clear the aliasing pointers.
static void free_seg_map(VP9_COMMON *cm) {
  int i;

  for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
    vpx_free(cm->seg_map_array[i]);
    cm->seg_map_array[i] = NULL;
  }

  cm->current_frame_seg_map = NULL;
  cm->last_frame_seg_map = NULL;
}

// Release every reference frame buffer in |pool|, including externally
// provided raw buffers (via the release callback) and per-buffer MV storage.
void vp9_free_ref_frame_buffers(BufferPool *pool) {
  int i;

  if (!pool) return;

  for (i = 0; i < FRAME_BUFFERS; ++i) {
    if (!pool->frame_bufs[i].released &&
        pool->frame_bufs[i].raw_frame_buffer.data != NULL) {
      pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer);
      pool->frame_bufs[i].ref_count = 0;
      pool->frame_bufs[i].released = 1;
    }
    vpx_free(pool->frame_bufs[i].mvs);
    pool->frame_bufs[i].mvs = NULL;
    vpx_free_frame_buffer(&pool->frame_bufs[i].buf);
  }
}

// Release post-processing scratch buffers (no-op when postproc is disabled
// at build time).
void vp9_free_postproc_buffers(VP9_COMMON *cm) {
#if CONFIG_VP9_POSTPROC
  vpx_free_frame_buffer(&cm->post_proc_buffer);
  vpx_free_frame_buffer(&cm->post_proc_buffer_int);
  vpx_free(cm->postproc_state.limits);
  cm->postproc_state.limits = NULL;
  vpx_free(cm->postproc_state.generated_noise);
  cm->postproc_state.generated_noise = NULL;
#else
  (void)cm;
#endif
}

// Release all per-frame context buffers (mode info, segmentation maps,
// above-row entropy/partition contexts, loop-filter masks).
void vp9_free_context_buffers(VP9_COMMON *cm) {
  if (cm->free_mi) cm->free_mi(cm);
  free_seg_map(cm);
  vpx_free(cm->above_context);
  cm->above_context = NULL;
  vpx_free(cm->above_seg_context);
  cm->above_seg_context = NULL;
  vpx_free(cm->lf.lfm);
  cm->lf.lfm = NULL;
}

// (Re)allocate the loop-filter mask array. Returns 1 on failure.
int vp9_alloc_loop_filter(VP9_COMMON *cm) {
  vpx_free(cm->lf.lfm);
  // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The
  // stride and rows are rounded up / truncated to a multiple of 8.
  cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3;
  cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc(
      ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride,
      sizeof(*cm->lf.lfm));
  if (!cm->lf.lfm) return 1;
  return 0;
}

// Size (or resize) all per-frame context buffers for a frame of
// |width| x |height| pixels. Existing buffers are only reallocated when they
// are too small. Returns 0 on success, 1 on failure (with all context
// buffers freed and MI sizes zeroed so a later resync reallocates).
int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
  int new_mi_size;

  vp9_set_mb_mi(cm, width, height);
  new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
  if (cm->mi_alloc_size < new_mi_size) {
    cm->free_mi(cm);
    if (cm->alloc_mi(cm, new_mi_size)) goto fail;
  }

  if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) {
    // Create the segmentation map structure and set to 0.
    free_seg_map(cm);
    if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) goto fail;
  }

  if (cm->above_context_alloc_cols < cm->mi_cols) {
    vpx_free(cm->above_context);
    cm->above_context = (ENTROPY_CONTEXT *)vpx_calloc(
        2 * mi_cols_aligned_to_sb(cm->mi_cols) * MAX_MB_PLANE,
        sizeof(*cm->above_context));
    if (!cm->above_context) goto fail;

    vpx_free(cm->above_seg_context);
    cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc(
        mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context));
    if (!cm->above_seg_context) goto fail;
    cm->above_context_alloc_cols = cm->mi_cols;
  }

  if (vp9_alloc_loop_filter(cm)) goto fail;

  return 0;

fail:
  // clear the mi_* values to force a realloc on resync
  vp9_set_mb_mi(cm, 0, 0);
  vp9_free_context_buffers(cm);
  return 1;
}

// Tear down everything owned by |cm| (contexts, frame-context arrays, and
// postproc buffers when built in).
void vp9_remove_common(VP9_COMMON *cm) {
#if CONFIG_VP9_POSTPROC
  vp9_free_postproc_buffers(cm);
#endif
  vp9_free_context_buffers(cm);

  vpx_free(cm->fc);
  cm->fc = NULL;
  vpx_free(cm->frame_contexts);
  cm->frame_contexts = NULL;
}

// Initialize the mode-info grid and reset the previous-frame segmentation
// map to all-zero.
void vp9_init_context_buffers(VP9_COMMON *cm) {
  cm->setup_mi(cm);
  if (cm->last_frame_seg_map)
    memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
}

// Flip the ping-pong segmentation maps so the map written for the current
// frame becomes the reference for the next one.
void vp9_swap_current_and_last_seg_map(VP9_COMMON *cm) {
  // Swap indices.
  const int tmp = cm->seg_map_idx;
  cm->seg_map_idx = cm->prev_seg_map_idx;
  cm->prev_seg_map_idx = tmp;

  cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
  cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
}

/* === media/libvpx/libvpx/vp9/common/vp9_alloccommon.h === */
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_
#define VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_

#define INVALID_IDX (-1)  // Invalid buffer index.

#ifdef __cplusplus
extern "C" {
#endif

struct VP9Common;
struct BufferPool;

// Frees contexts, frame contexts and (when built) postproc buffers.
void vp9_remove_common(struct VP9Common *cm);

// Allocation entry points; functions returning int use 0 = success,
// 1 = allocation failure.
int vp9_alloc_loop_filter(struct VP9Common *cm);
int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height);
void vp9_init_context_buffers(struct VP9Common *cm);
void vp9_free_context_buffers(struct VP9Common *cm);

void vp9_free_ref_frame_buffers(struct BufferPool *pool);
void vp9_free_postproc_buffers(struct VP9Common *cm);

int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height);
void vp9_free_state_buffers(struct VP9Common *cm);

// Size computations from a frame size in pixels (see vp9_alloccommon.c).
void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width,
                     int height);
void vp9_set_mb_size(int *mb_rows, int *mb_cols, int *mb_num, int mi_rows,
                     int mi_cols);

void vp9_set_mb_mi(struct VP9Common *cm, int width, int height);

// Flips the current/last segmentation-map ping-pong buffers.
void vp9_swap_current_and_last_seg_map(struct VP9Common *cm);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_

/* === media/libvpx/libvpx/vp9/common/vp9_blockd.c === */
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "vp9/common/vp9_blockd.h"

// Returns the intra prediction mode of the 4x4 sub-block to the left of
// sub-block |b| in |cur_mi|. For the left column (b == 0 or 2) that
// neighbor lives in |left_mi| (DC_PRED if absent or inter-coded).
PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi,
                                    const MODE_INFO *left_mi, int b) {
  if (b == 0 || b == 2) {
    if (!left_mi || is_inter_block(left_mi)) return DC_PRED;

    return get_y_mode(left_mi, b + 1);
  } else {
    assert(b == 1 || b == 3);
    return cur_mi->bmi[b - 1].as_mode;
  }
}

// Returns the intra prediction mode of the 4x4 sub-block above sub-block
// |b| in |cur_mi|. For the top row (b == 0 or 1) that neighbor lives in
// |above_mi| (DC_PRED if absent or inter-coded).
PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi,
                                     const MODE_INFO *above_mi, int b) {
  if (b == 0 || b == 1) {
    if (!above_mi || is_inter_block(above_mi)) return DC_PRED;

    return get_y_mode(above_mi, b + 2);
  } else {
    assert(b == 2 || b == 3);
    return cur_mi->bmi[b - 2].as_mode;
  }
}

// Invokes |visit| once per transform block of |plane| within |bsize|,
// skipping transform blocks that lie wholly inside the unrestricted motion
// vector (UMV) border beyond the frame edge.
void vp9_foreach_transformed_block_in_plane(
    const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
    foreach_transformed_block_visitor visit, void *arg) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const MODE_INFO *mi = xd->mi[0];
  // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
  // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
  // transform size varies per plane, look it up in a common way.
  const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const int step = 1 << (tx_size << 1);
  int i = 0, r, c;

  // If mb_to_right_edge is < 0 we are in a situation in which
  // the current block size extends into the UMV and we won't
  // visit the sub blocks that are wholly within the UMV.
  const int max_blocks_wide =
      num_4x4_w + (xd->mb_to_right_edge >= 0
                       ? 0
                       : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
  const int max_blocks_high =
      num_4x4_h + (xd->mb_to_bottom_edge >= 0
                       ? 0
                       : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
  // Advance the flat block index past the skipped right-edge columns so |i|
  // stays consistent with a full-width scan.
  const int extra_step = ((num_4x4_w - max_blocks_wide) >> tx_size) * step;

  // Keep track of the row and column of the blocks we use so that we know
  // if we are in the unrestricted motion border.
  for (r = 0; r < max_blocks_high; r += (1 << tx_size)) {
    // Skip visiting the sub blocks that are wholly within the UMV.
    for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) {
      visit(plane, i, r, c, plane_bsize, tx_size, arg);
      i += step;
    }
    i += extra_step;
  }
}

// Applies vp9_foreach_transformed_block_in_plane to every plane.
void vp9_foreach_transformed_block(const MACROBLOCKD *const xd,
                                   BLOCK_SIZE bsize,
                                   foreach_transformed_block_visitor visit,
                                   void *arg) {
  int plane;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane)
    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
}

// Updates the above/left entropy contexts for a transform block at offset
// (|aoff|, |loff|) in 4x4 units. Contexts are set to |has_eob|; entries that
// fall beyond the visible frame edge are zeroed instead.
void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
                      BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
                      int aoff, int loff) {
  ENTROPY_CONTEXT *const a = pd->above_context + aoff;
  ENTROPY_CONTEXT *const l = pd->left_context + loff;
  const int tx_size_in_blocks = 1 << tx_size;

  // above
  if (has_eob && xd->mb_to_right_edge < 0) {
    int i;
    const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize] +
                            (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
    int above_contexts = tx_size_in_blocks;
    if (above_contexts + aoff > blocks_wide)
      above_contexts = blocks_wide - aoff;

    for (i = 0; i < above_contexts; ++i) a[i] = has_eob;
    for (i = above_contexts; i < tx_size_in_blocks; ++i) a[i] = 0;
  } else {
    memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
  }

  // left
  if (has_eob && xd->mb_to_bottom_edge < 0) {
    int i;
    const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] +
                            (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
    int left_contexts = tx_size_in_blocks;
    if (left_contexts + loff > blocks_high) left_contexts = blocks_high - loff;

    for (i = 0; i < left_contexts; ++i) l[i] = has_eob;
    for (i = left_contexts; i < tx_size_in_blocks; ++i) l[i] = 0;
  } else {
    memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
  }
}

// Records chroma subsampling for each plane (luma is never subsampled).
void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) {
  int i;

  for (i = 0; i < MAX_MB_PLANE; i++) {
    xd->plane[i].subsampling_x = i ? ss_x : 0;
    xd->plane[i].subsampling_y = i ? ss_y : 0;
  }
}

/* === media/libvpx/libvpx/vp9/common/vp9_blockd.h === */
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_COMMON_VP9_BLOCKD_H_
#define VPX_VP9_COMMON_VP9_BLOCKD_H_

#include "./vpx_config.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#include "vpx_scale/yv12config.h"

#include "vp9/common/vp9_common_data.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_mv.h"
#include "vp9/common/vp9_scale.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_tile_common.h"

#ifdef __cplusplus
extern "C" {
#endif

#define MAX_MB_PLANE 3

typedef enum {
  KEY_FRAME = 0,
  INTER_FRAME = 1,
  FRAME_TYPES,
} FRAME_TYPE;

static INLINE int is_inter_mode(PREDICTION_MODE mode) {
  return mode >= NEARESTMV && mode <= NEWMV;
}

/* For keyframes, intra block modes are predicted by the (already decoded)
   modes for the Y blocks to the left and above us; for interframes, there
   is a single probability table.
// Per-4x4 sub-block mode data, used only for sub-8x8 partitions.
typedef struct {
  PREDICTION_MODE as_mode;
  int_mv as_mv[2];  // first, second inter predictor motion vectors
} b_mode_info;

// Note that the rate-distortion optimization loop, bit-stream writer, and
// decoder implementation modules critically rely on the defined entry values
// specified herein. They should be refactored concurrently.

#define NONE (-1)
#define INTRA_FRAME 0
#define LAST_FRAME 1
#define GOLDEN_FRAME 2
#define ALTREF_FRAME 3
#define MAX_REF_FRAMES 4
#define MAX_INTER_REF_FRAMES 3

typedef int8_t MV_REFERENCE_FRAME;

// Maps LAST/GOLDEN/ALTREF (1..3) to a 0-based inter-reference index.
static INLINE int mv_ref_frame_to_inter_ref_idx(
    MV_REFERENCE_FRAME mv_ref_frame) {
  assert(mv_ref_frame >= LAST_FRAME && mv_ref_frame < MAX_REF_FRAMES);
  return mv_ref_frame - 1;
}

// This structure now relates to 8x8 block regions.
typedef struct MODE_INFO {
  // Common for both INTER and INTRA blocks
  BLOCK_SIZE sb_type;
  PREDICTION_MODE mode;
  TX_SIZE tx_size;
  int8_t skip;
  int8_t segment_id;
  int8_t seg_id_predicted;  // valid only when temporal_update is enabled

  // Only for INTRA blocks
  PREDICTION_MODE uv_mode;

  // Only for INTER blocks
  INTERP_FILTER interp_filter;

  // if ref_frame[idx] is equal to ALTREF_FRAME then
  // MACROBLOCKD::block_ref[idx] is an altref
  MV_REFERENCE_FRAME ref_frame[2];

  // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
  int_mv mv[2];

  b_mode_info bmi[4];
} MODE_INFO;

// Luma prediction mode of sub-block |block|: sub-8x8 blocks keep one mode
// per 4x4 sub-block in bmi[]; larger blocks use the single block-level mode.
static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) {
  return mi->sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode : mi->mode;
}

// True when the block is inter-coded (ref_frame[0] names a real reference).
static INLINE int is_inter_block(const MODE_INFO *mi) {
  return mi->ref_frame[0] > INTRA_FRAME;
}

// True for compound prediction, i.e. a second reference frame is in use.
static INLINE int has_second_ref(const MODE_INFO *mi) {
  return mi->ref_frame[1] > INTRA_FRAME;
}

PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi,
                                    const MODE_INFO *left_mi, int b);

PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi,
                                     const MODE_INFO *above_mi, int b);

enum mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 };

// A pixel buffer together with its row stride.
struct buf_2d {
  uint8_t *buf;
  int stride;
};

// Per-plane decoder/encoder state.
struct macroblockd_plane {
  tran_low_t *dqcoeff;
  int subsampling_x;
  int subsampling_y;
  struct buf_2d dst;
  struct buf_2d pre[2];
  ENTROPY_CONTEXT *above_context;
  ENTROPY_CONTEXT *left_context;
  int16_t seg_dequant[MAX_SEGMENTS][2];

  // number of 4x4s in current block
  uint16_t n4_w, n4_h;
  // log2 of n4_w, n4_h
  uint8_t n4_wl, n4_hl;

  // encoder
  const int16_t *dequant;

  int *eob;
};

// Offset of block i within a plane's coefficient buffer
// (16 coefficients per 4x4 block).
#define BLOCK_OFFSET(x, i) ((x) + (i)*16)

typedef struct RefBuffer {
  // TODO(dkovalev): idx is not really required and should be removed, now it
  // is used in vp9_onyxd_if.c
  int idx;
  YV12_BUFFER_CONFIG *buf;
  struct scale_factors sf;
} RefBuffer;
// Plane 0 is luma (PLANE_TYPE_Y); planes 1 and 2 are chroma (PLANE_TYPE_UV).
static INLINE PLANE_TYPE get_plane_type(int plane) {
  return (PLANE_TYPE)(plane > 0);
}

// Block size produced by applying |partition| to |bsize|.
static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize,
                                     PARTITION_TYPE partition) {
  return subsize_lookup[partition][bsize];
}

extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES];

// Transform type for >= 8x8 blocks.  Only lossy intra luma blocks may use a
// non-DCT transform, selected by the block's intra prediction mode.
static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
                                  const MACROBLOCKD *xd) {
  const MODE_INFO *const mi = xd->mi[0];

  if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi))
    return DCT_DCT;

  return intra_mode_to_tx_type_lookup[mi->mode];
}

// 4x4 variant of get_tx_type(): sub-8x8 blocks use the per-sub-block intra
// mode of sub-block |ib| (see get_y_mode).
static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
                                      const MACROBLOCKD *xd, int ib) {
  const MODE_INFO *const mi = xd->mi[0];

  if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi))
    return DCT_DCT;

  return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)];
}

void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);

// Chroma transform size implied by the luma block size, the luma transform
// size, and the plane's subsampling.
static INLINE TX_SIZE get_uv_tx_size(const MODE_INFO *mi,
                                     const struct macroblockd_plane *pd) {
  assert(mi->sb_type < BLOCK_8X8 ||
         ss_size_lookup[mi->sb_type][pd->subsampling_x][pd->subsampling_y] !=
             BLOCK_INVALID);
  return uv_txsize_lookup[mi->sb_type][mi->tx_size][pd->subsampling_x]
                         [pd->subsampling_y];
}

// Block size of |bsize| as seen by a (possibly subsampled) plane.
static INLINE BLOCK_SIZE
get_plane_block_size(BLOCK_SIZE bsize, const struct macroblockd_plane *pd) {
  return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
}

// Clears the above/left entropy contexts covered by |bsize| in every plane.
static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
  int i;
  for (i = 0; i < MAX_MB_PLANE; i++) {
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    memset(pd->above_context, 0,
           sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]);
    memset(pd->left_context, 0,
           sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]);
  }
}

// Key-frame luma mode probabilities, conditioned on the modes of the above
// and left neighboring blocks.
static INLINE const vpx_prob *get_y_mode_probs(const MODE_INFO *mi,
                                               const MODE_INFO *above_mi,
                                               const MODE_INFO *left_mi,
                                               int block) {
  const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block);
  const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block);
  return vp9_kf_y_mode_prob[above][left];
}

// Callback invoked once per transform block; (row, col) are in 4x4 units
// within the plane.
typedef void (*foreach_transformed_block_visitor)(int plane, int block, int row,
                                                  int col,
                                                  BLOCK_SIZE plane_bsize,
                                                  TX_SIZE tx_size, void *arg);

void vp9_foreach_transformed_block_in_plane(
    const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
    foreach_transformed_block_visitor visit, void *arg);

void vp9_foreach_transformed_block(const MACROBLOCKD *const xd,
                                   BLOCK_SIZE bsize,
                                   foreach_transformed_block_visitor visit,
                                   void *arg);

void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
                      BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
                      int aoff, int loff);
tx_blk_row, + int subsampling_x, int subsampling_y) { + *pixel_c = ((mi_col << MI_SIZE_LOG2) >> subsampling_x) + + (tx_blk_col << TX_UNIT_SIZE_LOG2); + *pixel_r = ((mi_row << MI_SIZE_LOG2) >> subsampling_y) + + (tx_blk_row << TX_UNIT_SIZE_LOG2); +} + +static INLINE int get_block_width(BLOCK_SIZE bsize) { + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + return 4 * num_4x4_w; +} + +static INLINE int get_block_height(BLOCK_SIZE bsize) { + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; + return 4 * num_4x4_h; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_BLOCKD_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_common.h b/media/libvpx/libvpx/vp9/common/vp9_common.h new file mode 100644 index 0000000000..d63bad93d1 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_common.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_VP9_COMMON_H_ +#define VPX_VP9_COMMON_VP9_COMMON_H_ + +/* Interface header for common constant data structures and lookup tables */ + +#include <assert.h> + +#include "./vpx_config.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/bitops.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Only need this for fixed-size arrays, for structs just assign. +#define vp9_copy(dest, src) \ + do { \ + assert(sizeof(dest) == sizeof(src)); \ + memcpy(dest, src, sizeof(src)); \ + } while (0) + +// Use this for variably-sized arrays. 
+#define vp9_copy_array(dest, src, n) \ + { \ + assert(sizeof(*(dest)) == sizeof(*(src))); \ + memcpy(dest, src, (n) * sizeof(*(src))); \ + } + +#define vp9_zero(dest) memset(&(dest), 0, sizeof(dest)) +#define vp9_zero_array(dest, n) memset(dest, 0, (n) * sizeof(*(dest))) + +static INLINE int get_unsigned_bits(unsigned int num_values) { + return num_values > 0 ? get_msb(num_values) + 1 : 0; +} + +#define VP9_SYNC_CODE_0 0x49 +#define VP9_SYNC_CODE_1 0x83 +#define VP9_SYNC_CODE_2 0x42 + +#define VP9_FRAME_MARKER 0x2 + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_common_data.c b/media/libvpx/libvpx/vp9/common/vp9_common_data.c new file mode 100644 index 0000000000..809d7317ce --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_common_data.c @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp9/common/vp9_common_data.h" +#include "vpx_dsp/vpx_dsp_common.h" + +// Log 2 conversion lookup tables for block width and height +const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2, 2, + 2, 3, 3, 3, 4, 4 }; +const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1, 2, + 3, 2, 3, 4, 3, 4 }; +const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 2, 2, 2, 4, 4, + 4, 8, 8, 8, 16, 16 }; +const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = { 1, 2, 1, 2, 4, 2, 4, + 8, 4, 8, 16, 8, 16 }; +// Log 2 conversion lookup tables for modeinfo width and height +const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 0, 0, 0, 1, 1, + 1, 2, 2, 2, 3, 3 }; +const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 2, 2, + 2, 4, 4, 4, 8, 8 }; +const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 2, 1, 2, + 4, 2, 4, 8, 4, 8 }; + +// VPXMIN(3, VPXMIN(b_width_log2_lookup(bsize), b_height_log2_lookup(bsize))) +const uint8_t size_group_lookup[BLOCK_SIZES] = { 0, 0, 0, 1, 1, 1, 2, + 2, 2, 3, 3, 3, 3 }; + +const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = { 4, 5, 5, 6, 7, 7, 8, + 9, 9, 10, 11, 11, 12 }; + +const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = { + { // 4X4 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID }, + { // 8X8 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID }, + { // 16X16 + // 4X4, 
4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID }, + { // 32X32 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, + PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID }, + { // 64X64 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, + PARTITION_NONE } +}; + +const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = { + { // PARTITION_NONE + BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, + BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64, + BLOCK_64X32, BLOCK_64X64 }, + { // PARTITION_HORZ + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32 }, + { // PARTITION_VERT + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64 }, + { // PARTITION_SPLIT + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32 } +}; + +const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = { + TX_4X4, TX_4X4, TX_4X4, TX_8X8, TX_8X8, TX_8X8, TX_16X16, + TX_16X16, TX_16X16, TX_32X32, TX_32X32, 
TX_32X32, TX_32X32 +}; + +const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = { + BLOCK_4X4, // TX_4X4 + BLOCK_8X8, // TX_8X8 + BLOCK_16X16, // TX_16X16 + BLOCK_32X32, // TX_32X32 +}; + +const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = { + TX_4X4, // ONLY_4X4 + TX_8X8, // ALLOW_8X8 + TX_16X16, // ALLOW_16X16 + TX_32X32, // ALLOW_32X32 + TX_32X32, // TX_MODE_SELECT +}; + +const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = { + // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 + // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 + { { BLOCK_4X4, BLOCK_INVALID }, { BLOCK_INVALID, BLOCK_INVALID } }, + { { BLOCK_4X8, BLOCK_4X4 }, { BLOCK_INVALID, BLOCK_INVALID } }, + { { BLOCK_8X4, BLOCK_INVALID }, { BLOCK_4X4, BLOCK_INVALID } }, + { { BLOCK_8X8, BLOCK_8X4 }, { BLOCK_4X8, BLOCK_4X4 } }, + { { BLOCK_8X16, BLOCK_8X8 }, { BLOCK_INVALID, BLOCK_4X8 } }, + { { BLOCK_16X8, BLOCK_INVALID }, { BLOCK_8X8, BLOCK_8X4 } }, + { { BLOCK_16X16, BLOCK_16X8 }, { BLOCK_8X16, BLOCK_8X8 } }, + { { BLOCK_16X32, BLOCK_16X16 }, { BLOCK_INVALID, BLOCK_8X16 } }, + { { BLOCK_32X16, BLOCK_INVALID }, { BLOCK_16X16, BLOCK_16X8 } }, + { { BLOCK_32X32, BLOCK_32X16 }, { BLOCK_16X32, BLOCK_16X16 } }, + { { BLOCK_32X64, BLOCK_32X32 }, { BLOCK_INVALID, BLOCK_16X32 } }, + { { BLOCK_64X32, BLOCK_INVALID }, { BLOCK_32X32, BLOCK_32X16 } }, + { { BLOCK_64X64, BLOCK_64X32 }, { BLOCK_32X64, BLOCK_32X32 } }, +}; + +const TX_SIZE uv_txsize_lookup[BLOCK_SIZES][TX_SIZES][2][2] = { + // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 + // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 + { + // BLOCK_4X4 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + }, + { + // BLOCK_4X8 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + }, + { + // BLOCK_8X4 + { { TX_4X4, TX_4X4 }, { TX_4X4, 
TX_4X4 } }, + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + }, + { + // BLOCK_8X8 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } }, + }, + { + // BLOCK_8X16 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } }, + }, + { + // BLOCK_16X8 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } }, + { { TX_8X8, TX_4X4 }, { TX_8X8, TX_8X8 } }, + { { TX_8X8, TX_4X4 }, { TX_8X8, TX_8X8 } }, + }, + { + // BLOCK_16X16 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, + { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } }, + { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } }, + }, + { + // BLOCK_16X32 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, + { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } }, + { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } }, + }, + { + // BLOCK_32X16 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, + { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } }, + { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } }, + }, + { + // BLOCK_32X32 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, + { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } }, + { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } }, + }, + { + // BLOCK_32X64 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, + { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } }, + { { TX_32X32, TX_32X32 }, { TX_16X16, TX_16X16 } }, + }, + { + // BLOCK_64X32 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, + { 
{ TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } }, + { { TX_32X32, TX_16X16 }, { TX_32X32, TX_16X16 } }, + }, + { + // BLOCK_64X64 + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, + { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } }, + { { TX_32X32, TX_32X32 }, { TX_32X32, TX_32X32 } }, + }, +}; + +// Generates 4 bit field in which each bit set to 1 represents +// a blocksize partition 1111 means we split 64x64, 32x32, 16x16 +// and 8x8. 1000 means we just split the 64x64 to 32x32 +const struct { + PARTITION_CONTEXT above; + PARTITION_CONTEXT left; +} partition_context_lookup[BLOCK_SIZES] = { + { 15, 15 }, // 4X4 - {0b1111, 0b1111} + { 15, 14 }, // 4X8 - {0b1111, 0b1110} + { 14, 15 }, // 8X4 - {0b1110, 0b1111} + { 14, 14 }, // 8X8 - {0b1110, 0b1110} + { 14, 12 }, // 8X16 - {0b1110, 0b1100} + { 12, 14 }, // 16X8 - {0b1100, 0b1110} + { 12, 12 }, // 16X16 - {0b1100, 0b1100} + { 12, 8 }, // 16X32 - {0b1100, 0b1000} + { 8, 12 }, // 32X16 - {0b1000, 0b1100} + { 8, 8 }, // 32X32 - {0b1000, 0b1000} + { 8, 0 }, // 32X64 - {0b1000, 0b0000} + { 0, 8 }, // 64X32 - {0b0000, 0b1000} + { 0, 0 }, // 64X64 - {0b0000, 0b0000} +}; + +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +const uint8_t need_top_left[INTRA_MODES] = { + 0, // DC_PRED + 0, // V_PRED + 0, // H_PRED + 0, // D45_PRED + 1, // D135_PRED + 1, // D117_PRED + 1, // D153_PRED + 0, // D207_PRED + 0, // D63_PRED + 1, // TM_PRED +}; +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/libvpx/vp9/common/vp9_common_data.h b/media/libvpx/libvpx/vp9/common/vp9_common_data.h new file mode 100644 index 0000000000..a533c5f058 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_common_data.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_VP9_COMMON_DATA_H_ +#define VPX_VP9_COMMON_VP9_COMMON_DATA_H_ + +#include "vp9/common/vp9_enums.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const uint8_t b_width_log2_lookup[BLOCK_SIZES]; +extern const uint8_t b_height_log2_lookup[BLOCK_SIZES]; +extern const uint8_t mi_width_log2_lookup[BLOCK_SIZES]; +extern const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES]; +extern const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES]; +extern const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES]; +extern const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES]; +extern const uint8_t size_group_lookup[BLOCK_SIZES]; +extern const uint8_t num_pels_log2_lookup[BLOCK_SIZES]; +extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZES]; +extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES]; +extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES]; +extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES]; +extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES]; +extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2]; +extern const TX_SIZE uv_txsize_lookup[BLOCK_SIZES][TX_SIZES][2][2]; +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +extern const uint8_t need_top_left[INTRA_MODES]; +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_COMMON_DATA_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_debugmodes.c b/media/libvpx/libvpx/vp9/common/vp9_debugmodes.c new file mode 100644 index 0000000000..28cd4a1924 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_debugmodes.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdio.h> + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_onyxc_int.h" + +static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) { + fprintf(f, "%s", str); + fprintf(f, "(Frame %d, Show:%d, Q:%d): \n", cm->current_video_frame, + cm->show_frame, cm->base_qindex); +} +/* This function dereferences a pointer to the mbmi structure + * and uses the passed in member offset to print out the value of an integer + * for each mbmi member value in the mi structure. + */ +static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, + size_t member_offset) { + int mi_row, mi_col; + MODE_INFO **mi = cm->mi_grid_visible; + int rows = cm->mi_rows; + int cols = cm->mi_cols; + char prefix = descriptor[0]; + + log_frame_info(cm, descriptor, file); + for (mi_row = 0; mi_row < rows; mi_row++) { + fprintf(file, "%c ", prefix); + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(file, "%2d ", *((char *)((char *)(mi[0]) + member_offset))); + mi++; + } + fprintf(file, "\n"); + mi += 8; + } + fprintf(file, "\n"); +} + +void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { + int mi_row; + int mi_col; + FILE *mvs = fopen(file, "a"); + MODE_INFO **mi = cm->mi_grid_visible; + int rows = cm->mi_rows; + int cols = cm->mi_cols; + + print_mi_data(cm, mvs, "Partitions:", offsetof(MODE_INFO, sb_type)); + print_mi_data(cm, mvs, "Modes:", offsetof(MODE_INFO, mode)); + print_mi_data(cm, mvs, "Ref frame:", offsetof(MODE_INFO, ref_frame[0])); + print_mi_data(cm, mvs, "Transform:", offsetof(MODE_INFO, tx_size)); + print_mi_data(cm, mvs, "UV Modes:", offsetof(MODE_INFO, uv_mode)); + + // 
output skip infomation. + log_frame_info(cm, "Skips:", mvs); + for (mi_row = 0; mi_row < rows; mi_row++) { + fprintf(mvs, "S "); + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%2d ", mi[0]->skip); + mi++; + } + fprintf(mvs, "\n"); + mi += 8; + } + fprintf(mvs, "\n"); + + // output motion vectors. + log_frame_info(cm, "Vectors ", mvs); + mi = cm->mi_grid_visible; + for (mi_row = 0; mi_row < rows; mi_row++) { + fprintf(mvs, "V "); + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row, mi[0]->mv[0].as_mv.col); + mi++; + } + fprintf(mvs, "\n"); + mi += 8; + } + fprintf(mvs, "\n"); + + fclose(mvs); +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_entropy.c b/media/libvpx/libvpx/vp9/common/vp9_entropy.c new file mode 100644 index 0000000000..430b917b8f --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_entropy.c @@ -0,0 +1,1100 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_entropymode.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx/vpx_integer.h" + +// Unconstrained Node Tree +/* clang-format off */ +const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = { + 2, 6, // 0 = LOW_VAL + -TWO_TOKEN, 4, // 1 = TWO + -THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE + 8, 10, // 3 = HIGH_LOW + -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE + 12, 14, // 5 = CAT_THREEFOUR + -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE + -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE +}; +/* clang-format on */ + +const vpx_prob vp9_cat1_prob[] = { 159 }; +const vpx_prob vp9_cat2_prob[] = { 165, 145 }; +const vpx_prob vp9_cat3_prob[] = { 173, 148, 140 }; +const vpx_prob vp9_cat4_prob[] = { 176, 155, 140, 135 }; +const vpx_prob vp9_cat5_prob[] = { 180, 157, 141, 134, 130 }; +const vpx_prob vp9_cat6_prob[] = { 254, 254, 254, 252, 249, 243, 230, + 196, 177, 153, 140, 133, 130, 129 }; +#if CONFIG_VP9_HIGHBITDEPTH +const vpx_prob vp9_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254, + 254, 252, 249, 243, 230, 196, + 177, 153, 140, 133, 130, 129 }; +#endif + +/* clang-format off */ +const uint8_t vp9_coefband_trans_8x8plus[1024] = { + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + // beyond MAXBAND_INDEX+1 all values are filled as 5 + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +}; +/* clang-format on */ + +const uint8_t vp9_coefband_trans_4x4[16] = { + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, +}; + +const uint8_t vp9_pt_energy_class[ENTROPY_TOKENS] = { 0, 1, 2, 3, 3, 4, + 4, 5, 5, 5, 5, 5 }; + +// Model obtained from a 2-sided zero-centerd distribuition derived +// from a Pareto distribution. The cdf of the distribution is: +// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta] +// +// For a given beta and a given probablity of the 1-node, the alpha +// is first solved, and then the {alpha, beta} pair is used to generate +// the probabilities for the rest of the nodes. 
+ +// beta = 8 + +// Every odd line in this table can be generated from the even lines +// by averaging : +// vp9_pareto8_full[l][node] = (vp9_pareto8_full[l-1][node] + +// vp9_pareto8_full[l+1][node] ) >> 1; +const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = { + { 3, 86, 128, 6, 86, 23, 88, 29 }, + { 6, 86, 128, 11, 87, 42, 91, 52 }, + { 9, 86, 129, 17, 88, 61, 94, 76 }, + { 12, 86, 129, 22, 88, 77, 97, 93 }, + { 15, 87, 129, 28, 89, 93, 100, 110 }, + { 17, 87, 129, 33, 90, 105, 103, 123 }, + { 20, 88, 130, 38, 91, 118, 106, 136 }, + { 23, 88, 130, 43, 91, 128, 108, 146 }, + { 26, 89, 131, 48, 92, 139, 111, 156 }, + { 28, 89, 131, 53, 93, 147, 114, 163 }, + { 31, 90, 131, 58, 94, 156, 117, 171 }, + { 34, 90, 131, 62, 94, 163, 119, 177 }, + { 37, 90, 132, 66, 95, 171, 122, 184 }, + { 39, 90, 132, 70, 96, 177, 124, 189 }, + { 42, 91, 132, 75, 97, 183, 127, 194 }, + { 44, 91, 132, 79, 97, 188, 129, 198 }, + { 47, 92, 133, 83, 98, 193, 132, 202 }, + { 49, 92, 133, 86, 99, 197, 134, 205 }, + { 52, 93, 133, 90, 100, 201, 137, 208 }, + { 54, 93, 133, 94, 100, 204, 139, 211 }, + { 57, 94, 134, 98, 101, 208, 142, 214 }, + { 59, 94, 134, 101, 102, 211, 144, 216 }, + { 62, 94, 135, 105, 103, 214, 146, 218 }, + { 64, 94, 135, 108, 103, 216, 148, 220 }, + { 66, 95, 135, 111, 104, 219, 151, 222 }, + { 68, 95, 135, 114, 105, 221, 153, 223 }, + { 71, 96, 136, 117, 106, 224, 155, 225 }, + { 73, 96, 136, 120, 106, 225, 157, 226 }, + { 76, 97, 136, 123, 107, 227, 159, 228 }, + { 78, 97, 136, 126, 108, 229, 160, 229 }, + { 80, 98, 137, 129, 109, 231, 162, 231 }, + { 82, 98, 137, 131, 109, 232, 164, 232 }, + { 84, 98, 138, 134, 110, 234, 166, 233 }, + { 86, 98, 138, 137, 111, 235, 168, 234 }, + { 89, 99, 138, 140, 112, 236, 170, 235 }, + { 91, 99, 138, 142, 112, 237, 171, 235 }, + { 93, 100, 139, 145, 113, 238, 173, 236 }, + { 95, 100, 139, 147, 114, 239, 174, 237 }, + { 97, 101, 140, 149, 115, 240, 176, 238 }, + { 99, 101, 140, 151, 115, 241, 177, 238 }, + { 101, 102, 
140, 154, 116, 242, 179, 239 }, + { 103, 102, 140, 156, 117, 242, 180, 239 }, + { 105, 103, 141, 158, 118, 243, 182, 240 }, + { 107, 103, 141, 160, 118, 243, 183, 240 }, + { 109, 104, 141, 162, 119, 244, 185, 241 }, + { 111, 104, 141, 164, 119, 244, 186, 241 }, + { 113, 104, 142, 166, 120, 245, 187, 242 }, + { 114, 104, 142, 168, 121, 245, 188, 242 }, + { 116, 105, 143, 170, 122, 246, 190, 243 }, + { 118, 105, 143, 171, 122, 246, 191, 243 }, + { 120, 106, 143, 173, 123, 247, 192, 244 }, + { 121, 106, 143, 175, 124, 247, 193, 244 }, + { 123, 107, 144, 177, 125, 248, 195, 244 }, + { 125, 107, 144, 178, 125, 248, 196, 244 }, + { 127, 108, 145, 180, 126, 249, 197, 245 }, + { 128, 108, 145, 181, 127, 249, 198, 245 }, + { 130, 109, 145, 183, 128, 249, 199, 245 }, + { 132, 109, 145, 184, 128, 249, 200, 245 }, + { 134, 110, 146, 186, 129, 250, 201, 246 }, + { 135, 110, 146, 187, 130, 250, 202, 246 }, + { 137, 111, 147, 189, 131, 251, 203, 246 }, + { 138, 111, 147, 190, 131, 251, 204, 246 }, + { 140, 112, 147, 192, 132, 251, 205, 247 }, + { 141, 112, 147, 193, 132, 251, 206, 247 }, + { 143, 113, 148, 194, 133, 251, 207, 247 }, + { 144, 113, 148, 195, 134, 251, 207, 247 }, + { 146, 114, 149, 197, 135, 252, 208, 248 }, + { 147, 114, 149, 198, 135, 252, 209, 248 }, + { 149, 115, 149, 199, 136, 252, 210, 248 }, + { 150, 115, 149, 200, 137, 252, 210, 248 }, + { 152, 115, 150, 201, 138, 252, 211, 248 }, + { 153, 115, 150, 202, 138, 252, 212, 248 }, + { 155, 116, 151, 204, 139, 253, 213, 249 }, + { 156, 116, 151, 205, 139, 253, 213, 249 }, + { 158, 117, 151, 206, 140, 253, 214, 249 }, + { 159, 117, 151, 207, 141, 253, 215, 249 }, + { 161, 118, 152, 208, 142, 253, 216, 249 }, + { 162, 118, 152, 209, 142, 253, 216, 249 }, + { 163, 119, 153, 210, 143, 253, 217, 249 }, + { 164, 119, 153, 211, 143, 253, 217, 249 }, + { 166, 120, 153, 212, 144, 254, 218, 250 }, + { 167, 120, 153, 212, 145, 254, 219, 250 }, + { 168, 121, 154, 213, 146, 254, 220, 250 }, + { 169, 121, 154, 214, 146, 254, 
220, 250 }, + { 171, 122, 155, 215, 147, 254, 221, 250 }, + { 172, 122, 155, 216, 147, 254, 221, 250 }, + { 173, 123, 155, 217, 148, 254, 222, 250 }, + { 174, 123, 155, 217, 149, 254, 222, 250 }, + { 176, 124, 156, 218, 150, 254, 223, 250 }, + { 177, 124, 156, 219, 150, 254, 223, 250 }, + { 178, 125, 157, 220, 151, 254, 224, 251 }, + { 179, 125, 157, 220, 151, 254, 224, 251 }, + { 180, 126, 157, 221, 152, 254, 225, 251 }, + { 181, 126, 157, 221, 152, 254, 225, 251 }, + { 183, 127, 158, 222, 153, 254, 226, 251 }, + { 184, 127, 158, 223, 154, 254, 226, 251 }, + { 185, 128, 159, 224, 155, 255, 227, 251 }, + { 186, 128, 159, 224, 155, 255, 227, 251 }, + { 187, 129, 160, 225, 156, 255, 228, 251 }, + { 188, 130, 160, 225, 156, 255, 228, 251 }, + { 189, 131, 160, 226, 157, 255, 228, 251 }, + { 190, 131, 160, 226, 158, 255, 228, 251 }, + { 191, 132, 161, 227, 159, 255, 229, 251 }, + { 192, 132, 161, 227, 159, 255, 229, 251 }, + { 193, 133, 162, 228, 160, 255, 230, 252 }, + { 194, 133, 162, 229, 160, 255, 230, 252 }, + { 195, 134, 163, 230, 161, 255, 231, 252 }, + { 196, 134, 163, 230, 161, 255, 231, 252 }, + { 197, 135, 163, 231, 162, 255, 231, 252 }, + { 198, 135, 163, 231, 162, 255, 231, 252 }, + { 199, 136, 164, 232, 163, 255, 232, 252 }, + { 200, 136, 164, 232, 164, 255, 232, 252 }, + { 201, 137, 165, 233, 165, 255, 233, 252 }, + { 201, 137, 165, 233, 165, 255, 233, 252 }, + { 202, 138, 166, 233, 166, 255, 233, 252 }, + { 203, 138, 166, 233, 166, 255, 233, 252 }, + { 204, 139, 166, 234, 167, 255, 234, 252 }, + { 205, 139, 166, 234, 167, 255, 234, 252 }, + { 206, 140, 167, 235, 168, 255, 235, 252 }, + { 206, 140, 167, 235, 168, 255, 235, 252 }, + { 207, 141, 168, 236, 169, 255, 235, 252 }, + { 208, 141, 168, 236, 170, 255, 235, 252 }, + { 209, 142, 169, 237, 171, 255, 236, 252 }, + { 209, 143, 169, 237, 171, 255, 236, 252 }, + { 210, 144, 169, 237, 172, 255, 236, 252 }, + { 211, 144, 169, 237, 172, 255, 236, 252 }, + { 212, 145, 170, 238, 173, 255, 237, 252 }, + { 213, 
145, 170, 238, 173, 255, 237, 252 }, + { 214, 146, 171, 239, 174, 255, 237, 253 }, + { 214, 146, 171, 239, 174, 255, 237, 253 }, + { 215, 147, 172, 240, 175, 255, 238, 253 }, + { 215, 147, 172, 240, 175, 255, 238, 253 }, + { 216, 148, 173, 240, 176, 255, 238, 253 }, + { 217, 148, 173, 240, 176, 255, 238, 253 }, + { 218, 149, 173, 241, 177, 255, 239, 253 }, + { 218, 149, 173, 241, 178, 255, 239, 253 }, + { 219, 150, 174, 241, 179, 255, 239, 253 }, + { 219, 151, 174, 241, 179, 255, 239, 253 }, + { 220, 152, 175, 242, 180, 255, 240, 253 }, + { 221, 152, 175, 242, 180, 255, 240, 253 }, + { 222, 153, 176, 242, 181, 255, 240, 253 }, + { 222, 153, 176, 242, 181, 255, 240, 253 }, + { 223, 154, 177, 243, 182, 255, 240, 253 }, + { 223, 154, 177, 243, 182, 255, 240, 253 }, + { 224, 155, 178, 244, 183, 255, 241, 253 }, + { 224, 155, 178, 244, 183, 255, 241, 253 }, + { 225, 156, 178, 244, 184, 255, 241, 253 }, + { 225, 157, 178, 244, 184, 255, 241, 253 }, + { 226, 158, 179, 244, 185, 255, 242, 253 }, + { 227, 158, 179, 244, 185, 255, 242, 253 }, + { 228, 159, 180, 245, 186, 255, 242, 253 }, + { 228, 159, 180, 245, 186, 255, 242, 253 }, + { 229, 160, 181, 245, 187, 255, 242, 253 }, + { 229, 160, 181, 245, 187, 255, 242, 253 }, + { 230, 161, 182, 246, 188, 255, 243, 253 }, + { 230, 162, 182, 246, 188, 255, 243, 253 }, + { 231, 163, 183, 246, 189, 255, 243, 253 }, + { 231, 163, 183, 246, 189, 255, 243, 253 }, + { 232, 164, 184, 247, 190, 255, 243, 253 }, + { 232, 164, 184, 247, 190, 255, 243, 253 }, + { 233, 165, 185, 247, 191, 255, 244, 253 }, + { 233, 165, 185, 247, 191, 255, 244, 253 }, + { 234, 166, 185, 247, 192, 255, 244, 253 }, + { 234, 167, 185, 247, 192, 255, 244, 253 }, + { 235, 168, 186, 248, 193, 255, 244, 253 }, + { 235, 168, 186, 248, 193, 255, 244, 253 }, + { 236, 169, 187, 248, 194, 255, 244, 253 }, + { 236, 169, 187, 248, 194, 255, 244, 253 }, + { 236, 170, 188, 248, 195, 255, 245, 253 }, + { 236, 170, 188, 248, 195, 255, 245, 253 }, + { 237, 171, 189, 249, 196, 
255, 245, 254 }, + { 237, 172, 189, 249, 196, 255, 245, 254 }, + { 238, 173, 190, 249, 197, 255, 245, 254 }, + { 238, 173, 190, 249, 197, 255, 245, 254 }, + { 239, 174, 191, 249, 198, 255, 245, 254 }, + { 239, 174, 191, 249, 198, 255, 245, 254 }, + { 240, 175, 192, 249, 199, 255, 246, 254 }, + { 240, 176, 192, 249, 199, 255, 246, 254 }, + { 240, 177, 193, 250, 200, 255, 246, 254 }, + { 240, 177, 193, 250, 200, 255, 246, 254 }, + { 241, 178, 194, 250, 201, 255, 246, 254 }, + { 241, 178, 194, 250, 201, 255, 246, 254 }, + { 242, 179, 195, 250, 202, 255, 246, 254 }, + { 242, 180, 195, 250, 202, 255, 246, 254 }, + { 242, 181, 196, 250, 203, 255, 247, 254 }, + { 242, 181, 196, 250, 203, 255, 247, 254 }, + { 243, 182, 197, 251, 204, 255, 247, 254 }, + { 243, 183, 197, 251, 204, 255, 247, 254 }, + { 244, 184, 198, 251, 205, 255, 247, 254 }, + { 244, 184, 198, 251, 205, 255, 247, 254 }, + { 244, 185, 199, 251, 206, 255, 247, 254 }, + { 244, 185, 199, 251, 206, 255, 247, 254 }, + { 245, 186, 200, 251, 207, 255, 247, 254 }, + { 245, 187, 200, 251, 207, 255, 247, 254 }, + { 246, 188, 201, 252, 207, 255, 248, 254 }, + { 246, 188, 201, 252, 207, 255, 248, 254 }, + { 246, 189, 202, 252, 208, 255, 248, 254 }, + { 246, 190, 202, 252, 208, 255, 248, 254 }, + { 247, 191, 203, 252, 209, 255, 248, 254 }, + { 247, 191, 203, 252, 209, 255, 248, 254 }, + { 247, 192, 204, 252, 210, 255, 248, 254 }, + { 247, 193, 204, 252, 210, 255, 248, 254 }, + { 248, 194, 205, 252, 211, 255, 248, 254 }, + { 248, 194, 205, 252, 211, 255, 248, 254 }, + { 248, 195, 206, 252, 212, 255, 249, 254 }, + { 248, 196, 206, 252, 212, 255, 249, 254 }, + { 249, 197, 207, 253, 213, 255, 249, 254 }, + { 249, 197, 207, 253, 213, 255, 249, 254 }, + { 249, 198, 208, 253, 214, 255, 249, 254 }, + { 249, 199, 209, 253, 214, 255, 249, 254 }, + { 250, 200, 210, 253, 215, 255, 249, 254 }, + { 250, 200, 210, 253, 215, 255, 249, 254 }, + { 250, 201, 211, 253, 215, 255, 249, 254 }, + { 250, 202, 211, 253, 215, 255, 249, 254 }, + { 
250, 203, 212, 253, 216, 255, 249, 254 }, + { 250, 203, 212, 253, 216, 255, 249, 254 }, + { 251, 204, 213, 253, 217, 255, 250, 254 }, + { 251, 205, 213, 253, 217, 255, 250, 254 }, + { 251, 206, 214, 254, 218, 255, 250, 254 }, + { 251, 206, 215, 254, 218, 255, 250, 254 }, + { 252, 207, 216, 254, 219, 255, 250, 254 }, + { 252, 208, 216, 254, 219, 255, 250, 254 }, + { 252, 209, 217, 254, 220, 255, 250, 254 }, + { 252, 210, 217, 254, 220, 255, 250, 254 }, + { 252, 211, 218, 254, 221, 255, 250, 254 }, + { 252, 212, 218, 254, 221, 255, 250, 254 }, + { 253, 213, 219, 254, 222, 255, 250, 254 }, + { 253, 213, 220, 254, 222, 255, 250, 254 }, + { 253, 214, 221, 254, 223, 255, 250, 254 }, + { 253, 215, 221, 254, 223, 255, 250, 254 }, + { 253, 216, 222, 254, 224, 255, 251, 254 }, + { 253, 217, 223, 254, 224, 255, 251, 254 }, + { 253, 218, 224, 254, 225, 255, 251, 254 }, + { 253, 219, 224, 254, 225, 255, 251, 254 }, + { 254, 220, 225, 254, 225, 255, 251, 254 }, + { 254, 221, 226, 254, 225, 255, 251, 254 }, + { 254, 222, 227, 255, 226, 255, 251, 254 }, + { 254, 223, 227, 255, 226, 255, 251, 254 }, + { 254, 224, 228, 255, 227, 255, 251, 254 }, + { 254, 225, 229, 255, 227, 255, 251, 254 }, + { 254, 226, 230, 255, 228, 255, 251, 254 }, + { 254, 227, 230, 255, 229, 255, 251, 254 }, + { 255, 228, 231, 255, 230, 255, 251, 254 }, + { 255, 229, 232, 255, 230, 255, 251, 254 }, + { 255, 230, 233, 255, 231, 255, 252, 254 }, + { 255, 231, 234, 255, 231, 255, 252, 254 }, + { 255, 232, 235, 255, 232, 255, 252, 254 }, + { 255, 233, 236, 255, 232, 255, 252, 254 }, + { 255, 235, 237, 255, 233, 255, 252, 254 }, + { 255, 236, 238, 255, 234, 255, 252, 254 }, + { 255, 238, 240, 255, 235, 255, 252, 255 }, + { 255, 239, 241, 255, 235, 255, 252, 254 }, + { 255, 241, 243, 255, 236, 255, 252, 254 }, + { 255, 243, 245, 255, 237, 255, 252, 254 }, + { 255, 246, 247, 255, 239, 255, 253, 255 }, +}; + +static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = { + { // Y plane + { // Intra + { // 
Band 0 + { 195, 29, 183 }, + { 84, 49, 136 }, + { 8, 42, 71 } }, + { // Band 1 + { 31, 107, 169 }, + { 35, 99, 159 }, + { 17, 82, 140 }, + { 8, 66, 114 }, + { 2, 44, 76 }, + { 1, 19, 32 } }, + { // Band 2 + { 40, 132, 201 }, + { 29, 114, 187 }, + { 13, 91, 157 }, + { 7, 75, 127 }, + { 3, 58, 95 }, + { 1, 28, 47 } }, + { // Band 3 + { 69, 142, 221 }, + { 42, 122, 201 }, + { 15, 91, 159 }, + { 6, 67, 121 }, + { 1, 42, 77 }, + { 1, 17, 31 } }, + { // Band 4 + { 102, 148, 228 }, + { 67, 117, 204 }, + { 17, 82, 154 }, + { 6, 59, 114 }, + { 2, 39, 75 }, + { 1, 15, 29 } }, + { // Band 5 + { 156, 57, 233 }, + { 119, 57, 212 }, + { 58, 48, 163 }, + { 29, 40, 124 }, + { 12, 30, 81 }, + { 3, 12, 31 } } }, + { // Inter + { // Band 0 + { 191, 107, 226 }, + { 124, 117, 204 }, + { 25, 99, 155 } }, + { // Band 1 + { 29, 148, 210 }, + { 37, 126, 194 }, + { 8, 93, 157 }, + { 2, 68, 118 }, + { 1, 39, 69 }, + { 1, 17, 33 } }, + { // Band 2 + { 41, 151, 213 }, + { 27, 123, 193 }, + { 3, 82, 144 }, + { 1, 58, 105 }, + { 1, 32, 60 }, + { 1, 13, 26 } }, + { // Band 3 + { 59, 159, 220 }, + { 23, 126, 198 }, + { 4, 88, 151 }, + { 1, 66, 114 }, + { 1, 38, 71 }, + { 1, 18, 34 } }, + { // Band 4 + { 114, 136, 232 }, + { 51, 114, 207 }, + { 11, 83, 155 }, + { 3, 56, 105 }, + { 1, 33, 65 }, + { 1, 17, 34 } }, + { // Band 5 + { 149, 65, 234 }, + { 121, 57, 215 }, + { 61, 49, 166 }, + { 28, 36, 114 }, + { 12, 25, 76 }, + { 3, 16, 42 } } } }, + { // UV plane + { // Intra + { // Band 0 + { 214, 49, 220 }, + { 132, 63, 188 }, + { 42, 65, 137 } }, + { // Band 1 + { 85, 137, 221 }, + { 104, 131, 216 }, + { 49, 111, 192 }, + { 21, 87, 155 }, + { 2, 49, 87 }, + { 1, 16, 28 } }, + { // Band 2 + { 89, 163, 230 }, + { 90, 137, 220 }, + { 29, 100, 183 }, + { 10, 70, 135 }, + { 2, 42, 81 }, + { 1, 17, 33 } }, + { // Band 3 + { 108, 167, 237 }, + { 55, 133, 222 }, + { 15, 97, 179 }, + { 4, 72, 135 }, + { 1, 45, 85 }, + { 1, 19, 38 } }, + { // Band 4 + { 124, 146, 240 }, + { 66, 124, 224 }, + { 17, 88, 175 }, + 
{ 4, 58, 122 }, + { 1, 36, 75 }, + { 1, 18, 37 } }, + { // Band 5 + { 141, 79, 241 }, + { 126, 70, 227 }, + { 66, 58, 182 }, + { 30, 44, 136 }, + { 12, 34, 96 }, + { 2, 20, 47 } } }, + { // Inter + { // Band 0 + { 229, 99, 249 }, + { 143, 111, 235 }, + { 46, 109, 192 } }, + { // Band 1 + { 82, 158, 236 }, + { 94, 146, 224 }, + { 25, 117, 191 }, + { 9, 87, 149 }, + { 3, 56, 99 }, + { 1, 33, 57 } }, + { // Band 2 + { 83, 167, 237 }, + { 68, 145, 222 }, + { 10, 103, 177 }, + { 2, 72, 131 }, + { 1, 41, 79 }, + { 1, 20, 39 } }, + { // Band 3 + { 99, 167, 239 }, + { 47, 141, 224 }, + { 10, 104, 178 }, + { 2, 73, 133 }, + { 1, 44, 85 }, + { 1, 22, 47 } }, + { // Band 4 + { 127, 145, 243 }, + { 71, 129, 228 }, + { 17, 93, 177 }, + { 3, 61, 124 }, + { 1, 41, 84 }, + { 1, 21, 52 } }, + { // Band 5 + { 157, 78, 244 }, + { 140, 72, 231 }, + { 69, 58, 184 }, + { 31, 44, 137 }, + { 14, 38, 105 }, + { 8, 23, 61 } } } } +}; + +static const vp9_coeff_probs_model default_coef_probs_8x8[PLANE_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 125, 34, 187 }, + { 52, 41, 133 }, + { 6, 31, 56 } }, + { // Band 1 + { 37, 109, 153 }, + { 51, 102, 147 }, + { 23, 87, 128 }, + { 8, 67, 101 }, + { 1, 41, 63 }, + { 1, 19, 29 } }, + { // Band 2 + { 31, 154, 185 }, + { 17, 127, 175 }, + { 6, 96, 145 }, + { 2, 73, 114 }, + { 1, 51, 82 }, + { 1, 28, 45 } }, + { // Band 3 + { 23, 163, 200 }, + { 10, 131, 185 }, + { 2, 93, 148 }, + { 1, 67, 111 }, + { 1, 41, 69 }, + { 1, 14, 24 } }, + { // Band 4 + { 29, 176, 217 }, + { 12, 145, 201 }, + { 3, 101, 156 }, + { 1, 69, 111 }, + { 1, 39, 63 }, + { 1, 14, 23 } }, + { // Band 5 + { 57, 192, 233 }, + { 25, 154, 215 }, + { 6, 109, 167 }, + { 3, 78, 118 }, + { 1, 48, 69 }, + { 1, 21, 29 } } }, + { // Inter + { // Band 0 + { 202, 105, 245 }, + { 108, 106, 216 }, + { 18, 90, 144 } }, + { // Band 1 + { 33, 172, 219 }, + { 64, 149, 206 }, + { 14, 117, 177 }, + { 5, 90, 141 }, + { 2, 61, 95 }, + { 1, 37, 57 } }, + { // Band 2 + { 33, 179, 220 }, + { 11, 140, 
198 }, + { 1, 89, 148 }, + { 1, 60, 104 }, + { 1, 33, 57 }, + { 1, 12, 21 } }, + { // Band 3 + { 30, 181, 221 }, + { 8, 141, 198 }, + { 1, 87, 145 }, + { 1, 58, 100 }, + { 1, 31, 55 }, + { 1, 12, 20 } }, + { // Band 4 + { 32, 186, 224 }, + { 7, 142, 198 }, + { 1, 86, 143 }, + { 1, 58, 100 }, + { 1, 31, 55 }, + { 1, 12, 22 } }, + { // Band 5 + { 57, 192, 227 }, + { 20, 143, 204 }, + { 3, 96, 154 }, + { 1, 68, 112 }, + { 1, 42, 69 }, + { 1, 19, 32 } } } }, + { // UV plane + { // Intra + { // Band 0 + { 212, 35, 215 }, + { 113, 47, 169 }, + { 29, 48, 105 } }, + { // Band 1 + { 74, 129, 203 }, + { 106, 120, 203 }, + { 49, 107, 178 }, + { 19, 84, 144 }, + { 4, 50, 84 }, + { 1, 15, 25 } }, + { // Band 2 + { 71, 172, 217 }, + { 44, 141, 209 }, + { 15, 102, 173 }, + { 6, 76, 133 }, + { 2, 51, 89 }, + { 1, 24, 42 } }, + { // Band 3 + { 64, 185, 231 }, + { 31, 148, 216 }, + { 8, 103, 175 }, + { 3, 74, 131 }, + { 1, 46, 81 }, + { 1, 18, 30 } }, + { // Band 4 + { 65, 196, 235 }, + { 25, 157, 221 }, + { 5, 105, 174 }, + { 1, 67, 120 }, + { 1, 38, 69 }, + { 1, 15, 30 } }, + { // Band 5 + { 65, 204, 238 }, + { 30, 156, 224 }, + { 7, 107, 177 }, + { 2, 70, 124 }, + { 1, 42, 73 }, + { 1, 18, 34 } } }, + { // Inter + { // Band 0 + { 225, 86, 251 }, + { 144, 104, 235 }, + { 42, 99, 181 } }, + { // Band 1 + { 85, 175, 239 }, + { 112, 165, 229 }, + { 29, 136, 200 }, + { 12, 103, 162 }, + { 6, 77, 123 }, + { 2, 53, 84 } }, + { // Band 2 + { 75, 183, 239 }, + { 30, 155, 221 }, + { 3, 106, 171 }, + { 1, 74, 128 }, + { 1, 44, 76 }, + { 1, 17, 28 } }, + { // Band 3 + { 73, 185, 240 }, + { 27, 159, 222 }, + { 2, 107, 172 }, + { 1, 75, 127 }, + { 1, 42, 73 }, + { 1, 17, 29 } }, + { // Band 4 + { 62, 190, 238 }, + { 21, 159, 222 }, + { 2, 107, 172 }, + { 1, 72, 122 }, + { 1, 40, 71 }, + { 1, 18, 32 } }, + { // Band 5 + { 61, 199, 240 }, + { 27, 161, 226 }, + { 4, 113, 180 }, + { 1, 76, 129 }, + { 1, 46, 80 }, + { 1, 23, 41 } } } } +}; + +static const vp9_coeff_probs_model 
default_coef_probs_16x16[PLANE_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 7, 27, 153 }, + { 5, 30, 95 }, + { 1, 16, 30 } }, + { // Band 1 + { 50, 75, 127 }, + { 57, 75, 124 }, + { 27, 67, 108 }, + { 10, 54, 86 }, + { 1, 33, 52 }, + { 1, 12, 18 } }, + { // Band 2 + { 43, 125, 151 }, + { 26, 108, 148 }, + { 7, 83, 122 }, + { 2, 59, 89 }, + { 1, 38, 60 }, + { 1, 17, 27 } }, + { // Band 3 + { 23, 144, 163 }, + { 13, 112, 154 }, + { 2, 75, 117 }, + { 1, 50, 81 }, + { 1, 31, 51 }, + { 1, 14, 23 } }, + { // Band 4 + { 18, 162, 185 }, + { 6, 123, 171 }, + { 1, 78, 125 }, + { 1, 51, 86 }, + { 1, 31, 54 }, + { 1, 14, 23 } }, + { // Band 5 + { 15, 199, 227 }, + { 3, 150, 204 }, + { 1, 91, 146 }, + { 1, 55, 95 }, + { 1, 30, 53 }, + { 1, 11, 20 } } }, + { // Inter + { // Band 0 + { 19, 55, 240 }, + { 19, 59, 196 }, + { 3, 52, 105 } }, + { // Band 1 + { 41, 166, 207 }, + { 104, 153, 199 }, + { 31, 123, 181 }, + { 14, 101, 152 }, + { 5, 72, 106 }, + { 1, 36, 52 } }, + { // Band 2 + { 35, 176, 211 }, + { 12, 131, 190 }, + { 2, 88, 144 }, + { 1, 60, 101 }, + { 1, 36, 60 }, + { 1, 16, 28 } }, + { // Band 3 + { 28, 183, 213 }, + { 8, 134, 191 }, + { 1, 86, 142 }, + { 1, 56, 96 }, + { 1, 30, 53 }, + { 1, 12, 20 } }, + { // Band 4 + { 20, 190, 215 }, + { 4, 135, 192 }, + { 1, 84, 139 }, + { 1, 53, 91 }, + { 1, 28, 49 }, + { 1, 11, 20 } }, + { // Band 5 + { 13, 196, 216 }, + { 2, 137, 192 }, + { 1, 86, 143 }, + { 1, 57, 99 }, + { 1, 32, 56 }, + { 1, 13, 24 } } } }, + { // UV plane + { // Intra + { // Band 0 + { 211, 29, 217 }, + { 96, 47, 156 }, + { 22, 43, 87 } }, + { // Band 1 + { 78, 120, 193 }, + { 111, 116, 186 }, + { 46, 102, 164 }, + { 15, 80, 128 }, + { 2, 49, 76 }, + { 1, 18, 28 } }, + { // Band 2 + { 71, 161, 203 }, + { 42, 132, 192 }, + { 10, 98, 150 }, + { 3, 69, 109 }, + { 1, 44, 70 }, + { 1, 18, 29 } }, + { // Band 3 + { 57, 186, 211 }, + { 30, 140, 196 }, + { 4, 93, 146 }, + { 1, 62, 102 }, + { 1, 38, 65 }, + { 1, 16, 27 } }, + { // Band 4 + { 47, 199, 217 
}, + { 14, 145, 196 }, + { 1, 88, 142 }, + { 1, 57, 98 }, + { 1, 36, 62 }, + { 1, 15, 26 } }, + { // Band 5 + { 26, 219, 229 }, + { 5, 155, 207 }, + { 1, 94, 151 }, + { 1, 60, 104 }, + { 1, 36, 62 }, + { 1, 16, 28 } } }, + { // Inter + { // Band 0 + { 233, 29, 248 }, + { 146, 47, 220 }, + { 43, 52, 140 } }, + { // Band 1 + { 100, 163, 232 }, + { 179, 161, 222 }, + { 63, 142, 204 }, + { 37, 113, 174 }, + { 26, 89, 137 }, + { 18, 68, 97 } }, + { // Band 2 + { 85, 181, 230 }, + { 32, 146, 209 }, + { 7, 100, 164 }, + { 3, 71, 121 }, + { 1, 45, 77 }, + { 1, 18, 30 } }, + { // Band 3 + { 65, 187, 230 }, + { 20, 148, 207 }, + { 2, 97, 159 }, + { 1, 68, 116 }, + { 1, 40, 70 }, + { 1, 14, 29 } }, + { // Band 4 + { 40, 194, 227 }, + { 8, 147, 204 }, + { 1, 94, 155 }, + { 1, 65, 112 }, + { 1, 39, 66 }, + { 1, 14, 26 } }, + { // Band 5 + { 16, 208, 228 }, + { 3, 151, 207 }, + { 1, 98, 160 }, + { 1, 67, 117 }, + { 1, 41, 74 }, + { 1, 17, 31 } } } } +}; + +static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 17, 38, 140 }, + { 7, 34, 80 }, + { 1, 17, 29 } }, + { // Band 1 + { 37, 75, 128 }, + { 41, 76, 128 }, + { 26, 66, 116 }, + { 12, 52, 94 }, + { 2, 32, 55 }, + { 1, 10, 16 } }, + { // Band 2 + { 50, 127, 154 }, + { 37, 109, 152 }, + { 16, 82, 121 }, + { 5, 59, 85 }, + { 1, 35, 54 }, + { 1, 13, 20 } }, + { // Band 3 + { 40, 142, 167 }, + { 17, 110, 157 }, + { 2, 71, 112 }, + { 1, 44, 72 }, + { 1, 27, 45 }, + { 1, 11, 17 } }, + { // Band 4 + { 30, 175, 188 }, + { 9, 124, 169 }, + { 1, 74, 116 }, + { 1, 48, 78 }, + { 1, 30, 49 }, + { 1, 11, 18 } }, + { // Band 5 + { 10, 222, 223 }, + { 2, 150, 194 }, + { 1, 83, 128 }, + { 1, 48, 79 }, + { 1, 27, 45 }, + { 1, 11, 17 } } }, + { // Inter + { // Band 0 + { 36, 41, 235 }, + { 29, 36, 193 }, + { 10, 27, 111 } }, + { // Band 1 + { 85, 165, 222 }, + { 177, 162, 215 }, + { 110, 135, 195 }, + { 57, 113, 168 }, + { 23, 83, 120 }, + { 10, 49, 61 } }, + { // Band 2 + { 85, 
190, 223 }, + { 36, 139, 200 }, + { 5, 90, 146 }, + { 1, 60, 103 }, + { 1, 38, 65 }, + { 1, 18, 30 } }, + { // Band 3 + { 72, 202, 223 }, + { 23, 141, 199 }, + { 2, 86, 140 }, + { 1, 56, 97 }, + { 1, 36, 61 }, + { 1, 16, 27 } }, + { // Band 4 + { 55, 218, 225 }, + { 13, 145, 200 }, + { 1, 86, 141 }, + { 1, 57, 99 }, + { 1, 35, 61 }, + { 1, 13, 22 } }, + { // Band 5 + { 15, 235, 212 }, + { 1, 132, 184 }, + { 1, 84, 139 }, + { 1, 57, 97 }, + { 1, 34, 56 }, + { 1, 14, 23 } } } }, + { // UV plane + { // Intra + { // Band 0 + { 181, 21, 201 }, + { 61, 37, 123 }, + { 10, 38, 71 } }, + { // Band 1 + { 47, 106, 172 }, + { 95, 104, 173 }, + { 42, 93, 159 }, + { 18, 77, 131 }, + { 4, 50, 81 }, + { 1, 17, 23 } }, + { // Band 2 + { 62, 147, 199 }, + { 44, 130, 189 }, + { 28, 102, 154 }, + { 18, 75, 115 }, + { 2, 44, 65 }, + { 1, 12, 19 } }, + { // Band 3 + { 55, 153, 210 }, + { 24, 130, 194 }, + { 3, 93, 146 }, + { 1, 61, 97 }, + { 1, 31, 50 }, + { 1, 10, 16 } }, + { // Band 4 + { 49, 186, 223 }, + { 17, 148, 204 }, + { 1, 96, 142 }, + { 1, 53, 83 }, + { 1, 26, 44 }, + { 1, 11, 17 } }, + { // Band 5 + { 13, 217, 212 }, + { 2, 136, 180 }, + { 1, 78, 124 }, + { 1, 50, 83 }, + { 1, 29, 49 }, + { 1, 14, 23 } } }, + { // Inter + { // Band 0 + { 197, 13, 247 }, + { 82, 17, 222 }, + { 25, 17, 162 } }, + { // Band 1 + { 126, 186, 247 }, + { 234, 191, 243 }, + { 176, 177, 234 }, + { 104, 158, 220 }, + { 66, 128, 186 }, + { 55, 90, 137 } }, + { // Band 2 + { 111, 197, 242 }, + { 46, 158, 219 }, + { 9, 104, 171 }, + { 2, 65, 125 }, + { 1, 44, 80 }, + { 1, 17, 91 } }, + { // Band 3 + { 104, 208, 245 }, + { 39, 168, 224 }, + { 3, 109, 162 }, + { 1, 79, 124 }, + { 1, 50, 102 }, + { 1, 43, 102 } }, + { // Band 4 + { 84, 220, 246 }, + { 31, 177, 231 }, + { 2, 115, 180 }, + { 1, 79, 134 }, + { 1, 55, 77 }, + { 1, 60, 79 } }, + { // Band 5 + { 43, 243, 240 }, + { 8, 180, 217 }, + { 1, 115, 166 }, + { 1, 84, 121 }, + { 1, 51, 67 }, + { 1, 16, 6 } } } } +}; + +static void 
extend_to_full_distribution(vpx_prob *probs, vpx_prob p) { + assert(p != 0); + memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob)); +} + +void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) { + if (full != model) + memcpy(full, model, sizeof(vpx_prob) * UNCONSTRAINED_NODES); + extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]); +} + +void vp9_default_coef_probs(VP9_COMMON *cm) { + vp9_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4); + vp9_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8); + vp9_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16); + vp9_copy(cm->fc->coef_probs[TX_32X32], default_coef_probs_32x32); +} + +#define COEF_COUNT_SAT 24 +#define COEF_MAX_UPDATE_FACTOR 112 +#define COEF_COUNT_SAT_KEY 24 +#define COEF_MAX_UPDATE_FACTOR_KEY 112 +#define COEF_COUNT_SAT_AFTER_KEY 24 +#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 + +static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size, + unsigned int count_sat, + unsigned int update_factor) { + const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; + vp9_coeff_probs_model *const probs = cm->fc->coef_probs[tx_size]; + const vp9_coeff_probs_model *const pre_probs = pre_fc->coef_probs[tx_size]; + vp9_coeff_count_model *counts = cm->counts.coef[tx_size]; + unsigned int(*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = + cm->counts.eob_branch[tx_size]; + int i, j, k, l, m; + + for (i = 0; i < PLANE_TYPES; ++i) + for (j = 0; j < REF_TYPES; ++j) + for (k = 0; k < COEF_BANDS; ++k) + for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { + const int n0 = counts[i][j][k][l][ZERO_TOKEN]; + const int n1 = counts[i][j][k][l][ONE_TOKEN]; + const int n2 = counts[i][j][k][l][TWO_TOKEN]; + const int neob = counts[i][j][k][l][EOB_MODEL_TOKEN]; + const unsigned int branch_ct[UNCONSTRAINED_NODES][2] = { + { neob, eob_counts[i][j][k][l] - neob }, { n0, n1 + n2 }, { n1, n2 } + }; + for (m = 0; m < UNCONSTRAINED_NODES; ++m) + 
probs[i][j][k][l][m] = + merge_probs(pre_probs[i][j][k][l][m], branch_ct[m], count_sat, + update_factor); + } +} + +void vp9_adapt_coef_probs(VP9_COMMON *cm) { + TX_SIZE t; + unsigned int count_sat, update_factor; + + if (frame_is_intra_only(cm)) { + update_factor = COEF_MAX_UPDATE_FACTOR_KEY; + count_sat = COEF_COUNT_SAT_KEY; + } else if (cm->last_frame_type == KEY_FRAME) { + update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */ + count_sat = COEF_COUNT_SAT_AFTER_KEY; + } else { + update_factor = COEF_MAX_UPDATE_FACTOR; + count_sat = COEF_COUNT_SAT; + } + for (t = TX_4X4; t <= TX_32X32; t++) + adapt_coef_probs(cm, t, count_sat, update_factor); +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_entropy.h b/media/libvpx/libvpx/vp9/common/vp9_entropy.h new file mode 100644 index 0000000000..d026651df7 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_entropy.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VP9_COMMON_VP9_ENTROPY_H_ +#define VPX_VP9_COMMON_VP9_ENTROPY_H_ + +#include "vpx/vpx_integer.h" +#include "vpx_dsp/prob.h" + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_enums.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DIFF_UPDATE_PROB 252 + +// Coefficient token alphabet +#define ZERO_TOKEN 0 // 0 Extra Bits 0+0 +#define ONE_TOKEN 1 // 1 Extra Bits 0+1 +#define TWO_TOKEN 2 // 2 Extra Bits 0+1 +#define THREE_TOKEN 3 // 3 Extra Bits 0+1 +#define FOUR_TOKEN 4 // 4 Extra Bits 0+1 +#define CATEGORY1_TOKEN 5 // 5-6 Extra Bits 1+1 +#define CATEGORY2_TOKEN 6 // 7-10 Extra Bits 2+1 +#define CATEGORY3_TOKEN 7 // 11-18 Extra Bits 3+1 +#define CATEGORY4_TOKEN 8 // 19-34 Extra Bits 4+1 +#define CATEGORY5_TOKEN 9 // 35-66 Extra Bits 5+1 +#define CATEGORY6_TOKEN 10 // 67+ Extra Bits 14+1 +#define EOB_TOKEN 11 // EOB Extra Bits 0+0 + +#define ENTROPY_TOKENS 12 + +#define ENTROPY_NODES 11 + +DECLARE_ALIGNED(16, extern const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]); + +#define CAT1_MIN_VAL 5 +#define CAT2_MIN_VAL 7 +#define CAT3_MIN_VAL 11 +#define CAT4_MIN_VAL 19 +#define CAT5_MIN_VAL 35 +#define CAT6_MIN_VAL 67 + +// Extra bit probabilities. 
+DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob[1]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob[2]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob[3]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob[4]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob[5]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob[14]); + +#if CONFIG_VP9_HIGHBITDEPTH +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high10[1]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high10[2]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high10[3]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high10[4]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high10[5]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high10[16]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high12[1]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high12[2]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high12[3]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high12[4]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high12[5]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high12[18]); +#endif // CONFIG_VP9_HIGHBITDEPTH + +#define EOB_MODEL_TOKEN 3 + +#define DCT_MAX_VALUE 16384 +#if CONFIG_VP9_HIGHBITDEPTH +#define DCT_MAX_VALUE_HIGH10 65536 +#define DCT_MAX_VALUE_HIGH12 262144 +#endif // CONFIG_VP9_HIGHBITDEPTH + +/* Coefficients are predicted via a 3-dimensional probability table. */ + +#define REF_TYPES 2 // intra=0, inter=1 + +/* Middle dimension reflects the coefficient position within the transform. */ +#define COEF_BANDS 6 + +/* Inside dimension is measure of nearby complexity, that reflects the energy + of nearby coefficients are nonzero. For the first coefficient (DC, unless + block type is 0), we look at the (already encoded) blocks above and to the + left of the current block. 
The context index is then the number (0,1,or 2) + of these blocks having nonzero coefficients. + After decoding a coefficient, the measure is determined by the size of the + most recently decoded coefficient. + Note that the intuitive meaning of this measure changes as coefficients + are decoded, e.g., prior to the first token, a zero means that my neighbors + are empty while, after the first token, because of the use of end-of-block, + a zero means we just decoded a zero and hence guarantees that a non-zero + coefficient will appear later in this block. However, this shift + in meaning is perfectly OK because our context depends also on the + coefficient band (and since zigzag positions 0, 1, and 2 are in + distinct bands). */ + +#define COEFF_CONTEXTS 6 +#define BAND_COEFF_CONTEXTS(band) ((band) == 0 ? 3 : COEFF_CONTEXTS) + +// #define ENTROPY_STATS + +typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] + [ENTROPY_TOKENS]; +typedef unsigned int vp9_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] + [ENTROPY_NODES][2]; + +#define SUBEXP_PARAM 4 /* Subexponential code parameter */ +#define MODULUS_PARAM 13 /* Modulus parameter */ + +struct VP9Common; +void vp9_default_coef_probs(struct VP9Common *cm); +void vp9_adapt_coef_probs(struct VP9Common *cm); + +// This is the index in the scan order beyond which all coefficients for +// 8x8 transform and above are in the top band. +// This macro is currently unused but may be used by certain implementations +#define MAXBAND_INDEX 21 + +DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_8x8plus[1024]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_4x4[16]); + +static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { + return tx_size == TX_4X4 ? 
+// 255 lists of probabilities are stored, one for each ONE node prob in
+// 1, 2, 3, ..., 255. Entries for even probabilities are interpolated
+// linearly from the neighbouring odd-probability entries.
+/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_seg_common.h" + +const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = { + { + // above = dc + { 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc + { 92, 45, 102, 136, 116, 180, 74, 90, 100 }, // left = v + { 73, 32, 19, 187, 222, 215, 46, 34, 100 }, // left = h + { 91, 30, 32, 116, 121, 186, 93, 86, 94 }, // left = d45 + { 72, 35, 36, 149, 68, 206, 68, 63, 105 }, // left = d135 + { 73, 31, 28, 138, 57, 124, 55, 122, 151 }, // left = d117 + { 67, 23, 21, 140, 126, 197, 40, 37, 171 }, // left = d153 + { 86, 27, 28, 128, 154, 212, 45, 43, 53 }, // left = d207 + { 74, 32, 27, 107, 86, 160, 63, 134, 102 }, // left = d63 + { 59, 67, 44, 140, 161, 202, 78, 67, 119 } // left = tm + }, + { + // above = v + { 63, 36, 126, 146, 123, 158, 60, 90, 96 }, // left = dc + { 43, 46, 168, 134, 107, 128, 69, 142, 92 }, // left = v + { 44, 29, 68, 159, 201, 177, 50, 57, 77 }, // left = h + { 58, 38, 76, 114, 97, 172, 78, 133, 92 }, // left = d45 + { 46, 41, 76, 140, 63, 184, 69, 112, 57 }, // left = d135 + { 38, 32, 85, 140, 46, 112, 54, 151, 133 }, // left = d117 + { 39, 27, 61, 131, 110, 175, 44, 75, 136 }, // left = d153 + { 52, 30, 74, 113, 130, 175, 51, 64, 58 }, // left = d207 + { 47, 35, 80, 100, 74, 143, 64, 163, 74 }, // left = d63 + { 36, 61, 116, 114, 128, 162, 80, 125, 82 } // left = tm + }, + { + // above = h + { 82, 26, 26, 171, 208, 204, 44, 32, 105 }, // left = dc + { 55, 44, 68, 166, 179, 192, 57, 57, 108 }, // left = v + { 42, 26, 11, 
199, 241, 228, 23, 15, 85 }, // left = h + { 68, 42, 19, 131, 160, 199, 55, 52, 83 }, // left = d45 + { 58, 50, 25, 139, 115, 232, 39, 52, 118 }, // left = d135 + { 50, 35, 33, 153, 104, 162, 64, 59, 131 }, // left = d117 + { 44, 24, 16, 150, 177, 202, 33, 19, 156 }, // left = d153 + { 55, 27, 12, 153, 203, 218, 26, 27, 49 }, // left = d207 + { 53, 49, 21, 110, 116, 168, 59, 80, 76 }, // left = d63 + { 38, 72, 19, 168, 203, 212, 50, 50, 107 } // left = tm + }, + { + // above = d45 + { 103, 26, 36, 129, 132, 201, 83, 80, 93 }, // left = dc + { 59, 38, 83, 112, 103, 162, 98, 136, 90 }, // left = v + { 62, 30, 23, 158, 200, 207, 59, 57, 50 }, // left = h + { 67, 30, 29, 84, 86, 191, 102, 91, 59 }, // left = d45 + { 60, 32, 33, 112, 71, 220, 64, 89, 104 }, // left = d135 + { 53, 26, 34, 130, 56, 149, 84, 120, 103 }, // left = d117 + { 53, 21, 23, 133, 109, 210, 56, 77, 172 }, // left = d153 + { 77, 19, 29, 112, 142, 228, 55, 66, 36 }, // left = d207 + { 61, 29, 29, 93, 97, 165, 83, 175, 162 }, // left = d63 + { 47, 47, 43, 114, 137, 181, 100, 99, 95 } // left = tm + }, + { + // above = d135 + { 69, 23, 29, 128, 83, 199, 46, 44, 101 }, // left = dc + { 53, 40, 55, 139, 69, 183, 61, 80, 110 }, // left = v + { 40, 29, 19, 161, 180, 207, 43, 24, 91 }, // left = h + { 60, 34, 19, 105, 61, 198, 53, 64, 89 }, // left = d45 + { 52, 31, 22, 158, 40, 209, 58, 62, 89 }, // left = d135 + { 44, 31, 29, 147, 46, 158, 56, 102, 198 }, // left = d117 + { 35, 19, 12, 135, 87, 209, 41, 45, 167 }, // left = d153 + { 55, 25, 21, 118, 95, 215, 38, 39, 66 }, // left = d207 + { 51, 38, 25, 113, 58, 164, 70, 93, 97 }, // left = d63 + { 47, 54, 34, 146, 108, 203, 72, 103, 151 } // left = tm + }, + { + // above = d117 + { 64, 19, 37, 156, 66, 138, 49, 95, 133 }, // left = dc + { 46, 27, 80, 150, 55, 124, 55, 121, 135 }, // left = v + { 36, 23, 27, 165, 149, 166, 54, 64, 118 }, // left = h + { 53, 21, 36, 131, 63, 163, 60, 109, 81 }, // left = d45 + { 40, 26, 35, 154, 40, 185, 51, 97, 123 }, // 
left = d135 + { 35, 19, 34, 179, 19, 97, 48, 129, 124 }, // left = d117 + { 36, 20, 26, 136, 62, 164, 33, 77, 154 }, // left = d153 + { 45, 18, 32, 130, 90, 157, 40, 79, 91 }, // left = d207 + { 45, 26, 28, 129, 45, 129, 49, 147, 123 }, // left = d63 + { 38, 44, 51, 136, 74, 162, 57, 97, 121 } // left = tm + }, + { + // above = d153 + { 75, 17, 22, 136, 138, 185, 32, 34, 166 }, // left = dc + { 56, 39, 58, 133, 117, 173, 48, 53, 187 }, // left = v + { 35, 21, 12, 161, 212, 207, 20, 23, 145 }, // left = h + { 56, 29, 19, 117, 109, 181, 55, 68, 112 }, // left = d45 + { 47, 29, 17, 153, 64, 220, 59, 51, 114 }, // left = d135 + { 46, 16, 24, 136, 76, 147, 41, 64, 172 }, // left = d117 + { 34, 17, 11, 108, 152, 187, 13, 15, 209 }, // left = d153 + { 51, 24, 14, 115, 133, 209, 32, 26, 104 }, // left = d207 + { 55, 30, 18, 122, 79, 179, 44, 88, 116 }, // left = d63 + { 37, 49, 25, 129, 168, 164, 41, 54, 148 } // left = tm + }, + { + // above = d207 + { 82, 22, 32, 127, 143, 213, 39, 41, 70 }, // left = dc + { 62, 44, 61, 123, 105, 189, 48, 57, 64 }, // left = v + { 47, 25, 17, 175, 222, 220, 24, 30, 86 }, // left = h + { 68, 36, 17, 106, 102, 206, 59, 74, 74 }, // left = d45 + { 57, 39, 23, 151, 68, 216, 55, 63, 58 }, // left = d135 + { 49, 30, 35, 141, 70, 168, 82, 40, 115 }, // left = d117 + { 51, 25, 15, 136, 129, 202, 38, 35, 139 }, // left = d153 + { 68, 26, 16, 111, 141, 215, 29, 28, 28 }, // left = d207 + { 59, 39, 19, 114, 75, 180, 77, 104, 42 }, // left = d63 + { 40, 61, 26, 126, 152, 206, 61, 59, 93 } // left = tm + }, + { + // above = d63 + { 78, 23, 39, 111, 117, 170, 74, 124, 94 }, // left = dc + { 48, 34, 86, 101, 92, 146, 78, 179, 134 }, // left = v + { 47, 22, 24, 138, 187, 178, 68, 69, 59 }, // left = h + { 56, 25, 33, 105, 112, 187, 95, 177, 129 }, // left = d45 + { 48, 31, 27, 114, 63, 183, 82, 116, 56 }, // left = d135 + { 43, 28, 37, 121, 63, 123, 61, 192, 169 }, // left = d117 + { 42, 17, 24, 109, 97, 177, 56, 76, 122 }, // left = d153 + { 58, 18, 
28, 105, 139, 182, 70, 92, 63 }, // left = d207 + { 46, 23, 32, 74, 86, 150, 67, 183, 88 }, // left = d63 + { 36, 38, 48, 92, 122, 165, 88, 137, 91 } // left = tm + }, + { + // above = tm + { 65, 70, 60, 155, 159, 199, 61, 60, 81 }, // left = dc + { 44, 78, 115, 132, 119, 173, 71, 112, 93 }, // left = v + { 39, 38, 21, 184, 227, 206, 42, 32, 64 }, // left = h + { 58, 47, 36, 124, 137, 193, 80, 82, 78 }, // left = d45 + { 49, 50, 35, 144, 95, 205, 63, 78, 59 }, // left = d135 + { 41, 53, 52, 148, 71, 142, 65, 128, 51 }, // left = d117 + { 40, 36, 28, 143, 143, 202, 40, 55, 137 }, // left = d153 + { 52, 34, 29, 129, 183, 227, 42, 35, 43 }, // left = d207 + { 42, 44, 44, 104, 105, 164, 64, 130, 80 }, // left = d63 + { 43, 81, 53, 140, 169, 204, 68, 84, 72 } // left = tm + } +}; + +const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = { + { 144, 11, 54, 157, 195, 130, 46, 58, 108 }, // y = dc + { 118, 15, 123, 148, 131, 101, 44, 93, 131 }, // y = v + { 113, 12, 23, 188, 226, 142, 26, 32, 125 }, // y = h + { 120, 11, 50, 123, 163, 135, 64, 77, 103 }, // y = d45 + { 113, 9, 36, 155, 111, 157, 32, 44, 161 }, // y = d135 + { 116, 9, 55, 176, 76, 96, 37, 61, 149 }, // y = d117 + { 115, 9, 28, 141, 161, 167, 21, 25, 193 }, // y = d153 + { 120, 12, 32, 145, 195, 142, 32, 38, 86 }, // y = d207 + { 116, 12, 64, 120, 140, 125, 49, 115, 121 }, // y = d63 + { 102, 19, 66, 162, 182, 122, 35, 59, 128 } // y = tm +}; + +static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = { + { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 + { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 + { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32 + { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32 +}; + +static const vpx_prob default_if_uv_probs[INTRA_MODES][INTRA_MODES - 1] = { + { 120, 7, 76, 176, 208, 126, 28, 54, 103 }, // y = dc + { 48, 12, 154, 155, 139, 90, 34, 117, 119 }, // y = v + { 67, 6, 25, 
204, 243, 158, 13, 21, 96 }, // y = h + { 97, 5, 44, 131, 176, 139, 48, 68, 97 }, // y = d45 + { 83, 5, 42, 156, 111, 152, 26, 49, 152 }, // y = d135 + { 80, 5, 58, 178, 74, 83, 33, 62, 145 }, // y = d117 + { 86, 5, 32, 154, 192, 168, 14, 22, 163 }, // y = d153 + { 85, 5, 32, 156, 216, 148, 19, 29, 73 }, // y = d207 + { 77, 7, 64, 116, 132, 122, 37, 126, 120 }, // y = d63 + { 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm +}; + +const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] + [PARTITION_TYPES - 1] = { + // 8x8 -> 4x4 + { 158, 97, 94 }, // a/l both not split + { 93, 24, 99 }, // a split, l not split + { 85, 119, 44 }, // l split, a not split + { 62, 59, 67 }, // a/l both split + + // 16x16 -> 8x8 + { 149, 53, 53 }, // a/l both not split + { 94, 20, 48 }, // a split, l not split + { 83, 53, 24 }, // l split, a not split + { 52, 18, 18 }, // a/l both split + + // 32x32 -> 16x16 + { 150, 40, 39 }, // a/l both not split + { 78, 12, 26 }, // a split, l not split + { 67, 33, 11 }, // l split, a not split + { 24, 7, 5 }, // a/l both split + + // 64x64 -> 32x32 + { 174, 35, 49 }, // a/l both not split + { 68, 11, 27 }, // a split, l not split + { 57, 15, 9 }, // l split, a not split + { 12, 3, 3 }, // a/l both split + }; + +static const vpx_prob + default_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] = { + // 8x8 -> 4x4 + { 199, 122, 141 }, // a/l both not split + { 147, 63, 159 }, // a split, l not split + { 148, 133, 118 }, // l split, a not split + { 121, 104, 114 }, // a/l both split + // 16x16 -> 8x8 + { 174, 73, 87 }, // a/l both not split + { 92, 41, 83 }, // a split, l not split + { 82, 99, 50 }, // l split, a not split + { 53, 39, 39 }, // a/l both split + // 32x32 -> 16x16 + { 177, 58, 59 }, // a/l both not split + { 68, 26, 63 }, // a split, l not split + { 52, 79, 25 }, // l split, a not split + { 17, 14, 12 }, // a/l both split + // 64x64 -> 32x32 + { 222, 34, 30 }, // a/l both not split + { 72, 16, 44 }, // a split, l not split + { 
58, 32, 12 }, // l split, a not split + { 10, 7, 6 }, // a/l both split + }; + +static const vpx_prob + default_inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1] = { + { 2, 173, 34 }, // 0 = both zero mv + { 7, 145, 85 }, // 1 = one zero mv + one a predicted mv + { 7, 166, 63 }, // 2 = two predicted mvs + { 7, 94, 66 }, // 3 = one predicted/zero and one new mv + { 8, 64, 46 }, // 4 = two new mvs + { 17, 81, 31 }, // 5 = one intra neighbour + x + { 25, 29, 30 }, // 6 = two intra neighbours + }; + +/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ +const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = { + -DC_PRED, 2, /* 0 = DC_NODE */ + -TM_PRED, 4, /* 1 = TM_NODE */ + -V_PRED, 6, /* 2 = V_NODE */ + 8, 12, /* 3 = COM_NODE */ + -H_PRED, 10, /* 4 = H_NODE */ + -D135_PRED, -D117_PRED, /* 5 = D135_NODE */ + -D45_PRED, 14, /* 6 = D45_NODE */ + -D63_PRED, 16, /* 7 = D63_NODE */ + -D153_PRED, -D207_PRED /* 8 = D153_NODE */ +}; + +const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = { + -INTER_OFFSET(ZEROMV), 2, -INTER_OFFSET(NEARESTMV), 4, -INTER_OFFSET(NEARMV), + -INTER_OFFSET(NEWMV) +}; + +const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = { + -PARTITION_NONE, 2, -PARTITION_HORZ, 4, -PARTITION_VERT, -PARTITION_SPLIT +}; + +static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { 9, 102, + 187, + 225 }; + +static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = { 239, 183, + 119, 96, + 41 }; + +static const vpx_prob default_comp_ref_p[REF_CONTEXTS] = { 50, 126, 123, 221, + 226 }; + +static const vpx_prob default_single_ref_p[REF_CONTEXTS][2] = { + { 33, 16 }, { 77, 74 }, { 142, 142 }, { 172, 170 }, { 238, 247 } +}; + +static const struct tx_probs default_tx_probs = { { { 3, 136, 37 }, + { 5, 52, 13 } }, + + { { 20, 152 }, { 15, 101 } }, + + { { 100 }, { 66 } } }; + +void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, + unsigned int 
(*ct_32x32p)[2]) {
  ct_32x32p[0][0] = tx_count_32x32p[TX_4X4];
  ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + tx_count_32x32p[TX_16X16] +
                    tx_count_32x32p[TX_32X32];
  ct_32x32p[1][0] = tx_count_32x32p[TX_8X8];
  ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32];
  ct_32x32p[2][0] = tx_count_32x32p[TX_16X16];
  ct_32x32p[2][1] = tx_count_32x32p[TX_32X32];
}

// Collapse per-TX_SIZE selection counts into binary branch counts for the
// 16x16 tx-size tree: node 0 decides 4x4 vs larger, node 1 decides 8x8 vs
// 16x16.
void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p,
                                      unsigned int (*ct_16x16p)[2]) {
  ct_16x16p[0][0] = tx_count_16x16p[TX_4X4];
  ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16];
  ct_16x16p[1][0] = tx_count_16x16p[TX_8X8];
  ct_16x16p[1][1] = tx_count_16x16p[TX_16X16];
}

// Single-node tree: 4x4 vs 8x8.
void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
                                    unsigned int (*ct_8x8p)[2]) {
  ct_8x8p[0][0] = tx_count_8x8p[TX_4X4];
  ct_8x8p[0][1] = tx_count_8x8p[TX_8X8];
}

static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = { 192, 128, 64 };

static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
                                                    [SWITCHABLE_FILTERS - 1] = {
                                                      { 235, 162 },
                                                      { 36, 255 },
                                                      { 34, 3 },
                                                      { 149, 144 },
                                                    };

// Load the compile-time default probability tables into a frame context.
// Note: coefficient probs and nmvc are initialized separately (see
// vp9_default_coef_probs / vp9_init_mv_probs in vp9_setup_past_independence).
static void init_mode_probs(FRAME_CONTEXT *fc) {
  vp9_copy(fc->uv_mode_prob, default_if_uv_probs);
  vp9_copy(fc->y_mode_prob, default_if_y_probs);
  vp9_copy(fc->switchable_interp_prob, default_switchable_interp_prob);
  vp9_copy(fc->partition_prob, default_partition_probs);
  vp9_copy(fc->intra_inter_prob, default_intra_inter_p);
  vp9_copy(fc->comp_inter_prob, default_comp_inter_p);
  vp9_copy(fc->comp_ref_prob, default_comp_ref_p);
  vp9_copy(fc->single_ref_prob, default_single_ref_p);
  fc->tx_probs = default_tx_probs;
  vp9_copy(fc->skip_probs, default_skip_probs);
  vp9_copy(fc->inter_mode_probs, default_inter_mode_probs);
}

const vpx_tree_index vp9_switchable_interp_tree[TREE_SIZE(
    SWITCHABLE_FILTERS)] = { -EIGHTTAP, 2, -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP };

// Backward probability adaptation: merge the probabilities saved in the
// previous frame context (pre_fc) with the symbol counts accumulated while
// coding the current frame (cm->counts), writing the adapted values into
// cm->fc. Tree-coded symbols go through vpx_tree_merge_probs; single binary
// nodes through mode_mv_merge_probs. Switchable-filter and tx-size probs are
// only adapted when the frame actually used those modes (SWITCHABLE filter /
// TX_MODE_SELECT).
void vp9_adapt_mode_probs(VP9_COMMON *cm) {
  int i, j;
  FRAME_CONTEXT *fc = cm->fc;
  const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
  const FRAME_COUNTS *counts = &cm->counts;

  for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
    fc->intra_inter_prob[i] = mode_mv_merge_probs(pre_fc->intra_inter_prob[i],
                                                  counts->intra_inter[i]);
  for (i = 0; i < COMP_INTER_CONTEXTS; i++)
    fc->comp_inter_prob[i] =
        mode_mv_merge_probs(pre_fc->comp_inter_prob[i], counts->comp_inter[i]);
  for (i = 0; i < REF_CONTEXTS; i++)
    fc->comp_ref_prob[i] =
        mode_mv_merge_probs(pre_fc->comp_ref_prob[i], counts->comp_ref[i]);
  for (i = 0; i < REF_CONTEXTS; i++)
    for (j = 0; j < 2; j++)
      fc->single_ref_prob[i][j] = mode_mv_merge_probs(
          pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]);

  for (i = 0; i < INTER_MODE_CONTEXTS; i++)
    vpx_tree_merge_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i],
                         counts->inter_mode[i], fc->inter_mode_probs[i]);

  for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
    vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i],
                         counts->y_mode[i], fc->y_mode_prob[i]);

  for (i = 0; i < INTRA_MODES; ++i)
    vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i],
                         counts->uv_mode[i], fc->uv_mode_prob[i]);

  for (i = 0; i < PARTITION_CONTEXTS; i++)
    vpx_tree_merge_probs(vp9_partition_tree, pre_fc->partition_prob[i],
                         counts->partition[i], fc->partition_prob[i]);

  if (cm->interp_filter == SWITCHABLE) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
      vpx_tree_merge_probs(
          vp9_switchable_interp_tree, pre_fc->switchable_interp_prob[i],
          counts->switchable_interp[i], fc->switchable_interp_prob[i]);
  }

  if (cm->tx_mode == TX_MODE_SELECT) {
    unsigned int branch_ct_8x8p[TX_SIZES - 3][2];
    unsigned int branch_ct_16x16p[TX_SIZES - 2][2];
    unsigned int branch_ct_32x32p[TX_SIZES - 1][2];

    for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
      // The tx-size counts are per-size; convert them to per-tree-node
      // branch counts before merging each binary node.
      tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p);
      for (j = 0; j < TX_SIZES - 3; ++j)
        fc->tx_probs.p8x8[i][j] =
            mode_mv_merge_probs(pre_fc->tx_probs.p8x8[i][j], branch_ct_8x8p[j]);

      tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p);
      for (j = 0; j < TX_SIZES - 2; ++j)
        fc->tx_probs.p16x16[i][j] = mode_mv_merge_probs(
            pre_fc->tx_probs.p16x16[i][j], branch_ct_16x16p[j]);

      tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p);
      for (j = 0; j < TX_SIZES - 1; ++j)
        fc->tx_probs.p32x32[i][j] = mode_mv_merge_probs(
            pre_fc->tx_probs.p32x32[i][j], branch_ct_32x32p[j]);
    }
  }

  for (i = 0; i < SKIP_CONTEXTS; ++i)
    fc->skip_probs[i] =
        mode_mv_merge_probs(pre_fc->skip_probs[i], counts->skip[i]);
}

// Restore the loop-filter mode/ref deltas to their spec-defined defaults and
// re-enable delta updates.
static void set_default_lf_deltas(struct loopfilter *lf) {
  lf->mode_ref_delta_enabled = 1;
  lf->mode_ref_delta_update = 1;

  lf->ref_deltas[INTRA_FRAME] = 1;
  lf->ref_deltas[LAST_FRAME] = 0;
  lf->ref_deltas[GOLDEN_FRAME] = -1;
  lf->ref_deltas[ALTREF_FRAME] = -1;

  lf->mode_deltas[0] = 0;
  lf->mode_deltas[1] = 0;
}

// Re-initialize all state that must not depend on previously coded frames:
// segmentation maps and features, loop-filter deltas, and every adaptive
// probability table. Called for keyframes, error-resilient frames, and
// explicit frame-context resets.
void vp9_setup_past_independence(VP9_COMMON *cm) {
  // Reset the segment feature data to the default stats:
  // Features disabled, 0, with delta coding (Default state).
  struct loopfilter *const lf = &cm->lf;

  int i;
  vp9_clearall_segfeatures(&cm->seg);
  cm->seg.abs_delta = SEGMENT_DELTADATA;

  if (cm->last_frame_seg_map)
    memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));

  if (cm->current_frame_seg_map)
    memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));

  // Reset the mode ref deltas for loop filter
  vp9_zero(lf->last_ref_deltas);
  vp9_zero(lf->last_mode_deltas);
  set_default_lf_deltas(lf);

  // To force update of the sharpness
  lf->last_sharpness_level = -1;

  vp9_default_coef_probs(cm);
  init_mode_probs(cm->fc);
  vp9_init_mv_probs(cm);
  cm->fc->initialized = 1;

  if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
      cm->reset_frame_context == 3) {
    // Reset all frame contexts.
    for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
  } else if (cm->reset_frame_context == 2) {
    // Reset only the frame context specified in the frame header.
    cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
  }

  // prev_mip will only be allocated in encoder.
  if (frame_is_intra_only(cm) && cm->prev_mip)
    memset(cm->prev_mip, 0,
           cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip));

  vp9_zero(cm->ref_frame_sign_bias);

  cm->frame_context_idx = 0;
}
diff --git a/media/libvpx/libvpx/vp9/common/vp9_entropymode.h b/media/libvpx/libvpx/vp9/common/vp9_entropymode.h
new file mode 100644
index 0000000000..a756c8d0b8
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/vp9_entropymode.h
@@ -0,0 +1,107 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_COMMON_VP9_ENTROPYMODE_H_
#define VPX_VP9_COMMON_VP9_ENTROPYMODE_H_

#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_filter.h"
#include "vpx_dsp/vpx_filter.h"

#ifdef __cplusplus
extern "C" {
#endif

#define BLOCK_SIZE_GROUPS 4

#define TX_SIZE_CONTEXTS 2

#define INTER_OFFSET(mode) ((mode)-NEARESTMV)

struct VP9Common;

// Per-node probabilities for the three tx-size trees; the largest allowed
// tx size determines how many binary nodes the tree has (3, 2 or 1).
struct tx_probs {
  vpx_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1];
  vpx_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2];
  vpx_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3];
};

// Per-size selection counts for the same three trees (converted to branch
// counts by tx_counts_to_branch_counts_* before adaptation).
struct tx_counts {
  unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES];
  unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1];
  unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2];
  unsigned int tx_totals[TX_SIZES];
};

// One complete set of adaptive symbol probabilities (a VP9 "frame context").
typedef struct frame_contexts {
  vpx_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
  vpx_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
  vpx_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1];
  vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES];
  vpx_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
                                 [SWITCHABLE_FILTERS - 1];
  vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
  vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
  vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS];
  vpx_prob single_ref_prob[REF_CONTEXTS][2];
  vpx_prob comp_ref_prob[REF_CONTEXTS];
  struct tx_probs tx_probs;
  vpx_prob skip_probs[SKIP_CONTEXTS];
  nmv_context nmvc;
  int initialized;  // nonzero once default probabilities have been loaded
} FRAME_CONTEXT;

// Symbol occurrence counters accumulated while coding one frame; consumed by
// the vp9_adapt_*_probs backward-adaptation routines.
typedef struct FRAME_COUNTS {
  unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
  unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
  unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES];
  vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES];
  unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS]
                         [COEFF_CONTEXTS];
  unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS]
                                [SWITCHABLE_FILTERS];
  unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES];
  unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
  unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
  unsigned int single_ref[REF_CONTEXTS][2][2];
  unsigned int comp_ref[REF_CONTEXTS][2];
  struct tx_counts tx;
  unsigned int skip[SKIP_CONTEXTS][2];
  nmv_context_counts mv;
} FRAME_COUNTS;

extern const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
extern const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
                                        [INTRA_MODES - 1];
extern const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS]
                                            [PARTITION_TYPES - 1];
extern const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)];
extern const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)];
extern const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)];
extern const vpx_tree_index
    vp9_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)];

void vp9_setup_past_independence(struct VP9Common *cm);

void vp9_adapt_mode_probs(struct VP9Common *cm);

void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p,
                                      unsigned int (*ct_32x32p)[2]);
void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p,
                                      unsigned int (*ct_16x16p)[2]);
void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
                                    unsigned int (*ct_8x8p)[2]);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_COMMON_VP9_ENTROPYMODE_H_
diff --git a/media/libvpx/libvpx/vp9/common/vp9_entropymv.c b/media/libvpx/libvpx/vp9/common/vp9_entropymv.c
new file mode 100644
index 0000000000..b6f052d088
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/vp9_entropymv.c
@@ -0,0 +1,191 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_entropymv.h" + +const vpx_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { + -MV_JOINT_ZERO, 2, -MV_JOINT_HNZVZ, 4, -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ +}; + +const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { + -MV_CLASS_0, 2, -MV_CLASS_1, 4, 6, + 8, -MV_CLASS_2, -MV_CLASS_3, 10, 12, + -MV_CLASS_4, -MV_CLASS_5, -MV_CLASS_6, 14, 16, + 18, -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10, +}; + +const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { -0, -1 }; + +const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1, + 4, -2, -3 }; + +static const nmv_context default_nmv_context = { + { 32, 64, 96 }, + { { + // Vertical component + 128, // sign + { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 }, // class + { 216 }, // class0 + { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits + { { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp + { 64, 96, 64 }, // fp + 160, // class0_hp bit + 128, // hp + }, + { + // Horizontal component + 128, // sign + { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 }, // class + { 208 }, // class0 + { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits + { { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp + { 64, 96, 64 }, // fp + 160, // class0_hp bit + 128, // hp + } }, +}; + +static const uint8_t log_in_base_2[] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 +}; + +static INLINE int mv_class_base(MV_CLASS_TYPE c) { + return c ? CLASS0_SIZE << (c + 2) : 0; +} + +MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { + const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) + ? MV_CLASS_10 + : (MV_CLASS_TYPE)log_in_base_2[z >> 3]; + if (offset) *offset = z - mv_class_base(c); + return c; +} + +static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr, + int usehp) { + int s, z, c, o, d, e, f; + assert(v != 0); /* should not be zero */ + s = v < 0; + comp_counts->sign[s] += incr; + z = (s ? 
-v : v) - 1; /* magnitude - 1 */

  c = vp9_get_mv_class(z, &o);
  comp_counts->classes[c] += incr;

  d = (o >> 3);     /* int mv data */
  f = (o >> 1) & 3; /* fractional pel mv data */
  e = (o & 1);      /* high precision mv data */

  if (c == MV_CLASS_0) {
    comp_counts->class0[d] += incr;
    comp_counts->class0_fp[d][f] += incr;
    comp_counts->class0_hp[e] += usehp * incr;
  } else {
    int i;
    int b = c + CLASS0_BITS - 1;  // number of bits
    for (i = 0; i < b; ++i) comp_counts->bits[i][((d >> i) & 1)] += incr;
    comp_counts->fp[f] += incr;
    comp_counts->hp[e] += usehp * incr;
  }
}

// Accumulate the joint type of one decoded motion vector, and the per-axis
// component statistics for whichever of row/col is nonzero.
void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) {
  if (counts != NULL) {
    const MV_JOINT_TYPE j = vp9_get_mv_joint(mv);
    ++counts->joints[j];

    if (mv_joint_vertical(j)) {
      inc_mv_component(mv->row, &counts->comps[0], 1, 1);
    }

    if (mv_joint_horizontal(j)) {
      inc_mv_component(mv->col, &counts->comps[1], 1, 1);
    }
  }
}

// Backward adaptation of the motion-vector probability model: merge the
// previous frame context's nmv probabilities with this frame's mv counts.
// The high-precision (1/8-pel) probabilities are only adapted when the frame
// allowed high-precision mvs.
void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
  int i, j;

  nmv_context *fc = &cm->fc->nmvc;
  const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc;
  const nmv_context_counts *counts = &cm->counts.mv;

  vpx_tree_merge_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints,
                       fc->joints);

  // comps[0] is the vertical (row) component, comps[1] the horizontal (col)
  // component, matching vp9_inc_mv above.
  for (i = 0; i < 2; ++i) {
    nmv_component *comp = &fc->comps[i];
    const nmv_component *pre_comp = &pre_fc->comps[i];
    const nmv_component_counts *c = &counts->comps[i];

    comp->sign = mode_mv_merge_probs(pre_comp->sign, c->sign);
    vpx_tree_merge_probs(vp9_mv_class_tree, pre_comp->classes, c->classes,
                         comp->classes);
    vpx_tree_merge_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0,
                         comp->class0);

    for (j = 0; j < MV_OFFSET_BITS; ++j)
      comp->bits[j] = mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]);

    for (j = 0; j < CLASS0_SIZE; ++j)
      vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j],
                           c->class0_fp[j], comp->class0_fp[j]);

    vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);

    if (allow_hp) {
      comp->class0_hp = mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp);
      comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp);
    }
  }
}

// Reset the mv model of the active frame context to the spec defaults.
void vp9_init_mv_probs(VP9_COMMON *cm) { cm->fc->nmvc = default_nmv_context; }
diff --git a/media/libvpx/libvpx/vp9/common/vp9_entropymv.h b/media/libvpx/libvpx/vp9/common/vp9_entropymv.h
new file mode 100644
index 0000000000..ee9d37973f
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/vp9_entropymv.h
@@ -0,0 +1,136 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_COMMON_VP9_ENTROPYMV_H_
#define VPX_VP9_COMMON_VP9_ENTROPYMV_H_

#include "./vpx_config.h"

#include "vpx_dsp/prob.h"

#include "vp9/common/vp9_mv.h"

#ifdef __cplusplus
extern "C" {
#endif

struct VP9Common;

void vp9_init_mv_probs(struct VP9Common *cm);

void vp9_adapt_mv_probs(struct VP9Common *cm, int allow_hp);

// High-precision (1/8-pel) mv coding is only worthwhile for small vectors:
// allow it when both components of the reference mv are under the threshold.
static INLINE int use_mv_hp(const MV *ref) {
  const int kMvRefThresh = 64;  // threshold for use of high-precision 1/8 mv
  return abs(ref->row) < kMvRefThresh && abs(ref->col) < kMvRefThresh;
}

#define MV_UPDATE_PROB 252

/* Symbols for coding which components are zero jointly */
#define MV_JOINTS 4
typedef enum {
  MV_JOINT_ZERO = 0,   /* Zero vector */
  MV_JOINT_HNZVZ = 1,  /* Vert zero, hor nonzero */
  MV_JOINT_HZVNZ = 2,  /* Hor zero, vert nonzero */
  MV_JOINT_HNZVNZ = 3, /* Both components nonzero */
} MV_JOINT_TYPE;

// True when the vertical (row) component is nonzero for this joint type.
static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) {
  return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ;
}

// True when the horizontal (col) component is nonzero for this joint type.
static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) {
+ return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ; +} + +/* Symbols for coding magnitude class of nonzero components */ +#define MV_CLASSES 11 +typedef enum { + MV_CLASS_0 = 0, /* (0, 2] integer pel */ + MV_CLASS_1 = 1, /* (2, 4] integer pel */ + MV_CLASS_2 = 2, /* (4, 8] integer pel */ + MV_CLASS_3 = 3, /* (8, 16] integer pel */ + MV_CLASS_4 = 4, /* (16, 32] integer pel */ + MV_CLASS_5 = 5, /* (32, 64] integer pel */ + MV_CLASS_6 = 6, /* (64, 128] integer pel */ + MV_CLASS_7 = 7, /* (128, 256] integer pel */ + MV_CLASS_8 = 8, /* (256, 512] integer pel */ + MV_CLASS_9 = 9, /* (512, 1024] integer pel */ + MV_CLASS_10 = 10, /* (1024,2048] integer pel */ +} MV_CLASS_TYPE; + +#define CLASS0_BITS 1 /* bits at integer precision for class 0 */ +#define CLASS0_SIZE (1 << CLASS0_BITS) +#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) +#define MV_FP_SIZE 4 + +#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) +#define MV_MAX ((1 << MV_MAX_BITS) - 1) +#define MV_VALS ((MV_MAX << 1) + 1) + +#define MV_IN_USE_BITS 14 +#define MV_UPP ((1 << MV_IN_USE_BITS) - 1) +#define MV_LOW (-(1 << MV_IN_USE_BITS)) + +extern const vpx_tree_index vp9_mv_joint_tree[]; +extern const vpx_tree_index vp9_mv_class_tree[]; +extern const vpx_tree_index vp9_mv_class0_tree[]; +extern const vpx_tree_index vp9_mv_fp_tree[]; + +typedef struct { + vpx_prob sign; + vpx_prob classes[MV_CLASSES - 1]; + vpx_prob class0[CLASS0_SIZE - 1]; + vpx_prob bits[MV_OFFSET_BITS]; + vpx_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1]; + vpx_prob fp[MV_FP_SIZE - 1]; + vpx_prob class0_hp; + vpx_prob hp; +} nmv_component; + +typedef struct { + vpx_prob joints[MV_JOINTS - 1]; + nmv_component comps[2]; +} nmv_context; + +static INLINE MV_JOINT_TYPE vp9_get_mv_joint(const MV *mv) { + if (mv->row == 0) { + return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ; + } else { + return mv->col == 0 ? 
MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ; + } +} + +MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset); + +typedef struct { + unsigned int sign[2]; + unsigned int classes[MV_CLASSES]; + unsigned int class0[CLASS0_SIZE]; + unsigned int bits[MV_OFFSET_BITS][2]; + unsigned int class0_fp[CLASS0_SIZE][MV_FP_SIZE]; + unsigned int fp[MV_FP_SIZE]; + unsigned int class0_hp[2]; + unsigned int hp[2]; +} nmv_component_counts; + +typedef struct { + unsigned int joints[MV_JOINTS]; + nmv_component_counts comps[2]; +} nmv_context_counts; + +void vp9_inc_mv(const MV *mv, nmv_context_counts *counts); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_ENTROPYMV_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_enums.h b/media/libvpx/libvpx/vp9/common/vp9_enums.h new file mode 100644 index 0000000000..b33a3a2978 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_enums.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_VP9_ENUMS_H_ +#define VPX_VP9_COMMON_VP9_ENUMS_H_ + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MI_SIZE_LOG2 3 +#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6 + +#define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit +#define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block + +#define MI_MASK (MI_BLOCK_SIZE - 1) + +// Bitstream profiles indicated by 2-3 bits in the uncompressed header. +// 00: Profile 0. 8-bit 4:2:0 only. +// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0. +// 01: Profile 2. 10-bit and 12-bit color only, with 4:2:0 sampling. 
+// 110: Profile 3. 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0 +// sampling. +// 111: Undefined profile. +typedef enum BITSTREAM_PROFILE { + PROFILE_0, + PROFILE_1, + PROFILE_2, + PROFILE_3, + MAX_PROFILES +} BITSTREAM_PROFILE; + +typedef enum PARSE_RECON_FLAG { PARSE = 1, RECON = 2 } PARSE_RECON_FLAG; + +#define BLOCK_4X4 0 +#define BLOCK_4X8 1 +#define BLOCK_8X4 2 +#define BLOCK_8X8 3 +#define BLOCK_8X16 4 +#define BLOCK_16X8 5 +#define BLOCK_16X16 6 +#define BLOCK_16X32 7 +#define BLOCK_32X16 8 +#define BLOCK_32X32 9 +#define BLOCK_32X64 10 +#define BLOCK_64X32 11 +#define BLOCK_64X64 12 +#define BLOCK_SIZES 13 +#define BLOCK_INVALID BLOCK_SIZES +typedef uint8_t BLOCK_SIZE; + +typedef enum PARTITION_TYPE { + PARTITION_NONE, + PARTITION_HORZ, + PARTITION_VERT, + PARTITION_SPLIT, + PARTITION_TYPES, + PARTITION_INVALID = PARTITION_TYPES +} PARTITION_TYPE; + +typedef char PARTITION_CONTEXT; +#define PARTITION_PLOFFSET 4 // number of probability models per block size +#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) + +// block transform size +typedef uint8_t TX_SIZE; +#define TX_4X4 ((TX_SIZE)0) // 4x4 transform +#define TX_8X8 ((TX_SIZE)1) // 8x8 transform +#define TX_16X16 ((TX_SIZE)2) // 16x16 transform +#define TX_32X32 ((TX_SIZE)3) // 32x32 transform +#define TX_SIZES ((TX_SIZE)4) + +// frame transform mode +typedef enum { + ONLY_4X4 = 0, // only 4x4 transform used + ALLOW_8X8 = 1, // allow block transform size up to 8x8 + ALLOW_16X16 = 2, // allow block transform size up to 16x16 + ALLOW_32X32 = 3, // allow block transform size up to 32x32 + TX_MODE_SELECT = 4, // transform specified for each block + TX_MODES = 5, +} TX_MODE; + +typedef enum { + DCT_DCT = 0, // DCT in both horizontal and vertical + ADST_DCT = 1, // ADST in vertical, DCT in horizontal + DCT_ADST = 2, // DCT in vertical, ADST in horizontal + ADST_ADST = 3, // ADST in both directions + TX_TYPES = 4 +} TX_TYPE; + +typedef enum { + VP9_LAST_FLAG = 1 << 0, + VP9_GOLD_FLAG = 1 << 1, + 
VP9_ALT_FLAG = 1 << 2, +} VP9_REFFRAME; + +typedef enum { PLANE_TYPE_Y = 0, PLANE_TYPE_UV = 1, PLANE_TYPES } PLANE_TYPE; + +#define DC_PRED 0 // Average of above and left pixels +#define V_PRED 1 // Vertical +#define H_PRED 2 // Horizontal +#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi) +#define D135_PRED 4 // Directional 135 deg = 180 - 45 +#define D117_PRED 5 // Directional 117 deg = 180 - 63 +#define D153_PRED 6 // Directional 153 deg = 180 - 27 +#define D207_PRED 7 // Directional 207 deg = 180 + 27 +#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi) +#define TM_PRED 9 // True-motion +#define NEARESTMV 10 +#define NEARMV 11 +#define ZEROMV 12 +#define NEWMV 13 +#define MB_MODE_COUNT 14 +typedef uint8_t PREDICTION_MODE; + +#define INTRA_MODES (TM_PRED + 1) + +#define INTER_MODES (1 + NEWMV - NEARESTMV) + +#define SKIP_CONTEXTS 3 +#define INTER_MODE_CONTEXTS 7 + +/* Segment Feature Masks */ +#define MAX_MV_REF_CANDIDATES 2 + +#define INTRA_INTER_CONTEXTS 4 +#define COMP_INTER_CONTEXTS 5 +#define REF_CONTEXTS 5 + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_ENUMS_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_filter.c b/media/libvpx/libvpx/vp9/common/vp9_filter.c new file mode 100644 index 0000000000..adbda6c825 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_filter.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <assert.h> + +#include "vp9/common/vp9_filter.h" + +DECLARE_ALIGNED(256, static const InterpKernel, + bilinear_filters[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 }, + { 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 }, + { 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 }, + { 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 }, + { 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 }, + { 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 }, + { 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 }, + { 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 } +}; + +// Lagrangian interpolation filter +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -5, 126, 8, -3, 1, 0 }, + { -1, 3, -10, 122, 18, -6, 2, 0 }, { -1, 4, -13, 118, 27, -9, 3, -1 }, + { -1, 4, -16, 112, 37, -11, 4, -1 }, { -1, 5, -18, 105, 48, -14, 4, -1 }, + { -1, 5, -19, 97, 58, -16, 5, -1 }, { -1, 6, -19, 88, 68, -18, 5, -1 }, + { -1, 6, -19, 78, 78, -19, 6, -1 }, { -1, 5, -18, 68, 88, -19, 6, -1 }, + { -1, 5, -16, 58, 97, -19, 5, -1 }, { -1, 4, -14, 48, 105, -18, 5, -1 }, + { -1, 4, -11, 37, 112, -16, 4, -1 }, { -1, 3, -9, 27, 118, -13, 4, -1 }, + { 0, 2, -6, 18, 122, -10, 3, -1 }, { 0, 1, -3, 8, 126, -5, 1, 0 } +}; + +// DCT based filter +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8s[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, { -1, 3, -7, 127, 8, -3, 1, 0 }, + { -2, 5, -13, 125, 17, -6, 3, -1 }, { -3, 7, -17, 121, 27, -10, 5, -2 }, + { -4, 9, -20, 115, 37, -13, 6, -2 }, { -4, 10, -23, 108, 48, -16, 8, -3 }, + { -4, 10, -24, 100, 59, -19, 9, -3 }, { -4, 11, -24, 90, 70, -21, 10, -4 }, + { -4, 11, -23, 80, 80, -23, 11, -4 }, { -4, 10, -21, 70, 90, -24, 11, -4 }, + { -3, 9, -19, 59, 100, -24, 10, -4 }, { -3, 8, -16, 48, 108, -23, 10, -4 }, + { -2, 6, -13, 37, 115, -20, 9, -4 }, { -2, 5, -10, 27, 121, -17, 
7, -3 }, + { -1, 3, -6, 17, 125, -13, 5, -2 }, { 0, 1, -3, 8, 127, -7, 3, -1 } +}; + +// freqmultiplier = 0.5 +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8lp[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, { -3, -1, 32, 64, 38, 1, -3, 0 }, + { -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 26, 63, 43, 4, -4, 0 }, + { -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 21, 60, 49, 7, -4, 0 }, + { -1, -4, 18, 59, 51, 9, -4, 0 }, { -1, -4, 16, 57, 53, 12, -4, -1 }, + { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 12, 53, 57, 16, -4, -1 }, + { 0, -4, 9, 51, 59, 18, -4, -1 }, { 0, -4, 7, 49, 60, 21, -3, -2 }, + { 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 4, 43, 63, 26, -2, -2 }, + { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 } +}; + +// 4-tap filter +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_4[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 }, + { 0, 0, -6, 120, 18, -4, 0, 0 }, { 0, 0, -8, 114, 28, -6, 0, 0 }, + { 0, 0, -10, 108, 36, -6, 0, 0 }, { 0, 0, -12, 102, 46, -8, 0, 0 }, + { 0, 0, -12, 94, 56, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 }, + { 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 }, + { 0, 0, -10, 56, 94, -12, 0, 0 }, { 0, 0, -8, 46, 102, -12, 0, 0 }, + { 0, 0, -6, 36, 108, -10, 0, 0 }, { 0, 0, -6, 28, 114, -8, 0, 0 }, + { 0, 0, -4, 18, 120, -6, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 } +}; + +const InterpKernel *vp9_filter_kernels[5] = { + sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters, + sub_pel_filters_4 +}; diff --git a/media/libvpx/libvpx/vp9/common/vp9_filter.h b/media/libvpx/libvpx/vp9/common/vp9_filter.h new file mode 100644 index 0000000000..0382c88e7c --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_filter.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_VP9_FILTER_H_ +#define VPX_VP9_COMMON_VP9_FILTER_H_ + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_filter.h" +#include "vpx_ports/mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define EIGHTTAP 0 +#define EIGHTTAP_SMOOTH 1 +#define EIGHTTAP_SHARP 2 +#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ +#define BILINEAR 3 +#define FOURTAP 4 +// The codec can operate in four possible inter prediction filter mode: +// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. +#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) +#define SWITCHABLE 4 /* should be the last one */ + +typedef uint8_t INTERP_FILTER; + +extern const InterpKernel *vp9_filter_kernels[5]; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_FILTER_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_frame_buffers.c b/media/libvpx/libvpx/vp9/common/vp9_frame_buffers.c new file mode 100644 index 0000000000..889b809e50 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_frame_buffers.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <assert.h> + +#include "vp9/common/vp9_frame_buffers.h" +#include "vpx_mem/vpx_mem.h" + +int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) { + const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; + assert(list != NULL); + vp9_free_internal_frame_buffers(list); + + list->int_fb = + (InternalFrameBuffer *)vpx_calloc(num_buffers, sizeof(*list->int_fb)); + if (list->int_fb) { + list->num_internal_frame_buffers = num_buffers; + return 0; + } + return -1; +} + +void vp9_free_internal_frame_buffers(InternalFrameBufferList *list) { + int i; + + assert(list != NULL); + + for (i = 0; i < list->num_internal_frame_buffers; ++i) { + vpx_free(list->int_fb[i].data); + list->int_fb[i].data = NULL; + } + vpx_free(list->int_fb); + list->int_fb = NULL; + list->num_internal_frame_buffers = 0; +} + +int vp9_get_frame_buffer(void *cb_priv, size_t min_size, + vpx_codec_frame_buffer_t *fb) { + int i; + InternalFrameBufferList *const int_fb_list = + (InternalFrameBufferList *)cb_priv; + if (int_fb_list == NULL) return -1; + + // Find a free frame buffer. + for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) { + if (!int_fb_list->int_fb[i].in_use) break; + } + + if (i == int_fb_list->num_internal_frame_buffers) return -1; + + if (int_fb_list->int_fb[i].size < min_size) { + vpx_free(int_fb_list->int_fb[i].data); + // The data must be zeroed to fix a valgrind error from the C loop filter + // due to access uninitialized memory in frame border. It could be + // skipped if border were totally removed. + int_fb_list->int_fb[i].data = (uint8_t *)vpx_calloc(1, min_size); + if (!int_fb_list->int_fb[i].data) return -1; + int_fb_list->int_fb[i].size = min_size; + } + + fb->data = int_fb_list->int_fb[i].data; + fb->size = int_fb_list->int_fb[i].size; + int_fb_list->int_fb[i].in_use = 1; + + // Set the frame buffer's private data to point at the internal frame buffer. 
+ fb->priv = &int_fb_list->int_fb[i]; + return 0; +} + +int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) { + InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv; + (void)cb_priv; + if (int_fb) int_fb->in_use = 0; + return 0; +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_frame_buffers.h b/media/libvpx/libvpx/vp9/common/vp9_frame_buffers.h new file mode 100644 index 0000000000..11be838c02 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_frame_buffers.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_ +#define VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_ + +#include "vpx/vpx_frame_buffer.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct InternalFrameBuffer { + uint8_t *data; + size_t size; + int in_use; +} InternalFrameBuffer; + +typedef struct InternalFrameBufferList { + int num_internal_frame_buffers; + InternalFrameBuffer *int_fb; +} InternalFrameBufferList; + +// Initializes |list|. Returns 0 on success. +int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list); + +// Free any data allocated to the frame buffers. +void vp9_free_internal_frame_buffers(InternalFrameBufferList *list); + +// Callback used by libvpx to request an external frame buffer. |cb_priv| +// Callback private data, which points to an InternalFrameBufferList. +// |min_size| is the minimum size in bytes needed to decode the next frame. +// |fb| pointer to the frame buffer. 
+int vp9_get_frame_buffer(void *cb_priv, size_t min_size, + vpx_codec_frame_buffer_t *fb); + +// Callback used by libvpx when there are no references to the frame buffer. +// |cb_priv| is not used. |fb| pointer to the frame buffer. +int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_idct.c b/media/libvpx/libvpx/vp9/common/vp9_idct.c new file mode 100644 index 0000000000..71be0f310d --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_idct.c @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <math.h> + +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/inv_txfm.h" +#include "vpx_ports/mem.h" + +void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + const transform_2d IHT_4[] = { + { idct4_c, idct4_c }, // DCT_DCT = 0 + { iadst4_c, idct4_c }, // ADST_DCT = 1 + { idct4_c, iadst4_c }, // DCT_ADST = 2 + { iadst4_c, iadst4_c } // ADST_ADST = 3 + }; + + int i, j; + tran_low_t out[4 * 4]; + tran_low_t *outptr = out; + tran_low_t temp_in[4], temp_out[4]; + + // inverse transform row vectors + for (i = 0; i < 4; ++i) { + IHT_4[tx_type].rows(input, outptr); + input += 4; + outptr += 4; + } + + // inverse transform column vectors + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; + IHT_4[tx_type].cols(temp_in, temp_out); + for (j = 0; j < 4; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 4)); + } + } +} + +static const transform_2d IHT_8[] = { + { idct8_c, idct8_c }, // DCT_DCT = 0 + { iadst8_c, idct8_c }, // ADST_DCT = 1 + { idct8_c, iadst8_c }, // DCT_ADST = 2 + { iadst8_c, iadst8_c } // ADST_ADST = 3 +}; + +void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + int i, j; + tran_low_t out[8 * 8]; + tran_low_t *outptr = out; + tran_low_t temp_in[8], temp_out[8]; + const transform_2d ht = IHT_8[tx_type]; + + // inverse transform row vectors + for (i = 0; i < 8; ++i) { + ht.rows(input, outptr); + input += 8; + outptr += 8; + } + + // inverse transform column vectors + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; + ht.cols(temp_in, temp_out); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 5)); + } + } +} + +static const transform_2d IHT_16[] = { + { idct16_c, idct16_c }, // 
DCT_DCT = 0 + { iadst16_c, idct16_c }, // ADST_DCT = 1 + { idct16_c, iadst16_c }, // DCT_ADST = 2 + { iadst16_c, iadst16_c } // ADST_ADST = 3 +}; + +void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + int i, j; + tran_low_t out[16 * 16]; + tran_low_t *outptr = out; + tran_low_t temp_in[16], temp_out[16]; + const transform_2d ht = IHT_16[tx_type]; + + // Rows + for (i = 0; i < 16; ++i) { + ht.rows(input, outptr); + input += 16; + outptr += 16; + } + + // Columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; + ht.cols(temp_in, temp_out); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); + } + } +} + +// idct +void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + if (eob > 1) + vpx_idct4x4_16_add(input, dest, stride); + else + vpx_idct4x4_1_add(input, dest, stride); +} + +void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + if (eob > 1) + vpx_iwht4x4_16_add(input, dest, stride); + else + vpx_iwht4x4_1_add(input, dest, stride); +} + +void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + // If dc is 1, then input[0] is the reconstructed value, do not need + // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. + + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to decide what to do. + if (eob == 1) + // DC only DCT coefficient + vpx_idct8x8_1_add(input, dest, stride); + else if (eob <= 12) + vpx_idct8x8_12_add(input, dest, stride); + else + vpx_idct8x8_64_add(input, dest, stride); +} + +void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + assert(((intptr_t)input) % 32 == 0); + /* The calculation can be simplified if there are not many non-zero dct + * coefficients. 
Use eobs to separate different cases. */ + if (eob == 1) /* DC only DCT coefficient. */ + vpx_idct16x16_1_add(input, dest, stride); + else if (eob <= 10) + vpx_idct16x16_10_add(input, dest, stride); + else if (eob <= 38) + vpx_idct16x16_38_add(input, dest, stride); + else + vpx_idct16x16_256_add(input, dest, stride); +} + +void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + assert(((intptr_t)input) % 32 == 0); + if (eob == 1) + vpx_idct32x32_1_add(input, dest, stride); + else if (eob <= 34) + // non-zero coeff only in upper-left 8x8 + vpx_idct32x32_34_add(input, dest, stride); + else if (eob <= 135) + // non-zero coeff only in upper-left 16x16 + vpx_idct32x32_135_add(input, dest, stride); + else + vpx_idct32x32_1024_add(input, dest, stride); +} + +// iht +void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) + vp9_idct4x4_add(input, dest, stride, eob); + else + vp9_iht4x4_16_add(input, dest, stride, tx_type); +} + +void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) { + vp9_idct8x8_add(input, dest, stride, eob); + } else { + vp9_iht8x8_64_add(input, dest, stride, tx_type); + } +} + +void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) { + vp9_idct16x16_add(input, dest, stride, eob); + } else { + vp9_iht16x16_256_add(input, dest, stride, tx_type); + } +} + +#if CONFIG_VP9_HIGHBITDEPTH + +void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + const highbd_transform_2d IHT_4[] = { + { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 + }; + + int i, j; + 
tran_low_t out[4 * 4]; + tran_low_t *outptr = out; + tran_low_t temp_in[4], temp_out[4]; + + // Inverse transform row vectors. + for (i = 0; i < 4; ++i) { + IHT_4[tx_type].rows(input, outptr, bd); + input += 4; + outptr += 4; + } + + // Inverse transform column vectors. + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; + IHT_4[tx_type].cols(temp_in, temp_out, bd); + for (j = 0; j < 4; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); + } + } +} + +static const highbd_transform_2d HIGH_IHT_8[] = { + { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 +}; + +void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + int i, j; + tran_low_t out[8 * 8]; + tran_low_t *outptr = out; + tran_low_t temp_in[8], temp_out[8]; + const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; + + // Inverse transform row vectors. + for (i = 0; i < 8; ++i) { + ht.rows(input, outptr, bd); + input += 8; + outptr += 8; + } + + // Inverse transform column vectors. 
+ for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; + ht.cols(temp_in, temp_out, bd); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); + } + } +} + +static const highbd_transform_2d HIGH_IHT_16[] = { + { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 +}; + +void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + int i, j; + tran_low_t out[16 * 16]; + tran_low_t *outptr = out; + tran_low_t temp_in[16], temp_out[16]; + const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; + + // Rows + for (i = 0; i < 16; ++i) { + ht.rows(input, outptr, bd); + input += 16; + outptr += 16; + } + + // Columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; + ht.cols(temp_in, temp_out, bd); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); + } + } +} + +// idct +void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride, + int eob, int bd) { + if (eob > 1) + vpx_highbd_idct4x4_16_add(input, dest, stride, bd); + else + vpx_highbd_idct4x4_1_add(input, dest, stride, bd); +} + +void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride, + int eob, int bd) { + if (eob > 1) + vpx_highbd_iwht4x4_16_add(input, dest, stride, bd); + else + vpx_highbd_iwht4x4_1_add(input, dest, stride, bd); +} + +void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride, + int eob, int bd) { + // If dc is 1, then input[0] is the reconstructed value, do not need + // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. 
+ + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to decide what to do. + // DC only DCT coefficient + if (eob == 1) { + vpx_highbd_idct8x8_1_add(input, dest, stride, bd); + } else if (eob <= 12) { + vpx_highbd_idct8x8_12_add(input, dest, stride, bd); + } else { + vpx_highbd_idct8x8_64_add(input, dest, stride, bd); + } +} + +void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest, + int stride, int eob, int bd) { + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to separate different cases. + // DC only DCT coefficient. + if (eob == 1) { + vpx_highbd_idct16x16_1_add(input, dest, stride, bd); + } else if (eob <= 10) { + vpx_highbd_idct16x16_10_add(input, dest, stride, bd); + } else if (eob <= 38) { + vpx_highbd_idct16x16_38_add(input, dest, stride, bd); + } else { + vpx_highbd_idct16x16_256_add(input, dest, stride, bd); + } +} + +void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest, + int stride, int eob, int bd) { + // Non-zero coeff only in upper-left 8x8 + if (eob == 1) { + vpx_highbd_idct32x32_1_add(input, dest, stride, bd); + } else if (eob <= 34) { + vpx_highbd_idct32x32_34_add(input, dest, stride, bd); + } else if (eob <= 135) { + vpx_highbd_idct32x32_135_add(input, dest, stride, bd); + } else { + vpx_highbd_idct32x32_1024_add(input, dest, stride, bd); + } +} + +// iht +void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, + uint16_t *dest, int stride, int eob, int bd) { + if (tx_type == DCT_DCT) + vp9_highbd_idct4x4_add(input, dest, stride, eob, bd); + else + vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); +} + +void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, + uint16_t *dest, int stride, int eob, int bd) { + if (tx_type == DCT_DCT) { + vp9_highbd_idct8x8_add(input, dest, stride, eob, bd); + } else { + vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); + } +} + +void 
vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, + uint16_t *dest, int stride, int eob, int bd) { + if (tx_type == DCT_DCT) { + vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); + } else { + vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/libvpx/vp9/common/vp9_idct.h b/media/libvpx/libvpx/vp9/common/vp9_idct.h new file mode 100644 index 0000000000..94eeaf599e --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_idct.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_VP9_IDCT_H_ +#define VPX_VP9_COMMON_VP9_IDCT_H_ + +#include <assert.h> + +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_enums.h" +#include "vpx_dsp/inv_txfm.h" +#include "vpx_dsp/txfm_common.h" +#include "vpx_ports/mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void (*transform_1d)(const tran_low_t *, tran_low_t *); + +typedef struct { + transform_1d cols, rows; // vertical and horizontal +} transform_2d; + +#if CONFIG_VP9_HIGHBITDEPTH +typedef void (*highbd_transform_1d)(const tran_low_t *, tran_low_t *, int bd); + +typedef struct { + highbd_transform_1d cols, rows; // vertical and horizontal +} highbd_transform_2d; +#endif // CONFIG_VP9_HIGHBITDEPTH + +void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct16x16_add(const 
tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); + +void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob); +void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob); +void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob); + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride, + int eob, int bd); +void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride, + int eob, int bd); +void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride, + int eob, int bd); +void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest, + int stride, int eob, int bd); +void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest, + int stride, int eob, int bd); +void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, + uint16_t *dest, int stride, int eob, int bd); +void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, + uint16_t *dest, int stride, int eob, int bd); +void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, + uint16_t *dest, int stride, int eob, int bd); +#endif // CONFIG_VP9_HIGHBITDEPTH +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_IDCT_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_loopfilter.c b/media/libvpx/libvpx/vp9/common/vp9_loopfilter.c new file mode 100644 index 0000000000..1a9d45ae77 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_loopfilter.c @@ -0,0 +1,1633 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_loopfilter.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_reconinter.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" + +#include "vp9/common/vp9_seg_common.h" + +// 64 bit masks for left transform size. Each 1 represents a position where +// we should apply a loop filter across the left border of an 8x8 block +// boundary. +// +// In the case of TX_16X16-> ( in low order byte first we end up with +// a mask that looks like this +// +// 10101010 +// 10101010 +// 10101010 +// 10101010 +// 10101010 +// 10101010 +// 10101010 +// 10101010 +// +// A loopfilter should be applied to every other 8x8 horizontally. +static const uint64_t left_64x64_txform_mask[TX_SIZES] = { + 0xffffffffffffffffULL, // TX_4X4 + 0xffffffffffffffffULL, // TX_8x8 + 0x5555555555555555ULL, // TX_16x16 + 0x1111111111111111ULL, // TX_32x32 +}; + +// 64 bit masks for above transform size. Each 1 represents a position where +// we should apply a loop filter across the top border of an 8x8 block +// boundary. +// +// In the case of TX_32x32 -> ( in low order byte first we end up with +// a mask that looks like this +// +// 11111111 +// 00000000 +// 00000000 +// 00000000 +// 11111111 +// 00000000 +// 00000000 +// 00000000 +// +// A loopfilter should be applied to every other 4 the row vertically. +static const uint64_t above_64x64_txform_mask[TX_SIZES] = { + 0xffffffffffffffffULL, // TX_4X4 + 0xffffffffffffffffULL, // TX_8x8 + 0x00ff00ff00ff00ffULL, // TX_16x16 + 0x000000ff000000ffULL, // TX_32x32 +}; + +// 64 bit masks for prediction sizes (left). Each 1 represents a position +// where left border of an 8x8 block. 
These are aligned to the right most +// appropriate bit, and then shifted into place. +// +// In the case of TX_16x32 -> ( low order byte first ) we end up with +// a mask that looks like this : +// +// 10000000 +// 10000000 +// 10000000 +// 10000000 +// 00000000 +// 00000000 +// 00000000 +// 00000000 +static const uint64_t left_prediction_mask[BLOCK_SIZES] = { + 0x0000000000000001ULL, // BLOCK_4X4, + 0x0000000000000001ULL, // BLOCK_4X8, + 0x0000000000000001ULL, // BLOCK_8X4, + 0x0000000000000001ULL, // BLOCK_8X8, + 0x0000000000000101ULL, // BLOCK_8X16, + 0x0000000000000001ULL, // BLOCK_16X8, + 0x0000000000000101ULL, // BLOCK_16X16, + 0x0000000001010101ULL, // BLOCK_16X32, + 0x0000000000000101ULL, // BLOCK_32X16, + 0x0000000001010101ULL, // BLOCK_32X32, + 0x0101010101010101ULL, // BLOCK_32X64, + 0x0000000001010101ULL, // BLOCK_64X32, + 0x0101010101010101ULL, // BLOCK_64X64 +}; + +// 64 bit mask to shift and set for each prediction size. +static const uint64_t above_prediction_mask[BLOCK_SIZES] = { + 0x0000000000000001ULL, // BLOCK_4X4 + 0x0000000000000001ULL, // BLOCK_4X8 + 0x0000000000000001ULL, // BLOCK_8X4 + 0x0000000000000001ULL, // BLOCK_8X8 + 0x0000000000000001ULL, // BLOCK_8X16, + 0x0000000000000003ULL, // BLOCK_16X8 + 0x0000000000000003ULL, // BLOCK_16X16 + 0x0000000000000003ULL, // BLOCK_16X32, + 0x000000000000000fULL, // BLOCK_32X16, + 0x000000000000000fULL, // BLOCK_32X32, + 0x000000000000000fULL, // BLOCK_32X64, + 0x00000000000000ffULL, // BLOCK_64X32, + 0x00000000000000ffULL, // BLOCK_64X64 +}; +// 64 bit mask to shift and set for each prediction size. A bit is set for +// each 8x8 block that would be in the left most block of the given block +// size in the 64x64 block. 
+static const uint64_t size_mask[BLOCK_SIZES] = { + 0x0000000000000001ULL, // BLOCK_4X4 + 0x0000000000000001ULL, // BLOCK_4X8 + 0x0000000000000001ULL, // BLOCK_8X4 + 0x0000000000000001ULL, // BLOCK_8X8 + 0x0000000000000101ULL, // BLOCK_8X16, + 0x0000000000000003ULL, // BLOCK_16X8 + 0x0000000000000303ULL, // BLOCK_16X16 + 0x0000000003030303ULL, // BLOCK_16X32, + 0x0000000000000f0fULL, // BLOCK_32X16, + 0x000000000f0f0f0fULL, // BLOCK_32X32, + 0x0f0f0f0f0f0f0f0fULL, // BLOCK_32X64, + 0x00000000ffffffffULL, // BLOCK_64X32, + 0xffffffffffffffffULL, // BLOCK_64X64 +}; + +// These are used for masking the left and above borders. +static const uint64_t left_border = 0x1111111111111111ULL; +static const uint64_t above_border = 0x000000ff000000ffULL; + +// 16 bit masks for uv transform sizes. +static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = { + 0xffff, // TX_4X4 + 0xffff, // TX_8x8 + 0x5555, // TX_16x16 + 0x1111, // TX_32x32 +}; + +static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = { + 0xffff, // TX_4X4 + 0xffff, // TX_8x8 + 0x0f0f, // TX_16x16 + 0x000f, // TX_32x32 +}; + +// 16 bit left mask to shift and set for each uv prediction size. +static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = { + 0x0001, // BLOCK_4X4, + 0x0001, // BLOCK_4X8, + 0x0001, // BLOCK_8X4, + 0x0001, // BLOCK_8X8, + 0x0001, // BLOCK_8X16, + 0x0001, // BLOCK_16X8, + 0x0001, // BLOCK_16X16, + 0x0011, // BLOCK_16X32, + 0x0001, // BLOCK_32X16, + 0x0011, // BLOCK_32X32, + 0x1111, // BLOCK_32X64 + 0x0011, // BLOCK_64X32, + 0x1111, // BLOCK_64X64 +}; +// 16 bit above mask to shift and set for uv each prediction size. 
+static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = { + 0x0001, // BLOCK_4X4 + 0x0001, // BLOCK_4X8 + 0x0001, // BLOCK_8X4 + 0x0001, // BLOCK_8X8 + 0x0001, // BLOCK_8X16, + 0x0001, // BLOCK_16X8 + 0x0001, // BLOCK_16X16 + 0x0001, // BLOCK_16X32, + 0x0003, // BLOCK_32X16, + 0x0003, // BLOCK_32X32, + 0x0003, // BLOCK_32X64, + 0x000f, // BLOCK_64X32, + 0x000f, // BLOCK_64X64 +}; + +// 64 bit mask to shift and set for each uv prediction size +static const uint16_t size_mask_uv[BLOCK_SIZES] = { + 0x0001, // BLOCK_4X4 + 0x0001, // BLOCK_4X8 + 0x0001, // BLOCK_8X4 + 0x0001, // BLOCK_8X8 + 0x0001, // BLOCK_8X16, + 0x0001, // BLOCK_16X8 + 0x0001, // BLOCK_16X16 + 0x0011, // BLOCK_16X32, + 0x0003, // BLOCK_32X16, + 0x0033, // BLOCK_32X32, + 0x3333, // BLOCK_32X64, + 0x00ff, // BLOCK_64X32, + 0xffff, // BLOCK_64X64 +}; +static const uint16_t left_border_uv = 0x1111; +static const uint16_t above_border_uv = 0x000f; + +static const int mode_lf_lut[MB_MODE_COUNT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES + 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0) +}; + +static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { + int lvl; + + // For each possible value for the loop filter fill out limits + for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { + // Set loop filter parameters that control sharpness. 
+ int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); + + if (sharpness_lvl > 0) { + if (block_inside_limit > (9 - sharpness_lvl)) + block_inside_limit = (9 - sharpness_lvl); + } + + if (block_inside_limit < 1) block_inside_limit = 1; + + memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); + memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), + SIMD_WIDTH); + } +} + +static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, + const MODE_INFO *mi) { + return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]][mode_lf_lut[mi->mode]]; +} + +void vp9_loop_filter_init(VP9_COMMON *cm) { + loop_filter_info_n *lfi = &cm->lf_info; + struct loopfilter *lf = &cm->lf; + int lvl; + + // init limits for given sharpness + update_sharpness(lfi, lf->sharpness_level); + lf->last_sharpness_level = lf->sharpness_level; + + // init hev threshold const vectors + for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) + memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); +} + +void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { + int seg_id; + // n_shift is the multiplier for lf_deltas + // the multiplier is 1 for when filter_lvl is between 0 and 31; + // 2 when filter_lvl is between 32 and 63 + const int scale = 1 << (default_filt_lvl >> 5); + loop_filter_info_n *const lfi = &cm->lf_info; + struct loopfilter *const lf = &cm->lf; + const struct segmentation *const seg = &cm->seg; + + // update limits if sharpness has changed + if (lf->last_sharpness_level != lf->sharpness_level) { + update_sharpness(lfi, lf->sharpness_level); + lf->last_sharpness_level = lf->sharpness_level; + } + + for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { + int lvl_seg = default_filt_lvl; + if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { + const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF); + lvl_seg = clamp( + seg->abs_delta == SEGMENT_ABSDATA ? 
data : default_filt_lvl + data, 0, + MAX_LOOP_FILTER); + } + + if (!lf->mode_ref_delta_enabled) { + // we could get rid of this if we assume that deltas are set to + // zero when not in use; encoder always uses deltas + memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); + } else { + int ref, mode; + const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; + lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); + + for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { + for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { + const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale + + lf->mode_deltas[mode] * scale; + lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); + } + } + } + } +} + +static void filter_selectively_vert_row2( + int subsampling_factor, uint8_t *s, int pitch, unsigned int mask_16x16, + unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { + const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; + const int lfl_forward = subsampling_factor ? 
4 : 8; + const unsigned int dual_one = 1 | (1 << lfl_forward); + unsigned int mask; + uint8_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); + } + } + + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } + + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } + + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual( + ss[0] + 4, pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); + } + } + } + + ss[0] += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + 
mask_4x4_int >>= 1; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_filter_selectively_vert_row2( + int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16, + unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { + const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; + const int lfl_forward = subsampling_factor ? 4 : 8; + const unsigned int dual_one = 1 | (1 << lfl_forward); + unsigned int mask; + uint16_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_8_dual( + ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual( + ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); 
+ } + } + + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual( + ss[0] + 4, pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, + lfi->mblim, lfi->lim, lfi->hev_thr, bd); + } + } + } + + ss[0] += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void filter_selectively_horiz( + uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, + unsigned int mask_4x4, unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { + unsigned int mask; + int count; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; + mask >>= count) { + count = 1; + if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask_16x16 & 1) { + if ((mask_16x16 & 3) == 3) { + vpx_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + count = 2; + } else { + vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + } + } else if (mask_8x8 & 1) { + if ((mask_8x8 & 3) == 3) { + // Next block's thresholds. 
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr); + + if ((mask_4x4_int & 3) == 3) { + vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, lfin->mblim, + lfin->lim, lfin->hev_thr); + } else { + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + else if (mask_4x4_int & 2) + vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr); + } + count = 2; + } else { + vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } else if (mask_4x4 & 1) { + if ((mask_4x4 & 3) == 3) { + // Next block's thresholds. + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr); + if ((mask_4x4_int & 3) == 3) { + vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, lfin->mblim, + lfin->lim, lfin->hev_thr); + } else { + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + else if (mask_4x4_int & 2) + vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr); + } + count = 2; + } else { + vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } else { + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } + s += 8 * count; + lfl += count; + mask_16x16 >>= count; + mask_8x8 >>= count; + mask_4x4 >>= count; + mask_4x4_int >>= count; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_filter_selectively_horiz( + uint16_t *s, 
int pitch, unsigned int mask_16x16, unsigned int mask_8x8, + unsigned int mask_4x4, unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { + unsigned int mask; + int count; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; + mask >>= count) { + count = 1; + if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask_16x16 & 1) { + if ((mask_16x16 & 3) == 3) { + vpx_highbd_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + count = 2; + } else { + vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } + } else if (mask_8x8 & 1) { + if ((mask_8x8 & 3) == 3) { + // Next block's thresholds. + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr, bd); + + if ((mask_4x4_int & 3) == 3) { + vpx_highbd_lpf_horizontal_4_dual( + s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, lfin->hev_thr, bd); + } else { + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } else if (mask_4x4_int & 2) { + vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr, bd); + } + } + count = 2; + } else { + vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + } else if (mask_4x4 & 1) { + if ((mask_4x4 & 3) == 3) { + // Next block's thresholds. 
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr, bd); + if ((mask_4x4_int & 3) == 3) { + vpx_highbd_lpf_horizontal_4_dual( + s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, lfin->hev_thr, bd); + } else { + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } else if (mask_4x4_int & 2) { + vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr, bd); + } + } + count = 2; + } else { + vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + } else { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } + } + s += 8 * count; + lfl += count; + mask_16x16 >>= count; + mask_8x8 >>= count; + mask_4x4 >>= count; + mask_4x4_int >>= count; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +// This function ors into the current lfm structure, where to do loop +// filters for the specific mi we are looking at. It uses information +// including the block_size_type (32x16, 32x32, etc.), the transform size, +// whether there were any coefficients encoded, and the loop filter strength +// block we are currently looking at. Shift is used to position the +// 1's we produce. 
+static void build_masks(const loop_filter_info_n *const lfi_n, + const MODE_INFO *mi, const int shift_y, + const int shift_uv, LOOP_FILTER_MASK *lfm) { + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; + const TX_SIZE tx_size_uv = uv_txsize_lookup[block_size][tx_size_y][1][1]; + const int filter_level = get_filter_level(lfi_n, mi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; + uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; + uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; + int i; + + // If filter level is 0 we don't loop filter. + if (!filter_level) { + return; + } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; + int index = shift_y; + for (i = 0; i < h; i++) { + memset(&lfm->lfl_y[index], filter_level, w); + index += 8; + } + } + + // These set 1 in the current block size for the block size edges. + // For instance if the block size is 32x16, we'll set: + // above = 1111 + // 0000 + // and + // left = 1000 + // = 1000 + // NOTE : In this example the low bit is left most ( 1000 ) is stored as + // 1, not 8... + // + // U and V set things on a 16 bit scale. + // + *above_y |= above_prediction_mask[block_size] << shift_y; + *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; + *left_y |= left_prediction_mask[block_size] << shift_y; + *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; + + // If the block has no coefficients and is not intra we skip applying + // the loop filter on block edges. + if (mi->skip && is_inter_block(mi)) return; + + // Here we are adding a mask for the transform size. The transform + // size mask is set to be correct for a 64x64 prediction block size. 
We + // mask to match the size of the block we are working on and then shift it + // into place.. + *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y]) + << shift_y; + *above_uv |= + (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv]) + << shift_uv; + + *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y]) + << shift_y; + *left_uv |= (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv]) + << shift_uv; + + // Here we are trying to determine what to do with the internal 4x4 block + // boundaries. These differ from the 4x4 boundaries on the outside edge of + // an 8x8 in that the internal ones can be skipped and don't depend on + // the prediction block size. + if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y; + + if (tx_size_uv == TX_4X4) + *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; +} + +// This function does the same thing as the one above with the exception that +// it only affects the y masks. It exists because for blocks < 16x16 in size, +// we only update u and v masks on the first block. 
+static void build_y_mask(const loop_filter_info_n *const lfi_n, + const MODE_INFO *mi, const int shift_y, + LOOP_FILTER_MASK *lfm) { + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; + const int filter_level = get_filter_level(lfi_n, mi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + int i; + + if (!filter_level) { + return; + } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; + int index = shift_y; + for (i = 0; i < h; i++) { + memset(&lfm->lfl_y[index], filter_level, w); + index += 8; + } + } + + *above_y |= above_prediction_mask[block_size] << shift_y; + *left_y |= left_prediction_mask[block_size] << shift_y; + + if (mi->skip && is_inter_block(mi)) return; + + *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y]) + << shift_y; + + *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y]) + << shift_y; + + if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y; +} + +void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, + LOOP_FILTER_MASK *lfm) { + int i; + + // The largest loopfilter we have is 16x16 so we use the 16x16 mask + // for 32x32 transforms also. + lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32]; + lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32]; + lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32]; + lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32]; + + // We do at least 8 tap filter on every 32x32 even if the transform size + // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and + // remove it from the 4x4. 
+ lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border; + lfm->left_y[TX_4X4] &= ~left_border; + lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border; + lfm->above_y[TX_4X4] &= ~above_border; + lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv; + lfm->left_uv[TX_4X4] &= ~left_border_uv; + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv; + lfm->above_uv[TX_4X4] &= ~above_border_uv; + + // We do some special edge handling. + if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) { + const uint64_t rows = cm->mi_rows - mi_row; + + // Each pixel inside the border gets a 1, + const uint64_t mask_y = (((uint64_t)1 << (rows << 3)) - 1); + const uint16_t mask_uv = (((uint16_t)1 << (((rows + 1) >> 1) << 2)) - 1); + + // Remove values completely outside our border. + for (i = 0; i < TX_32X32; i++) { + lfm->left_y[i] &= mask_y; + lfm->above_y[i] &= mask_y; + lfm->left_uv[i] &= mask_uv; + lfm->above_uv[i] &= mask_uv; + } + lfm->int_4x4_y &= mask_y; + lfm->int_4x4_uv &= mask_uv; + + // We don't apply a wide loop filter on the last uv block row. If set + // apply the shorter one instead. + if (rows == 1) { + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16]; + lfm->above_uv[TX_16X16] = 0; + } + if (rows == 5) { + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00; + lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00); + } + } + + if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) { + const uint64_t columns = cm->mi_cols - mi_col; + + // Each pixel inside the border gets a 1, the multiply copies the border + // to where we need it. + const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL; + const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111; + + // Internal edges are not applied on the last column of the image so + // we mask 1 more for the internal edges + const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111; + + // Remove the bits outside the image edge. 
+ for (i = 0; i < TX_32X32; i++) { + lfm->left_y[i] &= mask_y; + lfm->above_y[i] &= mask_y; + lfm->left_uv[i] &= mask_uv; + lfm->above_uv[i] &= mask_uv; + } + lfm->int_4x4_y &= mask_y; + lfm->int_4x4_uv &= mask_uv_int; + + // We don't apply a wide loop filter on the last uv column. If set + // apply the shorter one instead. + if (columns == 1) { + lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16]; + lfm->left_uv[TX_16X16] = 0; + } + if (columns == 5) { + lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc); + lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc); + } + } + // We don't apply a loop filter on the first column in the image, mask that + // out. + if (mi_col == 0) { + for (i = 0; i < TX_32X32; i++) { + lfm->left_y[i] &= 0xfefefefefefefefeULL; + lfm->left_uv[i] &= 0xeeee; + } + } + + // Assert if we try to apply 2 different loop filters at the same position. + assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8])); + assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4])); + assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4])); + assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16])); + assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8])); + assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); + assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); + assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); + assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); + assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); + assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); + assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16])); + assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); + assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); + assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); + assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); +} + +// This function sets up the bit masks for the entire 64x64 region represented +// by mi_row, mi_col. 
+void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, + MODE_INFO **mi8x8, const int mode_info_stride, + LOOP_FILTER_MASK *lfm) { + int idx_32, idx_16, idx_8; + const loop_filter_info_n *const lfi_n = &cm->lf_info; + MODE_INFO **mip = mi8x8; + MODE_INFO **mip2 = mi8x8; + + // These are offsets to the next mi in the 64x64 block. It is what gets + // added to the mi ptr as we go through each loop. It helps us to avoid + // setting up special row and column counters for each index. The last step + // brings us out back to the starting position. + const int offset_32[] = { 4, (mode_info_stride << 2) - 4, 4, + -(mode_info_stride << 2) - 4 }; + const int offset_16[] = { 2, (mode_info_stride << 1) - 2, 2, + -(mode_info_stride << 1) - 2 }; + const int offset[] = { 1, mode_info_stride - 1, 1, -mode_info_stride - 1 }; + + // Following variables represent shifts to position the current block + // mask over the appropriate block. A shift of 36 to the left will move + // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left + // 4 rows to the appropriate spot. + const int shift_32_y[] = { 0, 4, 32, 36 }; + const int shift_16_y[] = { 0, 2, 16, 18 }; + const int shift_8_y[] = { 0, 1, 8, 9 }; + const int shift_32_uv[] = { 0, 2, 8, 10 }; + const int shift_16_uv[] = { 0, 1, 4, 5 }; + const int max_rows = + (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? cm->mi_rows - mi_row + : MI_BLOCK_SIZE); + const int max_cols = + (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? 
cm->mi_cols - mi_col + : MI_BLOCK_SIZE); + + vp9_zero(*lfm); + assert(mip[0] != NULL); + + switch (mip[0]->sb_type) { + case BLOCK_64X64: build_masks(lfi_n, mip[0], 0, 0, lfm); break; + case BLOCK_64X32: + build_masks(lfi_n, mip[0], 0, 0, lfm); + mip2 = mip + mode_info_stride * 4; + if (4 >= max_rows) break; + build_masks(lfi_n, mip2[0], 32, 8, lfm); + break; + case BLOCK_32X64: + build_masks(lfi_n, mip[0], 0, 0, lfm); + mip2 = mip + 4; + if (4 >= max_cols) break; + build_masks(lfi_n, mip2[0], 4, 2, lfm); + break; + default: + for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) { + const int shift_y_32 = shift_32_y[idx_32]; + const int shift_uv_32 = shift_32_uv[idx_32]; + const int mi_32_col_offset = ((idx_32 & 1) << 2); + const int mi_32_row_offset = ((idx_32 >> 1) << 2); + if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) + continue; + switch (mip[0]->sb_type) { + case BLOCK_32X32: + build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm); + break; + case BLOCK_32X16: + build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm); + if (mi_32_row_offset + 2 >= max_rows) continue; + mip2 = mip + mode_info_stride * 2; + build_masks(lfi_n, mip2[0], shift_y_32 + 16, shift_uv_32 + 4, lfm); + break; + case BLOCK_16X32: + build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm); + if (mi_32_col_offset + 2 >= max_cols) continue; + mip2 = mip + 2; + build_masks(lfi_n, mip2[0], shift_y_32 + 2, shift_uv_32 + 1, lfm); + break; + default: + for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { + const int shift_y_16 = shift_y_32 + shift_16_y[idx_16]; + const int shift_uv_16 = shift_uv_32 + shift_16_uv[idx_16]; + const int mi_16_col_offset = + mi_32_col_offset + ((idx_16 & 1) << 1); + const int mi_16_row_offset = + mi_32_row_offset + ((idx_16 >> 1) << 1); + + if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) + continue; + + switch (mip[0]->sb_type) { + case BLOCK_16X16: + build_masks(lfi_n, mip[0], shift_y_16, shift_uv_16, 
lfm); + break; + case BLOCK_16X8: + build_masks(lfi_n, mip[0], shift_y_16, shift_uv_16, lfm); + if (mi_16_row_offset + 1 >= max_rows) continue; + mip2 = mip + mode_info_stride; + build_y_mask(lfi_n, mip2[0], shift_y_16 + 8, lfm); + break; + case BLOCK_8X16: + build_masks(lfi_n, mip[0], shift_y_16, shift_uv_16, lfm); + if (mi_16_col_offset + 1 >= max_cols) continue; + mip2 = mip + 1; + build_y_mask(lfi_n, mip2[0], shift_y_16 + 1, lfm); + break; + default: { + const int shift_y_8_0 = shift_y_16 + shift_8_y[0]; + build_masks(lfi_n, mip[0], shift_y_8_0, shift_uv_16, lfm); + mip += offset[0]; + for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { + const int shift_y_8 = shift_y_16 + shift_8_y[idx_8]; + const int mi_8_col_offset = + mi_16_col_offset + ((idx_8 & 1)); + const int mi_8_row_offset = + mi_16_row_offset + ((idx_8 >> 1)); + + if (mi_8_col_offset >= max_cols || + mi_8_row_offset >= max_rows) + continue; + build_y_mask(lfi_n, mip[0], shift_y_8, lfm); + } + break; + } + } + } + break; + } + } + break; + } +} + +static void filter_selectively_vert( + uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, + unsigned int mask_4x4, unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { + unsigned int mask; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; + mask >>= 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask & 1) { + if (mask_16x16 & 1) { + vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + } else if (mask_8x8 & 1) { + vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + } else if (mask_4x4 & 1) { + vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + } + } + if (mask_4x4_int & 1) + vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + s += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void 
highbd_filter_selectively_vert( + uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, + unsigned int mask_4x4, unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { + unsigned int mask; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; + mask >>= 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask & 1) { + if (mask_16x16 & 1) { + vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, + bd); + } else if (mask_8x8 & 1) { + vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, + bd); + } else if (mask_4x4 & 1) { + vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, + bd); + } + } + if (mask_4x4_int & 1) + vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + s += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +void vp9_filter_block_plane_non420(VP9_COMMON *cm, + struct macroblockd_plane *plane, + MODE_INFO **mi_8x8, int mi_row, int mi_col) { + const int ss_x = plane->subsampling_x; + const int ss_y = plane->subsampling_y; + const int row_step = 1 << ss_y; + const int col_step = 1 << ss_x; + const int row_step_stride = cm->mi_stride * row_step; + struct buf_2d *const dst = &plane->dst; + uint8_t *const dst0 = dst->buf; + unsigned int mask_16x16[MI_BLOCK_SIZE]; + unsigned int mask_8x8[MI_BLOCK_SIZE]; + unsigned int mask_4x4[MI_BLOCK_SIZE]; + unsigned int mask_4x4_int[MI_BLOCK_SIZE]; + uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; + int r, c; + + vp9_zero(mask_16x16); + vp9_zero(mask_8x8); + vp9_zero(mask_4x4); + vp9_zero(mask_4x4_int); + vp9_zero(lfl); + + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { + unsigned int mask_16x16_c = 0; + unsigned int mask_8x8_c = 0; + unsigned int mask_4x4_c = 0; + unsigned int border_mask; + + // Determine the vertical edges that need filtering 
+ for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { + const MODE_INFO *mi = mi_8x8[c]; + const BLOCK_SIZE sb_type = mi[0].sb_type; + const int skip_this = mi[0].skip && is_inter_block(mi); + // left edge of current unit is block/partition edge -> no skip + const int block_edge_left = + (num_4x4_blocks_wide_lookup[sb_type] > 1) + ? !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) + : 1; + const int skip_this_c = skip_this && !block_edge_left; + // top edge of current unit is block/partition edge -> no skip + const int block_edge_above = + (num_4x4_blocks_high_lookup[sb_type] > 1) + ? !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) + : 1; + const int skip_this_r = skip_this && !block_edge_above; + const TX_SIZE tx_size = get_uv_tx_size(mi, plane); + const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; + const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; + + // Filter level can vary per MI + if (!(lfl[(r << 3) + (c >> ss_x)] = get_filter_level(&cm->lf_info, mi))) + continue; + + // Build masks based on the transform size of each block + if (tx_size == TX_32X32) { + if (!skip_this_c && ((c >> ss_x) & 3) == 0) { + if (!skip_border_4x4_c) + mask_16x16_c |= 1 << (c >> ss_x); + else + mask_8x8_c |= 1 << (c >> ss_x); + } + if (!skip_this_r && ((r >> ss_y) & 3) == 0) { + if (!skip_border_4x4_r) + mask_16x16[r] |= 1 << (c >> ss_x); + else + mask_8x8[r] |= 1 << (c >> ss_x); + } + } else if (tx_size == TX_16X16) { + if (!skip_this_c && ((c >> ss_x) & 1) == 0) { + if (!skip_border_4x4_c) + mask_16x16_c |= 1 << (c >> ss_x); + else + mask_8x8_c |= 1 << (c >> ss_x); + } + if (!skip_this_r && ((r >> ss_y) & 1) == 0) { + if (!skip_border_4x4_r) + mask_16x16[r] |= 1 << (c >> ss_x); + else + mask_8x8[r] |= 1 << (c >> ss_x); + } + } else { + // force 8x8 filtering on 32x32 boundaries + if (!skip_this_c) { + if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0) + mask_8x8_c |= 1 << (c >> ss_x); + else + mask_4x4_c |= 1 << (c >> 
ss_x); + } + + if (!skip_this_r) { + if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0) + mask_8x8[r] |= 1 << (c >> ss_x); + else + mask_4x4[r] |= 1 << (c >> ss_x); + } + + if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c) + mask_4x4_int[r] |= 1 << (c >> ss_x); + } + } + + // Disable filtering on the leftmost column + border_mask = ~(mi_col == 0 ? 1u : 0u); +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert( + CONVERT_TO_SHORTPTR(dst->buf), dst->stride, + mask_16x16_c & border_mask, mask_8x8_c & border_mask, + mask_4x4_c & border_mask, mask_4x4_int[r], cm->lf_info.lfthr, + &lfl[r << 3], (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask, + mask_8x8_c & border_mask, + mask_4x4_c & border_mask, mask_4x4_int[r], + cm->lf_info.lfthr, &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + dst->buf += 8 * dst->stride; + mi_8x8 += row_step_stride; + } + + // Now do horizontal pass + dst->buf = dst0; + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { + const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; + const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 
0 : mask_4x4_int[r]; + + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; + + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16[r]; + mask_8x8_r = mask_8x8[r]; + mask_4x4_r = mask_4x4[r]; + } +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz( + CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl[r << 3], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, + &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + dst->buf += 8 * dst->stride; + } +} + +void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, + struct macroblockd_plane *const plane, + int mi_row, LOOP_FILTER_MASK *lfm) { + struct buf_2d *const dst = &plane->dst; + uint8_t *const dst0 = dst->buf; + int r; + uint64_t mask_16x16 = lfm->left_y[TX_16X16]; + uint64_t mask_8x8 = lfm->left_y[TX_8X8]; + uint64_t mask_4x4 = lfm->left_y[TX_4X4]; + uint64_t mask_4x4_int = lfm->int_4x4_y; + + assert(plane->subsampling_x == 0 && plane->subsampling_y == 0); + + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + // Disable filtering on the leftmost column. + highbd_filter_selectively_vert_row2( + plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, + (unsigned int)mask_16x16, (unsigned int)mask_8x8, + (unsigned int)mask_4x4, (unsigned int)mask_4x4_int, cm->lf_info.lfthr, + &lfm->lfl_y[r << 3], (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + // Disable filtering on the leftmost column. 
+ filter_selectively_vert_row2( + plane->subsampling_x, dst->buf, dst->stride, (unsigned int)mask_16x16, + (unsigned int)mask_8x8, (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + dst->buf += 16 * dst->stride; + mask_16x16 >>= 16; + mask_8x8 >>= 16; + mask_4x4 >>= 16; + mask_4x4_int >>= 16; + } + + // Horizontal pass + dst->buf = dst0; + mask_16x16 = lfm->above_y[TX_16X16]; + mask_8x8 = lfm->above_y[TX_8X8]; + mask_4x4 = lfm->above_y[TX_4X4]; + mask_4x4_int = lfm->int_4x4_y; + + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; + + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16 & 0xff; + mask_8x8_r = mask_8x8 & 0xff; + mask_4x4_r = mask_4x4 & 0xff; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz( + CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, cm->lf_info.lfthr, + &lfm->lfl_y[r << 3], (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 8 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; + } +} + +void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, + struct macroblockd_plane *const plane, + int mi_row, LOOP_FILTER_MASK *lfm) { + struct buf_2d *const dst = &plane->dst; + uint8_t *const dst0 = dst->buf; + int r, c; + uint8_t lfl_uv[16]; + + uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; + uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; + uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; + uint16_t 
mask_4x4_int = lfm->int_4x4_uv; + + vp9_zero(lfl_uv); + + assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); + + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { + for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { + lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; + lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + // Disable filtering on the leftmost column. + highbd_filter_selectively_vert_row2( + plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, + (unsigned int)mask_16x16, (unsigned int)mask_8x8, + (unsigned int)mask_4x4, (unsigned int)mask_4x4_int, cm->lf_info.lfthr, + &lfl_uv[r << 1], (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + // Disable filtering on the leftmost column. + filter_selectively_vert_row2( + plane->subsampling_x, dst->buf, dst->stride, (unsigned int)mask_16x16, + (unsigned int)mask_8x8, (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, cm->lf_info.lfthr, &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 16 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; + } + + // Horizontal pass + dst->buf = dst0; + mask_16x16 = lfm->above_uv[TX_16X16]; + mask_8x8 = lfm->above_uv[TX_8X8]; + mask_4x4 = lfm->above_uv[TX_4X4]; + mask_4x4_int = lfm->int_4x4_uv; + + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; + const unsigned int mask_4x4_int_r = + skip_border_4x4_r ? 
0 : (mask_4x4_int & 0xf); + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; + + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16 & 0xf; + mask_8x8_r = mask_8x8 & 0xf; + mask_4x4_r = mask_4x4 & 0xf; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz( + CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl_uv[r << 1], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, + &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 8 * dst->stride; + mask_16x16 >>= 4; + mask_8x8 >>= 4; + mask_4x4 >>= 4; + mask_4x4_int >>= 4; + } +} + +static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int start, int stop, int y_only) { + const int num_planes = y_only ? 1 : MAX_MB_PLANE; + enum lf_path path; + int mi_row, mi_col; + + if (y_only) + path = LF_PATH_444; + else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) + path = LF_PATH_420; + else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) + path = LF_PATH_444; + else + path = LF_PATH_SLOW; + + for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { + MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; + LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); + + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { + int plane; + + vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); + + // TODO(jimbankoski): For 444 only need to do y mask. 
+ vp9_adjust_mask(cm, mi_row, mi_col, lfm); + + vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); + for (plane = 1; plane < num_planes; ++plane) { + switch (path) { + case LF_PATH_420: + vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); + break; + case LF_PATH_444: + vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); + break; + case LF_PATH_SLOW: + vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, + mi_row, mi_col); + break; + } + } + } + } +} + +void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm, + MACROBLOCKD *xd, int frame_filter_level, int y_only, + int partial_frame) { + int start_mi_row, end_mi_row, mi_rows_to_filter; + if (!frame_filter_level) return; + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only); +} + +// Used by the encoder to build the loopfilter masks. +// TODO(slavarnway): Do the encoder the same way the decoder does it and +// build the masks in line as part of the encode process. 
+void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level, + int partial_frame) { + int start_mi_row, end_mi_row, mi_rows_to_filter; + int mi_col, mi_row; + if (!frame_filter_level) return; + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + + vp9_loop_filter_frame_init(cm, frame_filter_level); + + for (mi_row = start_mi_row; mi_row < end_mi_row; mi_row += MI_BLOCK_SIZE) { + MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + // vp9_setup_mask() zeros lfm + vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, + get_lfm(&cm->lf, mi_row, mi_col)); + } + } +} + +// 8x8 blocks in a superblock. A "1" represents the first block in a 16x16 +// or greater area. +static const uint8_t first_block_in_16x16[8][8] = { + { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 } +}; + +// This function sets up the bit masks for a block represented +// by mi_row, mi_col in a 64x64 region. +// TODO(SJL): This function only works for yv12. 
+void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row, int mi_col, + int bw, int bh) { + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; + const loop_filter_info_n *const lfi_n = &cm->lf_info; + const int filter_level = get_filter_level(lfi_n, mi); + const TX_SIZE tx_size_uv = uv_txsize_lookup[block_size][tx_size_y][1][1]; + LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; + uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; + uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; + const int row_in_sb = (mi_row & 7); + const int col_in_sb = (mi_col & 7); + const int shift_y = col_in_sb + (row_in_sb << 3); + const int shift_uv = (col_in_sb >> 1) + ((row_in_sb >> 1) << 2); + const int build_uv = first_block_in_16x16[row_in_sb][col_in_sb]; + + if (!filter_level) { + return; + } else { + int index = shift_y; + int i; + for (i = 0; i < bh; i++) { + memset(&lfm->lfl_y[index], filter_level, bw); + index += 8; + } + } + + // These set 1 in the current block size for the block size edges. + // For instance if the block size is 32x16, we'll set: + // above = 1111 + // 0000 + // and + // left = 1000 + // = 1000 + // NOTE : In this example the low bit is left most ( 1000 ) is stored as + // 1, not 8... + // + // U and V set things on a 16 bit scale. + // + *above_y |= above_prediction_mask[block_size] << shift_y; + *left_y |= left_prediction_mask[block_size] << shift_y; + + if (build_uv) { + *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; + *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; + } + + // If the block has no coefficients and is not intra we skip applying + // the loop filter on block edges. 
+ if (mi->skip && is_inter_block(mi)) return; + + // Add a mask for the transform size. The transform size mask is set to + // be correct for a 64x64 prediction block size. Mask to match the size of + // the block we are working on and then shift it into place. + *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y]) + << shift_y; + *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y]) + << shift_y; + + if (build_uv) { + *above_uv |= + (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv]) + << shift_uv; + + *left_uv |= + (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv]) + << shift_uv; + } + + // Try to determine what to do with the internal 4x4 block boundaries. These + // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the + // internal ones can be skipped and don't depend on the prediction block size. + if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y; + + if (build_uv && tx_size_uv == TX_4X4) + *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; +} + +void vp9_loop_filter_data_reset( + LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, + struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) { + lf_data->frame_buffer = frame_buffer; + lf_data->cm = cm; + lf_data->start = 0; + lf_data->stop = 0; + lf_data->y_only = 0; + memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); +} + +void vp9_reset_lfm(VP9_COMMON *const cm) { + if (cm->lf.filter_level) { + memset(cm->lf.lfm, 0, + ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride * + sizeof(*cm->lf.lfm)); + } +} + +int vp9_loop_filter_worker(void *arg1, void *unused) { + LFWorkerData *const lf_data = (LFWorkerData *)arg1; + (void)unused; + loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only); + return 1; +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_loopfilter.h 
b/media/libvpx/libvpx/vp9/common/vp9_loopfilter.h new file mode 100644 index 0000000000..39648a72c3 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_loopfilter.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_VP9_LOOPFILTER_H_ +#define VPX_VP9_COMMON_VP9_LOOPFILTER_H_ + +#include "vpx_ports/mem.h" +#include "./vpx_config.h" + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_seg_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_LOOP_FILTER 63 +#define MAX_SHARPNESS 7 + +#define SIMD_WIDTH 16 + +#define MAX_REF_LF_DELTAS 4 +#define MAX_MODE_LF_DELTAS 2 + +enum lf_path { + LF_PATH_420, + LF_PATH_444, + LF_PATH_SLOW, +}; + +// Need to align this structure so when it is declared and +// passed it can be loaded into vector registers. +typedef struct { + DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]); + DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]); + DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]); +} loop_filter_thresh; + +typedef struct { + loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; + uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; +} loop_filter_info_n; + +// This structure holds bit masks for all 8x8 blocks in a 64x64 region. +// Each 1 bit represents a position in which we want to apply the loop filter. +// Left_ entries refer to whether we apply a filter on the border to the +// left of the block. Above_ entries refer to whether or not to apply a +// filter on the above border. 
Int_ entries refer to whether or not to +// apply borders on the 4x4 edges within the 8x8 block that each bit +// represents. +// Since each transform is accompanied by a potentially different type of +// loop filter there is a different entry in the array for each transform size. +typedef struct { + uint64_t left_y[TX_SIZES]; + uint64_t above_y[TX_SIZES]; + uint64_t int_4x4_y; + uint16_t left_uv[TX_SIZES]; + uint16_t above_uv[TX_SIZES]; + uint16_t int_4x4_uv; + uint8_t lfl_y[64]; +} LOOP_FILTER_MASK; + +struct loopfilter { + int filter_level; + int last_filt_level; + + int sharpness_level; + int last_sharpness_level; + + uint8_t mode_ref_delta_enabled; + uint8_t mode_ref_delta_update; + + // 0 = Intra, Last, GF, ARF + signed char ref_deltas[MAX_REF_LF_DELTAS]; + signed char last_ref_deltas[MAX_REF_LF_DELTAS]; + + // 0 = ZERO_MV, MV + signed char mode_deltas[MAX_MODE_LF_DELTAS]; + signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; + + LOOP_FILTER_MASK *lfm; + int lfm_stride; +}; + +/* assorted loopfilter functions which get used elsewhere */ +struct VP9Common; +struct macroblockd; +struct VP9LfSyncData; + +// This function sets up the bit masks for the entire 64x64 region represented +// by mi_row, mi_col. +void vp9_setup_mask(struct VP9Common *const cm, const int mi_row, + const int mi_col, MODE_INFO **mi8x8, + const int mode_info_stride, LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane_ss00(struct VP9Common *const cm, + struct macroblockd_plane *const plane, + int mi_row, LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane_ss11(struct VP9Common *const cm, + struct macroblockd_plane *const plane, + int mi_row, LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane_non420(struct VP9Common *cm, + struct macroblockd_plane *plane, + MODE_INFO **mi_8x8, int mi_row, int mi_col); + +void vp9_loop_filter_init(struct VP9Common *cm); + +// Update the loop filter for the current frame. 
+// This should be called before vp9_loop_filter_frame(), vp9_build_mask_frame() +// calls this function directly. +void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl); + +void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm, + struct macroblockd *xd, int frame_filter_level, + int y_only, int partial_frame); + +// Get the superblock lfm for a given mi_row, mi_col. +static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf, + const int mi_row, const int mi_col) { + return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)]; +} + +void vp9_build_mask(struct VP9Common *cm, const MODE_INFO *mi, int mi_row, + int mi_col, int bw, int bh); +void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row, + const int mi_col, LOOP_FILTER_MASK *lfm); +void vp9_build_mask_frame(struct VP9Common *cm, int frame_filter_level, + int partial_frame); +void vp9_reset_lfm(struct VP9Common *const cm); + +typedef struct LoopFilterWorkerData { + YV12_BUFFER_CONFIG *frame_buffer; + struct VP9Common *cm; + struct macroblockd_plane planes[MAX_MB_PLANE]; + + int start; + int stop; + int y_only; +} LFWorkerData; + +void vp9_loop_filter_data_reset( + LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, + struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]); + +// Operates on the rows described by 'arg1' (cast to LFWorkerData *). +int vp9_loop_filter_worker(void *arg1, void *unused); +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_LOOPFILTER_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_mfqe.c b/media/libvpx/libvpx/vp9/common/vp9_mfqe.c new file mode 100644 index 0000000000..e76d771b8d --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_mfqe.c @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" + +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_postproc.h" + +// TODO(jackychen): Replace this function with SSE2 code. There is +// one SSE2 implementation in vp8, so will consider how to share it +// between vp8 and vp9. +static void filter_by_weight(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride, int block_size, int src_weight) { + const int dst_weight = (1 << MFQE_PRECISION) - src_weight; + const int rounding_bit = 1 << (MFQE_PRECISION - 1); + int r, c; + + for (r = 0; r < block_size; r++) { + for (c = 0; c < block_size; c++) { + dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) >> + MFQE_PRECISION; + } + src += src_stride; + dst += dst_stride; + } +} + +void vp9_filter_by_weight8x8_c(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride, int src_weight) { + filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight); +} + +void vp9_filter_by_weight16x16_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int src_weight) { + filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight); +} + +static void filter_by_weight32x32(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int weight) { + vp9_filter_by_weight16x16(src, src_stride, dst, dst_stride, weight); + vp9_filter_by_weight16x16(src + 16, src_stride, dst + 16, dst_stride, weight); + vp9_filter_by_weight16x16(src + src_stride * 16, src_stride, + dst + dst_stride * 16, dst_stride, weight); + vp9_filter_by_weight16x16(src + src_stride * 16 + 16, 
src_stride, + dst + dst_stride * 16 + 16, dst_stride, weight); +} + +static void filter_by_weight64x64(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int weight) { + filter_by_weight32x32(src, src_stride, dst, dst_stride, weight); + filter_by_weight32x32(src + 32, src_stride, dst + 32, dst_stride, weight); + filter_by_weight32x32(src + src_stride * 32, src_stride, + dst + dst_stride * 32, dst_stride, weight); + filter_by_weight32x32(src + src_stride * 32 + 32, src_stride, + dst + dst_stride * 32 + 32, dst_stride, weight); +} + +static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd, + int yd_stride, const uint8_t *u, const uint8_t *v, + int uv_stride, uint8_t *ud, uint8_t *vd, + int uvd_stride, BLOCK_SIZE block_size, int weight) { + if (block_size == BLOCK_16X16) { + vp9_filter_by_weight16x16(y, y_stride, yd, yd_stride, weight); + vp9_filter_by_weight8x8(u, uv_stride, ud, uvd_stride, weight); + vp9_filter_by_weight8x8(v, uv_stride, vd, uvd_stride, weight); + } else if (block_size == BLOCK_32X32) { + filter_by_weight32x32(y, y_stride, yd, yd_stride, weight); + vp9_filter_by_weight16x16(u, uv_stride, ud, uvd_stride, weight); + vp9_filter_by_weight16x16(v, uv_stride, vd, uvd_stride, weight); + } else if (block_size == BLOCK_64X64) { + filter_by_weight64x64(y, y_stride, yd, yd_stride, weight); + filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight); + filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight); + } +} + +// TODO(jackychen): Determine whether replace it with assembly code. 
+static void copy_mem8x8(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride) { + int r; + for (r = 0; r < 8; r++) { + memcpy(dst, src, 8); + src += src_stride; + dst += dst_stride; + } +} + +static void copy_mem16x16(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride) { + int r; + for (r = 0; r < 16; r++) { + memcpy(dst, src, 16); + src += src_stride; + dst += dst_stride; + } +} + +static void copy_mem32x32(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride) { + copy_mem16x16(src, src_stride, dst, dst_stride); + copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride); + copy_mem16x16(src + src_stride * 16, src_stride, dst + dst_stride * 16, + dst_stride); + copy_mem16x16(src + src_stride * 16 + 16, src_stride, + dst + dst_stride * 16 + 16, dst_stride); +} + +static void copy_mem64x64(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride) { + copy_mem32x32(src, src_stride, dst, dst_stride); + copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride); + copy_mem32x32(src + src_stride * 32, src_stride, dst + src_stride * 32, + dst_stride); + copy_mem32x32(src + src_stride * 32 + 32, src_stride, + dst + src_stride * 32 + 32, dst_stride); +} + +static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, + int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud, + uint8_t *vd, int yd_stride, int uvd_stride, + BLOCK_SIZE bs) { + if (bs == BLOCK_16X16) { + copy_mem16x16(y, y_stride, yd, yd_stride); + copy_mem8x8(u, uv_stride, ud, uvd_stride); + copy_mem8x8(v, uv_stride, vd, uvd_stride); + } else if (bs == BLOCK_32X32) { + copy_mem32x32(y, y_stride, yd, yd_stride); + copy_mem16x16(u, uv_stride, ud, uvd_stride); + copy_mem16x16(v, uv_stride, vd, uvd_stride); + } else { + copy_mem64x64(y, y_stride, yd, yd_stride); + copy_mem32x32(u, uv_stride, ud, uvd_stride); + copy_mem32x32(v, uv_stride, vd, uvd_stride); + } +} + +static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) { + 
const int adj = qdiff >> MFQE_PRECISION; + if (bs == BLOCK_16X16) { + *sad_thr = 7 + adj; + } else if (bs == BLOCK_32X32) { + *sad_thr = 6 + adj; + } else { // BLOCK_64X64 + *sad_thr = 5 + adj; + } + *vdiff_thr = 125 + qdiff; +} + +static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u, + const uint8_t *v, int y_stride, int uv_stride, + uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride, + int uvd_stride, int qdiff) { + int sad, sad_thr, vdiff, vdiff_thr; + uint32_t sse; + + get_thr(bs, qdiff, &sad_thr, &vdiff_thr); + + if (bs == BLOCK_16X16) { + vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8; + sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8; + } else if (bs == BLOCK_32X32) { + vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10; + sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10; + } else /* if (bs == BLOCK_64X64) */ { + vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12; + sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12; + } + + // vdiff > sad * 3 means vdiff should not be too small, otherwise, + // it might be a lighting change in smooth area. When there is a + // lighting change in smooth area, it is dangerous to do MFQE. + if (sad > 1 && vdiff > sad * 3) { + const int weight = 1 << MFQE_PRECISION; + int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr); + // When ifactor equals weight, no MFQE is done. + if (ifactor > weight) { + ifactor = weight; + } + apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd, + uvd_stride, bs, ifactor); + } else { + // Copy the block from current frame (i.e., no mfqe is done). + copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, uvd_stride, + bs); + } +} + +static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) { + // Check the motion in current block(for inter frame), + // or check the motion in the correlated block in last frame (for keyframe). 
+ const int mv_len_square = mi->mv[0].as_mv.row * mi->mv[0].as_mv.row + + mi->mv[0].as_mv.col * mi->mv[0].as_mv.col; + const int mv_threshold = 100; + return mi->mode >= NEARESTMV && // Not an intra block + cur_bs >= BLOCK_16X16 && mv_len_square <= mv_threshold; +} + +// Process each partiton in a super block, recursively. +static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs, + const uint8_t *y, const uint8_t *u, const uint8_t *v, + int y_stride, int uv_stride, uint8_t *yd, + uint8_t *ud, uint8_t *vd, int yd_stride, + int uvd_stride) { + int mi_offset, y_offset, uv_offset; + const BLOCK_SIZE cur_bs = mi->sb_type; + const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex; + const int bsl = b_width_log2_lookup[bs]; + PARTITION_TYPE partition = partition_lookup[bsl][cur_bs]; + const BLOCK_SIZE subsize = get_subsize(bs, partition); + + if (cur_bs < BLOCK_8X8) { + // If there are blocks smaller than 8x8, it must be on the boundary. + return; + } + // No MFQE on blocks smaller than 16x16 + if (bs == BLOCK_16X16) { + partition = PARTITION_NONE; + } + if (bs == BLOCK_64X64) { + mi_offset = 4; + y_offset = 32; + uv_offset = 16; + } else { + mi_offset = 2; + y_offset = 16; + uv_offset = 8; + } + switch (partition) { + BLOCK_SIZE mfqe_bs, bs_tmp; + case PARTITION_HORZ: + if (bs == BLOCK_64X64) { + mfqe_bs = BLOCK_64X32; + bs_tmp = BLOCK_32X32; + } else { + mfqe_bs = BLOCK_32X16; + bs_tmp = BLOCK_16X16; + } + if (mfqe_decision(mi, mfqe_bs)) { + // Do mfqe on the first square partition. + mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, + uvd_stride, qdiff); + // Do mfqe on the second square partition. + mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, y_stride, + uv_stride, yd + y_offset, ud + uv_offset, vd + uv_offset, + yd_stride, uvd_stride, qdiff); + } + if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) { + // Do mfqe on the first square partition. 
+ mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride, + v + uv_offset * uv_stride, y_stride, uv_stride, + yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, + vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff); + // Do mfqe on the second square partition. + mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset, + u + uv_offset * uv_stride + uv_offset, + v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride, + yd + y_offset * yd_stride + y_offset, + ud + uv_offset * uvd_stride + uv_offset, + vd + uv_offset * uvd_stride + uv_offset, yd_stride, + uvd_stride, qdiff); + } + break; + case PARTITION_VERT: + if (bs == BLOCK_64X64) { + mfqe_bs = BLOCK_32X64; + bs_tmp = BLOCK_32X32; + } else { + mfqe_bs = BLOCK_16X32; + bs_tmp = BLOCK_16X16; + } + if (mfqe_decision(mi, mfqe_bs)) { + // Do mfqe on the first square partition. + mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, + uvd_stride, qdiff); + // Do mfqe on the second square partition. + mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride, + v + uv_offset * uv_stride, y_stride, uv_stride, + yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, + vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff); + } + if (mfqe_decision(mi + mi_offset, mfqe_bs)) { + // Do mfqe on the first square partition. + mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, y_stride, + uv_stride, yd + y_offset, ud + uv_offset, vd + uv_offset, + yd_stride, uvd_stride, qdiff); + // Do mfqe on the second square partition. + mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset, + u + uv_offset * uv_stride + uv_offset, + v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride, + yd + y_offset * yd_stride + y_offset, + ud + uv_offset * uvd_stride + uv_offset, + vd + uv_offset * uvd_stride + uv_offset, yd_stride, + uvd_stride, qdiff); + } + break; + case PARTITION_NONE: + if (mfqe_decision(mi, cur_bs)) { + // Do mfqe on this partition. 
+ mfqe_block(cur_bs, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, + uvd_stride, qdiff); + } else { + // Copy the block from current frame(i.e., no mfqe is done). + copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, + uvd_stride, bs); + } + break; + case PARTITION_SPLIT: + // Recursion on four square partitions, e.g. if bs is 64X64, + // then look into four 32X32 blocks in it. + mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd, + yd_stride, uvd_stride); + mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset, + v + uv_offset, y_stride, uv_stride, yd + y_offset, + ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride); + mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize, + y + y_offset * y_stride, u + uv_offset * uv_stride, + v + uv_offset * uv_stride, y_stride, uv_stride, + yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, + vd + uv_offset * uvd_stride, yd_stride, uvd_stride); + mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset, subsize, + y + y_offset * y_stride + y_offset, + u + uv_offset * uv_stride + uv_offset, + v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride, + yd + y_offset * yd_stride + y_offset, + ud + uv_offset * uvd_stride + uv_offset, + vd + uv_offset * uvd_stride + uv_offset, yd_stride, + uvd_stride); + break; + default: assert(0); + } +} + +void vp9_mfqe(VP9_COMMON *cm) { + int mi_row, mi_col; + // Current decoded frame. + const YV12_BUFFER_CONFIG *show = cm->frame_to_show; + // Last decoded frame and will store the MFQE result. + YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer; + // Loop through each super block. + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + MODE_INFO *mi; + MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col); + // Motion Info in last frame. 
+ MODE_INFO *mi_prev = + cm->postproc_state.prev_mi + (mi_row * cm->mi_stride + mi_col); + const uint32_t y_stride = show->y_stride; + const uint32_t uv_stride = show->uv_stride; + const uint32_t yd_stride = dest->y_stride; + const uint32_t uvd_stride = dest->uv_stride; + const uint32_t row_offset_y = mi_row << 3; + const uint32_t row_offset_uv = mi_row << 2; + const uint32_t col_offset_y = mi_col << 3; + const uint32_t col_offset_uv = mi_col << 2; + const uint8_t *y = + show->y_buffer + row_offset_y * y_stride + col_offset_y; + const uint8_t *u = + show->u_buffer + row_offset_uv * uv_stride + col_offset_uv; + const uint8_t *v = + show->v_buffer + row_offset_uv * uv_stride + col_offset_uv; + uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y; + uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride + col_offset_uv; + uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride + col_offset_uv; + if (frame_is_intra_only(cm)) { + mi = mi_prev; + } else { + mi = mi_local; + } + mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud, + vd, yd_stride, uvd_stride); + } + } +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_mfqe.h b/media/libvpx/libvpx/vp9/common/vp9_mfqe.h new file mode 100644 index 0000000000..f53e1c2f9d --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_mfqe.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_VP9_MFQE_H_ +#define VPX_VP9_COMMON_VP9_MFQE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +// Multiframe Quality Enhancement. 
+// The aim for MFQE is to replace pixel blocks in the current frame with +// the correlated pixel blocks (with higher quality) in the last frame. +// The replacement can only be taken in stationary blocks by checking +// the motion of the blocks and other conditions such as the SAD of +// the current block and correlated block, the variance of the block +// difference, etc. +void vp9_mfqe(struct VP9Common *cm); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_MFQE_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_mv.h b/media/libvpx/libvpx/vp9/common/vp9_mv.h new file mode 100644 index 0000000000..76f93cf0ba --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_mv.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VP9_COMMON_VP9_MV_H_ +#define VPX_VP9_COMMON_VP9_MV_H_ + +#include "vpx/vpx_integer.h" + +#include "vp9/common/vp9_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define INVALID_MV 0x80008000 + +typedef struct mv { + int16_t row; + int16_t col; +} MV; + +typedef union int_mv { + uint32_t as_int; + MV as_mv; +} int_mv; /* facilitates faster equality tests and copies */ + +typedef struct mv32 { + int32_t row; + int32_t col; +} MV32; + +static INLINE int is_zero_mv(const MV *mv) { + return *((const uint32_t *)mv) == 0; +} + +static INLINE int is_equal_mv(const MV *a, const MV *b) { + return *((const uint32_t *)a) == *((const uint32_t *)b); +} + +static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row, + int max_row) { + mv->col = clamp(mv->col, min_col, max_col); + mv->row = clamp(mv->row, min_row, max_row); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_MV_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_mvref_common.c b/media/libvpx/libvpx/vp9/common/vp9_mvref_common.c new file mode 100644 index 0000000000..70f77aba1f --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_mvref_common.c @@ -0,0 +1,199 @@ + +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_mvref_common.h" + +// This function searches the neighborhood of a given MB/SB +// to try and find candidate reference vectors. 
+static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, int block, int mi_row, + int mi_col, uint8_t *mode_context) { + const int *ref_sign_bias = cm->ref_frame_sign_bias; + int i, refmv_count = 0; + const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type]; + int different_ref_found = 0; + int context_counter = 0; + const MV_REF *const prev_frame_mvs = + cm->use_prev_frame_mvs + ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col + : NULL; + const TileInfo *const tile = &xd->tile; + + // Blank the reference vector list + memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); + + // The nearest 2 blocks are treated differently + // if the size < 8x8 we get the mv from the bmi substructure, + // and we also need to keep a mode count. + for (i = 0; i < 2; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + // Keep counts for entropy encoding. + context_counter += mode_2_counter[candidate_mi->mode]; + different_ref_found = 1; + + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + else if (candidate_mi->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + } + } + + // Check the rest of the neighbors in much the same way + // as before except we don't need to keep track of sub blocks or + // mode counts. 
+ for (; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + different_ref_found = 1; + + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done); + else if (candidate_mi->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(candidate_mi->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // Check the last frame's mode and mv info. + if (cm->use_prev_frame_mvs) { + if (prev_frame_mvs->ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); + } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { + ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // Since we couldn't find 2 mvs from the same reference frame + // go back through the neighbors and find motion vectors from + // different reference frames. + if (different_ref_found) { + for (i = 0; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + + // If the candidate is INTRA we don't want to consider its mv. + IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias, + refmv_count, mv_ref_list, Done); + } + } + } + + // Since we still don't have a candidate we'll try the last frame. 
+ if (cm->use_prev_frame_mvs) { + if (prev_frame_mvs->ref_frame[0] != ref_frame && + prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { + int_mv mv = prev_frame_mvs->mv[0]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); + } + + if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && + prev_frame_mvs->ref_frame[1] != ref_frame && + prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { + int_mv mv = prev_frame_mvs->mv[1]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); + } + } + +Done: + + mode_context[ref_frame] = counter_to_context[context_counter]; + + // Clamp vectors + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) + clamp_mv_ref(&mv_ref_list[i].as_mv, xd); +} + +void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, int mi_row, int mi_col, + uint8_t *mode_context) { + find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col, + mode_context); +} + +void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, int_mv *mvlist, + int_mv *nearest_mv, int_mv *near_mv) { + int i; + // Make sure all the candidates are properly clamped etc + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { + lower_mv_precision(&mvlist[i].as_mv, allow_hp); + clamp_mv2(&mvlist[i].as_mv, xd); + } + *nearest_mv = mvlist[0]; + *near_mv = mvlist[1]; +} + +void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block, + int ref, int mi_row, int mi_col, + int_mv *nearest_mv, int_mv *near_mv, + uint8_t *mode_context) { + int_mv mv_list[MAX_MV_REF_CANDIDATES]; + MODE_INFO *const mi = xd->mi[0]; + b_mode_info *bmi = mi->bmi; + int n; + + assert(MAX_MV_REF_CANDIDATES == 2); + + find_mv_refs_idx(cm, xd, mi, mi->ref_frame[ref], 
mv_list, block, mi_row, + mi_col, mode_context); + + near_mv->as_int = 0; + switch (block) { + case 0: + nearest_mv->as_int = mv_list[0].as_int; + near_mv->as_int = mv_list[1].as_int; + break; + case 1: + case 2: + nearest_mv->as_int = bmi[0].as_mv[ref].as_int; + for (n = 0; n < MAX_MV_REF_CANDIDATES; ++n) + if (nearest_mv->as_int != mv_list[n].as_int) { + near_mv->as_int = mv_list[n].as_int; + break; + } + break; + case 3: { + int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; + candidates[0] = bmi[1].as_mv[ref]; + candidates[1] = bmi[0].as_mv[ref]; + candidates[2] = mv_list[0]; + candidates[3] = mv_list[1]; + + nearest_mv->as_int = bmi[2].as_mv[ref].as_int; + for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) + if (nearest_mv->as_int != candidates[n].as_int) { + near_mv->as_int = candidates[n].as_int; + break; + } + break; + } + default: assert(0 && "Invalid block index."); + } +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_mvref_common.h b/media/libvpx/libvpx/vp9/common/vp9_mvref_common.h new file mode 100644 index 0000000000..5db6772dca --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_mvref_common.h @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef VPX_VP9_COMMON_VP9_MVREF_COMMON_H_ +#define VPX_VP9_COMMON_VP9_MVREF_COMMON_H_ + +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3) +#define RIGHT_BOTTOM_MARGIN \ + ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3) + +#define MVREF_NEIGHBOURS 8 + +typedef struct position { + int row; + int col; +} POSITION; + +typedef enum { + BOTH_ZERO = 0, + ZERO_PLUS_PREDICTED = 1, + BOTH_PREDICTED = 2, + NEW_PLUS_NON_INTRA = 3, + BOTH_NEW = 4, + INTRA_PLUS_NON_INTRA = 5, + BOTH_INTRA = 6, + INVALID_CASE = 9 +} motion_vector_context; + +// This is used to figure out a context for the ref blocks. The code flattens +// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by +// adding 9 for each intra block, 3 for each zero mv and 1 for each new +// motion vector. This single number is then converted into a context +// with a single lookup ( counter_to_context ). +static const int mode_2_counter[MB_MODE_COUNT] = { + 9, // DC_PRED + 9, // V_PRED + 9, // H_PRED + 9, // D45_PRED + 9, // D135_PRED + 9, // D117_PRED + 9, // D153_PRED + 9, // D207_PRED + 9, // D63_PRED + 9, // TM_PRED + 0, // NEARESTMV + 0, // NEARMV + 3, // ZEROMV + 1, // NEWMV +}; + +// There are 3^3 different combinations of 3 counts that can be either 0,1 or +// 2. However the actual count can never be greater than 2 so the highest +// counter we need is 18. 9 is an invalid counter that's never used. 
+static const int counter_to_context[19] = { + BOTH_PREDICTED, // 0 + NEW_PLUS_NON_INTRA, // 1 + BOTH_NEW, // 2 + ZERO_PLUS_PREDICTED, // 3 + NEW_PLUS_NON_INTRA, // 4 + INVALID_CASE, // 5 + BOTH_ZERO, // 6 + INVALID_CASE, // 7 + INVALID_CASE, // 8 + INTRA_PLUS_NON_INTRA, // 9 + INTRA_PLUS_NON_INTRA, // 10 + INVALID_CASE, // 11 + INTRA_PLUS_NON_INTRA, // 12 + INVALID_CASE, // 13 + INVALID_CASE, // 14 + INVALID_CASE, // 15 + INVALID_CASE, // 16 + INVALID_CASE, // 17 + BOTH_INTRA // 18 +}; + +static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = { + // 4X4 + { { -1, 0 }, + { 0, -1 }, + { -1, -1 }, + { -2, 0 }, + { 0, -2 }, + { -2, -1 }, + { -1, -2 }, + { -2, -2 } }, + // 4X8 + { { -1, 0 }, + { 0, -1 }, + { -1, -1 }, + { -2, 0 }, + { 0, -2 }, + { -2, -1 }, + { -1, -2 }, + { -2, -2 } }, + // 8X4 + { { -1, 0 }, + { 0, -1 }, + { -1, -1 }, + { -2, 0 }, + { 0, -2 }, + { -2, -1 }, + { -1, -2 }, + { -2, -2 } }, + // 8X8 + { { -1, 0 }, + { 0, -1 }, + { -1, -1 }, + { -2, 0 }, + { 0, -2 }, + { -2, -1 }, + { -1, -2 }, + { -2, -2 } }, + // 8X16 + { { 0, -1 }, + { -1, 0 }, + { 1, -1 }, + { -1, -1 }, + { 0, -2 }, + { -2, 0 }, + { -2, -1 }, + { -1, -2 } }, + // 16X8 + { { -1, 0 }, + { 0, -1 }, + { -1, 1 }, + { -1, -1 }, + { -2, 0 }, + { 0, -2 }, + { -1, -2 }, + { -2, -1 } }, + // 16X16 + { { -1, 0 }, + { 0, -1 }, + { -1, 1 }, + { 1, -1 }, + { -1, -1 }, + { -3, 0 }, + { 0, -3 }, + { -3, -3 } }, + // 16X32 + { { 0, -1 }, + { -1, 0 }, + { 2, -1 }, + { -1, -1 }, + { -1, 1 }, + { 0, -3 }, + { -3, 0 }, + { -3, -3 } }, + // 32X16 + { { -1, 0 }, + { 0, -1 }, + { -1, 2 }, + { -1, -1 }, + { 1, -1 }, + { -3, 0 }, + { 0, -3 }, + { -3, -3 } }, + // 32X32 + { { -1, 1 }, + { 1, -1 }, + { -1, 2 }, + { 2, -1 }, + { -1, -1 }, + { -3, 0 }, + { 0, -3 }, + { -3, -3 } }, + // 32X64 + { { 0, -1 }, + { -1, 0 }, + { 4, -1 }, + { -1, 2 }, + { -1, -1 }, + { 0, -3 }, + { -3, 0 }, + { 2, -1 } }, + // 64X32 + { { -1, 0 }, + { 0, -1 }, + { -1, 4 }, + { 2, -1 }, + { -1, -1 }, + { -3, 0 }, + { 0, -3 
}, + { -1, 2 } }, + // 64X64 + { { -1, 3 }, + { 3, -1 }, + { -1, 4 }, + { 4, -1 }, + { -1, -1 }, + { -1, 0 }, + { 0, -1 }, + { -1, 6 } } +}; + +static const int idx_n_column_to_subblock[4][2] = { + { 1, 2 }, { 1, 3 }, { 3, 2 }, { 3, 3 } +}; + +// clamp_mv_ref +#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units + +static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) { + clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER, + xd->mb_to_right_edge + MV_BORDER, xd->mb_to_top_edge - MV_BORDER, + xd->mb_to_bottom_edge + MV_BORDER); +} + +// This function returns either the appropriate sub block or block's mv +// on whether the block_size < 8x8 and we have check_sub_blocks set. +static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv, + int search_col, int block_idx) { + return block_idx >= 0 && candidate->sb_type < BLOCK_8X8 + ? candidate + ->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] + .as_mv[which_mv] + : candidate->mv[which_mv]; +} + +// Performs mv sign inversion if indicated by the reference frame combination. +static INLINE int_mv scale_mv(const MODE_INFO *mi, int ref, + const MV_REFERENCE_FRAME this_ref_frame, + const int *ref_sign_bias) { + int_mv mv = mi->mv[ref]; + if (ref_sign_bias[mi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + return mv; +} + +// This macro is used to add a motion vector mv_ref list if it isn't +// already in the list. If it's the second motion vector it will also +// skip all additional processing and jump to Done! +#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \ + do { \ + if (refmv_count) { \ + if ((mv).as_int != (mv_ref_list)[0].as_int) { \ + (mv_ref_list)[(refmv_count)] = (mv); \ + goto Done; \ + } \ + } else { \ + (mv_ref_list)[(refmv_count)++] = (mv); \ + } \ + } while (0) + +// If either reference frame is different, not INTRA, and they +// are different from each other scale and add the mv to our list. 
+#define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \ + mv_ref_list, Done) \ + do { \ + if (is_inter_block(mbmi)) { \ + if ((mbmi)->ref_frame[0] != (ref_frame)) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ + refmv_count, mv_ref_list, Done); \ + if (has_second_ref(mbmi) && (mbmi)->ref_frame[1] != (ref_frame) && \ + (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ + refmv_count, mv_ref_list, Done); \ + } \ + } while (0) + +// Checks that the given mi_row, mi_col and search point +// are inside the borders of the tile. +static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row, + int mi_rows, const POSITION *mi_pos) { + return !(mi_row + mi_pos->row < 0 || + mi_col + mi_pos->col < tile->mi_col_start || + mi_row + mi_pos->row >= mi_rows || + mi_col + mi_pos->col >= tile->mi_col_end); +} + +// TODO(jingning): this mv clamping function should be block size dependent. +static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { + clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN, + xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, + xd->mb_to_top_edge - LEFT_TOP_MARGIN, + xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); +} + +static INLINE void lower_mv_precision(MV *mv, int allow_hp) { + const int use_hp = allow_hp && use_mv_hp(mv); + if (!use_hp) { + if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1); + if (mv->col & 1) mv->col += (mv->col > 0 ? 
-1 : 1); + } +} + +typedef void (*find_mv_refs_sync)(void *const data, int mi_row); +void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, int mi_row, int mi_col, + uint8_t *mode_context); + +// check a list of motion vectors by sad score using a number rows of pixels +// above and a number cols of pixels in the left to select the one with best +// score to use as ref motion vector +void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, int_mv *mvlist, + int_mv *nearest_mv, int_mv *near_mv); + +void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block, + int ref, int mi_row, int mi_col, + int_mv *nearest_mv, int_mv *near_mv, + uint8_t *mode_context); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_MVREF_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_onyxc_int.h b/media/libvpx/libvpx/vp9/common/vp9_onyxc_int.h new file mode 100644 index 0000000000..1cfc12f6fa --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_onyxc_int.h @@ -0,0 +1,468 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VP9_COMMON_VP9_ONYXC_INT_H_ +#define VPX_VP9_COMMON_VP9_ONYXC_INT_H_ + +#include "./vpx_config.h" +#include "vpx/internal/vpx_codec_internal.h" +#include "vpx_util/vpx_thread.h" +#include "./vp9_rtcd.h" +#include "vp9/common/vp9_alloccommon.h" +#include "vp9/common/vp9_loopfilter.h" +#include "vp9/common/vp9_entropymv.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_frame_buffers.h" +#include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_tile_common.h" + +#if CONFIG_VP9_POSTPROC +#include "vp9/common/vp9_postproc.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define REFS_PER_FRAME 3 + +#define REF_FRAMES_LOG2 3 +#define REF_FRAMES (1 << REF_FRAMES_LOG2) + +// 1 scratch frame for the new frame, REFS_PER_FRAME for scaled references on +// the encoder. +#define FRAME_BUFFERS (REF_FRAMES + 1 + REFS_PER_FRAME) + +#define FRAME_CONTEXTS_LOG2 2 +#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) + +#define NUM_PING_PONG_BUFFERS 2 + +extern const struct { + PARTITION_CONTEXT above; + PARTITION_CONTEXT left; +} partition_context_lookup[BLOCK_SIZES]; + +typedef enum { + SINGLE_REFERENCE = 0, + COMPOUND_REFERENCE = 1, + REFERENCE_MODE_SELECT = 2, + REFERENCE_MODES = 3, +} REFERENCE_MODE; + +typedef struct { + int_mv mv[2]; + MV_REFERENCE_FRAME ref_frame[2]; +} MV_REF; + +typedef struct { + int ref_count; + MV_REF *mvs; + int mi_rows; + int mi_cols; + uint8_t released; + + // Note that frame_index/frame_coding_index are only set by set_frame_index() + // on the encoder side. + + // TODO(angiebird): Set frame_index/frame_coding_index on the decoder side + // properly. + int frame_index; // Display order in the video, it's equivalent to the + // show_idx defined in EncodeFrameInfo. + int frame_coding_index; // The coding order (starting from zero) of this + // frame. 
+ vpx_codec_frame_buffer_t raw_frame_buffer; + YV12_BUFFER_CONFIG buf; +} RefCntBuffer; + +typedef struct BufferPool { + // Private data associated with the frame buffer callbacks. + void *cb_priv; + + vpx_get_frame_buffer_cb_fn_t get_fb_cb; + vpx_release_frame_buffer_cb_fn_t release_fb_cb; + + RefCntBuffer frame_bufs[FRAME_BUFFERS]; + + // Frame buffers allocated internally by the codec. + InternalFrameBufferList int_frame_buffers; +} BufferPool; + +typedef struct VP9Common { + struct vpx_internal_error_info error; + vpx_color_space_t color_space; + vpx_color_range_t color_range; + int width; + int height; + int render_width; + int render_height; + int last_width; + int last_height; + + // TODO(jkoleszar): this implies chroma ss right now, but could vary per + // plane. Revisit as part of the future change to YV12_BUFFER_CONFIG to + // support additional planes. + int subsampling_x; + int subsampling_y; + +#if CONFIG_VP9_HIGHBITDEPTH + int use_highbitdepth; // Marks if we need to use 16bit frame buffers. +#endif + + YV12_BUFFER_CONFIG *frame_to_show; + RefCntBuffer *prev_frame; + + // TODO(hkuang): Combine this with cur_buf in macroblockd. + RefCntBuffer *cur_frame; + + int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */ + + // Prepare ref_frame_map for the next frame. + // Only used in frame parallel decode. + int next_ref_frame_map[REF_FRAMES]; + + // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and + // roll new_fb_idx into it. 
+ + // Each frame can reference REFS_PER_FRAME buffers + RefBuffer frame_refs[REFS_PER_FRAME]; + + int new_fb_idx; + + int cur_show_frame_fb_idx; + +#if CONFIG_VP9_POSTPROC + YV12_BUFFER_CONFIG post_proc_buffer; + YV12_BUFFER_CONFIG post_proc_buffer_int; +#endif + + FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ + FRAME_TYPE frame_type; + + int show_frame; + int last_show_frame; + int show_existing_frame; + + // Flag signaling that the frame is encoded using only INTRA modes. + uint8_t intra_only; + uint8_t last_intra_only; + + int allow_high_precision_mv; + + // Flag signaling that the frame context should be reset to default values. + // 0 or 1 implies don't reset, 2 reset just the context specified in the + // frame header, 3 reset all contexts. + int reset_frame_context; + + // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in + // MODE_INFO (8-pixel) units. + int MBs; + int mb_rows, mi_rows; + int mb_cols, mi_cols; + int mi_stride; + + /* profile settings */ + TX_MODE tx_mode; + + int base_qindex; + int y_dc_delta_q; + int uv_dc_delta_q; + int uv_ac_delta_q; + int16_t y_dequant[MAX_SEGMENTS][2]; + int16_t uv_dequant[MAX_SEGMENTS][2]; + + /* We allocate a MODE_INFO struct for each macroblock, together with + an extra row on top and column on the left to simplify prediction. */ + int mi_alloc_size; + MODE_INFO *mip; /* Base of allocated array */ + MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ + + // TODO(agrange): Move prev_mi into encoder structure. + // prev_mip and prev_mi will only be allocated in VP9 encoder. + MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ + MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ + + // Separate mi functions between encoder and decoder. + int (*alloc_mi)(struct VP9Common *cm, int mi_size); + void (*free_mi)(struct VP9Common *cm); + void (*setup_mi)(struct VP9Common *cm); + + // Grid of pointers to 8x8 MODE_INFO structs. 
Any 8x8 not in the visible + // area will be NULL. + MODE_INFO **mi_grid_base; + MODE_INFO **mi_grid_visible; + MODE_INFO **prev_mi_grid_base; + MODE_INFO **prev_mi_grid_visible; + + // Whether to use previous frame's motion vectors for prediction. + int use_prev_frame_mvs; + + // Persistent mb segment id map used in prediction. + int seg_map_idx; + int prev_seg_map_idx; + + uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS]; + uint8_t *last_frame_seg_map; + uint8_t *current_frame_seg_map; + int seg_map_alloc_size; + + INTERP_FILTER interp_filter; + + loop_filter_info_n lf_info; + + int refresh_frame_context; /* Two state 0 = NO, 1 = YES */ + + int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ + + struct loopfilter lf; + struct segmentation seg; + + // Context probabilities for reference frame prediction + MV_REFERENCE_FRAME comp_fixed_ref; + MV_REFERENCE_FRAME comp_var_ref[2]; + REFERENCE_MODE reference_mode; + + FRAME_CONTEXT *fc; /* this frame entropy */ + FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS + unsigned int frame_context_idx; /* Context to use/update */ + FRAME_COUNTS counts; + + // TODO(angiebird): current_video_frame/current_frame_coding_index into a + // structure + unsigned int current_video_frame; + // Each show or no show frame is assigned with a coding index based on its + // coding order (starting from zero). + + // Current frame's coding index. + int current_frame_coding_index; + BITSTREAM_PROFILE profile; + + // VPX_BITS_8 in profile 0 or 1, VPX_BITS_10 or VPX_BITS_12 in profile 2 or 3. + vpx_bit_depth_t bit_depth; + vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer + +#if CONFIG_VP9_POSTPROC + struct postproc_state postproc_state; +#endif + + int error_resilient_mode; + int frame_parallel_decoding_mode; + + int log2_tile_cols, log2_tile_rows; + int byte_alignment; + int skip_loop_filter; + + // External BufferPool passed from outside. 
+ BufferPool *buffer_pool; + + PARTITION_CONTEXT *above_seg_context; + ENTROPY_CONTEXT *above_context; + int above_context_alloc_cols; + + int lf_row; +} VP9_COMMON; + +static INLINE void init_frame_indexes(VP9_COMMON *cm) { + cm->current_video_frame = 0; + cm->current_frame_coding_index = 0; +} + +static INLINE void update_frame_indexes(VP9_COMMON *cm, int show_frame) { + if (show_frame) { + // Don't increment frame counters if this was an altref buffer + // update not a real frame + ++cm->current_video_frame; + } + ++cm->current_frame_coding_index; +} + +typedef struct { + int frame_width; + int frame_height; + int render_frame_width; + int render_frame_height; + int mi_rows; + int mi_cols; + int mb_rows; + int mb_cols; + int num_mbs; + vpx_bit_depth_t bit_depth; +} FRAME_INFO; + +static INLINE void init_frame_info(FRAME_INFO *frame_info, + const VP9_COMMON *cm) { + frame_info->frame_width = cm->width; + frame_info->frame_height = cm->height; + frame_info->render_frame_width = cm->render_width; + frame_info->render_frame_height = cm->render_height; + frame_info->mi_cols = cm->mi_cols; + frame_info->mi_rows = cm->mi_rows; + frame_info->mb_cols = cm->mb_cols; + frame_info->mb_rows = cm->mb_rows; + frame_info->num_mbs = cm->MBs; + frame_info->bit_depth = cm->bit_depth; + // TODO(angiebird): Figure out how to get subsampling_x/y here +} + +static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) { + if (index < 0 || index >= FRAME_BUFFERS) return NULL; + if (cm->error.error_code != VPX_CODEC_OK) return NULL; + return &cm->buffer_pool->frame_bufs[index].buf; +} + +static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP9_COMMON *cm, int index) { + if (index < 0 || index >= REF_FRAMES) return NULL; + if (cm->ref_frame_map[index] < 0) return NULL; + assert(cm->ref_frame_map[index] < FRAME_BUFFERS); + return &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf; +} + +static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { + return 
&cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf; +} + +static INLINE int get_free_fb(VP9_COMMON *cm) { + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + int i; + + for (i = 0; i < FRAME_BUFFERS; ++i) + if (frame_bufs[i].ref_count == 0) break; + + if (i != FRAME_BUFFERS) { + frame_bufs[i].ref_count = 1; + } else { + // Reset i to be INVALID_IDX to indicate no free buffer found. + i = INVALID_IDX; + } + + return i; +} + +static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) { + const int ref_index = *idx; + + if (ref_index >= 0 && bufs[ref_index].ref_count > 0) + bufs[ref_index].ref_count--; + + *idx = new_idx; + + bufs[new_idx].ref_count++; +} + +static INLINE int mi_cols_aligned_to_sb(int n_mis) { + return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2); +} + +static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { + return cm->frame_type == KEY_FRAME || cm->intra_only; +} + +static INLINE void set_partition_probs(const VP9_COMMON *const cm, + MACROBLOCKD *const xd) { + xd->partition_probs = + frame_is_intra_only(cm) + ? 
&vp9_kf_partition_probs[0] + : (const vpx_prob(*)[PARTITION_TYPES - 1]) cm->fc->partition_prob; +} + +static INLINE void vp9_init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd, + tran_low_t *dqcoeff) { + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + xd->plane[i].dqcoeff = dqcoeff; + xd->above_context[i] = + cm->above_context + + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); + + if (get_plane_type(i) == PLANE_TYPE_Y) { + memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant)); + } else { + memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); + } + xd->fc = cm->fc; + } + + xd->above_seg_context = cm->above_seg_context; + xd->mi_stride = cm->mi_stride; + xd->error_info = &cm->error; + + set_partition_probs(cm, xd); +} + +static INLINE const vpx_prob *get_partition_probs(const MACROBLOCKD *xd, + int ctx) { + return xd->partition_probs[ctx]; +} + +static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { + const int above_idx = mi_col * 2; + const int left_idx = (mi_row * 2) & 15; + int i; + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &xd->plane[i]; + pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x]; + pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y]; + } +} + +static INLINE int calc_mi_size(int len) { + // len is in mi units. + return len + MI_BLOCK_SIZE; +} + +static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, + int mi_row, int bh, int mi_col, int bw, + int mi_rows, int mi_cols) { + xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); + xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8; + xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); + xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; + + // Are edges available for intra prediction? + xd->above_mi = (mi_row != 0) ? xd->mi[-xd->mi_stride] : NULL; + xd->left_mi = (mi_col > tile->mi_col_start) ? 
xd->mi[-1] : NULL; +} + +static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row, + int mi_col, BLOCK_SIZE subsize, + BLOCK_SIZE bsize) { + PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; + PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + + // num_4x4_blocks_wide_lookup[bsize] / 2 + const int bs = num_8x8_blocks_wide_lookup[bsize]; + + // update the partition context at the end notes. set partition bits + // of block sizes larger than the current one to be one, and partition + // bits of smaller block sizes to be zero. + memset(above_ctx, partition_context_lookup[subsize].above, bs); + memset(left_ctx, partition_context_lookup[subsize].left, bs); +} + +static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row, + int mi_col, BLOCK_SIZE bsize) { + const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; + const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + const int bsl = mi_width_log2_lookup[bsize]; + int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1; + + assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); + assert(bsl >= 0); + + return (left * 2 + above) + bsl * PARTITION_PLOFFSET; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_ONYXC_INT_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_postproc.c b/media/libvpx/libvpx/vp9/common/vp9_postproc.c new file mode 100644 index 0000000000..96519f0051 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_postproc.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <math.h> +#include <stdlib.h> +#include <stdio.h> + +#include "./vpx_dsp_rtcd.h" +#include "./vpx_config.h" +#include "./vpx_scale_rtcd.h" +#include "./vp9_rtcd.h" + +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_dsp/postproc.h" +#include "vpx_ports/mem.h" +#include "vpx_ports/system_state.h" +#include "vpx_scale/vpx_scale.h" +#include "vpx_scale/yv12config.h" + +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_postproc.h" + +#if CONFIG_VP9_POSTPROC + +static const uint8_t q_diff_thresh = 20; +static const uint8_t last_q_thresh = 170; +extern const int16_t vpx_rv[]; + +#if CONFIG_VP9_HIGHBITDEPTH +static const int16_t kernel5[] = { 1, 1, 4, 1, 1 }; + +void vp9_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr, + uint16_t *dst_ptr, + int src_pixels_per_line, + int dst_pixels_per_line, int rows, + int cols, int flimit) { + uint16_t const *p_src; + uint16_t *p_dst; + int row, col, i, v, kernel; + int pitch = src_pixels_per_line; + uint16_t d[8]; + + for (row = 0; row < rows; row++) { + // post_proc_down for one row. 
+ p_src = src_ptr; + p_dst = dst_ptr; + + for (col = 0; col < cols; col++) { + kernel = 4; + v = p_src[col]; + + for (i = -2; i <= 2; i++) { + if (abs(v - p_src[col + i * pitch]) > flimit) goto down_skip_convolve; + + kernel += kernel5[2 + i] * p_src[col + i * pitch]; + } + + v = (kernel >> 3); + + down_skip_convolve: + p_dst[col] = v; + } + + /* now post_proc_across */ + p_src = dst_ptr; + p_dst = dst_ptr; + + for (i = 0; i < 8; i++) d[i] = p_src[i]; + + for (col = 0; col < cols; col++) { + kernel = 4; + v = p_src[col]; + + d[col & 7] = v; + + for (i = -2; i <= 2; i++) { + if (abs(v - p_src[col + i]) > flimit) goto across_skip_convolve; + + kernel += kernel5[2 + i] * p_src[col + i]; + } + + d[col & 7] = (kernel >> 3); + + across_skip_convolve: + if (col >= 2) p_dst[col - 2] = d[(col - 2) & 7]; + } + + /* handle the last two pixels */ + p_dst[col - 2] = d[(col - 2) & 7]; + p_dst[col - 1] = d[(col - 1) & 7]; + + /* next row */ + src_ptr += pitch; + dst_ptr += dst_pixels_per_line; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static int q2mbl(int x) { + if (x < 20) x = 20; + + x = 50 + (x - 50) * 10 / 8; + return x * x / 3; +} + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_mbpost_proc_across_ip_c(uint16_t *src, int pitch, int rows, + int cols, int flimit) { + int r, c, i; + + uint16_t *s = src; + uint16_t d[16]; + + for (r = 0; r < rows; r++) { + int sumsq = 0; + int sum = 0; + + for (i = -8; i <= 6; i++) { + sumsq += s[i] * s[i]; + sum += s[i]; + d[i + 8] = 0; + } + + for (c = 0; c < cols + 8; c++) { + int x = s[c + 7] - s[c - 8]; + int y = s[c + 7] + s[c - 8]; + + sum += x; + sumsq += x * y; + + d[c & 15] = s[c]; + + if (sumsq * 15 - sum * sum < flimit) { + d[c & 15] = (8 + sum + s[c]) >> 4; + } + + s[c - 8] = d[(c - 8) & 15]; + } + + s += pitch; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch, int rows, int cols, + int flimit) { + int r, c, i; + const int16_t *rv3 = &vpx_rv[63 & 
rand()]; // NOLINT + + for (c = 0; c < cols; c++) { + uint16_t *s = &dst[c]; + int sumsq = 0; + int sum = 0; + uint16_t d[16]; + const int16_t *rv2 = rv3 + ((c * 17) & 127); + + for (i = -8; i <= 6; i++) { + sumsq += s[i * pitch] * s[i * pitch]; + sum += s[i * pitch]; + } + + for (r = 0; r < rows + 8; r++) { + sumsq += s[7 * pitch] * s[7 * pitch] - s[-8 * pitch] * s[-8 * pitch]; + sum += s[7 * pitch] - s[-8 * pitch]; + d[r & 15] = s[0]; + + if (sumsq * 15 - sum * sum < flimit) { + d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4; + } + + s[-8 * pitch] = d[(r - 8) & 15]; + s += pitch; + } + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void deblock_and_de_macro_block(VP9_COMMON *cm, + YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *post, int q, + int low_var_thresh, int flag, + uint8_t *limits) { + (void)low_var_thresh; + (void)flag; +#if CONFIG_VP9_HIGHBITDEPTH + if (source->flags & YV12_FLAG_HIGHBITDEPTH) { + double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; + int ppl = (int)(level + .5); + vp9_highbd_post_proc_down_and_across( + CONVERT_TO_SHORTPTR(source->y_buffer), + CONVERT_TO_SHORTPTR(post->y_buffer), source->y_stride, post->y_stride, + source->y_height, source->y_width, ppl); + + vp9_highbd_mbpost_proc_across_ip(CONVERT_TO_SHORTPTR(post->y_buffer), + post->y_stride, post->y_height, + post->y_width, q2mbl(q)); + + vp9_highbd_mbpost_proc_down(CONVERT_TO_SHORTPTR(post->y_buffer), + post->y_stride, post->y_height, post->y_width, + q2mbl(q)); + + vp9_highbd_post_proc_down_and_across( + CONVERT_TO_SHORTPTR(source->u_buffer), + CONVERT_TO_SHORTPTR(post->u_buffer), source->uv_stride, post->uv_stride, + source->uv_height, source->uv_width, ppl); + vp9_highbd_post_proc_down_and_across( + CONVERT_TO_SHORTPTR(source->v_buffer), + CONVERT_TO_SHORTPTR(post->v_buffer), source->uv_stride, post->uv_stride, + source->uv_height, source->uv_width, ppl); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + vp9_deblock(cm, source, post, q, limits); + 
vpx_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, + post->y_width, q2mbl(q)); + vpx_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, + post->y_width, q2mbl(q)); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH +} + +void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) { + const int ppl = + (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q + 0.0065 + 0.5); +#if CONFIG_VP9_HIGHBITDEPTH + if (src->flags & YV12_FLAG_HIGHBITDEPTH) { + int i; + const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer, + src->v_buffer }; + const int src_strides[3] = { src->y_stride, src->uv_stride, + src->uv_stride }; + const int src_widths[3] = { src->y_width, src->uv_width, src->uv_width }; + const int src_heights[3] = { src->y_height, src->uv_height, + src->uv_height }; + + uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer }; + const int dst_strides[3] = { dst->y_stride, dst->uv_stride, + dst->uv_stride }; + for (i = 0; i < MAX_MB_PLANE; ++i) { + vp9_highbd_post_proc_down_and_across( + CONVERT_TO_SHORTPTR(srcs[i]), CONVERT_TO_SHORTPTR(dsts[i]), + src_strides[i], dst_strides[i], src_heights[i], src_widths[i], ppl); + } + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + int mbr; + const int mb_rows = cm->mb_rows; + const int mb_cols = cm->mb_cols; + memset(limits, (unsigned char)ppl, 16 * mb_cols); + + for (mbr = 0; mbr < mb_rows; mbr++) { + vpx_post_proc_down_and_across_mb_row( + src->y_buffer + 16 * mbr * src->y_stride, + dst->y_buffer + 16 * mbr * dst->y_stride, src->y_stride, + dst->y_stride, src->y_width, limits, 16); + vpx_post_proc_down_and_across_mb_row( + src->u_buffer + 8 * mbr * src->uv_stride, + dst->u_buffer + 8 * mbr * dst->uv_stride, src->uv_stride, + dst->uv_stride, src->uv_width, limits, 8); + vpx_post_proc_down_and_across_mb_row( + src->v_buffer + 8 * mbr * src->uv_stride, + dst->v_buffer + 8 * mbr * dst->uv_stride, 
src->uv_stride, + dst->uv_stride, src->uv_width, limits, 8); + } +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH +} + +void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) { + vp9_deblock(cm, src, dst, q, limits); +} + +static void swap_mi_and_prev_mi(VP9_COMMON *cm) { + // Current mip will be the prev_mip for the next frame. + MODE_INFO *temp = cm->postproc_state.prev_mip; + cm->postproc_state.prev_mip = cm->mip; + cm->mip = temp; + + // Update the upper left visible macroblock ptrs. + cm->mi = cm->mip + cm->mi_stride + 1; + cm->postproc_state.prev_mi = cm->postproc_state.prev_mip + cm->mi_stride + 1; +} + +int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, + vp9_ppflags_t *ppflags, int unscaled_width) { + const int q = VPXMIN(105, cm->lf.filter_level * 2); + const int flags = ppflags->post_proc_flag; + YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer; + struct postproc_state *const ppstate = &cm->postproc_state; + + if (!cm->frame_to_show) return -1; + + if (!flags) { + *dest = *cm->frame_to_show; + return 0; + } + + vpx_clear_system_state(); + + // Alloc memory for prev_mip in the first frame. + if (cm->current_video_frame == 1) { + ppstate->last_base_qindex = cm->base_qindex; + ppstate->last_frame_valid = 1; + } + + if ((flags & VP9D_MFQE) && ppstate->prev_mip == NULL) { + ppstate->prev_mip = vpx_calloc(cm->mi_alloc_size, sizeof(*cm->mip)); + if (!ppstate->prev_mip) { + return 1; + } + ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1; + } + + // Allocate post_proc_buffer_int if needed. 
+ if ((flags & VP9D_MFQE) && !cm->post_proc_buffer_int.buffer_alloc) { + if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { + const int width = ALIGN_POWER_OF_TWO(cm->width, 4); + const int height = ALIGN_POWER_OF_TWO(cm->height, 4); + + if (vpx_alloc_frame_buffer(&cm->post_proc_buffer_int, width, height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif // CONFIG_VP9_HIGHBITDEPTH + VP9_ENC_BORDER_IN_PIXELS, + cm->byte_alignment) < 0) { + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate MFQE framebuffer"); + } + + // Ensure that postproc is set to all 0s so that post proc + // doesn't pull random data in from edge. + memset(cm->post_proc_buffer_int.buffer_alloc, 128, + cm->post_proc_buffer.frame_size); + } + } + + if (vpx_realloc_frame_buffer(&cm->post_proc_buffer, cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_DEC_BORDER_IN_PIXELS, cm->byte_alignment, + NULL, NULL, NULL) < 0) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate post-processing buffer"); + + if (flags & (VP9D_DEMACROBLOCK | VP9D_DEBLOCK)) { + if (!cm->postproc_state.limits) { + cm->postproc_state.limits = + vpx_calloc(unscaled_width, sizeof(*cm->postproc_state.limits)); + if (!cm->postproc_state.limits) return 1; + } + } + + if (flags & VP9D_ADDNOISE) { + if (!cm->postproc_state.generated_noise) { + cm->postproc_state.generated_noise = vpx_calloc( + cm->width + 256, sizeof(*cm->postproc_state.generated_noise)); + if (!cm->postproc_state.generated_noise) return 1; + } + } + + if ((flags & VP9D_MFQE) && cm->current_video_frame >= 2 && + ppstate->last_frame_valid && cm->bit_depth == 8 && + ppstate->last_base_qindex <= last_q_thresh && + cm->base_qindex - ppstate->last_base_qindex >= q_diff_thresh) { + vp9_mfqe(cm); + // TODO(jackychen): Consider whether enable deblocking by default + // if mfqe is enabled. 
Need to take both the quality and the speed + // into consideration. + if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { + vpx_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int); + } + if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) { + deblock_and_de_macro_block(cm, &cm->post_proc_buffer_int, ppbuf, + q + (ppflags->deblocking_level - 5) * 10, 1, 0, + cm->postproc_state.limits); + } else if (flags & VP9D_DEBLOCK) { + vp9_deblock(cm, &cm->post_proc_buffer_int, ppbuf, q, + cm->postproc_state.limits); + } else { + vpx_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf); + } + } else if (flags & VP9D_DEMACROBLOCK) { + deblock_and_de_macro_block(cm, cm->frame_to_show, ppbuf, + q + (ppflags->deblocking_level - 5) * 10, 1, 0, + cm->postproc_state.limits); + } else if (flags & VP9D_DEBLOCK) { + vp9_deblock(cm, cm->frame_to_show, ppbuf, q, cm->postproc_state.limits); + } else { + vpx_yv12_copy_frame(cm->frame_to_show, ppbuf); + } + + ppstate->last_base_qindex = cm->base_qindex; + ppstate->last_frame_valid = 1; + if (flags & VP9D_ADDNOISE) { + const int noise_level = ppflags->noise_level; + if (ppstate->last_q != q || ppstate->last_noise != noise_level) { + double sigma; + vpx_clear_system_state(); + sigma = noise_level + .5 + .6 * q / 63.0; + ppstate->clamp = + vpx_setup_noise(sigma, ppstate->generated_noise, cm->width + 256); + ppstate->last_q = q; + ppstate->last_noise = noise_level; + } + vpx_plane_add_noise(ppbuf->y_buffer, ppstate->generated_noise, + ppstate->clamp, ppstate->clamp, ppbuf->y_width, + ppbuf->y_height, ppbuf->y_stride); + } + + *dest = *ppbuf; + + /* handle problem with extending borders */ + dest->y_width = cm->width; + dest->y_height = cm->height; + dest->uv_width = dest->y_width >> cm->subsampling_x; + dest->uv_height = dest->y_height >> cm->subsampling_y; + + if (flags & VP9D_MFQE) swap_mi_and_prev_mi(cm); + return 0; +} +#endif // CONFIG_VP9_POSTPROC diff --git a/media/libvpx/libvpx/vp9/common/vp9_postproc.h 
b/media/libvpx/libvpx/vp9/common/vp9_postproc.h new file mode 100644 index 0000000000..bbe3aed835 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_postproc.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_VP9_POSTPROC_H_ +#define VPX_VP9_COMMON_VP9_POSTPROC_H_ + +#include "vpx_ports/mem.h" +#include "vpx_scale/yv12config.h" +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_mfqe.h" +#include "vp9/common/vp9_ppflags.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct postproc_state { + int last_q; + int last_noise; + int last_base_qindex; + int last_frame_valid; + MODE_INFO *prev_mip; + MODE_INFO *prev_mi; + int clamp; + uint8_t *limits; + int8_t *generated_noise; +}; + +struct VP9Common; + +#define MFQE_PRECISION 4 + +int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, + vp9_ppflags_t *ppflags, int unscaled_width); + +void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits); + +void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_POSTPROC_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_ppflags.h b/media/libvpx/libvpx/vp9/common/vp9_ppflags.h new file mode 100644 index 0000000000..a0e3017626 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_ppflags.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_VP9_PPFLAGS_H_ +#define VPX_VP9_COMMON_VP9_PPFLAGS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + VP9D_NOFILTERING = 0, + VP9D_DEBLOCK = 1 << 0, + VP9D_DEMACROBLOCK = 1 << 1, + VP9D_ADDNOISE = 1 << 2, + VP9D_MFQE = 1 << 3 +}; + +typedef struct { + int post_proc_flag; + int deblocking_level; + int noise_level; +} vp9_ppflags_t; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_PPFLAGS_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_pred_common.c b/media/libvpx/libvpx/vp9/common/vp9_pred_common.c new file mode 100644 index 0000000000..375cb4d76c --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_pred_common.c @@ -0,0 +1,316 @@ + +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_pred_common.h" +#include "vp9/common/vp9_seg_common.h" + +int vp9_compound_reference_allowed(const VP9_COMMON *cm) { + int i; + for (i = 1; i < REFS_PER_FRAME; ++i) + if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) return 1; + + return 0; +} + +void vp9_setup_compound_reference_mode(VP9_COMMON *cm) { + if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[GOLDEN_FRAME]) { + cm->comp_fixed_ref = ALTREF_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = GOLDEN_FRAME; + } else if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[ALTREF_FRAME]) { + cm->comp_fixed_ref = GOLDEN_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } else { + cm->comp_fixed_ref = LAST_FRAME; + cm->comp_var_ref[0] = GOLDEN_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } +} + +int vp9_get_reference_mode_context(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + int ctx; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. 
+ if (has_above && has_left) { // both edges available + if (!has_second_ref(above_mi) && !has_second_ref(left_mi)) + // neither edge uses comp pred (0/1) + ctx = (above_mi->ref_frame[0] == cm->comp_fixed_ref) ^ + (left_mi->ref_frame[0] == cm->comp_fixed_ref); + else if (!has_second_ref(above_mi)) + // one of two edges uses comp pred (2/3) + ctx = 2 + (above_mi->ref_frame[0] == cm->comp_fixed_ref || + !is_inter_block(above_mi)); + else if (!has_second_ref(left_mi)) + // one of two edges uses comp pred (2/3) + ctx = 2 + (left_mi->ref_frame[0] == cm->comp_fixed_ref || + !is_inter_block(left_mi)); + else // both edges use comp pred (4) + ctx = 4; + } else if (has_above || has_left) { // one edge available + const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; + + if (!has_second_ref(edge_mi)) + // edge does not use comp pred (0/1) + ctx = edge_mi->ref_frame[0] == cm->comp_fixed_ref; + else + // edge uses comp pred (3) + ctx = 3; + } else { // no edges available (1) + ctx = 1; + } + assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS); + return ctx; +} + +// Returns a context number for the given MB prediction signal +int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + int pred_context; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int above_in_image = !!above_mi; + const int left_in_image = !!left_mi; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. 
+ const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + const int var_ref_idx = !fix_ref_idx; + + if (above_in_image && left_in_image) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + + if (above_intra && left_intra) { // intra/intra (2) + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter + const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; + + if (!has_second_ref(edge_mi)) // single pred (1/3) + pred_context = 1 + 2 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); + else // comp pred (1/3) + pred_context = + 1 + 2 * (edge_mi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]); + } else { // inter/inter + const int l_sg = !has_second_ref(left_mi); + const int a_sg = !has_second_ref(above_mi); + const MV_REFERENCE_FRAME vrfa = + a_sg ? above_mi->ref_frame[0] : above_mi->ref_frame[var_ref_idx]; + const MV_REFERENCE_FRAME vrfl = + l_sg ? left_mi->ref_frame[0] : left_mi->ref_frame[var_ref_idx]; + + if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) { + pred_context = 0; + } else if (l_sg && a_sg) { // single/single + if ((vrfa == cm->comp_fixed_ref && vrfl == cm->comp_var_ref[0]) || + (vrfl == cm->comp_fixed_ref && vrfa == cm->comp_var_ref[0])) + pred_context = 4; + else if (vrfa == vrfl) + pred_context = 3; + else + pred_context = 1; + } else if (l_sg || a_sg) { // single/comp + const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl; + const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl; + if (vrfc == cm->comp_var_ref[1] && rfs != cm->comp_var_ref[1]) + pred_context = 1; + else if (rfs == cm->comp_var_ref[1] && vrfc != cm->comp_var_ref[1]) + pred_context = 2; + else + pred_context = 4; + } else if (vrfa == vrfl) { // comp/comp + pred_context = 4; + } else { + pred_context = 2; + } + } + } else if (above_in_image || left_in_image) { // one edge available + const MODE_INFO *edge_mi = above_in_image ? 
above_mi : left_mi; + + if (!is_inter_block(edge_mi)) { + pred_context = 2; + } else { + if (has_second_ref(edge_mi)) + pred_context = + 4 * (edge_mi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]); + else + pred_context = 3 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); + } + } else { // no edges available (2) + pred_context = 2; + } + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + + return pred_context; +} + +int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { + int pred_context; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + + if (above_intra && left_intra) { // intra/intra + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter or inter/intra + const MODE_INFO *edge_mi = above_intra ? 
left_mi : above_mi; + if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); + else + pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || + edge_mi->ref_frame[1] == LAST_FRAME); + } else { // inter/inter + const int above_has_second = has_second_ref(above_mi); + const int left_has_second = has_second_ref(left_mi); + const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; + + if (above_has_second && left_has_second) { + pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME || + left0 == LAST_FRAME || left1 == LAST_FRAME); + } else if (above_has_second || left_has_second) { + const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; + + if (rfs == LAST_FRAME) + pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME); + else + pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME); + } else { + pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME); + } + } + } else if (has_above || has_left) { // one edge available + const MODE_INFO *edge_mi = has_above ? 
// Returns the context (0..REF_CONTEXTS-1) used to code the second
// single-reference bit, which distinguishes GOLDEN_FRAME from ALTREF_FRAME
// (coded only when the reference is not LAST_FRAME). Consumed by
// vp9_get_pred_prob_single_ref_p2() to index single_ref_prob[ctx][1].
int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
  int pred_context;
  const MODE_INFO *const above_mi = xd->above_mi;
  const MODE_INFO *const left_mi = xd->left_mi;
  const int has_above = !!above_mi;
  const int has_left = !!left_mi;

  // Note:
  // The mode info data structure has a one element border above and to the
  // left of the entries corresponding to real macroblocks.
  // The prediction flags in these dummy entries are initialized to 0.
  if (has_above && has_left) {  // both edges available
    const int above_intra = !is_inter_block(above_mi);
    const int left_intra = !is_inter_block(left_mi);

    if (above_intra && left_intra) {  // intra/intra
      pred_context = 2;
    } else if (above_intra || left_intra) {  // intra/inter or inter/intra
      // Only the inter-coded neighbor carries reference-frame information.
      const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi;
      if (!has_second_ref(edge_mi)) {
        // A LAST_FRAME neighbor says nothing about GOLDEN vs ALTREF.
        if (edge_mi->ref_frame[0] == LAST_FRAME)
          pred_context = 3;
        else
          pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME);
      } else {
        pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
                                edge_mi->ref_frame[1] == GOLDEN_FRAME);
      }
    } else {  // inter/inter
      const int above_has_second = has_second_ref(above_mi);
      const int left_has_second = has_second_ref(left_mi);
      const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0];
      const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1];
      const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0];
      const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1];

      if (above_has_second && left_has_second) {
        if (above0 == left0 && above1 == left1)
          pred_context =
              3 * (above0 == GOLDEN_FRAME || above1 == GOLDEN_FRAME ||
                   left0 == GOLDEN_FRAME || left1 == GOLDEN_FRAME);
        else
          pred_context = 2;
      } else if (above_has_second || left_has_second) {
        // Mixed: rfs is the single-ref neighbor's frame, crf1/crf2 the
        // compound neighbor's pair.
        const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
        const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
        const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;

        if (rfs == GOLDEN_FRAME)
          pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
        else if (rfs == ALTREF_FRAME)
          pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME;
        else
          pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
      } else {
        if (above0 == LAST_FRAME && left0 == LAST_FRAME) {
          pred_context = 3;
        } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) {
          // Exactly one neighbor is LAST; only the other one is informative.
          const MV_REFERENCE_FRAME edge0 =
              (above0 == LAST_FRAME) ? left0 : above0;
          pred_context = 4 * (edge0 == GOLDEN_FRAME);
        } else {
          pred_context =
              2 * (above0 == GOLDEN_FRAME) + 2 * (left0 == GOLDEN_FRAME);
        }
      }
    }
  } else if (has_above || has_left) {  // one edge available
    const MODE_INFO *edge_mi = has_above ? above_mi : left_mi;

    // Intra neighbors and single-ref LAST neighbors are both uninformative.
    if (!is_inter_block(edge_mi) ||
        (edge_mi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mi)))
      pred_context = 2;
    else if (!has_second_ref(edge_mi))
      pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME);
    else
      pred_context = 3 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
                          edge_mi->ref_frame[1] == GOLDEN_FRAME);
  } else {  // no edges available (2)
    pred_context = 2;
  }
  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
  return pred_context;
}
+ */ + +#ifndef VPX_VP9_COMMON_VP9_PRED_COMMON_H_ +#define VPX_VP9_COMMON_VP9_PRED_COMMON_H_ + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vpx_dsp/vpx_dsp_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE int get_segment_id(const VP9_COMMON *cm, + const uint8_t *segment_ids, BLOCK_SIZE bsize, + int mi_row, int mi_col) { + const int mi_offset = mi_row * cm->mi_cols + mi_col; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + int x, y, segment_id = MAX_SEGMENTS; + + for (y = 0; y < ymis; ++y) + for (x = 0; x < xmis; ++x) + segment_id = + VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); + + assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); + return segment_id; +} + +static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) { + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int above_sip = (above_mi != NULL) ? above_mi->seg_id_predicted : 0; + const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0; + + return above_sip + left_sip; +} + +static INLINE vpx_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg, + const MACROBLOCKD *xd) { + return seg->pred_probs[vp9_get_pred_context_seg_id(xd)]; +} + +static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) { + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int above_skip = (above_mi != NULL) ? above_mi->skip : 0; + const int left_skip = (left_mi != NULL) ? 
left_mi->skip : 0; + return above_skip + left_skip; +} + +static INLINE vpx_prob vp9_get_skip_prob(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->skip_probs[vp9_get_skip_context(xd)]; +} + +// Returns a context number for the given MB prediction signal +static INLINE int get_pred_context_switchable_interp(const MACROBLOCKD *xd) { + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + const MODE_INFO *const left_mi = xd->left_mi; + const int left_type = left_mi ? left_mi->interp_filter : SWITCHABLE_FILTERS; + const MODE_INFO *const above_mi = xd->above_mi; + const int above_type = + above_mi ? above_mi->interp_filter : SWITCHABLE_FILTERS; + + if (left_type == above_type) + return left_type; + else if (left_type == SWITCHABLE_FILTERS) + return above_type; + else if (above_type == SWITCHABLE_FILTERS) + return left_type; + else + return SWITCHABLE_FILTERS; +} + +// The mode info data structure has a one element border above and to the +// left of the entries corresponding to real macroblocks. +// The prediction flags in these dummy entries are initialized to 0. +// 0 - inter/inter, inter/--, --/inter, --/-- +// 1 - intra/inter, inter/intra +// 2 - intra/--, --/intra +// 3 - intra/intra +static INLINE int get_intra_inter_context(const MACROBLOCKD *xd) { + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + return left_intra && above_intra ? 3 : left_intra || above_intra; + } else if (has_above || has_left) { // one edge available + return 2 * !is_inter_block(has_above ? 
above_mi : left_mi); + } + return 0; +} + +static INLINE vpx_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->intra_inter_prob[get_intra_inter_context(xd)]; +} + +int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_reference_mode_prob(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->comp_inter_prob[vp9_get_reference_mode_context(cm, xd)]; +} + +int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, + const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_pred_prob_comp_ref_p(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + const int pred_context = vp9_get_pred_context_comp_ref_p(cm, xd); + return cm->fc->comp_ref_prob[pred_context]; +} + +int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_pred_prob_single_ref_p1(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p1(xd)][0]; +} + +int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p2(xd)][1]; +} + +int vp9_compound_reference_allowed(const VP9_COMMON *cm); + +void vp9_setup_compound_reference_mode(VP9_COMMON *cm); + +// Returns a context number for the given MB prediction signal +// The mode info data structure has a one element border above and to the +// left of the entries corresponding to real blocks. +// The prediction flags in these dummy entries are initialized to 0. 
+static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { + const int max_tx_size = max_txsize_lookup[xd->mi[0]->sb_type]; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + int above_ctx = + (has_above && !above_mi->skip) ? (int)above_mi->tx_size : max_tx_size; + int left_ctx = + (has_left && !left_mi->skip) ? (int)left_mi->tx_size : max_tx_size; + if (!has_left) left_ctx = above_ctx; + + if (!has_above) above_ctx = left_ctx; + + return (above_ctx + left_ctx) > max_tx_size; +} + +static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx, + const struct tx_probs *tx_probs) { + switch (max_tx_size) { + case TX_8X8: return tx_probs->p8x8[ctx]; + case TX_16X16: return tx_probs->p16x16[ctx]; + case TX_32X32: return tx_probs->p32x32[ctx]; + default: assert(0 && "Invalid max_tx_size."); return NULL; + } +} + +static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, + struct tx_counts *tx_counts) { + switch (max_tx_size) { + case TX_8X8: return tx_counts->p8x8[ctx]; + case TX_16X16: return tx_counts->p16x16[ctx]; + case TX_32X32: return tx_counts->p32x32[ctx]; + default: assert(0 && "Invalid max_tx_size."); return NULL; + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_PRED_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_quant_common.c b/media/libvpx/libvpx/vp9/common/vp9_quant_common.c new file mode 100644 index 0000000000..1dc18dc6df --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_quant_common.c @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_seg_common.h" + +static const int16_t dc_qlookup[QINDEX_RANGE] = { + 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, + 19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, + 31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, + 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53, + 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65, + 66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76, + 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88, + 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110, + 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134, + 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164, + 166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202, + 205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247, + 250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300, + 304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364, + 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441, + 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549, + 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736, + 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139, + 1184, 1232, 1282, 1336, +}; + +#if CONFIG_VP9_HIGHBITDEPTH +static const int16_t dc_qlookup_10[QINDEX_RANGE] = { + 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37, + 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82, + 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132, + 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182, + 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230, + 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276, + 280, 283, 287, 290, 293, 297, 300, 304, 
307, 310, 314, 317, 321, + 324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387, + 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466, + 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567, + 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687, + 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831, + 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001, + 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202, + 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436, + 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, + 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088, + 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675, + 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823, + 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347, +}; + +static const int16_t dc_qlookup_12[QINDEX_RANGE] = { + 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91, + 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237, + 251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405, + 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580, + 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752, + 768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919, + 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080, + 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, + 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419, + 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692, + 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957, + 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334, + 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746, + 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226, + 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788, + 3843, 3897, 3951, 4005, 4058, 4119, 
4181, 4241, 4301, 4361, 4420, + 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153, + 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984, + 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966, + 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214, + 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031, + 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118, + 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949, + 19718, 20521, 21387, +}; +#endif + +static const int16_t ac_qlookup[QINDEX_RANGE] = { + 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, + 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, + 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, + 146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179, + 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223, + 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280, + 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353, + 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448, + 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571, + 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729, + 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933, + 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196, + 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537, + 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, +}; + +#if CONFIG_VP9_HIGHBITDEPTH +static const int16_t ac_qlookup_10[QINDEX_RANGE] = { + 4, 9, 11, 13, 16, 18, 21, 24, 27, 
30, 33, 37, 40, + 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92, + 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149, + 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208, + 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267, + 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324, + 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379, + 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466, + 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571, + 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713, + 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889, + 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118, + 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411, + 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791, + 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283, + 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, + 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731, + 3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784, + 4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148, + 6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312, +}; + +static const int16_t ac_qlookup_12[QINDEX_RANGE] = { + 4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99, + 112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263, + 280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456, + 475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660, + 679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865, + 884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067, + 1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264, + 1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, + 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693, + 1725, 1758, 1791, 1824, 1856, 1889, 
1922, 1954, 1987, 2020, 2052, + 2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411, + 2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943, + 2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555, + 3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310, + 4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256, + 5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, + 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867, + 8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660, + 9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885, + 12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637, + 14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062, + 18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334, + 22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599, + 28143, 28687, 29247, +}; +#endif + +int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { +#if CONFIG_VP9_HIGHBITDEPTH + switch (bit_depth) { + case VPX_BITS_8: return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_10: return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_12: return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; + default: + assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); + return -1; + } +#else + (void)bit_depth; + return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; +#endif +} + +int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { +#if CONFIG_VP9_HIGHBITDEPTH + switch (bit_depth) { + case VPX_BITS_8: return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_10: return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_12: return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; + default: + assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); + return -1; + } 
+#else + (void)bit_depth; + return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; +#endif +} + +int vp9_get_qindex(const struct segmentation *seg, int segment_id, + int base_qindex) { + if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) { + const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); + const int seg_qindex = + seg->abs_delta == SEGMENT_ABSDATA ? data : base_qindex + data; + return clamp(seg_qindex, 0, MAXQ); + } else { + return base_qindex; + } +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_quant_common.h b/media/libvpx/libvpx/vp9/common/vp9_quant_common.h new file mode 100644 index 0000000000..ec8b9f4c6a --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_quant_common.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VP9_COMMON_VP9_QUANT_COMMON_H_ +#define VPX_VP9_COMMON_VP9_QUANT_COMMON_H_ + +#include "vpx/vpx_codec.h" +#include "vp9/common/vp9_seg_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MINQ 0 +#define MAXQ 255 +#define QINDEX_RANGE (MAXQ - MINQ + 1) +#define QINDEX_BITS 8 + +int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); +int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); + +int vp9_get_qindex(const struct segmentation *seg, int segment_id, + int base_qindex); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_QUANT_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_reconinter.c b/media/libvpx/libvpx/vp9/common/vp9_reconinter.c new file mode 100644 index 0000000000..ff59ff5042 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_reconinter.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> + +#include "./vpx_scale_rtcd.h" +#include "./vpx_config.h" + +#include "vpx/vpx_integer.h" + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_reconintra.h" + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_build_inter_predictor( + const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, + const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, + const InterpKernel *kernel, enum mv_precision precision, int x, int y, + int bd) { + const int is_q4 = precision == MV_PRECISION_Q4; + const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, + is_q4 ? 
// INLINE normally comes from the tree's vpx config headers; fall back to the
// C99 keyword so this helper also builds in isolation.
#ifndef INLINE
#define INLINE inline
#endif

// Divide by 4 while rounding half away from zero: bias the magnitude by half
// the divisor before C's truncate-toward-zero division. Used by
// mi_mv_pred_q4() to average the four sub-block motion-vector components.
static INLINE int round_mv_comp_q4(int value) {
  const int bias = value < 0 ? -2 : 2;
  return (value + bias) / 4;
}
// TODO(jkoleszar): yet another mv clamping function :-(
//
// Clamps *src_mv so that the prediction block stays within the extended
// ("UMV") border of the reference frame, returning the result in this
// plane's q4 units (see the note in build_inter_predictors: the
// subsampling-aware scaling makes the result always q4).
//   bw, bh: block width/height in pixels for this plane.
//   ss_x, ss_y: plane subsampling shifts (0 or 1; asserted below).
MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, int bw,
                             int bh, int ss_x, int ss_y) {
  // If the MV points so far into the UMV border that no visible pixels
  // are used for reconstruction, the subpel part of the MV can be
  // discarded and the MV limited to 16 pixels with equivalent results.
  const int spel_left = (VP9_INTERP_EXTEND + bw) << SUBPEL_BITS;
  const int spel_right = spel_left - SUBPEL_SHIFTS;
  const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS;
  const int spel_bottom = spel_top - SUBPEL_SHIFTS;
  // 1 << (1 - ss) doubles the component for a non-subsampled axis and leaves
  // a subsampled axis unchanged, putting both planes on the same scale.
  MV clamped_mv = { (short)(src_mv->row * (1 << (1 - ss_y))),
                    (short)(src_mv->col * (1 << (1 - ss_x))) };
  assert(ss_x <= 1);
  assert(ss_y <= 1);

  // Edge distances are scaled by the same per-axis factor so the limits and
  // the MV are expressed in identical units before clamping.
  clamp_mv(&clamped_mv, xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left,
           xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right,
           xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top,
           xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom);

  return clamped_mv;
}
// Builds the inter prediction for one (sub-)block of `plane` into pd->dst.
//   block: sub-block index (used for sub-8x8 split motion vectors).
//   bw, bh: full block width/height in pixels for this plane.
//   x, y: pixel offset of this sub-block within the full block.
//   w, h: prediction width/height in pixels.
//   mi_x, mi_y: block position in pixels (mi_col/mi_row * MI_SIZE).
// For compound blocks both references are predicted (the second into the
// same dst). Note: in the scaled path this re-bases pd->pre[ref].buf onto
// the reference frame buffer, i.e. it mutates xd's plane state.
static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
                                   int bw, int bh, int x, int y, int w, int h,
                                   int mi_x, int mi_y) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const MODE_INFO *mi = xd->mi[0];
  const int is_compound = has_second_ref(mi);
  const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
  int ref;

  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
    struct buf_2d *const pre_buf = &pd->pre[ref];
    struct buf_2d *const dst_buf = &pd->dst;
    uint8_t *const dst = dst_buf->buf + (int64_t)dst_buf->stride * y + x;
    // Sub-8x8 blocks carry per-sub-block MVs that must be averaged for the
    // (possibly subsampled) plane; otherwise use the block-level MV.
    const MV mv = mi->sb_type < BLOCK_8X8
                      ? average_split_mvs(pd, mi, ref, block)
                      : mi->mv[ref].as_mv;

    // TODO(jkoleszar): This clamping is done in the incorrect place for the
    // scaling case. It needs to be done on the scaled MV, not the pre-scaling
    // MV. Note however that it performs the subsampling aware scaling so
    // that the result is always q4.
    // mv_precision precision is MV_PRECISION_Q4.
    const MV mv_q4 = clamp_mv_to_umv_border_sb(
        xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);

    uint8_t *pre;
    MV32 scaled_mv;
    int xs, ys, subpel_x, subpel_y;
    const int is_scaled = vp9_is_scaled(sf);

    if (is_scaled) {
      // Co-ordinate of containing block to pixel precision.
      const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
      const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
#if 0  // CONFIG_BETTER_HW_COMPATIBILITY
      assert(xd->mi[0]->sb_type != BLOCK_4X8 &&
             xd->mi[0]->sb_type != BLOCK_8X4);
      assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) &&
             mv_q4.col == mv.col * (1 << (1 - pd->subsampling_x)));
#endif
      // Re-base the pre buffer onto the (scaled) reference frame plane.
      if (plane == 0)
        pre_buf->buf = xd->block_refs[ref]->buf->y_buffer;
      else if (plane == 1)
        pre_buf->buf = xd->block_refs[ref]->buf->u_buffer;
      else
        pre_buf->buf = xd->block_refs[ref]->buf->v_buffer;

      pre_buf->buf +=
          scaled_buffer_offset(x_start + x, y_start + y, pre_buf->stride, sf);
      pre = pre_buf->buf;
      scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
      xs = sf->x_step_q4;
      ys = sf->y_step_q4;
    } else {
      // Unscaled reference: step size is 16 (one full pel per output pixel).
      pre = pre_buf->buf + ((int64_t)y * pre_buf->stride + x);
      scaled_mv.row = mv_q4.row;
      scaled_mv.col = mv_q4.col;
      xs = ys = 16;
    }
    // Split the q4 MV into an integer-pel source offset and a subpel phase.
    subpel_x = scaled_mv.col & SUBPEL_MASK;
    subpel_y = scaled_mv.row & SUBPEL_MASK;
    pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
           (scaled_mv.col >> SUBPEL_BITS);

#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      highbd_inter_predictor(CONVERT_TO_SHORTPTR(pre), pre_buf->stride,
                             CONVERT_TO_SHORTPTR(dst), dst_buf->stride,
                             subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
                             xd->bd);
    } else {
      inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x,
                      subpel_y, sf, w, h, ref, kernel, xs, ys);
    }
#else
    inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x,
                    subpel_y, sf, w, h, ref, kernel, xs, ys);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }
}
BLOCK_SIZE plane_bsize = + get_plane_block_size(bsize, &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + if (xd->mi[0]->sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4, + mi_x, mi_y); + } else { + build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, mi_x, mi_y); + } + } +} + +void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0); +} + +void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize, int plane) { + build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, plane, plane); +} + +void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1, + MAX_MB_PLANE - 1); +} + +void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, + MAX_MB_PLANE - 1); +} + +void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], + const YV12_BUFFER_CONFIG *src, int mi_row, + int mi_col) { + uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, + src->v_buffer }; + const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, + src->uv_stride }; + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &planes[i]; + setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, + pd->subsampling_x, pd->subsampling_y); + } +} + +void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, + const YV12_BUFFER_CONFIG *src, int 
mi_row, int mi_col, + const struct scale_factors *sf) { + if (src != NULL) { + int i; + uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, + src->v_buffer }; + const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, + src->uv_stride }; + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &xd->plane[i]; + setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, + sf, pd->subsampling_x, pd->subsampling_y); + } + } +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_reconinter.h b/media/libvpx/libvpx/vp9/common/vp9_reconinter.h new file mode 100644 index 0000000000..12b545831a --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_reconinter.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VP9_COMMON_VP9_RECONINTER_H_ +#define VPX_VP9_COMMON_VP9_RECONINTER_H_ + +#include "vp9/common/vp9_filter.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_filter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE void inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, const int subpel_y, + const struct scale_factors *sf, int w, int h, + int ref, const InterpKernel *kernel, int xs, + int ys) { + sf->predict[subpel_x != 0][subpel_y != 0][ref](src, src_stride, dst, + dst_stride, kernel, subpel_x, + xs, subpel_y, ys, w, h); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE void highbd_inter_predictor( + const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, + const int subpel_x, const int subpel_y, const struct scale_factors *sf, + int w, int h, int ref, const InterpKernel *kernel, int xs, int ys, int bd) { + sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, kernel, subpel_x, xs, subpel_y, ys, w, + h, bd); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi, + int ref, int block); + +MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, int bw, + int bh, int ss_x, int ss_y); + +void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize); + +void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize, int plane); + +void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize); + +void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize); + +void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride, const MV *src_mv, + const struct scale_factors *sf, int w, int h, + int ref, const InterpKernel *kernel, + enum mv_precision 
precision, int x, int y); + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_build_inter_predictor( + const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, + const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, + const InterpKernel *kernel, enum mv_precision precision, int x, int y, + int bd); +#endif + +static INLINE int64_t scaled_buffer_offset(int x_offset, int y_offset, + int stride, + const struct scale_factors *sf) { + const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset; + const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset; + return (int64_t)y * stride + x; +} + +static INLINE void setup_pred_plane(struct buf_2d *dst, uint8_t *src, + int stride, int mi_row, int mi_col, + const struct scale_factors *scale, + int subsampling_x, int subsampling_y) { + const int x = (MI_SIZE * mi_col) >> subsampling_x; + const int y = (MI_SIZE * mi_row) >> subsampling_y; + dst->buf = src + scaled_buffer_offset(x, y, stride, scale); + dst->stride = stride; +} + +void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], + const YV12_BUFFER_CONFIG *src, int mi_row, + int mi_col); + +void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, + const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, + const struct scale_factors *sf); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_RECONINTER_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_reconintra.c b/media/libvpx/libvpx/vp9/common/vp9_reconintra.c new file mode 100644 index 0000000000..3e5ed616d3 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_reconintra.c @@ -0,0 +1,431 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#if CONFIG_VP9_HIGHBITDEPTH +#include "vpx_dsp/vpx_dsp_common.h" +#endif // CONFIG_VP9_HIGHBITDEPTH +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" +#include "vpx_ports/vpx_once.h" + +#include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_onyxc_int.h" + +const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { + DCT_DCT, // DC + ADST_DCT, // V + DCT_ADST, // H + DCT_DCT, // D45 + ADST_ADST, // D135 + ADST_DCT, // D117 + DCT_ADST, // D153 + DCT_ADST, // D207 + ADST_DCT, // D63 + ADST_ADST, // TM +}; + +enum { + NEED_LEFT = 1 << 1, + NEED_ABOVE = 1 << 2, + NEED_ABOVERIGHT = 1 << 3, +}; + +static const uint8_t extend_modes[INTRA_MODES] = { + NEED_ABOVE | NEED_LEFT, // DC + NEED_ABOVE, // V + NEED_LEFT, // H + NEED_ABOVERIGHT, // D45 + NEED_LEFT | NEED_ABOVE, // D135 + NEED_LEFT | NEED_ABOVE, // D117 + NEED_LEFT | NEED_ABOVE, // D153 + NEED_LEFT, // D207 + NEED_ABOVERIGHT, // D63 + NEED_LEFT | NEED_ABOVE, // TM +}; + +typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left); + +static intra_pred_fn pred[INTRA_MODES][TX_SIZES]; +static intra_pred_fn dc_pred[2][2][TX_SIZES]; + +#if CONFIG_VP9_HIGHBITDEPTH +typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bd); +static intra_high_pred_fn pred_high[INTRA_MODES][4]; +static intra_high_pred_fn dc_pred_high[2][2][4]; +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void vp9_init_intra_predictors_internal(void) { +#define INIT_ALL_SIZES(p, type) \ + p[TX_4X4] = vpx_##type##_predictor_4x4; \ + p[TX_8X8] = vpx_##type##_predictor_8x8; \ + p[TX_16X16] = vpx_##type##_predictor_16x16; \ + p[TX_32X32] = vpx_##type##_predictor_32x32 + + INIT_ALL_SIZES(pred[V_PRED], v); + INIT_ALL_SIZES(pred[H_PRED], h); + 
INIT_ALL_SIZES(pred[D207_PRED], d207); + INIT_ALL_SIZES(pred[D45_PRED], d45); + INIT_ALL_SIZES(pred[D63_PRED], d63); + INIT_ALL_SIZES(pred[D117_PRED], d117); + INIT_ALL_SIZES(pred[D135_PRED], d135); + INIT_ALL_SIZES(pred[D153_PRED], d153); + INIT_ALL_SIZES(pred[TM_PRED], tm); + + INIT_ALL_SIZES(dc_pred[0][0], dc_128); + INIT_ALL_SIZES(dc_pred[0][1], dc_top); + INIT_ALL_SIZES(dc_pred[1][0], dc_left); + INIT_ALL_SIZES(dc_pred[1][1], dc); + +#if CONFIG_VP9_HIGHBITDEPTH + INIT_ALL_SIZES(pred_high[V_PRED], highbd_v); + INIT_ALL_SIZES(pred_high[H_PRED], highbd_h); + INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207); + INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45); + INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63); + INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117); + INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135); + INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153); + INIT_ALL_SIZES(pred_high[TM_PRED], highbd_tm); + + INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128); + INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top); + INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left); + INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc); +#endif // CONFIG_VP9_HIGHBITDEPTH + +#undef intra_pred_allsizes +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void build_intra_predictors_high( + const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8, + int dst_stride, PREDICTION_MODE mode, TX_SIZE tx_size, int up_available, + int left_available, int right_available, int x, int y, int plane, int bd) { + int i; + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + DECLARE_ALIGNED(16, uint16_t, left_col[32]); + DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]); + uint16_t *above_row = above_data + 16; + const uint16_t *const_above_row = above_row; + const int bs = 4 << tx_size; + int frame_width, frame_height; + int x0, y0; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const int need_left = extend_modes[mode] & NEED_LEFT; + 
const int need_above = extend_modes[mode] & NEED_ABOVE; + const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT; + int base = 128 << (bd - 8); + // 127 127 127 .. 127 127 127 127 127 127 + // 129 A B .. Y Z + // 129 C D .. W X + // 129 E F .. U V + // 129 G H .. S T T T T T + // For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1. + + // Get current frame pointer, width and height. + if (plane == 0) { + frame_width = xd->cur_buf->y_width; + frame_height = xd->cur_buf->y_height; + } else { + frame_width = xd->cur_buf->uv_width; + frame_height = xd->cur_buf->uv_height; + } + + // Get block position in current frame. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // NEED_LEFT + if (need_left) { + if (left_available) { + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } + } else { + /* faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; + } + } else { + vpx_memset16(left_col, base + 1, bs); + } + } + + // NEED_ABOVE + if (need_above) { + if (up_available) { + const uint16_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + bs <= frame_width) { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width); + } + } else { + /* faster path if the block does not need 
extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + } + } + above_row[-1] = left_available ? above_ref[-1] : (base + 1); + } else { + vpx_memset16(above_row, base - 1, bs); + above_row[-1] = base - 1; + } + } + + // NEED_ABOVERIGHT + if (need_aboveright) { + if (up_available) { + const uint16_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0])); + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } + above_row[-1] = left_available ? above_ref[-1] : (base + 1); + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + if (bs == 4 && right_available) + memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0])); + else + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + above_row[-1] = left_available ? 
above_ref[-1] : (base + 1); + } + } + } else { + vpx_memset16(above_row, base - 1, bs * 2); + above_row[-1] = base - 1; + } + } + + // predict + if (mode == DC_PRED) { + dc_pred_high[left_available][up_available][tx_size]( + dst, dst_stride, const_above_row, left_col, xd->bd); + } else { + pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col, + xd->bd); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, + int ref_stride, uint8_t *dst, int dst_stride, + PREDICTION_MODE mode, TX_SIZE tx_size, + int up_available, int left_available, + int right_available, int x, int y, + int plane) { + int i; + DECLARE_ALIGNED(16, uint8_t, left_col[32]); + DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); + uint8_t *above_row = above_data + 16; + const uint8_t *const_above_row = above_row; + const int bs = 4 << tx_size; + int frame_width, frame_height; + int x0, y0; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + + // 127 127 127 .. 127 127 127 127 127 127 + // 129 A B .. Y Z + // 129 C D .. W X + // 129 E F .. U V + // 129 G H .. S T T T T T + // .. + + // Get current frame pointer, width and height. + if (plane == 0) { + frame_width = xd->cur_buf->y_width; + frame_height = xd->cur_buf->y_height; + } else { + frame_width = xd->cur_buf->uv_width; + frame_height = xd->cur_buf->uv_height; + } + + // Get block position in current frame. 
+ x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // NEED_LEFT + if (extend_modes[mode] & NEED_LEFT) { + if (left_available) { + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } + } else { + /* faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; + } + } else { + memset(left_col, 129, bs); + } + } + + // NEED_ABOVE + if (extend_modes[mode] & NEED_ABOVE) { + if (up_available) { + const uint8_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + bs <= frame_width) { + memcpy(above_row, above_ref, bs); + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs); + } + } + above_row[-1] = left_available ? 
above_ref[-1] : 129; + } else { + memset(above_row, 127, bs); + above_row[-1] = 127; + } + } + + // NEED_ABOVERIGHT + if (extend_modes[mode] & NEED_ABOVERIGHT) { + if (up_available) { + const uint8_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + memcpy(above_row, above_ref, 2 * bs); + } else { + memcpy(above_row, above_ref, bs); + memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); + } else { + memcpy(above_row, above_ref, bs); + memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs); + if (bs == 4 && right_available) + memcpy(above_row + bs, above_ref + bs, bs); + else + memset(above_row + bs, above_row[bs - 1], bs); + } + } + above_row[-1] = left_available ? 
above_ref[-1] : 129; + } else { + memset(above_row, 127, bs * 2); + above_row[-1] = 127; + } + } + + // predict + if (mode == DC_PRED) { + dc_pred[left_available][up_available][tx_size](dst, dst_stride, + const_above_row, left_col); + } else { + pred[mode][tx_size](dst, dst_stride, const_above_row, left_col); + } +} + +void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, TX_SIZE tx_size, + PREDICTION_MODE mode, const uint8_t *ref, + int ref_stride, uint8_t *dst, int dst_stride, + int aoff, int loff, int plane) { + const int bw = (1 << bwl_in); + const int txw = (1 << tx_size); + const int have_top = loff || (xd->above_mi != NULL); + const int have_left = aoff || (xd->left_mi != NULL); + const int have_right = (aoff + txw) < bw; + const int x = aoff * 4; + const int y = loff * 4; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode, + tx_size, have_top, have_left, have_right, x, y, + plane, xd->bd); + return; + } +#endif + build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, + have_top, have_left, have_right, x, y, plane); +} + +void vp9_init_intra_predictors(void) { + once(vp9_init_intra_predictors_internal); +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_reconintra.h b/media/libvpx/libvpx/vp9/common/vp9_reconintra.h new file mode 100644 index 0000000000..426a35ebfa --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_reconintra.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VP9_COMMON_VP9_RECONINTRA_H_ +#define VPX_VP9_COMMON_VP9_RECONINTRA_H_ + +#include "vpx/vpx_integer.h" +#include "vp9/common/vp9_blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp9_init_intra_predictors(void); + +void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, TX_SIZE tx_size, + PREDICTION_MODE mode, const uint8_t *ref, + int ref_stride, uint8_t *dst, int dst_stride, + int aoff, int loff, int plane); +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_rtcd.c b/media/libvpx/libvpx/vp9/common/vp9_rtcd.c new file mode 100644 index 0000000000..37762ca15a --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_rtcd.c @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_config.h" +#define RTCD_C +#include "./vp9_rtcd.h" +#include "vpx_ports/vpx_once.h" + +void vp9_rtcd() { once(setup_rtcd_internal); } diff --git a/media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl b/media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl new file mode 100644 index 0000000000..4b94c31f15 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -0,0 +1,226 @@ +## +## Copyright (c) 2017 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
+## + +sub vp9_common_forward_decls() { +print <<EOF +/* + * VP9 + */ + +#include "vpx/vpx_integer.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_enums.h" +#include "vp9/common/vp9_filter.h" + +struct macroblockd; + +/* Encoder forward decls */ +struct macroblock; +struct vp9_sad_table; +struct search_site_config; +struct mv; +union int_mv; +struct yv12_buffer_config; +EOF +} +forward_decls qw/vp9_common_forward_decls/; + +# functions that are 64 bit only. +$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = ''; +if ($opts{arch} eq "x86_64") { + $mmx_x86_64 = 'mmx'; + $sse2_x86_64 = 'sse2'; + $ssse3_x86_64 = 'ssse3'; + $avx_x86_64 = 'avx'; + $avx2_x86_64 = 'avx2'; + $avx512_x86_64 = 'avx512'; +} + +# +# post proc +# +if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") { +add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight"; +specialize qw/vp9_filter_by_weight16x16 sse2 msa/; + +add_proto qw/void vp9_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight"; +specialize qw/vp9_filter_by_weight8x8 sse2 msa/; +} + +# +# dct +# +# Force C versions if CONFIG_EMULATE_HARDWARE is 1 +add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; + +add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; + +add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; + +if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { + # Note that there are more specializations appended when + # CONFIG_VP9_HIGHBITDEPTH is off. 
+ specialize qw/vp9_iht4x4_16_add neon sse2 vsx/; + specialize qw/vp9_iht8x8_64_add neon sse2 vsx/; + specialize qw/vp9_iht16x16_256_add neon sse2 vsx/; + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") { + # Note that these specializations are appended to the above ones. + specialize qw/vp9_iht4x4_16_add dspr2 msa/; + specialize qw/vp9_iht8x8_64_add dspr2 msa/; + specialize qw/vp9_iht16x16_256_add dspr2 msa/; + } +} + +# High bitdepth functions +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + # + # post proc + # + if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") { + add_proto qw/void vp9_highbd_mbpost_proc_down/, "uint16_t *dst, int pitch, int rows, int cols, int flimit"; + + add_proto qw/void vp9_highbd_mbpost_proc_across_ip/, "uint16_t *src, int pitch, int rows, int cols, int flimit"; + + add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"; + } + + # + # dct + # + # Note as optimized versions of these functions are added we need to add a check to ensure + # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. + add_proto qw/void vp9_highbd_iht4x4_16_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd"; + + add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd"; + + add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd"; + + if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { + specialize qw/vp9_highbd_iht4x4_16_add neon sse4_1/; + specialize qw/vp9_highbd_iht8x8_64_add neon sse4_1/; + specialize qw/vp9_highbd_iht16x16_256_add neon sse4_1/; + } +} + +# +# Encoder functions below this point. 
+# +if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { + +# ENCODEMB INVOKE + +# +# Denoiser +# +if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") { + add_proto qw/int vp9_denoiser_filter/, "const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude"; + specialize qw/vp9_denoiser_filter neon sse2/; +} + +add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; + +add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size"; +specialize qw/vp9_block_error_fp neon avx2 sse2/; + +add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +specialize qw/vp9_quantize_fp neon sse2 ssse3 avx2 vsx/; + +add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +specialize qw/vp9_quantize_fp_32x32 neon ssse3 avx2 vsx/; + +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + specialize qw/vp9_block_error neon avx2 sse2/; + + add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; + specialize qw/vp9_highbd_block_error neon sse2/; +} else { + specialize qw/vp9_block_error neon avx2 msa sse2/; +} + +# fdct functions + +add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + +add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + 
+add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + +add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; + +# Note that there are more specializations appended when CONFIG_VP9_HIGHBITDEPTH +# is off. +specialize qw/vp9_fht4x4 sse2 neon/; +specialize qw/vp9_fht8x8 sse2 neon/; +specialize qw/vp9_fht16x16 sse2 neon/; +specialize qw/vp9_fwht4x4 sse2/; +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") { + # Note that these specializations are appended to the above ones. + specialize qw/vp9_fht4x4 msa/; + specialize qw/vp9_fht8x8 msa/; + specialize qw/vp9_fht16x16 msa/; + specialize qw/vp9_fwht4x4 msa/; +} + +# +# Motion search +# +add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv"; +specialize qw/vp9_diamond_search_sad avx neon/; + +# +# Apply temporal filter +# +if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") { +add_proto qw/void vp9_apply_temporal_filter/, "const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator, uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count"; +specialize qw/vp9_apply_temporal_filter sse4_1 neon/; + + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vp9_highbd_apply_temporal_filter/, "const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, int uv_src_stride, const uint16_t *u_pre, 
const uint16_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count"; + specialize qw/vp9_highbd_apply_temporal_filter sse4_1 neon/; + } +} + + +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + + # ENCODEMB INVOKE + + add_proto qw/void vp9_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_highbd_quantize_fp avx2 neon/; + + add_proto qw/void vp9_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" ; + specialize qw/vp9_highbd_quantize_fp_32x32 avx2 neon/; + + # fdct functions + add_proto qw/void vp9_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_highbd_fht4x4 neon/; + + add_proto qw/void vp9_highbd_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_highbd_fht8x8 neon/; + + add_proto qw/void vp9_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/vp9_highbd_fht16x16 neon/; + + add_proto qw/void vp9_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; + + add_proto qw/void vp9_highbd_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int *blk_fw, int use_32x32, uint32_t *accumulator, uint16_t *count"; + +} +# End vp9_high encoder 
functions + +# +# frame based scale +# +add_proto qw/void vp9_scale_and_extend_frame/, "const struct yv12_buffer_config *src, struct yv12_buffer_config *dst, INTERP_FILTER filter_type, int phase_scaler"; +specialize qw/vp9_scale_and_extend_frame neon ssse3/; + +} +# end encoder functions +1; diff --git a/media/libvpx/libvpx/vp9/common/vp9_scale.c b/media/libvpx/libvpx/vp9/common/vp9_scale.c new file mode 100644 index 0000000000..8aedd66222 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_scale.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_filter.h" +#include "vp9/common/vp9_scale.h" +#include "vpx_dsp/vpx_filter.h" + +static INLINE int scaled_x(int val, const struct scale_factors *sf) { + return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT); +} + +static INLINE int scaled_y(int val, const struct scale_factors *sf) { + return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT); +} + +static int unscaled_value(int val, const struct scale_factors *sf) { + (void)sf; + return val; +} + +static int get_fixed_point_scale_factor(int other_size, int this_size) { + // Calculate scaling factor once for each reference frame + // and use fixed point scaling factors in decoding and encoding routines. + // Hardware implementations can calculate scale factor in device driver + // and use multiplication and shifting on hardware instead of division. 
+ return (other_size << REF_SCALE_SHIFT) / this_size; +} + +MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) { + const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK; + const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK; + const MV32 res = { scaled_y(mv->row, sf) + y_off_q4, + scaled_x(mv->col, sf) + x_off_q4 }; + return res; +} + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, + int other_h, int this_w, int this_h, + int use_highbd) { +#else +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, + int other_h, int this_w, int this_h) { +#endif + if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { + sf->x_scale_fp = REF_INVALID_SCALE; + sf->y_scale_fp = REF_INVALID_SCALE; + return; + } + + sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w); + sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h); + sf->x_step_q4 = scaled_x(16, sf); + sf->y_step_q4 = scaled_y(16, sf); + + if (vp9_is_scaled(sf)) { + sf->scale_value_x = scaled_x; + sf->scale_value_y = scaled_y; + } else { + sf->scale_value_x = unscaled_value; + sf->scale_value_y = unscaled_value; + } + + // TODO(agrange): Investigate the best choice of functions to use here + // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what + // to do at full-pel offsets. The current selection, where the filter is + // applied in one direction only, and not at all for 0,0, seems to give the + // best quality, but it may be worth trying an additional mode that does + // do the filtering on full-pel. + + if (sf->x_step_q4 == 16) { + if (sf->y_step_q4 == 16) { + // No scaling in either direction. 
+ sf->predict[0][0][0] = vpx_convolve_copy; + sf->predict[0][0][1] = vpx_convolve_avg; + sf->predict[0][1][0] = vpx_convolve8_vert; + sf->predict[0][1][1] = vpx_convolve8_avg_vert; + sf->predict[1][0][0] = vpx_convolve8_horiz; + sf->predict[1][0][1] = vpx_convolve8_avg_horiz; + } else { + // No scaling in x direction. Must always scale in the y direction. + sf->predict[0][0][0] = vpx_scaled_vert; + sf->predict[0][0][1] = vpx_scaled_avg_vert; + sf->predict[0][1][0] = vpx_scaled_vert; + sf->predict[0][1][1] = vpx_scaled_avg_vert; + sf->predict[1][0][0] = vpx_scaled_2d; + sf->predict[1][0][1] = vpx_scaled_avg_2d; + } + } else { + if (sf->y_step_q4 == 16) { + // No scaling in the y direction. Must always scale in the x direction. + sf->predict[0][0][0] = vpx_scaled_horiz; + sf->predict[0][0][1] = vpx_scaled_avg_horiz; + sf->predict[0][1][0] = vpx_scaled_2d; + sf->predict[0][1][1] = vpx_scaled_avg_2d; + sf->predict[1][0][0] = vpx_scaled_horiz; + sf->predict[1][0][1] = vpx_scaled_avg_horiz; + } else { + // Must always scale in both directions. + sf->predict[0][0][0] = vpx_scaled_2d; + sf->predict[0][0][1] = vpx_scaled_avg_2d; + sf->predict[0][1][0] = vpx_scaled_2d; + sf->predict[0][1][1] = vpx_scaled_avg_2d; + sf->predict[1][0][0] = vpx_scaled_2d; + sf->predict[1][0][1] = vpx_scaled_avg_2d; + } + } + + // 2D subpel motion always gets filtered in both directions + + if ((sf->x_step_q4 != 16) || (sf->y_step_q4 != 16)) { + sf->predict[1][1][0] = vpx_scaled_2d; + sf->predict[1][1][1] = vpx_scaled_avg_2d; + } else { + sf->predict[1][1][0] = vpx_convolve8; + sf->predict[1][1][1] = vpx_convolve8_avg; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (use_highbd) { + if (sf->x_step_q4 == 16) { + if (sf->y_step_q4 == 16) { + // No scaling in either direction. 
+ sf->highbd_predict[0][0][0] = vpx_highbd_convolve_copy; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve_avg; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; + } else { + // No scaling in x direction. Must always scale in the y direction. + sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_vert; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_vert; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; + } + } else { + if (sf->y_step_q4 == 16) { + // No scaling in the y direction. Must always scale in the x direction. + sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_horiz; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_horiz; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; + } else { + // Must always scale in both directions. + sf->highbd_predict[0][0][0] = vpx_highbd_convolve8; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; + } + } + // 2D subpel motion always gets filtered in both directions. 
+ sf->highbd_predict[1][1][0] = vpx_highbd_convolve8; + sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg; + } +#endif +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_scale.h b/media/libvpx/libvpx/vp9/common/vp9_scale.h new file mode 100644 index 0000000000..2f3b609483 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_scale.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_VP9_SCALE_H_ +#define VPX_VP9_COMMON_VP9_SCALE_H_ + +#include "vp9/common/vp9_mv.h" +#include "vpx_dsp/vpx_convolve.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define REF_SCALE_SHIFT 14 +#define REF_NO_SCALE (1 << REF_SCALE_SHIFT) +#define REF_INVALID_SCALE (-1) + +struct scale_factors { + int x_scale_fp; // horizontal fixed point scale factor + int y_scale_fp; // vertical fixed point scale factor + int x_step_q4; + int y_step_q4; + + int (*scale_value_x)(int val, const struct scale_factors *sf); + int (*scale_value_y)(int val, const struct scale_factors *sf); + + convolve_fn_t predict[2][2][2]; // horiz, vert, avg +#if CONFIG_VP9_HIGHBITDEPTH + highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg +#endif +}; + +MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf); + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, + int other_h, int this_w, int this_h, + int use_highbd); +#else +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, + int other_h, int this_w, int this_h); +#endif + +static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) { + 
return sf->x_scale_fp != REF_INVALID_SCALE && + sf->y_scale_fp != REF_INVALID_SCALE; +} + +static INLINE int vp9_is_scaled(const struct scale_factors *sf) { + return vp9_is_valid_scale(sf) && + (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE); +} + +static INLINE int valid_ref_frame_size(int ref_width, int ref_height, + int this_width, int this_height) { + return 2 * this_width >= ref_width && 2 * this_height >= ref_height && + this_width <= 16 * ref_width && this_height <= 16 * ref_height; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_COMMON_VP9_SCALE_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_scan.c b/media/libvpx/libvpx/vp9/common/vp9_scan.c new file mode 100644 index 0000000000..adacb7ef96 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/vp9_scan.c @@ -0,0 +1,725 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <assert.h> + +#include "vp9/common/vp9_scan.h" + +DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = { + 0, 4, 1, 5, 8, 2, 12, 9, 3, 6, 13, 10, 7, 14, 11, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4[16]) = { + 0, 4, 8, 1, 12, 5, 9, 2, 13, 6, 10, 3, 7, 14, 11, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4[16]) = { + 0, 1, 4, 2, 5, 3, 6, 8, 9, 7, 12, 10, 13, 11, 14, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = { + 0, 8, 1, 16, 9, 2, 17, 24, 10, 3, 18, 25, 32, 11, 4, 26, + 33, 19, 40, 12, 34, 27, 5, 41, 20, 48, 13, 35, 42, 28, 21, 6, + 49, 56, 36, 43, 29, 7, 14, 50, 57, 44, 22, 37, 15, 51, 58, 30, + 45, 23, 52, 59, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8[64]) = { + 0, 8, 16, 1, 24, 9, 32, 17, 2, 40, 25, 10, 33, 18, 48, 3, + 26, 41, 11, 56, 19, 34, 4, 49, 27, 42, 12, 35, 20, 57, 50, 28, + 5, 43, 13, 36, 58, 51, 21, 44, 6, 29, 59, 37, 14, 52, 22, 7, + 45, 60, 30, 15, 38, 53, 23, 46, 31, 61, 39, 54, 47, 62, 55, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8[64]) = { + 0, 1, 2, 8, 9, 3, 16, 10, 4, 17, 11, 24, 5, 18, 25, 12, + 19, 26, 32, 6, 13, 20, 33, 27, 7, 34, 40, 21, 28, 41, 14, 35, + 48, 42, 29, 36, 49, 22, 43, 15, 56, 37, 50, 44, 30, 57, 23, 51, + 58, 45, 38, 52, 31, 59, 53, 46, 60, 39, 61, 47, 54, 55, 62, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = { + 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, + 80, 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, + 21, 52, 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, + 129, 38, 69, 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, + 101, 131, 160, 146, 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, + 176, 162, 87, 56, 25, 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, + 178, 88, 57, 134, 149, 119, 26, 164, 73, 104, 193, 42, 179, 208, 11, + 135, 89, 165, 120, 150, 58, 194, 
180, 27, 74, 209, 105, 151, 136, 43, + 90, 224, 166, 195, 181, 121, 210, 59, 12, 152, 106, 167, 196, 75, 137, + 225, 211, 240, 182, 122, 91, 28, 197, 13, 226, 168, 183, 153, 44, 212, + 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, 242, 76, 213, 154, 45, + 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, 77, 155, 30, 15, + 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, 230, 62, 216, + 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, 63, 232, + 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, 219, + 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251, + 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, + 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16[256]) = { + 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, + 81, 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, + 129, 4, 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, + 68, 115, 21, 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, + 116, 193, 147, 85, 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, + 7, 148, 194, 86, 179, 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, + 195, 118, 149, 71, 180, 24, 87, 226, 134, 165, 211, 40, 103, 56, 72, + 150, 196, 242, 119, 9, 181, 227, 88, 166, 25, 135, 41, 104, 212, 57, + 151, 197, 120, 73, 243, 182, 136, 167, 213, 89, 10, 228, 105, 152, 198, + 26, 42, 121, 183, 244, 168, 58, 137, 229, 74, 214, 90, 153, 199, 184, + 11, 106, 245, 27, 122, 230, 169, 43, 215, 59, 200, 138, 185, 246, 75, + 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, 60, 247, 232, 76, + 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, 233, 171, 61, + 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, 62, 172, + 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, 126, + 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236, + 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, + 255, +}; + 
+DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16[256]) = { + 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, + 20, 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, + 66, 52, 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, + 83, 97, 69, 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, + 41, 56, 114, 100, 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, + 116, 14, 87, 130, 102, 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, + 103, 132, 146, 118, 74, 160, 89, 133, 104, 29, 59, 147, 119, 44, 161, + 148, 90, 105, 134, 162, 120, 176, 75, 135, 149, 30, 60, 163, 177, 45, + 121, 91, 106, 164, 178, 150, 192, 136, 165, 179, 31, 151, 193, 76, 122, + 61, 137, 194, 107, 152, 180, 208, 46, 166, 167, 195, 92, 181, 138, 209, + 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, 197, 62, 154, 225, 183, + 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, 124, 155, 199, 78, + 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, 156, 229, 243, + 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, 157, 245, + 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158, 188, + 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175, + 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, + 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = { + 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, + 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, + 162, 193, 68, 131, 37, 100, 225, 194, 256, 163, 69, 132, 6, + 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320, + 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197, 71, + 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, + 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, + 293, 41, 417, 199, 136, 262, 387, 448, 325, 356, 10, 73, 418, + 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137, + 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451, 481, 358, + 264, 327, 201, 43, 
138, 512, 482, 390, 296, 233, 170, 421, 75, + 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, + 453, 139, 44, 234, 484, 297, 360, 171, 76, 515, 545, 266, 329, + 454, 13, 423, 203, 108, 546, 485, 576, 298, 235, 140, 361, 330, + 172, 547, 45, 455, 267, 577, 486, 77, 204, 362, 608, 14, 299, + 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, 610, 363, + 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, 111, + 238, 48, 143, 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, + 50, 82, 114, 51, 83, 115, 640, 516, 392, 268, 144, 20, 672, + 641, 548, 517, 424, 393, 300, 269, 176, 145, 52, 21, 704, 673, + 642, 580, 549, 518, 456, 425, 394, 332, 301, 270, 208, 177, 146, + 84, 53, 22, 736, 705, 674, 643, 612, 581, 550, 519, 488, 457, + 426, 395, 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, + 23, 737, 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, + 241, 210, 179, 117, 86, 55, 738, 707, 614, 583, 490, 459, 366, + 335, 242, 211, 118, 87, 739, 615, 491, 367, 243, 119, 768, 644, + 520, 396, 272, 148, 24, 800, 769, 676, 645, 552, 521, 428, 397, + 304, 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, + 553, 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, + 26, 864, 833, 802, 771, 740, 709, 678, 647, 616, 585, 554, 523, + 492, 461, 430, 399, 368, 337, 306, 275, 244, 213, 182, 151, 120, + 89, 58, 27, 865, 834, 803, 741, 710, 679, 617, 586, 555, 493, + 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, + 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, + 743, 619, 495, 371, 247, 123, 896, 772, 648, 524, 400, 276, 152, + 28, 928, 897, 804, 773, 680, 649, 556, 525, 432, 401, 308, 277, + 184, 153, 60, 29, 960, 929, 898, 836, 805, 774, 712, 681, 650, + 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, 92, + 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, + 651, 620, 589, 558, 527, 496, 465, 434, 403, 372, 341, 310, 279, + 248, 217, 186, 155, 124, 93, 62, 31, 993, 962, 931, 869, 838, + 
807, 745, 714, 683, 621, 590, 559, 497, 466, 435, 373, 342, 311, + 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, 622, + 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, + 499, 375, 251, 127, 900, 776, 652, 528, 404, 280, 156, 932, 901, + 808, 777, 684, 653, 560, 529, 436, 405, 312, 281, 188, 157, 964, + 933, 902, 840, 809, 778, 716, 685, 654, 592, 561, 530, 468, 437, + 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, 872, 841, + 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, 438, + 407, 376, 345, 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, + 842, 811, 749, 718, 687, 625, 594, 563, 501, 470, 439, 377, 346, + 315, 253, 222, 191, 998, 967, 874, 843, 750, 719, 626, 595, 502, + 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, 379, 255, 904, + 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, 564, 533, + 440, 409, 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, + 596, 565, 534, 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, + 876, 845, 814, 783, 752, 721, 690, 659, 628, 597, 566, 535, 504, + 473, 442, 411, 380, 349, 318, 287, 1001, 970, 939, 877, 846, 815, + 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, 350, 319, 1002, + 971, 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, + 755, 631, 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, + 692, 661, 568, 537, 444, 413, 972, 941, 910, 848, 817, 786, 724, + 693, 662, 600, 569, 538, 476, 445, 414, 1004, 973, 942, 911, 880, + 849, 818, 787, 756, 725, 694, 663, 632, 601, 570, 539, 508, 477, + 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, + 571, 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, + 479, 1007, 883, 759, 635, 511, 912, 788, 664, 540, 944, 913, 820, + 789, 696, 665, 572, 541, 976, 945, 914, 852, 821, 790, 728, 697, + 666, 604, 573, 542, 1008, 977, 946, 915, 884, 853, 822, 791, 760, + 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, + 761, 730, 699, 637, 606, 575, 1010, 979, 886, 
855, 762, 731, 638, + 607, 1011, 887, 763, 639, 916, 792, 668, 948, 917, 824, 793, 700, + 669, 980, 949, 918, 856, 825, 794, 732, 701, 670, 1012, 981, 950, + 919, 888, 857, 826, 795, 764, 733, 702, 671, 1013, 982, 951, 889, + 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, 891, + 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, + 1016, 985, 954, 923, 892, 861, 830, 799, 1017, 986, 955, 893, 862, + 831, 1018, 987, 894, 863, 1019, 895, 924, 956, 925, 988, 957, 926, + 1020, 989, 958, 927, 1021, 990, 959, 1022, 991, 1023, +}; + +// Neighborhood 2-tuples for various scans and blocksizes, +// in {top, left} order for each position in corresponding scan order. +DECLARE_ALIGNED(16, static const int16_t, + default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, + 2, 2, 5, 9, 12, 6, 9, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 4, 4, 0, 0, 8, 8, 1, 1, 5, 5, 1, 1, 9, + 9, 2, 2, 6, 6, 2, 2, 3, 3, 10, 10, 7, 7, 11, 11, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 1, 1, 4, 4, 2, 2, 5, 5, 4, 4, 8, + 8, 6, 6, 8, 8, 9, 9, 12, 12, 10, 10, 13, 13, 14, 14, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 8, 8, 0, 0, 16, 16, 1, 1, 24, 24, 9, 9, 1, 1, 32, + 32, 17, 17, 2, 2, 25, 25, 10, 10, 40, 40, 2, 2, 18, 18, 33, 33, 3, 3, + 48, 48, 11, 11, 26, 26, 3, 3, 41, 41, 19, 19, 34, 34, 4, 4, 27, 27, 12, + 12, 49, 49, 42, 42, 20, 20, 4, 4, 35, 35, 5, 5, 28, 28, 50, 50, 43, 43, + 13, 13, 36, 36, 5, 5, 21, 21, 51, 51, 29, 29, 6, 6, 44, 44, 14, 14, 6, + 6, 37, 37, 52, 52, 22, 22, 7, 7, 30, 30, 45, 45, 15, 15, 38, 38, 23, 23, + 53, 53, 31, 31, 46, 46, 39, 39, 54, 54, 47, 47, 55, 55, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + row_scan_8x8_neighbors[65 * 
MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 1, 1, 0, 0, 8, 8, 2, 2, 8, 8, 9, 9, 3, 3, 16, + 16, 10, 10, 16, 16, 4, 4, 17, 17, 24, 24, 11, 11, 18, 18, 25, 25, 24, 24, + 5, 5, 12, 12, 19, 19, 32, 32, 26, 26, 6, 6, 33, 33, 32, 32, 20, 20, 27, + 27, 40, 40, 13, 13, 34, 34, 40, 40, 41, 41, 28, 28, 35, 35, 48, 48, 21, 21, + 42, 42, 14, 14, 48, 48, 36, 36, 49, 49, 43, 43, 29, 29, 56, 56, 22, 22, 50, + 50, 57, 57, 44, 44, 37, 37, 51, 51, 30, 30, 58, 58, 52, 52, 45, 45, 59, 59, + 38, 38, 60, 60, 46, 46, 53, 53, 54, 54, 61, 61, 62, 62, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 9, 16, 16, 16, 2, 9, 2, + 2, 10, 17, 17, 24, 24, 24, 3, 10, 3, 3, 18, 25, 25, 32, 11, 18, 32, 32, + 4, 11, 26, 33, 19, 26, 4, 4, 33, 40, 12, 19, 40, 40, 5, 12, 27, 34, 34, + 41, 20, 27, 13, 20, 5, 5, 41, 48, 48, 48, 28, 35, 35, 42, 21, 28, 6, 6, + 6, 13, 42, 49, 49, 56, 36, 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22, + 29, 37, 44, 15, 22, 44, 51, 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45, + 31, 38, 53, 60, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 16, 16, 32, 32, 0, 0, 48, 48, 1, 1, 64, + 64, 17, 17, 80, 80, 33, 33, 1, 1, 49, 49, 96, 96, 2, 2, + 65, 65, 18, 18, 112, 112, 34, 34, 81, 81, 2, 2, 50, 50, 128, + 128, 3, 3, 97, 97, 19, 19, 66, 66, 144, 144, 82, 82, 35, 35, + 113, 113, 3, 3, 51, 51, 160, 160, 4, 4, 98, 98, 129, 129, 67, + 67, 20, 20, 83, 83, 114, 114, 36, 36, 176, 176, 4, 4, 145, 145, + 52, 52, 99, 99, 5, 5, 130, 130, 68, 68, 192, 192, 161, 161, 21, + 21, 115, 115, 84, 84, 37, 37, 146, 146, 208, 208, 53, 53, 5, 5, + 100, 100, 177, 177, 131, 131, 69, 69, 6, 6, 224, 224, 116, 116, 22, + 22, 162, 162, 85, 85, 147, 147, 38, 38, 193, 193, 101, 101, 54, 54, + 6, 6, 132, 132, 178, 178, 70, 70, 163, 163, 209, 209, 7, 7, 117, + 117, 23, 23, 148, 148, 7, 7, 86, 86, 194, 194, 225, 
225, 39, 39, + 179, 179, 102, 102, 133, 133, 55, 55, 164, 164, 8, 8, 71, 71, 210, + 210, 118, 118, 149, 149, 195, 195, 24, 24, 87, 87, 40, 40, 56, 56, + 134, 134, 180, 180, 226, 226, 103, 103, 8, 8, 165, 165, 211, 211, 72, + 72, 150, 150, 9, 9, 119, 119, 25, 25, 88, 88, 196, 196, 41, 41, + 135, 135, 181, 181, 104, 104, 57, 57, 227, 227, 166, 166, 120, 120, 151, + 151, 197, 197, 73, 73, 9, 9, 212, 212, 89, 89, 136, 136, 182, 182, + 10, 10, 26, 26, 105, 105, 167, 167, 228, 228, 152, 152, 42, 42, 121, + 121, 213, 213, 58, 58, 198, 198, 74, 74, 137, 137, 183, 183, 168, 168, + 10, 10, 90, 90, 229, 229, 11, 11, 106, 106, 214, 214, 153, 153, 27, + 27, 199, 199, 43, 43, 184, 184, 122, 122, 169, 169, 230, 230, 59, 59, + 11, 11, 75, 75, 138, 138, 200, 200, 215, 215, 91, 91, 12, 12, 28, + 28, 185, 185, 107, 107, 154, 154, 44, 44, 231, 231, 216, 216, 60, 60, + 123, 123, 12, 12, 76, 76, 201, 201, 170, 170, 232, 232, 139, 139, 92, + 92, 13, 13, 108, 108, 29, 29, 186, 186, 217, 217, 155, 155, 45, 45, + 13, 13, 61, 61, 124, 124, 14, 14, 233, 233, 77, 77, 14, 14, 171, + 171, 140, 140, 202, 202, 30, 30, 93, 93, 109, 109, 46, 46, 156, 156, + 62, 62, 187, 187, 15, 15, 125, 125, 218, 218, 78, 78, 31, 31, 172, + 172, 47, 47, 141, 141, 94, 94, 234, 234, 203, 203, 63, 63, 110, 110, + 188, 188, 157, 157, 126, 126, 79, 79, 173, 173, 95, 95, 219, 219, 142, + 142, 204, 204, 235, 235, 111, 111, 158, 158, 127, 127, 189, 189, 220, 220, + 143, 143, 174, 174, 205, 205, 236, 236, 159, 159, 190, 190, 221, 221, 175, + 175, 237, 237, 206, 206, 222, 222, 191, 191, 238, 238, 207, 207, 223, 223, + 239, 239, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 16, 16, 3, 3, 17, + 17, 16, 16, 4, 4, 32, 32, 18, 18, 5, 5, 33, 33, 32, 32, + 19, 19, 48, 48, 6, 6, 34, 34, 20, 20, 49, 49, 48, 48, 7, + 7, 35, 35, 64, 64, 21, 21, 50, 50, 36, 36, 64, 64, 8, 8, + 65, 65, 51, 51, 22, 22, 37, 37, 80, 80, 66, 66, 9, 9, 52, + 52, 23, 
23, 81, 81, 67, 67, 80, 80, 38, 38, 10, 10, 53, 53, + 82, 82, 96, 96, 68, 68, 24, 24, 97, 97, 83, 83, 39, 39, 96, + 96, 54, 54, 11, 11, 69, 69, 98, 98, 112, 112, 84, 84, 25, 25, + 40, 40, 55, 55, 113, 113, 99, 99, 12, 12, 70, 70, 112, 112, 85, + 85, 26, 26, 114, 114, 100, 100, 128, 128, 41, 41, 56, 56, 71, 71, + 115, 115, 13, 13, 86, 86, 129, 129, 101, 101, 128, 128, 72, 72, 130, + 130, 116, 116, 27, 27, 57, 57, 14, 14, 87, 87, 42, 42, 144, 144, + 102, 102, 131, 131, 145, 145, 117, 117, 73, 73, 144, 144, 88, 88, 132, + 132, 103, 103, 28, 28, 58, 58, 146, 146, 118, 118, 43, 43, 160, 160, + 147, 147, 89, 89, 104, 104, 133, 133, 161, 161, 119, 119, 160, 160, 74, + 74, 134, 134, 148, 148, 29, 29, 59, 59, 162, 162, 176, 176, 44, 44, + 120, 120, 90, 90, 105, 105, 163, 163, 177, 177, 149, 149, 176, 176, 135, + 135, 164, 164, 178, 178, 30, 30, 150, 150, 192, 192, 75, 75, 121, 121, + 60, 60, 136, 136, 193, 193, 106, 106, 151, 151, 179, 179, 192, 192, 45, + 45, 165, 165, 166, 166, 194, 194, 91, 91, 180, 180, 137, 137, 208, 208, + 122, 122, 152, 152, 208, 208, 195, 195, 76, 76, 167, 167, 209, 209, 181, + 181, 224, 224, 107, 107, 196, 196, 61, 61, 153, 153, 224, 224, 182, 182, + 168, 168, 210, 210, 46, 46, 138, 138, 92, 92, 183, 183, 225, 225, 211, + 211, 240, 240, 197, 197, 169, 169, 123, 123, 154, 154, 198, 198, 77, 77, + 212, 212, 184, 184, 108, 108, 226, 226, 199, 199, 62, 62, 227, 227, 241, + 241, 139, 139, 213, 213, 170, 170, 185, 185, 155, 155, 228, 228, 242, 242, + 124, 124, 93, 93, 200, 200, 243, 243, 214, 214, 215, 215, 229, 229, 140, + 140, 186, 186, 201, 201, 78, 78, 171, 171, 109, 109, 156, 156, 244, 244, + 216, 216, 230, 230, 94, 94, 245, 245, 231, 231, 125, 125, 202, 202, 246, + 246, 232, 232, 172, 172, 217, 217, 141, 141, 110, 110, 157, 157, 187, 187, + 247, 247, 126, 126, 233, 233, 218, 218, 248, 248, 188, 188, 203, 203, 142, + 142, 173, 173, 158, 158, 249, 249, 234, 234, 204, 204, 219, 219, 174, 174, + 189, 189, 250, 250, 220, 220, 190, 190, 205, 205, 235, 
235, 206, 206, 236, + 236, 251, 251, 221, 221, 252, 252, 222, 222, 237, 237, 238, 238, 253, 253, + 254, 254, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 32, 32, 17, + 32, 2, 17, 2, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64, + 64, 64, 34, 49, 3, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80, + 80, 35, 50, 4, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 96, + 5, 20, 36, 51, 82, 97, 21, 36, 67, 82, 97, 112, 5, 5, 52, + 67, 112, 112, 37, 52, 6, 21, 83, 98, 98, 113, 68, 83, 6, 6, + 113, 128, 22, 37, 53, 68, 84, 99, 99, 114, 128, 128, 114, 129, 69, + 84, 38, 53, 7, 22, 7, 7, 129, 144, 23, 38, 54, 69, 100, 115, + 85, 100, 115, 130, 144, 144, 130, 145, 39, 54, 70, 85, 8, 23, 55, + 70, 116, 131, 101, 116, 145, 160, 24, 39, 8, 8, 86, 101, 131, 146, + 160, 160, 146, 161, 71, 86, 40, 55, 9, 24, 117, 132, 102, 117, 161, + 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, 9, 9, 176, 176, + 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, 10, 25, 148, + 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 192, 10, 10, + 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, 164, + 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42, + 74, 89, 208, 208, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43, + 58, 11, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136, + 209, 224, 195, 210, 224, 224, 166, 181, 106, 121, 75, 90, 12, 27, 181, + 196, 12, 12, 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211, + 122, 137, 91, 106, 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168, + 183, 211, 226, 153, 168, 226, 241, 60, 75, 197, 212, 138, 153, 29, 44, + 76, 91, 13, 13, 183, 198, 123, 138, 45, 60, 212, 227, 198, 213, 154, + 169, 169, 184, 227, 242, 92, 107, 61, 76, 139, 154, 14, 29, 14, 14, + 184, 199, 213, 228, 108, 123, 199, 214, 228, 243, 77, 92, 30, 45, 170, + 185, 155, 170, 185, 200, 93, 108, 124, 139, 214, 229, 46, 61, 200, 215, + 229, 244, 
15, 30, 109, 124, 62, 77, 140, 155, 215, 230, 31, 46, 171, + 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, 47, 62, 216, 231, + 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, 187, 202, 110, + 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, 203, 218, + 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, 234, + 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, 250, + 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, 236, + 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, 238, + 239, 254, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 64, 64, + 33, 64, 2, 33, 96, 96, 2, 2, 65, 96, 34, 65, 128, 128, + 97, 128, 3, 34, 66, 97, 3, 3, 35, 66, 98, 129, 129, 160, + 160, 160, 4, 35, 67, 98, 192, 192, 4, 4, 130, 161, 161, 192, + 36, 67, 99, 130, 5, 36, 68, 99, 193, 224, 162, 193, 224, 224, + 131, 162, 37, 68, 100, 131, 5, 5, 194, 225, 225, 256, 256, 256, + 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 6, 6, 195, 226, + 257, 288, 101, 132, 288, 288, 38, 69, 164, 195, 133, 164, 258, 289, + 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 320, 7, 7, + 165, 196, 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352, + 352, 352, 197, 228, 134, 165, 71, 102, 8, 39, 322, 353, 291, 322, + 260, 291, 103, 134, 353, 384, 166, 197, 229, 260, 40, 71, 8, 8, + 384, 384, 135, 166, 354, 385, 323, 354, 198, 229, 292, 323, 72, 103, + 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, 230, 261, 355, 386, + 416, 416, 293, 324, 324, 355, 9, 9, 41, 72, 386, 417, 199, 230, + 136, 167, 417, 448, 262, 293, 356, 387, 73, 104, 387, 418, 231, 262, + 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 448, 42, 73, + 294, 325, 200, 231, 10, 10, 357, 388, 137, 168, 263, 294, 388, 419, + 74, 105, 419, 450, 449, 480, 326, 357, 232, 263, 295, 326, 169, 200, + 11, 42, 106, 137, 480, 480, 450, 481, 
358, 389, 264, 295, 201, 232, + 138, 169, 389, 420, 43, 74, 420, 451, 327, 358, 11, 11, 481, 512, + 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, 482, 513, 512, 512, + 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, 452, 483, + 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265, + 297, 328, 422, 453, 12, 12, 391, 422, 171, 202, 76, 107, 514, 545, + 453, 484, 544, 544, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329, + 140, 171, 515, 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485, + 45, 76, 172, 203, 330, 361, 576, 576, 13, 13, 267, 298, 546, 577, + 77, 108, 204, 235, 455, 486, 577, 608, 299, 330, 109, 140, 547, 578, + 14, 45, 14, 14, 141, 172, 578, 609, 331, 362, 46, 77, 173, 204, + 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, 142, 173, + 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142, + 48, 79, 143, 174, 80, 111, 175, 206, 17, 48, 17, 17, 207, 238, + 49, 80, 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50, + 51, 82, 83, 114, 608, 608, 484, 515, 360, 391, 236, 267, 112, 143, + 19, 19, 640, 640, 609, 640, 516, 547, 485, 516, 392, 423, 361, 392, + 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, 20, 20, 672, 672, + 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, 393, 424, + 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, 145, + 52, 83, 21, 52, 21, 21, 704, 704, 673, 704, 642, 673, 611, 642, + 580, 611, 549, 580, 518, 549, 487, 518, 456, 487, 425, 456, 394, 425, + 363, 394, 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208, + 146, 177, 115, 146, 84, 115, 53, 84, 22, 53, 22, 22, 705, 736, + 674, 705, 643, 674, 581, 612, 550, 581, 519, 550, 457, 488, 426, 457, + 395, 426, 333, 364, 302, 333, 271, 302, 209, 240, 178, 209, 147, 178, + 85, 116, 54, 85, 23, 54, 706, 737, 675, 706, 582, 613, 551, 582, + 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, 179, 210, 86, 117, + 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, 87, 118, + 736, 736, 612, 643, 488, 519, 364, 
395, 240, 271, 116, 147, 23, 23, + 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427, + 365, 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 24, 24, + 800, 800, 769, 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583, + 521, 552, 490, 521, 428, 459, 397, 428, 366, 397, 304, 335, 273, 304, + 242, 273, 180, 211, 149, 180, 118, 149, 56, 87, 25, 56, 25, 25, + 832, 832, 801, 832, 770, 801, 739, 770, 708, 739, 677, 708, 646, 677, + 615, 646, 584, 615, 553, 584, 522, 553, 491, 522, 460, 491, 429, 460, + 398, 429, 367, 398, 336, 367, 305, 336, 274, 305, 243, 274, 212, 243, + 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, 57, 26, 26, + 833, 864, 802, 833, 771, 802, 709, 740, 678, 709, 647, 678, 585, 616, + 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337, + 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, 27, 58, + 834, 865, 803, 834, 710, 741, 679, 710, 586, 617, 555, 586, 462, 493, + 431, 462, 338, 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90, + 835, 866, 711, 742, 587, 618, 463, 494, 339, 370, 215, 246, 91, 122, + 864, 864, 740, 771, 616, 647, 492, 523, 368, 399, 244, 275, 120, 151, + 27, 27, 896, 896, 865, 896, 772, 803, 741, 772, 648, 679, 617, 648, + 524, 555, 493, 524, 400, 431, 369, 400, 276, 307, 245, 276, 152, 183, + 121, 152, 28, 59, 28, 28, 928, 928, 897, 928, 866, 897, 804, 835, + 773, 804, 742, 773, 680, 711, 649, 680, 618, 649, 556, 587, 525, 556, + 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, 277, + 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 29, 29, 960, 960, + 929, 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774, + 712, 743, 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557, + 495, 526, 464, 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340, + 278, 309, 247, 278, 216, 247, 185, 216, 154, 185, 123, 154, 92, 123, + 61, 92, 30, 61, 30, 30, 961, 992, 930, 961, 899, 930, 837, 868, + 806, 837, 775, 806, 713, 744, 682, 713, 651, 682, 589, 620, 558, 
589, + 527, 558, 465, 496, 434, 465, 403, 434, 341, 372, 310, 341, 279, 310, + 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, 31, 62, 962, 993, + 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, 559, 590, + 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, 125, + 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374, + 219, 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403, + 248, 279, 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683, + 621, 652, 528, 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280, + 156, 187, 125, 156, 932, 963, 901, 932, 870, 901, 808, 839, 777, 808, + 746, 777, 684, 715, 653, 684, 622, 653, 560, 591, 529, 560, 498, 529, + 436, 467, 405, 436, 374, 405, 312, 343, 281, 312, 250, 281, 188, 219, + 157, 188, 126, 157, 964, 995, 933, 964, 902, 933, 871, 902, 840, 871, + 809, 840, 778, 809, 747, 778, 716, 747, 685, 716, 654, 685, 623, 654, + 592, 623, 561, 592, 530, 561, 499, 530, 468, 499, 437, 468, 406, 437, + 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, 220, 251, 189, 220, + 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, 872, 810, 841, + 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, 531, 562, + 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, 252, + 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749, + 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346, + 222, 253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502, + 347, 378, 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407, + 252, 283, 904, 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656, + 532, 563, 501, 532, 408, 439, 377, 408, 284, 315, 253, 284, 936, 967, + 905, 936, 874, 905, 812, 843, 781, 812, 750, 781, 688, 719, 657, 688, + 626, 657, 564, 595, 533, 564, 502, 533, 440, 471, 409, 440, 378, 409, + 316, 347, 285, 316, 254, 285, 968, 999, 937, 968, 906, 937, 875, 906, + 844, 875, 813, 844, 782, 813, 751, 782, 720, 751, 689, 
720, 658, 689, + 627, 658, 596, 627, 565, 596, 534, 565, 503, 534, 472, 503, 441, 472, + 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, 255, 286, 969, 1000, + 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, 752, 690, 721, + 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, 411, 442, + 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, 846, + 722, 753, 691, 722, 598, 629, 567, 598, 474, 505, 443, 474, 350, 381, + 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382, + 876, 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908, + 784, 815, 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443, + 381, 412, 940, 971, 909, 940, 878, 909, 816, 847, 785, 816, 754, 785, + 692, 723, 661, 692, 630, 661, 568, 599, 537, 568, 506, 537, 444, 475, + 413, 444, 382, 413, 972, 1003, 941, 972, 910, 941, 879, 910, 848, 879, + 817, 848, 786, 817, 755, 786, 724, 755, 693, 724, 662, 693, 631, 662, + 600, 631, 569, 600, 538, 569, 507, 538, 476, 507, 445, 476, 414, 445, + 383, 414, 973, 1004, 942, 973, 911, 942, 849, 880, 818, 849, 787, 818, + 725, 756, 694, 725, 663, 694, 601, 632, 570, 601, 539, 570, 477, 508, + 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, 819, 850, 726, 757, + 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, 1006, 851, 882, + 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, 508, 539, + 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 664, 540, 571, + 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789, + 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007, + 945, 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790, + 728, 759, 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573, + 511, 542, 977, 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822, + 729, 760, 698, 729, 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009, + 947, 978, 854, 885, 823, 854, 730, 761, 699, 730, 606, 637, 575, 606, + 979, 1010, 855, 886, 731, 
762, 607, 638, 884, 915, 760, 791, 636, 667, + 916, 947, 885, 916, 792, 823, 761, 792, 668, 699, 637, 668, 948, 979, + 917, 948, 886, 917, 824, 855, 793, 824, 762, 793, 700, 731, 669, 700, + 638, 669, 980, 1011, 949, 980, 918, 949, 887, 918, 856, 887, 825, 856, + 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, 639, 670, 981, 1012, + 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, 764, 702, 733, + 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, 703, 734, + 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, 920, + 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828, + 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860, + 798, 829, 767, 798, 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861, + 799, 830, 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894, + 892, 923, 924, 955, 893, 924, 956, 987, 925, 956, 894, 925, 988, 1019, + 957, 988, 926, 957, 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021, + 959, 990, 991, 1022, 0, 0, +}; + +// Add 1 to iscan values. This represents the EOB position instead of the index. 
+DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_4x4[16]) = { + 1, 3, 6, 9, 2, 4, 10, 13, 5, 8, 12, 15, 7, 11, 14, 16, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_4x4[16]) = { + 1, 4, 8, 12, 2, 6, 10, 13, 3, 7, 11, 15, 5, 9, 14, 16, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_4x4[16]) = { + 1, 2, 4, 6, 3, 5, 7, 10, 8, 9, 12, 14, 11, 13, 15, 16, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_8x8[64]) = { + 1, 4, 9, 16, 23, 33, 41, 48, 2, 6, 12, 19, 27, 35, 45, 52, + 3, 8, 14, 21, 29, 39, 47, 55, 5, 11, 17, 25, 32, 42, 51, 57, + 7, 13, 22, 28, 36, 44, 53, 59, 10, 18, 26, 34, 40, 49, 56, 61, + 15, 24, 31, 38, 46, 54, 60, 63, 20, 30, 37, 43, 50, 58, 62, 64, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_8x8[64]) = { + 1, 2, 3, 6, 9, 13, 20, 25, 4, 5, 8, 11, 16, 21, 31, 40, + 7, 10, 14, 17, 22, 28, 38, 47, 12, 15, 18, 24, 29, 35, 45, 53, + 19, 23, 26, 32, 36, 42, 51, 58, 27, 30, 34, 39, 44, 50, 56, 60, + 33, 37, 43, 48, 52, 55, 61, 62, 41, 46, 49, 54, 57, 59, 63, 64, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_8x8[64]) = { + 1, 3, 6, 10, 15, 23, 32, 38, 2, 5, 9, 14, 20, 27, 39, 45, + 4, 7, 11, 18, 25, 31, 43, 50, 8, 12, 16, 22, 30, 37, 48, 54, + 13, 17, 21, 28, 35, 44, 53, 58, 19, 24, 29, 36, 42, 49, 57, 61, + 26, 33, 40, 46, 51, 56, 60, 63, 34, 41, 47, 52, 55, 59, 62, 64, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_16x16[256]) = { + 1, 5, 12, 21, 32, 44, 60, 76, 86, 110, 131, 151, 166, 182, 196, 199, + 2, 7, 15, 24, 35, 48, 65, 82, 96, 115, 136, 154, 172, 189, 202, 213, + 3, 9, 17, 26, 39, 53, 68, 84, 102, 117, 137, 158, 173, 191, 206, 217, + 4, 11, 19, 30, 42, 56, 72, 90, 104, 120, 142, 160, 177, 195, 209, 219, + 6, 13, 22, 33, 46, 59, 75, 94, 105, 124, 145, 165, 180, 197, 211, 224, + 8, 16, 27, 38, 50, 64, 79, 97, 113, 130, 147, 167, 183, 201, 216, 229, + 10, 20, 29, 40, 55, 70, 87, 103, 118, 133, 152, 171, 188, 207, 221, 231, + 14, 25, 36, 47, 
61, 74, 92, 109, 123, 138, 155, 175, 190, 208, 225, 236, + 18, 31, 41, 54, 67, 83, 99, 116, 127, 143, 162, 181, 198, 214, 228, 238, + 23, 37, 49, 63, 77, 93, 106, 121, 134, 148, 168, 187, 204, 220, 233, 241, + 28, 45, 57, 71, 85, 100, 114, 128, 141, 157, 176, 194, 210, 227, 237, 245, + 34, 52, 69, 80, 95, 111, 126, 139, 150, 163, 185, 203, 218, 230, 242, 248, + 43, 62, 78, 91, 107, 122, 135, 149, 161, 174, 192, 212, 226, 239, 246, 252, + 51, 73, 88, 101, 119, 129, 146, 159, 169, 184, 205, 223, 234, 243, 250, 254, + 58, 81, 98, 112, 132, 144, 156, 170, 179, 193, 215, 232, 240, 247, 251, 255, + 66, 89, 108, 125, 140, 153, 164, 178, 186, 200, 222, 235, 244, 249, 253, 256, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_16x16[256]) = { + 1, 2, 3, 5, 7, 10, 13, 18, 23, 30, 37, 44, 55, 65, 77, + 87, 4, 6, 8, 12, 16, 20, 26, 33, 39, 49, 60, 69, 85, 100, + 116, 131, 9, 11, 14, 19, 24, 28, 34, 43, 52, 61, 73, 89, 104, + 120, 143, 168, 15, 17, 21, 27, 32, 38, 45, 54, 62, 74, 86, 101, + 117, 136, 162, 186, 22, 25, 31, 36, 41, 48, 56, 66, 75, 82, 95, + 113, 134, 155, 180, 206, 29, 35, 40, 46, 51, 59, 68, 78, 88, 97, + 107, 122, 147, 170, 197, 213, 42, 47, 50, 57, 64, 71, 80, 91, 99, + 108, 123, 139, 160, 183, 208, 223, 53, 58, 63, 70, 76, 84, 94, 103, + 111, 121, 135, 151, 177, 196, 216, 227, 67, 72, 79, 83, 92, 98, 109, + 114, 128, 137, 149, 169, 189, 203, 222, 233, 81, 90, 93, 102, 106, 115, + 126, 132, 140, 152, 163, 178, 193, 209, 224, 235, 96, 105, 110, 118, 124, + 129, 144, 145, 156, 166, 176, 191, 207, 220, 234, 240, 112, 119, 125, 130, + 141, 148, 158, 165, 171, 182, 192, 204, 225, 231, 241, 244, 127, 133, 138, + 146, 154, 161, 175, 179, 185, 198, 205, 217, 232, 238, 245, 247, 142, 150, + 157, 167, 173, 181, 190, 200, 201, 211, 221, 229, 239, 243, 250, 252, 153, + 164, 172, 184, 187, 194, 202, 212, 215, 219, 228, 237, 246, 248, 253, 254, + 159, 174, 188, 195, 199, 210, 214, 218, 226, 230, 236, 242, 249, 251, 255, + 256, +}; + +DECLARE_ALIGNED(16, static 
const int16_t, vp9_default_iscan_16x16[256]) = { + 1, 3, 6, 10, 18, 25, 37, 45, 56, 73, 89, 105, 129, 144, 167, + 180, 2, 5, 9, 14, 21, 31, 41, 55, 67, 80, 97, 114, 142, 155, + 179, 197, 4, 8, 12, 19, 26, 34, 47, 58, 72, 87, 102, 120, 149, + 165, 187, 202, 7, 13, 17, 24, 32, 40, 54, 65, 79, 93, 111, 128, + 154, 170, 194, 209, 11, 15, 20, 29, 38, 48, 59, 68, 85, 99, 115, + 134, 162, 177, 199, 215, 16, 22, 27, 35, 44, 53, 66, 78, 92, 107, + 121, 141, 166, 186, 206, 222, 23, 28, 33, 42, 49, 61, 74, 86, 100, + 117, 131, 152, 176, 191, 212, 226, 30, 36, 43, 50, 60, 70, 82, 96, + 109, 126, 140, 156, 183, 198, 218, 230, 39, 46, 52, 62, 69, 81, 94, + 106, 119, 135, 151, 169, 192, 208, 224, 235, 51, 57, 64, 75, 84, 95, + 110, 118, 130, 148, 164, 178, 200, 214, 229, 239, 63, 71, 77, 88, 98, + 108, 123, 132, 146, 160, 173, 189, 211, 223, 236, 243, 76, 83, 91, 103, + 113, 125, 139, 147, 158, 174, 188, 203, 220, 231, 241, 246, 90, 101, 112, + 124, 133, 143, 157, 168, 181, 190, 204, 217, 232, 238, 247, 251, 104, 116, + 127, 137, 150, 163, 172, 184, 195, 205, 216, 225, 237, 242, 249, 253, 122, + 136, 145, 159, 171, 182, 193, 201, 210, 219, 228, 234, 244, 245, 252, 255, + 138, 153, 161, 175, 185, 196, 207, 213, 221, 227, 233, 240, 248, 250, 254, + 256, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_32x32[1024]) = { + 1, 3, 6, 11, 18, 26, 39, 48, 63, 84, 102, 122, 146, + 171, 194, 205, 211, 220, 230, 234, 246, 258, 276, 300, 343, 357, + 378, 406, 456, 472, 496, 528, 2, 5, 9, 16, 23, 31, 46, + 59, 75, 93, 113, 134, 159, 185, 204, 216, 223, 229, 235, 238, + 257, 275, 299, 318, 356, 377, 405, 427, 471, 495, 527, 552, 4, + 8, 13, 19, 29, 37, 53, 65, 83, 103, 119, 143, 165, 190, + 209, 218, 225, 232, 236, 239, 274, 298, 317, 330, 376, 404, 426, + 441, 494, 526, 551, 568, 7, 12, 17, 24, 32, 44, 61, 74, + 91, 110, 127, 151, 174, 197, 212, 221, 227, 233, 237, 240, 297, + 316, 329, 336, 403, 425, 440, 448, 525, 550, 567, 576, 10, 15, + 20, 30, 38, 51, 66, 79, 96, 117, 
135, 158, 180, 202, 215, + 224, 245, 256, 273, 296, 342, 355, 375, 402, 455, 470, 493, 524, + 583, 597, 618, 646, 14, 21, 27, 36, 45, 55, 73, 86, 106, + 124, 141, 164, 183, 206, 217, 226, 255, 272, 295, 315, 354, 374, + 401, 424, 469, 492, 523, 549, 596, 617, 645, 667, 22, 28, 34, + 43, 54, 64, 81, 95, 114, 133, 152, 173, 191, 210, 219, 228, + 271, 294, 314, 328, 373, 400, 423, 439, 491, 522, 548, 566, 616, + 644, 666, 681, 25, 33, 40, 49, 58, 72, 89, 105, 121, 140, + 160, 179, 198, 213, 222, 231, 293, 313, 327, 335, 399, 422, 438, + 447, 521, 547, 565, 575, 643, 665, 680, 688, 35, 41, 47, 57, + 69, 82, 97, 112, 131, 148, 168, 187, 244, 254, 270, 292, 341, + 353, 372, 398, 454, 468, 490, 520, 582, 595, 615, 642, 694, 706, + 724, 748, 42, 50, 56, 68, 78, 92, 108, 125, 139, 162, 178, + 195, 253, 269, 291, 312, 352, 371, 397, 421, 467, 489, 519, 546, + 594, 614, 641, 664, 705, 723, 747, 766, 52, 60, 67, 77, 90, + 100, 120, 132, 150, 169, 182, 201, 268, 290, 311, 326, 370, 396, + 420, 437, 488, 518, 545, 564, 613, 640, 663, 679, 722, 746, 765, + 778, 62, 70, 76, 88, 101, 115, 130, 145, 163, 181, 192, 208, + 289, 310, 325, 334, 395, 419, 436, 446, 517, 544, 563, 574, 639, + 662, 678, 687, 745, 764, 777, 784, 71, 80, 87, 98, 109, 123, + 138, 156, 243, 252, 267, 288, 340, 351, 369, 394, 453, 466, 487, + 516, 581, 593, 612, 638, 693, 704, 721, 744, 789, 799, 814, 834, + 85, 94, 104, 111, 126, 142, 155, 172, 251, 266, 287, 309, 350, + 368, 393, 418, 465, 486, 515, 543, 592, 611, 637, 661, 703, 720, + 743, 763, 798, 813, 833, 849, 99, 107, 116, 128, 144, 157, 170, + 186, 265, 286, 308, 324, 367, 392, 417, 435, 485, 514, 542, 562, + 610, 636, 660, 677, 719, 742, 762, 776, 812, 832, 848, 859, 118, + 129, 137, 149, 161, 176, 189, 199, 285, 307, 323, 333, 391, 416, + 434, 445, 513, 541, 561, 573, 635, 659, 676, 686, 741, 761, 775, + 783, 831, 847, 858, 864, 136, 147, 153, 166, 242, 250, 264, 284, + 339, 349, 366, 390, 452, 464, 484, 512, 580, 591, 609, 634, 692, + 702, 718, 740, 
788, 797, 811, 830, 868, 876, 888, 904, 154, 167, + 175, 184, 249, 263, 283, 306, 348, 365, 389, 415, 463, 483, 511, + 540, 590, 608, 633, 658, 701, 717, 739, 760, 796, 810, 829, 846, + 875, 887, 903, 916, 177, 188, 196, 203, 262, 282, 305, 322, 364, + 388, 414, 433, 482, 510, 539, 560, 607, 632, 657, 675, 716, 738, + 759, 774, 809, 828, 845, 857, 886, 902, 915, 924, 193, 200, 207, + 214, 281, 304, 321, 332, 387, 413, 432, 444, 509, 538, 559, 572, + 631, 656, 674, 685, 737, 758, 773, 782, 827, 844, 856, 863, 901, + 914, 923, 928, 241, 248, 261, 280, 338, 347, 363, 386, 451, 462, + 481, 508, 579, 589, 606, 630, 691, 700, 715, 736, 787, 795, 808, + 826, 867, 874, 885, 900, 931, 937, 946, 958, 247, 260, 279, 303, + 346, 362, 385, 412, 461, 480, 507, 537, 588, 605, 629, 655, 699, + 714, 735, 757, 794, 807, 825, 843, 873, 884, 899, 913, 936, 945, + 957, 967, 259, 278, 302, 320, 361, 384, 411, 431, 479, 506, 536, + 558, 604, 628, 654, 673, 713, 734, 756, 772, 806, 824, 842, 855, + 883, 898, 912, 922, 944, 956, 966, 973, 277, 301, 319, 331, 383, + 410, 430, 443, 505, 535, 557, 571, 627, 653, 672, 684, 733, 755, + 771, 781, 823, 841, 854, 862, 897, 911, 921, 927, 955, 965, 972, + 976, 337, 345, 360, 382, 450, 460, 478, 504, 578, 587, 603, 626, + 690, 698, 712, 732, 786, 793, 805, 822, 866, 872, 882, 896, 930, + 935, 943, 954, 978, 982, 988, 996, 344, 359, 381, 409, 459, 477, + 503, 534, 586, 602, 625, 652, 697, 711, 731, 754, 792, 804, 821, + 840, 871, 881, 895, 910, 934, 942, 953, 964, 981, 987, 995, 1002, + 358, 380, 408, 429, 476, 502, 533, 556, 601, 624, 651, 671, 710, + 730, 753, 770, 803, 820, 839, 853, 880, 894, 909, 920, 941, 952, + 963, 971, 986, 994, 1001, 1006, 379, 407, 428, 442, 501, 532, 555, + 570, 623, 650, 670, 683, 729, 752, 769, 780, 819, 838, 852, 861, + 893, 908, 919, 926, 951, 962, 970, 975, 993, 1000, 1005, 1008, 449, + 458, 475, 500, 577, 585, 600, 622, 689, 696, 709, 728, 785, 791, + 802, 818, 865, 870, 879, 892, 929, 933, 940, 950, 977, 980, 985, 
+ 992, 1009, 1011, 1014, 1018, 457, 474, 499, 531, 584, 599, 621, 649, + 695, 708, 727, 751, 790, 801, 817, 837, 869, 878, 891, 907, 932, + 939, 949, 961, 979, 984, 991, 999, 1010, 1013, 1017, 1021, 473, 498, + 530, 554, 598, 620, 648, 669, 707, 726, 750, 768, 800, 816, 836, + 851, 877, 890, 906, 918, 938, 948, 960, 969, 983, 990, 998, 1004, + 1012, 1016, 1020, 1023, 497, 529, 553, 569, 619, 647, 668, 682, 725, + 749, 767, 779, 815, 835, 850, 860, 889, 905, 917, 925, 947, 959, + 968, 974, 989, 997, 1003, 1007, 1015, 1019, 1022, 1024, +}; + +const ScanOrder vp9_default_scan_orders[TX_SIZES] = { + { default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors }, + { default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors }, +}; + +const ScanOrder vp9_scan_orders[TX_SIZES][TX_TYPES] = { + { // TX_4X4 + { default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors }, + { row_scan_4x4, vp9_row_iscan_4x4, row_scan_4x4_neighbors }, + { col_scan_4x4, vp9_col_iscan_4x4, col_scan_4x4_neighbors }, + { default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors } }, + { // TX_8X8 + { default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors }, + { row_scan_8x8, vp9_row_iscan_8x8, row_scan_8x8_neighbors }, + { col_scan_8x8, vp9_col_iscan_8x8, col_scan_8x8_neighbors }, + { default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors } }, + { // TX_16X16 + { default_scan_16x16, vp9_default_iscan_16x16, + default_scan_16x16_neighbors }, + { row_scan_16x16, vp9_row_iscan_16x16, row_scan_16x16_neighbors }, + { col_scan_16x16, vp9_col_iscan_16x16, col_scan_16x16_neighbors }, + { default_scan_16x16, vp9_default_iscan_16x16, + default_scan_16x16_neighbors } }, + { // TX_32X32 + { default_scan_32x32, vp9_default_iscan_32x32, + default_scan_32x32_neighbors }, + { 
default_scan_32x32, vp9_default_iscan_32x32,
      default_scan_32x32_neighbors },
    { default_scan_32x32, vp9_default_iscan_32x32,
      default_scan_32x32_neighbors },
    { default_scan_32x32, vp9_default_iscan_32x32,
      default_scan_32x32_neighbors } }
};
diff --git a/media/libvpx/libvpx/vp9/common/vp9_scan.h b/media/libvpx/libvpx/vp9/common/vp9_scan.h
new file mode 100644
index 0000000000..3d1dcc66da
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/vp9_scan.h
@@ -0,0 +1,58 @@
/*
 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_COMMON_VP9_SCAN_H_
#define VPX_VP9_COMMON_VP9_SCAN_H_

#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_enums.h"
#include "vp9/common/vp9_blockd.h"

#ifdef __cplusplus
extern "C" {
#endif

// Each coefficient position has 2 previously-coded neighbor positions.
#define MAX_NEIGHBORS 2

// One coefficient scan order: the forward scan table, its inverse, and for
// every scan position the pair of neighbor positions consulted when deriving
// the entropy-coding context (see get_coef_context below).
typedef struct ScanOrder {
  const int16_t *scan;
  const int16_t *iscan;
  const int16_t *neighbors;
} ScanOrder;

extern const ScanOrder vp9_default_scan_orders[TX_SIZES];
extern const ScanOrder vp9_scan_orders[TX_SIZES][TX_TYPES];

// Returns the coefficient context for scan position c: the round-half-up
// average of the token_cache values of its two neighbor positions.
static INLINE int get_coef_context(const int16_t *neighbors,
                                   const uint8_t *token_cache, int c) {
  return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
          token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >>
         1;
}

// Selects the scan order for a block. Inter blocks, chroma planes, and
// lossless coding always use the default (zig-zag style) order; intra luma
// blocks pick a row/col/default order from the predicted mode's tx type.
static INLINE const ScanOrder *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
                                        PLANE_TYPE type, int block_idx) {
  const MODE_INFO *const mi = xd->mi[0];

  if (is_inter_block(mi) || type != PLANE_TYPE_Y || xd->lossless) {
    return &vp9_default_scan_orders[tx_size];
  } else {
    const PREDICTION_MODE mode = get_y_mode(mi, block_idx);
    return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]];
  }
}

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_COMMON_VP9_SCAN_H_
diff --git a/media/libvpx/libvpx/vp9/common/vp9_seg_common.c b/media/libvpx/libvpx/vp9/common/vp9_seg_common.c
new file mode 100644
index 0000000000..1c7a1d2e9a
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/vp9_seg_common.c
@@ -0,0 +1,62 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_quant_common.h"

// Per-feature signedness flag, indexed by SEG_LVL_FEATURES
// (ALT_Q, ALT_LF, REF_FRAME, SKIP): only ALT_Q and ALT_LF carry a sign.
static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 };

// Per-feature maximum magnitude, indexed by SEG_LVL_FEATURES.
static const int seg_feature_data_max[SEG_LVL_MAX] = { MAXQ, MAX_LOOP_FILTER, 3,
                                                       0 };

// These functions provide access to new segment level features.
// Eventually these function may be "optimized out" but for the moment,
// the coding mechanism is still subject to change so these provide a
// convenient single point of change.

// Clears all per-segment feature data and enable bits, and resets the
// AQ average offset.
void vp9_clearall_segfeatures(struct segmentation *seg) {
  vp9_zero(seg->feature_data);
  vp9_zero(seg->feature_mask);
  seg->aq_av_offset = 0;
}

// Marks feature_id as active for the given segment by setting its bit in
// the segment's feature mask.
void vp9_enable_segfeature(struct segmentation *seg, int segment_id,
                           SEG_LVL_FEATURES feature_id) {
  seg->feature_mask[segment_id] |= 1 << feature_id;
}

// Returns the largest magnitude allowed for the given feature's data.
int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id) {
  return seg_feature_data_max[feature_id];
}

// Returns nonzero if the given feature's data is signed.
int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id) {
  return seg_feature_data_signed[feature_id];
}

// Stores seg_data for (segment_id, feature_id). Debug builds assert that
// the value is within the feature's legal range and is only negative when
// the feature is signed.
void vp9_set_segdata(struct segmentation *seg, int segment_id,
                     SEG_LVL_FEATURES feature_id, int seg_data) {
  assert(seg_data <= seg_feature_data_max[feature_id]);
  if (seg_data < 0) {
    assert(seg_feature_data_signed[feature_id]);
    assert(-seg_data <= seg_feature_data_max[feature_id]);
  }

  seg->feature_data[segment_id][feature_id] = seg_data;
}

// Binary tree used to entropy-code segment ids (MAX_SEGMENTS = 8 leaves).
const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
  2, 4, 6, 8, 10, 12, 0, -1, -2, -3, -4, -5, -6, -7
};

// TBD? Functions to read and write segment data with range / validity checking
diff --git a/media/libvpx/libvpx/vp9/common/vp9_seg_common.h b/media/libvpx/libvpx/vp9/common/vp9_seg_common.h
new file mode 100644
index 0000000000..5e71c2fca5
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/vp9_seg_common.h
@@ -0,0 +1,86 @@
/*
 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_COMMON_VP9_SEG_COMMON_H_
#define VPX_VP9_COMMON_VP9_SEG_COMMON_H_

#include "vpx_dsp/prob.h"

#ifdef __cplusplus
extern "C" {
#endif

#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1

#define MAX_SEGMENTS 8
#define SEG_TREE_PROBS (MAX_SEGMENTS - 1)

#define PREDICTION_PROBS 3

// Segment ID used to skip background encoding
#define BACKGROUND_SEG_SKIP_ID 3
// Number of frames that don't skip after a key frame
#define FRAMES_NO_SKIPPING_AFTER_KEY 20

// Segment level features.
typedef enum {
  SEG_LVL_ALT_Q = 0,      // Use alternate Quantizer ....
  SEG_LVL_ALT_LF = 1,     // Use alternate loop filter value...
  SEG_LVL_REF_FRAME = 2,  // Optional Segment reference frame
  SEG_LVL_SKIP = 3,       // Optional Segment (0,0) + skip mode
  SEG_LVL_MAX = 4         // Number of features supported
} SEG_LVL_FEATURES;

struct segmentation {
  uint8_t enabled;
  uint8_t update_map;
  uint8_t update_data;
  // NOTE(review): presumably SEGMENT_DELTADATA or SEGMENT_ABSDATA above —
  // confirm at the read sites.
  uint8_t abs_delta;
  uint8_t temporal_update;

  vpx_prob tree_probs[SEG_TREE_PROBS];
  vpx_prob pred_probs[PREDICTION_PROBS];

  // Per-segment feature values, indexed by SEG_LVL_FEATURES.
  int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
  // Per-segment bitmask of enabled features (bit position = feature id).
  uint32_t feature_mask[MAX_SEGMENTS];
  int aq_av_offset;
};

// Returns nonzero when segmentation is enabled and feature_id's bit is set
// in segment_id's feature mask.
static INLINE int segfeature_active(const struct segmentation *seg,
                                    int segment_id,
                                    SEG_LVL_FEATURES feature_id) {
  return seg->enabled && (seg->feature_mask[segment_id] & (1 << feature_id));
}

void vp9_clearall_segfeatures(struct segmentation *seg);

void vp9_enable_segfeature(struct segmentation *seg, int segment_id,
                           SEG_LVL_FEATURES feature_id);

int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id);

int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id);

void vp9_set_segdata(struct segmentation *seg, int segment_id,
                     SEG_LVL_FEATURES feature_id, int seg_data);

// Returns the stored data value for (segment_id, feature_id).
static INLINE int get_segdata(const struct segmentation *seg, int segment_id,
                              SEG_LVL_FEATURES feature_id) {
  return seg->feature_data[segment_id][feature_id];
}

extern const vpx_tree_index
vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)];

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_COMMON_VP9_SEG_COMMON_H_
diff --git a/media/libvpx/libvpx/vp9/common/vp9_thread_common.c b/media/libvpx/libvpx/vp9/common/vp9_thread_common.c
new file mode 100644
index 0000000000..1c6ecc0fe6
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/vp9_thread_common.c
@@ -0,0 +1,602 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include "./vpx_config.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_thread_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_loopfilter.h"

#if CONFIG_MULTITHREAD
// Spins on trylock a bounded number of times before falling back to a
// blocking lock. NOTE(review): presumably cheaper than blocking immediately
// for the short critical sections below — confirm rationale upstream.
static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
  const int kMaxTryLocks = 4000;
  int locked = 0;
  int i;

  for (i = 0; i < kMaxTryLocks; ++i) {
    if (!pthread_mutex_trylock(mutex)) {
      locked = 1;
      break;
    }
  }

  if (!locked) pthread_mutex_lock(mutex);
}
#endif  // CONFIG_MULTITHREAD

// Before filtering SB column c of SB row r, blocks until the row above has
// progressed at least sync_range columns past c. Only checks on rows > 0 and
// at sync_range-aligned columns (sync_range is a power of two here).
static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) {
#if CONFIG_MULTITHREAD
  const int nsync = lf_sync->sync_range;

  if (r && !(c & (nsync - 1))) {
    pthread_mutex_t *const mutex = &lf_sync->mutex[r - 1];
    mutex_lock(mutex);

    while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
      pthread_cond_wait(&lf_sync->cond[r - 1], mutex);
    }
    pthread_mutex_unlock(mutex);
  }
#else
  (void)lf_sync;
  (void)r;
  (void)c;
#endif  // CONFIG_MULTITHREAD
}

// Publishes filtering progress for SB row r after finishing column c and
// signals any thread waiting in sync_read() on this row. The last column
// publishes sb_cols + nsync so the next row can run to completion.
static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c,
                              const int sb_cols) {
#if CONFIG_MULTITHREAD
  const int nsync = lf_sync->sync_range;
  int cur;
  // Only signal when there are enough filtered SB for next row to run.
  int sig = 1;

  if (c < sb_cols - 1) {
    cur = c;
    if (c % nsync) sig = 0;
  } else {
    cur = sb_cols + nsync;
  }

  if (sig) {
    mutex_lock(&lf_sync->mutex[r]);

    lf_sync->cur_sb_col[r] = cur;

    pthread_cond_signal(&lf_sync->cond[r]);
    pthread_mutex_unlock(&lf_sync->mutex[r]);
  }
#else
  (void)lf_sync;
  (void)r;
  (void)c;
  (void)sb_cols;
#endif  // CONFIG_MULTITHREAD
}

// Implement row loopfiltering for each thread. Filters SB rows
// start, start + k*active_workers, ... up to stop, synchronizing with the
// row above via sync_read/sync_write.
static INLINE void thread_loop_filter_rows(
    const YV12_BUFFER_CONFIG *const frame_buffer, VP9_COMMON *const cm,
    struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop,
    int y_only, VP9LfSync *const lf_sync) {
  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
  const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
  const int num_active_workers = lf_sync->num_active_workers;
  int mi_row, mi_col;
  enum lf_path path;
  // Pick the per-plane filter path from the chroma subsampling mode.
  if (y_only)
    path = LF_PATH_444;
  else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
    path = LF_PATH_420;
  else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
    path = LF_PATH_444;
  else
    path = LF_PATH_SLOW;

  assert(num_active_workers > 0);

  // Each worker handles every num_active_workers-th superblock row.
  for (mi_row = start; mi_row < stop;
       mi_row += num_active_workers * MI_BLOCK_SIZE) {
    MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
    LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0);

    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) {
      const int r = mi_row >> MI_BLOCK_SIZE_LOG2;
      const int c = mi_col >> MI_BLOCK_SIZE_LOG2;
      int plane;

      // Wait for the row above to get far enough ahead.
      sync_read(lf_sync, r, c);

      vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);

      vp9_adjust_mask(cm, mi_row, mi_col, lfm);

      // Luma always uses the non-subsampled path.
      vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm);
      for (plane = 1; plane < num_planes; ++plane) {
        switch (path) {
          case LF_PATH_420:
            vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm);
            break;
          case LF_PATH_444:
            vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm);
            break;
          case LF_PATH_SLOW:
            vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
                                          mi_row, mi_col);
            break;
        }
      }

      // Publish progress for workers on the row below.
      sync_write(lf_sync, r, c, sb_cols);
    }
  }
}

// Row-based multi-threaded loopfilter hook
static int loop_filter_row_worker(void *arg1, void *arg2) {
  VP9LfSync *const lf_sync = (VP9LfSync *)arg1;
  LFWorkerData *const lf_data = (LFWorkerData *)arg2;
  thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
                          lf_data->start, lf_data->stop, lf_data->y_only,
                          lf_sync);
  return 1;
}

static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm,
                                struct macroblockd_plane planes[MAX_MB_PLANE],
                                int start, int stop, int y_only,
                                VPxWorker *workers, int nworkers,
                                VP9LfSync *lf_sync) {
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  // Number of superblock rows and cols
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  const int num_tile_cols = 1 << cm->log2_tile_cols;
  // Limit the number of workers to prevent changes in frame dimensions from
  // causing incorrect sync calculations when sb_rows < threads/tile_cols.
  // Further restrict them by the number of tile columns should the user
  // request more as this implementation doesn't scale well beyond that.
  const int num_workers = VPXMIN(nworkers, VPXMIN(num_tile_cols, sb_rows));
  int i;

  // (Re)allocate the sync structure when it is unset or its geometry no
  // longer matches this frame / worker count.
  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
      num_workers > lf_sync->num_workers) {
    vp9_loop_filter_dealloc(lf_sync);
    vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
  }
  lf_sync->num_active_workers = num_workers;

  // Initialize cur_sb_col to -1 for all SB rows.
+ memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); + + // Set up loopfilter thread data. + // The decoder is capping num_workers because it has been observed that using + // more threads on the loopfilter than there are cores will hurt performance + // on Android. This is because the system will only schedule the tile decode + // workers on cores equal to the number of tile columns. Then if the decoder + // tries to use more threads for the loopfilter, it will hurt performance + // because of contention. If the multithreading code changes in the future + // then the number of workers used by the loopfilter should be revisited. + for (i = 0; i < num_workers; ++i) { + VPxWorker *const worker = &workers[i]; + LFWorkerData *const lf_data = &lf_sync->lfdata[i]; + + worker->hook = loop_filter_row_worker; + worker->data1 = lf_sync; + worker->data2 = lf_data; + + // Loopfilter data + vp9_loop_filter_data_reset(lf_data, frame, cm, planes); + lf_data->start = start + i * MI_BLOCK_SIZE; + lf_data->stop = stop; + lf_data->y_only = y_only; + + // Start loopfiltering + if (i == num_workers - 1) { + winterface->execute(worker); + } else { + winterface->launch(worker); + } + } + + // Wait till all rows are finished + for (i = 0; i < num_workers; ++i) { + winterface->sync(&workers[i]); + } +} + +void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int frame_filter_level, int y_only, + int partial_frame, VPxWorker *workers, + int num_workers, VP9LfSync *lf_sync) { + int start_mi_row, end_mi_row, mi_rows_to_filter; + + if (!frame_filter_level) return; + + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + vp9_loop_filter_frame_init(cm, frame_filter_level); + + 
loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, y_only, + workers, num_workers, lf_sync); +} + +void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level, + int num_workers) { + const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + + if (!frame_filter_level) return; + + if (!lf_sync->sync_range || sb_rows != lf_sync->rows || + num_workers > lf_sync->num_workers) { + vp9_loop_filter_dealloc(lf_sync); + vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); + } + + // Initialize cur_sb_col to -1 for all SB rows. + memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); + + lf_sync->corrupted = 0; + + memset(lf_sync->num_tiles_done, 0, + sizeof(*lf_sync->num_tiles_done) * sb_rows); + cm->lf_row = 0; +} + +// Set up nsync by width. +static INLINE int get_sync_range(int width) { + // nsync numbers are picked by testing. For example, for 4k + // video, using 4 gives best performance. + if (width < 640) + return 1; + else if (width <= 1280) + return 2; + else if (width <= 4096) + return 4; + else + return 8; +} + +// Allocate memory for lf row synchronization +void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, + int width, int num_workers) { + lf_sync->rows = rows; +#if CONFIG_MULTITHREAD + { + int i; + + CHECK_MEM_ERROR(&cm->error, lf_sync->mutex, + vpx_malloc(sizeof(*lf_sync->mutex) * rows)); + if (lf_sync->mutex) { + for (i = 0; i < rows; ++i) { + pthread_mutex_init(&lf_sync->mutex[i], NULL); + } + } + + CHECK_MEM_ERROR(&cm->error, lf_sync->cond, + vpx_malloc(sizeof(*lf_sync->cond) * rows)); + if (lf_sync->cond) { + for (i = 0; i < rows; ++i) { + pthread_cond_init(&lf_sync->cond[i], NULL); + } + } + + CHECK_MEM_ERROR(&cm->error, lf_sync->lf_mutex, + vpx_malloc(sizeof(*lf_sync->lf_mutex))); + pthread_mutex_init(lf_sync->lf_mutex, NULL); + + CHECK_MEM_ERROR(&cm->error, lf_sync->recon_done_mutex, + vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows)); + 
if (lf_sync->recon_done_mutex) { + for (i = 0; i < rows; ++i) { + pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL); + } + } + + CHECK_MEM_ERROR(&cm->error, lf_sync->recon_done_cond, + vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows)); + if (lf_sync->recon_done_cond) { + for (i = 0; i < rows; ++i) { + pthread_cond_init(&lf_sync->recon_done_cond[i], NULL); + } + } + } +#endif // CONFIG_MULTITHREAD + + CHECK_MEM_ERROR(&cm->error, lf_sync->lfdata, + vpx_malloc(num_workers * sizeof(*lf_sync->lfdata))); + lf_sync->num_workers = num_workers; + lf_sync->num_active_workers = lf_sync->num_workers; + + CHECK_MEM_ERROR(&cm->error, lf_sync->cur_sb_col, + vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); + + CHECK_MEM_ERROR(&cm->error, lf_sync->num_tiles_done, + vpx_malloc(sizeof(*lf_sync->num_tiles_done) * + mi_cols_aligned_to_sb(cm->mi_rows) >> + MI_BLOCK_SIZE_LOG2)); + + // Set up nsync. + lf_sync->sync_range = get_sync_range(width); +} + +// Deallocate lf synchronization related mutex and data +void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { + assert(lf_sync != NULL); + +#if CONFIG_MULTITHREAD + if (lf_sync->mutex != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_mutex_destroy(&lf_sync->mutex[i]); + } + vpx_free(lf_sync->mutex); + } + if (lf_sync->cond != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_cond_destroy(&lf_sync->cond[i]); + } + vpx_free(lf_sync->cond); + } + if (lf_sync->recon_done_mutex != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]); + } + vpx_free(lf_sync->recon_done_mutex); + } + + if (lf_sync->lf_mutex != NULL) { + pthread_mutex_destroy(lf_sync->lf_mutex); + vpx_free(lf_sync->lf_mutex); + } + if (lf_sync->recon_done_cond != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_cond_destroy(&lf_sync->recon_done_cond[i]); + } + vpx_free(lf_sync->recon_done_cond); + } +#endif // CONFIG_MULTITHREAD + + vpx_free(lf_sync->lfdata); + 
vpx_free(lf_sync->cur_sb_col); + vpx_free(lf_sync->num_tiles_done); + // clear the structure as the source of this call may be a resize in which + // case this call will be followed by an _alloc() which may fail. + vp9_zero(*lf_sync); +} + +static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) { + int return_val = -1; + int cur_row; + const int max_rows = cm->mi_rows; + +#if CONFIG_MULTITHREAD + const int tile_cols = 1 << cm->log2_tile_cols; + + pthread_mutex_lock(lf_sync->lf_mutex); + if (cm->lf_row < max_rows) { + cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2; + return_val = cm->lf_row; + cm->lf_row += MI_BLOCK_SIZE; + if (cm->lf_row < max_rows) { + /* If this is not the last row, make sure the next row is also decoded. + * This is because the intra predict has to happen before loop filter */ + cur_row += 1; + } + } + pthread_mutex_unlock(lf_sync->lf_mutex); + + if (return_val == -1) return return_val; + + pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]); + if (lf_sync->num_tiles_done[cur_row] < tile_cols) { + pthread_cond_wait(&lf_sync->recon_done_cond[cur_row], + &lf_sync->recon_done_mutex[cur_row]); + } + pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]); + pthread_mutex_lock(lf_sync->lf_mutex); + if (lf_sync->corrupted) { + int row = return_val >> MI_BLOCK_SIZE_LOG2; + pthread_mutex_lock(&lf_sync->mutex[row]); + lf_sync->cur_sb_col[row] = INT_MAX; + pthread_cond_signal(&lf_sync->cond[row]); + pthread_mutex_unlock(&lf_sync->mutex[row]); + return_val = -1; + } + pthread_mutex_unlock(lf_sync->lf_mutex); +#else + (void)lf_sync; + if (cm->lf_row < max_rows) { + cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2; + return_val = cm->lf_row; + cm->lf_row += MI_BLOCK_SIZE; + if (cm->lf_row < max_rows) { + /* If this is not the last row, make sure the next row is also decoded. 
+ * This is because the intra predict has to happen before loop filter */ + cur_row += 1; + } + } +#endif // CONFIG_MULTITHREAD + + return return_val; +} + +void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync) { + int mi_row; + VP9_COMMON *cm = lf_data->cm; + + while ((mi_row = get_next_row(cm, lf_sync)) != -1 && mi_row < cm->mi_rows) { + lf_data->start = mi_row; + lf_data->stop = mi_row + MI_BLOCK_SIZE; + + thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only, + lf_sync); + } +} + +void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row, + int corrupted) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(lf_sync->lf_mutex); + lf_sync->corrupted |= corrupted; + pthread_mutex_unlock(lf_sync->lf_mutex); + pthread_mutex_lock(&lf_sync->recon_done_mutex[row]); + lf_sync->num_tiles_done[row] += 1; + if (num_tiles == lf_sync->num_tiles_done[row]) { + if (is_last_row) { + /* The last 2 rows wait on the last row to be done. + * So, we have to broadcast the signal in this case. + */ + pthread_cond_broadcast(&lf_sync->recon_done_cond[row]); + } else { + pthread_cond_signal(&lf_sync->recon_done_cond[row]); + } + } + pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]); +#else + (void)lf_sync; + (void)num_tiles; + (void)row; + (void)is_last_row; + (void)corrupted; +#endif // CONFIG_MULTITHREAD +} + +void vp9_loopfilter_job(LFWorkerData *lf_data, VP9LfSync *lf_sync) { + thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only, + lf_sync); +} + +// Accumulate frame counts. 
// Element-wise add the symbol counts gathered in `counts` into `accum`.
// When is_dec is nonzero (decoder), the per-token coef counts are also
// accumulated; the encoder path skips them (see comment below).
void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
                                 const FRAME_COUNTS *counts, int is_dec) {
  int i, j, k, l, m;

  for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
    for (j = 0; j < INTRA_MODES; j++)
      accum->y_mode[i][j] += counts->y_mode[i][j];

  for (i = 0; i < INTRA_MODES; i++)
    for (j = 0; j < INTRA_MODES; j++)
      accum->uv_mode[i][j] += counts->uv_mode[i][j];

  for (i = 0; i < PARTITION_CONTEXTS; i++)
    for (j = 0; j < PARTITION_TYPES; j++)
      accum->partition[i][j] += counts->partition[i][j];

  if (is_dec) {
    int n;
    for (i = 0; i < TX_SIZES; i++)
      for (j = 0; j < PLANE_TYPES; j++)
        for (k = 0; k < REF_TYPES; k++)
          for (l = 0; l < COEF_BANDS; l++)
            for (m = 0; m < COEFF_CONTEXTS; m++) {
              accum->eob_branch[i][j][k][l][m] +=
                  counts->eob_branch[i][j][k][l][m];
              for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
                accum->coef[i][j][k][l][m][n] += counts->coef[i][j][k][l][m][n];
            }
  } else {
    for (i = 0; i < TX_SIZES; i++)
      for (j = 0; j < PLANE_TYPES; j++)
        for (k = 0; k < REF_TYPES; k++)
          for (l = 0; l < COEF_BANDS; l++)
            for (m = 0; m < COEFF_CONTEXTS; m++)
              accum->eob_branch[i][j][k][l][m] +=
                  counts->eob_branch[i][j][k][l][m];
    // In the encoder, coef is only updated at frame
    // level, so not need to accumulate it here.
    // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
    //   accum->coef[i][j][k][l][m][n] +=
    //       counts->coef[i][j][k][l][m][n];
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
    for (j = 0; j < SWITCHABLE_FILTERS; j++)
      accum->switchable_interp[i][j] += counts->switchable_interp[i][j];

  for (i = 0; i < INTER_MODE_CONTEXTS; i++)
    for (j = 0; j < INTER_MODES; j++)
      accum->inter_mode[i][j] += counts->inter_mode[i][j];

  for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
    for (j = 0; j < 2; j++)
      accum->intra_inter[i][j] += counts->intra_inter[i][j];

  for (i = 0; i < COMP_INTER_CONTEXTS; i++)
    for (j = 0; j < 2; j++) accum->comp_inter[i][j] += counts->comp_inter[i][j];

  for (i = 0; i < REF_CONTEXTS; i++)
    for (j = 0; j < 2; j++)
      for (k = 0; k < 2; k++)
        accum->single_ref[i][j][k] += counts->single_ref[i][j][k];

  for (i = 0; i < REF_CONTEXTS; i++)
    for (j = 0; j < 2; j++) accum->comp_ref[i][j] += counts->comp_ref[i][j];

  for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
    for (j = 0; j < TX_SIZES; j++)
      accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j];

    for (j = 0; j < TX_SIZES - 1; j++)
      accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j];

    for (j = 0; j < TX_SIZES - 2; j++)
      accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j];
  }

  for (i = 0; i < TX_SIZES; i++)
    accum->tx.tx_totals[i] += counts->tx.tx_totals[i];

  for (i = 0; i < SKIP_CONTEXTS; i++)
    for (j = 0; j < 2; j++) accum->skip[i][j] += counts->skip[i][j];

  for (i = 0; i < MV_JOINTS; i++) accum->mv.joints[i] += counts->mv.joints[i];

  // Motion-vector component counts, one set per axis (row/col).
  for (k = 0; k < 2; k++) {
    nmv_component_counts *const comps = &accum->mv.comps[k];
    const nmv_component_counts *const comps_t = &counts->mv.comps[k];

    for (i = 0; i < 2; i++) {
      comps->sign[i] += comps_t->sign[i];
      comps->class0_hp[i] += comps_t->class0_hp[i];
      comps->hp[i] += comps_t->hp[i];
    }

    for (i = 0; i < MV_CLASSES; i++) comps->classes[i] += comps_t->classes[i];

    for (i = 0; i < CLASS0_SIZE; i++) {
      comps->class0[i] += comps_t->class0[i];
      for (j = 0; j < MV_FP_SIZE; j++)
        comps->class0_fp[i][j] += comps_t->class0_fp[i][j];
    }

    for (i = 0; i < MV_OFFSET_BITS; i++)
      for (j = 0; j < 2; j++) comps->bits[i][j] += comps_t->bits[i][j];

    for (i = 0; i < MV_FP_SIZE; i++) comps->fp[i] += comps_t->fp[i];
  }
}
diff --git a/media/libvpx/libvpx/vp9/common/vp9_thread_common.h b/media/libvpx/libvpx/vp9/common/vp9_thread_common.h
new file mode 100644
index 0000000000..5df0117f12
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/vp9_thread_common.h
@@ -0,0 +1,83 @@
/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_COMMON_VP9_THREAD_COMMON_H_
#define VPX_VP9_COMMON_VP9_THREAD_COMMON_H_
#include "./vpx_config.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vpx_util/vpx_thread.h"

#ifdef __cplusplus
extern "C" {
#endif

struct VP9Common;
struct FRAME_COUNTS;

// Loopfilter row synchronization
typedef struct VP9LfSyncData {
#if CONFIG_MULTITHREAD
  // Per-SB-row mutex/condvar pairs guarding cur_sb_col progress.
  pthread_mutex_t *mutex;
  pthread_cond_t *cond;
#endif
  // Allocate memory to store the loop-filtered superblock index in each row.
  int *cur_sb_col;
  // The optimal sync_range for different resolution and platform should be
  // determined by testing. Currently, it is chosen to be a power-of-2 number.
  int sync_range;
  int rows;  // number of allocated SB rows.

  // Row-based parallel loopfilter data
  LFWorkerData *lfdata;
  int num_workers;         // number of allocated workers.
  int num_active_workers;  // number of scheduled workers.

#if CONFIG_MULTITHREAD
  // Guards lf_row (next row to filter) and the corrupted flag.
  pthread_mutex_t *lf_mutex;
  // Per-SB-row mutex/condvar pairs guarding num_tiles_done.
  pthread_mutex_t *recon_done_mutex;
  pthread_cond_t *recon_done_cond;
#endif
  // Per-SB-row count of tiles whose reconstruction has finished.
  int *num_tiles_done;
  int corrupted;
} VP9LfSync;

// Allocate memory for loopfilter row synchronization.
void vp9_loop_filter_alloc(VP9LfSync *lf_sync, struct VP9Common *cm, int rows,
                           int width, int num_workers);

// Deallocate loopfilter synchronization related mutex and data.
void vp9_loop_filter_dealloc(VP9LfSync *lf_sync);

// Multi-threaded loopfilter that uses the tile threads.
void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm,
                              struct macroblockd_plane planes[MAX_MB_PLANE],
                              int frame_filter_level, int y_only,
                              int partial_frame, VPxWorker *workers,
                              int num_workers, VP9LfSync *lf_sync);

// Multi-threaded loopfilter initialisations
void vp9_lpf_mt_init(VP9LfSync *lf_sync, struct VP9Common *cm,
                     int frame_filter_level, int num_workers);

// Worker loop: claim and filter successive mi rows until none remain.
void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync);

// Mark one tile of superblock row `row` as reconstructed; wakes loopfilter
// workers once all `num_tiles` tiles of the row are done.
void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row,
                 int corrupted);

// Filter the [start, stop) range stored in lf_data.
void vp9_loopfilter_job(LFWorkerData *lf_data, VP9LfSync *lf_sync);

// Element-wise add `counts` into `accum` (coef counts only when is_dec).
void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
                                 const struct FRAME_COUNTS *counts, int is_dec);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_COMMON_VP9_THREAD_COMMON_H_
diff --git a/media/libvpx/libvpx/vp9/common/vp9_tile_common.c b/media/libvpx/libvpx/vp9/common/vp9_tile_common.c
new file mode 100644
index 0000000000..672f808adc
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/vp9_tile_common.c
@@ -0,0 +1,57 @@
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_tile_common.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vpx_dsp/vpx_dsp_common.h" + +#define MIN_TILE_WIDTH_B64 4 +#define MAX_TILE_WIDTH_B64 64 + +static int get_tile_offset(int idx, int mis, int log2) { + const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2; + const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2; + return VPXMIN(offset, mis); +} + +void vp9_tile_set_row(TileInfo *tile, const VP9_COMMON *cm, int row) { + tile->mi_row_start = get_tile_offset(row, cm->mi_rows, cm->log2_tile_rows); + tile->mi_row_end = get_tile_offset(row + 1, cm->mi_rows, cm->log2_tile_rows); +} + +void vp9_tile_set_col(TileInfo *tile, const VP9_COMMON *cm, int col) { + tile->mi_col_start = get_tile_offset(col, cm->mi_cols, cm->log2_tile_cols); + tile->mi_col_end = get_tile_offset(col + 1, cm->mi_cols, cm->log2_tile_cols); +} + +void vp9_tile_init(TileInfo *tile, const VP9_COMMON *cm, int row, int col) { + vp9_tile_set_row(tile, cm, row); + vp9_tile_set_col(tile, cm, col); +} + +static int get_min_log2_tile_cols(const int sb64_cols) { + int min_log2 = 0; + while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols) ++min_log2; + return min_log2; +} + +static int get_max_log2_tile_cols(const int sb64_cols) { + int max_log2 = 1; + while ((sb64_cols >> max_log2) >= MIN_TILE_WIDTH_B64) ++max_log2; + return max_log2 - 1; +} + +void vp9_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols, + int *max_log2_tile_cols) { + const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2; + *min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols); + *max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols); + assert(*min_log2_tile_cols <= *max_log2_tile_cols); +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_tile_common.h b/media/libvpx/libvpx/vp9/common/vp9_tile_common.h new file mode 100644 index 
0000000000..4ccf0a3d5f
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/vp9_tile_common.h
@@ -0,0 +1,40 @@
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_COMMON_VP9_TILE_COMMON_H_
#define VPX_VP9_COMMON_VP9_TILE_COMMON_H_

#ifdef __cplusplus
extern "C" {
#endif

struct VP9Common;

// Half-open mi-unit spans [start, end) covered by one tile.
typedef struct TileInfo {
  int mi_row_start, mi_row_end;
  int mi_col_start, mi_col_end;
} TileInfo;

// initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on
// 'cm->log2_tile_(rows|cols)' & 'cm->mi_(rows|cols)'
void vp9_tile_init(TileInfo *tile, const struct VP9Common *cm, int row,
                   int col);

// Initialize only the row span (vp9_tile_set_row) or the column span
// (vp9_tile_set_col) of `tile`.
void vp9_tile_set_row(TileInfo *tile, const struct VP9Common *cm, int row);
void vp9_tile_set_col(TileInfo *tile, const struct VP9Common *cm, int col);

// Report the valid [min, max] range of log2 tile columns for a frame
// `mi_cols` mi-units wide.
void vp9_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols,
                         int *max_log2_tile_cols);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_COMMON_VP9_TILE_COMMON_H_
diff --git a/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c
new file mode 100644
index 0000000000..57b79a732d
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c
@@ -0,0 +1,419 @@
/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.
All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h"
#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"

// s[0..1] = in * c as four 64-bit products (two per register). The cospi
// constant is scaled by 4 and paired with 0 so that _mm_mul_epi32 (which
// multiplies only the even 32-bit lanes) sees one operand per 64-bit lane
// after extend_64bit() widens `in`.
static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in,
                                                      const int c,
                                                      __m128i *const s) {
  const __m128i pair_c = pair_set_epi32(4 * c, 0);
  __m128i x[2];

  extend_64bit(in, x);
  s[0] = _mm_mul_epi32(pair_c, x[0]);
  s[1] = _mm_mul_epi32(pair_c, x[1]);
}

// ADST butterfly with 64-bit intermediates:
//   s0 = in0 * c0 + in1 * c1
//   s1 = in0 * c1 - in1 * c0
// Each s* holds four 64-bit sums split across two __m128i registers.
static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0,
                                                 const __m128i in1,
                                                 const int c0, const int c1,
                                                 __m128i *const s0,
                                                 __m128i *const s1) {
  const __m128i pair_c0 = pair_set_epi32(4 * c0, 0);
  const __m128i pair_c1 = pair_set_epi32(4 * c1, 0);
  __m128i t00[2], t01[2], t10[2], t11[2];
  __m128i x0[2], x1[2];

  extend_64bit(in0, x0);
  extend_64bit(in1, x1);
  t00[0] = _mm_mul_epi32(pair_c0, x0[0]);
  t00[1] = _mm_mul_epi32(pair_c0, x0[1]);
  t01[0] = _mm_mul_epi32(pair_c0, x1[0]);
  t01[1] = _mm_mul_epi32(pair_c0, x1[1]);
  t10[0] = _mm_mul_epi32(pair_c1, x0[0]);
  t10[1] = _mm_mul_epi32(pair_c1, x0[1]);
  t11[0] = _mm_mul_epi32(pair_c1, x1[0]);
  t11[1] = _mm_mul_epi32(pair_c1, x1[1]);

  s0[0] = _mm_add_epi64(t00[0], t11[0]);
  s0[1] = _mm_add_epi64(t00[1], t11[1]);
  s1[0] = _mm_sub_epi64(t10[0], t01[0]);
  s1[1] = _mm_sub_epi64(t10[1], t01[1]);
}

static void highbd_iadst16_4col_sse4_1(__m128i *const io /*io[16]*/) {
  __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2], s8[2], s9[2],
      s10[2], s11[2], s12[2], s13[2], s14[2], s15[2];
  __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2], x8[2], x9[2],
      x10[2], x11[2], x12[2], x13[2], x14[2], x15[2];

  // stage 1
  highbd_iadst_butterfly_sse4_1(io[15], io[0], cospi_1_64, cospi_31_64, s0, s1);
  highbd_iadst_butterfly_sse4_1(io[13], io[2],
cospi_5_64, cospi_27_64, s2, s3); + highbd_iadst_butterfly_sse4_1(io[11], io[4], cospi_9_64, cospi_23_64, s4, s5); + highbd_iadst_butterfly_sse4_1(io[9], io[6], cospi_13_64, cospi_19_64, s6, s7); + highbd_iadst_butterfly_sse4_1(io[7], io[8], cospi_17_64, cospi_15_64, s8, s9); + highbd_iadst_butterfly_sse4_1(io[5], io[10], cospi_21_64, cospi_11_64, s10, + s11); + highbd_iadst_butterfly_sse4_1(io[3], io[12], cospi_25_64, cospi_7_64, s12, + s13); + highbd_iadst_butterfly_sse4_1(io[1], io[14], cospi_29_64, cospi_3_64, s14, + s15); + + x0[0] = _mm_add_epi64(s0[0], s8[0]); + x0[1] = _mm_add_epi64(s0[1], s8[1]); + x1[0] = _mm_add_epi64(s1[0], s9[0]); + x1[1] = _mm_add_epi64(s1[1], s9[1]); + x2[0] = _mm_add_epi64(s2[0], s10[0]); + x2[1] = _mm_add_epi64(s2[1], s10[1]); + x3[0] = _mm_add_epi64(s3[0], s11[0]); + x3[1] = _mm_add_epi64(s3[1], s11[1]); + x4[0] = _mm_add_epi64(s4[0], s12[0]); + x4[1] = _mm_add_epi64(s4[1], s12[1]); + x5[0] = _mm_add_epi64(s5[0], s13[0]); + x5[1] = _mm_add_epi64(s5[1], s13[1]); + x6[0] = _mm_add_epi64(s6[0], s14[0]); + x6[1] = _mm_add_epi64(s6[1], s14[1]); + x7[0] = _mm_add_epi64(s7[0], s15[0]); + x7[1] = _mm_add_epi64(s7[1], s15[1]); + x8[0] = _mm_sub_epi64(s0[0], s8[0]); + x8[1] = _mm_sub_epi64(s0[1], s8[1]); + x9[0] = _mm_sub_epi64(s1[0], s9[0]); + x9[1] = _mm_sub_epi64(s1[1], s9[1]); + x10[0] = _mm_sub_epi64(s2[0], s10[0]); + x10[1] = _mm_sub_epi64(s2[1], s10[1]); + x11[0] = _mm_sub_epi64(s3[0], s11[0]); + x11[1] = _mm_sub_epi64(s3[1], s11[1]); + x12[0] = _mm_sub_epi64(s4[0], s12[0]); + x12[1] = _mm_sub_epi64(s4[1], s12[1]); + x13[0] = _mm_sub_epi64(s5[0], s13[0]); + x13[1] = _mm_sub_epi64(s5[1], s13[1]); + x14[0] = _mm_sub_epi64(s6[0], s14[0]); + x14[1] = _mm_sub_epi64(s6[1], s14[1]); + x15[0] = _mm_sub_epi64(s7[0], s15[0]); + x15[1] = _mm_sub_epi64(s7[1], s15[1]); + + x0[0] = dct_const_round_shift_64bit(x0[0]); + x0[1] = dct_const_round_shift_64bit(x0[1]); + x1[0] = dct_const_round_shift_64bit(x1[0]); + x1[1] = 
dct_const_round_shift_64bit(x1[1]); + x2[0] = dct_const_round_shift_64bit(x2[0]); + x2[1] = dct_const_round_shift_64bit(x2[1]); + x3[0] = dct_const_round_shift_64bit(x3[0]); + x3[1] = dct_const_round_shift_64bit(x3[1]); + x4[0] = dct_const_round_shift_64bit(x4[0]); + x4[1] = dct_const_round_shift_64bit(x4[1]); + x5[0] = dct_const_round_shift_64bit(x5[0]); + x5[1] = dct_const_round_shift_64bit(x5[1]); + x6[0] = dct_const_round_shift_64bit(x6[0]); + x6[1] = dct_const_round_shift_64bit(x6[1]); + x7[0] = dct_const_round_shift_64bit(x7[0]); + x7[1] = dct_const_round_shift_64bit(x7[1]); + x8[0] = dct_const_round_shift_64bit(x8[0]); + x8[1] = dct_const_round_shift_64bit(x8[1]); + x9[0] = dct_const_round_shift_64bit(x9[0]); + x9[1] = dct_const_round_shift_64bit(x9[1]); + x10[0] = dct_const_round_shift_64bit(x10[0]); + x10[1] = dct_const_round_shift_64bit(x10[1]); + x11[0] = dct_const_round_shift_64bit(x11[0]); + x11[1] = dct_const_round_shift_64bit(x11[1]); + x12[0] = dct_const_round_shift_64bit(x12[0]); + x12[1] = dct_const_round_shift_64bit(x12[1]); + x13[0] = dct_const_round_shift_64bit(x13[0]); + x13[1] = dct_const_round_shift_64bit(x13[1]); + x14[0] = dct_const_round_shift_64bit(x14[0]); + x14[1] = dct_const_round_shift_64bit(x14[1]); + x15[0] = dct_const_round_shift_64bit(x15[0]); + x15[1] = dct_const_round_shift_64bit(x15[1]); + x0[0] = pack_4(x0[0], x0[1]); + x1[0] = pack_4(x1[0], x1[1]); + x2[0] = pack_4(x2[0], x2[1]); + x3[0] = pack_4(x3[0], x3[1]); + x4[0] = pack_4(x4[0], x4[1]); + x5[0] = pack_4(x5[0], x5[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + x8[0] = pack_4(x8[0], x8[1]); + x9[0] = pack_4(x9[0], x9[1]); + x10[0] = pack_4(x10[0], x10[1]); + x11[0] = pack_4(x11[0], x11[1]); + x12[0] = pack_4(x12[0], x12[1]); + x13[0] = pack_4(x13[0], x13[1]); + x14[0] = pack_4(x14[0], x14[1]); + x15[0] = pack_4(x15[0], x15[1]); + + // stage 2 + s0[0] = x0[0]; + s1[0] = x1[0]; + s2[0] = x2[0]; + s3[0] = x3[0]; + s4[0] = x4[0]; + s5[0] = x5[0]; + 
s6[0] = x6[0]; + s7[0] = x7[0]; + x0[0] = _mm_add_epi32(s0[0], s4[0]); + x1[0] = _mm_add_epi32(s1[0], s5[0]); + x2[0] = _mm_add_epi32(s2[0], s6[0]); + x3[0] = _mm_add_epi32(s3[0], s7[0]); + x4[0] = _mm_sub_epi32(s0[0], s4[0]); + x5[0] = _mm_sub_epi32(s1[0], s5[0]); + x6[0] = _mm_sub_epi32(s2[0], s6[0]); + x7[0] = _mm_sub_epi32(s3[0], s7[0]); + + highbd_iadst_butterfly_sse4_1(x8[0], x9[0], cospi_4_64, cospi_28_64, s8, s9); + highbd_iadst_butterfly_sse4_1(x10[0], x11[0], cospi_20_64, cospi_12_64, s10, + s11); + highbd_iadst_butterfly_sse4_1(x13[0], x12[0], cospi_28_64, cospi_4_64, s13, + s12); + highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_12_64, cospi_20_64, s15, + s14); + + x8[0] = _mm_add_epi64(s8[0], s12[0]); + x8[1] = _mm_add_epi64(s8[1], s12[1]); + x9[0] = _mm_add_epi64(s9[0], s13[0]); + x9[1] = _mm_add_epi64(s9[1], s13[1]); + x10[0] = _mm_add_epi64(s10[0], s14[0]); + x10[1] = _mm_add_epi64(s10[1], s14[1]); + x11[0] = _mm_add_epi64(s11[0], s15[0]); + x11[1] = _mm_add_epi64(s11[1], s15[1]); + x12[0] = _mm_sub_epi64(s8[0], s12[0]); + x12[1] = _mm_sub_epi64(s8[1], s12[1]); + x13[0] = _mm_sub_epi64(s9[0], s13[0]); + x13[1] = _mm_sub_epi64(s9[1], s13[1]); + x14[0] = _mm_sub_epi64(s10[0], s14[0]); + x14[1] = _mm_sub_epi64(s10[1], s14[1]); + x15[0] = _mm_sub_epi64(s11[0], s15[0]); + x15[1] = _mm_sub_epi64(s11[1], s15[1]); + x8[0] = dct_const_round_shift_64bit(x8[0]); + x8[1] = dct_const_round_shift_64bit(x8[1]); + x9[0] = dct_const_round_shift_64bit(x9[0]); + x9[1] = dct_const_round_shift_64bit(x9[1]); + x10[0] = dct_const_round_shift_64bit(x10[0]); + x10[1] = dct_const_round_shift_64bit(x10[1]); + x11[0] = dct_const_round_shift_64bit(x11[0]); + x11[1] = dct_const_round_shift_64bit(x11[1]); + x12[0] = dct_const_round_shift_64bit(x12[0]); + x12[1] = dct_const_round_shift_64bit(x12[1]); + x13[0] = dct_const_round_shift_64bit(x13[0]); + x13[1] = dct_const_round_shift_64bit(x13[1]); + x14[0] = dct_const_round_shift_64bit(x14[0]); + x14[1] = 
dct_const_round_shift_64bit(x14[1]); + x15[0] = dct_const_round_shift_64bit(x15[0]); + x15[1] = dct_const_round_shift_64bit(x15[1]); + x8[0] = pack_4(x8[0], x8[1]); + x9[0] = pack_4(x9[0], x9[1]); + x10[0] = pack_4(x10[0], x10[1]); + x11[0] = pack_4(x11[0], x11[1]); + x12[0] = pack_4(x12[0], x12[1]); + x13[0] = pack_4(x13[0], x13[1]); + x14[0] = pack_4(x14[0], x14[1]); + x15[0] = pack_4(x15[0], x15[1]); + + // stage 3 + s0[0] = x0[0]; + s1[0] = x1[0]; + s2[0] = x2[0]; + s3[0] = x3[0]; + highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5); + highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6); + s8[0] = x8[0]; + s9[0] = x9[0]; + s10[0] = x10[0]; + s11[0] = x11[0]; + highbd_iadst_butterfly_sse4_1(x12[0], x13[0], cospi_8_64, cospi_24_64, s12, + s13); + highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_24_64, cospi_8_64, s15, + s14); + + x0[0] = _mm_add_epi32(s0[0], s2[0]); + x1[0] = _mm_add_epi32(s1[0], s3[0]); + x2[0] = _mm_sub_epi32(s0[0], s2[0]); + x3[0] = _mm_sub_epi32(s1[0], s3[0]); + x4[0] = _mm_add_epi64(s4[0], s6[0]); + x4[1] = _mm_add_epi64(s4[1], s6[1]); + x5[0] = _mm_add_epi64(s5[0], s7[0]); + x5[1] = _mm_add_epi64(s5[1], s7[1]); + x6[0] = _mm_sub_epi64(s4[0], s6[0]); + x6[1] = _mm_sub_epi64(s4[1], s6[1]); + x7[0] = _mm_sub_epi64(s5[0], s7[0]); + x7[1] = _mm_sub_epi64(s5[1], s7[1]); + x4[0] = dct_const_round_shift_64bit(x4[0]); + x4[1] = dct_const_round_shift_64bit(x4[1]); + x5[0] = dct_const_round_shift_64bit(x5[0]); + x5[1] = dct_const_round_shift_64bit(x5[1]); + x6[0] = dct_const_round_shift_64bit(x6[0]); + x6[1] = dct_const_round_shift_64bit(x6[1]); + x7[0] = dct_const_round_shift_64bit(x7[0]); + x7[1] = dct_const_round_shift_64bit(x7[1]); + x4[0] = pack_4(x4[0], x4[1]); + x5[0] = pack_4(x5[0], x5[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + x8[0] = _mm_add_epi32(s8[0], s10[0]); + x9[0] = _mm_add_epi32(s9[0], s11[0]); + x10[0] = _mm_sub_epi32(s8[0], s10[0]); + x11[0] = 
_mm_sub_epi32(s9[0], s11[0]); + x12[0] = _mm_add_epi64(s12[0], s14[0]); + x12[1] = _mm_add_epi64(s12[1], s14[1]); + x13[0] = _mm_add_epi64(s13[0], s15[0]); + x13[1] = _mm_add_epi64(s13[1], s15[1]); + x14[0] = _mm_sub_epi64(s12[0], s14[0]); + x14[1] = _mm_sub_epi64(s12[1], s14[1]); + x15[0] = _mm_sub_epi64(s13[0], s15[0]); + x15[1] = _mm_sub_epi64(s13[1], s15[1]); + x12[0] = dct_const_round_shift_64bit(x12[0]); + x12[1] = dct_const_round_shift_64bit(x12[1]); + x13[0] = dct_const_round_shift_64bit(x13[0]); + x13[1] = dct_const_round_shift_64bit(x13[1]); + x14[0] = dct_const_round_shift_64bit(x14[0]); + x14[1] = dct_const_round_shift_64bit(x14[1]); + x15[0] = dct_const_round_shift_64bit(x15[0]); + x15[1] = dct_const_round_shift_64bit(x15[1]); + x12[0] = pack_4(x12[0], x12[1]); + x13[0] = pack_4(x13[0], x13[1]); + x14[0] = pack_4(x14[0], x14[1]); + x15[0] = pack_4(x15[0], x15[1]); + + // stage 4 + s2[0] = _mm_add_epi32(x2[0], x3[0]); + s3[0] = _mm_sub_epi32(x2[0], x3[0]); + s6[0] = _mm_add_epi32(x7[0], x6[0]); + s7[0] = _mm_sub_epi32(x7[0], x6[0]); + s10[0] = _mm_add_epi32(x11[0], x10[0]); + s11[0] = _mm_sub_epi32(x11[0], x10[0]); + s14[0] = _mm_add_epi32(x14[0], x15[0]); + s15[0] = _mm_sub_epi32(x14[0], x15[0]); + highbd_iadst_half_butterfly_sse4_1(s2[0], -cospi_16_64, s2); + highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3); + highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6); + highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7); + highbd_iadst_half_butterfly_sse4_1(s10[0], cospi_16_64, s10); + highbd_iadst_half_butterfly_sse4_1(s11[0], cospi_16_64, s11); + highbd_iadst_half_butterfly_sse4_1(s14[0], -cospi_16_64, s14); + highbd_iadst_half_butterfly_sse4_1(s15[0], cospi_16_64, s15); + + x2[0] = dct_const_round_shift_64bit(s2[0]); + x2[1] = dct_const_round_shift_64bit(s2[1]); + x3[0] = dct_const_round_shift_64bit(s3[0]); + x3[1] = dct_const_round_shift_64bit(s3[1]); + x6[0] = dct_const_round_shift_64bit(s6[0]); + x6[1] = 
dct_const_round_shift_64bit(s6[1]); + x7[0] = dct_const_round_shift_64bit(s7[0]); + x7[1] = dct_const_round_shift_64bit(s7[1]); + x10[0] = dct_const_round_shift_64bit(s10[0]); + x10[1] = dct_const_round_shift_64bit(s10[1]); + x11[0] = dct_const_round_shift_64bit(s11[0]); + x11[1] = dct_const_round_shift_64bit(s11[1]); + x14[0] = dct_const_round_shift_64bit(s14[0]); + x14[1] = dct_const_round_shift_64bit(s14[1]); + x15[0] = dct_const_round_shift_64bit(s15[0]); + x15[1] = dct_const_round_shift_64bit(s15[1]); + x2[0] = pack_4(x2[0], x2[1]); + x3[0] = pack_4(x3[0], x3[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + x10[0] = pack_4(x10[0], x10[1]); + x11[0] = pack_4(x11[0], x11[1]); + x14[0] = pack_4(x14[0], x14[1]); + x15[0] = pack_4(x15[0], x15[1]); + + io[0] = x0[0]; + io[1] = _mm_sub_epi32(_mm_setzero_si128(), x8[0]); + io[2] = x12[0]; + io[3] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]); + io[4] = x6[0]; + io[5] = x14[0]; + io[6] = x10[0]; + io[7] = x2[0]; + io[8] = x3[0]; + io[9] = x11[0]; + io[10] = x15[0]; + io[11] = x7[0]; + io[12] = x5[0]; + io[13] = _mm_sub_epi32(_mm_setzero_si128(), x13[0]); + io[14] = x9[0]; + io[15] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]); +} + +void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + int i; + __m128i out[16], *in; + + if (bd == 8) { + __m128i l[16], r[16]; + + in = l; + for (i = 0; i < 2; i++) { + highbd_load_pack_transpose_32bit_8x8(&input[0], 16, &in[0]); + highbd_load_pack_transpose_32bit_8x8(&input[8], 16, &in[8]); + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + idct16_8col(in, in); + } else { + vpx_iadst16_8col_sse2(in); + } + in = r; + input += 128; + } + + for (i = 0; i < 16; i += 8) { + int j; + transpose_16bit_8x8(l + i, out); + transpose_16bit_8x8(r + i, out + 8); + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + idct16_8col(out, out); + } else { + vpx_iadst16_8col_sse2(out); + } + + for (j = 0; j < 16; ++j) { + 
highbd_write_buffer_8(dest + j * stride, out[j], bd); + } + dest += 8; + } + } else { + __m128i all[4][16]; + + for (i = 0; i < 4; i++) { + in = all[i]; + highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]); + highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]); + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + vpx_highbd_idct16_4col_sse4_1(in); + } else { + highbd_iadst16_4col_sse4_1(in); + } + input += 4 * 16; + } + + for (i = 0; i < 16; i += 4) { + int j; + transpose_32bit_4x4(all[0] + i, out + 0); + transpose_32bit_4x4(all[1] + i, out + 4); + transpose_32bit_4x4(all[2] + i, out + 8); + transpose_32bit_4x4(all[3] + i, out + 12); + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + vpx_highbd_idct16_4col_sse4_1(out); + } else { + highbd_iadst16_4col_sse4_1(out); + } + + for (j = 0; j < 16; ++j) { + highbd_write_buffer_4(dest + j * stride, out[j], bd); + } + dest += 4; + } + } +} diff --git a/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c new file mode 100644 index 0000000000..af158536f9 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vp9_rtcd.h" +#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h" +#include "vpx_dsp/x86/inv_txfm_sse2.h" +#include "vpx_dsp/x86/transpose_sse2.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" + +static INLINE void highbd_iadst4_sse4_1(__m128i *const io) { + const __m128i pair_c1 = pair_set_epi32(4 * sinpi_1_9, 0); + const __m128i pair_c2 = pair_set_epi32(4 * sinpi_2_9, 0); + const __m128i pair_c3 = pair_set_epi32(4 * sinpi_3_9, 0); + const __m128i pair_c4 = pair_set_epi32(4 * sinpi_4_9, 0); + __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], t0[2], t1[2], t2[2]; + __m128i temp[2]; + + transpose_32bit_4x4(io, io); + + extend_64bit(io[0], temp); + s0[0] = _mm_mul_epi32(pair_c1, temp[0]); + s0[1] = _mm_mul_epi32(pair_c1, temp[1]); + s1[0] = _mm_mul_epi32(pair_c2, temp[0]); + s1[1] = _mm_mul_epi32(pair_c2, temp[1]); + + extend_64bit(io[1], temp); + s2[0] = _mm_mul_epi32(pair_c3, temp[0]); + s2[1] = _mm_mul_epi32(pair_c3, temp[1]); + + extend_64bit(io[2], temp); + s3[0] = _mm_mul_epi32(pair_c4, temp[0]); + s3[1] = _mm_mul_epi32(pair_c4, temp[1]); + s4[0] = _mm_mul_epi32(pair_c1, temp[0]); + s4[1] = _mm_mul_epi32(pair_c1, temp[1]); + + extend_64bit(io[3], temp); + s5[0] = _mm_mul_epi32(pair_c2, temp[0]); + s5[1] = _mm_mul_epi32(pair_c2, temp[1]); + s6[0] = _mm_mul_epi32(pair_c4, temp[0]); + s6[1] = _mm_mul_epi32(pair_c4, temp[1]); + + t0[0] = _mm_add_epi64(s0[0], s3[0]); + t0[1] = _mm_add_epi64(s0[1], s3[1]); + t0[0] = _mm_add_epi64(t0[0], s5[0]); + t0[1] = _mm_add_epi64(t0[1], s5[1]); + t1[0] = _mm_sub_epi64(s1[0], s4[0]); + t1[1] = _mm_sub_epi64(s1[1], s4[1]); + t1[0] = _mm_sub_epi64(t1[0], s6[0]); + t1[1] = _mm_sub_epi64(t1[1], s6[1]); + temp[0] = _mm_sub_epi32(io[0], io[2]); + temp[0] = _mm_add_epi32(temp[0], io[3]); + extend_64bit(temp[0], temp); + t2[0] = _mm_mul_epi32(pair_c3, temp[0]); + t2[1] = _mm_mul_epi32(pair_c3, temp[1]); + + s0[0] = _mm_add_epi64(t0[0], s2[0]); + s0[1] = _mm_add_epi64(t0[1], s2[1]); + 
s1[0] = _mm_add_epi64(t1[0], s2[0]); + s1[1] = _mm_add_epi64(t1[1], s2[1]); + s3[0] = _mm_add_epi64(t0[0], t1[0]); + s3[1] = _mm_add_epi64(t0[1], t1[1]); + s3[0] = _mm_sub_epi64(s3[0], s2[0]); + s3[1] = _mm_sub_epi64(s3[1], s2[1]); + + s0[0] = dct_const_round_shift_64bit(s0[0]); + s0[1] = dct_const_round_shift_64bit(s0[1]); + s1[0] = dct_const_round_shift_64bit(s1[0]); + s1[1] = dct_const_round_shift_64bit(s1[1]); + s2[0] = dct_const_round_shift_64bit(t2[0]); + s2[1] = dct_const_round_shift_64bit(t2[1]); + s3[0] = dct_const_round_shift_64bit(s3[0]); + s3[1] = dct_const_round_shift_64bit(s3[1]); + io[0] = pack_4(s0[0], s0[1]); + io[1] = pack_4(s1[0], s1[1]); + io[2] = pack_4(s2[0], s2[1]); + io[3] = pack_4(s3[0], s3[1]); +} + +void vp9_highbd_iht4x4_16_add_sse4_1(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + __m128i io[4]; + + io[0] = _mm_load_si128((const __m128i *)(input + 0)); + io[1] = _mm_load_si128((const __m128i *)(input + 4)); + io[2] = _mm_load_si128((const __m128i *)(input + 8)); + io[3] = _mm_load_si128((const __m128i *)(input + 12)); + + if (bd == 8) { + __m128i io_short[2]; + + io_short[0] = _mm_packs_epi32(io[0], io[1]); + io_short[1] = _mm_packs_epi32(io[2], io[3]); + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + idct4_sse2(io_short); + } else { + iadst4_sse2(io_short); + } + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + idct4_sse2(io_short); + } else { + iadst4_sse2(io_short); + } + io_short[0] = _mm_add_epi16(io_short[0], _mm_set1_epi16(8)); + io_short[1] = _mm_add_epi16(io_short[1], _mm_set1_epi16(8)); + io[0] = _mm_srai_epi16(io_short[0], 4); + io[1] = _mm_srai_epi16(io_short[1], 4); + } else { + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + highbd_idct4_sse4_1(io); + } else { + highbd_iadst4_sse4_1(io); + } + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + highbd_idct4_sse4_1(io); + } else { + highbd_iadst4_sse4_1(io); + } + io[0] = wraplow_16bit_shift4(io[0], io[1], _mm_set1_epi32(8)); + io[1] 
= wraplow_16bit_shift4(io[2], io[3], _mm_set1_epi32(8)); + } + + recon_and_store_4x4(io, dest, stride, bd); +} diff --git a/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c new file mode 100644 index 0000000000..7d949b6dbc --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp9_rtcd.h" +#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h" +#include "vpx_dsp/x86/inv_txfm_sse2.h" +#include "vpx_dsp/x86/transpose_sse2.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" + +static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in, + const int c, + __m128i *const s) { + const __m128i pair_c = pair_set_epi32(4 * c, 0); + __m128i x[2]; + + extend_64bit(in, x); + s[0] = _mm_mul_epi32(pair_c, x[0]); + s[1] = _mm_mul_epi32(pair_c, x[1]); +} + +static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0, + const __m128i in1, + const int c0, const int c1, + __m128i *const s0, + __m128i *const s1) { + const __m128i pair_c0 = pair_set_epi32(4 * c0, 0); + const __m128i pair_c1 = pair_set_epi32(4 * c1, 0); + __m128i t00[2], t01[2], t10[2], t11[2]; + __m128i x0[2], x1[2]; + + extend_64bit(in0, x0); + extend_64bit(in1, x1); + t00[0] = _mm_mul_epi32(pair_c0, x0[0]); + t00[1] = _mm_mul_epi32(pair_c0, x0[1]); + t01[0] = _mm_mul_epi32(pair_c0, x1[0]); + t01[1] = _mm_mul_epi32(pair_c0, x1[1]); + t10[0] = _mm_mul_epi32(pair_c1, x0[0]); + t10[1] = _mm_mul_epi32(pair_c1, x0[1]); + t11[0] = 
_mm_mul_epi32(pair_c1, x1[0]); + t11[1] = _mm_mul_epi32(pair_c1, x1[1]); + + s0[0] = _mm_add_epi64(t00[0], t11[0]); + s0[1] = _mm_add_epi64(t00[1], t11[1]); + s1[0] = _mm_sub_epi64(t10[0], t01[0]); + s1[1] = _mm_sub_epi64(t10[1], t01[1]); +} + +static void highbd_iadst8_sse4_1(__m128i *const io) { + __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2]; + __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2]; + + transpose_32bit_4x4x2(io, io); + + // stage 1 + highbd_iadst_butterfly_sse4_1(io[7], io[0], cospi_2_64, cospi_30_64, s0, s1); + highbd_iadst_butterfly_sse4_1(io[3], io[4], cospi_18_64, cospi_14_64, s4, s5); + x0[0] = _mm_add_epi64(s0[0], s4[0]); + x0[1] = _mm_add_epi64(s0[1], s4[1]); + x1[0] = _mm_add_epi64(s1[0], s5[0]); + x1[1] = _mm_add_epi64(s1[1], s5[1]); + x4[0] = _mm_sub_epi64(s0[0], s4[0]); + x4[1] = _mm_sub_epi64(s0[1], s4[1]); + x5[0] = _mm_sub_epi64(s1[0], s5[0]); + x5[1] = _mm_sub_epi64(s1[1], s5[1]); + + highbd_iadst_butterfly_sse4_1(io[5], io[2], cospi_10_64, cospi_22_64, s2, s3); + highbd_iadst_butterfly_sse4_1(io[1], io[6], cospi_26_64, cospi_6_64, s6, s7); + x2[0] = _mm_add_epi64(s2[0], s6[0]); + x2[1] = _mm_add_epi64(s2[1], s6[1]); + x3[0] = _mm_add_epi64(s3[0], s7[0]); + x3[1] = _mm_add_epi64(s3[1], s7[1]); + x6[0] = _mm_sub_epi64(s2[0], s6[0]); + x6[1] = _mm_sub_epi64(s2[1], s6[1]); + x7[0] = _mm_sub_epi64(s3[0], s7[0]); + x7[1] = _mm_sub_epi64(s3[1], s7[1]); + + x0[0] = dct_const_round_shift_64bit(x0[0]); + x0[1] = dct_const_round_shift_64bit(x0[1]); + x1[0] = dct_const_round_shift_64bit(x1[0]); + x1[1] = dct_const_round_shift_64bit(x1[1]); + x2[0] = dct_const_round_shift_64bit(x2[0]); + x2[1] = dct_const_round_shift_64bit(x2[1]); + x3[0] = dct_const_round_shift_64bit(x3[0]); + x3[1] = dct_const_round_shift_64bit(x3[1]); + x4[0] = dct_const_round_shift_64bit(x4[0]); + x4[1] = dct_const_round_shift_64bit(x4[1]); + x5[0] = dct_const_round_shift_64bit(x5[0]); + x5[1] = dct_const_round_shift_64bit(x5[1]); + x6[0] = 
dct_const_round_shift_64bit(x6[0]); + x6[1] = dct_const_round_shift_64bit(x6[1]); + x7[0] = dct_const_round_shift_64bit(x7[0]); + x7[1] = dct_const_round_shift_64bit(x7[1]); + s0[0] = pack_4(x0[0], x0[1]); // s0 = x0; + s1[0] = pack_4(x1[0], x1[1]); // s1 = x1; + s2[0] = pack_4(x2[0], x2[1]); // s2 = x2; + s3[0] = pack_4(x3[0], x3[1]); // s3 = x3; + x4[0] = pack_4(x4[0], x4[1]); + x5[0] = pack_4(x5[0], x5[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + + // stage 2 + x0[0] = _mm_add_epi32(s0[0], s2[0]); + x1[0] = _mm_add_epi32(s1[0], s3[0]); + x2[0] = _mm_sub_epi32(s0[0], s2[0]); + x3[0] = _mm_sub_epi32(s1[0], s3[0]); + + highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5); + highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6); + + x4[0] = _mm_add_epi64(s4[0], s6[0]); + x4[1] = _mm_add_epi64(s4[1], s6[1]); + x5[0] = _mm_add_epi64(s5[0], s7[0]); + x5[1] = _mm_add_epi64(s5[1], s7[1]); + x6[0] = _mm_sub_epi64(s4[0], s6[0]); + x6[1] = _mm_sub_epi64(s4[1], s6[1]); + x7[0] = _mm_sub_epi64(s5[0], s7[0]); + x7[1] = _mm_sub_epi64(s5[1], s7[1]); + x4[0] = dct_const_round_shift_64bit(x4[0]); + x4[1] = dct_const_round_shift_64bit(x4[1]); + x5[0] = dct_const_round_shift_64bit(x5[0]); + x5[1] = dct_const_round_shift_64bit(x5[1]); + x6[0] = dct_const_round_shift_64bit(x6[0]); + x6[1] = dct_const_round_shift_64bit(x6[1]); + x7[0] = dct_const_round_shift_64bit(x7[0]); + x7[1] = dct_const_round_shift_64bit(x7[1]); + x4[0] = pack_4(x4[0], x4[1]); + x5[0] = pack_4(x5[0], x5[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + + // stage 3 + s2[0] = _mm_add_epi32(x2[0], x3[0]); + s3[0] = _mm_sub_epi32(x2[0], x3[0]); + s6[0] = _mm_add_epi32(x6[0], x7[0]); + s7[0] = _mm_sub_epi32(x6[0], x7[0]); + highbd_iadst_half_butterfly_sse4_1(s2[0], cospi_16_64, s2); + highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3); + highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6); + 
highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7); + + x2[0] = dct_const_round_shift_64bit(s2[0]); + x2[1] = dct_const_round_shift_64bit(s2[1]); + x3[0] = dct_const_round_shift_64bit(s3[0]); + x3[1] = dct_const_round_shift_64bit(s3[1]); + x6[0] = dct_const_round_shift_64bit(s6[0]); + x6[1] = dct_const_round_shift_64bit(s6[1]); + x7[0] = dct_const_round_shift_64bit(s7[0]); + x7[1] = dct_const_round_shift_64bit(s7[1]); + x2[0] = pack_4(x2[0], x2[1]); + x3[0] = pack_4(x3[0], x3[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + + io[0] = x0[0]; + io[1] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]); + io[2] = x6[0]; + io[3] = _mm_sub_epi32(_mm_setzero_si128(), x2[0]); + io[4] = x3[0]; + io[5] = _mm_sub_epi32(_mm_setzero_si128(), x7[0]); + io[6] = x5[0]; + io[7] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]); +} + +void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + __m128i io[16]; + + io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0)); + io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4)); + io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0)); + io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4)); + io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0)); + io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4)); + io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0)); + io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4)); + io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0)); + io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4)); + io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0)); + io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4)); + io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0)); + io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4)); + io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0)); + io[15] = _mm_load_si128((const __m128i *)(input + 7 
* 8 + 4)); + + if (bd == 8) { + __m128i io_short[8]; + + io_short[0] = _mm_packs_epi32(io[0], io[4]); + io_short[1] = _mm_packs_epi32(io[1], io[5]); + io_short[2] = _mm_packs_epi32(io[2], io[6]); + io_short[3] = _mm_packs_epi32(io[3], io[7]); + io_short[4] = _mm_packs_epi32(io[8], io[12]); + io_short[5] = _mm_packs_epi32(io[9], io[13]); + io_short[6] = _mm_packs_epi32(io[10], io[14]); + io_short[7] = _mm_packs_epi32(io[11], io[15]); + + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + vpx_idct8_sse2(io_short); + } else { + iadst8_sse2(io_short); + } + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + vpx_idct8_sse2(io_short); + } else { + iadst8_sse2(io_short); + } + round_shift_8x8(io_short, io); + } else { + __m128i temp[4]; + + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + vpx_highbd_idct8x8_half1d_sse4_1(io); + vpx_highbd_idct8x8_half1d_sse4_1(&io[8]); + } else { + highbd_iadst8_sse4_1(io); + highbd_iadst8_sse4_1(&io[8]); + } + + temp[0] = io[4]; + temp[1] = io[5]; + temp[2] = io[6]; + temp[3] = io[7]; + io[4] = io[8]; + io[5] = io[9]; + io[6] = io[10]; + io[7] = io[11]; + + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + vpx_highbd_idct8x8_half1d_sse4_1(io); + io[8] = temp[0]; + io[9] = temp[1]; + io[10] = temp[2]; + io[11] = temp[3]; + vpx_highbd_idct8x8_half1d_sse4_1(&io[8]); + } else { + highbd_iadst8_sse4_1(io); + io[8] = temp[0]; + io[9] = temp[1]; + io[10] = temp[2]; + io[11] = temp[3]; + highbd_iadst8_sse4_1(&io[8]); + } + highbd_idct8x8_final_round(io); + } + recon_and_store_8x8(io, dest, stride, bd); +} diff --git a/media/libvpx/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/media/libvpx/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c new file mode 100644 index 0000000000..ad693718c0 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp9_rtcd.h" +#include "vpx_dsp/x86/inv_txfm_sse2.h" + +void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + __m128i in[2]; + const __m128i eight = _mm_set1_epi16(8); + + in[0] = load_input_data8(input); + in[1] = load_input_data8(input + 8); + + switch (tx_type) { + case DCT_DCT: + idct4_sse2(in); + idct4_sse2(in); + break; + case ADST_DCT: + idct4_sse2(in); + iadst4_sse2(in); + break; + case DCT_ADST: + iadst4_sse2(in); + idct4_sse2(in); + break; + default: + assert(tx_type == ADST_ADST); + iadst4_sse2(in); + iadst4_sse2(in); + break; + } + + // Final round and shift + in[0] = _mm_add_epi16(in[0], eight); + in[1] = _mm_add_epi16(in[1], eight); + + in[0] = _mm_srai_epi16(in[0], 4); + in[1] = _mm_srai_epi16(in[1], 4); + + recon_and_store4x4_sse2(in, dest, stride); +} + +void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + __m128i in[8]; + const __m128i final_rounding = _mm_set1_epi16(1 << 4); + + // load input data + in[0] = load_input_data8(input); + in[1] = load_input_data8(input + 8 * 1); + in[2] = load_input_data8(input + 8 * 2); + in[3] = load_input_data8(input + 8 * 3); + in[4] = load_input_data8(input + 8 * 4); + in[5] = load_input_data8(input + 8 * 5); + in[6] = load_input_data8(input + 8 * 6); + in[7] = load_input_data8(input + 8 * 7); + + switch (tx_type) { + case DCT_DCT: + vpx_idct8_sse2(in); + vpx_idct8_sse2(in); + break; + case ADST_DCT: + vpx_idct8_sse2(in); + iadst8_sse2(in); + break; + case DCT_ADST: + iadst8_sse2(in); + vpx_idct8_sse2(in); + break; + default: + assert(tx_type == ADST_ADST); + iadst8_sse2(in); + 
iadst8_sse2(in); + break; + } + + // Final rounding and shift + in[0] = _mm_adds_epi16(in[0], final_rounding); + in[1] = _mm_adds_epi16(in[1], final_rounding); + in[2] = _mm_adds_epi16(in[2], final_rounding); + in[3] = _mm_adds_epi16(in[3], final_rounding); + in[4] = _mm_adds_epi16(in[4], final_rounding); + in[5] = _mm_adds_epi16(in[5], final_rounding); + in[6] = _mm_adds_epi16(in[6], final_rounding); + in[7] = _mm_adds_epi16(in[7], final_rounding); + + in[0] = _mm_srai_epi16(in[0], 5); + in[1] = _mm_srai_epi16(in[1], 5); + in[2] = _mm_srai_epi16(in[2], 5); + in[3] = _mm_srai_epi16(in[3], 5); + in[4] = _mm_srai_epi16(in[4], 5); + in[5] = _mm_srai_epi16(in[5], 5); + in[6] = _mm_srai_epi16(in[6], 5); + in[7] = _mm_srai_epi16(in[7], 5); + + recon_and_store(dest + 0 * stride, in[0]); + recon_and_store(dest + 1 * stride, in[1]); + recon_and_store(dest + 2 * stride, in[2]); + recon_and_store(dest + 3 * stride, in[3]); + recon_and_store(dest + 4 * stride, in[4]); + recon_and_store(dest + 5 * stride, in[5]); + recon_and_store(dest + 6 * stride, in[6]); + recon_and_store(dest + 7 * stride, in[7]); +} + +static INLINE void load_buffer_8x16(const tran_low_t *const input, + __m128i *const in) { + in[0] = load_input_data8(input + 0 * 16); + in[1] = load_input_data8(input + 1 * 16); + in[2] = load_input_data8(input + 2 * 16); + in[3] = load_input_data8(input + 3 * 16); + in[4] = load_input_data8(input + 4 * 16); + in[5] = load_input_data8(input + 5 * 16); + in[6] = load_input_data8(input + 6 * 16); + in[7] = load_input_data8(input + 7 * 16); + + in[8] = load_input_data8(input + 8 * 16); + in[9] = load_input_data8(input + 9 * 16); + in[10] = load_input_data8(input + 10 * 16); + in[11] = load_input_data8(input + 11 * 16); + in[12] = load_input_data8(input + 12 * 16); + in[13] = load_input_data8(input + 13 * 16); + in[14] = load_input_data8(input + 14 * 16); + in[15] = load_input_data8(input + 15 * 16); +} + +static INLINE void write_buffer_8x16(uint8_t *const dest, __m128i *const 
in, + const int stride) { + const __m128i final_rounding = _mm_set1_epi16(1 << 5); + // Final rounding and shift + in[0] = _mm_adds_epi16(in[0], final_rounding); + in[1] = _mm_adds_epi16(in[1], final_rounding); + in[2] = _mm_adds_epi16(in[2], final_rounding); + in[3] = _mm_adds_epi16(in[3], final_rounding); + in[4] = _mm_adds_epi16(in[4], final_rounding); + in[5] = _mm_adds_epi16(in[5], final_rounding); + in[6] = _mm_adds_epi16(in[6], final_rounding); + in[7] = _mm_adds_epi16(in[7], final_rounding); + in[8] = _mm_adds_epi16(in[8], final_rounding); + in[9] = _mm_adds_epi16(in[9], final_rounding); + in[10] = _mm_adds_epi16(in[10], final_rounding); + in[11] = _mm_adds_epi16(in[11], final_rounding); + in[12] = _mm_adds_epi16(in[12], final_rounding); + in[13] = _mm_adds_epi16(in[13], final_rounding); + in[14] = _mm_adds_epi16(in[14], final_rounding); + in[15] = _mm_adds_epi16(in[15], final_rounding); + + in[0] = _mm_srai_epi16(in[0], 6); + in[1] = _mm_srai_epi16(in[1], 6); + in[2] = _mm_srai_epi16(in[2], 6); + in[3] = _mm_srai_epi16(in[3], 6); + in[4] = _mm_srai_epi16(in[4], 6); + in[5] = _mm_srai_epi16(in[5], 6); + in[6] = _mm_srai_epi16(in[6], 6); + in[7] = _mm_srai_epi16(in[7], 6); + in[8] = _mm_srai_epi16(in[8], 6); + in[9] = _mm_srai_epi16(in[9], 6); + in[10] = _mm_srai_epi16(in[10], 6); + in[11] = _mm_srai_epi16(in[11], 6); + in[12] = _mm_srai_epi16(in[12], 6); + in[13] = _mm_srai_epi16(in[13], 6); + in[14] = _mm_srai_epi16(in[14], 6); + in[15] = _mm_srai_epi16(in[15], 6); + + recon_and_store(dest + 0 * stride, in[0]); + recon_and_store(dest + 1 * stride, in[1]); + recon_and_store(dest + 2 * stride, in[2]); + recon_and_store(dest + 3 * stride, in[3]); + recon_and_store(dest + 4 * stride, in[4]); + recon_and_store(dest + 5 * stride, in[5]); + recon_and_store(dest + 6 * stride, in[6]); + recon_and_store(dest + 7 * stride, in[7]); + recon_and_store(dest + 8 * stride, in[8]); + recon_and_store(dest + 9 * stride, in[9]); + recon_and_store(dest + 10 * stride, in[10]); + 
recon_and_store(dest + 11 * stride, in[11]); + recon_and_store(dest + 12 * stride, in[12]); + recon_and_store(dest + 13 * stride, in[13]); + recon_and_store(dest + 14 * stride, in[14]); + recon_and_store(dest + 15 * stride, in[15]); +} + +void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride, int tx_type) { + __m128i in0[16], in1[16]; + + load_buffer_8x16(input, in0); + input += 8; + load_buffer_8x16(input, in1); + + switch (tx_type) { + case DCT_DCT: + idct16_sse2(in0, in1); + idct16_sse2(in0, in1); + break; + case ADST_DCT: + idct16_sse2(in0, in1); + iadst16_sse2(in0, in1); + break; + case DCT_ADST: + iadst16_sse2(in0, in1); + idct16_sse2(in0, in1); + break; + default: + assert(tx_type == ADST_ADST); + iadst16_sse2(in0, in1); + iadst16_sse2(in0, in1); + break; + } + + write_buffer_8x16(dest, in0, stride); + dest += 8; + write_buffer_8x16(dest, in1, stride); +} diff --git a/media/libvpx/libvpx/vp9/common/x86/vp9_mfqe_sse2.asm b/media/libvpx/libvpx/vp9/common/x86/vp9_mfqe_sse2.asm new file mode 100644 index 0000000000..ae7c94ea3f --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/x86/vp9_mfqe_sse2.asm @@ -0,0 +1,289 @@ +; +; Copyright (c) 2015 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +; This file is a duplicate of mfqe_sse2.asm in VP8. +; TODO(jackychen): Find a way to fix the duplicate. 
+%include "vpx_ports/x86_abi_support.asm" + +SECTION .text + +;void vp9_filter_by_weight16x16_sse2 +;( +; unsigned char *src, +; int src_stride, +; unsigned char *dst, +; int dst_stride, +; int src_weight +;) +globalsym(vp9_filter_by_weight16x16_sse2) +sym(vp9_filter_by_weight16x16_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + SAVE_XMM 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movd xmm0, arg(4) ; src_weight + pshuflw xmm0, xmm0, 0x0 ; replicate to all low words + punpcklqdq xmm0, xmm0 ; replicate to all hi words + + movdqa xmm1, [GLOBAL(tMFQE)] + psubw xmm1, xmm0 ; dst_weight + + mov rax, arg(0) ; src + mov rsi, arg(1) ; src_stride + mov rdx, arg(2) ; dst + mov rdi, arg(3) ; dst_stride + + mov rcx, 16 ; loop count + pxor xmm6, xmm6 + +.combine: + movdqa xmm2, [rax] + movdqa xmm4, [rdx] + add rax, rsi + + ; src * src_weight + movdqa xmm3, xmm2 + punpcklbw xmm2, xmm6 + punpckhbw xmm3, xmm6 + pmullw xmm2, xmm0 + pmullw xmm3, xmm0 + + ; dst * dst_weight + movdqa xmm5, xmm4 + punpcklbw xmm4, xmm6 + punpckhbw xmm5, xmm6 + pmullw xmm4, xmm1 + pmullw xmm5, xmm1 + + ; sum, round and shift + paddw xmm2, xmm4 + paddw xmm3, xmm5 + paddw xmm2, [GLOBAL(tMFQE_round)] + paddw xmm3, [GLOBAL(tMFQE_round)] + psrlw xmm2, 4 + psrlw xmm3, 4 + + packuswb xmm2, xmm3 + movdqa [rdx], xmm2 + add rdx, rdi + + dec rcx + jnz .combine + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + + ret + +;void vp9_filter_by_weight8x8_sse2 +;( +; unsigned char *src, +; int src_stride, +; unsigned char *dst, +; int dst_stride, +; int src_weight +;) +globalsym(vp9_filter_by_weight8x8_sse2) +sym(vp9_filter_by_weight8x8_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movd xmm0, arg(4) ; src_weight + pshuflw xmm0, xmm0, 0x0 ; replicate to all low words + punpcklqdq xmm0, xmm0 ; replicate to all hi words + + movdqa xmm1, [GLOBAL(tMFQE)] + psubw xmm1, xmm0 ; dst_weight + + mov 
rax, arg(0) ; src + mov rsi, arg(1) ; src_stride + mov rdx, arg(2) ; dst + mov rdi, arg(3) ; dst_stride + + mov rcx, 8 ; loop count + pxor xmm4, xmm4 + +.combine: + movq xmm2, [rax] + movq xmm3, [rdx] + add rax, rsi + + ; src * src_weight + punpcklbw xmm2, xmm4 + pmullw xmm2, xmm0 + + ; dst * dst_weight + punpcklbw xmm3, xmm4 + pmullw xmm3, xmm1 + + ; sum, round and shift + paddw xmm2, xmm3 + paddw xmm2, [GLOBAL(tMFQE_round)] + psrlw xmm2, 4 + + packuswb xmm2, xmm4 + movq [rdx], xmm2 + add rdx, rdi + + dec rcx + jnz .combine + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + + ret + +;void vp9_variance_and_sad_16x16_sse2 | arg +;( +; unsigned char *src1, 0 +; int stride1, 1 +; unsigned char *src2, 2 +; int stride2, 3 +; unsigned int *variance, 4 +; unsigned int *sad, 5 +;) +globalsym(vp9_variance_and_sad_16x16_sse2) +sym(vp9_variance_and_sad_16x16_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rax, arg(0) ; src1 + mov rcx, arg(1) ; stride1 + mov rdx, arg(2) ; src2 + mov rdi, arg(3) ; stride2 + + mov rsi, 16 ; block height + + ; Prep accumulator registers + pxor xmm3, xmm3 ; SAD + pxor xmm4, xmm4 ; sum of src2 + pxor xmm5, xmm5 ; sum of src2^2 + + ; Because we're working with the actual output frames + ; we can't depend on any kind of data alignment. +.accumulate: + movdqa xmm0, [rax] ; src1 + movdqa xmm1, [rdx] ; src2 + add rax, rcx ; src1 + stride1 + add rdx, rdi ; src2 + stride2 + + ; SAD(src1, src2) + psadbw xmm0, xmm1 + paddusw xmm3, xmm0 + + ; SUM(src2) + pxor xmm2, xmm2 + psadbw xmm2, xmm1 ; sum src2 by misusing SAD against 0 + paddusw xmm4, xmm2 + + ; pmaddubsw would be ideal if it took two unsigned values. instead, + ; it expects a signed and an unsigned value. so instead we zero extend + ; and operate on words. 
+ pxor xmm2, xmm2 + movdqa xmm0, xmm1 + punpcklbw xmm0, xmm2 + punpckhbw xmm1, xmm2 + pmaddwd xmm0, xmm0 + pmaddwd xmm1, xmm1 + paddd xmm5, xmm0 + paddd xmm5, xmm1 + + sub rsi, 1 + jnz .accumulate + + ; phaddd only operates on adjacent double words. + ; Finalize SAD and store + movdqa xmm0, xmm3 + psrldq xmm0, 8 + paddusw xmm0, xmm3 + paddd xmm0, [GLOBAL(t128)] + psrld xmm0, 8 + + mov rax, arg(5) + movd [rax], xmm0 + + ; Accumulate sum of src2 + movdqa xmm0, xmm4 + psrldq xmm0, 8 + paddusw xmm0, xmm4 + ; Square src2. Ignore high value + pmuludq xmm0, xmm0 + psrld xmm0, 8 + + ; phaddw could be used to sum adjacent values but we want + ; all the values summed. promote to doubles, accumulate, + ; shift and sum + pxor xmm2, xmm2 + movdqa xmm1, xmm5 + punpckldq xmm1, xmm2 + punpckhdq xmm5, xmm2 + paddd xmm1, xmm5 + movdqa xmm2, xmm1 + psrldq xmm1, 8 + paddd xmm1, xmm2 + + psubd xmm1, xmm0 + + ; (variance + 128) >> 8 + paddd xmm1, [GLOBAL(t128)] + psrld xmm1, 8 + mov rax, arg(4) + + movd [rax], xmm1 + + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +t128: +%ifndef __NASM_VER__ + ddq 128 +%elif CONFIG_BIG_ENDIAN + dq 0, 128 +%else + dq 128, 0 +%endif +align 16 +tMFQE: ; 1 << MFQE_PRECISION + times 8 dw 0x10 +align 16 +tMFQE_round: ; 1 << (MFQE_PRECISION - 1) + times 8 dw 0x08 |