335 files changed, 117431 insertions, 0 deletions
diff --git a/media/ffvpx/libavcodec/aarch64/fft_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/fft_init_aarch64.c
new file mode 100644
index 0000000000..77f5607960
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/fft_init_aarch64.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+
+#include "libavcodec/fft.h"
+
+void ff_fft_permute_neon(FFTContext *s, FFTComplex *z); /* exported by fft_neon.S */
+void ff_fft_calc_neon(FFTContext *s, FFTComplex *z); /* exported by fft_neon.S */
+
+void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input); /* NOTE(review): the MDCT entry points are not in fft_neon.S - defined elsewhere */
+void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+
+av_cold void ff_fft_init_aarch64(FFTContext *s) /* point the FFT/MDCT callbacks of s at the NEON routines when the CPU has NEON */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        if (s->nbits < 17) { /* the NEON dispatch table (fft_tab_neon in fft_neon.S) ends at fft65536_neon, i.e. nbits 16 */
+            s->fft_permute = ff_fft_permute_neon;
+            s->fft_calc    = ff_fft_calc_neon;
+        } /* for larger nbits the FFT callbacks are left as they were */
+#if CONFIG_MDCT
+        s->imdct_calc   = ff_imdct_calc_neon; /* the MDCT callbacks are installed regardless of nbits */
+        s->imdct_half   = ff_imdct_half_neon;
+        s->mdct_calc    = ff_mdct_calc_neon;
+        s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE; /* NOTE(review): presumably the coefficient order the NEON MDCT code expects - confirm */
+#endif
+    }
+}
diff --git a/media/ffvpx/libavcodec/aarch64/fft_neon.S b/media/ffvpx/libavcodec/aarch64/fft_neon.S
new file mode 100644
index 0000000000..d7225511dd
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/fft_neon.S
@@ -0,0 +1,447 @@
+/*
+ * ARM NEON optimised FFT
+ *
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2009 Naotoshi Nojiri
+ * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
+ *
+ * This algorithm (though not any of the implementation details) is
+ * based on libdjbfft by D. J. Bernstein.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+#define M_SQRT1_2 0.70710678118654752440
+
+.macro transpose d0, d1, s0, s1
+        trn1            \d0, \s0, \s1           // d0 = even-indexed elements of s0 and s1, interleaved
+        trn2            \d1, \s0, \s1           // d1 = odd-indexed elements of s0 and s1, interleaved
+.endm
+
+
+function fft4_neon               // in-place 4-point FFT of the complex floats at x0
+        AARCH64_VALID_JUMP_TARGET                       // entered by indirect branch (br x3 in ff_fft_calc_neon)
+        ld1             {v0.2s,v1.2s,v2.2s,v3.2s}, [x0] // v0..v3 = z[0..3], one re,im pair per register
+
+        fadd            v4.2s,  v0.2s,  v1.2s   // r0+r1,i0+i1
+        fsub            v6.2s,  v0.2s,  v1.2s   // r0-r1,i0-i1
+
+        ext             v16.8b, v2.8b,  v3.8b,  #4      // i2,r3
+        ext             v17.8b, v3.8b,  v2.8b,  #4      // i3,r2
+
+        fadd            v5.2s,  v2.2s,  v3.2s   // r2+r3,i2+i3
+        fsub            v7.2s,  v16.2s, v17.2s  // i2-i3,r3-r2
+
+        fadd            v0.2s,  v4.2s,  v5.2s   // new z[0]
+        fsub            v2.2s,  v4.2s,  v5.2s   // new z[2]
+        fadd            v1.2s,  v6.2s,  v7.2s   // new z[1]
+        fsub            v3.2s,  v6.2s,  v7.2s   // new z[3]
+
+        st1             {v0.2s,v1.2s,v2.2s,v3.2s}, [x0] // store z[0..3] back over the input
+
+        ret
+endfunc
+
+function fft8_neon               // in-place 8-point FFT of the complex floats at x0; needs v28 = mppm
+        AARCH64_VALID_JUMP_TARGET
+        mov             x1,  x0                          // x1 = &z[0]; the first load advances x0 to &z[4]
+        ld1             {v0.2s, v1.2s, v2.2s, v3.2s},  [x0], #32 // z[0..3]
+        ld1             {v16.2s,v17.2s,v18.2s,v19.2s}, [x0]      // z[4..7]
+        ext             v22.8b, v2.8b,  v3.8b,  #4       // i2,r3
+        ext             v23.8b, v3.8b,  v2.8b,  #4       // i3,r2
+        fadd            v4.2s,  v16.2s, v17.2s           // r4+r5,i4+i5
+        fadd            v5.2s,  v18.2s, v19.2s           // r6+r7,i6+i7
+        fsub            v17.2s, v16.2s, v17.2s           // r4-r5,i4-i5
+        fsub            v19.2s, v18.2s, v19.2s           // r6-r7,i6-i7
+        rev64           v27.2s, v28.2s  // +w,-w: the low two lanes of mppm (-w,+w) swapped, w = sqrt(1/2)
+        fadd            v20.2s, v0.2s,  v1.2s            // r0+r1,i0+i1
+        fadd            v21.2s, v2.2s,  v3.2s            // r2+r3,i2+i3
+        fmul            v26.2s, v17.2s, v28.2s           // -a2r*w,a2i*w
+        ext             v6.8b,  v4.8b,  v5.8b,  #4       // i4+i5,r6+r7
+        ext             v7.8b,  v5.8b,  v4.8b,  #4       // i6+i7,r4+r5
+        fmul            v27.2s, v19.2s, v27.2s           // a3r*w,-a3i*w
+        fsub            v23.2s, v22.2s, v23.2s           // i2-i3,r3-r2
+        fsub            v22.2s, v0.2s,  v1.2s            // r0-r1,i0-i1
+        fmul            v24.2s, v17.2s, v28.s[1]         // a2r*w,a2i*w
+        fmul            v25.2s, v19.2s, v28.s[1]         // a3r*w,a3i*w
+        fadd            v0.2s,  v20.2s, v21.2s           // v0..v3: same butterflies as fft4_neon, applied to z[0..3]
+        fsub            v2.2s,  v20.2s, v21.2s
+        fadd            v1.2s,  v22.2s, v23.2s
+        rev64           v26.2s, v26.2s                   // a2i*w,-a2r*w
+        rev64           v27.2s, v27.2s                   // -a3i*w,a3r*w
+        fsub            v3.2s,  v22.2s, v23.2s
+        fsub            v6.2s,  v6.2s,  v7.2s            // (i4+i5)-(i6+i7),(r6+r7)-(r4+r5)
+        fadd            v24.2s, v24.2s, v26.2s  // a2r+a2i,a2i-a2r   t1,t2
+        fadd            v25.2s, v25.2s, v27.2s  // a3r-a3i,a3i+a3r   t5,t6
+        fadd            v7.2s,  v4.2s,  v5.2s            // r4+r5+r6+r7,i4+i5+i6+i7
+        fsub            v18.2s, v2.2s,  v6.2s
+        ext             v26.8b, v24.8b, v25.8b, #4
+        ext             v27.8b, v25.8b, v24.8b, #4
+        fadd            v2.2s,  v2.2s,  v6.2s
+        fsub            v16.2s, v0.2s,  v7.2s
+        fadd            v5.2s,  v25.2s, v24.2s
+        fsub            v4.2s,  v26.2s, v27.2s
+        fadd            v0.2s,  v0.2s,  v7.2s
+        fsub            v17.2s, v1.2s,  v5.2s
+        fsub            v19.2s, v3.2s,  v4.2s
+        fadd            v3.2s,  v3.2s,  v4.2s
+        fadd            v1.2s,  v1.2s,  v5.2s
+
+        st1             {v16.2s,v17.2s,v18.2s,v19.2s}, [x0]      // new z[4..7]
+        st1             {v0.2s, v1.2s, v2.2s, v3.2s},  [x1]      // new z[0..3]
+
+        ret
+endfunc
+
+function fft16_neon              // in-place 16-point FFT of the complex floats at x0; needs x14 and v28-v31 from ff_fft_calc_neon
+        AARCH64_VALID_JUMP_TARGET
+        mov             x1,  x0                          // x1 keeps &z[0] for the final stores
+        ld1             {v0.2s, v1.2s, v2.2s, v3.2s},  [x0], #32 // z[0..3]
+        ld1             {v16.2s,v17.2s,v18.2s,v19.2s}, [x0], #32 // z[4..7]
+        ext             v22.8b, v2.8b,  v3.8b,  #4       // i2,r3
+        ext             v23.8b, v3.8b,  v2.8b,  #4       // i3,r2
+        fadd            v4.2s,  v16.2s, v17.2s           // r4+r5,i4+i5
+        fadd            v5.2s,  v18.2s, v19.2s           // r6+r7,i6+i7
+        fsub            v17.2s, v16.2s, v17.2s           // r4-r5,i4-i5
+        fsub            v19.2s, v18.2s, v19.2s           // r6-r7,i6-i7
+        rev64           v27.2s, v28.2s  // +w,-w: the low two lanes of mppm (-w,+w) swapped, w = sqrt(1/2)
+        fadd            v20.2s, v0.2s,  v1.2s            // r0+r1,i0+i1
+        fadd            v21.2s, v2.2s,  v3.2s            // r2+r3,i2+i3
+        fmul            v26.2s, v17.2s, v28.2s           // -a2r*w,a2i*w
+        ext             v6.8b,  v4.8b,  v5.8b,  #4       // i4+i5,r6+r7
+        ext             v7.8b,  v5.8b,  v4.8b,  #4       // i6+i7,r4+r5
+        fmul            v27.2s, v19.2s, v27.2s           // a3r*w,-a3i*w
+        fsub            v23.2s, v22.2s, v23.2s           // i2-i3,r3-r2
+        fsub            v22.2s, v0.2s,  v1.2s            // r0-r1,i0-i1
+        fmul            v24.2s, v17.2s, v28.s[1]         // a2r*w,a2i*w
+        fmul            v25.2s, v19.2s, v28.s[1]         // a3r*w,a3i*w
+        fadd            v0.2s,  v20.2s, v21.2s
+        fsub            v2.2s,  v20.2s, v21.2s
+        fadd            v1.2s,  v22.2s, v23.2s
+        rev64           v26.2s, v26.2s                   // a2i*w,-a2r*w
+        rev64           v27.2s, v27.2s                   // -a3i*w,a3r*w
+        fsub            v3.2s,  v22.2s, v23.2s
+        fsub            v6.2s,  v6.2s,  v7.2s
+        fadd            v24.2s, v24.2s, v26.2s  // a2r+a2i,a2i-a2r   t1,t2
+        fadd            v25.2s, v25.2s, v27.2s  // a3r-a3i,a3i+a3r   t5,t6
+        fadd            v7.2s,  v4.2s,  v5.2s
+        fsub            v18.2s, v2.2s,  v6.2s
+        ld1             {v20.4s,v21.4s}, [x0], #32       // z[8..11]
+        ld1             {v22.4s,v23.4s}, [x0], #32       // z[12..15]
+        ext             v26.8b, v24.8b, v25.8b, #4
+        ext             v27.8b, v25.8b, v24.8b, #4
+        fadd            v2.2s,  v2.2s,  v6.2s
+        fsub            v16.2s, v0.2s,  v7.2s
+        fadd            v5.2s,  v25.2s, v24.2s
+        fsub            v4.2s,  v26.2s, v27.2s
+        transpose       v24.2d, v25.2d, v20.2d, v22.2d   // v24 = z[8],z[12]   v25 = z[9],z[13]
+        transpose       v26.2d, v27.2d, v21.2d, v23.2d   // v26 = z[10],z[14]  v27 = z[11],z[15]
+        fadd            v0.2s,  v0.2s,  v7.2s
+        fsub            v17.2s, v1.2s,  v5.2s
+        fsub            v19.2s, v3.2s,  v4.2s
+        fadd            v3.2s,  v3.2s,  v4.2s
+        fadd            v1.2s,  v1.2s,  v5.2s
+        ext             v20.16b, v21.16b, v21.16b,  #4   // z[10],z[11] rotated by one float lane
+        ext             v21.16b, v23.16b, v23.16b,  #4   // z[14],z[15] rotated by one float lane
+
+        zip1            v0.2d,  v0.2d,  v1.2d   // {z[0],   z[1]}
+        zip1            v1.2d,  v2.2d,  v3.2d   // {z[2],   z[3]}
+        zip1            v2.2d,  v16.2d, v17.2d  // {z[o1],  z[o1+1]}
+        zip1            v3.2d,  v18.2d, v19.2d  // {z[o1+2],z[o1+3]}
+
+        // 2 x fft4
+        transpose       v22.2d, v23.2d, v20.2d, v21.2d
+
+        fadd            v4.4s,  v24.4s, v25.4s
+        fadd            v5.4s,  v26.4s, v27.4s
+        fsub            v6.4s,  v24.4s, v25.4s
+        fsub            v7.4s,  v22.4s, v23.4s
+
+        ld1             {v23.4s},  [x14]        // first four floats of ff_cos_16 (x14 is set in ff_fft_calc_neon)
+
+        fadd            v24.4s, v4.4s,  v5.4s   // {z[o2+0],z[o2+1]}
+        fsub            v26.4s, v4.4s,  v5.4s   // {z[o2+2],z[o2+3]}
+        fadd            v25.4s, v6.4s,  v7.4s   // {z[o3+0],z[o3+1]}
+        fsub            v27.4s, v6.4s,  v7.4s   // {z[o3+2],z[o3+3]}
+
+        //fft_pass_neon_16
+        rev64           v7.4s,  v25.4s
+        fmul            v25.4s, v25.4s, v23.s[1]
+        fmul            v7.4s,  v7.4s,  v29.4s  // v29 = pmmp sign pattern +1,-1,-1,+1
+        fmla            v25.4s, v7.4s,  v23.s[3] // {t1a,t2a,t5a,t6a}
+
+        zip1            v20.4s, v24.4s, v25.4s
+        zip2            v21.4s, v24.4s, v25.4s
+        fneg            v22.4s, v20.4s
+        fadd            v4.4s,  v21.4s, v20.4s
+        fsub            v6.4s,  v20.4s, v21.4s  // just the second half
+        fadd            v5.4s,  v21.4s, v22.4s  // just the first half
+
+        tbl             v4.16b, {v4.16b},        v30.16b // trans4_float
+        tbl             v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
+
+        fsub            v20.4s, v0.4s,  v4.4s   // {z[o2],z[o2+1]}
+        fadd            v16.4s, v0.4s,  v4.4s   // {z[0], z[1]}
+        fsub            v22.4s, v2.4s,  v5.4s   // {z[o3],z[o3+1]}
+        fadd            v18.4s, v2.4s,  v5.4s   // {z[o1],z[o1+1]}
+
+//second half
+        rev64           v6.4s,  v26.4s
+        fmul            v26.4s, v26.4s, v23.s[2]
+        rev64           v7.4s,  v27.4s
+        fmul            v27.4s, v27.4s, v23.s[3]
+        fmul            v6.4s,  v6.4s,  v29.4s
+        fmul            v7.4s,  v7.4s,  v29.4s
+        fmla            v26.4s, v6.4s,  v23.s[2] // {t1,t2,t5,t6}
+        fmla            v27.4s, v7.4s,  v23.s[1] // {t1a,t2a,t5a,t6a}
+
+        zip1            v24.4s, v26.4s, v27.4s
+        zip2            v25.4s, v26.4s, v27.4s
+        fneg            v26.4s, v24.4s
+        fadd            v4.4s,  v25.4s, v24.4s
+        fsub            v6.4s,  v24.4s, v25.4s  // just the second half
+        fadd            v5.4s,  v25.4s, v26.4s  // just the first half
+
+        tbl             v4.16b, {v4.16b},        v30.16b // trans4_float
+        tbl             v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
+
+        fadd            v17.4s, v1.4s, v4.4s    // {z[2], z[3]}
+        fsub            v21.4s, v1.4s, v4.4s    // {z[o2+2],z[o2+3]}
+        fadd            v19.4s, v3.4s, v5.4s    // {z[o1+2],z[o1+3]}
+        fsub            v23.4s, v3.4s, v5.4s    // {z[o3+2],z[o3+3]}
+
+        st1             {v16.4s,v17.4s}, [x1], #32      // new z[0..3]
+        st1             {v18.4s,v19.4s}, [x1], #32      // new z[4..7]
+        st1             {v20.4s,v21.4s}, [x1], #32      // new z[8..11]
+        st1             {v22.4s,v23.4s}, [x1], #32      // new z[12..15]
+
+        ret
+endfunc
+
+
+const  trans4_float, align=4
+        .byte    0,  1,  2,  3      // tbl index pattern (loaded into v30): out lane 0 = src lane 0
+        .byte    8,  9, 10, 11      // out lane 1 = src lane 2
+        .byte    4,  5,  6,  7      // out lane 2 = src lane 1
+        .byte   12, 13, 14, 15      // out lane 3 = src lane 3
+endconst
+
+const  trans8_float, align=4
+        .byte   24, 25, 26, 27      // two-register tbl pattern (loaded into v31): out lane 0 = second reg lane 2
+        .byte    0,  1,  2,  3      // out lane 1 = first reg lane 0
+        .byte   28, 29, 30, 31      // out lane 2 = second reg lane 3
+        .byte    4,  5,  6,  7      // out lane 3 = first reg lane 1
+endconst
+
+function fft_pass_neon           // combining pass: x0 = z, x2 = n, x4 = cos table; needs x7 = -8 and v29-v31 from ff_fft_calc_neon
+        sub             x6,  x2,  #1            // n - 1, loop counter
+        lsl             x5,  x2,  #3            // 2 * n * sizeof FFTSample
+        lsl             x1,  x2,  #4            // 2 * n * sizeof FFTComplex
+        add             x5,  x4,  x5            // wim
+        add             x3,  x1,  x2,  lsl #5   // 4 * n * sizeof FFTComplex
+        add             x2,  x0,  x2,  lsl #5   // &z[o2]
+        add             x3,  x0,  x3            // &z[o3]
+        add             x1,  x0,  x1            // &z[o1]
+        ld1             {v20.4s},[x2]           // {z[o2],z[o2+1]}
+        ld1             {v22.4s},[x3]           // {z[o3],z[o3+1]}
+        ld1             {v4.2s},  [x4], #8      // {wre[0],wre[1]}
+        trn2            v25.2d, v20.2d, v22.2d
+        sub             x5,  x5,  #4            // wim--
+        trn1            v24.2d, v20.2d, v22.2d
+        ld1             {v5.s}[0],  [x5], x7    // v5.s[0] = wim[-1]; x7 = -8 steps wim backwards
+        rev64           v7.4s,  v25.4s
+        fmul            v25.4s, v25.4s, v4.s[1]
+        ld1             {v16.4s}, [x0]          // {z[0],z[1]}
+        fmul            v7.4s,  v7.4s,  v29.4s  // v29 = pmmp sign pattern +1,-1,-1,+1
+        ld1             {v17.4s}, [x1]          // {z[o1],z[o1+1]}
+        prfm            pldl1keep, [x2, #16]
+        prfm            pldl1keep, [x3, #16]
+        fmla            v25.4s, v7.4s,  v5.s[0] // {t1a,t2a,t5a,t6a}
+        prfm            pldl1keep, [x0, #16]
+        prfm            pldl1keep, [x1, #16]
+
+        zip1            v20.4s, v24.4s, v25.4s
+        zip2            v21.4s, v24.4s, v25.4s
+        fneg            v22.4s, v20.4s
+        fadd            v4.4s,  v21.4s, v20.4s
+        fsub            v6.4s,  v20.4s, v21.4s  // just the second half
+        fadd            v5.4s,  v21.4s, v22.4s  // just the first half
+
+        tbl             v4.16b, {v4.16b},        v30.16b // trans4_float
+        tbl             v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
+
+        fadd            v20.4s, v16.4s, v4.4s
+        fsub            v22.4s, v16.4s, v4.4s
+        fadd            v21.4s, v17.4s, v5.4s
+        st1             {v20.4s}, [x0], #16     // {z[0], z[1]}
+        fsub            v23.4s, v17.4s, v5.4s
+
+        st1             {v21.4s}, [x1], #16     // {z[o1],z[o1+1]}
+        st1             {v22.4s}, [x2], #16     // {z[o2],z[o2+1]}
+        st1             {v23.4s}, [x3], #16     // {z[o3],z[o3+1]}
+1:
+        ld1             {v20.4s},[x2]    // {z[o2],z[o2+1]}
+        ld1             {v22.4s},[x3]    // {z[o3],z[o3+1]}
+        ld1             {v4.2s}, [x4], #8       // {wre[0],wre[1]}
+        transpose       v26.2d, v27.2d, v20.2d, v22.2d
+        ld1             {v5.2s}, [x5], x7       // {wim[-1],wim[0]}
+        rev64           v6.4s,  v26.4s
+        fmul            v26.4s, v26.4s, v4.s[0]
+        rev64           v7.4s,  v27.4s
+        fmul            v27.4s, v27.4s, v4.s[1]
+        fmul            v6.4s,  v6.4s,  v29.4s
+        fmul            v7.4s,  v7.4s,  v29.4s
+        ld1             {v16.4s},[x0]           // {z[0],z[1]}
+        fmla            v26.4s, v6.4s,  v5.s[1] // {t1,t2,t5,t6}
+        fmla            v27.4s, v7.4s,  v5.s[0] // {t1a,t2a,t5a,t6a}
+        ld1             {v17.4s},[x1]           // {z[o1],z[o1+1]}
+
+        subs            x6,  x6,  #1            // n--
+
+        zip1            v20.4s, v26.4s, v27.4s
+        zip2            v21.4s, v26.4s, v27.4s
+        fneg            v22.4s, v20.4s
+        fadd            v4.4s,  v21.4s, v20.4s
+        fsub            v6.4s,  v20.4s, v21.4s  // just the second half
+        fadd            v5.4s,  v21.4s, v22.4s  // just the first half
+
+        tbl             v4.16b, {v4.16b},        v30.16b // trans4_float
+        tbl             v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
+
+        fadd            v20.4s, v16.4s, v4.4s
+        fsub            v22.4s, v16.4s, v4.4s
+        fadd            v21.4s, v17.4s, v5.4s
+        st1             {v20.4s}, [x0], #16     // {z[0], z[1]}
+        fsub            v23.4s, v17.4s, v5.4s
+
+        st1             {v21.4s}, [x1], #16     // {z[o1],z[o1+1]}
+        st1             {v22.4s}, [x2], #16     // {z[o2],z[o2+1]}
+        st1             {v23.4s}, [x3], #16     // {z[o3],z[o3+1]}
+        b.ne            1b                      // flags come from the subs above; the vector ops do not touch them
+
+        ret
+endfunc
+
+.macro  def_fft n, n2, n4
+function fft\n\()_neon, align=6
+        AARCH64_VALID_JUMP_TARGET
+        AARCH64_SIGN_LINK_REGISTER
+        stp             x28, x30, [sp, #-16]!   // save x28 and the return address
+        add             x28, x0,  #\n4*2*8      // x28 = z + 2*n4 complex values (8 bytes each)
+        bl              fft\n2\()_neon          // transform the n2 values starting at z
+        mov             x0,  x28
+        bl              fft\n4\()_neon          // transform the n4 values starting at z + 2*n4
+        add             x0,  x28, #\n4*1*8
+        bl              fft\n4\()_neon          // transform the n4 values starting at z + 3*n4
+        sub             x0,  x28, #\n4*2*8      // x0 = z again
+        ldp             x28, x30, [sp], #16
+        AARCH64_VALIDATE_LINK_REGISTER
+        movrel          x4,  X(ff_cos_\n)       // x4 = table read as wre/wim by fft_pass_neon
+        mov             x2,  #\n4>>1            // x2 = pass size argument for fft_pass_neon
+        b               fft_pass_neon           // tail call: fft_pass_neon returns to our caller
+endfunc
+.endm
+
+        def_fft    32,    16,     8             // arguments are n, n/2, n/4
+        def_fft    64,    32,    16
+        def_fft   128,    64,    32
+        def_fft   256,   128,    64
+        def_fft   512,   256,   128
+        def_fft  1024,   512,   256
+        def_fft  2048,  1024,   512
+        def_fft  4096,  2048,  1024
+        def_fft  8192,  4096,  2048
+        def_fft 16384,  8192,  4096
+        def_fft 32768, 16384,  8192
+        def_fft 65536, 32768, 16384
+
+function ff_fft_calc_neon, export=1
+        prfm            pldl1keep, [x1]          // x1 = z (second C argument)
+        movrel          x10, trans4_float
+        ldr             w2,  [x0]                // first 32-bit field of the context (nbits, as in ff_fft_permute_neon)
+        movrel          x11, trans8_float
+        sub             w2,  w2,  #2             // fft_tab_neon starts at fft4_neon, so index = nbits - 2
+        movrel          x3,  fft_tab_neon
+        ld1             {v30.16b}, [x10]         // v30 = trans4_float tbl pattern
+        mov             x7,  #-8                 // post-index step for the wim loads in fft_pass_neon
+        movrel          x12, pmmp
+        ldr             x3,  [x3, x2, lsl #3]    // 8-byte function pointer for this size
+        movrel          x13, mppm
+        movrel          x14, X(ff_cos_16)        // read by fft16_neon
+        ld1             {v31.16b}, [x11]         // v31 = trans8_float tbl pattern
+        mov             x0,  x1                  // the size-specific routines take z in x0
+        ld1             {v29.4s},  [x12]         // pmmp
+        ld1             {v28.4s},  [x13]         // mppm
+        br              x3                       // tail jump; the selected routine returns to our caller
+endfunc
+
+function ff_fft_permute_neon, export=1
+        mov             x6,  #1
+        ldr             w2,  [x0]       // nbits
+        ldr             x3,  [x0, #16]  // tmp_buf
+        ldr             x0,  [x0, #8]   // revtab
+        lsl             x6,  x6, x2     // x6 = 1 << nbits, counter for the scatter loop
+        mov             x2,  x6         // x2 = same count, kept for the copy-back loop
+1:
+        ld1             {v0.2s,v1.2s}, [x1], #16        // next two complex values of z
+        ldr             w4,  [x0], #4   // two 16-bit revtab entries at once
+        uxth            w5,  w4         // low half: destination index for v0
+        lsr             w4,  w4,  #16   // high half: destination index for v1
+        add             x5,  x3,  x5,  lsl #3   // index * 8 bytes into tmp_buf
+        add             x4,  x3,  x4,  lsl #3
+        st1             {v0.2s}, [x5]
+        st1             {v1.2s}, [x4]
+        subs            x6,  x6, #2
+        b.gt            1b
+
+        sub             x1,  x1,  x2,  lsl #3   // rewind x1 to the start of z
+1:
+        ld1             {v0.4s,v1.4s}, [x3], #32        // copy tmp_buf back into z, four complex values per pass
+        st1             {v0.4s,v1.4s}, [x1], #32
+        subs            x2,  x2,  #4
+        b.gt            1b
+
+        ret
+endfunc
+
+const   fft_tab_neon, relocate=1
+        .quad fft4_neon         // index 0, i.e. nbits 2 (ff_fft_calc_neon indexes with nbits - 2)
+        .quad fft8_neon
+        .quad fft16_neon
+        .quad fft32_neon
+        .quad fft64_neon
+        .quad fft128_neon
+        .quad fft256_neon
+        .quad fft512_neon
+        .quad fft1024_neon
+        .quad fft2048_neon
+        .quad fft4096_neon
+        .quad fft8192_neon
+        .quad fft16384_neon
+        .quad fft32768_neon
+        .quad fft65536_neon     // index 14, i.e. nbits 16: the largest size this table can dispatch
+endconst
+
+const   pmmp, align=4
+        .float          +1.0, -1.0, -1.0, +1.0   // sign pattern, loaded into v29 by ff_fft_calc_neon
+endconst
+
+const   mppm, align=4
+        .float          -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2   // signed sqrt(1/2) factors, loaded into v28 by ff_fft_calc_neon
+endconst
diff --git a/media/ffvpx/libavcodec/aarch64/h264chroma_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/h264chroma_init_aarch64.c
new file mode 100644
index 0000000000..00fc7b20f1
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/h264chroma_init_aarch64.c
@@ -0,0 +1,59 @@
+/*
+ * ARM NEON optimised H.264 chroma functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/h264chroma.h"
+
+#include "config.h"
+
+void ff_put_h264_chroma_mc8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y); /* NOTE(review): presumably emitted by the h264_chroma_mc8 macro in h264cmc_neon.S - confirm */
+void ff_put_h264_chroma_mc4_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+void ff_put_h264_chroma_mc2_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+
+void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y); /* NOTE(review): presumably the avg flavour of the same macro - confirm */
+void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+
+av_cold void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth) /* install the NEON chroma MC routines into c when usable */
+{
+    const int high_bit_depth = bit_depth > 8; /* the NEON routines are only installed for bit_depth <= 8 */
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags) && !high_bit_depth) {
+        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon; /* slot 0: mc8 */
+        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon; /* slot 1: mc4 */
+        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon; /* slot 2: mc2 */
+
+        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon; /* same slot layout for the avg table */
+        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
+        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
+    } /* otherwise the tables are left untouched */
+}
diff --git a/media/ffvpx/libavcodec/aarch64/h264cmc_neon.S b/media/ffvpx/libavcodec/aarch64/h264cmc_neon.S
new file mode 100644
index 0000000000..88ccd727d0
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/h264cmc_neon.S
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+
+#include "libavutil/aarch64/asm.S"
+
+/* chroma_mc8(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
+.macro  h264_chroma_mc8 type, codec=h264
+function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
+  .ifc \type,avg
+        mov             x8,  x0                 // avg only: second dst pointer, used to read the existing pixels
+  .endif
+        prfm            pldl1strm, [x1]
+        prfm            pldl1strm, [x1, x2]
+  .ifc \codec,rv40
+        movrel          x6,  rv40bias
+        lsr             w9,  w5,  #1
+        lsr             w10, w4,  #1
+        lsl             w9,  w9,  #3
+        lsl             w10, w10, #1
+        add             w9,  w9,  w10           // byte offset (y>>1)*8 + (x>>1)*2 into rv40bias
+        add             x6,  x6,  w9, UXTW
+        ld1r            {v22.8H}, [x6]          // v22 = bias value replicated to all lanes
+  .endif
+  .ifc \codec,vc1
+        movi            v22.8H,   #28           // v22 = constant bias 28 in all lanes
+  .endif
+        mul             w7,  w4,  w5            // w7  = x*y
+        lsl             w14, w5,  #3            // w14 = 8*y
+        lsl             w13, w4,  #3            // w13 = 8*x
+        cmp             w7,  #0                 // flags are consumed by the b.eq below; the subs/adds in between are non-flag-setting
+        sub             w6,  w14, w7            // w6  = 8*y - x*y = (8-x)*y
+        sub             w12, w13, w7            // w12 = 8*x - x*y = x*(8-y)
+        sub             w4,  w7,  w13
+        sub             w4,  w4,  w14
+        add             w4,  w4,  #64           // w4  = x*y - 8*x - 8*y + 64 = (8-x)*(8-y)
+        b.eq            2f                      // x*y == 0: take one of the reduced filters
+
+        dup             v0.8B,  w4              // weight for src[0] on the current row
+        dup             v1.8B,  w12             // weight for src[1] on the current row
+        ld1             {v4.8B, v5.8B}, [x1], x2
+        dup             v2.8B,  w6              // weight for src[0] on the next row
+        dup             v3.8B,  w7              // weight for src[1] on the next row
+        ext             v5.8B,  v4.8B,  v5.8B,  #1      // v5 = the row shifted by one pixel
+1:      ld1             {v6.8B, v7.8B}, [x1], x2
+        umull           v16.8H, v4.8B,  v0.8B
+        umlal           v16.8H, v5.8B,  v1.8B
+        ext             v7.8B,  v6.8B,  v7.8B,  #1
+        ld1             {v4.8B, v5.8B}, [x1], x2
+        umlal           v16.8H, v6.8B,  v2.8B
+        prfm            pldl1strm, [x1]
+        ext             v5.8B,  v4.8B,  v5.8B,  #1
+        umlal           v16.8H, v7.8B,  v3.8B   // v16 = four-tap weighted sum for the first output row
+        umull           v17.8H, v6.8B,  v0.8B
+        subs            w3,  w3,  #2            // two output rows per iteration
+        umlal           v17.8H, v7.8B, v1.8B
+        umlal           v17.8H, v4.8B, v2.8B
+        umlal           v17.8H, v5.8B, v3.8B    // v17 = four-tap weighted sum for the second output row
+        prfm            pldl1strm, [x1, x2]
+  .ifc \codec,h264
+        rshrn           v16.8B, v16.8H, #6      // h264: rounding shift by 6 and narrow to bytes
+        rshrn           v17.8B, v17.8H, #6
+  .else
+        add             v16.8H, v16.8H, v22.8H  // other codecs: add the bias in v22, then truncating shift
+        add             v17.8H, v17.8H, v22.8H
+        shrn            v16.8B, v16.8H, #6
+        shrn            v17.8B, v17.8H, #6
+  .endif
+  .ifc \type,avg
+        ld1             {v20.8B}, [x8], x2
+        ld1             {v21.8B}, [x8], x2
+        urhadd          v16.8B, v16.8B, v20.8B  // avg: rounding average with the pixels already in dst
+        urhadd          v17.8B, v17.8B, v21.8B
+  .endif
+        st1             {v16.8B}, [x0], x2
+        st1             {v17.8B}, [x0], x2
+        b.gt            1b
+        ret
+
+2:      adds            w12, w12, w6            // x*y == 0 here, so w12 = 8*x + 8*y
+        dup             v0.8B, w4
+        b.eq            5f                      // sum is zero: no interpolation needed
+        tst             w6,  w6
+        dup             v1.8B, w12
+        b.eq            4f                      // w6 == 0 (the 8*y term): horizontal-only filter
+
+        ld1             {v4.8B}, [x1], x2       // fallthrough: vertical-only filter between consecutive rows
+3:      ld1             {v6.8B}, [x1], x2
+        umull           v16.8H, v4.8B,  v0.8B
+        umlal           v16.8H, v6.8B,  v1.8B
+        ld1             {v4.8B}, [x1], x2
+        umull           v17.8H, v6.8B,  v0.8B
+        umlal           v17.8H, v4.8B,  v1.8B
+        prfm            pldl1strm, [x1]
+  .ifc \codec,h264
+        rshrn           v16.8B, v16.8H, #6
+        rshrn           v17.8B, v17.8H, #6
+  .else
+        add             v16.8H, v16.8H, v22.8H
+        add             v17.8H, v17.8H, v22.8H
+        shrn            v16.8B, v16.8H, #6
+        shrn            v17.8B, v17.8H, #6
+  .endif
+        prfm            pldl1strm, [x1, x2]
+  .ifc \type,avg
+        ld1             {v20.8B}, [x8], x2
+        ld1             {v21.8B}, [x8], x2
+        urhadd          v16.8B, v16.8B, v20.8B
+        urhadd          v17.8B, v17.8B, v21.8B
+  .endif
+        subs            w3,  w3,  #2
+        st1             {v16.8B}, [x0], x2
+        st1             {v17.8B}, [x0], x2
+        b.gt            3b
+        ret
+
+4:      ld1             {v4.8B, v5.8B}, [x1], x2        // horizontal-only: each row is filtered against itself shifted by one pixel
+        ld1             {v6.8B, v7.8B}, [x1], x2
+        ext             v5.8B,  v4.8B,  v5.8B,  #1
+        ext             v7.8B,  v6.8B,  v7.8B,  #1
+        prfm            pldl1strm, [x1]
+        subs            w3,  w3,  #2
+        umull           v16.8H, v4.8B, v0.8B
+        umlal           v16.8H, v5.8B, v1.8B
+        umull           v17.8H, v6.8B, v0.8B
+        umlal           v17.8H, v7.8B, v1.8B
+        prfm            pldl1strm, [x1, x2]
+  .ifc \codec,h264
+        rshrn           v16.8B, v16.8H, #6
+        rshrn           v17.8B, v17.8H, #6
+  .else
+        add             v16.8H, v16.8H, v22.8H
+        add             v17.8H, v17.8H, v22.8H
+        shrn            v16.8B, v16.8H, #6
+        shrn            v17.8B, v17.8H, #6
+  .endif
+  .ifc \type,avg
+        ld1             {v20.8B}, [x8], x2
+        ld1             {v21.8B}, [x8], x2
+        urhadd          v16.8B, v16.8B, v20.8B
+        urhadd          v17.8B, v17.8B, v21.8B
+  .endif
+        st1             {v16.8B}, [x0], x2
+        st1             {v17.8B}, [x0], x2
+        b.gt            4b
+        ret
+
+5:      ld1             {v4.8B}, [x1], x2       // no interpolation: each pixel is multiplied by v0 and shifted back down
+        ld1             {v5.8B}, [x1], x2
+        prfm            pldl1strm, [x1]
+        subs            w3,  w3,  #2
+        umull           v16.8H, v4.8B, v0.8B
+        umull           v17.8H, v5.8B, v0.8B
+        prfm            pldl1strm, [x1, x2]
+  .ifc \codec,h264
+        rshrn           v16.8B, v16.8H, #6
+        rshrn           v17.8B, v17.8H, #6
+  .else
+        add             v16.8H, v16.8H, v22.8H
+        add             v17.8H, v17.8H, v22.8H
+        shrn            v16.8B, v16.8H, #6
+        shrn            v17.8B, v17.8H, #6
+  .endif
+  .ifc \type,avg
+        ld1             {v20.8B}, [x8], x2
+        ld1             {v21.8B}, [x8], x2
+        urhadd          v16.8B, v16.8B, v20.8B
+        urhadd          v17.8B, v17.8B, v21.8B
+  .endif
+        st1             {v16.8B}, [x0], x2
+        st1             {v17.8B}, [x0], x2
+        b.gt            5b
+        ret
+endfunc
+.endm
+
+/* chroma_mc4(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h, int x, int y): 4 pixels wide, 2 rows per loop pass, 4 weights built from x and y (they sum to 64) */
+.macro  h264_chroma_mc4 type, codec=h264
+function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
+  .ifc \type,avg
+        mov             x8,  x0                         // avg: x8 walks the existing dst rows for the final average
+  .endif
+        prfm            pldl1strm, [x1]                 // prefetch the first two source rows
+        prfm            pldl1strm, [x1, x2]
+  .ifc \codec,rv40
+        movrel          x6,  rv40bias
+        lsr             w9,  w5,  #1                    // y >> 1
+        lsr             w10, w4,  #1                    // x >> 1
+        lsl             w9,  w9,  #3                    // (y >> 1) * 8 bytes: four 16-bit entries per table row
+        lsl             w10, w10, #1                    // (x >> 1) * 2 bytes: one 16-bit entry
+        add             w9,  w9,  w10
+        add             x6,  x6,  w9, UXTW
+        ld1r            {v22.8H}, [x6]                  // v22 = selected rv40bias entry in all 8 lanes
+  .endif
+  .ifc \codec,vc1
+        movi            v22.8H,   #28                   // vc1: constant bias of 28 in all 8 lanes
+  .endif
+        mul             w7,  w4,  w5                    // w7  = x * y
+        lsl             w14, w5,  #3                    // w14 = 8 * y
+        lsl             w13, w4,  #3                    // w13 = 8 * x
+        cmp             w7,  #0                         // flags for the b.eq below; the sub/add in between do not touch them
+        sub             w6,  w14, w7                    // w6  = (8 - x) * y
+        sub             w12, w13, w7                    // w12 = x * (8 - y)
+        sub             w4,  w7,  w13
+        sub             w4,  w4,  w14
+        add             w4,  w4,  #64                   // w4  = (8 - x) * (8 - y)
+        b.eq            2f                              // x * y == 0: at most one direction needs filtering
+        // 2-D case: all four weights are non-zero
+        dup             v24.8B,  w4
+        dup             v25.8B,  w12
+        ld1             {v4.8B}, [x1], x2
+        dup             v26.8B,  w6
+        dup             v27.8B,  w7
+        ext             v5.8B,  v4.8B,  v5.8B, #1       // v5 bytes 0-3 = row bytes 1-4; the upper half is dropped by trn1
+        trn1            v0.2S,  v24.2S, v25.2S          // v0 = upper-row weights: 4x w4 | 4x w12
+        trn1            v2.2S,  v26.2S, v27.2S          // v2 = lower-row weights: 4x w6 | 4x w7
+        trn1            v4.2S,  v4.2S,  v5.2S           // v4 = row bytes 0-3 | row bytes 1-4
+1:      ld1             {v6.8B}, [x1], x2
+        ext             v7.8B,  v6.8B,  v7.8B, #1
+        trn1            v6.2S,  v6.2S,  v7.2S           // v6 = next row in the same 0-3 | 1-4 layout
+        umull           v18.8H, v4.8B,  v0.8B           // first output row: upper source row ...
+        umlal           v18.8H, v6.8B,  v2.8B           // ... plus lower source row
+        ld1             {v4.8B}, [x1], x2               // third row; it is also the upper row of the next pass
+        ext             v5.8B,  v4.8B,  v5.8B, #1
+        trn1            v4.2S,  v4.2S,  v5.2S
+        prfm            pldl1strm, [x1]
+        umull           v19.8H, v6.8B,  v0.8B           // second output row
+        umlal           v19.8H, v4.8B,  v2.8B
+        trn1            v30.2D, v18.2D, v19.2D          // left-tap sums of both output rows
+        trn2            v31.2D, v18.2D, v19.2D          // right-tap sums of both output rows
+        add             v18.8H, v30.8H, v31.8H          // 4 weighted sums per row: first row low, second row high
+  .ifc \codec,h264
+        rshrn           v16.8B, v18.8H, #6              // h264: rounding shift by 6
+  .else
+        add             v18.8H, v18.8H, v22.8H          // other codecs: add the bias held in v22 ...
+        shrn            v16.8B, v18.8H, #6              // ... then a truncating shift by 6
+  .endif
+        subs            w3,  w3,  #2                    // two rows are produced per pass
+        prfm            pldl1strm, [x1, x2]
+  .ifc \type,avg
+        ld1             {v20.S}[0], [x8], x2
+        ld1             {v20.S}[1], [x8], x2
+        urhadd          v16.8B, v16.8B, v20.8B          // avg: rounding average with the pixels already in dst
+  .endif
+        st1             {v16.S}[0], [x0], x2            // 4 bytes per row
+        st1             {v16.S}[1], [x0], x2
+        b.gt            1b                              // still the flags of the subs above
+        ret
+        // x * y == 0: 1-D filter or unfiltered path
+2:      adds            w12, w12, w6                    // the one non-zero 1-D weight; the sum is 0 only if x and y are both 0 (for x, y in 0..7)
+        dup             v30.8B, w4
+        b.eq            5f                              // no fractional offset at all
+        tst             w6,  w6                         // Z set when w6 == 0, which here means y == 0
+        dup             v31.8B, w12
+        trn1            v0.2S,  v30.2S, v31.2S          // v0 = 4x w4 | 4x w12
+        trn2            v1.2S,  v30.2S, v31.2S
+        b.eq            4f                              // y == 0: horizontal-only filter
+        // vertical-only filter (reached when y != 0, so x == 0)
+        ext             v1.8B,  v0.8B,  v1.8B, #4       // v1 = 4x w12 | 4x w4, i.e. v0 with its halves swapped
+        ld1             {v4.S}[0], [x1], x2
+3:      ld1             {v4.S}[1], [x1], x2             // v4 = row n | row n+1
+        umull           v18.8H, v4.8B,  v0.8B           // w4 * row n     | w12 * row n+1
+        ld1             {v4.S}[0], [x1], x2             // v4 = row n+2 | row n+1
+        umull           v19.8H, v4.8B,  v1.8B           // w12 * row n+2  | w4 * row n+1
+        trn1            v30.2D, v18.2D, v19.2D
+        trn2            v31.2D, v18.2D, v19.2D
+        add             v18.8H, v30.8H, v31.8H          // low: output row n, high: output row n+1
+        prfm            pldl1strm, [x1]
+  .ifc \codec,h264
+        rshrn           v16.8B, v18.8H, #6
+  .else
+        add             v18.8H, v18.8H, v22.8H
+        shrn            v16.8B, v18.8H, #6
+  .endif
+  .ifc \type,avg
+        ld1             {v20.S}[0], [x8], x2
+        ld1             {v20.S}[1], [x8], x2
+        urhadd          v16.8B, v16.8B, v20.8B
+  .endif
+        subs            w3,  w3,  #2
+        prfm            pldl1strm, [x1, x2]
+        st1             {v16.S}[0], [x0], x2
+        st1             {v16.S}[1], [x0], x2
+        b.gt            3b
+        ret
+        // horizontal-only filter (y == 0)
+4:      ld1             {v4.8B}, [x1], x2
+        ld1             {v6.8B}, [x1], x2
+        ext             v5.8B,  v4.8B,  v5.8B, #1
+        ext             v7.8B,  v6.8B,  v7.8B, #1
+        trn1            v4.2S,  v4.2S,  v5.2S           // each row as bytes 0-3 | bytes 1-4
+        trn1            v6.2S,  v6.2S,  v7.2S
+        umull           v18.8H, v4.8B,  v0.8B           // w4 * left taps | w12 * right taps
+        umull           v19.8H, v6.8B,  v0.8B
+        subs            w3,  w3,  #2
+        trn1            v30.2D, v18.2D, v19.2D
+        trn2            v31.2D, v18.2D, v19.2D
+        add             v18.8H, v30.8H, v31.8H
+        prfm            pldl1strm, [x1]
+  .ifc \codec,h264
+        rshrn           v16.8B, v18.8H, #6
+  .else
+        add             v18.8H, v18.8H, v22.8H
+        shrn            v16.8B, v18.8H, #6
+  .endif
+  .ifc \type,avg
+        ld1             {v20.S}[0], [x8], x2
+        ld1             {v20.S}[1], [x8], x2
+        urhadd          v16.8B, v16.8B, v20.8B
+  .endif
+        prfm            pldl1strm, [x1]                 // same address as the prefetch above: x1 has not moved since
+        st1             {v16.S}[0], [x0], x2
+        st1             {v16.S}[1], [x0], x2
+        b.gt            4b
+        ret
+        // x == 0 and y == 0: v30 holds w4, which is 64 here
+5:      ld1             {v4.S}[0], [x1], x2
+        ld1             {v4.S}[1], [x1], x2
+        umull           v18.8H, v4.8B,  v30.8B          // pixel * 64; the h264 rounding shift below gives the pixel back
+        subs            w3,  w3,  #2
+        prfm            pldl1strm, [x1]
+  .ifc \codec,h264
+        rshrn           v16.8B, v18.8H, #6
+  .else
+        add             v18.8H, v18.8H, v22.8H
+        shrn            v16.8B, v18.8H, #6
+  .endif
+  .ifc \type,avg
+        ld1             {v20.S}[0], [x8], x2
+        ld1             {v20.S}[1], [x8], x2
+        urhadd          v16.8B, v16.8B, v20.8B
+  .endif
+        prfm            pldl1strm, [x1]                 // same address as the prefetch above: x1 has not moved since
+        st1             {v16.S}[0], [x0], x2
+        st1             {v16.S}[1], [x0], x2
+        b.gt            5b
+        ret
+endfunc
+.endm
+/* chroma_mc2: 2-pixel-wide variant, h264 rounding only (no codec parameter). NOTE(review): presumably the same arguments as chroma_mc4 -- the register usage matches, confirm against the C prototype. */
+.macro  h264_chroma_mc2 type
+function ff_\type\()_h264_chroma_mc2_neon, export=1
+        prfm            pldl1strm, [x1]
+        prfm            pldl1strm, [x1, x2]
+        orr             w7,  w4,  w5
+        cbz             w7,  2f                         // w4 and w5 both 0: no filtering, take the copy loop
+
+        mul             w7,  w4,  w5                    // w7  = w4 * w5
+        lsl             w14, w5,  #3
+        lsl             w13, w4,  #3
+        sub             w6,  w14, w7                    // w6  = (8 - w4) * w5
+        sub             w12, w13, w7                    // w12 = w4 * (8 - w5)
+        sub             w4,  w7,  w13
+        sub             w4,  w4,  w14
+        add             w4,  w4,  #64                   // w4  = (8 - w4) * (8 - w5)
+        dup             v0.8B,  w4
+        dup             v2.8B,  w12
+        dup             v1.8B,  w6
+        dup             v3.8B,  w7
+        trn1            v0.4H,  v0.4H,  v2.4H           // v0 = upper-row weights as byte pairs: w4 w4 w12 w12, twice
+        trn1            v1.4H,  v1.4H,  v3.4H           // v1 = lower-row weights as byte pairs: w6 w6 w7 w7, twice
+1:
+        ld1             {v4.S}[0],  [x1], x2            // v4 = row n | row n+1 (4 bytes loaded per row)
+        ld1             {v4.S}[1],  [x1], x2
+        rev64           v5.2S,  v4.2S                   // v5 = row n+1 | row n
+        ld1             {v5.S}[1],  [x1]                // v5 = row n+1 | row n+2; x1 stays put, so row n+2 is read again next pass
+        ext             v6.8B,  v4.8B,  v5.8B,  #1      // v4 shifted down by one byte
+        ext             v7.8B,  v5.8B,  v4.8B,  #1      // v5 shifted down by one byte
+        trn1            v4.4H,  v4.4H,  v6.4H           // per row: bytes 0-1 (left taps) then bytes 1-2 (right taps)
+        trn1            v5.4H,  v5.4H,  v7.4H
+        umull           v16.8H, v4.8B,  v0.8B           // upper source rows times their weights
+        umlal           v16.8H, v5.8B,  v1.8B           // plus lower source rows times their weights
+  .ifc \type,avg
+        ld1             {v18.H}[0], [x0], x2            // avg: fetch the two dst rows into the lanes stored below
+        ld1             {v18.H}[2], [x0]
+        sub             x0,  x0,  x2                    // put x0 back for the stores
+  .endif
+        rev64           v17.4S, v16.4S                  // swap left-tap and right-tap sums inside each 64-bit half
+        add             v16.8H, v16.8H, v17.8H          // halfwords 0-1 and 4-5 now hold the two output rows
+        rshrn           v16.8B, v16.8H, #6              // rounding shift by 6
+  .ifc \type,avg
+        urhadd          v16.8B, v16.8B, v18.8B
+  .endif
+        st1             {v16.H}[0], [x0], x2            // 2 bytes per row
+        st1             {v16.H}[2], [x0], x2
+        subs            w3,  w3,  #2                    // two rows per pass
+        b.gt            1b
+        ret
+
+2:
+        ld1             {v16.H}[0], [x1], x2            // copy loop: 2 bytes from each of two source rows
+        ld1             {v16.H}[1], [x1], x2
+  .ifc \type,avg
+        ld1             {v18.H}[0], [x0], x2
+        ld1             {v18.H}[1], [x0]
+        sub             x0,  x0,  x2                    // put x0 back for the stores
+        urhadd          v16.8B, v16.8B, v18.8B          // avg: rounding average with the pixels already in dst
+  .endif
+        st1             {v16.H}[0], [x0], x2
+        st1             {v16.H}[1], [x0], x2
+        subs            w3,  w3,  #2
+        b.gt            2b
+        ret
+endfunc
+.endm
+/* H.264 instances; the mc4/mc2 macros above emit ff_{put,avg}_h264_chroma_mc{4,2}_neon. */
+        h264_chroma_mc8 put
+        h264_chroma_mc8 avg
+        h264_chroma_mc4 put
+        h264_chroma_mc4 avg
+        h264_chroma_mc2 put
+        h264_chroma_mc2 avg
+/* RV40 instances. rv40bias holds 16 16-bit bias values; h264_chroma_mc4 reads the entry at index (y >> 1) * 4 + (x >> 1). */
+#if CONFIG_RV40_DECODER
+const   rv40bias
+        .short           0, 16, 32, 16
+        .short          32, 28, 32, 28
+        .short           0, 32, 16, 32
+        .short          32, 28, 32, 28
+endconst
+
+        h264_chroma_mc8 put, rv40
+        h264_chroma_mc8 avg, rv40
+        h264_chroma_mc4 put, rv40
+        h264_chroma_mc4 avg, rv40
+#endif
+/* VC-1 instances; the vc1 branch of h264_chroma_mc4 uses a constant bias of 28 instead of a table. */
+#if CONFIG_VC1DSP
+        h264_chroma_mc8 put, vc1
+        h264_chroma_mc8 avg, vc1
+        h264_chroma_mc4 put, vc1
+        h264_chroma_mc4 avg, vc1
+#endif
diff --git a/media/ffvpx/libavcodec/aarch64/h264dsp_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/h264dsp_init_aarch64.c
new file mode 100644
index 0000000000..6bf3ecb8a1
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/h264dsp_init_aarch64.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/h264dsp.h"
+/* Deblocking filters without a bit-depth suffix; ff_h264dsp_init_aarch64() below installs them when bit_depth == 8. */
+void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                     int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                     int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                           int beta);
+void ff_h264_h_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                           int beta);
+void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                       int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                       int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                          int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, ptrdiff_t stride,
+                                             int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_intra_neon(uint8_t *pix, ptrdiff_t stride,
+                                             int alpha, int beta);
+void ff_h264_h_loop_filter_chroma422_intra_neon(uint8_t *pix, ptrdiff_t stride,
+                                                int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_mbaff_intra_neon(uint8_t *pix, ptrdiff_t stride,
+                                                   int alpha, int beta);
+/* Routines for weight_h264_pixels_tab; the _16/_8/_4 versions go into slots 0/1/2 (bit_depth == 8 only). */
+void ff_weight_h264_pixels_16_neon(uint8_t *dst, ptrdiff_t stride, int height,
+                                   int log2_den, int weight, int offset);
+void ff_weight_h264_pixels_8_neon(uint8_t *dst, ptrdiff_t stride, int height,
+                                  int log2_den, int weight, int offset);
+void ff_weight_h264_pixels_4_neon(uint8_t *dst, ptrdiff_t stride, int height,
+                                  int log2_den, int weight, int offset);
+/* Routines for biweight_h264_pixels_tab, same slot order; these take a src pointer and two weights. */
+void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                     int height, int log2_den, int weightd,
+                                     int weights, int offset);
+void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                    int height, int log2_den, int weightd,
+                                    int weights, int offset);
+void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                    int height, int log2_den, int weightd,
+                                    int weights, int offset);
+/* Routines for the h264_idct_* hooks (bit_depth == 8 only); idct_add8 is installed only when chroma_format_idc <= 1. */
+void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
+                             int16_t *block, int stride,
+                             const uint8_t nnzc[5 * 8]);
+void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
+                                  int16_t *block, int stride,
+                                  const uint8_t nnzc[5 * 8]);
+void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
+                            int16_t *block, int stride,
+                            const uint8_t nnzc[15 * 8]);
+/* Routines for the h264_idct8_* hooks (bit_depth == 8 only). */
+void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
+                             int16_t *block, int stride,
+                             const uint8_t nnzc[5 * 8]);
+/* _10 variants, installed when bit_depth == 10. Only the chroma ones are used by the init function below; the four luma ones are declared but not referenced in this file. */
+void ff_h264_v_loop_filter_luma_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                        int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_luma_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                        int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_luma_intra_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                              int beta);
+void ff_h264_h_loop_filter_luma_intra_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                              int beta);
+void ff_h264_v_loop_filter_chroma_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                          int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_chroma_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                          int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_chroma422_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                             int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_chroma_intra_neon_10(uint8_t *pix, ptrdiff_t stride,
+                                                int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_intra_neon_10(uint8_t *pix, ptrdiff_t stride,
+                                                int alpha, int beta);
+void ff_h264_h_loop_filter_chroma422_intra_neon_10(uint8_t *pix, ptrdiff_t stride,
+                                                   int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_mbaff_intra_neon_10(uint8_t *pix, ptrdiff_t stride,
+                                                      int alpha, int beta);
+
+av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
+                                     const int chroma_format_idc)
+{
+    /* chroma_format_idc above 1 selects the chroma422 horizontal filters. */
+    const int use_422 = chroma_format_idc > 1;
+    int cpu_flags = av_get_cpu_flags();
+
+    if (!have_neon(cpu_flags))
+        return; /* no NEON: every pointer in c is left as it was */
+
+    if (bit_depth == 8) {
+        c->h264_v_loop_filter_luma         = ff_h264_v_loop_filter_luma_neon;
+        c->h264_h_loop_filter_luma         = ff_h264_h_loop_filter_luma_neon;
+        c->h264_v_loop_filter_luma_intra   = ff_h264_v_loop_filter_luma_intra_neon;
+        c->h264_h_loop_filter_luma_intra   = ff_h264_h_loop_filter_luma_intra_neon;
+        c->h264_v_loop_filter_chroma       = ff_h264_v_loop_filter_chroma_neon;
+        c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
+        c->h264_h_loop_filter_chroma             = use_422 ? ff_h264_h_loop_filter_chroma422_neon
+                                                           : ff_h264_h_loop_filter_chroma_neon;
+        c->h264_h_loop_filter_chroma_intra       = use_422 ? ff_h264_h_loop_filter_chroma422_intra_neon
+                                                           : ff_h264_h_loop_filter_chroma_intra_neon;
+        c->h264_h_loop_filter_chroma_mbaff_intra = use_422 ? ff_h264_h_loop_filter_chroma_intra_neon
+                                                           : ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
+        /* These two hooks are each replaced for one chroma layout only. */
+        if (use_422)
+            c->h264_h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon;
+        else
+            c->h264_idct_add8 = ff_h264_idct_add8_neon;
+
+        c->h264_idct_add        = ff_h264_idct_add_neon;
+        c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
+        c->h264_idct_add16      = ff_h264_idct_add16_neon;
+        c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
+        c->h264_idct8_add       = ff_h264_idct8_add_neon;
+        c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
+        c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
+
+        /* Table slots 0, 1, 2 receive the _16, _8 and _4 routines. */
+        c->weight_h264_pixels_tab[0]   = ff_weight_h264_pixels_16_neon;
+        c->weight_h264_pixels_tab[1]   = ff_weight_h264_pixels_8_neon;
+        c->weight_h264_pixels_tab[2]   = ff_weight_h264_pixels_4_neon;
+        c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
+        c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
+        c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
+    } else if (bit_depth == 10) {
+        /* For 10 bit only the chroma deblocking filters are installed. */
+        c->h264_v_loop_filter_chroma       = ff_h264_v_loop_filter_chroma_neon_10;
+        c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon_10;
+        c->h264_h_loop_filter_chroma             = use_422 ? ff_h264_h_loop_filter_chroma422_neon_10
+                                                           : ff_h264_h_loop_filter_chroma_neon_10;
+        c->h264_h_loop_filter_chroma_intra       = use_422 ? ff_h264_h_loop_filter_chroma422_intra_neon_10
+                                                           : ff_h264_h_loop_filter_chroma_intra_neon_10;
+        c->h264_h_loop_filter_chroma_mbaff_intra = use_422 ? ff_h264_h_loop_filter_chroma_intra_neon_10
+                                                           : ff_h264_h_loop_filter_chroma_mbaff_intra_neon_10;
+        if (use_422)
+            c->h264_h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon_10;
+    }
+}
diff --git a/media/ffvpx/libavcodec/aarch64/h264dsp_neon.S b/media/ffvpx/libavcodec/aarch64/h264dsp_neon.S
new file mode 100644
index 0000000000..ea221e6862
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/h264dsp_neon.S
@@ -0,0 +1,1076 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
+ * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+#include "neon.S"
+/* Prologue of the tc0-based filters: w2 = alpha, w3 = beta, x4 = tc0 pointer. Leaves the four tc0 bytes in v24.S[0]; returns from the enclosing function when filtering can be skipped. */
+.macro  h264_loop_filter_start
+        cmp             w2,  #0
+        ldr             w6,  [x4]                       // w6 = the four tc0 bytes
+        ccmp            w3,  #0, #0, ne                 // alpha != 0: flags = (beta cmp 0); alpha == 0: NZCV is forced to 0000
+        mov             v24.S[0], w6
+        and             w8,  w6,  w6,  lsl #16
+        b.eq            1f                              // taken only for alpha != 0 && beta == 0. NOTE(review): alpha == 0 does not exit here (Z is clear); an nzcv of #4 would make it do so
+        ands            w8,  w8,  w8,  lsl #8           // bit 31 = AND of the sign bits of all four tc0 bytes
+        b.ge            2f                              // go on unless all four tc0 bytes are negative
+1:
+        ret
+2:
+.endm
+/* tc0-clipped luma filter on 16 pixels. In: v20/v18/v16 = p2/p1/p0, v0/v2/v4 = q0/q1/q2, v24.S[0] = tc0 bytes, w2/w3 = alpha/beta. Out: v17/v16/v0/v19 = p1/p0/q0/q1. Jumps to label 9 of the including function when no pixel passes the tests. */
+.macro  h264_loop_filter_luma
+        dup             v22.16B, w2                     // alpha
+        uxtl            v24.8H,  v24.8B                 // tc0 bytes -> halfwords
+        uabd            v21.16B, v16.16B, v0.16B        // abs(p0 - q0)
+        uxtl            v24.4S,  v24.4H                 // -> words
+        uabd            v28.16B, v18.16B, v16.16B       // abs(p1 - p0)
+        sli             v24.8H,  v24.8H,  #8            // duplicate each byte within its halfword
+        uabd            v30.16B, v2.16B,  v0.16B        // abs(q1 - q0)
+        sli             v24.4S,  v24.4S,  #16           // each tc0 byte now fills 4 consecutive lanes
+        cmhi            v21.16B, v22.16B, v21.16B       // < alpha
+        dup             v22.16B, w3                     // beta
+        cmlt            v23.16B, v24.16B, #0            // lanes whose tc0 is negative
+        cmhi            v28.16B, v22.16B, v28.16B       // < beta
+        cmhi            v30.16B, v22.16B, v30.16B       // < beta
+        bic             v21.16B, v21.16B, v23.16B       // never filter lanes with negative tc0
+        uabd            v17.16B, v20.16B, v16.16B       // abs(p2 - p0)
+        and             v21.16B, v21.16B, v28.16B
+        uabd            v19.16B,  v4.16B,  v0.16B       // abs(q2 - q0)
+        and             v21.16B, v21.16B, v30.16B      // v21 = filter mask: all three threshold tests passed
+        shrn            v30.8b,  v21.8h,  #4            // pack the 128-bit mask into 64 bits
+        mov             x7, v30.d[0]
+        cmhi            v17.16B, v22.16B, v17.16B       // < beta
+        cmhi            v19.16B, v22.16B, v19.16B       // < beta
+        cbz             x7,  9f                         // mask empty: nothing to filter
+        and             v17.16B, v17.16B, v21.16B       // v17 = lanes where p1 is modified
+        and             v19.16B, v19.16B, v21.16B       // v19 = lanes where q1 is modified
+        and             v24.16B, v24.16B, v21.16B       // tc0, zeroed outside the filter mask
+        urhadd          v28.16B, v16.16B,  v0.16B       // (p0 + q0 + 1) >> 1
+        sub             v21.16B, v24.16B, v17.16B       // tc0 + 1 where v17 is set (mask lanes are 0 or -1)
+        uqadd           v23.16B, v18.16B, v24.16B       // p1 + tc0, saturating
+        uhadd           v20.16B, v20.16B, v28.16B       // (p2 + avg(p0, q0)) >> 1
+        sub             v21.16B, v21.16B, v19.16B       // v21 = tc: tc0 plus one for each of v17/v19 set
+        uhadd           v28.16B,  v4.16B, v28.16B       // (q2 + avg(p0, q0)) >> 1
+        umin            v23.16B, v23.16B, v20.16B
+        uqsub           v22.16B, v18.16B, v24.16B       // p1 - tc0, saturating
+        uqadd           v4.16B,   v2.16B, v24.16B       // q1 + tc0, saturating
+        umax            v23.16B, v23.16B, v22.16B       // v23 = new p1, kept within [p1 - tc0, p1 + tc0]
+        uqsub           v22.16B,  v2.16B, v24.16B       // q1 - tc0, saturating
+        umin            v28.16B,  v4.16B, v28.16B
+        uxtl            v4.8H,    v0.8B                 // q0 widened to 16 bit (low half)
+        umax            v28.16B, v28.16B, v22.16B       // v28 = new q1, kept within [q1 - tc0, q1 + tc0]
+        uxtl2           v20.8H,   v0.16B                // q0 widened to 16 bit (high half)
+        usubw           v4.8H,    v4.8H,  v16.8B        // q0 - p0
+        usubw2          v20.8H,  v20.8H,  v16.16B
+        shl             v4.8H,    v4.8H,  #2            // 4 * (q0 - p0)
+        shl             v20.8H,  v20.8H,  #2
+        uaddw           v4.8H,    v4.8H,  v18.8B        // + p1
+        uaddw2          v20.8H,  v20.8H,  v18.16B
+        usubw           v4.8H,    v4.8H,   v2.8B        // - q1
+        usubw2          v20.8H,  v20.8H,   v2.16B
+        rshrn           v4.8B,    v4.8H,  #3            // delta = (4 * (q0 - p0) + p1 - q1 + 4) >> 3
+        rshrn2          v4.16B,  v20.8H,  #3
+        bsl             v17.16B, v23.16B, v18.16B       // v17 = new p1 where selected, else the old p1
+        bsl             v19.16B, v28.16B,  v2.16B       // v19 = new q1 where selected, else the old q1
+        neg             v23.16B, v21.16B                // -tc
+        uxtl            v28.8H,  v16.8B                 // p0 widened (low half)
+        smin            v4.16B,   v4.16B, v21.16B       // delta = min(delta, tc)
+        uxtl2           v21.8H,  v16.16B                // p0 widened (high half)
+        smax            v4.16B,   v4.16B, v23.16B       // delta = max(delta, -tc)
+        uxtl            v22.8H,   v0.8B                 // q0 widened
+        uxtl2           v24.8H,   v0.16B
+        saddw           v28.8H,  v28.8H,  v4.8B         // p0 + delta
+        saddw2          v21.8H,  v21.8H,  v4.16B
+        ssubw           v22.8H,  v22.8H,  v4.8B         // q0 - delta
+        ssubw2          v24.8H,  v24.8H,  v4.16B
+        sqxtun          v16.8B,  v28.8H                 // saturate back to unsigned bytes: v16 = new p0
+        sqxtun2         v16.16B, v21.8H
+        sqxtun          v0.8B,   v22.8H                 // v0 = new q0
+        sqxtun2         v0.16B,  v24.8H
+.endm
+/* Luma filter across the edge between rows pix - stride and pix, 16 pixels wide: x0 = pix, x1 = stride, w2 = alpha, w3 = beta, x4 = tc0. */
+function ff_h264_v_loop_filter_luma_neon, export=1
+        h264_loop_filter_start
+
+        ld1             {v0.16B},  [x0], x1             // q0 = row at pix
+        ld1             {v2.16B},  [x0], x1             // q1
+        ld1             {v4.16B},  [x0], x1             // q2
+        sub             x0,  x0,  x1, lsl #2
+        sub             x0,  x0,  x1, lsl #1            // x0 = pix - 3 * stride
+        ld1             {v20.16B},  [x0], x1            // p2
+        ld1             {v18.16B},  [x0], x1            // p1
+        ld1             {v16.16B},  [x0], x1            // p0; x0 is back at pix
+
+        h264_loop_filter_luma
+
+        sub             x0,  x0,  x1, lsl #1            // x0 = pix - 2 * stride
+        st1             {v17.16B},  [x0], x1            // new p1
+        st1             {v16.16B}, [x0], x1             // new p0
+        st1             {v0.16B},  [x0], x1             // new q0
+        st1             {v19.16B}, [x0]                 // new q1
+9:
+        ret
+endfunc
+/* Luma filter across the edge between columns pix - 1 and pix, 16 rows tall: x0 = pix, x1 = stride, w2 = alpha, w3 = beta, x4 = tc0. */
+function ff_h264_h_loop_filter_luma_neon, export=1
+        h264_loop_filter_start
+
+        sub             x0,  x0,  #4                    // start 4 pixels left of the edge
+        ld1             {v6.8B},  [x0], x1              // rows 0-7: 8 bytes each into the low register halves
+        ld1             {v20.8B}, [x0], x1
+        ld1             {v18.8B}, [x0], x1
+        ld1             {v16.8B}, [x0], x1
+        ld1             {v0.8B},  [x0], x1
+        ld1             {v2.8B},  [x0], x1
+        ld1             {v4.8B},  [x0], x1
+        ld1             {v26.8B}, [x0], x1
+        ld1             {v6.D}[1],  [x0], x1            // rows 8-15: into the high halves of the same registers
+        ld1             {v20.D}[1], [x0], x1
+        ld1             {v18.D}[1], [x0], x1
+        ld1             {v16.D}[1], [x0], x1
+        ld1             {v0.D}[1],  [x0], x1
+        ld1             {v2.D}[1],  [x0], x1
+        ld1             {v4.D}[1],  [x0], x1
+        ld1             {v26.D}[1], [x0], x1
+
+        transpose_8x16B v6, v20, v18, v16, v0, v2, v4, v26, v21, v23 // macro from neon.S (not shown); presumably leaves one pixel column per register: p3 p2 p1 p0 q0 q1 q2 q3
+
+        h264_loop_filter_luma
+
+        transpose_4x16B v17, v16, v0, v19, v21, v23, v25, v27 // new p1/p0/q0/q1 columns back to row order for the stores below
+
+        sub             x0,  x0,  x1, lsl #4            // back up the 16 rows just read
+        add             x0,  x0,  #2                    // x0 = pix - 2: only p1, p0, q0, q1 are written back
+        st1             {v17.S}[0],  [x0], x1           // 4 bytes per row, 16 rows
+        st1             {v16.S}[0], [x0], x1
+        st1             {v0.S}[0],  [x0], x1
+        st1             {v19.S}[0], [x0], x1
+        st1             {v17.S}[1],  [x0], x1
+        st1             {v16.S}[1], [x0], x1
+        st1             {v0.S}[1],  [x0], x1
+        st1             {v19.S}[1], [x0], x1
+        st1             {v17.S}[2],  [x0], x1
+        st1             {v16.S}[2], [x0], x1
+        st1             {v0.S}[2],  [x0], x1
+        st1             {v19.S}[2], [x0], x1
+        st1             {v17.S}[3],  [x0], x1
+        st1             {v16.S}[3], [x0], x1
+        st1             {v0.S}[3],  [x0], x1
+        st1             {v19.S}[3], [x0], x1
+9:
+        ret
+endfunc
+
+/* Prologue of the intra filters: w2 = alpha, w3 = beta. Returns from the enclosing function only when both are 0; otherwise broadcasts them into v30/v31. */
+.macro h264_loop_filter_start_intra
+        orr             w4,  w2,  w3
+        cbnz            w4,  1f                         // go on if alpha or beta is non-zero
+        ret
+1:
+        dup             v30.16b, w2                // alpha
+        dup             v31.16b, w3                // beta
+.endm
+/* Intra luma filter on 16 pixels. In: v4..v7 = p3..p0, v0..v3 = q0..q3, v30/v31 = alpha/beta bytes. Updates v5, v6, v7 (p2, p1, p0) and v0, v1, v2 (q0, q1, q2) in place. Jumps to label 9 of the including function when no pixel passes the tests. */
+.macro h264_loop_filter_luma_intra
+        uabd            v16.16b, v7.16b,  v0.16b        // abs(p0 - q0)
+        uabd            v17.16b, v6.16b,  v7.16b        // abs(p1 - p0)
+        uabd            v18.16b, v1.16b,  v0.16b        // abs(q1 - q0)
+        cmhi            v19.16b, v30.16b, v16.16b       // < alpha
+        cmhi            v17.16b, v31.16b, v17.16b       // < beta
+        cmhi            v18.16b, v31.16b, v18.16b       // < beta
+
+        movi            v29.16b, #2
+        ushr            v30.16b, v30.16b, #2            // alpha >> 2
+        add             v30.16b, v30.16b, v29.16b       // (alpha >> 2) + 2
+        cmhi            v16.16b, v30.16b, v16.16b       // < (alpha >> 2) + 2
+
+        and             v19.16b, v19.16b, v17.16b
+        and             v19.16b, v19.16b, v18.16b       // v19 = if_1: the alpha and both beta tests passed; v16 = if_2
+        shrn            v20.8b,  v19.8h,  #4            // pack the 128-bit mask into 64 bits
+        mov             x4, v20.d[0]
+        cbz             x4, 9f                          // if_1 false everywhere: nothing to filter
+
+        ushll           v20.8h,  v6.8b,   #1            // 2 * p1
+        ushll           v22.8h,  v1.8b,   #1            // 2 * q1
+        ushll2          v21.8h,  v6.16b,  #1
+        ushll2          v23.8h,  v1.16b,  #1
+        uaddw           v20.8h,  v20.8h,  v7.8b         // + p0
+        uaddw           v22.8h,  v22.8h,  v0.8b         // + q0
+        uaddw2          v21.8h,  v21.8h,  v7.16b
+        uaddw2          v23.8h,  v23.8h,  v0.16b
+        uaddw           v20.8h,  v20.8h,  v1.8b         // v20/v21 = 2*p1 + p0 + q1
+        uaddw           v22.8h,  v22.8h,  v6.8b         // v22/v23 = 2*q1 + q0 + p1
+        uaddw2          v21.8h,  v21.8h,  v1.16b
+        uaddw2          v23.8h,  v23.8h,  v6.16b
+
+        rshrn           v24.8b,  v20.8h,  #2 // p0'_1
+        rshrn           v25.8b,  v22.8h,  #2 // q0'_1
+        rshrn2          v24.16b, v21.8h,  #2 // p0'_1
+        rshrn2          v25.16b, v23.8h,  #2 // q0'_1
+
+        uabd            v17.16b, v5.16b,  v7.16b        // abs(p2 - p0)
+        uabd            v18.16b, v2.16b,  v0.16b        // abs(q2 - q0)
+        cmhi            v17.16b, v31.16b, v17.16b       // < beta
+        cmhi            v18.16b, v31.16b, v18.16b       // < beta
+
+        and             v17.16b, v16.16b, v17.16b  // if_2 && if_3
+        and             v18.16b, v16.16b, v18.16b  // if_2 && if_4
+
+        not             v30.16b, v17.16b
+        not             v31.16b, v18.16b
+
+        and             v30.16b, v30.16b, v19.16b  // if_1 && !(if_2 && if_3)
+        and             v31.16b, v31.16b, v19.16b  // if_1 && !(if_2 && if_4)
+
+        and             v17.16b, v19.16b, v17.16b  // if_1 && if_2 && if_3
+        and             v18.16b, v19.16b, v18.16b  // if_1 && if_2 && if_4
+
+        //calc            p, v7, v6, v5, v4, v17, v7, v6, v5, v4
+        uaddl           v26.8h,  v5.8b,   v7.8b         // p2 + p0
+        uaddl2          v27.8h,  v5.16b,  v7.16b
+        uaddw           v26.8h,  v26.8h,  v0.8b         // v26/v27 = p2 + p0 + q0
+        uaddw2          v27.8h,  v27.8h,  v0.16b
+        add             v20.8h,  v20.8h,  v26.8h
+        add             v21.8h,  v21.8h,  v27.8h
+        uaddw           v20.8h,  v20.8h,  v0.8b         // v20/v21 = p2 + 2*p1 + 2*p0 + 2*q0 + q1
+        uaddw2          v21.8h,  v21.8h,  v0.16b
+        rshrn           v20.8b,  v20.8h,  #3 // p0'_2
+        rshrn2          v20.16b, v21.8h,  #3 // p0'_2
+        uaddw           v26.8h,  v26.8h,  v6.8b         // v26/v27 = p2 + p1 + p0 + q0
+        uaddw2          v27.8h,  v27.8h,  v6.16b
+        rshrn           v21.8b,  v26.8h,  #2 // p1'_2
+        rshrn2          v21.16b, v27.8h,  #2 // p1'_2
+        uaddl           v28.8h,  v4.8b,   v5.8b         // p3 + p2
+        uaddl2          v29.8h,  v4.16b,  v5.16b
+        shl             v28.8h,  v28.8h,  #1            // 2 * (p3 + p2)
+        shl             v29.8h,  v29.8h,  #1
+        add             v28.8h,  v28.8h,  v26.8h        // 2*p3 + 3*p2 + p1 + p0 + q0
+        add             v29.8h,  v29.8h,  v27.8h
+        rshrn           v19.8b,  v28.8h,  #3 // p2'_2
+        rshrn2          v19.16b, v29.8h,  #3 // p2'_2
+
+        //calc            q, v0, v1, v2, v3, v18, v0, v1, v2, v3
+        uaddl           v26.8h,  v2.8b,   v0.8b         // q2 + q0
+        uaddl2          v27.8h,  v2.16b,  v0.16b
+        uaddw           v26.8h,  v26.8h,  v7.8b         // v26/v27 = q2 + q0 + p0
+        uaddw2          v27.8h,  v27.8h,  v7.16b
+        add             v22.8h,  v22.8h,  v26.8h
+        add             v23.8h,  v23.8h,  v27.8h
+        uaddw           v22.8h,  v22.8h,  v7.8b         // v22/v23 = q2 + 2*q1 + 2*q0 + 2*p0 + p1
+        uaddw2          v23.8h,  v23.8h,  v7.16b
+        rshrn           v22.8b,  v22.8h,  #3 // q0'_2
+        rshrn2          v22.16b, v23.8h,  #3 // q0'_2
+        uaddw           v26.8h,  v26.8h,  v1.8b         // v26/v27 = q2 + q1 + q0 + p0
+        uaddw2          v27.8h,  v27.8h,  v1.16b
+        rshrn           v23.8b,  v26.8h,  #2 // q1'_2
+        rshrn2          v23.16b, v27.8h,  #2 // q1'_2
+        uaddl           v28.8h,  v2.8b,   v3.8b         // q2 + q3
+        uaddl2          v29.8h,  v2.16b,  v3.16b
+        shl             v28.8h,  v28.8h,  #1            // 2 * (q2 + q3)
+        shl             v29.8h,  v29.8h,  #1
+        add             v28.8h,  v28.8h,  v26.8h        // 2*q3 + 3*q2 + q1 + q0 + p0
+        add             v29.8h,  v29.8h,  v27.8h
+        rshrn           v26.8b,  v28.8h,  #3 // q2'_2
+        rshrn2          v26.16b, v29.8h,  #3 // q2'_2
+
+        bit             v7.16b,  v24.16b, v30.16b  // p0'_1
+        bit             v0.16b,  v25.16b, v31.16b  // q0'_1
+        bit             v7.16b,  v20.16b, v17.16b  // p0'_2
+        bit             v6.16b,  v21.16b, v17.16b  // p1'_2
+        bit             v5.16b,  v19.16b, v17.16b  // p2'_2
+        bit             v0.16b,  v22.16b, v18.16b  // q0'_2
+        bit             v1.16b,  v23.16b, v18.16b  // q1'_2
+        bit             v2.16b,  v26.16b, v18.16b  // q2'_2
+.endm
+/* Intra luma filter across the edge between rows pix - stride and pix, 16 pixels wide: x0 = pix, x1 = stride, w2 = alpha, w3 = beta. */
+function ff_h264_v_loop_filter_luma_intra_neon, export=1
+        h264_loop_filter_start_intra
+
+        ld1             {v0.16b},  [x0], x1 // q0
+        ld1             {v1.16b},  [x0], x1 // q1
+        ld1             {v2.16b},  [x0], x1 // q2
+        ld1             {v3.16b},  [x0], x1 // q3
+        sub             x0,  x0,  x1, lsl #3            // x0 = pix - 4 * stride
+        ld1             {v4.16b},  [x0], x1 // p3
+        ld1             {v5.16b},  [x0], x1 // p2
+        ld1             {v6.16b},  [x0], x1 // p1
+        ld1             {v7.16b},  [x0]     // p0
+
+        h264_loop_filter_luma_intra
+
+        sub             x0,  x0,  x1, lsl #1            // x0 = pix - 3 * stride (it was left at the p0 row)
+        st1             {v5.16b}, [x0], x1  // p2
+        st1             {v6.16b}, [x0], x1  // p1
+        st1             {v7.16b}, [x0], x1  // p0
+        st1             {v0.16b}, [x0], x1  // q0
+        st1             {v1.16b}, [x0], x1  // q1
+        st1             {v2.16b}, [x0]      // q2
+9:
+        ret
+endfunc
+
+function ff_h264_h_loop_filter_luma_intra_neon, export=1
+        h264_loop_filter_start_intra
+        // Intra luma filter for a vertical edge: reads 8 bytes from each of 16 rows, filters, and writes all 16 rows back.
+        sub             x0,  x0,  #4 // start 4 bytes to the left of x0
+        ld1             {v4.8b},  [x0], x1 // rows 0-7 go to the low 64 bits of v4-v7, v0-v3
+        ld1             {v5.8b},  [x0], x1
+        ld1             {v6.8b},  [x0], x1
+        ld1             {v7.8b},  [x0], x1
+        ld1             {v0.8b},  [x0], x1
+        ld1             {v1.8b},  [x0], x1
+        ld1             {v2.8b},  [x0], x1
+        ld1             {v3.8b},  [x0], x1
+        ld1             {v4.d}[1],  [x0], x1 // rows 8-15 go to the high 64 bits of the same registers
+        ld1             {v5.d}[1],  [x0], x1
+        ld1             {v6.d}[1],  [x0], x1
+        ld1             {v7.d}[1],  [x0], x1
+        ld1             {v0.d}[1],  [x0], x1
+        ld1             {v1.d}[1],  [x0], x1
+        ld1             {v2.d}[1],  [x0], x1
+        ld1             {v3.d}[1],  [x0], x1
+        // NOTE(review): transpose_8x16B is defined elsewhere. Presumably it leaves p3..p0 in v4..v7 and q0..q3 in v0..v3, as in the vertical variant - confirm.
+        transpose_8x16B v4, v5, v6, v7, v0, v1, v2, v3, v21, v23
+
+        h264_loop_filter_luma_intra
+
+        transpose_8x16B v4, v5, v6, v7, v0, v1, v2, v3, v21, v23
+
+        sub             x0,  x0,  x1, lsl #4 // rewind the 16 rows consumed by the loads
+        st1             {v4.8b},  [x0], x1 // stores mirror the load order
+        st1             {v5.8b},  [x0], x1
+        st1             {v6.8b},  [x0], x1
+        st1             {v7.8b},  [x0], x1
+        st1             {v0.8b},  [x0], x1
+        st1             {v1.8b},  [x0], x1
+        st1             {v2.8b},  [x0], x1
+        st1             {v3.8b},  [x0], x1
+        st1             {v4.d}[1],  [x0], x1
+        st1             {v5.d}[1],  [x0], x1
+        st1             {v6.d}[1],  [x0], x1
+        st1             {v7.d}[1],  [x0], x1
+        st1             {v0.d}[1],  [x0], x1
+        st1             {v1.d}[1],  [x0], x1
+        st1             {v2.d}[1],  [x0], x1
+        st1             {v3.d}[1],  [x0], x1
+9:
+        ret
+endfunc
+
+.macro  h264_loop_filter_chroma
+        dup             v22.8B, w2              // alpha (macro inputs: p1, p0, q0, q1 in v18, v16, v0, v2 and clip limits in v24, outputs: v16, v0)
+        dup             v23.8B, w3              // beta
+        uxtl            v24.8H, v24.8B          // widen the low 8 bytes of v24 to halfwords
+        uabd            v26.8B, v16.8B, v0.8B   // abs(p0 - q0)
+        uabd            v28.8B, v18.8B, v16.8B  // abs(p1 - p0)
+        uabd            v30.8B, v2.8B,  v0.8B   // abs(q1 - q0)
+        cmhi            v26.8B, v22.8B, v26.8B  // < alpha
+        cmhi            v28.8B, v23.8B, v28.8B  // < beta
+        cmhi            v30.8B, v23.8B, v30.8B  // < beta
+        uxtl            v4.8H,  v0.8B           // v4 = q0 widened to 16 bit
+        and             v26.8B, v26.8B, v28.8B
+        usubw           v4.8H,  v4.8H,  v16.8B  // q0 - p0
+        and             v26.8B, v26.8B, v30.8B  // v26 = per-pixel mask, set where all three tests passed
+        shl             v4.8H,  v4.8H,  #2      // 4 * (q0 - p0)
+        mov             x8,  v26.d[0]
+        sli             v24.8H, v24.8H, #8      // copy each limit byte into the high byte of its halfword, so v24.8B holds every limit twice
+        uaddw           v4.8H,  v4.8H,  v18.8B  // + p1
+        cbz             x8,  9f                 // mask is empty: jump to the 9: label of the function expanding this macro
+        usubw           v4.8H,  v4.8H,  v2.8B   // - q1
+        rshrn           v4.8B,  v4.8H,  #3      // delta = (4*(q0 - p0) + p1 - q1 + 4) >> 3, narrowed to bytes
+        smin            v4.8B,  v4.8B,  v24.8B  // clamp delta to at most +limit
+        neg             v25.8B, v24.8B
+        smax            v4.8B,  v4.8B,  v25.8B  // and to at least -limit
+        uxtl            v22.8H, v0.8B
+        and             v4.8B,  v4.8B,  v26.8B  // zero delta for pixels that failed the tests
+        uxtl            v28.8H, v16.8B
+        saddw           v28.8H, v28.8H, v4.8B   // p0 + delta
+        ssubw           v22.8H, v22.8H, v4.8B   // q0 - delta
+        sqxtun          v16.8B, v28.8H          // saturate to unsigned 8 bit: new p0
+        sqxtun          v0.8B,  v22.8H          // saturate to unsigned 8 bit: new q0
+.endm
+
+function ff_h264_v_loop_filter_chroma_neon, export=1
+        h264_loop_filter_start
+        // Chroma filter across a horizontal edge, 8 bytes per row: x0 = row q0, x1 = row stride. h264_loop_filter_start is defined before this chunk.
+        sub             x0,  x0,  x1, lsl #1 // back up two rows to p1
+        ld1             {v18.8B}, [x0], x1 // p1
+        ld1             {v16.8B}, [x0], x1 // p0
+        ld1             {v0.8B},  [x0], x1 // q0
+        ld1             {v2.8B},  [x0] // q1
+
+        h264_loop_filter_chroma
+
+        sub             x0,  x0,  x1, lsl #1 // x0 = q1 row, two rows back is p0
+        st1             {v16.8B}, [x0], x1 // p0
+        st1             {v0.8B},  [x0], x1 // q0
+9:
+        ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma_neon, export=1
+        h264_loop_filter_start
+        // Chroma filter for a vertical edge: reads 4 bytes from each of 8 rows, filters, and writes them back.
+        sub             x0,  x0,  #2 // start 2 bytes to the left of x0
+h_loop_filter_chroma420: // shared body, also entered from ff_h264_h_loop_filter_chroma422_neon
+        ld1             {v18.S}[0], [x0], x1 // rows 0-3 go to lane S[0] of v18, v16, v0, v2
+        ld1             {v16.S}[0], [x0], x1
+        ld1             {v0.S}[0],  [x0], x1
+        ld1             {v2.S}[0],  [x0], x1
+        ld1             {v18.S}[1], [x0], x1 // rows 4-7 go to lane S[1]
+        ld1             {v16.S}[1], [x0], x1
+        ld1             {v0.S}[1],  [x0], x1
+        ld1             {v2.S}[1],  [x0], x1
+        // NOTE(review): transpose_4x8B is defined elsewhere. Presumably it turns the rows into the p1, p0, q0, q1 columns the filter macro expects - confirm.
+        transpose_4x8B  v18, v16, v0, v2, v28, v29, v30, v31
+
+        h264_loop_filter_chroma
+
+        transpose_4x8B  v18, v16, v0, v2, v28, v29, v30, v31
+
+        sub             x0,  x0,  x1, lsl #3 // rewind the 8 rows consumed by the loads
+        st1             {v18.S}[0], [x0], x1 // stores mirror the load order
+        st1             {v16.S}[0], [x0], x1
+        st1             {v0.S}[0],  [x0], x1
+        st1             {v2.S}[0],  [x0], x1
+        st1             {v18.S}[1], [x0], x1
+        st1             {v16.S}[1], [x0], x1
+        st1             {v0.S}[1],  [x0], x1
+        st1             {v2.S}[1],  [x0], x1
+9:
+        ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma422_neon, export=1
+        h264_loop_filter_start
+        add             x5,  x0,  x1 // x5 = second row. This function runs the 8-row h_loop_filter_chroma420 body twice
+        sub             x0,  x0,  #2 // same 2-byte left offset the 4:2:0 function applies before its label
+        add             x1,  x1,  x1 // double the stride so each pass touches every other row
+        mov             x7,  x30 // bl overwrites the link register, keep the real return address
+        bl              h_loop_filter_chroma420 // pass 1: rows 0, 2, ..., 14
+        mov             x30, x7
+        sub             x0,  x5,  #2 // pass 2 starts one original row further down
+        mov             v24.s[0], w6 // the filter macro rewrote v24 in place, reload it. NOTE(review): presumably w6 still holds tc0 from h264_loop_filter_start - confirm
+        b               h_loop_filter_chroma420 // pass 2: rows 1, 3, ..., 15. Tail call, its ret returns to our caller
+endfunc
+
+.macro h264_loop_filter_chroma_intra
+        uabd            v26.8b,  v16.8b,  v17.8b  // abs(p0 - q0)
+        uabd            v27.8b,  v18.8b,  v16.8b  // abs(p1 - p0)
+        uabd            v28.8b,  v19.8b,  v17.8b  // abs(q1 - q0)
+        cmhi            v26.8b,  v30.8b,  v26.8b  // < alpha
+        cmhi            v27.8b,  v31.8b,  v27.8b  // < beta
+        cmhi            v28.8b,  v31.8b,  v28.8b  // < beta
+        and             v26.8b,  v26.8b,  v27.8b
+        and             v26.8b,  v26.8b,  v28.8b  // v26 = mask of pixels passing all three tests
+        mov             x2, v26.d[0]              // x2 is reused as scratch for the mask
+        // Where the mask is set: p0 = (2*p1 + p0 + q1 + 2) >> 2 and q0 = (2*q1 + q0 + p1 + 2) >> 2. Inputs p1, p0, q0, q1 are v18, v16, v17, v19.
+        ushll           v4.8h,   v18.8b,  #1      // 2 * p1
+        ushll           v6.8h,   v19.8b,  #1      // 2 * q1
+        cbz             x2, 9f                    // mask is empty: jump to the 9: label of the function expanding this macro
+        uaddl           v20.8h,  v16.8b,  v19.8b  // p0 + q1
+        uaddl           v22.8h,  v17.8b,  v18.8b  // q0 + p1
+        add             v20.8h,  v20.8h,  v4.8h   // 2*p1 + p0 + q1
+        add             v22.8h,  v22.8h,  v6.8h   // 2*q1 + q0 + p1
+        uqrshrn         v24.8b,  v20.8h,  #2      // rounding shift right by 2, narrowed to bytes
+        uqrshrn         v25.8b,  v22.8h,  #2
+        bit             v16.8b,  v24.8b,  v26.8b  // insert new p0 where the mask is set
+        bit             v17.8b,  v25.8b,  v26.8b  // insert new q0 where the mask is set
+.endm
+
+function ff_h264_v_loop_filter_chroma_intra_neon, export=1
+        h264_loop_filter_start_intra
+        // Intra chroma filter across a horizontal edge, 8 bytes per row: x0 = row q0, x1 = row stride. Only rows p0 and q0 are written.
+        sub             x0,  x0,  x1, lsl #1 // back up two rows to p1
+        ld1             {v18.8b}, [x0], x1 // p1
+        ld1             {v16.8b}, [x0], x1 // p0
+        ld1             {v17.8b}, [x0], x1 // q0
+        ld1             {v19.8b}, [x0] // q1
+
+        h264_loop_filter_chroma_intra
+
+        sub             x0,  x0,  x1, lsl #1 // x0 = q1 row, two rows back is p0
+        st1             {v16.8b}, [x0], x1 // p0
+        st1             {v17.8b}, [x0], x1 // q0
+
+9:
+        ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma_mbaff_intra_neon, export=1
+        h264_loop_filter_start_intra
+        // Intra chroma filter for a vertical edge, 4-row variant: only 4 rows are loaded and 4 rows are stored.
+        sub             x4,  x0,  #2 // x4 = load pointer, 2 bytes left of x0
+        sub             x0,  x0,  #1 // x0 = store pointer, 1 byte left of the original x0 (the p0 column)
+        ld1             {v18.8b}, [x4], x1 // 8 bytes from each of rows 0-3
+        ld1             {v16.8b}, [x4], x1
+        ld1             {v17.8b}, [x4], x1
+        ld1             {v19.8b}, [x4], x1
+        // NOTE(review): transpose_4x8B is defined elsewhere. Presumably it yields the p1, p0, q0, q1 columns in v18, v16, v17, v19 - confirm.
+        transpose_4x8B v18, v16, v17, v19, v26, v27, v28, v29
+
+        h264_loop_filter_chroma_intra
+
+        st2             {v16.b,v17.b}[0], [x0], x1 // each st2 writes byte lane n of v16 then v17 (p0, q0) to one row
+        st2             {v16.b,v17.b}[1], [x0], x1
+        st2             {v16.b,v17.b}[2], [x0], x1
+        st2             {v16.b,v17.b}[3], [x0], x1
+
+9:
+        ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma_intra_neon, export=1
+        h264_loop_filter_start_intra
+        // Intra chroma filter for a vertical edge over 8 rows: only the p0 and q0 bytes of each row are written back.
+        sub             x4,  x0,  #2 // x4 = load pointer, 2 bytes left of x0
+        sub             x0,  x0,  #1 // x0 = store pointer, 1 byte left of the original x0 (the p0 column)
+h_loop_filter_chroma420_intra: // shared body, also entered from ff_h264_h_loop_filter_chroma422_intra_neon
+        ld1             {v18.8b}, [x4], x1 // rows 0-3: 8 bytes each
+        ld1             {v16.8b}, [x4], x1
+        ld1             {v17.8b}, [x4], x1
+        ld1             {v19.8b}, [x4], x1
+        ld1             {v18.s}[1], [x4], x1 // rows 4-7: 4 bytes each, replacing lane S[1] of the same registers
+        ld1             {v16.s}[1], [x4], x1
+        ld1             {v17.s}[1], [x4], x1
+        ld1             {v19.s}[1], [x4], x1
+        // NOTE(review): transpose_4x8B is defined elsewhere. Presumably it yields the p1, p0, q0, q1 columns in v18, v16, v17, v19 - confirm.
+        transpose_4x8B v18, v16, v17, v19, v26, v27, v28, v29
+
+        h264_loop_filter_chroma_intra
+
+        st2             {v16.b,v17.b}[0], [x0], x1 // each st2 writes byte lane n of v16 then v17 (p0, q0) to one row
+        st2             {v16.b,v17.b}[1], [x0], x1
+        st2             {v16.b,v17.b}[2], [x0], x1
+        st2             {v16.b,v17.b}[3], [x0], x1
+        st2             {v16.b,v17.b}[4], [x0], x1
+        st2             {v16.b,v17.b}[5], [x0], x1
+        st2             {v16.b,v17.b}[6], [x0], x1
+        st2             {v16.b,v17.b}[7], [x0], x1
+
+9:
+        ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma422_intra_neon, export=1
+        h264_loop_filter_start_intra
+        sub             x4,  x0,  #2 // load pointer, as in the 8-row function. This function runs the 8-row body twice, rows 0-7 then rows 8-15
+        add             x5,  x0,  x1, lsl #3 // x5 = original x0 advanced by 8 rows
+        sub             x0,  x0,  #1 // store pointer for the first pass
+        mov             x7,  x30 // bl overwrites the link register, keep the real return address
+        bl              h_loop_filter_chroma420_intra // pass 1, its 8 post-incremented loads leave x4 at row 8
+        sub             x0,  x5,  #1 // store pointer for the second pass
+        mov             x30, x7
+        b               h_loop_filter_chroma420_intra // pass 2. Tail call, its ret returns to our caller
+endfunc
+
+.macro  biweight_16     macs, macd
+        dup             v0.16B,  w5 // loop body for 16 bytes per row. v0 = weight for rows read through x0 (combined with the macd op)
+        dup             v1.16B,  w6 // v1 = weight for rows read through x1 (combined with the macs op)
+        mov             v4.16B,  v16.16B // accumulators start from the bias held in v16 (set by the expanding function)
+        mov             v6.16B,  v16.16B
+1:      subs            w3,  w3,  #2 // two rows per iteration, the flags are consumed by b.ne at the bottom
+        ld1             {v20.16B}, [x0], x2
+        \macd           v4.8H,   v0.8B,  v20.8B
+        \macd\()2       v6.8H,   v0.16B, v20.16B
+        ld1             {v22.16B}, [x1], x2
+        \macs           v4.8H,   v1.8B,  v22.8B
+        \macs\()2       v6.8H,   v1.16B, v22.16B
+        mov             v24.16B, v16.16B // second row of the pair uses v24, v26 as accumulators
+        ld1             {v28.16B}, [x0], x2
+        mov             v26.16B, v16.16B
+        \macd           v24.8H,  v0.8B,  v28.8B
+        \macd\()2       v26.8H,  v0.16B, v28.16B
+        ld1             {v30.16B}, [x1], x2
+        \macs           v24.8H,  v1.8B,  v30.8B
+        \macs\()2       v26.8H,  v1.16B, v30.16B
+        sshl            v4.8H,   v4.8H,  v18.8H // shift by the signed per-lane count in v18 (set by the expanding function)
+        sshl            v6.8H,   v6.8H,  v18.8H
+        sqxtun          v4.8B,   v4.8H // saturate to unsigned bytes
+        sqxtun2         v4.16B,  v6.8H
+        sshl            v24.8H,  v24.8H, v18.8H
+        sshl            v26.8H,  v26.8H, v18.8H
+        sqxtun          v24.8B,  v24.8H
+        sqxtun2         v24.16B, v26.8H
+        mov             v6.16B,  v16.16B // re-seed the accumulators for the next iteration
+        st1             {v4.16B},  [x7], x2 // results are written through x7
+        mov             v4.16B,  v16.16B
+        st1             {v24.16B}, [x7], x2
+        b.ne            1b
+        ret
+.endm
+
+.macro  biweight_8      macs, macd
+        dup             v0.8B,  w5 // loop body for 8 bytes per row. v0 = weight for rows read through x0 (combined with the macd op)
+        dup             v1.8B,  w6 // v1 = weight for rows read through x1 (combined with the macs op)
+        mov             v2.16B,  v16.16B // accumulators start from the bias held in v16 (set by the expanding function)
+        mov             v20.16B, v16.16B
+1:      subs            w3,  w3,  #2 // two rows per iteration, the flags are consumed by b.ne at the bottom
+        ld1             {v4.8B}, [x0], x2
+        \macd           v2.8H,  v0.8B,  v4.8B
+        ld1             {v5.8B}, [x1], x2
+        \macs           v2.8H,  v1.8B,  v5.8B
+        ld1             {v6.8B}, [x0], x2
+        \macd           v20.8H, v0.8B,  v6.8B
+        ld1             {v7.8B}, [x1], x2
+        \macs           v20.8H, v1.8B,  v7.8B
+        sshl            v2.8H,  v2.8H,  v18.8H // shift by the signed per-lane count in v18 (set by the expanding function)
+        sqxtun          v2.8B,  v2.8H // saturate to unsigned bytes
+        sshl            v20.8H, v20.8H, v18.8H
+        sqxtun          v4.8B,  v20.8H
+        mov             v20.16B, v16.16B // re-seed the accumulators for the next iteration
+        st1             {v2.8B}, [x7], x2 // results are written through x7
+        mov             v2.16B,  v16.16B
+        st1             {v4.8B}, [x7], x2
+        b.ne            1b
+        ret
+.endm
+
+.macro  biweight_4      macs, macd
+        dup             v0.8B,  w5 // loop body for 4 bytes per row. v0 = weight for rows read through x0 (combined with the macd op)
+        dup             v1.8B,  w6 // v1 = weight for rows read through x1 (combined with the macs op)
+        mov             v2.16B, v16.16B // accumulators start from the bias held in v16 (set by the expanding function)
+        mov             v20.16B,v16.16B
+1:      subs            w3,  w3,  #4 // four rows per iteration, the flags feed both b.lt and the final b.ne
+        ld1             {v4.S}[0], [x0], x2 // two 4-byte rows are packed into one 64-bit register
+        ld1             {v4.S}[1], [x0], x2
+        \macd           v2.8H,  v0.8B,  v4.8B
+        ld1             {v5.S}[0], [x1], x2
+        ld1             {v5.S}[1], [x1], x2
+        \macs           v2.8H,  v1.8B,  v5.8B
+        b.lt            2f // fewer than 4 rows were left: finish the 2 rows already accumulated at label 2
+        ld1             {v6.S}[0], [x0], x2
+        ld1             {v6.S}[1], [x0], x2
+        \macd           v20.8H, v0.8B,  v6.8B
+        ld1             {v7.S}[0], [x1], x2
+        ld1             {v7.S}[1], [x1], x2
+        \macs           v20.8H, v1.8B,  v7.8B
+        sshl            v2.8H,  v2.8H,  v18.8H // shift by the signed per-lane count in v18 (set by the expanding function)
+        sqxtun          v2.8B,  v2.8H // saturate to unsigned bytes
+        sshl            v20.8H, v20.8H, v18.8H
+        sqxtun          v4.8B,  v20.8H
+        mov             v20.16B, v16.16B // re-seed the accumulators for the next iteration
+        st1             {v2.S}[0], [x7], x2 // results are written through x7
+        st1             {v2.S}[1], [x7], x2
+        mov             v2.16B,  v16.16B
+        st1             {v4.S}[0], [x7], x2
+        st1             {v4.S}[1], [x7], x2
+        b.ne            1b
+        ret
+2:      sshl            v2.8H,  v2.8H,  v18.8H // tail: only the first two rows of this iteration exist
+        sqxtun          v2.8B,  v2.8H
+        st1             {v2.S}[0], [x7], x2
+        st1             {v2.S}[1], [x7], x2
+        ret
+.endm
+
+.macro  biweight_func   w
+function ff_biweight_h264_pixels_\w\()_neon, export=1
+        lsr             w8,  w5,  #31 // dispatcher: choose the multiply-accumulate ops from the weight signs. w8 = sign bit of w5
+        add             w7,  w7,  #1
+        eor             w8,  w8,  w6,  lsr #30 // w8 = 0 (w5, w6 >= 0), 1 (only w5 < 0), 2 (both < 0), 3 (only w6 < 0), assuming bits 31 and 30 of w6 agree
+        orr             w7,  w7,  #1 // w7 = (w7 + 1) | 1
+        dup             v18.8H,   w4
+        lsl             w7,  w7,  w4 // scale the bias by 2^w4
+        not             v18.16B,  v18.16B // v18 lanes = -(w4 + 1), so the sshl in the loop body is a right shift by w4 + 1
+        dup             v16.8H,   w7 // v16 = bias that seeds the accumulators
+        mov             x7,  x0 // x7 = output pointer, the same buffer as x0 (w7 was consumed by the dup above)
+        cbz             w8,  10f // case 0: both weights non-negative
+        subs            w8,  w8,  #1
+        b.eq            20f // case 1: only w5 negative
+        subs            w8,  w8,  #1
+        b.eq            30f // case 2: both negative. Otherwise fall to the branch for case 3
+        b               40f
+10:     biweight_\w     umlal, umlal
+20:     neg             w5, w5 // each expansion above and below ends in ret, so the cases never fall through. Weights are made positive and the op subtracts instead
+        biweight_\w     umlal, umlsl
+30:     neg             w5, w5
+        neg             w6, w6
+        biweight_\w     umlsl, umlsl
+40:     neg             w6, w6
+        biweight_\w     umlsl, umlal
+endfunc
+.endm
+// Instantiate the exported functions for 16, 8 and 4 bytes per row.
+        biweight_func   16
+        biweight_func   8
+        biweight_func   4
+
+.macro  weight_16       add
+        dup             v0.16B,  w4 // loop body for 16 bytes per row. v0 = weight, already made non-negative by the expanding function
+1:      subs            w2,  w2,  #2 // two rows per iteration, the flags are consumed by b.ne at the bottom
+        ld1             {v20.16B}, [x0], x1
+        umull           v4.8H,   v0.8B,  v20.8B // weight * pixel, widened to 16 bit
+        umull2          v6.8H,   v0.16B, v20.16B
+        ld1             {v28.16B}, [x0], x1
+        umull           v24.8H,  v0.8B,  v28.8B
+        umull2          v26.8H,  v0.16B, v28.16B
+        \add            v4.8H,   v16.8H, v4.8H // combine the bias in v16 with the product using the op passed as macro argument
+        srshl           v4.8H,   v4.8H,  v18.8H // v18 holds negative counts, so this is a rounding right shift
+        \add            v6.8H,   v16.8H, v6.8H
+        srshl           v6.8H,   v6.8H,  v18.8H
+        sqxtun          v4.8B,   v4.8H // saturate to unsigned bytes
+        sqxtun2         v4.16B,  v6.8H
+        \add            v24.8H,  v16.8H, v24.8H
+        srshl           v24.8H,  v24.8H, v18.8H
+        \add            v26.8H,  v16.8H, v26.8H
+        srshl           v26.8H,  v26.8H, v18.8H
+        sqxtun          v24.8B,  v24.8H
+        sqxtun2         v24.16B, v26.8H
+        st1             {v4.16B},  [x5], x1 // results are written through x5
+        st1             {v24.16B}, [x5], x1
+        b.ne            1b
+        ret
+.endm
+
+.macro  weight_8        add
+        dup             v0.8B,  w4 // loop body for 8 bytes per row. v0 = weight, already made non-negative by the expanding function
+1:      subs            w2,  w2,  #2 // two rows per iteration, the flags are consumed by b.ne at the bottom
+        ld1             {v4.8B}, [x0], x1
+        umull           v2.8H,  v0.8B,  v4.8B // weight * pixel, widened to 16 bit
+        ld1             {v6.8B}, [x0], x1
+        umull           v20.8H, v0.8B,  v6.8B
+        \add            v2.8H,  v16.8H,  v2.8H // combine the bias in v16 with the product using the op passed as macro argument
+        srshl           v2.8H,  v2.8H,  v18.8H // v18 holds negative counts, so this is a rounding right shift
+        sqxtun          v2.8B,  v2.8H // saturate to unsigned bytes
+        \add            v20.8H, v16.8H,  v20.8H
+        srshl           v20.8H, v20.8H, v18.8H
+        sqxtun          v4.8B,  v20.8H
+        st1             {v2.8B}, [x5], x1 // results are written through x5
+        st1             {v4.8B}, [x5], x1
+        b.ne            1b
+        ret
+.endm
+
+.macro  weight_4        add
+        dup             v0.8B,  w4 // loop body for 4 bytes per row. v0 = weight, already made non-negative by the expanding function
+1:      subs            w2,  w2,  #4 // four rows per iteration, the flags feed both b.lt and the final b.ne
+        ld1             {v4.S}[0], [x0], x1 // two 4-byte rows are packed into one 64-bit register
+        ld1             {v4.S}[1], [x0], x1
+        umull           v2.8H,  v0.8B,  v4.8B // weight * pixel, widened to 16 bit
+        b.lt            2f // fewer than 4 rows were left: finish the 2 rows already multiplied at label 2
+        ld1             {v6.S}[0], [x0], x1
+        ld1             {v6.S}[1], [x0], x1
+        umull           v20.8H, v0.8B,  v6.8B
+        \add            v2.8H,  v16.8H,  v2.8H // combine the bias in v16 with the product using the op passed as macro argument
+        srshl           v2.8H,  v2.8H,  v18.8H // v18 holds negative counts, so this is a rounding right shift
+        sqxtun          v2.8B,  v2.8H // saturate to unsigned bytes
+        \add            v20.8H, v16.8H,  v20.8H
+        srshl           v20.8H, v20.8h, v18.8H
+        sqxtun          v4.8B,  v20.8H
+        st1             {v2.S}[0], [x5], x1 // results are written through x5
+        st1             {v2.S}[1], [x5], x1
+        st1             {v4.S}[0], [x5], x1
+        st1             {v4.S}[1], [x5], x1
+        b.ne            1b
+        ret
+2:      \add            v2.8H,  v16.8H,  v2.8H // tail: only the first two rows of this iteration exist
+        srshl           v2.8H,  v2.8H,  v18.8H
+        sqxtun          v2.8B,  v2.8H
+        st1             {v2.S}[0], [x5], x1
+        st1             {v2.S}[1], [x5], x1
+        ret
+.endm
+
+.macro  weight_func     w
+function ff_weight_h264_pixels_\w\()_neon, export=1
+        cmp             w3,  #1 // dispatcher: pick the add or sub flavour. These flags reach b.le below, nothing in between changes them
+        mov             w6,  #1
+        lsl             w5,  w5,  w3 // w5 << w3
+        dup             v16.8H,  w5 // v16 = bias operand for the loop body
+        mov             x5,  x0 // x5 = output pointer, the same buffer as the input x0 (w5 was consumed by the dup above)
+        b.le            20f // w3 <= 1: use the plain add or sub path
+        sub             w6,  w6,  w3
+        dup             v18.8H,  w6 // v18 = 1 - w3: the halving add or sub already divides by 2, srshl does the remaining rounding shift by w3 - 1
+        cmp             w4, #0
+        b.lt            10f // negative weight: negate it and subtract the product instead
+        weight_\w       shadd
+10:     neg             w4,  w4 // each expansion ends in ret, so control only arrives here through the branch
+        weight_\w       shsub
+20:     neg             w6,  w3
+        dup             v18.8H,  w6 // v18 = -w3: srshl is a rounding right shift by w3
+        cmp             w4,  #0
+        b.lt            10f // refers to the second 10: label, the next one below
+        weight_\w       add
+10:     neg             w4,  w4
+        weight_\w       sub
+endfunc
+.endm
+// Instantiate the exported functions for 16, 8 and 4 bytes per row.
+        weight_func     16
+        weight_func     8
+        weight_func     4
+
+.macro  h264_loop_filter_start_10
+        cmp             w2,  #0 // prologue of the 10-bit filters: test alpha, load tc0, scale thresholds, return early when there is nothing to do
+        ldr             w6,  [x4] // w6 = four tc0 bytes
+        ccmp            w3,  #0,  #4,  ne // alpha != 0: Z = (beta == 0). alpha == 0: load nzcv = 0b0100 so Z is set too (it was 0, which skipped the early return)
+        lsl             w2,  w2,  #2 // alpha scaled by 4
+        mov             v24.S[0], w6 // tc0 bytes into v24 for the filter macro
+        lsl             w3,  w3,  #2 // beta scaled by 4
+        and             w8,  w6,  w6,  lsl #16
+        b.eq            1f // alpha == 0 or beta == 0: return (none of the instructions since ccmp change the flags)
+        ands            w8,  w8,  w8,  lsl #8 // bit 31 = AND of the sign bits of all four tc0 bytes
+        b.ge            2f // N clear: at least one tc0 byte is non-negative, continue into the function body
+1:
+        ret
+2:
+.endm
+
+.macro h264_loop_filter_start_intra_10
+        orr             w4,  w2,  w3 // zero only when alpha (w2) and beta (w3) are both zero
+        cbnz            w4,  1f
+        ret // both thresholds are zero: return from the function expanding this macro
+1:
+        lsl             w2,  w2,  #2 // thresholds scaled by 4, presumably 1 << (BIT_DEPTH - 8) for 10-bit samples
+        lsl             w3,  w3,  #2
+        dup             v30.8h,   w2              // alpha
+        dup             v31.8h,   w3              // beta
+.endm
+
+.macro  h264_loop_filter_chroma_10
+        dup             v22.8h,  w2               // alpha
+        dup             v23.8h,  w3               // beta
+        uxtl            v24.8h,  v24.8b           // tc0
+        // 16-bit lane version of the chroma filter: p1, p0, q0, q1 in v18, v16, v0, v2. Results are written to v16 and v0.
+        uabd            v26.8h,  v16.8h,  v0.8h   // abs(p0 - q0)
+        uabd            v28.8h,  v18.8h,  v16.8h  // abs(p1 - p0)
+        uabd            v30.8h,  v2.8h,   v0.8h   // abs(q1 - q0)
+        cmhi            v26.8h,  v22.8h,  v26.8h  // < alpha
+        cmhi            v28.8h,  v23.8h,  v28.8h  // < beta
+        cmhi            v30.8h,  v23.8h,  v30.8h  // < beta
+
+        and             v26.16b, v26.16b, v28.16b
+        mov             v4.16b,  v0.16b
+        sub             v4.8h,   v4.8h,   v16.8h  // q0 - p0
+        and             v26.16b, v26.16b, v30.16b // v26 = per-pixel mask, set where all three tests passed
+        shl             v4.8h,   v4.8h,   #2      // 4 * (q0 - p0)
+        mov             x8, v26.d[0]
+        mov             x9, v26.d[1]
+        sli             v24.8h,  v24.8h,  #8      // copy each tc0 byte into the high byte of its halfword
+        uxtl            v24.8h,  v24.8b           // widen again: every tc0 value now fills two adjacent halfword lanes
+        add             v4.8h,   v4.8h,   v18.8h  // + p1
+        adds            x8,  x8,  x9              // mask lanes are 0 or 0xffff, so the sum is zero only when both halves are zero
+        shl             v24.8h,  v24.8h,  #2      // tc0 * 4
+
+        b.eq            9f
+
+        movi            v31.8h, #3                // (tc0 - 1) << (BIT_DEPTH - 8)) + 1
+        uqsub           v24.8h,  v24.8h,  v31.8h  // tc = 4*tc0 - 3, saturating at 0
+        sub             v4.8h,   v4.8h,   v2.8h   // - q1
+        srshr           v4.8h,   v4.8h,   #3      // delta = (4*(q0 - p0) + p1 - q1 + 4) >> 3
+        smin            v4.8h,   v4.8h,   v24.8h  // clamp delta to at most +tc
+        neg             v25.8h,  v24.8h
+        smax            v4.8h,   v4.8h,   v25.8h  // and to at least -tc
+        and             v4.16b,  v4.16b,  v26.16b // zero delta for pixels that failed the tests
+        add             v16.8h,  v16.8h,  v4.8h   // p0 + delta
+        sub             v0.8h,   v0.8h,   v4.8h   // q0 - delta
+
+        mvni            v4.8h,   #0xFC, lsl #8    // 1023 for clipping
+        movi            v5.8h,   #0
+        smin            v0.8h,   v0.8h,   v4.8h   // clamp both results to the range 0..1023
+        smin            v16.8h,  v16.8h,  v4.8h
+        smax            v0.8h,   v0.8h,   v5.8h
+        smax            v16.8h,  v16.8h,  v5.8h
+.endm
+
+function ff_h264_v_loop_filter_chroma_neon_10, export=1
+        h264_loop_filter_start_10
+        // Chroma filter across a horizontal edge, eight 16-bit samples per row: x0 = row q0, x1 = row stride in bytes.
+        mov             x10,  x0 // x10 walks the q rows, x0 walks the p rows
+        sub             x0,  x0,  x1, lsl #1 // back up two rows to p1
+        ld1             {v18.8h}, [x0 ], x1 // p1
+        ld1             {v0.8h},  [x10], x1 // q0
+        ld1             {v16.8h}, [x0 ], x1 // p0
+        ld1             {v2.8h},  [x10] // q1
+
+        h264_loop_filter_chroma_10
+
+        sub             x0,  x10,  x1, lsl #1 // x10 = q1 row, two rows back is p0
+        st1             {v16.8h}, [x0], x1 // p0
+        st1             {v0.8h},  [x0], x1 // q0
+9:
+        ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma_neon_10, export=1
+        h264_loop_filter_start_10
+        // Chroma filter for a vertical edge: reads four 16-bit samples from each of 8 rows, filters, and writes them back.
+        sub             x0,  x0,  #4 // access the 2nd left pixel
+h_loop_filter_chroma420_10: // shared body, also entered from ff_h264_h_loop_filter_chroma422_neon_10
+        add             x10,  x0,  x1,  lsl #2 // x10 = row 4, so rows 0-3 and rows 4-7 are loaded interleaved
+        ld1             {v18.d}[0], [x0 ], x1 // rows 0-3 go to lane d[0]
+        ld1             {v18.d}[1], [x10], x1 // rows 4-7 go to lane d[1]
+        ld1             {v16.d}[0], [x0 ], x1
+        ld1             {v16.d}[1], [x10], x1
+        ld1             {v0.d}[0],  [x0 ], x1
+        ld1             {v0.d}[1],  [x10], x1
+        ld1             {v2.d}[0],  [x0 ], x1
+        ld1             {v2.d}[1],  [x10], x1
+        // NOTE(review): transpose_4x8H is defined elsewhere. Presumably it turns the rows into the p1, p0, q0, q1 columns the filter macro expects - confirm.
+        transpose_4x8H  v18, v16, v0, v2, v28, v29, v30, v31
+
+        h264_loop_filter_chroma_10
+
+        transpose_4x8H  v18, v16, v0, v2, v28, v29, v30, v31
+
+        sub             x0,  x10,  x1, lsl #3 // x10 ended at row 8, go back to row 0
+        st1             {v18.d}[0], [x0], x1 // rows 0-3 come from the d[0] lanes
+        st1             {v16.d}[0], [x0], x1
+        st1             {v0.d}[0],  [x0], x1
+        st1             {v2.d}[0],  [x0], x1
+        st1             {v18.d}[1], [x0], x1 // rows 4-7 come from the d[1] lanes
+        st1             {v16.d}[1], [x0], x1
+        st1             {v0.d}[1],  [x0], x1
+        st1             {v2.d}[1],  [x0], x1
+9:
+        ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma422_neon_10, export=1
+        h264_loop_filter_start_10
+        add             x5,  x0,  x1 // x5 = second row. This function runs the 8-row h_loop_filter_chroma420_10 body twice
+        sub             x0,  x0,  #4 // same 4-byte left offset the 4:2:0 function applies before its label
+        add             x1,  x1,  x1 // double the stride so each pass touches every other row
+        mov             x7,  x30 // bl overwrites the link register, keep the real return address
+        bl              h_loop_filter_chroma420_10 // pass 1: rows 0, 2, ..., 14
+        mov             x30, x7
+        sub             x0,  x5,  #4 // pass 2 starts one original row further down
+        mov             v24.s[0], w6 // the filter macro rewrote v24 in place, reload the tc0 bytes kept in w6 by the start macro
+        b               h_loop_filter_chroma420_10 // pass 2: rows 1, 3, ..., 15. Tail call, its ret returns to our caller
+endfunc
+
+.macro h264_loop_filter_chroma_intra_10
+        uabd            v26.8h,  v16.8h,  v17.8h  // abs(p0 - q0)
+        uabd            v27.8h,  v18.8h,  v16.8h  // abs(p1 - p0)
+        uabd            v28.8h,  v19.8h,  v17.8h  // abs(q1 - q0)
+        cmhi            v26.8h,  v30.8h,  v26.8h  // < alpha
+        cmhi            v27.8h,  v31.8h,  v27.8h  // < beta
+        cmhi            v28.8h,  v31.8h,  v28.8h  // < beta
+        and             v26.16b, v26.16b, v27.16b
+        and             v26.16b, v26.16b, v28.16b // v26 = mask of pixels passing all three tests
+        mov             x2, v26.d[0]              // x2 and x3 are reused as scratch for the two mask halves
+        mov             x3, v26.d[1]
+        // Where the mask is set: p0 = (2*p1 + p0 + q1 + 2) >> 2 and q0 = (2*q1 + q0 + p1 + 2) >> 2. Inputs p1, p0, q0, q1 are v18, v16, v17, v19.
+        shl             v4.8h,  v18.8h,  #1       // 2 * p1
+        shl             v6.8h,  v19.8h,  #1       // 2 * q1
+
+        adds            x2,  x2,  x3              // mask lanes are 0 or 0xffff, so the sum is zero only when both halves are zero
+        b.eq            9f                        // mask is empty: jump to the 9: label of the function expanding this macro
+
+        add             v20.8h,  v16.8h,  v19.8h  // p0 + q1
+        add             v22.8h,  v17.8h,  v18.8h  // q0 + p1
+        add             v20.8h,  v20.8h,  v4.8h   // 2*p1 + p0 + q1
+        add             v22.8h,  v22.8h,  v6.8h   // 2*q1 + q0 + p1
+        urshr           v24.8h,  v20.8h,  #2      // rounding shift right by 2
+        urshr           v25.8h,  v22.8h,  #2
+        bit             v16.16b, v24.16b, v26.16b // insert new p0 where the mask is set
+        bit             v17.16b, v25.16b, v26.16b // insert new q0 where the mask is set
+.endm
+
+function ff_h264_v_loop_filter_chroma_intra_neon_10, export=1
+        h264_loop_filter_start_intra_10
+        mov             x9,  x0 // intra chroma filter across a horizontal edge, eight 16-bit samples per row. x9 walks the q rows, x0 the p rows
+        sub             x0,  x0,  x1, lsl #1 // back up two rows to p1
+        ld1             {v18.8h}, [x0], x1 // p1
+        ld1             {v17.8h}, [x9], x1 // q0
+        ld1             {v16.8h}, [x0], x1 // p0
+        ld1             {v19.8h}, [x9] // q1
+
+        h264_loop_filter_chroma_intra_10
+
+        sub             x0,  x9,  x1, lsl #1 // x9 = q1 row, two rows back is p0
+        st1             {v16.8h}, [x0], x1 // p0
+        st1             {v17.8h}, [x0], x1 // q0
+
+9:
+        ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma_mbaff_intra_neon_10, export=1
+        h264_loop_filter_start_intra_10
+        // Intra chroma filter for a vertical edge, 4-row variant with 16-bit samples: 4 rows are loaded and 4 rows are stored.
+        sub             x4,  x0,  #4 // x4 = load pointer for rows 0 and 1, 4 bytes left of x0
+        sub             x0,  x0,  #2 // x0 = store pointer, 2 bytes left of the original x0 (the p0 column)
+        add             x9,  x4,  x1, lsl #1 // x9 = load pointer for rows 2 and 3
+        ld1             {v18.8h}, [x4], x1 // row 0, 16 bytes per load
+        ld1             {v17.8h}, [x9], x1 // row 2
+        ld1             {v16.8h}, [x4], x1 // row 1
+        ld1             {v19.8h}, [x9], x1 // row 3
+        // NOTE(review): transpose_4x8H is defined elsewhere. Presumably it yields the p1, p0, q0, q1 columns in v18, v16, v17, v19 - confirm.
+        transpose_4x8H v18, v16, v17, v19, v26, v27, v28, v29
+
+        h264_loop_filter_chroma_intra_10
+
+        st2             {v16.h,v17.h}[0], [x0], x1 // each st2 writes halfword lane n of v16 then v17 (p0, q0) to one row
+        st2             {v16.h,v17.h}[1], [x0], x1
+        st2             {v16.h,v17.h}[2], [x0], x1
+        st2             {v16.h,v17.h}[3], [x0], x1
+
+9:
+        ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma_intra_neon_10, export=1
+        h264_loop_filter_start_intra_10
+        sub             x4,  x0,  #4 // intra chroma filter for a vertical edge over 8 rows of 16-bit samples. x4 = load pointer, 4 bytes left of x0
+        sub             x0,  x0,  #2 // x0 = store pointer, 2 bytes left of the original x0 (the p0 column)
+h_loop_filter_chroma420_intra_10: // shared body, also entered from ff_h264_h_loop_filter_chroma422_intra_neon_10
+        add             x9,  x4,  x1, lsl #2 // x9 = row 4, so rows 0-3 and rows 4-7 are loaded interleaved
+        ld1             {v18.4h},   [x4], x1 // rows 0-3 go to the low 64 bits
+        ld1             {v18.d}[1], [x9], x1 // rows 4-7 go to lane d[1]
+        ld1             {v16.4h},   [x4], x1
+        ld1             {v16.d}[1], [x9], x1
+        ld1             {v17.4h},   [x4], x1
+        ld1             {v17.d}[1], [x9], x1
+        ld1             {v19.4h},   [x4], x1
+        ld1             {v19.d}[1], [x9], x1
+        // NOTE(review): transpose_4x8H is defined elsewhere. Presumably it yields the p1, p0, q0, q1 columns in v18, v16, v17, v19 - confirm.
+        transpose_4x8H v18, v16, v17, v19, v26, v27, v28, v29
+
+        h264_loop_filter_chroma_intra_10
+
+        st2             {v16.h,v17.h}[0], [x0], x1 // each st2 writes halfword lane n of v16 then v17 (p0, q0) to one row
+        st2             {v16.h,v17.h}[1], [x0], x1
+        st2             {v16.h,v17.h}[2], [x0], x1
+        st2             {v16.h,v17.h}[3], [x0], x1
+        st2             {v16.h,v17.h}[4], [x0], x1
+        st2             {v16.h,v17.h}[5], [x0], x1
+        st2             {v16.h,v17.h}[6], [x0], x1
+        st2             {v16.h,v17.h}[7], [x0], x1
+
+9:
+        ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma422_intra_neon_10, export=1
+        h264_loop_filter_start_intra_10
+        sub             x4,  x0,  #4 // load pointer, as in the 8-row function. This function runs the 8-row body twice, rows 0-7 then rows 8-15
+        add             x5,  x0,  x1, lsl #3 // x5 = original x0 advanced by 8 rows
+        sub             x0,  x0,  #2 // store pointer for the first pass
+        mov             x7,  x30 // bl overwrites the link register, keep the real return address
+        bl              h_loop_filter_chroma420_intra_10 // pass 1
+        mov             x4,  x9 // x9 started at row 4 and advanced 4 rows in pass 1, so it now addresses row 8
+        sub             x0,  x5,  #2 // store pointer for the second pass
+        mov             x30, x7
+        b               h_loop_filter_chroma420_intra_10 // pass 2. Tail call, its ret returns to our caller
+endfunc
diff --git a/media/ffvpx/libavcodec/aarch64/h264idct_neon.S b/media/ffvpx/libavcodec/aarch64/h264idct_neon.S
new file mode 100644
index 0000000000..375da31d65
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/h264idct_neon.S
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+#include "neon.S"
+
+function ff_h264_idct_add_neon, export=1                    // x0 = dst bytes, x1 = 16 int16 coefficients, w2 = stride: adds the 4x4 inverse transform to dst and zeroes the block
+.L_ff_h264_idct_add_neon:                                   // local alias, target of the indirect calls in the add16/add16intra/add8 loops
+        AARCH64_VALID_CALL_TARGET                           // macro from asm.S; presumably marks an indirect-call landing pad - confirm there
+        ld1             {v0.4H, v1.4H, v2.4H, v3.4H},  [x1]  // r0..r3: the four rows of coefficients
+        sxtw            x2,     w2                          // sign-extend the 32-bit stride
+        movi            v30.8H, #0                          // zero vector used to clear the block
+
+        add             v4.4H,  v0.4H,  v2.4H               // z0 = r0 + r2
+        sshr            v16.4H, v1.4H,  #1                  // r1 >> 1
+        st1             {v30.8H},    [x1], #16              // clear the first 16 bytes of the block
+        sshr            v17.4H, v3.4H,  #1                  // r3 >> 1
+        st1             {v30.8H},    [x1], #16              // clear the rest; x1 is now block + 32
+        sub             v5.4H,  v0.4H,  v2.4H               // z1 = r0 - r2
+        sub             v6.4H,  v16.4H, v3.4H               // z2 = (r1 >> 1) - r3
+        add             v7.4H,  v1.4H,  v17.4H              // z3 = r1 + (r3 >> 1)
+        add             v0.4H,  v4.4H,  v7.4H               // z0 + z3
+        add             v1.4H,  v5.4H,  v6.4H               // z1 + z2
+        sub             v2.4H,  v5.4H,  v6.4H               // z1 - z2
+        sub             v3.4H,  v4.4H,  v7.4H               // z0 - z3
+
+        transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7      // macro from neon.S; v4-v7 are passed as scratch
+
+        add             v4.4H,  v0.4H,  v2.4H               // second pass, same butterfly: z0
+        ld1             {v18.S}[0], [x0], x2                // dst row 0
+        sshr            v16.4H,  v3.4H,  #1
+        sshr            v17.4H,  v1.4H,  #1
+        ld1             {v18.S}[1], [x0], x2                // dst row 1
+        sub             v5.4H,  v0.4H,  v2.4H               // z1
+        ld1             {v19.S}[1], [x0], x2                // dst row 2 goes to the HIGH lane, see the sub into v1 below
+        add             v6.4H,  v16.4H, v1.4H               // z3
+        ins             v4.D[1],  v5.D[0]                   // v4 = { z0, z1 }
+        sub             v7.4H,  v17.4H, v3.4H               // z2
+        ld1             {v19.S}[0], [x0], x2                // dst row 3 goes to the LOW lane
+        ins             v6.D[1],  v7.D[0]                   // v6 = { z3, z2 }
+        sub             x0,  x0,  x2, lsl #2                // rewind dst by four rows
+        add             v0.8H,  v4.8H,  v6.8H               // { z0 + z3, z1 + z2 } = output rows 0, 1
+        sub             v1.8H,  v4.8H,  v6.8H               // { z0 - z3, z1 - z2 } = output rows 3, 2
+
+        srshr           v0.8H,  v0.8H,  #6                  // rounding shift: (x + 32) >> 6
+        srshr           v1.8H,  v1.8H,  #6
+
+        uaddw           v0.8H,  v0.8H,  v18.8B              // add the zero-extended dst pixels
+        uaddw           v1.8H,  v1.8H,  v19.8B
+
+        sqxtun          v0.8B, v0.8H                        // saturate to unsigned 8 bit
+        sqxtun          v1.8B, v1.8H
+
+        st1             {v0.S}[0],  [x0], x2                // row 0
+        st1             {v0.S}[1],  [x0], x2                // row 1
+        st1             {v1.S}[1],  [x0], x2                // row 2
+        st1             {v1.S}[0],  [x0], x2                // row 3
+
+        sub             x1,  x1,  #32                       // restore x1 to the block start; the calling loops advance it themselves
+        ret
+endfunc
+
+function ff_h264_idct_dc_add_neon, export=1                 // x0 = dst bytes, x1 = block, w2 = stride: adds the rounded first coefficient to a 4x4 area and clears it
+.L_ff_h264_idct_dc_add_neon:                                // local alias, target of the indirect calls in the add16/add16intra/add8 loops
+        AARCH64_VALID_CALL_TARGET                           // macro from asm.S; presumably marks an indirect-call landing pad - confirm there
+        sxtw            x2,  w2                             // sign-extend the 32-bit stride
+        mov             w3,       #0
+        ld1r            {v2.8H},  [x1]                      // broadcast block[0] to all eight lanes
+        strh            w3,       [x1]                      // clear block[0]; x1 itself is left unchanged
+        srshr           v2.8H,  v2.8H,  #6                  // dc = (block[0] + 32) >> 6
+        ld1             {v0.S}[0],  [x0], x2                // dst rows 0 and 1
+        ld1             {v0.S}[1],  [x0], x2
+        uaddw           v3.8H,  v2.8H,  v0.8B               // dc + zero-extended pixels
+        ld1             {v1.S}[0],  [x0], x2                // dst rows 2 and 3
+        ld1             {v1.S}[1],  [x0], x2
+        uaddw           v4.8H,  v2.8H,  v1.8B
+        sqxtun          v0.8B,  v3.8H                       // saturate to unsigned 8 bit
+        sqxtun          v1.8B,  v4.8H
+        sub             x0,  x0,  x2, lsl #2                // rewind dst by four rows
+        st1             {v0.S}[0],  [x0], x2
+        st1             {v0.S}[1],  [x0], x2
+        st1             {v1.S}[0],  [x0], x2
+        st1             {v1.S}[1],  [x0], x2
+        ret
+endfunc
+
+function ff_h264_idct_add16_neon, export=1                  // loops over 16 4x4 blocks; x4 (never written here) is the byte array indexed through scan8
+        mov             x12, x30                            // keep the return address, blr overwrites x30
+        mov             x6,  x0         // dest
+        mov             x5,  x1         // block_offset
+        mov             x1,  x2         // block
+        mov             w9,  w3         // stride
+        movrel          x7,  scan8
+        mov             x10, #16                            // loop counter
+        movrel          x13, .L_ff_h264_idct_dc_add_neon
+        movrel          x14, .L_ff_h264_idct_add_neon
+1:      mov             w2,  w9                             // stride argument for the callee
+        ldrb            w3,  [x7], #1                       // next scan8 entry
+        ldrsw           x0,  [x5], #4                       // next 32-bit block offset
+        ldrb            w3,  [x4,  w3,  uxtw]               // count = x4[scan8 entry]
+        subs            w3,  w3,  #1                        // flags from count - 1
+        b.lt            2f                                  // count == 0: nothing to add for this block
+        ldrsh           w3,  [x1]                           // first coefficient of the block; does not touch the flags
+        add             x0,  x0,  x6                        // dst = dest + offset
+        ccmp            w3,  #0,  #4,  eq                   // count == 1: compare the coefficient with 0; otherwise force Z = 1
+        csel            x15, x13, x14, ne                   // dc_add only if count == 1 and the coefficient is nonzero, else the full idct
+        blr             x15
+2:      subs            x10, x10, #1
+        add             x1,  x1,  #32                       // next block (16 int16); both callees leave x1 at the block start
+        b.ne            1b
+        ret             x12
+endfunc
+
+function ff_h264_idct_add16intra_neon, export=1             // like idct_add16, but a block with a zero count still gets dc_add when its first coefficient is nonzero
+        mov             x12, x30                            // keep the return address, blr overwrites x30
+        mov             x6,  x0         // dest
+        mov             x5,  x1         // block_offset
+        mov             x1,  x2         // block
+        mov             w9,  w3         // stride
+        movrel          x7,  scan8
+        mov             x10, #16                            // loop counter
+        movrel          x13, .L_ff_h264_idct_dc_add_neon
+        movrel          x14, .L_ff_h264_idct_add_neon
+1:      mov             w2,  w9                             // stride argument for the callee
+        ldrb            w3,  [x7], #1                       // next scan8 entry
+        ldrsw           x0,  [x5], #4                       // next 32-bit block offset
+        ldrb            w3,  [x4,  w3,  uxtw]               // count = x4[scan8 entry]
+        add             x0,  x0,  x6                        // dst = dest + offset
+        cmp             w3,  #0
+        ldrsh           w3,  [x1]                           // first coefficient of the block; does not touch the flags
+        csel            x15, x13, x14, eq                   // count == 0 selects dc_add, otherwise the full idct
+        ccmp            w3,  #0,  #0,  eq                   // count == 0: compare the coefficient with 0; otherwise force Z = 0
+        b.eq            2f                                  // skip only when count == 0 and the coefficient is 0
+        blr             x15
+2:      subs            x10, x10, #1
+        add             x1,  x1,  #32                       // next block (16 int16); both callees leave x1 at the block start
+        b.ne            1b
+        ret             x12
+endfunc
+
+function ff_h264_idct_add8_neon, export=1                   // two planes of four 4x4 blocks each; x0 points at a pair of destination pointers
+        stp             x19, x20, [sp, #-0x40]!             // x19/x20 are callee-saved and used below; reserves 0x40 bytes of stack
+        mov             x12, x30                            // keep the return address, blr overwrites x30
+        ldp             x6,  x15, [x0]          // dest[0], dest[1]
+        add             x5,  x1,  #16*4         // block_offset
+        add             x9,  x2,  #16*32        // block
+        mov             w19, w3                 // stride
+        movrel          x13, .L_ff_h264_idct_dc_add_neon
+        movrel          x14, .L_ff_h264_idct_add_neon
+        movrel          x7,  scan8, 16                      // table base pre-advanced by 16 entries, matching x5 and x9
+        mov             x10, #0                             // index i, relative to entry 16
+        mov             x11, #16                            // value i jumps to after the first four blocks
+1:      mov             w2,  w19                            // stride argument for the callee
+        ldrb            w3,  [x7, x10]          // scan8[i]
+        ldrsw           x0,  [x5, x10, lsl #2]  // block_offset[i]
+        ldrb            w3,  [x4, w3,  uxtw]    // nnzc[ scan8[i] ]
+        add             x0,  x0,  x6            // block_offset[i] + dst[j-1]
+        add             x1,  x9,  x10, lsl #5   // block + i * 16
+        cmp             w3,  #0
+        ldrsh           w3,  [x1]               // block[i*16]
+        csel            x20, x13, x14, eq                   // count == 0 selects dc_add, otherwise the full idct
+        ccmp            w3,  #0,  #0,  eq                   // count == 0: compare the coefficient with 0; otherwise force Z = 0
+        b.eq            2f                                  // skip only when count == 0 and the coefficient is 0
+        blr             x20
+2:      add             x10, x10, #1
+        cmp             x10, #4
+        csel            x10, x11, x10, eq     // mov x10, #16
+        csel            x6,  x15, x6,  eq                   // same condition: switch to the second destination pointer
+        cmp             x10, #20
+        b.lt            1b                                  // i takes the values 0..3 and then 16..19
+        ldp             x19, x20, [sp], #0x40
+        ret             x12
+endfunc
+
+.macro  idct8x8_cols    pass
+  .if \pass == 0
+        va      .req    v18
+        vb      .req    v30
+        sshr            v18.8H, v26.8H, #1                  // pass 0: inputs r0..r5 = v24..v29, r6/r7 are loaded below; r2 >> 1
+        add             v16.8H, v24.8H, v28.8H              // e0 = r0 + r4
+        ld1             {v30.8H, v31.8H}, [x1]              // r6, r7 from memory
+        st1             {v19.8H}, [x1],  #16                // clear them, v19 is expected to be zero on entry; x1 advances by 32
+        st1             {v19.8H}, [x1],  #16
+        sub             v17.8H,  v24.8H, v28.8H             // e1 = r0 - r4
+        sshr            v19.8H,  v30.8H, #1                 // r6 >> 1; v19 is a plain temporary from here on
+        sub             v18.8H,  v18.8H,  v30.8H            // e2 = (r2 >> 1) - r6
+        add             v19.8H,  v19.8H,  v26.8H            // e3 = (r6 >> 1) + r2
+  .else
+        va      .req    v30
+        vb      .req    v18
+        sshr            v30.8H, v26.8H, #1                  // pass 1: inputs r0..r5 = v24..v29, r6 = v18, r7 = v31; r2 >> 1
+        sshr            v19.8H, v18.8H, #1                  // r6 >> 1
+        add             v16.8H, v24.8H, v28.8H              // e0 = r0 + r4
+        sub             v17.8H, v24.8H, v28.8H              // e1 = r0 - r4
+        sub             v30.8H, v30.8H, v18.8H              // e2 = (r2 >> 1) - r6
+        add             v19.8H, v19.8H, v26.8H              // e3 = (r6 >> 1) + r2
+  .endif
+        add             v26.8H, v17.8H, va.8H               // e1 + e2
+        sub             v28.8H, v17.8H, va.8H               // e1 - e2
+        add             v24.8H, v16.8H, v19.8H              // e0 + e3
+        sub             vb.8H,  v16.8H, v19.8H              // e0 - e3
+        sub             v16.8H, v29.8H, v27.8H              // odd part: r5 - r3
+        add             v17.8H, v31.8H, v25.8H              // r7 + r1
+        sub             va.8H,  v31.8H, v25.8H              // r7 - r1
+        add             v19.8H, v29.8H, v27.8H              // r5 + r3
+        sub             v16.8H, v16.8H, v31.8H              // r5 - r3 - r7
+        sub             v17.8H, v17.8H, v27.8H              // r7 + r1 - r3
+        add             va.8H,  va.8H,  v29.8H              // r7 - r1 + r5
+        add             v19.8H, v19.8H, v25.8H              // r5 + r3 + r1
+        sshr            v25.8H, v25.8H, #1                  // halve the four odd inputs
+        sshr            v27.8H, v27.8H, #1
+        sshr            v29.8H, v29.8H, #1
+        sshr            v31.8H, v31.8H, #1
+        sub             v16.8H, v16.8H, v31.8H              // o0 = r5 - r3 - r7 - (r7 >> 1)
+        sub             v17.8H, v17.8H, v27.8H              // o1 = r7 + r1 - r3 - (r3 >> 1)
+        add             va.8H,  va.8H,  v29.8H              // o2 = r7 - r1 + r5 + (r5 >> 1)
+        add             v19.8H, v19.8H, v25.8H              // o3 = r5 + r3 + r1 + (r1 >> 1)
+        sshr            v25.8H, v16.8H, #2                  // o0 >> 2
+        sshr            v27.8H, v17.8H, #2                  // o1 >> 2
+        sshr            v29.8H, va.8H,  #2                  // o2 >> 2
+        sshr            v31.8H, v19.8H, #2                  // o3 >> 2
+        sub             v19.8H, v19.8H, v25.8H              // o3 - (o0 >> 2)
+        sub             va.8H,  v27.8H, va.8H               // (o1 >> 2) - o2
+        add             v17.8H, v17.8H, v29.8H              // o1 + (o2 >> 2)
+        add             v16.8H, v16.8H, v31.8H              // o0 + (o3 >> 2)
+  .if \pass == 0
+        sub             v31.8H, v24.8H, v19.8H              // pass 0 results land in v24..v29, v18, v31 (row 6 in v18)
+        add             v24.8H, v24.8H, v19.8H
+        add             v25.8H, v26.8H, v18.8H
+        sub             v18.8H, v26.8H, v18.8H
+        add             v26.8H, v28.8H, v17.8H
+        add             v27.8H, v30.8H, v16.8H
+        sub             v29.8H, v28.8H, v17.8H
+        sub             v28.8H, v30.8H, v16.8H
+  .else
+        sub             v31.8H, v24.8H, v19.8H              // pass 1 results land in v24..v31 (row 6 in v30)
+        add             v24.8H, v24.8H, v19.8H
+        add             v25.8H, v26.8H, v30.8H
+        sub             v30.8H, v26.8H, v30.8H
+        add             v26.8H, v28.8H, v17.8H
+        sub             v29.8H, v28.8H, v17.8H
+        add             v27.8H, v18.8H, v16.8H
+        sub             v28.8H, v18.8H, v16.8H
+  .endif
+        .unreq          va
+        .unreq          vb
+.endm
+
+function ff_h264_idct8_add_neon, export=1                   // x0 = dst bytes, x1 = 64 int16 coefficients, w2 = stride: adds the 8x8 inverse transform to dst and zeroes the block
+.L_ff_h264_idct8_add_neon:                                  // local alias, target of the indirect call in idct8_add4
+        AARCH64_VALID_CALL_TARGET                           // macro from asm.S; presumably marks an indirect-call landing pad - confirm there
+        movi            v19.8H,   #0                        // zero vector for clearing the block, also consumed by idct8x8_cols 0
+        sxtw            x2,       w2                        // sign-extend the 32-bit stride
+        ld1             {v24.8H, v25.8H}, [x1]              // rows 0 and 1
+        st1             {v19.8H},  [x1],   #16              // clear them, x1 advances by 32
+        st1             {v19.8H},  [x1],   #16
+        ld1             {v26.8H, v27.8H}, [x1]              // rows 2 and 3
+        st1             {v19.8H},  [x1],   #16
+        st1             {v19.8H},  [x1],   #16
+        ld1             {v28.8H, v29.8H}, [x1]              // rows 4 and 5
+        st1             {v19.8H},  [x1],   #16
+        st1             {v19.8H},  [x1],   #16
+
+        idct8x8_cols    0                                   // first pass; also loads and clears rows 6 and 7, leaving x1 at block + 128
+        transpose_8x8H  v24, v25, v26, v27, v28, v29, v18, v31, v6, v7   // macro from neon.S; row 6 is in v18 after pass 0
+        idct8x8_cols    1                                   // second pass, results in v24..v31
+
+        mov             x3,  x0                             // x3 = store pointer, x0 keeps walking as the load pointer
+        srshr           v24.8H, v24.8H, #6                  // rounding shift (x + 32) >> 6, interleaved with the dst row loads
+        ld1             {v0.8B},     [x0], x2
+        srshr           v25.8H, v25.8H, #6
+        ld1             {v1.8B},     [x0], x2
+        srshr           v26.8H, v26.8H, #6
+        ld1             {v2.8B},     [x0], x2
+        srshr           v27.8H, v27.8H, #6
+        ld1             {v3.8B},     [x0], x2
+        srshr           v28.8H, v28.8H, #6
+        ld1             {v4.8B},     [x0], x2
+        srshr           v29.8H, v29.8H, #6
+        ld1             {v5.8B},     [x0], x2
+        srshr           v30.8H, v30.8H, #6
+        ld1             {v6.8B},     [x0], x2
+        srshr           v31.8H, v31.8H, #6
+        ld1             {v7.8B},     [x0], x2
+        uaddw           v24.8H, v24.8H, v0.8B               // add zero-extended dst pixels, then saturate to unsigned 8 bit
+        uaddw           v25.8H, v25.8H, v1.8B
+        uaddw           v26.8H, v26.8H, v2.8B
+        sqxtun          v0.8B,  v24.8H
+        uaddw           v27.8H, v27.8H, v3.8B
+        sqxtun          v1.8B,  v25.8H
+        uaddw           v28.8H, v28.8H, v4.8B
+        sqxtun          v2.8B,  v26.8H
+        st1             {v0.8B},     [x3], x2
+        uaddw           v29.8H, v29.8H, v5.8B
+        sqxtun          v3.8B,  v27.8H
+        st1             {v1.8B},     [x3], x2
+        uaddw           v30.8H, v30.8H, v6.8B
+        sqxtun          v4.8B,  v28.8H
+        st1             {v2.8B},     [x3], x2
+        uaddw           v31.8H, v31.8H, v7.8B
+        sqxtun          v5.8B,  v29.8H
+        st1             {v3.8B},     [x3], x2
+        sqxtun          v6.8B,  v30.8H
+        sqxtun          v7.8B,  v31.8H
+        st1             {v4.8B},     [x3], x2
+        st1             {v5.8B},     [x3], x2
+        st1             {v6.8B},     [x3], x2
+        st1             {v7.8B},     [x3], x2
+
+        sub             x1,  x1,  #128                      // restore x1 to the block start; idct8_add4 advances it itself
+        ret
+endfunc
+
+function ff_h264_idct8_dc_add_neon, export=1                // x0 = dst bytes, x1 = block, w2 = stride: adds the rounded first coefficient to an 8x8 area and clears it
+.L_ff_h264_idct8_dc_add_neon:                               // local alias, target of the indirect call in idct8_add4
+        AARCH64_VALID_CALL_TARGET                           // macro from asm.S; presumably marks an indirect-call landing pad - confirm there
+        mov             w3,       #0
+        sxtw            x2,       w2                        // sign-extend the 32-bit stride
+        ld1r            {v31.8H}, [x1]                      // broadcast block[0] to all eight lanes
+        strh            w3,       [x1]                      // clear block[0]; x1 itself is left unchanged
+        ld1             {v0.8B},  [x0], x2                  // dst rows 0..7 go to v0..v7
+        srshr           v31.8H, v31.8H, #6                  // dc = (block[0] + 32) >> 6
+        ld1             {v1.8B},     [x0], x2
+        ld1             {v2.8B},     [x0], x2
+        uaddw           v24.8H, v31.8H, v0.8B               // dc + zero-extended pixels, one row per register
+        ld1             {v3.8B},     [x0], x2
+        uaddw           v25.8H, v31.8H, v1.8B
+        ld1             {v4.8B},     [x0], x2
+        uaddw           v26.8H, v31.8H, v2.8B
+        ld1             {v5.8B},     [x0], x2
+        uaddw           v27.8H, v31.8H, v3.8B
+        ld1             {v6.8B},     [x0], x2
+        uaddw           v28.8H, v31.8H, v4.8B
+        ld1             {v7.8B},     [x0], x2
+        uaddw           v29.8H, v31.8H, v5.8B
+        uaddw           v30.8H, v31.8H, v6.8B
+        uaddw           v31.8H, v31.8H, v7.8B               // last use of dc, so v31 can be overwritten
+        sqxtun          v0.8B,  v24.8H                      // saturate to unsigned 8 bit
+        sqxtun          v1.8B,  v25.8H
+        sqxtun          v2.8B,  v26.8H
+        sqxtun          v3.8B,  v27.8H
+        sub             x0,  x0,  x2, lsl #3                // rewind dst by eight rows
+        st1             {v0.8B},     [x0], x2
+        sqxtun          v4.8B,  v28.8H
+        st1             {v1.8B},     [x0], x2
+        sqxtun          v5.8B,  v29.8H
+        st1             {v2.8B},     [x0], x2
+        sqxtun          v6.8B,  v30.8H
+        st1             {v3.8B},     [x0], x2
+        sqxtun          v7.8B,  v31.8H
+        st1             {v4.8B},     [x0], x2
+        st1             {v5.8B},     [x0], x2
+        st1             {v6.8B},     [x0], x2
+        st1             {v7.8B},     [x0], x2
+        ret
+endfunc
+
+function ff_h264_idct8_add4_neon, export=1                  // loops over four 8x8 blocks; x4 (never written here) is the byte array indexed through scan8
+        mov             x12, x30                            // keep the return address, blr overwrites x30
+        mov             x6,  x0                             // x6 = dest base
+        mov             x5,  x1                             // x5 = table of 32-bit block offsets
+        mov             x1,  x2                             // x1 = coefficient block pointer, as the callees expect
+        mov             w2,  w3                             // w2 = stride, set once; the callees only sign-extend it in place
+        movrel          x7,  scan8
+        mov             w10, #16                            // counts down in steps of 4, so four iterations
+        movrel          x13, .L_ff_h264_idct8_dc_add_neon
+        movrel          x14, .L_ff_h264_idct8_add_neon
+1:      ldrb            w9,  [x7], #4                       // every fourth scan8 entry
+        ldrsw           x0,  [x5], #16                      // every fourth block offset
+        ldrb            w9,  [x4, w9, UXTW]                 // count = x4[scan8 entry]
+        subs            w9,  w9,  #1                        // flags from count - 1
+        b.lt            2f                                  // count == 0: nothing to add for this block
+        ldrsh           w11,  [x1]                          // first coefficient of the block; does not touch the flags
+        add             x0,  x6,  x0                        // dst = dest + offset
+        ccmp            w11, #0,  #4,  eq                   // count == 1: compare the coefficient with 0; otherwise force Z = 1
+        csel            x15, x13, x14, ne                   // dc_add only if count == 1 and the coefficient is nonzero, else the full idct
+        blr             x15
+2:      subs            w10, w10, #4
+        add             x1,  x1,  #128                      // next block (64 int16); both callees leave x1 at the block start
+        b.ne            1b
+        ret             x12
+endfunc
+
+const   scan8
+        .byte           4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8      // entries 0..15: read one by one by idct_add16/add16intra, every 4th by idct8_add4
+        .byte           6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8      // each value is used as a byte index into the array passed in x4
+        .byte           4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8      // NOTE(review): the x + y*8 spelling suggests an 8-wide grid - confirm against the C side
+        .byte           6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
+        .byte           4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8      // entries 16..19: first group read by idct_add8 (table base + 16, i = 0..3)
+        .byte           6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
+        .byte           4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
+        .byte           6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
+        .byte           4+11*8, 5+11*8, 4+12*8, 5+12*8      // entries 32..35: second group read by idct_add8 (i = 16..19)
+        .byte           6+11*8, 7+11*8, 6+12*8, 7+12*8
+        .byte           4+13*8, 5+13*8, 4+14*8, 5+14*8
+        .byte           6+13*8, 7+13*8, 6+14*8, 7+14*8
+endconst
diff --git a/media/ffvpx/libavcodec/aarch64/h264pred_init.c b/media/ffvpx/libavcodec/aarch64/h264pred_init.c
new file mode 100644
index 0000000000..0ae8f70d23
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/h264pred_init.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/h264pred.h"
+
+void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);   /* 16x16 predictors installed for bit_depth == 8 */
+void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride);
+
+void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride);     /* 8x8 predictors installed for bit_depth == 8 */
+void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
+
+void ff_pred16x16_vert_neon_10(uint8_t *src, ptrdiff_t stride);   /* 16x16 predictors installed for bit_depth == 10; no left_dc or 128_dc variant is declared */
+void ff_pred16x16_hor_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_plane_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_top_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+
+void ff_pred8x8_vert_neon_10(uint8_t *src, ptrdiff_t stride);     /* 8x8 predictors installed for bit_depth == 10 */
+void ff_pred8x8_hor_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_plane_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_128_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_left_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_top_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l0t_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0lt_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l00_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0l0_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+
+static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
+                                        const int bit_depth,
+                                        const int chroma_format_idc)
+{
+    /* Codec predicates: some NEON predictors are not installed for these. */
+    const int vpx = codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8;
+    const int dc8_ok  = !vpx && codec_id != AV_CODEC_ID_RV40;
+    const int pl16_ok = dc8_ok && codec_id != AV_CODEC_ID_SVQ3;
+    const int chroma8 = chroma_format_idc <= 1;
+
+    if (bit_depth == 8) {
+        if (chroma8) {
+            h->pred8x8[VERT_PRED8x8  ] = ff_pred8x8_vert_neon;
+            h->pred8x8[HOR_PRED8x8   ] = ff_pred8x8_hor_neon;
+            h->pred8x8[DC_128_PRED8x8] = ff_pred8x8_128_dc_neon;
+            if (!vpx)
+                h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon;
+            if (dc8_ok) {
+                h->pred8x8[DC_PRED8x8              ] = ff_pred8x8_dc_neon;
+                h->pred8x8[LEFT_DC_PRED8x8         ] = ff_pred8x8_left_dc_neon;
+                h->pred8x8[TOP_DC_PRED8x8          ] = ff_pred8x8_top_dc_neon;
+                h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon;
+                h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon;
+                h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon;
+                h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon;
+            }
+        }
+        /* 16x16: every mode except plane is installed regardless of codec. */
+        h->pred16x16[DC_PRED8x8     ] = ff_pred16x16_dc_neon;
+        h->pred16x16[VERT_PRED8x8   ] = ff_pred16x16_vert_neon;
+        h->pred16x16[HOR_PRED8x8    ] = ff_pred16x16_hor_neon;
+        h->pred16x16[LEFT_DC_PRED8x8] = ff_pred16x16_left_dc_neon;
+        h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_neon;
+        h->pred16x16[DC_128_PRED8x8 ] = ff_pred16x16_128_dc_neon;
+        if (pl16_ok)
+            h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_neon;
+    } else if (bit_depth == 10) {
+        if (chroma8) {
+            h->pred8x8[VERT_PRED8x8  ] = ff_pred8x8_vert_neon_10;
+            h->pred8x8[HOR_PRED8x8   ] = ff_pred8x8_hor_neon_10;
+            h->pred8x8[DC_128_PRED8x8] = ff_pred8x8_128_dc_neon_10;
+            if (!vpx)
+                h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon_10;
+            if (dc8_ok) {
+                h->pred8x8[DC_PRED8x8              ] = ff_pred8x8_dc_neon_10;
+                h->pred8x8[LEFT_DC_PRED8x8         ] = ff_pred8x8_left_dc_neon_10;
+                h->pred8x8[TOP_DC_PRED8x8          ] = ff_pred8x8_top_dc_neon_10;
+                h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon_10;
+                h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon_10;
+                h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon_10;
+                h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon_10;
+            }
+        }
+        h->pred16x16[DC_PRED8x8    ] = ff_pred16x16_dc_neon_10;
+        h->pred16x16[VERT_PRED8x8  ] = ff_pred16x16_vert_neon_10;
+        h->pred16x16[HOR_PRED8x8   ] = ff_pred16x16_hor_neon_10;
+        h->pred16x16[TOP_DC_PRED8x8] = ff_pred16x16_top_dc_neon_10;
+        if (pl16_ok)
+            h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_neon_10;
+    }
+}
+
+av_cold void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id,
+                                       int bit_depth, const int chroma_format_idc)
+{
+    const int flags = av_get_cpu_flags();   /* queried once, unconditionally */
+    if (!have_neon(flags))
+        return;                             /* no NEON: leave the table as it is */
+    h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
+}
diff --git a/media/ffvpx/libavcodec/aarch64/h264pred_neon.S b/media/ffvpx/libavcodec/aarch64/h264pred_neon.S
new file mode 100644
index 0000000000..ea37689f34
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/h264pred_neon.S
@@ -0,0 +1,765 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+.macro ldcol.8  rd,  rs,  rt,  n=8,  hi=0
+.if \n >= 8 || \hi == 0
+        ld1             {\rd\().b}[0],  [\rs], \rt          // column load: one byte per step into consecutive lanes, pointer advanced by the stride register each time
+        ld1             {\rd\().b}[1],  [\rs], \rt          // lanes 0..3 are filled when n >= 8, or when hi == 0
+        ld1             {\rd\().b}[2],  [\rs], \rt
+        ld1             {\rd\().b}[3],  [\rs], \rt
+.endif
+.if \n >= 8 || \hi == 1
+        ld1             {\rd\().b}[4],  [\rs], \rt          // lanes 4..7 are filled when n >= 8, or when hi == 1
+        ld1             {\rd\().b}[5],  [\rs], \rt
+        ld1             {\rd\().b}[6],  [\rs], \rt
+        ld1             {\rd\().b}[7],  [\rs], \rt
+.endif
+.if \n == 16
+        ld1             {\rd\().b}[8],  [\rs], \rt          // lanes 8..15 only when n == 16
+        ld1             {\rd\().b}[9],  [\rs], \rt
+        ld1             {\rd\().b}[10], [\rs], \rt
+        ld1             {\rd\().b}[11], [\rs], \rt
+        ld1             {\rd\().b}[12], [\rs], \rt
+        ld1             {\rd\().b}[13], [\rs], \rt
+        ld1             {\rd\().b}[14], [\rs], \rt
+        ld1             {\rd\().b}[15], [\rs], \rt
+.endif
+.endm
+
+function ff_pred16x16_128_dc_neon, export=1                 // x0 = dst, x1 = stride: fills the 16x16 block with the constant 128
+        movi            v0.16b,  #128
+        b               .L_pred16x16_dc_end                 // shared store loop inside ff_pred16x16_dc_neon
+endfunc
+
+function ff_pred16x16_top_dc_neon, export=1                 // x0 = dst, x1 = stride: fills the block with the rounded mean of the 16 bytes above it
+        sub             x2,  x0,  x1                        // x2 = row above the block
+        ld1             {v0.16b},  [x2]
+        uaddlv          h0,  v0.16b                         // 16-bit sum of the 16 bytes
+        rshrn           v0.8b,  v0.8h,  #4                  // (sum + 8) >> 4 in byte lane 0
+        dup             v0.16b, v0.b[0]                     // broadcast to all 16 lanes
+        b               .L_pred16x16_dc_end                 // shared store loop inside ff_pred16x16_dc_neon
+endfunc
+
+function ff_pred16x16_left_dc_neon, export=1                // x0 = dst, x1 = stride: fills the block with the rounded mean of the 16 bytes left of it
+        sub             x2,  x0,  #1                        // x2 = byte left of the first row
+        ldcol.8         v0,  x2,  x1, 16                    // gather the 16-byte column into v0
+        uaddlv          h0,  v0.16b                         // 16-bit sum of the 16 bytes
+        rshrn           v0.8b,  v0.8h,  #4                  // (sum + 8) >> 4 in byte lane 0
+        dup             v0.16b, v0.b[0]                     // broadcast to all 16 lanes
+        b               .L_pred16x16_dc_end                 // shared store loop inside ff_pred16x16_dc_neon
+endfunc
+
+function ff_pred16x16_dc_neon, export=1                     // x0 = dst, x1 = stride: fills the block with the rounded mean of the 16 top and 16 left bytes
+        sub             x2,  x0,  x1                        // x2 = row above the block
+        sub             x3,  x0,  #1                        // x3 = byte left of the first row
+        ld1             {v0.16b}, [x2]
+        ldcol.8         v1,  x3,  x1, 16                    // gather the left column into v1
+        uaddlv          h0,  v0.16b                         // sum of the top row
+        uaddlv          h1,  v1.16b                         // sum of the left column
+        add             v0.4h,  v0.4h,  v1.4h               // lane 0 = sum of all 32 neighbours
+        rshrn           v0.8b,  v0.8h,  #5                  // (sum + 16) >> 5 in byte lane 0
+        dup             v0.16b, v0.b[0]                     // broadcast to all 16 lanes
+.L_pred16x16_dc_end:                                        // shared tail: the other 16x16 dc variants branch here with the fill value in v0
+        mov             w3,  #8                             // 8 iterations, two rows each
+6:      st1             {v0.16b}, [x0], x1
+        subs            w3,  w3,  #1
+        st1             {v0.16b}, [x0], x1
+        b.ne            6b
+        ret
+endfunc
+
+function ff_pred16x16_hor_neon, export=1                    // x0 = dst, x1 = stride: each of the 16 rows is filled with the byte to its left
+        sub             x2,  x0,  #1                        // x2 = byte left of the first row
+        mov             w3,  #16                            // row counter
+1:      ld1r            {v0.16b}, [x2], x1                  // broadcast the left byte of this row, advance to the next row
+        subs            w3,  w3,  #1
+        st1             {v0.16b}, [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred16x16_vert_neon, export=1                   // x0 = dst, x1 = stride: copies the row above the block into all 16 rows
+        sub             x2,  x0,  x1                        // x2 = row above the block
+        add             x1,  x1,  x1                        // x1 = 2 * stride
+        ld1             {v0.16b}, [x2], x1                  // load the row above; x2 now points at row 1
+        mov             w3,  #8                             // 8 iterations, two rows each
+1:      subs            w3,  w3,  #1
+        st1             {v0.16b}, [x0], x1                  // even rows
+        st1             {v0.16b}, [x2], x1                  // odd rows
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred16x16_plane_neon, export=1                  // x0 = dst, x1 = stride: 16x16 plane prediction from the top row and left column (t[] and l[] below, index -1 = corner)
+        sub             x3,  x0,  x1                        // x3 = row above the block
+        movrel          x4,  p16weight                      // weights 1..8
+        add             x2,  x3,  #8                        // x2 = &t[8]
+        sub             x3,  x3,  #1                        // x3 = &t[-1]
+        ld1             {v0.8b},  [x3]                      // t[-1..6]
+        ld1             {v2.8b},  [x2], x1                  // t[8..15]
+        ldcol.8         v1,  x3,  x1                        // l[-1..6], x3 ends up on row 7
+        add             x3,  x3,  x1                        // skip row 7
+        ldcol.8         v3,  x3,  x1                        // l[8..15]
+        rev64           v0.8b,  v0.8b                       // t[6], t[5], ..., t[-1]
+        rev64           v1.8b,  v1.8b                       // l[6], l[5], ..., l[-1]
+        uaddl           v7.8h,  v2.8b,  v3.8b               // lane 7 = t[15] + l[15]
+        usubl           v2.8h,  v2.8b,  v0.8b               // t[8+i] - t[6-i]
+        usubl           v3.8h,  v3.8b,  v1.8b               // l[8+i] - l[6-i]
+        ld1             {v0.8h},     [x4]                   // v0 = 1..8
+        mul             v2.8h,  v2.8h,  v0.8h               // weight each difference by i + 1
+        mul             v3.8h,  v3.8h,  v0.8h
+        addp            v2.8h,  v2.8h,  v3.8h               // pairwise reduction ...
+        addp            v2.8h,  v2.8h,  v2.8h
+        addp            v2.4h,  v2.4h,  v2.4h               // ... lane 0 = H (top sum), lane 1 = V (left sum)
+        sshll           v3.4s,  v2.4h,  #2                  // 4 * sum, widened to 32 bit
+        saddw           v2.4s,  v3.4s,  v2.4h               // 5 * sum
+        rshrn           v4.4h,  v2.4s,  #6                  // b = (5*H + 32) >> 6 in lane 0, c = (5*V + 32) >> 6 in lane 1
+        trn2            v5.4h,  v4.4h,  v4.4h               // v5 lane 0 = c
+        add             v2.4h,  v4.4h,  v5.4h               // b + c
+        shl             v3.4h,  v2.4h,  #3
+        ext             v7.16b, v7.16b, v7.16b, #14         // rotate so that t[15] + l[15] sits in lane 0
+        sub             v3.4h,  v3.4h,  v2.4h   // 7 * (b + c)
+        add             v7.4h,  v7.4h,  v0.4h               // lane 0: t[15] + l[15] + 1 (weight lane 0 is 1)
+        shl             v2.4h,  v7.4h,  #4
+        sub             v2.4h,  v2.4h,  v3.4h               // lane 0: a = 16 * (t[15] + l[15] + 1) - 7 * (b + c)
+        shl             v3.4h,  v4.4h,  #4                  // 16 * b
+        ext             v0.16b, v0.16b, v0.16b, #14         // weights rotated to 8, 1, 2, ..., 7
+        sub             v6.4h,  v5.4h,  v3.4h               // lane 0: c - 16 * b
+        mov             v0.h[0],  wzr                       // weights become 0, 1, ..., 7
+        mul             v0.8h,  v0.8h,  v4.h[0]             // 0, b, 2b, ..., 7b
+        dup             v1.8h,  v2.h[0]                     // a in every lane
+        dup             v2.8h,  v4.h[0]                     // b in every lane
+        dup             v3.8h,  v6.h[0]                     // c - 16 * b in every lane
+        shl             v2.8h,  v2.8h,  #3                  // 8 * b: step from the left to the right half of a row
+        add             v1.8h,  v1.8h,  v0.8h               // first row, columns 0..7: a + x * b
+        add             v3.8h,  v3.8h,  v2.8h               // c - 8 * b: step from the right half back to the next row
+        mov             w3,  #16                            // row counter
+1:
+        sqshrun         v0.8b,  v1.8h,  #5                  // columns 0..7: >> 5 with unsigned saturation
+        add             v1.8h,  v1.8h,  v2.8h
+        sqshrun2        v0.16b, v1.8h,  #5                  // columns 8..15
+        add             v1.8h,  v1.8h,  v3.8h               // net change per row is + c
+        subs            w3,  w3,  #1
+        st1             {v0.16b}, [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+const   p16weight, align=4
+        .short          1,2,3,4,5,6,7,8                     // loaded by both plane predictors; the 8x8 one derives its column ramp 0..7 from it
+endconst
+const   p8weight, align=4
+        .short          1,2,3,4,1,2,3,4                     // loaded by ff_pred8x8_plane_neon: one 1..4 ramp for the top half, one for the left half
+endconst
+
+function ff_pred8x8_hor_neon, export=1                      // x0 = dst, x1 = stride: each of the 8 rows is filled with the byte to its left
+        sub             x2,  x0,  #1                        // x2 = byte left of the first row
+        mov             w3,  #8                             // row counter
+1:      ld1r            {v0.8b},  [x2], x1                  // broadcast the left byte of this row, advance to the next row
+        subs            w3,  w3,  #1
+        st1             {v0.8b},  [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred8x8_vert_neon, export=1                     // x0 = dst, x1 = stride: copies the 8 bytes above the block into all 8 rows
+        sub             x2,  x0,  x1                        // x2 = row above the block
+        lsl             x1,  x1,  #1                        // x1 = 2 * stride
+        ld1             {v0.8b},  [x2], x1                  // load the row above; x2 now points at row 1
+        mov             w3,  #4                             // 4 iterations, two rows each
+1:      subs            w3,  w3,  #1
+        st1             {v0.8b},  [x0], x1                  // even rows
+        st1             {v0.8b},  [x2], x1                  // odd rows
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred8x8_plane_neon, export=1                    // x0 = dst, x1 = stride: 8x8 plane prediction from the top row and left column (t[] and l[] below, index -1 = corner)
+        sub             x3,  x0,  x1                        // x3 = row above the block
+        movrel          x4,  p8weight                       // weights 1..4, twice
+        movrel          x5,  p16weight                      // weights 1..8, used for the column ramp
+        add             x2,  x3,  #4                        // x2 = &t[4]
+        sub             x3,  x3,  #1                        // x3 = &t[-1]
+        ld1             {v0.s}[0],  [x3]                    // v0 bytes 0..3 = t[-1..2]
+        ld1             {v2.s}[0],  [x2], x1                // v2 bytes 0..3 = t[4..7]
+        ldcol.8         v0,  x3,  x1,  4,  hi=1             // v0 bytes 4..7 = l[-1..2], x3 ends up on row 3
+        add             x3,  x3,  x1                        // skip row 3
+        ldcol.8         v3,  x3,  x1,  4                    // v3 bytes 0..3 = l[4..7]
+        uaddl           v7.8h,  v2.8b,  v3.8b               // lane 3 = t[7] + l[7]
+        rev32           v0.8b,  v0.8b                       // t[2], t[1], t[0], t[-1], l[2], l[1], l[0], l[-1]
+        trn1            v2.2s,  v2.2s,  v3.2s               // v2 = t[4..7] followed by l[4..7]
+        usubl           v2.8h,  v2.8b,  v0.8b               // t[4+i] - t[2-i] in lanes 0..3, same for l in lanes 4..7
+        ld1             {v6.8h},  [x4]
+        mul             v2.8h,  v2.8h,  v6.8h               // weight each difference by i + 1
+        ld1             {v0.8h},  [x5]                      // v0 = 1..8
+        saddlp          v2.4s,  v2.8h                       // pairwise reduction ...
+        addp            v2.4s,  v2.4s,  v2.4s               // ... lane 0 = H (top sum), lane 1 = V (left sum)
+        shl             v3.4s,  v2.4s,  #4
+        add             v2.4s,  v3.4s,  v2.4s               // 17 * sum
+        rshrn           v5.4h,  v2.4s,  #5                  // b = (17*H + 16) >> 5 in lane 0, c = (17*V + 16) >> 5 in lane 1
+        addp            v2.4h,  v5.4h,  v5.4h               // b + c
+        shl             v3.4h,  v2.4h,  #1
+        add             v3.4h,  v3.4h,  v2.4h               // 3 * (b + c)
+        rev64           v7.4h,  v7.4h                       // bring t[7] + l[7] to lane 0
+        add             v7.4h,  v7.4h,  v0.4h               // lane 0: t[7] + l[7] + 1 (weight lane 0 is 1)
+        shl             v2.4h,  v7.4h,  #4
+        sub             v2.4h,  v2.4h,  v3.4h               // lane 0: a = 16 * (t[7] + l[7] + 1) - 3 * (b + c)
+        ext             v0.16b, v0.16b, v0.16b, #14         // weights rotated to 8, 1, 2, ..., 7
+        mov             v0.h[0],  wzr                       // weights become 0, 1, ..., 7
+        mul             v0.8h,  v0.8h,  v5.h[0]             // 0, b, 2b, ..., 7b
+        dup             v1.8h,  v2.h[0]                     // a in every lane
+        dup             v2.8h,  v5.h[1]                     // c in every lane: per-row step
+        add             v1.8h,  v1.8h,  v0.8h               // first row: a + x * b
+        mov             w3,  #8                             // row counter
+1:
+        sqshrun         v0.8b,  v1.8h,  #5                  // >> 5 with unsigned saturation
+        subs            w3,  w3,  #1
+        add             v1.8h,  v1.8h,  v2.8h               // next row: + c
+        st1             {v0.8b},  [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred8x8_128_dc_neon, export=1       // fill the 8x8 block with the constant 128
+        movi            v0.8b,  #128            // pattern for rows 0-3
+        movi            v1.8b,  #128            // pattern for rows 4-7
+        b               .L_pred8x8_dc_end       // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_top_dc_neon, export=1       // DC prediction from the row above only: one DC per 4-column half
+        sub             x2,  x0,  x1            // x2 -> row above the block
+        ld1             {v0.8b},  [x2]
+        uaddlp          v0.4h,  v0.8b           // pairwise sums widened to 16 bits
+        addp            v0.4h,  v0.4h,  v0.4h   // lanes 0/1 = sums of bytes 0..3 and 4..7
+        zip1            v0.8h,  v0.8h,  v0.8h   // duplicate each sum into two adjacent lanes
+        rshrn           v2.8b,  v0.8h,  #2      // rounded divide by 4
+        zip1            v0.8b,  v2.8b,  v2.8b   // left-half DC x4 | right-half DC x4 (rows 0-3)
+        zip1            v1.8b,  v2.8b,  v2.8b   // same pattern for rows 4-7
+        b               .L_pred8x8_dc_end       // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_left_dc_neon, export=1      // DC prediction from the left column only: one DC per 4-row half
+        sub             x2,  x0,  #1            // x2 -> byte left of row 0
+        ldcol.8         v0,  x2,  x1            // left-column bytes (ldcol.8 is defined outside this excerpt; presumably 8 rows by default)
+        uaddlp          v0.4h,  v0.8b           // pairwise sums widened to 16 bits
+        addp            v0.4h,  v0.4h,  v0.4h   // lanes 0/1 = sums of column bytes 0..3 and 4..7
+        rshrn           v2.8b,  v0.8h,  #2      // rounded divide by 4
+        dup             v1.8b,  v2.b[1]         // pattern for rows 4-7
+        dup             v0.8b,  v2.b[0]         // pattern for rows 0-3
+        b               .L_pred8x8_dc_end       // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_dc_neon, export=1           // DC prediction from the row above and the left column: one DC per 4x4 quadrant
+        sub             x2,  x0,  x1            // x2 -> row above the block
+        sub             x3,  x0,  #1            // x3 -> byte left of row 0
+        ld1             {v0.8b}, [x2]
+        ldcol.8         v1,  x3,  x1            // left-column bytes (ldcol.8 is defined outside this excerpt)
+        uaddlp          v0.4h,  v0.8b           // pair sums of the top row
+        uaddlp          v1.4h,  v1.8b           // pair sums of the left column
+        trn1            v2.2s,  v0.2s,  v1.2s   // first-half pair sums: top | left
+        trn2            v3.2s,  v0.2s,  v1.2s   // second-half pair sums: top | left
+        addp            v4.4h,  v2.4h,  v3.4h   // 4-sample sums: top 0..3, left 0..3, top 4..7, left 4..7
+        addp            v5.4h,  v4.4h,  v4.4h   // 8-sample sums: first halves, second halves
+        rshrn           v6.8b,  v5.8h,  #3      // rounded divide by 8
+        rshrn           v7.8b,  v4.8h,  #2      // rounded divide by 4
+        dup             v0.8b,  v6.b[0]         // top-left quadrant: first halves of top and left
+        dup             v2.8b,  v7.b[2]         // top-right quadrant: top 4..7 only
+        dup             v1.8b,  v7.b[3]         // bottom-left quadrant: left 4..7 only
+        dup             v3.8b,  v6.b[1]         // bottom-right quadrant: second halves of top and left
+        zip1            v0.2s,  v0.2s,  v2.2s   // row pattern for rows 0-3
+        zip1            v1.2s,  v1.2s,  v3.2s   // row pattern for rows 4-7
+.L_pred8x8_dc_end:                              // shared tail: expects v0 = rows 0-3 pattern, v1 = rows 4-7 pattern
+        mov             w3,  #4
+        add             x2,  x0,  x1,  lsl #2   // x2 -> row 4
+6:      subs            w3,  w3,  #1
+        st1             {v0.8b},  [x0], x1      // rows 0..3
+        st1             {v1.8b},  [x2], x1      // rows 4..7
+        b.ne            6b
+        ret
+endfunc
+
+function ff_pred8x8_l0t_dc_neon, export=1       // DC prediction from the row above plus left-column bytes starting at row 0
+        sub             x2,  x0,  x1            // x2 -> row above the block
+        sub             x3,  x0,  #1            // x3 -> byte left of row 0
+        ld1             {v0.8b},  [x2]
+        ldcol.8         v1,  x3,  x1,  4        // left-column bytes (ldcol.8 is defined outside this excerpt; presumably 4 rows into lanes 0-3)
+        zip1            v0.4s,  v0.4s,  v1.4s   // interleave 32-bit words: top 0..3, v1 word 0, top 4..7, v1 word 1
+        uaddlp          v0.8h,  v0.16b
+        addp            v0.8h,  v0.8h,  v0.8h   // per-word sums; lanes 0 and 2 are the two top-row halves
+        addp            v1.4h,  v0.4h,  v0.4h   // lane 0 = lane 0 + lane 1 of the sums
+        rshrn           v2.8b,  v0.8h,  #2      // rounded divide by 4
+        rshrn           v3.8b,  v1.8h,  #3      // rounded divide by 8
+        dup             v4.8b,  v3.b[0]         // top-left quadrant: top 0..3 combined with v1 word 0
+        dup             v6.8b,  v2.b[2]         // both right quadrants: top 4..7
+        dup             v5.8b,  v2.b[0]         // bottom-left quadrant: top 0..3
+        zip1            v0.2s,  v4.2s,  v6.2s   // row pattern for rows 0-3
+        zip1            v1.2s,  v5.2s,  v6.2s   // row pattern for rows 4-7
+        b               .L_pred8x8_dc_end       // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_l00_dc_neon, export=1       // rows 0-3: DC of left-column bytes starting at row 0; rows 4-7: constant 128
+        sub             x2,  x0,  #1            // x2 -> byte left of row 0
+        ldcol.8         v0,  x2,  x1,  4        // left-column bytes (ldcol.8 is defined outside this excerpt; presumably 4 rows into lanes 0-3)
+        uaddlp          v0.4h,  v0.8b
+        addp            v0.4h,  v0.4h,  v0.4h   // lane 0 = sum of bytes 0..3
+        rshrn           v0.8b,  v0.8h,  #2      // rounded divide by 4
+        movi            v1.8b,  #128            // pattern for rows 4-7
+        dup             v0.8b,  v0.b[0]         // pattern for rows 0-3
+        b               .L_pred8x8_dc_end       // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_0lt_dc_neon, export=1       // DC prediction from the row above plus left-column bytes starting at row 4
+        add             x3,  x0,  x1,  lsl #2   // x3 -> row 4
+        sub             x2,  x0,  x1            // x2 -> row above the block
+        sub             x3,  x3,  #1            // x3 -> byte left of row 4
+        ld1             {v0.8b},  [x2]
+        ldcol.8         v1,  x3,  x1,  4,  hi=1 // left-column bytes (ldcol.8 is defined outside this excerpt; presumably 4 rows into lanes 4-7)
+        zip1            v0.4s,  v0.4s,  v1.4s   // interleave 32-bit words: top 0..3, v1 word 0, top 4..7, v1 word 1
+        uaddlp          v0.8h,  v0.16b
+        addp            v0.8h,  v0.8h,  v0.8h   // per-word sums; lanes 0 and 2 are the two top-row halves
+        addp            v1.4h,  v0.4h,  v0.4h   // lane 1 = lane 2 + lane 3 of the sums
+        rshrn           v2.8b,  v0.8h,  #2      // rounded divide by 4
+        rshrn           v3.8b,  v1.8h,  #3      // rounded divide by 8
+        dup             v4.8b,  v2.b[0]         // top-left quadrant: top 0..3
+        dup             v5.8b,  v2.b[3]         // bottom-left quadrant: v1 word 1
+        dup             v6.8b,  v2.b[2]         // top-right quadrant: top 4..7
+        dup             v7.8b,  v3.b[1]         // bottom-right quadrant: top 4..7 combined with v1 word 1
+        zip1            v0.2s,  v4.2s,  v6.2s   // row pattern for rows 0-3
+        zip1            v1.2s,  v5.2s,  v7.2s   // row pattern for rows 4-7
+        b               .L_pred8x8_dc_end       // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_0l0_dc_neon, export=1       // rows 0-3: constant 128; rows 4-7: DC of left-column bytes starting at row 4
+        add             x2,  x0,  x1,  lsl #2   // x2 -> row 4
+        sub             x2,  x2,  #1            // x2 -> byte left of row 4
+        ldcol.8         v1,  x2,  x1,  4        // left-column bytes (ldcol.8 is defined outside this excerpt; presumably 4 rows into lanes 0-3)
+        uaddlp          v2.4h,  v1.8b
+        addp            v2.4h,  v2.4h,  v2.4h   // lane 0 = sum of bytes 0..3
+        rshrn           v1.8b,  v2.8h,  #2      // rounded divide by 4
+        movi            v0.8b,  #128            // pattern for rows 0-3
+        dup             v1.8b,  v1.b[0]         // pattern for rows 4-7
+        b               .L_pred8x8_dc_end       // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+.macro ldcol.16  rd,  rs,  rt,  n=4,  hi=0      // gather 16-bit elements from [\rs] into lanes of \rd, advancing \rs by \rt after each load
+.if \n >= 4 && \hi == 0                         // lanes 0-3: emitted whenever hi=0 and n is at least 4
+        ld1             {\rd\().h}[0],  [\rs], \rt
+        ld1             {\rd\().h}[1],  [\rs], \rt
+        ld1             {\rd\().h}[2],  [\rs], \rt
+        ld1             {\rd\().h}[3],  [\rs], \rt
+.endif
+.if \n == 8 || \hi == 1                         // lanes 4-7: emitted for n=8, or on their own when hi=1
+        ld1             {\rd\().h}[4],  [\rs], \rt
+        ld1             {\rd\().h}[5],  [\rs], \rt
+        ld1             {\rd\().h}[6],  [\rs], \rt
+        ld1             {\rd\().h}[7],  [\rs], \rt
+.endif
+.endm
+
+// slower than C
+/*
+function ff_pred16x16_128_dc_neon_10, export=1
+        movi            v0.8h, #2, lsl #8 // 512, 1 << (bit_depth - 1)
+
+        b               .L_pred16x16_dc_10_end
+endfunc
+*/
+
+function ff_pred16x16_top_dc_neon_10, export=1  // 16x16 DC prediction from the row above, 16-bit samples
+        sub             x2,  x0,  x1            // x2 -> row above the block
+
+        ld1             {v0.8h, v1.8h}, [x2]    // 16 samples of the row above
+
+        add             v0.8h, v0.8h, v1.8h
+        addv            h0, v0.8h               // sum of all 16 samples
+
+        urshr           v0.4h,  v0.4h,  #4      // rounded divide by 16
+        dup             v0.8h, v0.h[0]
+        b               .L_pred16x16_dc_10_end  // shared store loop inside ff_pred16x16_dc_neon_10
+endfunc
+
+// slower than C
+/*
+function ff_pred16x16_left_dc_neon_10, export=1
+        sub             x2,  x0,  #2 // access to the "left" column
+        ldcol.16        v0,  x2,  x1,  8
+        ldcol.16        v1,  x2,  x1,  8 // load "left" column
+
+        add             v0.8h, v0.8h, v1.8h
+        addv            h0,  v0.8h
+
+        urshr           v0.4h,  v0.4h,  #4
+        dup             v0.8h, v0.h[0]
+        b               .L_pred16x16_dc_10_end
+endfunc
+*/
+
+function ff_pred16x16_dc_neon_10, export=1      // 16x16 DC prediction from the row above and the left column, 16-bit samples
+        sub             x2,  x0,  x1 // access to the "top" row
+        sub             x3,  x0,  #2 // access to the "left" column
+
+        ld1             {v0.8h, v1.8h}, [x2]
+        ldcol.16        v2,  x3,  x1,  8
+        ldcol.16        v3,  x3,  x1,  8 // load pixels in "top" row and "left" col
+
+        add             v0.8h, v0.8h, v1.8h     // fold the 16 top samples into 8 lanes
+        add             v2.8h, v2.8h, v3.8h     // fold the 16 left samples into 8 lanes
+        add             v0.8h, v0.8h, v2.8h
+        addv            h0, v0.8h               // sum of all 32 samples
+
+        urshr           v0.4h,  v0.4h,  #5      // rounded divide by 32
+        dup             v0.8h,  v0.h[0]
+.L_pred16x16_dc_10_end:                         // shared tail: expects the fill value replicated in v0.8h
+        mov             v1.16b,  v0.16b         // second half of each 16-sample row
+        mov             w3,  #8                 // 8 iterations x 2 stores = 16 rows
+6:      st1             {v0.8h, v1.8h}, [x0], x1
+        subs            w3,  w3,  #1
+        st1             {v0.8h, v1.8h}, [x0], x1
+        b.ne            6b
+        ret
+endfunc
+
+function ff_pred16x16_hor_neon_10, export=1     // fill each of 16 rows with the 16-bit sample just left of that row
+        sub             x2,  x0,  #2            // x2 -> sample preceding the start of row 0
+        add             x3,  x0,  #16           // x3 -> second group of 8 samples within the row
+
+        mov             w4,  #16                // row counter
+1:      ld1r            {v0.8h},  [x2],  x1     // replicate the left sample into 8 lanes, then step x2 by x1
+        subs            w4,  w4,  #1
+        st1             {v0.8h},  [x0],  x1     // samples 0..7 of the row
+        st1             {v0.8h},  [x3],  x1     // samples 8..15 of the row
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred16x16_vert_neon_10, export=1    // copy the 16 samples above the block into all 16 rows, 16-bit samples
+        sub             x2,  x0,  x1            // x2 -> row above the block
+        add             x1,  x1,  x1            // double the step: x0 and x2 each walk alternate rows
+
+        ld1             {v0.8h, v1.8h},  [x2],  x1 // load the row above; x2 now points at row 1
+
+        mov             w3,  #8                 // 8 iterations x 2 stores = 16 rows
+1:      subs            w3,  w3,  #1
+        st1             {v0.8h, v1.8h},  [x0],  x1 // even rows
+        st1             {v0.8h, v1.8h},  [x2],  x1 // odd rows
+
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred16x16_plane_neon_10, export=1   // 16x16 plane prediction on 16-bit samples, results clipped to 0..1023
+        sub             x3,  x0,  x1            // x3 -> row above the block
+        movrel          x4,  p16weight
+        add             x2,  x3,  #16           // x2 -> samples 8..15 of the row above
+        sub             x3,  x3,  #2            // x3 -> sample left of the row above (corner)
+        ld1             {v0.8h},  [x3]          // 8 samples starting at the corner
+        ld1             {v2.8h},  [x2], x1      // samples 8..15 of the row above
+        ldcol.16        v1,  x3,  x1, 8         // 8 column samples from the corner downwards
+        add             x3,  x3,  x1            // skip one row of the column
+        ldcol.16        v3,  x3,  x1, 8         // next 8 column samples
+
+        rev64           v16.8h,  v0.8h
+        rev64           v17.8h,  v1.8h
+        ext             v0.16b, v16.16b, v16.16b, #8 // rev64 + ext #8 = all 8 lanes reversed (row)
+        ext             v1.16b, v17.16b, v17.16b, #8 // same for the column
+
+        add             v7.8h,  v2.8h,  v3.8h   // lane 7 is used below
+        sub             v2.8h,  v2.8h,  v0.8h   // differences of mirrored row samples
+        sub             v3.8h,  v3.8h,  v1.8h   // differences of mirrored column samples
+        ld1             {v0.8h},     [x4]       // weights 1..8
+        mul             v2.8h,  v2.8h,  v0.8h
+        mul             v3.8h,  v3.8h,  v0.8h
+        addp            v2.8h,  v2.8h,  v3.8h
+        addp            v2.8h,  v2.8h,  v2.8h
+        addp            v2.4h,  v2.4h,  v2.4h   // lane 0 = weighted row sum, lane 1 = weighted column sum
+        sshll           v3.4s,  v2.4h,  #2
+        saddw           v2.4s,  v3.4s,  v2.4h   // 5 * sum
+        rshrn           v4.4h,  v2.4s,  #6      // (5 * sum + 32) >> 6: lane 0 = b, lane 1 = c
+        trn2            v5.4h,  v4.4h,  v4.4h   // lane 0 = c
+        add             v2.4h,  v4.4h,  v5.4h   // lane 0 = b + c
+        shl             v3.4h,  v2.4h,  #3
+        ext             v7.16b, v7.16b, v7.16b, #14 // move lane 7 of the sums to lane 0
+        sub             v3.4h,  v3.4h,  v2.4h   // 7 * (b + c)
+        add             v7.4h,  v7.4h,  v0.4h   // lane 0: + 1 (first weight)
+        shl             v2.4h,  v7.4h,  #4      // * 16
+        ssubl           v2.4s,  v2.4h,  v3.4h   // lane 0 = 32-bit accumulator of the first pixel
+        shl             v3.4h,  v4.4h,  #4      // lane 0 = 16 * b
+        ext             v0.16b, v0.16b, v0.16b, #14 // rotate weights to 8,1,2,...,7
+        ssubl           v6.4s,  v5.4h,  v3.4h   // lane 0 = c - 16 * b
+
+        mov             v0.h[0],  wzr           // weights -> 0,1,2,...,7 (column index)
+        mul             v0.8h,  v0.8h,  v4.h[0] // column index * b
+        dup             v16.4s, v2.s[0]         // accumulators, columns 0..3
+        dup             v17.4s, v2.s[0]         // accumulators, columns 4..7
+        dup             v2.8h,  v4.h[0]
+        dup             v3.4s,  v6.s[0]
+        shl             v2.8h,  v2.8h,  #3      // 8 * b: step from columns 0..7 to columns 8..15
+        saddw           v16.4s, v16.4s, v0.4h
+        saddw2          v17.4s, v17.4s, v0.8h
+        saddw           v3.4s,  v3.4s,  v2.4h   // c - 8 * b: step from columns 8..15 back to columns 0..7 of the next row
+
+        mov             w3,      #16            // 16 rows
+        mvni            v4.8h,   #0xFC, lsl #8 // 1023 for clipping
+1:
+        sqshrun         v0.4h,  v16.4s, #5      // columns 0..7: >> 5 with unsigned saturation
+        sqshrun2        v0.8h,  v17.4s, #5
+        saddw           v16.4s, v16.4s, v2.4h
+        saddw           v17.4s, v17.4s, v2.4h
+        sqshrun         v1.4h,  v16.4s, #5      // columns 8..15
+        sqshrun2        v1.8h,  v17.4s, #5
+        add             v16.4s, v16.4s, v3.4s
+        add             v17.4s, v17.4s, v3.4s
+
+        subs            w3,  w3,  #1
+
+        smin            v0.8h,  v0.8h,  v4.8h   // upper clip at 1023
+        smin            v1.8h,  v1.8h,  v4.8h
+
+        st1             {v0.8h, v1.8h}, [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred8x8_hor_neon_10, export=1       // fill each of 8 rows with the 16-bit sample just left of that row
+        sub             x2,  x0,  #2            // x2 -> sample preceding the start of row 0
+        mov             w3,  #8                 // row counter
+
+1:      ld1r            {v0.8h},  [x2], x1      // replicate the left sample into 8 lanes, then step x2 by x1
+        subs            w3,  w3,  #1
+        st1             {v0.8h},  [x0], x1      // write 8 samples at x0, then step x0 by x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred8x8_vert_neon_10, export=1      // copy the 8 samples above the block into all 8 rows, 16-bit samples
+        sub             x2,  x0,  x1            // x2 -> row above the block
+        lsl             x1,  x1,  #1            // double the step: x0 and x2 each walk alternate rows
+
+        ld1             {v0.8h},  [x2], x1      // load the row above; x2 now points at row 1
+        mov             w3,  #4                 // 4 iterations x 2 stores = 8 rows
+1:      subs            w3,  w3,  #1
+        st1             {v0.8h},  [x0], x1      // rows 0, 2, 4, 6
+        st1             {v0.8h},  [x2], x1      // rows 1, 3, 5, 7
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred8x8_plane_neon_10, export=1     // 8x8 plane prediction on 16-bit samples, results clipped to 0..1023
+        sub             x3,  x0,  x1            // x3 -> row above the block
+        movrel          x4,  p8weight
+        movrel          x5,  p16weight
+        add             x2,  x3,  #8            // x2 -> samples 4..7 of the row above
+        sub             x3,  x3,  #2            // x3 -> sample left of the row above (corner)
+        ld1             {v0.d}[0],  [x3]        // 4 samples starting at the corner
+        ld1             {v2.d}[0],  [x2], x1    // samples 4..7 of the row above
+        ldcol.16        v0,  x3,  x1,  hi=1     // 4 column samples from the corner downwards, into lanes 4-7
+        add             x3,  x3,  x1            // skip one row of the column
+        ldcol.16        v3,  x3,  x1,  4        // next 4 column samples, into lanes 0-3
+        add             v7.8h,  v2.8h,  v3.8h   // lane 3 is used below
+        rev64           v0.8h,  v0.8h           // reverse each 4-lane group so it lines up with the mirrored samples
+        trn1            v2.2d,  v2.2d,  v3.2d   // v2 = row samples 4..7 | column samples from v3
+        sub             v2.8h,  v2.8h,  v0.8h   // differences of mirrored samples
+        ld1             {v6.8h},  [x4]          // weights 1,2,3,4,1,2,3,4
+        mul             v2.8h,  v2.8h,  v6.8h
+        ld1             {v0.8h},  [x5]          // weights 1..8
+        saddlp          v2.4s,  v2.8h
+        addp            v2.4s,  v2.4s,  v2.4s   // lane 0 = weighted row sum, lane 1 = weighted column sum
+        shl             v3.4s,  v2.4s,  #4
+        add             v2.4s,  v3.4s,  v2.4s   // 17 * sum
+        rshrn           v5.4h,  v2.4s,  #5      // (17 * sum + 16) >> 5: the two gradients in lanes 0 and 1
+        addp            v2.4h,  v5.4h,  v5.4h   // lane 0 = sum of both gradients
+        shl             v3.4h,  v2.4h,  #1
+        add             v3.4h,  v3.4h,  v2.4h   // 3 * (sum of gradients)
+        rev64           v7.4h,  v7.4h           // move lane 3 of the sums to lane 0
+        add             v7.4h,  v7.4h,  v0.4h   // lane 0: + 1 (first weight)
+        shl             v2.4h,  v7.4h,  #4      // * 16
+        ssubl           v2.4s,  v2.4h,  v3.4h   // lane 0 = 32-bit accumulator of the first pixel
+        ext             v0.16b, v0.16b, v0.16b, #14 // rotate weights to 8,1,2,...,7
+        mov             v0.h[0],  wzr           // -> 0,1,2,...,7 (column index)
+        mul             v0.8h,  v0.8h,  v5.h[0] // column index * first gradient
+        dup             v1.4s,  v2.s[0]         // accumulators, columns 0..3
+        dup             v2.4s,  v2.s[0]         // accumulators, columns 4..7
+        dup             v3.8h,  v5.h[1]         // per-row increment = second gradient
+        saddw           v1.4s,  v1.4s,  v0.4h
+        saddw2          v2.4s,  v2.4s,  v0.8h
+        mov             w3,  #8                 // 8 rows
+        mvni            v4.8h,  #0xFC,  lsl #8 // 1023 for clipping
+1:
+        sqshrun         v0.4h,  v1.4s,  #5      // >> 5 with unsigned saturation
+        sqshrun2        v0.8h,  v2.4s,  #5
+
+        saddw           v1.4s,  v1.4s,  v3.4h   // advance accumulators to the next row
+        saddw           v2.4s,  v2.4s,  v3.4h
+
+        subs            w3,  w3,  #1
+
+        smin            v0.8h,  v0.8h,  v4.8h   // upper clip at 1023
+
+        st1             {v0.8h},  [x0],  x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred8x8_128_dc_neon_10, export=1    // fill the 8x8 block of 16-bit samples with the constant 512
+        movi            v0.8h,  #2, lsl #8      // 512, 1 << (bit_depth - 1)
+        movi            v1.8h,  #2, lsl #8      // same value for rows 4-7
+        b               .L_pred8x8_dc_10_end    // shared store loop inside ff_pred8x8_dc_neon_10
+endfunc
+
+function ff_pred8x8_top_dc_neon_10, export=1    // DC prediction from the row above only (16-bit samples): one DC per 4-column half
+        sub             x2,  x0,  x1            // x2 -> row above the block
+        ld1             {v0.8h},  [x2]
+
+        addp            v0.8h,  v0.8h,  v0.8h   // pairwise sums
+        addp            v0.4h,  v0.4h,  v0.4h   // lanes 0/1 = sums of samples 0..3 and 4..7
+        zip1            v0.4h,  v0.4h,  v0.4h   // duplicate each sum into two adjacent lanes
+        urshr           v2.4h,  v0.4h,  #2      // rounded divide by 4
+        zip1            v0.8h,  v2.8h,  v2.8h   // left-half DC x4 | right-half DC x4 (rows 0-3)
+        zip1            v1.8h,  v2.8h,  v2.8h   // same pattern for rows 4-7
+        b               .L_pred8x8_dc_10_end    // shared store loop inside ff_pred8x8_dc_neon_10
+endfunc
+
+function ff_pred8x8_left_dc_neon_10, export=1   // DC prediction from the left column only (16-bit samples): one DC per 4-row half
+        sub             x2,  x0,  #2            // x2 -> sample left of row 0
+        ldcol.16        v0,  x2,  x1,  8        // 8 left-column samples, rows 0..7
+
+        addp            v0.8h,  v0.8h,  v0.8h   // pairwise sums
+        addp            v0.4h,  v0.4h,  v0.4h   // lanes 0/1 = sums of rows 0..3 and rows 4..7
+        urshr           v2.4h,  v0.4h,  #2      // rounded divide by 4
+        dup             v1.8h,  v2.h[1]         // pattern for rows 4-7
+        dup             v0.8h,  v2.h[0]         // pattern for rows 0-3
+        b               .L_pred8x8_dc_10_end    // shared store loop inside ff_pred8x8_dc_neon_10
+endfunc
+
+function ff_pred8x8_dc_neon_10, export=1        // DC prediction from the row above and the left column (16-bit samples): one DC per 4x4 quadrant
+        sub             x2,  x0,  x1            // x2 -> row above the block
+        sub             x3,  x0,  #2            // x3 -> sample left of row 0
+
+        ld1             {v0.8h}, [x2]
+        ldcol.16        v1,  x3,  x1, 8         // 8 left-column samples, rows 0..7
+
+        addp            v0.8h,  v0.8h, v0.8h    // pair sums of the top row
+        addp            v1.8h,  v1.8h, v1.8h    // pair sums of the left column
+        trn1            v2.2s,  v0.2s,  v1.2s   // first-half pair sums: top | left
+        trn2            v3.2s,  v0.2s,  v1.2s   // second-half pair sums: top | left
+        addp            v4.4h,  v2.4h,  v3.4h   // 4-sample sums: top 0..3, left 0..3, top 4..7, left 4..7
+        addp            v5.4h,  v4.4h,  v4.4h   // 8-sample sums: first halves, second halves
+        urshr           v6.4h,  v5.4h,  #3      // rounded divide by 8
+        urshr           v7.4h,  v4.4h,  #2      // rounded divide by 4
+        dup             v0.8h,  v6.h[0]         // top-left quadrant: first halves of top and left
+        dup             v2.8h,  v7.h[2]         // top-right quadrant: top 4..7 only
+        dup             v1.8h,  v7.h[3]         // bottom-left quadrant: left 4..7 only
+        dup             v3.8h,  v6.h[1]         // bottom-right quadrant: second halves of top and left
+        zip1            v0.2d,  v0.2d,  v2.2d   // row pattern for rows 0-3
+        zip1            v1.2d,  v1.2d,  v3.2d   // row pattern for rows 4-7
+.L_pred8x8_dc_10_end:                           // shared tail: expects v0 = rows 0-3 pattern, v1 = rows 4-7 pattern
+        mov             w3,  #4
+        add             x2,  x0,  x1,  lsl #2   // x2 -> row 4
+
+6:      st1             {v0.8h},  [x0], x1      // rows 0..3
+        subs            w3,  w3,  #1
+        st1             {v1.8h},  [x2], x1      // rows 4..7
+        b.ne            6b
+        ret
+endfunc
+
+function ff_pred8x8_l0t_dc_neon_10, export=1    // DC prediction from the row above plus left-column rows 0..3, 16-bit samples
+        sub             x2,  x0,  x1            // x2 -> row above the block
+        sub             x3,  x0,  #2            // x3 -> sample left of row 0
+
+        ld1             {v0.8h},  [x2]
+        ldcol.16        v1,  x3,  x1, 4         // left-column rows 0..3 into lanes 0-3
+
+        addp            v0.8h,  v0.8h,  v0.8h   // pair sums of the top row
+        addp            v1.4h,  v1.4h,  v1.4h   // pair sums of the left samples
+        addp            v0.4h,  v0.4h,  v0.4h   // lanes 0/1 = sums of top 0..3 and top 4..7
+        addp            v1.4h,  v1.4h,  v1.4h   // every lane = sum of the 4 left samples
+        add             v1.4h,  v1.4h,  v0.4h   // lane 0 = left sum + top 0..3 sum
+
+        urshr           v2.4h,  v0.4h,  #2      // rounded divide by 4
+        urshr           v3.4h,  v1.4h,  #3      // the pred4x4 part
+
+        dup             v4.4h,  v3.h[0]         // top-left quadrant: left rows 0..3 with top 0..3
+        dup             v5.4h,  v2.h[0]         // bottom-left quadrant: top 0..3
+        dup             v6.4h,  v2.h[1]         // both right quadrants: top 4..7
+
+        zip1            v0.2d,  v4.2d,  v6.2d   // row pattern for rows 0-3
+        zip1            v1.2d,  v5.2d,  v6.2d   // row pattern for rows 4-7
+        b               .L_pred8x8_dc_10_end    // shared store loop inside ff_pred8x8_dc_neon_10
+endfunc
+
+function ff_pred8x8_l00_dc_neon_10, export=1    // rows 0-3: DC of left-column rows 0..3; rows 4-7: constant 512 (16-bit samples)
+        sub             x2,  x0,  #2            // x2 -> sample left of row 0
+
+        ldcol.16        v0,  x2,  x1,  4        // left-column rows 0..3 into lanes 0-3
+
+        addp            v0.4h,  v0.4h,  v0.4h
+        addp            v0.4h,  v0.4h,  v0.4h   // every lane = sum of the 4 left samples
+        urshr           v0.4h,  v0.4h,  #2      // rounded divide by 4
+
+        movi            v1.8h,  #2, lsl #8      // 512
+        dup             v0.8h,  v0.h[0]         // pattern for rows 0-3
+        b               .L_pred8x8_dc_10_end    // shared store loop inside ff_pred8x8_dc_neon_10
+endfunc
+
+function ff_pred8x8_0lt_dc_neon_10, export=1    // DC prediction from the row above plus left-column rows 4..7, 16-bit samples
+        add             x3,  x0,  x1,  lsl #2   // x3 -> row 4
+        sub             x2,  x0,  x1            // x2 -> row above the block
+        sub             x3,  x3,  #2            // x3 -> sample left of row 4
+
+        ld1             {v0.8h},  [x2]
+        ldcol.16        v1,  x3,  x1,  hi=1     // left-column rows 4..7 into lanes 4-7 (lanes 0-3 are not loaded)
+
+        addp            v0.8h,  v0.8h,  v0.8h   // pair sums of the top row
+        addp            v1.8h,  v1.8h,  v1.8h   // lanes 2/3 = pair sums of the loaded left samples
+        addp            v0.4h,  v0.4h,  v0.4h   // lanes 0/1 = sums of top 0..3 and top 4..7
+        addp            v1.4h,  v1.4h,  v1.4h   // lanes 1/3 = sum of left rows 4..7
+        zip1            v0.2s,  v0.2s,  v1.2s   // v0 lanes: top 0..3, top 4..7, (unused), left 4..7
+        add             v1.4h,  v0.4h,  v1.4h   // lane 1 = top 4..7 sum + left 4..7 sum
+
+        urshr           v2.4h,  v0.4h,  #2      // rounded divide by 4
+        urshr           v3.4h,  v1.4h,  #3      // rounded divide by 8
+
+        dup             v4.4h,  v2.h[0]         // top-left quadrant: top 0..3
+        dup             v5.4h,  v2.h[3]         // bottom-left quadrant: left 4..7
+        dup             v6.4h,  v2.h[1]         // top-right quadrant: top 4..7
+        dup             v7.4h,  v3.h[1]         // bottom-right quadrant: top 4..7 with left 4..7
+
+        zip1            v0.2d,  v4.2d,  v6.2d   // row pattern for rows 0-3
+        zip1            v1.2d,  v5.2d,  v7.2d   // row pattern for rows 4-7
+        b               .L_pred8x8_dc_10_end    // shared store loop inside ff_pred8x8_dc_neon_10
+endfunc
+
+function ff_pred8x8_0l0_dc_neon_10, export=1    // rows 0-3: constant 512; rows 4-7: DC of left-column rows 4..7 (16-bit samples)
+        add             x2,  x0,  x1,  lsl #2   // x2 -> row 4
+        sub             x2,  x2,  #2            // x2 -> sample left of row 4
+
+        ldcol.16        v1,  x2,  x1,  4        // left-column rows 4..7 into lanes 0-3
+
+        addp            v2.8h,  v1.8h,  v1.8h   // lanes 0/1 = pair sums of the loaded samples
+        addp            v2.4h,  v2.4h,  v2.4h   // lane 0 = sum of the 4 left samples
+        urshr           v1.4h,  v2.4h,  #2      // rounded divide by 4
+
+        movi            v0.8h,  #2,  lsl #8     // 512
+        dup             v1.8h,  v1.h[0]         // pattern for rows 4-7
+        b               .L_pred8x8_dc_10_end    // shared store loop inside ff_pred8x8_dc_neon_10
+endfunc
diff --git a/media/ffvpx/libavcodec/aarch64/hpeldsp_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/hpeldsp_init_aarch64.c
new file mode 100644
index 0000000000..144ae2bcc4
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/hpeldsp_init_aarch64.c
@@ -0,0 +1,123 @@
+/*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/hpeldsp.h"
+/* NEON half-pel routines installed by ff_hpeldsp_init_aarch64() below; they are declared here and defined outside this file. Rounding "put" variants: */
+void     ff_put_pixels16_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_put_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_put_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void      ff_put_pixels8_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void   ff_put_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void   ff_put_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_put_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+/* "put" variants with the no_rnd suffix (x2, y2 and xy2 only; the init code reuses the plain put for slot 0). */
+void  ff_put_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void  ff_put_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void   ff_put_pixels8_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void   ff_put_pixels8_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void  ff_put_pixels8_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+/* Rounding "avg" variants. */
+void     ff_avg_pixels16_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_avg_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_avg_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void ff_avg_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void      ff_avg_pixels8_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void   ff_avg_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void   ff_avg_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_avg_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+/* "avg" variants with the no_rnd suffix; only pixels16 versions are declared. */
+void  ff_avg_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void  ff_avg_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+
+av_cold void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags)
+{
+    const int cpu_flags = av_get_cpu_flags();
+    if (!have_neon(cpu_flags))
+        return; /* no NEON reported: leave every table entry as the caller set it */
+    /* put: row [0] takes the pixels16 routines, row [1] the pixels8 ones; columns are plain, x2, y2, xy2 */
+    c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
+    c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
+    c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
+    c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
+    c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
+    c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
+    c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
+    c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
+    /* put, no_rnd: column 0 reuses the plain put routines, the other columns use the no_rnd variants */
+    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
+    c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
+    c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
+    c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
+    c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
+    c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
+    c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
+    c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
+    /* avg: same row/column layout as the put table */
+    c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
+    c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_neon;
+    c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_neon;
+    c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_neon;
+    c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
+    c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_neon;
+    c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_neon;
+    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_neon;
+    /* avg, no_rnd: a single row filled with pixels16 routines; slot 0 reuses the plain avg */
+    c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_neon;
+    c->avg_no_rnd_pixels_tab[1] = ff_avg_pixels16_x2_no_rnd_neon;
+    c->avg_no_rnd_pixels_tab[2] = ff_avg_pixels16_y2_no_rnd_neon;
+    c->avg_no_rnd_pixels_tab[3] = ff_avg_pixels16_xy2_no_rnd_neon;
+}
diff --git a/media/ffvpx/libavcodec/aarch64/hpeldsp_neon.S b/media/ffvpx/libavcodec/aarch64/hpeldsp_neon.S
new file mode 100644
index 0000000000..a491c173bb
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/hpeldsp_neon.S
@@ -0,0 +1,397 @@
+/*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+.macro  pixels16        rnd=1, avg=0   // 16-byte rows, four per pass: x1 = source, x0 = destination, x2 = row stride, w3 = row count; rnd is not referenced in this body
+  .if \avg
+        mov             x12, x0   // avg only: x12 reads the destination rows while x0 stays the write pointer
+  .endif
+1:      ld1             {v0.16B},  [x1], x2   // load four source rows, post-incrementing x1 by the stride
+        ld1             {v1.16B},  [x1], x2
+        ld1             {v2.16B},  [x1], x2
+        ld1             {v3.16B},  [x1], x2
+  .if \avg
+        ld1             {v4.16B},  [x12], x2   // avg only: fetch the matching destination row ...
+        urhadd          v0.16B,  v0.16B,  v4.16B   // ... and replace the source row with the rounding halving add of both
+        ld1             {v5.16B},  [x12], x2
+        urhadd          v1.16B,  v1.16B,  v5.16B
+        ld1             {v6.16B},  [x12], x2
+        urhadd          v2.16B,  v2.16B,  v6.16B
+        ld1             {v7.16B},  [x12], x2
+        urhadd          v3.16B,  v3.16B,  v7.16B
+  .endif
+        subs            w3,  w3,  #4   // four rows done; the flags are consumed by b.ne below (the stores do not touch them)
+        st1             {v0.16B},  [x0], x2   // write the four result rows
+        st1             {v1.16B},  [x0], x2
+        st1             {v2.16B},  [x0], x2
+        st1             {v3.16B},  [x0], x2
+        b.ne            1b   // loop exits only when w3 reaches exactly zero
+        ret
+.endm
+
+.macro  pixels16_x2     rnd=1, avg=0   // horizontal half-pel, 16 wide: each output byte averages source bytes i and i+1; two rows per pass
+1:      ld1             {v0.16B, v1.16B}, [x1], x2   // first row: 32 bytes loaded, bytes 0..16 are used
+        ld1             {v2.16B, v3.16B}, [x1], x2   // second row
+        subs            w3,  w3,  #2   // two rows per pass; flags are consumed by b.ne at the bottom
+        ext             v1.16B,  v0.16B,  v1.16B,  #1   // v1 = first row advanced by one byte (bytes 1..16)
+        avg             v0.16B,  v0.16B,  v1.16B   // avg is the helper macro from pixfunc: urhadd when rnd, uhadd otherwise
+        ext             v3.16B,  v2.16B,  v3.16B,  #1   // v3 = second row advanced by one byte
+        avg             v2.16B,  v2.16B,  v3.16B
+  .if \avg
+        ld1             {v1.16B}, [x0], x2   // avg only: read both destination rows
+        ld1             {v3.16B}, [x0]
+        urhadd          v0.16B,  v0.16B,  v1.16B   // rounding average of the filtered rows with the destination
+        urhadd          v2.16B,  v2.16B,  v3.16B
+        sub             x0,  x0,  x2   // rewind x0 to the first of the two rows before storing
+  .endif
+        st1             {v0.16B}, [x0], x2
+        st1             {v2.16B}, [x0], x2
+        b.ne            1b   // loop exits only when w3 reaches exactly zero
+        ret
+.endm
+
+.macro  pixels16_y2     rnd=1, avg=0   // vertical half-pel, 16 wide: each output row averages two consecutive source rows; two rows per pass
+        sub             w3,  w3,  #2   // the last two output rows are produced by the tail after the loop
+        ld1             {v0.16B}, [x1], x2   // v0 = first source row
+        ld1             {v1.16B}, [x1], x2   // v1 = second source row
+1:      subs            w3,  w3,  #2   // flags are consumed by b.ne at the bottom of the loop
+        avg             v2.16B,  v0.16B,  v1.16B   // first output row (avg macro from pixfunc: urhadd when rnd, uhadd otherwise)
+        ld1             {v0.16B}, [x1], x2   // v0 = next source row
+        avg             v3.16B,  v0.16B,  v1.16B   // second output row
+        ld1             {v1.16B}, [x1], x2   // v1 = the row after that, kept for the next pass
+  .if \avg
+        ld1             {v4.16B}, [x0], x2   // avg only: read both destination rows
+        ld1             {v5.16B}, [x0]
+        urhadd          v2.16B,  v2.16B,  v4.16B   // rounding average with the destination
+        urhadd          v3.16B,  v3.16B,  v5.16B
+        sub             x0,  x0,  x2   // rewind x0 to the first of the two rows
+  .endif
+        st1             {v2.16B}, [x0], x2
+        st1             {v3.16B}, [x0], x2
+        b.ne            1b   // the body has run once before this test, so at least four rows are written in total
+
+        avg             v2.16B,  v0.16B,  v1.16B   // tail: final two output rows
+        ld1             {v0.16B}, [x1], x2   // only one more source row is read here
+        avg             v3.16B,  v0.16B,  v1.16B
+  .if \avg
+        ld1             {v4.16B}, [x0], x2
+        ld1             {v5.16B}, [x0]
+        urhadd          v2.16B,  v2.16B,  v4.16B
+        urhadd          v3.16B,  v3.16B,  v5.16B
+        sub             x0,  x0,  x2
+  .endif
+        st1             {v2.16B},     [x0], x2
+        st1             {v3.16B},     [x0], x2
+
+        ret
+.endm
+
+.macro  pixels16_xy2    rnd=1, avg=0   // 2D half-pel, 16 wide: each output byte is the sum of a 2x2 source neighbourhood shifted right by 2; two rows per pass
+        sub             w3,  w3,  #2   // the last two output rows are produced by the tail after the loop
+        ld1             {v0.16B, v1.16B}, [x1], x2   // first source row: 32 bytes loaded, bytes 0..16 are used
+        ld1             {v4.16B, v5.16B}, [x1], x2   // second source row
+NRND    movi            v26.8H, #1   // NRND lines are emitted only when rnd=0: bias of 1 added before the truncating shift
+        ext             v1.16B,  v0.16B,  v1.16B,  #1   // first row advanced by one byte
+        ext             v5.16B,  v4.16B,  v5.16B,  #1   // second row advanced by one byte
+        uaddl           v16.8H,  v0.8B,   v1.8B   // v16/v20 = horizontal pair sums of the first row (low/high eight pixels), widened to 16 bit
+        uaddl2          v20.8H,  v0.16B,  v1.16B
+        uaddl           v18.8H,  v4.8B,   v5.8B   // v18/v22 = horizontal pair sums of the second row
+        uaddl2          v22.8H,  v4.16B,  v5.16B
+1:      subs            w3,  w3,  #2   // flags are consumed by b.gt at the bottom; nothing in between modifies them
+        ld1             {v0.16B, v1.16B}, [x1], x2   // next source row
+        add             v24.8H,  v16.8H,  v18.8H   // low half: pair sums of two adjacent rows = four-pixel sums
+NRND    add             v24.8H,  v24.8H,  v26.8H
+        ext             v30.16B, v0.16B,  v1.16B,  #1   // new row advanced by one byte
+        add             v1.8H,   v20.8H,  v22.8H   // high half four-pixel sums (v1 is free once ext has read it)
+        mshrn           v28.8B,  v24.8H,  #2   // narrow with shift by 2: rshrn when rnd, shrn otherwise
+NRND    add             v1.8H,   v1.8H,   v26.8H
+        mshrn2          v28.16B, v1.8H,   #2
+  .if \avg
+        ld1             {v16.16B},        [x0]   // avg only: current destination row (the old v16 sums were consumed above)
+        urhadd          v28.16B, v28.16B, v16.16B
+  .endif
+        uaddl           v16.8H,  v0.8B,   v30.8B   // v16/v20 = pair sums of the row just loaded
+        ld1             {v2.16B, v3.16B}, [x1], x2   // following source row
+        uaddl2          v20.8H,  v0.16B,  v30.16B
+        st1             {v28.16B},        [x0], x2   // first output row of this pass
+        add             v24.8H,  v16.8H,  v18.8H   // second output row: new v16/v20 combined with the still-live v18/v22
+NRND    add             v24.8H,  v24.8H,  v26.8H
+        ext             v3.16B,  v2.16B,  v3.16B,  #1
+        add             v0.8H,   v20.8H,  v22.8H
+        mshrn           v30.8B,  v24.8H,  #2
+NRND    add             v0.8H,   v0.8H,   v26.8H
+        mshrn2          v30.16B, v0.8H,   #2
+  .if \avg
+        ld1             {v18.16B},        [x0]   // avg only: destination row (the old v18 sums were consumed above)
+        urhadd          v30.16B, v30.16B, v18.16B
+  .endif
+        uaddl           v18.8H,   v2.8B,  v3.8B   // v18/v22 = pair sums of the row held in v2/v3, for the next pass
+        uaddl2          v22.8H,   v2.16B, v3.16B
+        st1             {v30.16B},        [x0], x2   // second output row of this pass
+        b.gt            1b
+
+        ld1             {v0.16B, v1.16B}, [x1], x2   // tail: final two output rows; only this one extra source row is read
+        add             v24.8H,  v16.8H,  v18.8H
+NRND    add             v24.8H,  v24.8H,  v26.8H
+        ext             v30.16B, v0.16B,  v1.16B,  #1
+        add             v1.8H,   v20.8H,  v22.8H
+        mshrn           v28.8B,  v24.8H,  #2
+NRND    add             v1.8H,   v1.8H,   v26.8H
+        mshrn2          v28.16B, v1.8H,   #2
+  .if \avg
+        ld1             {v16.16B},        [x0]
+        urhadd          v28.16B, v28.16B, v16.16B
+  .endif
+        uaddl           v16.8H,  v0.8B,   v30.8B   // pair sums of the last source row
+        uaddl2          v20.8H,  v0.16B,  v30.16B
+        st1             {v28.16B},        [x0], x2
+        add             v24.8H,  v16.8H,  v18.8H
+NRND    add             v24.8H,  v24.8H,  v26.8H
+        add             v0.8H,   v20.8H,  v22.8H
+        mshrn           v30.8B,  v24.8H,  #2
+NRND    add             v0.8H,   v0.8H,   v26.8H
+        mshrn2          v30.16B, v0.8H,   #2
+  .if \avg
+        ld1             {v18.16B},        [x0]
+        urhadd          v30.16B, v30.16B, v18.16B
+  .endif
+        st1             {v30.16B},        [x0], x2
+
+        ret
+.endm
+
+.macro  pixels8         rnd=1, avg=0   // 8-byte rows, four per pass: x1 = source, x0 = destination, x2 = row stride, w3 = row count; rnd is not referenced in this body
+1:      ld1             {v0.8B}, [x1], x2   // load four source rows, post-incrementing x1 by the stride
+        ld1             {v1.8B}, [x1], x2
+        ld1             {v2.8B}, [x1], x2
+        ld1             {v3.8B}, [x1], x2
+  .if \avg
+        ld1             {v4.8B}, [x0], x2   // avg only: read the destination rows through x0 itself ...
+        urhadd          v0.8B,  v0.8B,  v4.8B   // ... and take the rounding halving add with the source row
+        ld1             {v5.8B}, [x0], x2
+        urhadd          v1.8B,  v1.8B,  v5.8B
+        ld1             {v6.8B}, [x0], x2
+        urhadd          v2.8B,  v2.8B,  v6.8B
+        ld1             {v7.8B}, [x0], x2
+        urhadd          v3.8B,  v3.8B,  v7.8B
+        sub             x0,  x0,  x2,  lsl #2   // rewind x0 by four rows so the stores hit the rows just read
+  .endif
+        subs            w3,  w3,  #4   // four rows done; flags are consumed by b.ne below
+        st1             {v0.8B}, [x0], x2
+        st1             {v1.8B}, [x0], x2
+        st1             {v2.8B}, [x0], x2
+        st1             {v3.8B}, [x0], x2
+        b.ne            1b   // loop exits only when w3 reaches exactly zero
+        ret
+.endm
+
+.macro  pixels8_x2      rnd=1, avg=0   // horizontal half-pel, 8 wide: each output byte averages source bytes i and i+1; two rows per pass
+1:      ld1             {v0.8B, v1.8B}, [x1], x2   // first row: 16 bytes loaded, bytes 0..8 are used
+        ext             v1.8B,  v0.8B,  v1.8B,  #1   // v1 = first row advanced by one byte (bytes 1..8)
+        ld1             {v2.8B, v3.8B}, [x1], x2   // second row
+        ext             v3.8B,  v2.8B,  v3.8B,  #1   // v3 = second row advanced by one byte
+        subs            w3,  w3,  #2   // two rows per pass; flags are consumed by b.ne at the bottom
+        avg             v0.8B,   v0.8B,   v1.8B   // avg is the helper macro from pixfunc: urhadd when rnd, uhadd otherwise
+        avg             v2.8B,   v2.8B,   v3.8B
+  .if \avg
+        ld1             {v4.8B},     [x0], x2   // avg only: read both destination rows
+        ld1             {v5.8B},     [x0]
+        urhadd          v0.8B,   v0.8B,   v4.8B   // rounding average of the filtered rows with the destination
+        urhadd          v2.8B,   v2.8B,   v5.8B
+        sub             x0,  x0,  x2   // rewind x0 to the first of the two rows before storing
+  .endif
+        st1             {v0.8B}, [x0], x2
+        st1             {v2.8B}, [x0], x2
+        b.ne            1b   // loop exits only when w3 reaches exactly zero
+        ret
+.endm
+
+.macro  pixels8_y2      rnd=1, avg=0   // vertical half-pel, 8 wide: each output row averages two consecutive source rows; two rows per pass
+        sub             w3,  w3,  #2   // the last two output rows are produced by the tail after the loop
+        ld1             {v0.8B},  [x1], x2   // v0 = first source row
+        ld1             {v1.8B},  [x1], x2   // v1 = second source row
+1:      subs            w3,  w3,  #2   // flags are consumed by b.ne at the bottom of the loop
+        avg             v4.8B,  v0.8B,  v1.8B   // first output row (avg macro from pixfunc: urhadd when rnd, uhadd otherwise)
+        ld1             {v0.8B},  [x1], x2   // v0 = next source row
+        avg             v5.8B,  v0.8B,  v1.8B   // second output row
+        ld1             {v1.8B},  [x1], x2   // v1 = the row after that, kept for the next pass
+  .if \avg
+        ld1             {v2.8B},     [x0], x2   // avg only: read both destination rows
+        ld1             {v3.8B},     [x0]
+        urhadd          v4.8B,  v4.8B,  v2.8B   // rounding average with the destination
+        urhadd          v5.8B,  v5.8B,  v3.8B
+        sub             x0,  x0,  x2   // rewind x0 to the first of the two rows
+  .endif
+        st1             {v4.8B},     [x0], x2
+        st1             {v5.8B},     [x0], x2
+        b.ne            1b   // the body has run once before this test, so at least four rows are written in total
+
+        avg             v4.8B,  v0.8B,  v1.8B   // tail: final two output rows
+        ld1             {v0.8B},  [x1], x2   // only one more source row is read here
+        avg             v5.8B,  v0.8B,  v1.8B
+  .if \avg
+        ld1             {v2.8B},     [x0], x2
+        ld1             {v3.8B},     [x0]
+        urhadd          v4.8B,  v4.8B,  v2.8B
+        urhadd          v5.8B,  v5.8B,  v3.8B
+        sub             x0,  x0,  x2
+  .endif
+        st1             {v4.8B},     [x0], x2
+        st1             {v5.8B},     [x0], x2
+
+        ret
+.endm
+
+.macro  pixels8_xy2     rnd=1, avg=0   // 2D half-pel, 8 wide: each output byte is the sum of a 2x2 source neighbourhood shifted right by 2; two rows per pass
+        sub             w3,  w3,  #2   // the last two output rows are produced by the tail after the loop
+        ld1             {v0.16B},     [x1], x2   // first source row: 16 bytes loaded, bytes 0..8 are used
+        ld1             {v1.16B},     [x1], x2   // second source row
+NRND    movi            v19.8H, #1   // NRND lines are emitted only when rnd=0: bias of 1 added before the truncating shift
+        ext             v4.16B,  v0.16B,  v4.16B,  #1   // v4 = first row advanced by one byte; the top byte comes from the old v4 but only the low 8 bytes are read below
+        ext             v6.16B,  v1.16B,  v6.16B,  #1   // v6 = second row advanced by one byte (same remark for the top byte)
+        uaddl           v16.8H,  v0.8B,  v4.8B   // v16 = horizontal pair sums of the first row, widened to 16 bit
+        uaddl           v17.8H,  v1.8B,  v6.8B   // v17 = horizontal pair sums of the second row
+1:      subs            w3,  w3,  #2   // flags are consumed by b.gt at the bottom; nothing in between modifies them
+        ld1             {v0.16B},     [x1], x2   // next source row
+        add             v18.8H, v16.8H,  v17.8H   // four-pixel sums for the first output row
+        ext             v4.16B,  v0.16B,  v4.16B,  #1
+NRND    add             v18.8H, v18.8H, v19.8H
+        uaddl           v16.8H,  v0.8B,  v4.8B   // v16 = pair sums of the row just loaded
+        mshrn           v5.8B,  v18.8H, #2   // narrow with shift by 2: rshrn when rnd, shrn otherwise
+        ld1             {v1.16B},     [x1], x2   // following source row
+        add             v18.8H, v16.8H,  v17.8H   // four-pixel sums for the second output row
+  .if \avg
+        ld1             {v7.8B},     [x0]   // avg only: current destination row
+        urhadd          v5.8B,  v5.8B,  v7.8B
+  .endif
+NRND    add             v18.8H, v18.8H, v19.8H
+        st1             {v5.8B},     [x0], x2   // first output row of this pass
+        mshrn           v7.8B,  v18.8H, #2
+  .if \avg
+        ld1             {v5.8B},     [x0]   // avg only: destination row (v5 was stored just above)
+        urhadd          v7.8B,  v7.8B,  v5.8B
+  .endif
+        ext             v6.16B,  v1.16B,  v6.16B,  #1
+        uaddl           v17.8H,  v1.8B,   v6.8B   // v17 = pair sums of the row held in v1, for the next pass
+        st1             {v7.8B},     [x0], x2   // second output row of this pass
+        b.gt            1b
+
+        ld1             {v0.16B},     [x1], x2   // tail: final two output rows; only this one extra source row is read
+        add             v18.8H, v16.8H, v17.8H
+        ext             v4.16B, v0.16B, v4.16B,  #1
+NRND    add             v18.8H, v18.8H, v19.8H
+        uaddl           v16.8H,  v0.8B, v4.8B   // pair sums of the last source row
+        mshrn           v5.8B,  v18.8H, #2
+        add             v18.8H, v16.8H, v17.8H
+  .if \avg
+        ld1             {v7.8B},     [x0]
+        urhadd          v5.8B,  v5.8B,  v7.8B
+  .endif
+NRND    add             v18.8H, v18.8H, v19.8H
+        st1             {v5.8B},     [x0], x2
+        mshrn           v7.8B,  v18.8H, #2
+  .if \avg
+        ld1             {v5.8B},     [x0]
+        urhadd          v7.8B,  v7.8B,  v5.8B
+  .endif
+        st1             {v7.8B},     [x0], x2
+
+        ret
+.endm
+
+.macro  pixfunc         pfx, name, suf, rnd=1, avg=0   // emit one exported function named ff_ + pfx + name + suf + _neon whose body is the macro called name
+  .if \rnd
+    .macro avg  rd, rn, rm   // rounding build: avg expands to urhadd
+        urhadd          \rd, \rn, \rm
+    .endm
+    .macro mshrn rd, rn, rm   // rounding build: narrowing shift expands to rshrn
+        rshrn           \rd, \rn, \rm
+    .endm
+    .macro mshrn2 rd, rn, rm   // upper-half form of the above
+        rshrn2          \rd, \rn, \rm
+    .endm
+    .macro NRND insn:vararg   // rounding build: NRND-prefixed instructions are dropped
+    .endm
+  .else
+    .macro avg  rd, rn, rm   // no-rounding build: avg expands to uhadd
+        uhadd           \rd, \rn, \rm
+    .endm
+    .macro mshrn rd, rn, rm   // no-rounding build: narrowing shift expands to shrn
+        shrn            \rd, \rn, \rm
+    .endm
+    .macro mshrn2 rd, rn, rm   // upper-half form of the above
+        shrn2           \rd, \rn, \rm
+    .endm
+    .macro NRND insn:vararg   // no-rounding build: NRND-prefixed instructions are emitted as written
+        \insn
+    .endm
+  .endif
+function ff_\pfx\name\suf\()_neon, export=1
+        \name           \rnd, \avg   // expand the selected pixels macro as the function body
+endfunc
+        .purgem         avg   // remove the helper macros so the next pixfunc can redefine them
+        .purgem         mshrn
+        .purgem         mshrn2
+        .purgem         NRND
+.endm
+
+.macro  pixfunc2        pfx, name, avg=0   // emit both the rounding and the _no_rnd variant of one pixels macro
+        pixfunc         \pfx, \name,          rnd=1, avg=\avg   // rounding variant, no name suffix
+        pixfunc         \pfx, \name, _no_rnd, rnd=0, avg=\avg   // truncating variant, suffixed _no_rnd
+.endm
+
+function ff_put_h264_qpel16_mc00_neon, export=1
+        mov             w3,  #16   // fixed row count of 16; there is no ret, so execution presumably continues into the function emitted next (confirm against the function/endfunc macros in asm.S)
+endfunc
+
+        pixfunc         put_, pixels16,     avg=0   // emits ff_put_pixels16_neon; only one variant because the pixels16 body never references rnd
+        pixfunc2        put_, pixels16_x2,  avg=0   // each pixfunc2 emits a rounding and a _no_rnd function
+        pixfunc2        put_, pixels16_y2,  avg=0
+        pixfunc2        put_, pixels16_xy2, avg=0
+
+function ff_avg_h264_qpel16_mc00_neon, export=1
+        mov             w3,  #16   // fixed row count of 16; there is no ret, so execution presumably continues into the function emitted next (confirm against the function/endfunc macros in asm.S)
+endfunc
+
+        pixfunc         avg_, pixels16,     avg=1   // emits ff_avg_pixels16_neon; only one variant because the pixels16 body never references rnd
+        pixfunc2        avg_, pixels16_x2,  avg=1   // each pixfunc2 emits a rounding and a _no_rnd function
+        pixfunc2        avg_, pixels16_y2,  avg=1
+        pixfunc2        avg_, pixels16_xy2, avg=1
+
+function ff_put_h264_qpel8_mc00_neon, export=1
+        mov             w3,  #8   // fixed row count of 8; there is no ret, so execution presumably continues into the function emitted next (confirm against the function/endfunc macros in asm.S)
+endfunc
+
+        pixfunc         put_, pixels8,     avg=0   // emits ff_put_pixels8_neon; only one variant because the pixels8 body never references rnd
+        pixfunc2        put_, pixels8_x2,  avg=0   // each pixfunc2 emits a rounding and a _no_rnd function
+        pixfunc2        put_, pixels8_y2,  avg=0
+        pixfunc2        put_, pixels8_xy2, avg=0
+
+function ff_avg_h264_qpel8_mc00_neon, export=1
+        mov             w3,  #8   // fixed row count of 8; there is no ret, so execution presumably continues into the function emitted next (confirm against the function/endfunc macros in asm.S)
+endfunc
+
+        pixfunc         avg_, pixels8,     avg=1   // emits ff_avg_pixels8_neon
+        pixfunc         avg_, pixels8_x2,  avg=1   // pixfunc, not pixfunc2: only rounding variants are emitted for the 8-wide avg functions
+        pixfunc         avg_, pixels8_y2,  avg=1
+        pixfunc         avg_, pixels8_xy2, avg=1
diff --git a/media/ffvpx/libavcodec/aarch64/idct.h b/media/ffvpx/libavcodec/aarch64/idct.h
new file mode 100644
index 0000000000..97ee0a64af
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/idct.h
@@ -0,0 +1,29 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AARCH64_IDCT_H
+#define AVCODEC_AARCH64_IDCT_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_simple_idct_neon(int16_t *data); /* takes only the coefficient block; installed as the idct callback by ff_idctdsp_init_aarch64 */
+void ff_simple_idct_put_neon(uint8_t *dest, ptrdiff_t line_size, int16_t *data); /* installed as the idct_put callback by ff_idctdsp_init_aarch64 */
+void ff_simple_idct_add_neon(uint8_t *dest, ptrdiff_t line_size, int16_t *data); /* installed as the idct_add callback by ff_idctdsp_init_aarch64 */
+
+#endif /* AVCODEC_AARCH64_IDCT_H */
diff --git a/media/ffvpx/libavcodec/aarch64/idctdsp_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/idctdsp_init_aarch64.c
new file mode 100644
index 0000000000..eec21aa5a2
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/idctdsp_init_aarch64.c
@@ -0,0 +1,55 @@
+/*
+ * ARM-NEON-optimized IDCT functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2017 Matthieu Bouron <matthieu.bouron@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+#include "idct.h"
+
+void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, ptrdiff_t); /* args: 64 coefficients, 8-bit destination, destination row stride in bytes */
+void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, ptrdiff_t); /* same argument order; result is biased by 128 */
+void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, ptrdiff_t); /* same argument order; the destination is also read as input */
+
+av_cold void ff_idctdsp_init_aarch64(IDCTDSPContext *c, AVCodecContext *avctx,
+                                     unsigned high_bit_depth)
+{
+    const int flags = av_get_cpu_flags();
+
+    if (!have_neon(flags))
+        return; /* no NEON: leave every callback in c untouched */
+    c->put_pixels_clamped        = ff_put_pixels_clamped_neon;
+    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
+    c->add_pixels_clamped        = ff_add_pixels_clamped_neon;
+
+    /* The simple IDCT is only installed when lowres is off, the stream is
+     * not high bit depth and idct_algo asks for auto/simple-auto/simple-NEON. */
+    if (!avctx->lowres && !high_bit_depth &&
+        (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+         avctx->idct_algo == FF_IDCT_SIMPLENEON)) {
+        c->idct      = ff_simple_idct_neon;
+        c->idct_put  = ff_simple_idct_put_neon;
+        c->idct_add  = ff_simple_idct_add_neon;
+        c->perm_type = FF_IDCT_PERM_PARTTRANS;
+    }
+}
diff --git a/media/ffvpx/libavcodec/aarch64/idctdsp_neon.S b/media/ffvpx/libavcodec/aarch64/idctdsp_neon.S
new file mode 100644
index 0000000000..7f47611206
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/idctdsp_neon.S
@@ -0,0 +1,130 @@
+/*
+ * IDCT AArch64 NEON optimisations
+ *
+ * Copyright (c) 2022 Ben Avison <bavison@riscosopen.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// Clamp 16-bit signed block coefficients to unsigned 8-bit
+// On entry:
+//   x0 -> array of 64x 16-bit coefficients
+//   x1 -> 8-bit results
+//   x2 = row stride for results, bytes
+function ff_put_pixels_clamped_neon, export=1
+        ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #64   // first 32 coefficients (four rows of eight 16-bit values)
+        ld1             {v4.16b, v5.16b, v6.16b, v7.16b}, [x0]   // remaining 32 coefficients
+        sqxtun          v0.8b, v0.8h   // saturate each signed 16-bit value to unsigned 8-bit
+        sqxtun          v1.8b, v1.8h
+        sqxtun          v2.8b, v2.8h
+        sqxtun          v3.8b, v3.8h
+        sqxtun          v4.8b, v4.8h
+        st1             {v0.8b}, [x1], x2   // row 0; narrowing of later rows is interleaved with the stores
+        sqxtun          v0.8b, v5.8h   // v0 is free again after its store: reuse it for row 5
+        st1             {v1.8b}, [x1], x2
+        sqxtun          v1.8b, v6.8h   // row 6 goes into v1
+        st1             {v2.8b}, [x1], x2
+        sqxtun          v2.8b, v7.8h   // row 7 goes into v2
+        st1             {v3.8b}, [x1], x2
+        st1             {v4.8b}, [x1], x2
+        st1             {v0.8b}, [x1], x2
+        st1             {v1.8b}, [x1], x2
+        st1             {v2.8b}, [x1]   // last row: no post-increment needed
+        ret
+endfunc
+
+// Clamp 16-bit signed block coefficients to signed 8-bit (biased by 128)
+// On entry:
+//   x0 -> array of 64x 16-bit coefficients
+//   x1 -> 8-bit results
+//   x2 = row stride for results, bytes
+function ff_put_signed_pixels_clamped_neon, export=1
+        ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #64   // first 32 coefficients (four rows of eight 16-bit values)
+        movi            v4.8b, #128   // bias added to every narrowed byte
+        ld1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x0]   // remaining 32 coefficients
+        sqxtn           v0.8b, v0.8h   // saturate each signed 16-bit value to signed 8-bit
+        sqxtn           v1.8b, v1.8h
+        sqxtn           v2.8b, v2.8h
+        sqxtn           v3.8b, v3.8h
+        sqxtn           v5.8b, v16.8h   // rows 4..7 are narrowed into v5, v6, v7 and v16
+        add             v0.8b, v0.8b, v4.8b   // add 128 with 8-bit wrap-around
+        sqxtn           v6.8b, v17.8h
+        add             v1.8b, v1.8b, v4.8b
+        sqxtn           v7.8b, v18.8h
+        add             v2.8b, v2.8b, v4.8b
+        sqxtn           v16.8b, v19.8h   // v16 was already consumed for row 4 above
+        add             v3.8b, v3.8b, v4.8b
+        st1             {v0.8b}, [x1], x2   // row 0
+        add             v0.8b, v5.8b, v4.8b   // v0 is free after its store: biased row 4
+        st1             {v1.8b}, [x1], x2
+        add             v1.8b, v6.8b, v4.8b   // biased row 5
+        st1             {v2.8b}, [x1], x2
+        add             v2.8b, v7.8b, v4.8b   // biased row 6
+        st1             {v3.8b}, [x1], x2
+        add             v3.8b, v16.8b, v4.8b   // biased row 7
+        st1             {v0.8b}, [x1], x2
+        st1             {v1.8b}, [x1], x2
+        st1             {v2.8b}, [x1], x2
+        st1             {v3.8b}, [x1]   // last row: no post-increment needed
+        ret
+endfunc
+
+// Add 16-bit signed block coefficients to unsigned 8-bit
+// On entry:
+//   x0 -> array of 64x 16-bit coefficients
+//   x1 -> 8-bit input and results
+//   x2 = row stride for 8-bit input and results, bytes
+function ff_add_pixels_clamped_neon, export=1
+        ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #64   // first 32 coefficients (four rows of eight 16-bit values)
+        mov             x3, x1   // x3 = write pointer; x1 keeps running ahead as the read pointer
+        ld1             {v4.8b}, [x1], x2   // existing pixel rows 0..3
+        ld1             {v5.8b}, [x1], x2
+        ld1             {v6.8b}, [x1], x2
+        ld1             {v7.8b}, [x1], x2
+        ld1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x0]   // remaining 32 coefficients
+        uaddw           v0.8h, v0.8h, v4.8b   // coefficient + zero-extended pixel, kept in 16 bit
+        uaddw           v1.8h, v1.8h, v5.8b
+        uaddw           v2.8h, v2.8h, v6.8b
+        ld1             {v4.8b}, [x1], x2   // existing pixel rows 4..7, interleaved with the arithmetic
+        uaddw           v3.8h, v3.8h, v7.8b
+        ld1             {v5.8b}, [x1], x2
+        sqxtun          v0.8b, v0.8h   // saturate the signed 16-bit sums to unsigned 8-bit
+        ld1             {v6.8b}, [x1], x2
+        sqxtun          v1.8b, v1.8h
+        ld1             {v7.8b}, [x1]
+        sqxtun          v2.8b, v2.8h
+        sqxtun          v3.8b, v3.8h
+        uaddw           v4.8h, v16.8h, v4.8b   // row 4 sum
+        st1             {v0.8b}, [x3], x2   // row 0 out
+        uaddw           v0.8h, v17.8h, v5.8b   // v0 is free after its store: row 5 sum
+        st1             {v1.8b}, [x3], x2
+        uaddw           v1.8h, v18.8h, v6.8b   // row 6 sum
+        st1             {v2.8b}, [x3], x2
+        uaddw           v2.8h, v19.8h, v7.8b   // row 7 sum
+        sqxtun          v4.8b, v4.8h
+        sqxtun          v0.8b, v0.8h
+        st1             {v3.8b}, [x3], x2
+        sqxtun          v1.8b, v1.8h
+        sqxtun          v2.8b, v2.8h
+        st1             {v4.8b}, [x3], x2
+        st1             {v0.8b}, [x3], x2
+        st1             {v1.8b}, [x3], x2
+        st1             {v2.8b}, [x3]   // last row: no post-increment needed
+        ret
+endfunc
diff --git a/media/ffvpx/libavcodec/aarch64/mdct_neon.S b/media/ffvpx/libavcodec/aarch64/mdct_neon.S
new file mode 100644
index 0000000000..98b09bf1ab
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/mdct_neon.S
@@ -0,0 +1,326 @@
+/*
+ * AArch64 NEON optimised MDCT
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+function ff_imdct_half_neon, export=1   // x0 = context, x1 = output, x2 = input: twiddle the input into the output, run ff_fft_calc_neon on it, then twiddle the output in place
+        stp             x19, x20, [sp, #-32]!   // push callee-saved x19/x20 and reserve 32 bytes of stack
+        AARCH64_SIGN_LINK_REGISTER
+        str             x30, [sp, #16]   // save the link register because of the bl below
+        mov             x12, #1
+        ldr             w14, [x0, #28]          // mdct_bits
+        ldr             x4,  [x0, #32]          // tcos
+        ldr             x3,  [x0, #8]           // revtab
+        lsl             x12, x12, x14           // n  = 1 << nbits
+        lsr             x14, x12, #2            // n4 = n >> 2
+        add             x7,  x2,  x12,  lsl #1  // x7 = input + 2*n bytes
+        mov             x12, #-16               // step for the descending input pointer
+        sub             x7,  x7,  #16           // x7 -> last 16 bytes below input + 2*n
+
+        ld2             {v16.2s,v17.2s}, [x7], x12 // d16=x,n1 d17=x,n0
+        ld2             {v0.2s,v1.2s},   [x2], #16 // d0 =m0,x d1 =m1,x
+        rev64           v17.2s, v17.2s             // swap the two lanes read from the descending pointer
+        ld2             {v2.2s,v3.2s},   [x4], #16 // d2=c0,c1 d3=s0,s2
+        fmul            v6.2s,  v17.2s, v2.2s      // first half of the products, prepared ahead of the loop
+        fmul            v7.2s,  v0.2s,  v2.2s
+1:
+        subs            x14, x14, #2               // two complex values per pass; flags are consumed by b.eq below
+        ldr             w6,  [x3], #4              // one word holding two 16-bit revtab entries
+        fmul            v4.2s,  v0.2s,  v3.2s
+        fmul            v5.2s,  v17.2s, v3.2s
+        fsub            v4.2s,  v6.2s,  v4.2s      // v4 = v17*c - v0*s
+        fadd            v5.2s,  v5.2s,  v7.2s      // v5 = v17*s + v0*c
+        ubfm            x8,  x6,  #16, #31         // x8 = upper revtab entry
+        ubfm            x6,  x6,  #0,  #15         // x6 = lower revtab entry
+        add             x8,  x1,  x8,  lsl #3      // output address = x1 + 8 * entry
+        add             x6,  x1,  x6,  lsl #3
+        b.eq            2f                         // last pair: skip the loads for a next pass
+        ld2             {v16.2s,v17.2s}, [x7], x12
+        ld2             {v0.2s,v1.2s},   [x2], #16
+        rev64           v17.2s, v17.2s
+        ld2             {v2.2s,v3.2s},   [x4], #16    // d2=c0,c1 d3=s0,s2
+        fmul            v6.2s,  v17.2s, v2.2s
+        fmul            v7.2s,  v0.2s,  v2.2s
+        st2             {v4.s,v5.s}[0], [x6]       // lane 0 of v4/v5 stored as one pair at the lower-entry address
+        st2             {v4.s,v5.s}[1], [x8]       // lane 1 stored at the upper-entry address
+        b               1b
+2:
+        st2             {v4.s,v5.s}[0], [x6]
+        st2             {v4.s,v5.s}[1], [x8]
+
+        mov             x19, x0                  // keep the context and output pointers across the call
+        mov             x20, x1
+        bl              X(ff_fft_calc_neon)      // x0 and x1 are still the context and the output buffer here
+
+        mov             x12, #1
+        ldr             w14, [x19, #28]          // mdct_bits
+        ldr             x4,  [x19, #32]          // tcos
+        lsl             x12, x12, x14            // n  = 1 << nbits
+        lsr             x14, x12, #3             // n8 = n >> 3
+
+        add             x4,  x4,  x14, lsl #3    // x4 = tcos + 8*n8 bytes, read upwards
+        add             x6,  x20, x14, lsl #3    // x6 = output + 8*n8 bytes, read upwards
+        sub             x1,  x4,  #16            // x1 = tcos pointer read downwards
+        sub             x3,  x6,  #16            // x3 = output pointer read downwards
+
+        mov             x7,  #-16                // step for the descending pointers
+        mov             x8,  x6                  // x8 = ascending store pointer
+        mov             x0,  x3                  // x0 = descending store pointer
+
+        ld2             {v0.2s,v1.2s},  [x3], x7 // d0 =i1,r1 d1 =i0,r0
+        ld2             {v20.2s,v21.2s},[x6], #16 // d20=i2,r2 d21=i3,r3
+        ld2             {v16.2s,v17.2s},[x1], x7 // v16=c1,c0 v17=s1,s0
+3:
+        subs            x14, x14, #2             // flags are consumed by b.eq below
+        fmul            v7.2s,  v0.2s,  v17.2s
+        ld2             {v18.2s,v19.2s},[x4], #16    // v18=c2,c3 v19=s2,s3
+        fmul            v4.2s,  v1.2s,  v17.2s
+        fmul            v6.2s,  v21.2s, v19.2s
+        fmul            v5.2s,  v20.2s, v19.2s
+        fmul            v22.2s, v1.2s,  v16.2s
+        fmul            v23.2s, v21.2s, v18.2s
+        fmul            v24.2s, v0.2s,  v16.2s
+        fmul            v25.2s, v20.2s, v18.2s
+        fadd            v7.2s,  v7.2s,  v22.2s   // v7 = v0*v17 + v1*v16
+        fadd            v5.2s,  v5.2s,  v23.2s   // v5 = v20*v19 + v21*v18
+        fsub            v4.2s,  v4.2s,  v24.2s   // v4 = v1*v17 - v0*v16
+        fsub            v6.2s,  v6.2s,  v25.2s   // v6 = v21*v19 - v20*v18
+        b.eq            4f                       // last pass: skip the loads for a next pass
+        ld2             {v0.2s,v1.2s},  [x3], x7
+        ld2             {v20.2s,v21.2s},[x6], #16
+        ld2             {v16.2s,v17.2s},[x1], x7 // v16=c1,c0 v17=s1,s0
+        rev64           v5.2s,  v5.2s
+        rev64           v7.2s,  v7.2s
+        st2             {v4.2s,v5.2s},  [x0], x7 // v4 with lane-swapped v5 goes to the descending side
+        st2             {v6.2s,v7.2s},  [x8], #16 // v6 with lane-swapped v7 goes to the ascending side
+        b               3b
+4:
+        rev64           v5.2s,  v5.2s
+        rev64           v7.2s,  v7.2s
+        st2             {v4.2s,v5.2s},  [x0]
+        st2             {v6.2s,v7.2s},  [x8]
+
+        ldr             x30, [sp, #16]           // restore the link register saved on entry
+        AARCH64_VALIDATE_LINK_REGISTER
+        ldp             x19, x20, [sp], #32      // restore x19/x20 and release the stack frame
+
+        ret
+endfunc
+
+function ff_imdct_calc_neon, export=1   // x0 = context, x1 = output, x2 = input: run ff_imdct_half_neon into the middle of the output, then mirror it into the outer quarters
+        stp             x19, x20, [sp, #-32]!   // push callee-saved x19/x20 and reserve 32 bytes of stack
+        AARCH64_SIGN_LINK_REGISTER
+        str             x30, [sp, #16]          // save the link register because of the bl below
+        ldr             w3,  [x0, #28]          // mdct_bits
+        mov             x19, #1
+        mov             x20, x1                 // x20 = start of the output buffer
+        lsl             x19, x19, x3            // x19 = n = 1 << mdct_bits
+        add             x1,  x1,  x19           // the half transform writes starting n bytes into the output
+
+        bl              X(ff_imdct_half_neon)   // x0 (context) and x2 (input) are passed through unchanged
+
+        add             x0,  x20, x19,  lsl #2  // x0 = output + 4*n bytes ...
+        add             x1,  x20, x19,  lsl #1  // x1 = output + 2*n bytes, read upwards
+        sub             x0,  x0,  #8            // ... minus 8: store pointer walking downwards from the end
+        sub             x2,  x1,  #16           // x2 = read pointer walking downwards from output + 2*n
+        mov             x3,  #-16               // step for x2
+        mov             x6,  #-8                // step for x0
+1:
+        ld1             {v0.4s}, [x2], x3       // four values from below the midpoint
+        prfum           pldl1keep, [x0, #-16]   // prefetch hint for the descending store side
+        rev64           v0.4s, v0.4s            // swap the lanes within each 64-bit half
+        ld1             {v2.2s,v3.2s}, [x1], #16   // four values from above the midpoint
+        fneg            v4.4s,  v0.4s           // negate
+        prfum           pldl1keep, [x2, #-16]   // prefetch hint for the descending read side
+        rev64           v2.2s, v2.2s
+        rev64           v3.2s, v3.2s
+        ext             v4.16b, v4.16b, v4.16b, #8   // swap the 64-bit halves: v4 is now the negated values in fully reversed order
+        st1             {v2.2s}, [x0], x6       // lane-swapped upper values are written back to front at the end of the output
+        st1             {v3.2s}, [x0], x6
+        st1             {v4.4s}, [x20], #16     // negated reversed lower values are written front to back at the start of the output
+        subs            x19, x19,  #16          // 16 bytes are written at each end per pass
+        b.gt            1b
+
+        ldr             x30, [sp, #16]          // restore the link register saved on entry
+        AARCH64_VALIDATE_LINK_REGISTER
+        ldp             x19, x20, [sp], #32     // restore x19/x20 and release the stack frame
+
+        ret
+endfunc
+
+
+function ff_mdct_calc_neon, export=1
+        stp             x19, x20, [sp, #-32]!   // args: x0 = context, x1 = output, x2 = input; save x19/x20
+        AARCH64_SIGN_LINK_REGISTER
+        str             x30, [sp, #16]          // save the link register (clobbered by the bl below)
+
+        mov             x12, #1
+        ldr             w14, [x0, #28]          // mdct_bits
+        ldr             x4,  [x0, #32]          // tcos
+        ldr             x3,  [x0, #8]           // revtab
+        lsl             x14, x12, x14           // n  = 1 << mdct_bits
+        add             x7,  x2,  x14           // in4u
+        sub             x9,  x7,  #16           // in4d
+        add             x2,  x7,  x14, lsl #1   // in3u
+        add             x8,  x9,  x14, lsl #1   // in3d
+        add             x5,  x4,  x14, lsl #1   // x5 = tcos + 2 * n bytes
+        sub             x5,  x5,  #16           // minus 16: start of the descending tcos reads
+        sub             x3,  x3,  #4            // pre-bias revtab for the pre-indexed load in the loop
+        mov             x12, #-16               // post-index step for the descending loads
+        lsr             x13, x14, #1            // x13 = n >> 1: loop counter and offset of the second revtab read
+
+        ld2             {v16.2s,v17.2s}, [x9], x12  // in0u0,in0u1 in4d1,in4d0
+        ld2             {v18.2s,v19.2s}, [x8], x12  // in2u0,in2u1 in3d1,in3d0
+        ld2             {v0.2s, v1.2s},  [x7], #16  // in4u0,in4u1 in2d1,in2d0
+        rev64           v17.2s, v17.2s              // in4d0,in4d1 in3d0,in3d1
+        rev64           v19.2s, v19.2s              // in4d0,in4d1 in3d0,in3d1
+        ld2             {v2.2s, v3.2s},  [x2], #16  // in3u0,in3u1 in1d1,in1d0
+        fsub            v0.2s,  v17.2s, v0.2s       // in4d-in4u      I
+        ld2             {v20.2s,v21.2s}, [x4], #16  // c0,c1 s0,s1
+        rev64           v1.2s,  v1.2s               // in2d0,in2d1 in1d0,in1d1
+        rev64           v3.2s,  v3.2s               // in2d0,in2d1 in1d0,in1d1
+        ld2             {v30.2s,v31.2s}, [x5], x12  // c2,c3 s2,s3
+        fadd            v2.2s,  v2.2s,  v19.2s      // in3u+in3d     -R
+        fsub            v16.2s, v16.2s, v1.2s       // in0u-in2d      R
+        fadd            v18.2s, v18.2s, v3.2s       // in2u+in1d     -I
+1:
+        fmul            v7.2s,  v0.2s,  v21.2s      //  I*s
+        ldr             w10, [x3, x13]              // revtab word used for the v24/v25 stores
+        fmul            v6.2s,  v2.2s,  v20.2s      // -R*c
+        ldr             w6,  [x3, #4]!              // next revtab word (pre-increment) for the v6/v7 stores
+        fmul            v4.2s,  v2.2s,  v21.2s      // -R*s
+        fmul            v5.2s,  v0.2s,  v20.2s      //  I*c
+        fmul            v24.2s, v16.2s, v30.2s      //  R*c
+        fmul            v25.2s, v18.2s, v31.2s      // -I*s
+        fmul            v22.2s, v16.2s, v31.2s      //  R*s
+        fmul            v23.2s, v18.2s, v30.2s      //  I*c
+        subs            x14, x14, #16               // flags from this subs are overwritten by the next one
+        subs            x13, x13, #8                // sets Z for the b.eq below once x13 reaches 0
+        fsub            v6.2s,  v6.2s,  v7.2s       // -R*c-I*s
+        fadd            v7.2s,  v4.2s,  v5.2s       // -R*s+I*c
+        fsub            v24.2s, v25.2s, v24.2s      // I*s-R*c
+        fadd            v25.2s, v22.2s, v23.2s      // R*s-I*c
+        b.eq            1f                          // last iteration: skip the reloads, store in the tail
+        mov             x12, #-16                   // restore the stride: x12 is reused as a store address below
+        ld2             {v16.2s,v17.2s}, [x9], x12  // in0u0,in0u1 in4d1,in4d0
+        ld2             {v18.2s,v19.2s}, [x8], x12  // in2u0,in2u1 in3d1,in3d0
+        fneg            v7.2s,  v7.2s               //  R*s-I*c
+        ld2             {v0.2s, v1.2s},  [x7], #16  // in4u0,in4u1 in2d1,in2d0
+        rev64           v17.2s, v17.2s              // in4d0,in4d1 in3d0,in3d1
+        rev64           v19.2s, v19.2s              // in4d0,in4d1 in3d0,in3d1
+        ld2             {v2.2s, v3.2s},  [x2], #16  // in3u0,in3u1 in1d1,in1d0
+        fsub            v0.2s,  v17.2s, v0.2s       // in4d-in4u      I
+        ld2             {v20.2s,v21.2s}, [x4], #16  // c0,c1 s0,s1
+        rev64           v1.2s,  v1.2s               // in2d0,in2d1 in1d0,in1d1
+        rev64           v3.2s,  v3.2s               // in2d0,in2d1 in1d0,in1d1
+        ld2             {v30.2s,v31.2s}, [x5], x12  // c2,c3 s2,s3
+        fadd            v2.2s,  v2.2s,  v19.2s      // in3u+in3d     -R
+        fsub            v16.2s, v16.2s, v1.2s       // in0u-in2d      R
+        fadd            v18.2s, v18.2s, v3.2s       // in2u+in1d     -I
+        ubfm            x12, x6,  #16, #31          // bits 31:16 of the revtab word
+        ubfm            x6,  x6,  #0,  #15          // bits 15:0 of the revtab word
+        add             x12, x1,  x12, lsl #3       // output + index * 8 bytes
+        add             x6,  x1,  x6,  lsl #3       // output + index * 8 bytes
+        st2             {v6.s,v7.s}[0],   [x6]      // lane 0 pair goes to the low-half index
+        st2             {v6.s,v7.s}[1],   [x12]     // lane 1 pair goes to the high-half index
+        ubfm            x6,  x10, #16, #31          // same split for the second revtab word
+        ubfm            x10, x10, #0,  #15
+        add             x6 , x1,  x6,  lsl #3
+        add             x10, x1,  x10, lsl #3
+        st2             {v24.s,v25.s}[0], [x10]
+        st2             {v24.s,v25.s}[1], [x6]
+        b               1b
+1:
+        fneg            v7.2s,  v7.2s           //  R*s-I*c
+        ubfm            x12, x6,  #16, #31      // tail: same index split and stores as in the loop body
+        ubfm            x6,  x6,  #0,  #15
+        add             x12, x1,  x12, lsl #3
+        add             x6,  x1,  x6,  lsl #3
+        st2             {v6.s,v7.s}[0],   [x6]
+        st2             {v6.s,v7.s}[1],   [x12]
+        ubfm            x6,  x10, #16, #31
+        ubfm            x10, x10, #0,  #15
+        add             x6 , x1,  x6,  lsl #3
+        add             x10, x1,  x10, lsl #3
+        st2             {v24.s,v25.s}[0], [x10]
+        st2             {v24.s,v25.s}[1], [x6]
+
+        mov             x19, x0                 // keep the context pointer across the call
+        mov             x20, x1                 // keep the output pointer across the call
+        bl              X(ff_fft_calc_neon)
+
+        mov             x12, #1
+        ldr             w14, [x19, #28]         // mdct_bits
+        ldr             x4,  [x19, #32]         // tcos
+        lsl             x12, x12, x14           // n  = 1 << mdct_bits
+        lsr             x14, x12, #3            // n8 = n >> 3
+
+        add             x4,  x4,  x14, lsl #3   // tcos + n8 * 8 bytes
+        add             x6,  x20, x14, lsl #3   // output + n8 * 8 bytes
+        sub             x1,  x4,  #16           // descending tcos read pointer
+        sub             x3,  x6,  #16           // descending output read pointer
+
+        mov             x7,  #-16               // post-index step for the descending accesses
+        mov             x8,  x6                 // ascending store pointer, same start as x6
+        mov             x0,  x3                 // descending store pointer, same start as x3
+
+        ld2             {v0.2s,v1.2s},   [x3], x7   // d0 =r1,i1 d1 =r0,i0
+        ld2             {v20.2s,v21.2s}, [x6], #16  // d20=r2,i2 d21=r3,i3
+        ld2             {v16.2s,v17.2s}, [x1], x7   // c1,c0 s1,s0
+1:
+        subs            x14, x14, #2                // sets Z for the b.eq below; the vector ops leave flags alone
+        fmul            v7.2s,  v0.2s,  v17.2s      // r1*s1,r0*s0
+        ld2             {v18.2s,v19.2s}, [x4], #16  // c2,c3 s2,s3
+        fmul            v4.2s,  v1.2s,  v17.2s      // i1*s1,i0*s0
+        fmul            v6.2s,  v21.2s, v19.2s      // i2*s2,i3*s3
+        fmul            v5.2s,  v20.2s, v19.2s      // r2*s2,r3*s3
+        fmul            v24.2s, v0.2s,  v16.2s      // r1*c1,r0*c0
+        fmul            v25.2s, v20.2s, v18.2s      // r2*c2,r3*c3
+        fmul            v22.2s, v21.2s, v18.2s      // i2*c2,i3*c3
+        fmul            v23.2s, v1.2s,  v16.2s      // i1*c1,i0*c0
+        fadd            v4.2s,  v4.2s,  v24.2s      // i1*s1+r1*c1,i0*s0+r0*c0
+        fadd            v6.2s,  v6.2s,  v25.2s      // i2*s2+r2*c2,i3*s3+r3*c3
+        fsub            v5.2s,  v22.2s, v5.2s       // i2*c2-r2*s2,i3*c3-r3*s3
+        fsub            v7.2s,  v23.2s, v7.2s       // i1*c1-r1*s1,i0*c0-r0*s0
+        fneg            v4.2s,  v4.2s
+        fneg            v6.2s,  v6.2s
+        b.eq            1f                          // last pair: store without post-increment in the tail
+        ld2             {v0.2s, v1.2s},  [x3], x7
+        ld2             {v20.2s,v21.2s}, [x6], #16
+        ld2             {v16.2s,v17.2s}, [x1], x7   // c1,c0 s1,s0
+        rev64           v5.2s,  v5.2s
+        rev64           v7.2s,  v7.2s
+        st2             {v4.2s,v5.2s},  [x0], x7
+        st2             {v6.2s,v7.2s},  [x8], #16
+        b               1b
+1:
+        rev64           v5.2s,  v5.2s
+        rev64           v7.2s,  v7.2s
+        st2             {v4.2s,v5.2s},  [x0]
+        st2             {v6.2s,v7.2s},  [x8]
+
+        ldr             x30, [sp, #16]          // restore the link register
+        AARCH64_VALIDATE_LINK_REGISTER
+        ldp             x19, x20, [sp], #32     // restore x19/x20 and release the 32-byte frame
+
+        ret
+endfunc
diff --git a/media/ffvpx/libavcodec/aarch64/moz.build b/media/ffvpx/libavcodec/aarch64/moz.build
new file mode 100644
index 0000000000..7126a39648
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/moz.build
@@ -0,0 +1,59 @@
+## -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+## vim: set filetype=python:
+## This Source Code Form is subject to the terms of the Mozilla Public
+## License, v. 2.0. If a copy of the MPL was not distributed with this
+## file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+if not CONFIG['MOZ_FFVPX_AUDIOONLY']:  # not audio-only: also build the H.264/VP8/VP9/video DSP sources
+    SOURCES += [
+        'fft_init_aarch64.c',
+        'fft_neon.S',
+        'h264chroma_init_aarch64.c',
+        'h264cmc_neon.S',
+        'h264dsp_init_aarch64.c',
+        'h264dsp_neon.S',
+        'h264idct_neon.S',
+        'h264pred_init.c',
+        'h264pred_neon.S',
+        'hpeldsp_init_aarch64.c',
+        'hpeldsp_neon.S',
+        'idctdsp_init_aarch64.c',
+        'idctdsp_neon.S',
+        'mdct_neon.S',
+        'mpegaudiodsp_init.c',
+        'mpegaudiodsp_neon.S',
+        'neon.S',
+        'simple_idct_neon.S',
+        'videodsp.S',
+        'videodsp_init.c',
+        'vp8dsp_init_aarch64.c',
+        'vp8dsp_neon.S',
+        'vp9dsp_init_10bpp_aarch64.c',
+        'vp9dsp_init_12bpp_aarch64.c',
+        'vp9dsp_init_aarch64.c',
+        'vp9itxfm_16bpp_neon.S',
+        'vp9itxfm_neon.S',
+        'vp9lpf_16bpp_neon.S',
+        'vp9lpf_neon.S',
+        'vp9mc_16bpp_neon.S',
+        'vp9mc_aarch64.S',
+        'vp9mc_neon.S',
+    ]
+else:  # audio-only build: reduced source list
+    SOURCES += [  # NOTE(review): mdct_neon.S is absent here, yet fft_init_aarch64.c uses ff_imdct_*_neon under CONFIG_MDCT - confirm
+        'fft_init_aarch64.c',
+        'fft_neon.S',
+        'idctdsp_init_aarch64.c',
+        'idctdsp_neon.S',
+        'mpegaudiodsp_init.c',
+        'mpegaudiodsp_neon.S',
+        'simple_idct_neon.S',
+    ]
+
+if CONFIG['OS_ARCH'] == 'WINNT':  # Windows targets only
+    USE_INTEGRATED_CLANGCL_AS = True  # presumably assembles the .S files with clang-cl's integrated assembler - confirm
+    DEFINES['EXTERN_ASM'] = ''  # define EXTERN_ASM as an empty string
+
+FINAL_LIBRARY = 'mozavcodec'  # objects from this directory are linked into mozavcodec
+
+include('/media/ffvpx/ffvpxcommon.mozbuild')  # shared ffvpx build settings
diff --git a/media/ffvpx/libavcodec/aarch64/mpegaudiodsp_init.c b/media/ffvpx/libavcodec/aarch64/mpegaudiodsp_init.c
new file mode 100644
index 0000000000..5d966af5f4
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/mpegaudiodsp_init.c
@@ -0,0 +1,40 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/mpegaudiodsp.h"
+#include "config.h"
+
+void ff_mpadsp_apply_window_fixed_neon(int32_t *synth_buf, int32_t *window, int *dither,
+                                       int16_t *samples, ptrdiff_t incr);
+void ff_mpadsp_apply_window_float_neon(float *synth_buf, float *window, int *dither,
+                                       float *samples, ptrdiff_t incr);
+
+/* Point both apply_window hooks at the NEON routines when the CPU flags report NEON. */
+av_cold void ff_mpadsp_init_aarch64(MPADSPContext *s)
+{
+    const int flags = av_get_cpu_flags();
+    if (!have_neon(flags))
+        return;
+    s->apply_window_fixed = ff_mpadsp_apply_window_fixed_neon;
+    s->apply_window_float = ff_mpadsp_apply_window_float_neon;
+}
diff --git a/media/ffvpx/libavcodec/aarch64/mpegaudiodsp_neon.S b/media/ffvpx/libavcodec/aarch64/mpegaudiodsp_neon.S
new file mode 100644
index 0000000000..b6ef131228
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/mpegaudiodsp_neon.S
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+#define FRAC_BITS   23   // fractional bits for sb_samples and dct
+#define WFRAC_BITS  16   // fractional bits for window
+#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)  // = 24 with the values above
+
+const   tbl_rev128_s, align=4
+        .byte           12, 13, 14, 15          // tbl indices: result word 0 = source word 3
+        .byte            8,  9, 10, 11          // result word 1 = source word 2
+        .byte            4,  5,  6,  7          // result word 2 = source word 1
+        .byte            0,  1,  2,  3          // result word 3 = source word 0
+endconst
+
+.macro   apply_window   type, st                // type: fixed or float; st: lane suffix (h or s) used by the sample stores
+function ff_mpadsp_apply_window_\type\()_neon, export=1
+        mov             x7,  x0                 // args: x0 = synth_buf, x1 = window, x2 = dither, x3 = samples, x4 = incr
+        add             x8,  x0,  #512<<2       // x8 = synth_buf + 512 words
+        ld1             {v0.4s,v1.4s,v2.4s,v3.4s},  [x7],  #64
+        ld1             {v4.4s,v5.4s,v6.4s,v7.4s},  [x7],  #64
+        st1             {v0.4s,v1.4s,v2.4s,v3.4s},  [x8],  #64   // copy the first 32 words of synth_buf
+        st1             {v4.4s,v5.4s,v6.4s,v7.4s},  [x8],  #64   // to word offsets 512..543
+        movrel          x15, tbl_rev128_s
+        ld1             {v27.4s}, [x15]         // v27 = word-reversal indices for tbl
+.ifc \type, fixed
+        lsl             x4,  x4,  #1            // incr in bytes (2-byte samples)
+.else
+        lsl             x4,  x4,  #2            // incr in bytes (4-byte samples)
+.endif
+        add             x10, x0,  #45<<2        // synth_buf + 45 words
+        add             x0,  x0,  #16<<2        // synth_buf + 16 words
+        add             x1,  x1,  #16<<2        // window + 16 words
+        add             x5,  x3,  x4,  lsl #5   // samples + 32 * incr
+        sub             x5,  x5,  x4            // samples2
+        neg             x13, x4                 // -incr
+        mov             x9,  #64<<2             // load stride: 64 words
+.ifc \type, fixed
+        ld1r            {v16.2s}, [x2]          // dither_state
+        sxtl            v16.2d, v16.2s          // sign-extend to 64-bit lanes
+        movi            v29.2d, #0              // v29 = zero
+        movi            v30.2d, #(1<<OUT_SHIFT)-1       // low OUT_SHIFT bits set in both lanes
+        trn1            v31.2d, v29.2d, v30.2d  // v31: lane 0 = 0, lane 1 = mask
+        trn2            v30.2d, v30.2d, v29.2d  // v30: lane 0 = mask, lane 1 = 0
+        trn1            v16.2d, v16.2d, v29.2d  // v16: lane 0 = dither_state, lane 1 = 0
+.else
+        movi            v16.4s, #0
+        movi            v28.4s, #0              // stays zero in this variant; copied into v16 after each outer pass
+.endif
+        mov             x14, #4                 // outer loop: 4 passes
+1:
+        mov             x8,  x0
+        sub             x7,  x1,  #3<<2
+        sub             x6,  x1,  x14, lsl #4   // w  = x1 - 16 * x14 bytes
+        add             x7,  x7,  x14, lsl #4   // w2 = x1 - 12 + 16 * x14 bytes
+        add             x11, x6, #(32)<<2      // w  + 32
+        add             x12, x7, #(32)<<2      // w2 + 32
+        mov             x15, #8                 // inner loop: 8 passes
+        movi            v17.2d, #0
+        movi            v18.2d, #0
+        movi            v19.2d, #0
+2:
+        subs            x15, x15, #1            // flags consumed by the b.gt 2b at the bottom
+        ld1             {v0.4s},  [x8],  x9
+        ld1             {v1.4s},  [x10], x9
+        ld1             {v2.4s},  [x6],  x9
+        ld1             {v3.4s},  [x7],  x9
+        tbl             v6.16b, {v0.16b}, v27.16b       // v6 = v0 with its four words reversed
+        tbl             v7.16b, {v1.16b}, v27.16b       // v7 = v1 with its four words reversed
+        ld1             {v4.4s},  [x11], x9
+        ld1             {v5.4s},  [x12], x9
+        MLA             v16, v2, v0
+        MLA2            v17, v2, v0
+        MLS             v18, v3, v6
+        MLS2            v19, v3, v6
+        MLS             v16, v4, v7
+        MLS2            v17, v4, v7
+        MLS             v18, v5, v1
+        MLS2            v19, v5, v1
+        b.gt            2b
+
+        cmp             x14, #4                 // equal only on the first outer pass; feeds both b.eq 4f below
+        sub             x10, x10, #64<<5        // 64 * 8 * sizeof(int32_t)
+
+.ifc \type, fixed
+        and             v28.16b, v16.16b, v30.16b
+        ext             v28.16b, v29.16b, v28.16b, #8
+
+        b.eq            4f                      // first pass: skip the extra round_sample on v19
+        round_sample    v19, 1, 1
+4:
+        round_sample    v16, 1, 0
+        shrn            v16.2s, v16.2d,  #OUT_SHIFT
+        round_sample    v19, 0, 0
+        shrn            v19.2s, v19.2d,  #OUT_SHIFT
+        round_sample    v17, 0, 1
+        round_sample    v18, 1, 1
+        round_sample    v17, 1, 0
+        shrn2           v16.4s, v17.2d,  #OUT_SHIFT
+        round_sample    v18, 0, 0
+        shrn2           v19.4s, v18.2d,  #OUT_SHIFT
+        sqxtn           v16.4h, v16.4s
+        sqxtn           v18.4h, v19.4s
+.else
+        ext             v18.16b, v18.16b, v18.16b, #8   // swap the two 64-bit halves of v18
+.endif
+
+        st1             {v16.\st\()}[0], [x3], x4
+        b.eq            4f                      // first pass: skip the lane 1 store through x5
+        st1             {v18.\st\()}[1], [x5], x13
+4:
+        st1             {v16.\st\()}[1], [x3], x4
+        st1             {v18.\st\()}[0], [x5], x13
+        st1             {v16.\st\()}[2], [x3], x4
+        st1             {v18.\st\()}[3], [x5], x13
+        st1             {v16.\st\()}[3], [x3], x4
+        st1             {v18.\st\()}[2], [x5], x13
+
+        mov             v16.16b, v28.16b        // seed the accumulator for the next pass from v28
+
+        subs            x14, x14, #1
+        add             x0,  x0,  #4<<2
+        sub             x10, x10, #4<<2
+        b.gt            1b
+
+// computing samples[16]
+        add             x6,  x1,  #32<<2
+        ld1             {v0.2s},  [x6],  x9
+        ld1             {v1.2s},  [x0],  x9
+.rept   3
+        ld1             {v2.2s},  [x6],  x9
+        ld1             {v3.2s},  [x0],  x9
+        MLS             v16, v0,  v1
+        ld1             {v0.2s},  [x6],  x9
+        ld1             {v1.2s},  [x0],  x9
+        MLS             v16, v2,  v3
+.endr
+        ld1             {v2.2s},  [x6],  x9
+        ld1             {v3.2s},  [x0],  x9
+        MLS             v16, v0,  v1
+        MLS             v16, v2,  v3
+
+.ifc \type, fixed
+        and             v28.16b, v16.16b, v30.16b
+        shrn            v20.2s,  v16.2d,  #OUT_SHIFT
+        xtn             v28.2s,  v28.2d
+        sqxtn           v20.4h,  v20.4s
+        st1             {v28.s}[0], [x2]        // save dither_state
+        st1             {v20.h}[0], [x3]
+.else
+        st1             {v16.s}[0], [x3]
+.endif
+
+        ret
+endfunc
+.purgem round_sample                            // drop the helper macros so a later instantiation can redefine them
+.purgem MLA
+.purgem MLA2
+.purgem MLS
+.purgem MLS2
+.endm
+
+
+.macro  round_sample    r, idx, next            // fixed-point variant; idx and next select 64-bit lanes (0 or 1)
+        add             \r\().2d, \r\().2d, v28.2d      // add the bits carried in v28
+.if \idx == 0
+        and             v28.16b,  \r\().16b,  v30.16b   // new carry: bits selected by the v30 mask
+.else // \idx == 1
+        and             v28.16b,  \r\().16b,  v31.16b   // new carry: bits selected by the v31 mask
+.endif
+.if \idx != \next
+  .if \next == 0
+        ext             v28.16b, v28.16b, v29.16b, #8   // upper half of v28 moves to the lower half; upper half comes from v29
+  .else
+        ext             v28.16b, v29.16b, v28.16b, #8   // lower half of v28 moves to the upper half; lower half comes from v29
+  .endif
+.endif
+.endm
+.macro  MLA             d, s1, s2
+        smlal           \d\().2d, \s1\().2s, \s2\().2s  // signed widening multiply-accumulate, low two 32-bit lanes
+.endm
+.macro  MLA2            d, s1, s2
+        smlal2          \d\().2d, \s1\().4s, \s2\().4s  // signed widening multiply-accumulate, high two 32-bit lanes
+.endm
+.macro  MLS             d, s1, s2
+        smlsl           \d\().2d, \s1\().2s, \s2\().2s  // signed widening multiply-subtract, low two 32-bit lanes
+.endm
+.macro  MLS2            d, s1, s2
+        smlsl2          \d\().2d, \s1\().4s, \s2\().4s  // signed widening multiply-subtract, high two 32-bit lanes
+.endm
+apply_window fixed, h
+
+
+// float variant: round_sample and ML{A,S}2 expand to nothing, MLA/MLS cover all four lanes
+.macro  round_sample    r, idx, next
+.endm
+.macro  MLA2            d, s1, s2
+.endm
+.macro  MLS2            d, s1, s2
+.endm
+.macro  MLA             d, s1, s2
+        fmla            \d\().4s, \s1\().4s, \s2\().4s  // fused multiply-add on four float lanes
+.endm
+.macro  MLS             d, s1, s2
+        fmls            \d\().4s, \s1\().4s, \s2\().4s  // fused multiply-subtract on four float lanes
+.endm
+apply_window float, s
diff --git a/media/ffvpx/libavcodec/aarch64/neon.S b/media/ffvpx/libavcodec/aarch64/neon.S
new file mode 100644
index 0000000000..1ad32c359d
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/neon.S
@@ -0,0 +1,162 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+.macro  transpose_8x8B  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9     // 8x8 byte transpose of r0-r7 in place; r8 and r9 are scratch
+        trn1            \r8\().8B,  \r0\().8B,  \r1\().8B       // stage 1: interleave bytes of adjacent row pairs
+        trn2            \r9\().8B,  \r0\().8B,  \r1\().8B
+        trn1            \r1\().8B,  \r2\().8B,  \r3\().8B
+        trn2            \r3\().8B,  \r2\().8B,  \r3\().8B
+        trn1            \r0\().8B,  \r4\().8B,  \r5\().8B
+        trn2            \r5\().8B,  \r4\().8B,  \r5\().8B
+        trn1            \r2\().8B,  \r6\().8B,  \r7\().8B
+        trn2            \r7\().8B,  \r6\().8B,  \r7\().8B
+
+        trn1            \r4\().4H,  \r0\().4H,  \r2\().4H       // stage 2: interleave 16-bit units
+        trn2            \r2\().4H,  \r0\().4H,  \r2\().4H
+        trn1            \r6\().4H,  \r5\().4H,  \r7\().4H
+        trn2            \r7\().4H,  \r5\().4H,  \r7\().4H
+        trn1            \r5\().4H,  \r9\().4H,  \r3\().4H
+        trn2            \r9\().4H,  \r9\().4H,  \r3\().4H
+        trn1            \r3\().4H,  \r8\().4H,  \r1\().4H
+        trn2            \r8\().4H,  \r8\().4H,  \r1\().4H
+
+        trn1            \r0\().2S,  \r3\().2S,  \r4\().2S       // stage 3: interleave 32-bit units; columns 0-7 land in r0-r7
+        trn2            \r4\().2S,  \r3\().2S,  \r4\().2S
+
+        trn1            \r1\().2S,  \r5\().2S,  \r6\().2S
+        trn2            \r5\().2S,  \r5\().2S,  \r6\().2S
+
+        trn2            \r6\().2S,  \r8\().2S,  \r2\().2S       // r6 is written first because the next line overwrites r2
+        trn1            \r2\().2S,  \r8\().2S,  \r2\().2S
+
+        trn1            \r3\().2S,  \r9\().2S,  \r7\().2S
+        trn2            \r7\().2S,  \r9\().2S,  \r7\().2S
+.endm
+
+.macro  transpose_8x16B r0, r1, r2, r3, r4, r5, r6, r7, t0, t1     // two 8x8 byte transposes (one per 64-bit half) of r0-r7; t0, t1 scratch
+        trn1            \t0\().16B, \r0\().16B, \r1\().16B      // stage 1: interleave bytes of adjacent row pairs
+        trn2            \t1\().16B, \r0\().16B, \r1\().16B
+        trn1            \r1\().16B, \r2\().16B, \r3\().16B
+        trn2            \r3\().16B, \r2\().16B, \r3\().16B
+        trn1            \r0\().16B, \r4\().16B, \r5\().16B
+        trn2            \r5\().16B, \r4\().16B, \r5\().16B
+        trn1            \r2\().16B, \r6\().16B, \r7\().16B
+        trn2            \r7\().16B, \r6\().16B, \r7\().16B
+
+        trn1            \r4\().8H,  \r0\().8H,  \r2\().8H       // stage 2: interleave 16-bit units
+        trn2            \r2\().8H,  \r0\().8H,  \r2\().8H
+        trn1            \r6\().8H,  \r5\().8H,  \r7\().8H
+        trn2            \r7\().8H,  \r5\().8H,  \r7\().8H
+        trn1            \r5\().8H,  \t1\().8H,  \r3\().8H
+        trn2            \t1\().8H,  \t1\().8H,  \r3\().8H
+        trn1            \r3\().8H,  \t0\().8H,  \r1\().8H
+        trn2            \t0\().8H,  \t0\().8H,  \r1\().8H
+
+        trn1            \r0\().4S,  \r3\().4S,  \r4\().4S       // stage 3: interleave 32-bit units; results land in r0-r7
+        trn2            \r4\().4S,  \r3\().4S,  \r4\().4S
+
+        trn1            \r1\().4S,  \r5\().4S,  \r6\().4S
+        trn2            \r5\().4S,  \r5\().4S,  \r6\().4S
+
+        trn2            \r6\().4S,  \t0\().4S,  \r2\().4S       // r6 is written first because the next line overwrites r2
+        trn1            \r2\().4S,  \t0\().4S,  \r2\().4S
+
+        trn1            \r3\().4S,  \t1\().4S,  \r7\().4S
+        trn2            \r7\().4S,  \t1\().4S,  \r7\().4S
+.endm
+
+.macro  transpose_4x16B r0, r1, r2, r3, t4, t5, t6, t7     // four 4x4 byte transposes (one per 32-bit group) of r0-r3; t4-t7 scratch
+        trn1            \t4\().16B, \r0\().16B,  \r1\().16B     // stage 1: interleave bytes of adjacent row pairs into the temporaries
+        trn2            \t5\().16B, \r0\().16B,  \r1\().16B
+        trn1            \t6\().16B, \r2\().16B,  \r3\().16B
+        trn2            \t7\().16B, \r2\().16B,  \r3\().16B
+
+        trn1            \r0\().8H,  \t4\().8H,  \t6\().8H       // stage 2: interleave 16-bit units back into r0-r3
+        trn2            \r2\().8H,  \t4\().8H,  \t6\().8H
+        trn1            \r1\().8H,  \t5\().8H,  \t7\().8H
+        trn2            \r3\().8H,  \t5\().8H,  \t7\().8H
+.endm
+
+.macro  transpose_4x8B  r0, r1, r2, r3, t4, t5, t6, t7     // two 4x4 byte transposes (one per 32-bit group) of r0-r3; t4-t7 scratch
+        trn1            \t4\().8B,  \r0\().8B,  \r1\().8B       // stage 1: interleave bytes of adjacent row pairs into the temporaries
+        trn2            \t5\().8B,  \r0\().8B,  \r1\().8B
+        trn1            \t6\().8B,  \r2\().8B,  \r3\().8B
+        trn2            \t7\().8B,  \r2\().8B,  \r3\().8B
+
+        trn1            \r0\().4H,  \t4\().4H,  \t6\().4H       // stage 2: interleave 16-bit units back into r0-r3
+        trn2            \r2\().4H,  \t4\().4H,  \t6\().4H
+        trn1            \r1\().4H,  \t5\().4H,  \t7\().4H
+        trn2            \r3\().4H,  \t5\().4H,  \t7\().4H
+.endm
+
+.macro  transpose_4x4H  r0, r1, r2, r3, r4, r5, r6, r7     // 4x4 halfword transpose of r0-r3; r4-r7 are scratch
+        trn1            \r4\().4H,  \r0\().4H,  \r1\().4H       // stage 1: interleave halfwords of adjacent row pairs into r4-r7
+        trn2            \r5\().4H,  \r0\().4H,  \r1\().4H
+        trn1            \r6\().4H,  \r2\().4H,  \r3\().4H
+        trn2            \r7\().4H,  \r2\().4H,  \r3\().4H
+
+        trn1            \r0\().2S,  \r4\().2S,  \r6\().2S       // stage 2: interleave 32-bit units back into r0-r3
+        trn2            \r2\().2S,  \r4\().2S,  \r6\().2S
+        trn1            \r1\().2S,  \r5\().2S,  \r7\().2S
+        trn2            \r3\().2S,  \r5\().2S,  \r7\().2S
+.endm
+
+.macro transpose_4x8H r0, r1, r2, r3, t4, t5, t6, t7       // two 4x4 halfword transposes (one per 64-bit half) of r0-r3; t4-t7 scratch
+        trn1            \t4\().8H,  \r0\().8H,  \r1\().8H       // stage 1: interleave halfwords of adjacent row pairs into the temporaries
+        trn2            \t5\().8H,  \r0\().8H,  \r1\().8H
+        trn1            \t6\().8H,  \r2\().8H,  \r3\().8H
+        trn2            \t7\().8H,  \r2\().8H,  \r3\().8H
+
+        trn1            \r0\().4S,  \t4\().4S,  \t6\().4S       // stage 2: interleave 32-bit units back into r0-r3
+        trn2            \r2\().4S,  \t4\().4S,  \t6\().4S
+        trn1            \r1\().4S,  \t5\().4S,  \t7\().4S
+        trn2            \r3\().4S,  \t5\().4S,  \t7\().4S
+.endm
+
+.macro  transpose_8x8H  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9     // 8x8 halfword transpose of r0-r7 in place; r8 and r9 are scratch
+        trn1            \r8\().8H,  \r0\().8H,  \r1\().8H       // stage 1: interleave halfwords of adjacent row pairs
+        trn2            \r9\().8H,  \r0\().8H,  \r1\().8H
+        trn1            \r1\().8H,  \r2\().8H,  \r3\().8H
+        trn2            \r3\().8H,  \r2\().8H,  \r3\().8H
+        trn1            \r0\().8H,  \r4\().8H,  \r5\().8H
+        trn2            \r5\().8H,  \r4\().8H,  \r5\().8H
+        trn1            \r2\().8H,  \r6\().8H,  \r7\().8H
+        trn2            \r7\().8H,  \r6\().8H,  \r7\().8H
+
+        trn1            \r4\().4S,  \r0\().4S,  \r2\().4S       // stage 2: interleave 32-bit units
+        trn2            \r2\().4S,  \r0\().4S,  \r2\().4S
+        trn1            \r6\().4S,  \r5\().4S,  \r7\().4S
+        trn2            \r7\().4S,  \r5\().4S,  \r7\().4S
+        trn1            \r5\().4S,  \r9\().4S,  \r3\().4S
+        trn2            \r9\().4S,  \r9\().4S,  \r3\().4S
+        trn1            \r3\().4S,  \r8\().4S,  \r1\().4S
+        trn2            \r8\().4S,  \r8\().4S,  \r1\().4S
+
+        trn1            \r0\().2D,  \r3\().2D,  \r4\().2D       // stage 3: interleave 64-bit units; columns 0-7 land in r0-r7
+        trn2            \r4\().2D,  \r3\().2D,  \r4\().2D
+
+        trn1            \r1\().2D,  \r5\().2D,  \r6\().2D
+        trn2            \r5\().2D,  \r5\().2D,  \r6\().2D
+
+        trn2            \r6\().2D,  \r8\().2D,  \r2\().2D       // r6 is written first because the next line overwrites r2
+        trn1            \r2\().2D,  \r8\().2D,  \r2\().2D
+
+        trn1            \r3\().2D,  \r9\().2D,  \r7\().2D
+        trn2            \r7\().2D,  \r9\().2D,  \r7\().2D
+
+.endm
diff --git a/media/ffvpx/libavcodec/aarch64/simple_idct_neon.S b/media/ffvpx/libavcodec/aarch64/simple_idct_neon.S
new file mode 100644
index 0000000000..210182ff21
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/simple_idct_neon.S
@@ -0,0 +1,362 @@
+/*
+ * ARM NEON IDCT
+ *
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2017 Matthieu Bouron <matthieu.bouron@gmail.com>
+ *
+ * Based on Simple IDCT
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+#define Z1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define Z2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define Z3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define Z4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -- NOTE(review): the formula yields 16384 for i = 4; 16383 is presumably deliberate, confirm
+#define Z5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define Z6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define Z7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define Z4c ((1<<(COL_SHIFT-1))/Z4)  // column rounding term, pre-divided by Z4 since it is added before the z4 multiply
+#define ROW_SHIFT 11  // right shift applied when idct_row4_neon narrows its results
+#define COL_SHIFT 20  // column shift: 16 bits via addhn/subhn, the remaining COL_SHIFT-16 in the exported functions
+// Lane aliases into v0, which idct_start loads from idct_coeff_neon.
+#define z1 v0.H[0]
+#define z2 v0.H[1]
+#define z3 v0.H[2]
+#define z4 v0.H[3]
+#define z5 v0.H[4]
+#define z6 v0.H[5]
+#define z7 v0.H[6]
+#define z4c v0.H[7]
+// The order of the table entries has to match the lane aliases above.
+const   idct_coeff_neon, align=4
+        .short Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z4c
+endconst
+
+.macro idct_start data
+        prfm            pldl1keep, [\data]  // common prologue: prefetch the coefficient block
+        mov             x10, x30  // save the return address; the bl calls in the callers overwrite x30
+        movrel          x3, idct_coeff_neon  // x3 is clobbered
+        ld1             {v0.2D}, [x3]  // v0 = Z1..Z7, Z4c, read later through the z1..z4c aliases
+.endm
+
+.macro idct_end
+        ret             x10  // common epilogue: return via the address idct_start saved in x10
+.endm
+
+.macro smull1 a, b, c
+        smull           \a, \b, \c  // idct_col4_top appends its i parameter to the mnemonic; i = 1 lands here, i = 2 gives smull2
+.endm
+// Same wrapper for the multiply-accumulate form.
+.macro smlal1 a, b, c
+        smlal           \a, \b, \c
+.endm
+// Same wrapper for the multiply-subtract form.
+.macro smlsl1 a, b, c
+        smlsl           \a, \b, \c
+.endm
+
+.macro idct_col4_top y1, y2, y3, y4, i, l
+        smull\i         v7.4S,  \y3\l, z2  // expects v23 preloaded by the caller; y1 is not referenced in this macro
+        smull\i         v16.4S, \y3\l, z6
+        smull\i         v17.4S, \y2\l, z1
+        add             v19.4S, v23.4S, v7.4S  // v19 = v23 + y3*z2
+        smull\i         v18.4S, \y2\l, z3
+        add             v20.4S, v23.4S, v16.4S  // v20 = v23 + y3*z6
+        smull\i         v5.4S,  \y2\l, z5
+        sub             v21.4S, v23.4S, v16.4S  // v21 = v23 - y3*z6
+        smull\i         v6.4S,  \y2\l, z7
+        sub             v22.4S, v23.4S, v7.4S  // v22 = v23 - y3*z2
+        // v17, v18, v5, v6 hold y2 times z1, z3, z5, z7; now fold in the y4 terms
+        smlal\i         v17.4S, \y4\l, z3
+        smlsl\i         v18.4S, \y4\l, z7
+        smlsl\i         v5.4S,  \y4\l, z1
+        smlsl\i         v6.4S,  \y4\l, z5
+.endm
+
+.macro idct_row4_neon y1, y2, y3, y4, pass
+        ld1             {\y1\().2D,\y2\().2D}, [x2], #32  // load 32 bytes of coefficients and advance x2
+        movi            v23.4S, #1<<2, lsl #8  // v23 = 1024 = 1 << (ROW_SHIFT - 1), the rounding bias
+        orr             v5.16B, \y1\().16B, \y2\().16B
+        ld1             {\y3\().2D,\y4\().2D}, [x2], #32
+        orr             v6.16B, \y3\().16B, \y4\().16B
+        orr             v5.16B, v5.16B, v6.16B
+        mov             x3, v5.D[1]  // x3 = OR of the upper 64 bits of all four input vectors
+        smlal           v23.4S, \y1\().4H, z4
+        // Terms coming from the lower four lanes of y2..y4 (y1 is already folded into v23).
+        idct_col4_top   \y1, \y2, \y3, \y4, 1, .4H
+        // When every upper lane is zero its terms vanish, so jump to the combine step at the pass label.
+        cmp             x3, #0
+        b.eq            \pass\()f
+        // Add the terms coming from the upper four lanes of y1..y4.
+        smull2          v7.4S, \y1\().8H, z4
+        smlal2          v17.4S, \y2\().8H, z5
+        smlsl2          v18.4S, \y2\().8H, z1
+        smull2          v16.4S, \y3\().8H, z2
+        smlal2          v5.4S, \y2\().8H, z7
+        add             v19.4S, v19.4S, v7.4S
+        sub             v20.4S, v20.4S, v7.4S
+        sub             v21.4S, v21.4S, v7.4S
+        add             v22.4S, v22.4S, v7.4S
+        smlal2          v6.4S, \y2\().8H, z3
+        smull2          v7.4S, \y3\().8H, z6
+        smlal2          v17.4S, \y4\().8H, z7
+        smlsl2          v18.4S, \y4\().8H, z5
+        smlal2          v5.4S, \y4\().8H, z3
+        smlsl2          v6.4S, \y4\().8H, z1
+        add             v19.4S, v19.4S, v7.4S
+        sub             v20.4S, v20.4S, v16.4S
+        add             v21.4S, v21.4S, v16.4S
+        sub             v22.4S, v22.4S, v7.4S
+        // Combine: sums go to the lower halves of y1..y4, differences to the upper halves, all shifted right by ROW_SHIFT.
+\pass:  add             \y3\().4S, v19.4S, v17.4S
+        add             \y4\().4S, v20.4S, v18.4S
+        shrn            \y1\().4H, \y3\().4S, #ROW_SHIFT
+        shrn            \y2\().4H, \y4\().4S, #ROW_SHIFT
+        add             v7.4S, v21.4S, v5.4S
+        add             v16.4S, v22.4S, v6.4S
+        shrn            \y3\().4H, v7.4S, #ROW_SHIFT
+        shrn            \y4\().4H, v16.4S, #ROW_SHIFT
+        sub             v22.4S, v22.4S, v6.4S
+        sub             v19.4S, v19.4S, v17.4S
+        sub             v21.4S, v21.4S, v5.4S
+        shrn2           \y1\().8H, v22.4S, #ROW_SHIFT
+        sub             v20.4S, v20.4S, v18.4S
+        shrn2           \y2\().8H, v21.4S, #ROW_SHIFT
+        shrn2           \y3\().8H, v20.4S, #ROW_SHIFT
+        shrn2           \y4\().8H, v19.4S, #ROW_SHIFT
+        // Transpose the 4x4 halfword block in the lower halves and, independently, the one in the upper halves.
+        trn1            v16.8H, \y1\().8H, \y2\().8H
+        trn2            v17.8H, \y1\().8H, \y2\().8H
+        trn1            v18.8H, \y3\().8H, \y4\().8H
+        trn2            v19.8H, \y3\().8H, \y4\().8H
+        trn1            \y1\().4S, v16.4S, v18.4S
+        trn1            \y2\().4S, v17.4S, v19.4S
+        trn2            \y3\().4S, v16.4S, v18.4S
+        trn2            \y4\().4S, v17.4S, v19.4S
+.endm
+
+.macro declare_idct_col4_neon i, l
+function idct_col4_neon\i
+        dup             v23.4H, z4c  // start from the rounding term z4c in every lane
+.if \i == 1
+        add             v23.4H, v23.4H, v24.4H
+.else
+        mov             v5.D[0], v24.D[1]  // bring the upper half of v24 down so the .4H add can reach it
+        add             v23.4H, v23.4H, v5.4H
+.endif
+        smull           v23.4S, v23.4H, z4  // v23 = (selected half of v24 + z4c) * z4
+        // Terms from v25..v27 for the selected half (i = 1: lanes 0-3, i = 2: lanes 4-7).
+        idct_col4_top   v24, v25, v26, v27, \i, \l
+        // Each of v28..v31 contributes only when its selected 64-bit half is non-zero.
+        mov             x4, v28.D[\i - 1]
+        mov             x5, v29.D[\i - 1]
+        cmp             x4, #0
+        b.eq            1f
+        // v28 term, multiplied by z4
+        smull\i         v7.4S,  v28\l,  z4
+        add             v19.4S, v19.4S, v7.4S
+        sub             v20.4S, v20.4S, v7.4S
+        sub             v21.4S, v21.4S, v7.4S
+        add             v22.4S, v22.4S, v7.4S
+        // the next half is fetched into x4 before the pending test on x5
+1:      mov             x4, v30.D[\i - 1]
+        cmp             x5, #0
+        b.eq            2f
+        // v29 terms
+        smlal\i         v17.4S, v29\l, z5
+        smlsl\i         v18.4S, v29\l, z1
+        smlal\i         v5.4S,  v29\l, z7
+        smlal\i         v6.4S,  v29\l, z3
+
+2:      mov             x5, v31.D[\i - 1]
+        cmp             x4, #0
+        b.eq            3f
+        // v30 terms
+        smull\i         v7.4S,  v30\l, z6
+        smull\i         v16.4S, v30\l, z2
+        add             v19.4S, v19.4S, v7.4S
+        sub             v22.4S, v22.4S, v7.4S
+        sub             v20.4S, v20.4S, v16.4S
+        add             v21.4S, v21.4S, v16.4S
+
+3:      cmp             x5, #0
+        b.eq            4f
+        // v31 terms
+        smlal\i         v17.4S, v31\l, z7
+        smlsl\i         v18.4S, v31\l, z5
+        smlal\i         v5.4S,  v31\l, z3
+        smlsl\i         v6.4S,  v31\l, z1
+        // Keep the upper 16 bits of each 32-bit sum or difference (an implicit shift right by 16).
+4:      addhn           v7.4H, v19.4S, v17.4S
+        addhn2          v7.8H, v20.4S, v18.4S
+        subhn           v18.4H, v20.4S, v18.4S
+        subhn2          v18.8H, v19.4S, v17.4S
+        // Result layout used by the callers: v7 = rows 0,1  v16 = rows 2,3  v17 = rows 4,5  v18 = rows 6,7
+        addhn           v16.4H, v21.4S, v5.4S
+        addhn2          v16.8H, v22.4S, v6.4S
+        subhn           v17.4H, v22.4S, v6.4S
+        subhn2          v17.8H, v21.4S, v5.4S
+
+        ret
+endfunc
+.endm
+// Instantiate idct_col4_neon1 (lanes 0-3) and idct_col4_neon2 (lanes 4-7).
+declare_idct_col4_neon 1, .4H
+declare_idct_col4_neon 2, .8H
+
+function ff_simple_idct_put_neon, export=1
+        idct_start      x2
+        // Transforms the 128-byte coefficient block at x2 and stores 8 rows of 8 bytes at x0, row stride x1.
+        idct_row4_neon  v24, v25, v26, v27, 1
+        idct_row4_neon  v28, v29, v30, v31, 2
+        bl              idct_col4_neon1
+        // Apply the remaining COL_SHIFT-16 bits of shift and saturate to unsigned bytes (lanes 0-3).
+        sqshrun         v1.8B,  v7.8H, #COL_SHIFT-16
+        sqshrun2        v1.16B, v16.8H, #COL_SHIFT-16
+        sqshrun         v3.8B,  v17.8H, #COL_SHIFT-16
+        sqshrun2        v3.16B, v18.8H, #COL_SHIFT-16
+
+        bl              idct_col4_neon2
+        // Same narrowing for lanes 4-7.
+        sqshrun         v2.8B,  v7.8H, #COL_SHIFT-16
+        sqshrun2        v2.16B, v16.8H, #COL_SHIFT-16
+        sqshrun         v4.8B,  v17.8H, #COL_SHIFT-16
+        sqshrun2        v4.16B, v18.8H, #COL_SHIFT-16
+        // Interleave the 4-byte groups so that each 64-bit half becomes one complete 8-pixel row.
+        zip1            v16.4S, v1.4S, v2.4S
+        zip2            v17.4S, v1.4S, v2.4S
+
+        st1             {v16.D}[0], [x0], x1
+        st1             {v16.D}[1], [x0], x1
+
+        zip1            v18.4S, v3.4S, v4.4S
+        zip2            v19.4S, v3.4S, v4.4S
+
+        st1             {v17.D}[0], [x0], x1
+        st1             {v17.D}[1], [x0], x1
+        st1             {v18.D}[0], [x0], x1
+        st1             {v18.D}[1], [x0], x1
+        st1             {v19.D}[0], [x0], x1
+        st1             {v19.D}[1], [x0], x1
+
+        idct_end
+endfunc
+
+function ff_simple_idct_add_neon, export=1
+        idct_start      x2
+        // Transforms the 128-byte block at x2 and adds it, with unsigned saturation, to 8 rows of 8 bytes at x0 (stride x1).
+        idct_row4_neon  v24, v25, v26, v27, 1
+        idct_row4_neon  v28, v29, v30, v31, 2
+        bl              idct_col4_neon1
+        // Apply the remaining COL_SHIFT-16 bits of shift; the lane 0-3 results are parked in v1-v4.
+        sshr            v1.8H, v7.8H, #COL_SHIFT-16
+        sshr            v2.8H, v16.8H, #COL_SHIFT-16
+        sshr            v3.8H, v17.8H, #COL_SHIFT-16
+        sshr            v4.8H, v18.8H, #COL_SHIFT-16
+
+        bl              idct_col4_neon2
+        // The lane 4-7 results stay in v7 and v16-v18.
+        sshr            v7.8H, v7.8H, #COL_SHIFT-16
+        sshr            v16.8H, v16.8H, #COL_SHIFT-16
+        sshr            v17.8H, v17.8H, #COL_SHIFT-16
+        sshr            v18.8H, v18.8H, #COL_SHIFT-16
+        // x0 walks ahead loading destination rows; x9 trails it for the stores. zip builds full rows in v23-v30.
+        mov             x9,  x0
+        ld1             {v19.D}[0], [x0], x1
+        zip1            v23.2D, v1.2D, v7.2D
+        zip2            v24.2D, v1.2D, v7.2D
+        ld1             {v19.D}[1], [x0], x1
+        zip1            v25.2D, v2.2D, v16.2D
+        zip2            v26.2D, v2.2D, v16.2D
+        ld1             {v20.D}[0], [x0], x1
+        zip1            v27.2D, v3.2D, v17.2D
+        zip2            v28.2D, v3.2D, v17.2D
+        ld1             {v20.D}[1], [x0], x1
+        zip1            v29.2D, v4.2D, v18.2D
+        zip2            v30.2D, v4.2D, v18.2D
+        ld1             {v21.D}[0], [x0], x1
+        uaddw           v23.8H, v23.8H, v19.8B  // widen the pixel bytes and add them to the residual
+        uaddw2          v24.8H, v24.8H, v19.16B
+        ld1             {v21.D}[1], [x0], x1
+        sqxtun          v23.8B, v23.8H  // saturate back to unsigned bytes, two rows per register
+        sqxtun2         v23.16B, v24.8H
+        ld1             {v22.D}[0], [x0], x1
+        uaddw           v24.8H, v25.8H, v20.8B
+        uaddw2          v25.8H, v26.8H, v20.16B
+        ld1             {v22.D}[1], [x0], x1
+        sqxtun          v24.8B, v24.8H
+        sqxtun2         v24.16B, v25.8H
+        st1             {v23.D}[0], [x9], x1
+        uaddw           v25.8H, v27.8H, v21.8B
+        uaddw2          v26.8H, v28.8H, v21.16B
+        st1             {v23.D}[1], [x9], x1
+        sqxtun          v25.8B, v25.8H
+        sqxtun2         v25.16B, v26.8H
+        st1             {v24.D}[0], [x9], x1
+        uaddw           v26.8H, v29.8H, v22.8B
+        uaddw2          v27.8H, v30.8H, v22.16B
+        st1             {v24.D}[1], [x9], x1
+        sqxtun          v26.8B, v26.8H
+        sqxtun2         v26.16B, v27.8H
+        st1             {v25.D}[0], [x9], x1
+        st1             {v25.D}[1], [x9], x1
+        st1             {v26.D}[0], [x9], x1
+        st1             {v26.D}[1], [x9], x1
+
+        idct_end
+endfunc
+
+function ff_simple_idct_neon, export=1
+        idct_start      x0
+        // In-place variant: the 128-byte coefficient block at x0 is overwritten with the transformed values.
+        mov             x2,  x0
+        idct_row4_neon  v24, v25, v26, v27, 1
+        idct_row4_neon  v28, v29, v30, v31, 2
+        sub             x2, x2, #128  // the two row passes advanced x2 by 128 bytes; rewind it for the stores
+        bl              idct_col4_neon1
+        // Apply the remaining COL_SHIFT-16 bits of shift; the lane 0-3 results are parked in v1-v4.
+        sshr            v1.8H, v7.8H, #COL_SHIFT-16
+        sshr            v2.8H, v16.8H, #COL_SHIFT-16
+        sshr            v3.8H, v17.8H, #COL_SHIFT-16
+        sshr            v4.8H, v18.8H, #COL_SHIFT-16
+
+        bl              idct_col4_neon2
+        // The lane 4-7 results stay in v7 and v16-v18.
+        sshr            v7.8H, v7.8H, #COL_SHIFT-16
+        sshr            v16.8H, v16.8H, #COL_SHIFT-16
+        sshr            v17.8H, v17.8H, #COL_SHIFT-16
+        sshr            v18.8H, v18.8H, #COL_SHIFT-16
+        // Join the two halves into full 8-value rows and write them back, two rows (32 bytes) per store.
+        zip1            v23.2D, v1.2D, v7.2D
+        zip2            v24.2D, v1.2D, v7.2D
+        st1             {v23.2D,v24.2D}, [x2], #32
+        zip1            v25.2D, v2.2D, v16.2D
+        zip2            v26.2D, v2.2D, v16.2D
+        st1             {v25.2D,v26.2D}, [x2], #32
+        zip1            v27.2D, v3.2D, v17.2D
+        zip2            v28.2D, v3.2D, v17.2D
+        st1             {v27.2D,v28.2D}, [x2], #32
+        zip1            v29.2D, v4.2D, v18.2D
+        zip2            v30.2D, v4.2D, v18.2D
+        st1             {v29.2D,v30.2D}, [x2], #32
+
+        idct_end
+endfunc
diff --git a/media/ffvpx/libavcodec/aarch64/vc1dsp_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/vc1dsp_init_aarch64.c
new file mode 100644
index 0000000000..3bc0bd17ee
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vc1dsp_init_aarch64.c
@@ -0,0 +1,141 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavutil/intreadwrite.h"
+#include "libavcodec/vc1dsp.h"
+
+#include "config.h"
+
+void ff_vc1_inv_trans_8x8_neon(int16_t *block); /* NEON routines defined outside this file */
+void ff_vc1_inv_trans_8x4_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+void ff_vc1_inv_trans_4x8_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+void ff_vc1_inv_trans_4x4_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+/* The _dc variants; ff_vc1dsp_init_aarch64() installs them in the vc1_inv_trans_*_dc slots. */
+void ff_vc1_inv_trans_8x8_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+void ff_vc1_inv_trans_8x4_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+void ff_vc1_inv_trans_4x8_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+void ff_vc1_inv_trans_4x4_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+/* Loop filter routines: v and h flavours in sizes 4, 8 and 16. */
+void ff_vc1_v_loop_filter4_neon(uint8_t *src, ptrdiff_t stride, int pq);
+void ff_vc1_h_loop_filter4_neon(uint8_t *src, ptrdiff_t stride, int pq);
+void ff_vc1_v_loop_filter8_neon(uint8_t *src, ptrdiff_t stride, int pq);
+void ff_vc1_h_loop_filter8_neon(uint8_t *src, ptrdiff_t stride, int pq);
+void ff_vc1_v_loop_filter16_neon(uint8_t *src, ptrdiff_t stride, int pq);
+void ff_vc1_h_loop_filter16_neon(uint8_t *src, ptrdiff_t stride, int pq);
+/* Chroma motion compensation: put and avg, mc8 and mc4. */
+void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+                                int h, int x, int y);
+void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+                                int h, int x, int y);
+void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+                                int h, int x, int y);
+void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+                                int h, int x, int y);
+/* Assembly inner loop driven by vc1_unescape_buffer_neon() below. */
+int ff_vc1_unescape_buffer_helper_neon(const uint8_t *src, int size, uint8_t *dst);
+
+static int vc1_unescape_buffer_neon(const uint8_t *src, int size, uint8_t *dst)
+{
+    /* C driver around the assembly inner loop: aligning the destination,
+     * finishing the tail and dropping the escape bytes are not
+     * time-critical, so they are handled here. This assumes a
+     * little-endian machine that supports unaligned loads.
+     * Returns the number of bytes written to dst. */
+    uint8_t *const out_start = dst;
+
+    while (size >= 4) {
+        int escape = 0;
+
+        /* Head: byte by byte until dst is 8-byte aligned or an escape is hit. */
+        while (((uintptr_t) dst & 7) && size >= 4) {
+            if ((AV_RL32(src) &~ 0x03000000) == 0x00030000) {
+                escape = 1;
+                break;
+            }
+            *dst++ = *src++;
+            size--;
+        }
+        if (!escape) {
+            /* Bulk: the helper's return value is taken as the number of
+             * bytes it left for us, so the difference is what it consumed. */
+            int done = size - ff_vc1_unescape_buffer_helper_neon(src, size, dst);
+
+            src  += done;
+            dst  += done;
+            size -= done;
+            /* Byte by byte again, up to the next escape or the last 3 bytes. */
+            while (size >= 4) {
+                if ((AV_RL32(src) &~ 0x03000000) == 0x00030000) {
+                    escape = 1;
+                    break;
+                }
+                *dst++ = *src++;
+                size--;
+            }
+        }
+        if (escape) {
+            /* Copy the first two bytes of the sequence, skip the third. */
+            dst[0] = src[0];
+            dst[1] = src[1];
+            dst  += 2;
+            src  += 3;
+            size -= 3;
+        }
+    }
+    /* Fewer than four bytes remain, too short for the 32-bit test above:
+     * copy them unchanged. */
+    for (; size > 0; size--)
+        *dst++ = *src++;
+
+    return (int) (dst - out_start);
+}
+
+av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp)
+{
+    const int flags = av_get_cpu_flags();
+    /* Without NEON every slot keeps whatever was installed before this call. */
+    if (!have_neon(flags))
+        return;
+    dsp->vc1_inv_trans_8x8    = ff_vc1_inv_trans_8x8_neon;
+    dsp->vc1_inv_trans_8x4    = ff_vc1_inv_trans_8x4_neon;
+    dsp->vc1_inv_trans_4x8    = ff_vc1_inv_trans_4x8_neon;
+    dsp->vc1_inv_trans_4x4    = ff_vc1_inv_trans_4x4_neon;
+    dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_neon;
+    dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_neon;
+    dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_neon;
+    dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_neon;
+    /* loop filters */
+    dsp->vc1_v_loop_filter4   = ff_vc1_v_loop_filter4_neon;
+    dsp->vc1_h_loop_filter4   = ff_vc1_h_loop_filter4_neon;
+    dsp->vc1_v_loop_filter8   = ff_vc1_v_loop_filter8_neon;
+    dsp->vc1_h_loop_filter8   = ff_vc1_h_loop_filter8_neon;
+    dsp->vc1_v_loop_filter16  = ff_vc1_v_loop_filter16_neon;
+    dsp->vc1_h_loop_filter16  = ff_vc1_h_loop_filter16_neon;
+    /* chroma motion compensation: index 0 takes the mc8 routines, index 1 the mc4 ones */
+    dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_neon;
+    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_neon;
+    dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = ff_put_vc1_chroma_mc4_neon;
+    dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = ff_avg_vc1_chroma_mc4_neon;
+    /* escape-byte removal: the C wrapper defined in this file */
+    dsp->vc1_unescape_buffer  = vc1_unescape_buffer_neon;
+}
diff --git a/media/ffvpx/libavcodec/aarch64/videodsp.S b/media/ffvpx/libavcodec/aarch64/videodsp.S
new file mode 100644
index 0000000000..fe2da0658e
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/videodsp.S
@@ -0,0 +1,29 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+function ff_prefetch_aarch64, export=1
+1:
+        subs            w2,  w2,  #2  // two rows per iteration; w2 is the remaining row count
+        prfm            pldl1strm, [x0]  // prefetch hint for the current row
+        prfm            pldl1strm, [x0,  x1]  // and for the row one stride (x1) further
+        add             x0,  x0,  x1,  lsl #1  // advance x0 by two strides
+        b.gt            1b  // loop while the count is still positive; the body always runs at least once
+        ret
+endfunc
diff --git a/media/ffvpx/libavcodec/aarch64/videodsp_init.c b/media/ffvpx/libavcodec/aarch64/videodsp_init.c
new file mode 100644
index 0000000000..1f77a918d7
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/videodsp_init.c
@@ -0,0 +1,32 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/videodsp.h"
+
+void ff_prefetch_aarch64(const uint8_t *mem, ptrdiff_t stride, int h);
+
+av_cold void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc)
+{
+    const int flags = av_get_cpu_flags(); /* bpc is not consulted here */
+    if (!have_armv8(flags))
+        return;
+    ctx->prefetch = ff_prefetch_aarch64;
+}
diff --git a/media/ffvpx/libavcodec/aarch64/vp8dsp.h b/media/ffvpx/libavcodec/aarch64/vp8dsp.h
new file mode 100644
index 0000000000..4e59de28b1
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp8dsp.h
@@ -0,0 +1,75 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AARCH64_VP8DSP_H
+#define AVCODEC_AARCH64_VP8DSP_H
+
+#include "libavcodec/vp8dsp.h"
+
+#define VP8_LF_Y(hv, inner, opt)                                             \
+    void ff_vp8_##hv##_loop_filter16##inner##_##opt(uint8_t *dst,            \
+                                                    ptrdiff_t stride,        \
+                                                    int flim_E, int flim_I,  \
+                                                    int hev_thresh)
+/* VP8_LF_Y (above) and VP8_LF_UV (below) each declare one loop-filter prototype; the UV form takes two plane pointers. */
+#define VP8_LF_UV(hv, inner, opt)                                            \
+    void ff_vp8_##hv##_loop_filter8uv##inner##_##opt(uint8_t *dstU,          \
+                                                     uint8_t *dstV,          \
+                                                     ptrdiff_t stride,       \
+                                                     int flim_E, int flim_I, \
+                                                     int hev_thresh)
+/* Prototype of the _simple filter: a single flim argument instead of three thresholds. */
+#define VP8_LF_SIMPLE(hv, opt)                                          \
+    void ff_vp8_##hv##_loop_filter16_simple_##opt(uint8_t *dst,         \
+                                                  ptrdiff_t stride,     \
+                                                  int flim)
+/* The four h/v x Y/UV prototypes for one value of inner. No trailing semicolon: the user supplies it. */
+#define VP8_LF_HV(inner, opt)                   \
+    VP8_LF_Y(h,  inner, opt);                   \
+    VP8_LF_Y(v,  inner, opt);                   \
+    VP8_LF_UV(h, inner, opt);                   \
+    VP8_LF_UV(v, inner, opt)
+/* Every loop-filter prototype for one opt suffix: plain, _inner and simple. */
+#define VP8_LF(opt)                             \
+    VP8_LF_HV(,       opt);                     \
+    VP8_LF_HV(_inner, opt);                     \
+    VP8_LF_SIMPLE(h, opt);                      \
+    VP8_LF_SIMPLE(v, opt)
+/* One ff_put_vp8_<n>_<opt>() motion-compensation prototype. */
+#define VP8_MC(n, opt)                                                  \
+    void ff_put_vp8_##n##_##opt(uint8_t *dst, ptrdiff_t dststride,      \
+                                const uint8_t *src, ptrdiff_t srcstride,\
+                                int h, int x, int y)
+/* The pixels prototype plus the eight epel variants for width w. */
+#define VP8_EPEL(w, opt)                        \
+    VP8_MC(pixels ## w, opt);                   \
+    VP8_MC(epel ## w ## _h4, opt);              \
+    VP8_MC(epel ## w ## _h6, opt);              \
+    VP8_MC(epel ## w ## _v4, opt);              \
+    VP8_MC(epel ## w ## _h4v4, opt);            \
+    VP8_MC(epel ## w ## _h6v4, opt);            \
+    VP8_MC(epel ## w ## _v6, opt);              \
+    VP8_MC(epel ## w ## _h4v6, opt);            \
+    VP8_MC(epel ## w ## _h6v6, opt)
+/* The three bilinear variants (h, v, hv) for width w. */
+#define VP8_BILIN(w, opt)                       \
+    VP8_MC(bilin ## w ## _h, opt);              \
+    VP8_MC(bilin ## w ## _v, opt);              \
+    VP8_MC(bilin ## w ## _hv, opt)
+
+#endif /* AVCODEC_AARCH64_VP8DSP_H */
diff --git a/media/ffvpx/libavcodec/aarch64/vp8dsp_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/vp8dsp_init_aarch64.c
new file mode 100644
index 0000000000..fc7e831d17
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp8dsp_init_aarch64.c
@@ -0,0 +1,124 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/vp8dsp.h"
+#include "vp8dsp.h"
+
+void ff_vp8_luma_dc_wht_neon(int16_t block[4][4][16], int16_t dc[16]);
+/* IDCT-and-add entry points installed by ff_vp8dsp_init_aarch64() below. */
+void ff_vp8_idct_add_neon(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add_neon(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add4y_neon(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add4uv_neon(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
+/* Loop-filter prototypes, generated by the VP8_LF macro from vp8dsp.h. */
+VP8_LF(neon);
+/* pixels and epel prototypes for widths 16, 8 and 4 (not every one is installed below). */
+VP8_EPEL(16, neon);
+VP8_EPEL(8,  neon);
+VP8_EPEL(4,  neon);
+/* Bilinear prototypes for widths 16, 8 and 4. */
+VP8_BILIN(16, neon);
+VP8_BILIN(8,  neon);
+VP8_BILIN(4,  neon);
+
+av_cold void ff_vp78dsp_init_aarch64(VP8DSPContext *dsp)
+{
+    if (have_neon(av_get_cpu_flags())) { /* nothing is replaced without NEON; first group: 16-wide epel */
+        dsp->put_vp8_epel_pixels_tab[0][0][0] = ff_put_vp8_pixels16_neon;
+        dsp->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_neon;
+        dsp->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_neon;
+        dsp->put_vp8_epel_pixels_tab[0][2][2] = ff_put_vp8_epel16_h6v6_neon;
+        /* 8-wide epel: all nine entries; second index picks the v variant, third the h variant */
+        dsp->put_vp8_epel_pixels_tab[1][0][0] = ff_put_vp8_pixels8_neon;
+        dsp->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_neon;
+        dsp->put_vp8_epel_pixels_tab[1][0][2] = ff_put_vp8_epel8_h6_neon;
+        dsp->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_neon;
+        dsp->put_vp8_epel_pixels_tab[1][1][1] = ff_put_vp8_epel8_h4v4_neon;
+        dsp->put_vp8_epel_pixels_tab[1][1][2] = ff_put_vp8_epel8_h6v4_neon;
+        dsp->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_neon;
+        dsp->put_vp8_epel_pixels_tab[1][2][1] = ff_put_vp8_epel8_h4v6_neon;
+        dsp->put_vp8_epel_pixels_tab[1][2][2] = ff_put_vp8_epel8_h6v6_neon;
+        /* 4-wide epel: entry [2][0][0] is not assigned here */
+        dsp->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_neon;
+        dsp->put_vp8_epel_pixels_tab[2][0][2] = ff_put_vp8_epel4_h6_neon;
+        dsp->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_neon;
+        dsp->put_vp8_epel_pixels_tab[2][1][1] = ff_put_vp8_epel4_h4v4_neon;
+        dsp->put_vp8_epel_pixels_tab[2][1][2] = ff_put_vp8_epel4_h6v4_neon;
+        dsp->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_neon;
+        dsp->put_vp8_epel_pixels_tab[2][2][1] = ff_put_vp8_epel4_h4v6_neon;
+        dsp->put_vp8_epel_pixels_tab[2][2][2] = ff_put_vp8_epel4_h6v6_neon;
+        /* 16-wide bilinear: indices 1 and 2 share the same routine in each direction */
+        dsp->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_neon;
+        dsp->put_vp8_bilinear_pixels_tab[0][0][1] = ff_put_vp8_bilin16_h_neon;
+        dsp->put_vp8_bilinear_pixels_tab[0][0][2] = ff_put_vp8_bilin16_h_neon;
+        dsp->put_vp8_bilinear_pixels_tab[0][1][0] = ff_put_vp8_bilin16_v_neon;
+        dsp->put_vp8_bilinear_pixels_tab[0][1][1] = ff_put_vp8_bilin16_hv_neon;
+        dsp->put_vp8_bilinear_pixels_tab[0][1][2] = ff_put_vp8_bilin16_hv_neon;
+        dsp->put_vp8_bilinear_pixels_tab[0][2][0] = ff_put_vp8_bilin16_v_neon;
+        dsp->put_vp8_bilinear_pixels_tab[0][2][1] = ff_put_vp8_bilin16_hv_neon;
+        dsp->put_vp8_bilinear_pixels_tab[0][2][2] = ff_put_vp8_bilin16_hv_neon;
+        /* 8-wide bilinear */
+        dsp->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_neon;
+        dsp->put_vp8_bilinear_pixels_tab[1][0][1] = ff_put_vp8_bilin8_h_neon;
+        dsp->put_vp8_bilinear_pixels_tab[1][0][2] = ff_put_vp8_bilin8_h_neon;
+        dsp->put_vp8_bilinear_pixels_tab[1][1][0] = ff_put_vp8_bilin8_v_neon;
+        dsp->put_vp8_bilinear_pixels_tab[1][1][1] = ff_put_vp8_bilin8_hv_neon;
+        dsp->put_vp8_bilinear_pixels_tab[1][1][2] = ff_put_vp8_bilin8_hv_neon;
+        dsp->put_vp8_bilinear_pixels_tab[1][2][0] = ff_put_vp8_bilin8_v_neon;
+        dsp->put_vp8_bilinear_pixels_tab[1][2][1] = ff_put_vp8_bilin8_hv_neon;
+        dsp->put_vp8_bilinear_pixels_tab[1][2][2] = ff_put_vp8_bilin8_hv_neon;
+        /* 4-wide bilinear: entry [2][0][0] is not assigned here */
+        dsp->put_vp8_bilinear_pixels_tab[2][0][1] = ff_put_vp8_bilin4_h_neon;
+        dsp->put_vp8_bilinear_pixels_tab[2][0][2] = ff_put_vp8_bilin4_h_neon;
+        dsp->put_vp8_bilinear_pixels_tab[2][1][0] = ff_put_vp8_bilin4_v_neon;
+        dsp->put_vp8_bilinear_pixels_tab[2][1][1] = ff_put_vp8_bilin4_hv_neon;
+        dsp->put_vp8_bilinear_pixels_tab[2][1][2] = ff_put_vp8_bilin4_hv_neon;
+        dsp->put_vp8_bilinear_pixels_tab[2][2][0] = ff_put_vp8_bilin4_v_neon;
+        dsp->put_vp8_bilinear_pixels_tab[2][2][1] = ff_put_vp8_bilin4_hv_neon;
+        dsp->put_vp8_bilinear_pixels_tab[2][2][2] = ff_put_vp8_bilin4_hv_neon;
+    }
+}
+
+av_cold void ff_vp8dsp_init_aarch64(VP8DSPContext *dsp)
+{
+    if (have_neon(av_get_cpu_flags())) { /* nothing is replaced without NEON */
+        dsp->vp8_luma_dc_wht            = ff_vp8_luma_dc_wht_neon;
+        /* IDCT-and-add entry points */
+        dsp->vp8_idct_add               = ff_vp8_idct_add_neon;
+        dsp->vp8_idct_dc_add            = ff_vp8_idct_dc_add_neon;
+        dsp->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_neon;
+        dsp->vp8_idct_dc_add4uv         = ff_vp8_idct_dc_add4uv_neon;
+        /* loop filters: 16y and 8uv slots */
+        dsp->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16_neon;
+        dsp->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16_neon;
+        dsp->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_neon;
+        dsp->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_neon;
+        /* the _inner counterparts */
+        dsp->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16_inner_neon;
+        dsp->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16_inner_neon;
+        dsp->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_neon;
+        dsp->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_neon;
+        /* simple loop filters */
+        dsp->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter16_simple_neon;
+        dsp->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter16_simple_neon;
+    }
+}
diff --git a/media/ffvpx/libavcodec/aarch64/vp8dsp_neon.S b/media/ffvpx/libavcodec/aarch64/vp8dsp_neon.S
new file mode 100644
index 0000000000..4bbf16d1a4
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp8dsp_neon.S
@@ -0,0 +1,1790 @@
+/*
+ * VP8 NEON optimisations
+ *
+ * Copyright (c) 2010 Rob Clark <rob@ti.com>
+ * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2018 Magnus Röös <mla2.roos@gmail.com>
+ * Copyright (c) 2019 Martin Storsjo <martin@martin.st>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+#include "neon.S"
+
+function ff_vp8_luma_dc_wht_neon, export=1    // x0: int16 output, one value every 32 bytes; x1: 16 int16 inputs, cleared here
+        ld1             {v0.4h - v3.4h}, [x1]       // load the 16 input values, 4 per register
+        movi            v30.8h, #0                  // zeros used to clear the input
+
+        add             v4.4h,  v0.4h,  v3.4h       // first pass: v4 = v0 + v3
+        add             v6.4h,  v1.4h,  v2.4h       //             v6 = v1 + v2
+        st1             {v30.8h}, [x1], #16         // clear input bytes 0..15
+        sub             v7.4h,  v1.4h,  v2.4h       //             v7 = v1 - v2
+        sub             v5.4h,  v0.4h,  v3.4h       //             v5 = v0 - v3
+        st1             {v30.8h}, [x1]              // clear input bytes 16..31
+        add             v0.4h,  v4.4h,  v6.4h       // v0 = v4 + v6
+        add             v1.4h,  v5.4h,  v7.4h       // v1 = v5 + v7
+        sub             v2.4h,  v4.4h,  v6.4h       // v2 = v4 - v6
+        sub             v3.4h,  v5.4h,  v7.4h       // v3 = v5 - v7
+
+        movi            v16.4h, #3                  // bias added before the final >> 3
+
+        transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7
+
+        add             v0.4h,  v0.4h,  v16.4h      // v0 feeds v4 and v5 below, so the +3 reaches all four results
+
+        add             v4.4h,  v0.4h,  v3.4h       // second pass: same add/sub pattern as the first
+        add             v6.4h,  v1.4h,  v2.4h
+        sub             v7.4h,  v1.4h,  v2.4h
+        sub             v5.4h,  v0.4h,  v3.4h
+        add             v0.4h,  v4.4h,  v6.4h
+        add             v1.4h,  v5.4h,  v7.4h
+        sub             v2.4h,  v4.4h,  v6.4h
+        sub             v3.4h,  v5.4h,  v7.4h
+
+        sshr            v0.4h,  v0.4h,  #3          // arithmetic >> 3 of each biased result
+        sshr            v1.4h,  v1.4h,  #3
+        sshr            v2.4h,  v2.4h,  #3
+        sshr            v3.4h,  v3.4h,  #3
+
+        mov             x3,  #32                    // output step in bytes (16 int16 per step)
+        st1             {v0.h}[0],  [x0], x3        // lane 0 of v0..v3 -> outputs 0..3
+        st1             {v1.h}[0],  [x0], x3
+        st1             {v2.h}[0],  [x0], x3
+        st1             {v3.h}[0],  [x0], x3
+        st1             {v0.h}[1],  [x0], x3        // lane 1 of v0..v3 -> outputs 4..7
+        st1             {v1.h}[1],  [x0], x3
+        st1             {v2.h}[1],  [x0], x3
+        st1             {v3.h}[1],  [x0], x3
+        st1             {v0.h}[2],  [x0], x3        // lane 2 of v0..v3 -> outputs 8..11
+        st1             {v1.h}[2],  [x0], x3
+        st1             {v2.h}[2],  [x0], x3
+        st1             {v3.h}[2],  [x0], x3
+        st1             {v0.h}[3],  [x0], x3        // lane 3 of v0..v3 -> outputs 12..15
+        st1             {v1.h}[3],  [x0], x3
+        st1             {v2.h}[3],  [x0], x3
+        st1             {v3.h}[3],  [x0], x3
+
+        ret
+endfunc
+
+function ff_vp8_idct_add_neon, export=1    // x0: dst (4 rows of 4 pixels), x1: 16 int16 coefficients, cleared here, x2: dst stride
+        ld1             {v0.8b - v3.8b},  [x1]      // load 32 bytes: 4 int16 per register
+        mov             w4,  #20091                 // low half: multiplier 20091
+        movk            w4,  #35468/2, lsl #16      // high half: 35468/2, sqdmulh doubles the product
+        dup             v4.2s, w4                   // v4.h[0] = 20091, v4.h[1] = 35468/2
+
+        smull           v26.4s, v1.4h,  v4.h[0]     // v1 * 20091 (32 bit)
+        smull           v27.4s, v3.4h,  v4.h[0]     // v3 * 20091 (32 bit)
+        sqdmulh         v20.4h, v1.4h,  v4.h[1]     // (v1 * 35468) >> 16
+        sqdmulh         v23.4h, v3.4h,  v4.h[1]     // (v3 * 35468) >> 16
+        shrn            v21.4h, v26.4s, #16         // (v1 * 20091) >> 16
+        shrn            v22.4h, v27.4s, #16         // (v3 * 20091) >> 16
+        add             v21.4h, v21.4h, v1.4h       // v1 + ((v1 * 20091) >> 16)
+        add             v22.4h, v22.4h, v3.4h       // v3 + ((v3 * 20091) >> 16)
+
+        add             v16.4h,  v0.4h,   v2.4h     // v0 + v2
+        sub             v17.4h,  v0.4h,   v2.4h     // v0 - v2
+
+        add             v18.4h,  v21.4h,  v23.4h    // odd part, sum
+        sub             v19.4h,  v20.4h,  v22.4h    // odd part, difference
+
+        add             v0.4h,   v16.4h,  v18.4h    // first pass outputs
+        add             v1.4h,   v17.4h,  v19.4h
+        sub             v3.4h,   v16.4h,  v18.4h
+        sub             v2.4h,   v17.4h,  v19.4h
+
+        transpose_4x4H  v0, v1, v2, v3, v24, v5, v6, v7
+
+        movi            v29.8h, #0                      // zeros used to clear the coefficients
+        smull           v26.4s,     v1.4h,  v4.h[0]     // second pass: same arithmetic as the first
+        st1             {v29.8h},   [x1],   #16         // clear coefficient bytes 0..15
+        smull           v27.4s,     v3.4h,  v4.h[0]
+        st1             {v29.16b},  [x1]                // clear coefficient bytes 16..31
+        sqdmulh         v21.4h,     v1.4h,  v4.h[1]     // (v1 * 35468) >> 16
+        sqdmulh         v23.4h,     v3.4h,  v4.h[1]     // (v3 * 35468) >> 16
+        shrn            v20.4h,     v26.4s, #16
+        shrn            v22.4h,     v27.4s, #16
+        add             v20.4h,     v20.4h, v1.4h       // v1 + ((v1 * 20091) >> 16)
+        add             v22.4h,     v22.4h, v3.4h       // v3 + ((v3 * 20091) >> 16)
+        add             v16.4h,     v0.4h,  v2.4h
+        sub             v17.4h,     v0.4h,  v2.4h
+
+        add             v18.4h,     v20.4h, v23.4h
+        ld1             {v24.s}[0], [x0],   x2          // dst row 0 (4 pixels), interleaved with the arithmetic
+        sub             v19.4h, v21.4h, v22.4h
+        ld1             {v25.s}[0], [x0],   x2          // dst row 1
+        add             v0.4h,      v16.4h, v18.4h
+        add             v1.4h,      v17.4h, v19.4h
+        ld1             {v26.s}[0], [x0],   x2          // dst row 2
+        sub             v3.4h,      v16.4h, v18.4h
+        sub             v2.4h,      v17.4h, v19.4h
+        ld1             {v27.s}[0], [x0],   x2          // dst row 3
+        srshr           v0.4h,      v0.4h,  #3          // rounding >> 3
+        srshr           v1.4h,      v1.4h,  #3
+        srshr           v2.4h,      v2.4h,  #3
+        srshr           v3.4h,      v3.4h,  #3
+
+        sub             x0,  x0,  x2,  lsl #2           // rewind dst by the 4 rows just read
+
+        transpose_4x4H  v0, v1, v2, v3, v5, v6, v7, v16
+
+        uaddw           v0.8h,  v0.8h, v24.8b           // add widened dst row 0 to v0 (rows 1..3 likewise below)
+        uaddw           v1.8h,  v1.8h, v25.8b
+        uaddw           v2.8h,  v2.8h, v26.8b
+        uaddw           v3.8h,  v3.8h, v27.8b
+        sqxtun          v0.8b,  v0.8h                   // saturate to unsigned 8 bit
+        sqxtun          v1.8b,  v1.8h
+        sqxtun          v2.8b,  v2.8h
+        sqxtun          v3.8b,  v3.8h
+
+        st1             {v0.s}[0],  [x0], x2            // write back 4 pixels per row
+        st1             {v1.s}[0],  [x0], x2
+        st1             {v2.s}[0],  [x0], x2
+        st1             {v3.s}[0],  [x0], x2
+
+        ret
+endfunc
+
+function ff_vp8_idct_dc_add4uv_neon, export=1    // x0: dst (8 rows of 8 pixels), x1: four coefficient blocks 32 bytes apart, x2: dst stride
+        movi            v0.4h,  #0                  // zero used to clear each DC
+        mov             x3,     #32                 // byte distance between blocks
+        ld1r            {v16.4h},  [x1]             // DC of block 0, replicated to 4 lanes
+        st1             {v0.h}[0], [x1], x3         // clear it, step to block 1
+        ld1r            {v17.4h},  [x1]             // DC of block 1
+        st1             {v0.h}[0], [x1], x3
+        ld1r            {v18.4h},  [x1]             // DC of block 2
+        st1             {v0.h}[0], [x1], x3
+        ld1r            {v19.4h},  [x1]             // DC of block 3
+        st1             {v0.h}[0], [x1], x3
+        ins             v16.d[1],  v17.d[0]         // v16 = dc0 x4 | dc1 x4, used for rows 0..3
+        ins             v18.d[1],  v19.d[0]         // v18 = dc2 x4 | dc3 x4, used for rows 4..7
+        mov             x3,  x0                     // x3: store pointer, x0 is the load pointer
+        srshr           v16.8h,    v16.8h,  #3            // dc >>= 3
+        ld1             {v0.8b},   [x0], x2         // dst rows 0..7 are loaded between the adds
+        srshr           v18.8h,    v18.8h,  #3      // rounding >> 3 for dc2/dc3 as well
+        ld1             {v1.8b},   [x0], x2
+        uaddw           v20.8h,    v16.8h, v0.8b    // row 0 + dc0/dc1
+        ld1             {v2.8b},   [x0], x2
+        uaddw           v0.8h,     v16.8h, v1.8b    // row 1
+        ld1             {v3.8b},   [x0], x2
+        uaddw           v22.8h,    v16.8h, v2.8b    // row 2
+        ld1             {v4.8b},   [x0], x2
+        uaddw           v2.8h,     v16.8h, v3.8b    // row 3
+        ld1             {v5.8b},   [x0], x2
+        uaddw           v24.8h,    v18.8h, v4.8b    // row 4 + dc2/dc3
+        ld1             {v6.8b},   [x0], x2
+        uaddw           v4.8h,     v18.8h, v5.8b    // row 5
+        ld1             {v7.8b},   [x0], x2
+        uaddw           v26.8h,    v18.8h, v6.8b    // row 6
+        sqxtun          v20.8b,    v20.8h           // saturate each row to unsigned 8 bit
+        uaddw           v6.8h,     v18.8h, v7.8b    // row 7
+        sqxtun          v21.8b,    v0.8h
+        sqxtun          v22.8b,    v22.8h
+        st1             {v20.8b},  [x3], x2         // store rows 0..7 through x3
+        sqxtun          v23.8b,    v2.8h
+        st1             {v21.8b},  [x3], x2
+        sqxtun          v24.8b,    v24.8h
+        st1             {v22.8b},  [x3], x2
+        sqxtun          v25.8b,    v4.8h
+        st1             {v23.8b},  [x3], x2
+        sqxtun          v26.8b,    v26.8h
+        st1             {v24.8b},  [x3], x2
+        sqxtun          v27.8b,    v6.8h
+        st1             {v25.8b},  [x3], x2
+        st1             {v26.8b},  [x3], x2
+        st1             {v27.8b},  [x3], x2
+
+        ret
+endfunc
+
+function ff_vp8_idct_dc_add4y_neon, export=1    // x0: dst (4 rows of 16 pixels), x1: four coefficient blocks 32 bytes apart, x2: dst stride
+        movi            v0.16b,  #0                 // zero used to clear each DC
+        mov             x3,  #32                    // byte distance between blocks
+        ld1r            {v16.4h},    [x1]           // DC of block 0, replicated to 4 lanes
+        st1             {v0.h}[0],   [x1], x3       // clear it, step to block 1
+        ld1r            {v17.4h},    [x1]           // DC of block 1
+        st1             {v0.h}[0],   [x1], x3
+        zip1            v16.2d,      v16.2d, v17.2d // v16 = dc0 x4 | dc1 x4, for pixel columns 0..7
+        ld1r            {v18.4h},    [x1]           // DC of block 2
+        st1             {v0.h}[0],   [x1], x3
+        ld1r            {v19.4h},    [x1]           // DC of block 3
+        st1             {v0.h}[0],   [x1], x3
+        zip1            v18.2d,      v18.2d, v19.2d // v18 = dc2 x4 | dc3 x4, for pixel columns 8..15
+        srshr           v16.8h,      v16.8h,  #3            // dc >>= 3
+        ld1             {v0.16b},     [x0], x2      // dst row 0
+        srshr           v18.8h,       v18.8h,  #3   // rounding >> 3 for dc2/dc3 as well
+        ld1             {v1.16b},     [x0], x2      // dst row 1
+        uaddw           v20.8h,       v16.8h,  v0.8b    // row 0, columns 0..7
+        ld1             {v2.16b},     [x0], x2      // dst row 2
+        uaddw2          v0.8h,        v18.8h,   v0.16b  // row 0, columns 8..15
+        ld1             {v3.16b},     [x0], x2      // dst row 3
+        uaddw           v21.8h, v16.8h,  v1.8b      // rows 1..3: low half into v21..v23, high half in place
+        uaddw2          v1.8h,  v18.8h,  v1.16b
+        uaddw           v22.8h, v16.8h,  v2.8b
+        uaddw2          v2.8h,  v18.8h,  v2.16b
+        uaddw           v23.8h, v16.8h,  v3.8b
+        uaddw2          v3.8h,  v18.8h,  v3.16b
+        sub             x0,  x0,  x2,  lsl #2       // rewind dst by the 4 rows just read
+        sqxtun          v20.8b,  v20.8h             // saturate to unsigned 8 bit, low then high half
+        sqxtun2         v20.16b, v0.8h
+        sqxtun          v21.8b,  v21.8h
+        sqxtun2         v21.16b, v1.8h
+        sqxtun          v22.8b,  v22.8h
+        st1             {v20.16b},    [x0], x2      // store row 0
+        sqxtun2         v22.16b, v2.8h
+        st1             {v21.16b},    [x0], x2      // store row 1
+        sqxtun          v23.8b,  v23.8h
+        st1             {v22.16b},    [x0], x2      // store row 2
+        sqxtun2         v23.16b, v3.8h
+        st1             {v23.16b},    [x0], x2      // store row 3
+
+        ret
+endfunc
+
+function ff_vp8_idct_dc_add_neon, export=1    // x0: dst (4 rows of 4 pixels), x1: coefficient block, only the DC is read, x2: dst stride
+        mov             w3,       #0
+        ld1r            {v2.8h},  [x1]              // DC replicated to all 8 lanes
+        strh            w3,       [x1]              // clear the DC in memory
+        srshr           v2.8h,  v2.8h,  #3          // rounding >> 3
+        ld1             {v0.s}[0],  [x0], x2        // dst rows 0 and 1 packed into v0
+        ld1             {v0.s}[1],  [x0], x2
+        uaddw           v3.8h,  v2.8h,  v0.8b       // rows 0..1 + dc
+        ld1             {v1.s}[0],  [x0], x2        // dst rows 2 and 3 packed into v1
+        ld1             {v1.s}[1],  [x0], x2
+        uaddw           v4.8h,  v2.8h,  v1.8b       // rows 2..3 + dc
+        sqxtun          v0.8b,  v3.8h               // saturate to unsigned 8 bit
+        sqxtun          v1.8b,  v4.8h
+        sub             x0,  x0,  x2, lsl #2        // rewind dst by the 4 rows just read
+        st1             {v0.s}[0],  [x0], x2        // write the 4 rows back
+        st1             {v0.s}[1],  [x0], x2
+        st1             {v1.s}[0],  [x0], x2
+        st1             {v1.s}[1],  [x0], x2
+        ret
+endfunc
+
+// Filters 16 byte lanes across one edge. Register layout on entry:
+//   P3..Q3 -> v0..v7 (with simple=1 only P1..Q1 in v2..v5 are read)
+//   flim_E -> v22
+//   flim_I -> v23 (not read with simple=1)
+//   hev_thresh -> register named by the hev_thresh macro argument (not read with simple=1)
+// Results are left in v1..v6 (v2..v5 with inner=1 or simple=1); v16-v23 are overwritten.
+.macro  vp8_loop_filter, inner=0, simple=0, hev_thresh
+    .if \simple
+        uabd            v17.16b, v3.16b,  v4.16b      // abs(P0-Q0)
+        uabd            v23.16b, v2.16b,  v5.16b      // abs(P1-Q1)
+        uqadd           v17.16b, v17.16b, v17.16b     // abs(P0-Q0) * 2
+        ushr            v18.16b, v23.16b, #1          // abs(P1-Q1) / 2
+        uqadd           v19.16b, v17.16b,  v18.16b    // (abs(P0-Q0)*2) + (abs(P1-Q1)/2)
+        movi            v21.16b, #0x80
+        cmhs            v16.16b, v22.16b, v19.16b    // (abs(P0-Q0)*2) + (abs(P1-Q1)/2) <= flim
+    .else
+        // calculate hev and normal_limit:
+        uabd            v20.16b, v2.16b,  v3.16b      // abs(P1-P0)
+        uabd            v21.16b, v5.16b,  v4.16b      // abs(Q1-Q0)
+        uabd            v18.16b, v0.16b,  v1.16b      // abs(P3-P2)
+        uabd            v19.16b, v1.16b,  v2.16b      // abs(P2-P1)
+        cmhs            v16.16b, v23.16b, v20.16b     // abs(P1-P0) <= flim_I
+        cmhs            v17.16b, v23.16b, v21.16b     // abs(Q1-Q0) <= flim_I
+        cmhs            v18.16b, v23.16b, v18.16b     // abs(P3-P2) <= flim_I
+        cmhs            v19.16b, v23.16b, v19.16b     // abs(P2-P1) <= flim_I
+        and             v16.16b, v17.16b, v16.16b
+        uabd            v17.16b, v7.16b,  v6.16b      // abs(Q3-Q2)
+        and             v16.16b, v16.16b, v19.16b
+        uabd            v19.16b, v6.16b,  v5.16b      // abs(Q2-Q1)
+        and             v16.16b, v16.16b, v18.16b
+        cmhs            v18.16b, v23.16b, v17.16b     // abs(Q3-Q2) <= flim_I
+        cmhs            v19.16b, v23.16b, v19.16b     // abs(Q2-Q1) <= flim_I
+        uabd            v17.16b, v3.16b,  v4.16b      // abs(P0-Q0)
+        uabd            v23.16b, v2.16b,  v5.16b      // abs(P1-Q1)
+        and             v16.16b, v16.16b, v18.16b
+        uqadd           v17.16b, v17.16b, v17.16b     // abs(P0-Q0) * 2
+        and             v16.16b, v16.16b, v19.16b
+        ushr            v18.16b, v23.16b, #1          // abs(P1-Q1) / 2
+        dup             v23.16b, \hev_thresh          // hev_thresh
+        uqadd           v19.16b, v17.16b, v18.16b     // (abs(P0-Q0)*2) + (abs(P1-Q1)/2)
+        cmhi            v20.16b, v20.16b, v23.16b     // abs(P1-P0) > hev_thresh
+        cmhs            v19.16b, v22.16b, v19.16b     // (abs(P0-Q0)*2) + (abs(P1-Q1)/2) <= flim_E
+        cmhi            v22.16b, v21.16b, v23.16b     // abs(Q1-Q0) > hev_thresh
+        and             v16.16b, v16.16b, v19.16b
+        movi            v21.16b, #0x80
+        orr             v17.16b, v20.16b, v22.16b
+    .endif
+
+        // at this point:
+        //   v16: normal_limit
+        //   v17: hev (not computed with simple=1, where it is never read)
+
+        // convert to signed value:
+        eor            v3.16b, v3.16b, v21.16b           // PS0 = P0 ^ 0x80
+        eor            v4.16b, v4.16b, v21.16b           // QS0 = Q0 ^ 0x80
+
+        movi           v20.8h, #3
+        ssubl          v18.8h, v4.8b,  v3.8b             // QS0 - PS0
+        ssubl2         v19.8h, v4.16b, v3.16b            //   (widened to 16bit)
+        eor            v2.16b, v2.16b, v21.16b           // PS1 = P1 ^ 0x80
+        eor            v5.16b, v5.16b, v21.16b           // QS1 = Q1 ^ 0x80
+        mul            v18.8h, v18.8h, v20.8h            // w = 3 * (QS0 - PS0)
+        mul            v19.8h, v19.8h, v20.8h
+
+        sqsub          v20.16b, v2.16b, v5.16b           // clamp(PS1-QS1)
+        movi           v22.16b, #4
+        movi           v23.16b, #3
+    .if \inner
+        and            v20.16b, v20.16b, v17.16b         // if(hev) w += clamp(PS1-QS1)
+    .endif
+        saddw          v18.8h,  v18.8h, v20.8b           // w += clamp(PS1-QS1)
+        saddw2         v19.8h,  v19.8h, v20.16b
+        sqxtn          v18.8b,  v18.8h                   // narrow result back into v18
+        sqxtn2         v18.16b, v19.8h
+    .if !\inner && !\simple
+        eor            v1.16b,  v1.16b,  v21.16b         // PS2 = P2 ^ 0x80
+        eor            v6.16b,  v6.16b,  v21.16b         // QS2 = Q2 ^ 0x80
+    .endif
+        and            v18.16b, v18.16b, v16.16b         // w &= normal_limit
+
+        // registers used at this point..
+        //   v0 -> P3  (don't corrupt)
+        //   v1-v6 -> PS2-QS2
+        //   v7 -> Q3  (don't corrupt)
+        //   v17 -> hev
+        //   v18 -> w
+        //   v21 -> #0x80
+        //   v22 -> #4
+        //   v23 -> #3
+        //   v16, v19, v29 -> unused
+        //
+        // filter_common:   is4tap==1
+        //   c1 = clamp(w + 4) >> 3;
+        //   c2 = clamp(w + 3) >> 3;
+        //   Q0 = s2u(QS0 - c1);
+        //   P0 = s2u(PS0 + c2);
+
+    .if \simple
+        sqadd          v19.16b, v18.16b, v22.16b           // c1 = clamp(w+4)
+        sqadd          v20.16b, v18.16b, v23.16b           // c2 = clamp(w+3)
+        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
+        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
+        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
+        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
+        eor            v4.16b,  v4.16b,  v21.16b           // Q0 = QS0 ^ 0x80
+        eor            v3.16b,  v3.16b,  v21.16b           // P0 = PS0 ^ 0x80
+        eor            v5.16b,  v5.16b,  v21.16b           // Q1 = QS1 ^ 0x80
+        eor            v2.16b,  v2.16b,  v21.16b           // P1 = PS1 ^ 0x80
+    .elseif \inner
+        // the !is4tap case of filter_common, only used for inner blocks
+        //   c3 = ((c1&~hev) + 1) >> 1;
+        //   Q1 = s2u(QS1 - c3);
+        //   P1 = s2u(PS1 + c3);
+        sqadd          v19.16b, v18.16b, v22.16b           // c1 = clamp(w+4)
+        sqadd          v20.16b, v18.16b, v23.16b           // c2 = clamp(w+3)
+        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
+        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
+        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
+        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
+        bic            v19.16b, v19.16b, v17.16b           // c1 & ~hev
+        eor            v4.16b,  v4.16b,  v21.16b           // Q0 = QS0 ^ 0x80
+        srshr          v19.16b, v19.16b, #1                // c3 = ((c1&~hev) + 1) >> 1
+        eor            v3.16b,  v3.16b,  v21.16b           // P0 = PS0 ^ 0x80
+        sqsub          v5.16b,  v5.16b,  v19.16b           // QS1 = clamp(QS1-c3)
+        sqadd          v2.16b,  v2.16b,  v19.16b           // PS1 = clamp(PS1+c3)
+        eor            v5.16b,  v5.16b,  v21.16b           // Q1 = QS1 ^ 0x80
+        eor            v2.16b,  v2.16b,  v21.16b           // P1 = PS1 ^ 0x80
+    .else
+        and            v20.16b, v18.16b, v17.16b           // w & hev
+        sqadd          v19.16b, v20.16b, v22.16b           // c1 = clamp((w&hev)+4)
+        sqadd          v20.16b, v20.16b, v23.16b           // c2 = clamp((w&hev)+3)
+        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
+        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
+        bic            v18.16b, v18.16b, v17.16b           // w &= ~hev
+        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
+        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
+
+        // filter_mbedge:
+        //   a = clamp((27*w + 63) >> 7);
+        //   Q0 = s2u(QS0 - a);
+        //   P0 = s2u(PS0 + a);
+        //   a = clamp((18*w + 63) >> 7);
+        //   Q1 = s2u(QS1 - a);
+        //   P1 = s2u(PS1 + a);
+        //   a = clamp((9*w + 63) >> 7);
+        //   Q2 = s2u(QS2 - a);
+        //   P2 = s2u(PS2 + a);
+        movi           v17.8h,  #63
+        sshll          v22.8h,  v18.8b, #3
+        sshll2         v23.8h,  v18.16b, #3
+        saddw          v22.8h,  v22.8h, v18.8b
+        saddw2         v23.8h,  v23.8h, v18.16b
+        add            v16.8h,  v17.8h, v22.8h
+        add            v17.8h,  v17.8h, v23.8h           //  9*w + 63
+        add            v19.8h,  v16.8h, v22.8h
+        add            v20.8h,  v17.8h, v23.8h           // 18*w + 63
+        add            v22.8h,  v19.8h, v22.8h
+        add            v23.8h,  v20.8h, v23.8h           // 27*w + 63
+        sqshrn         v16.8b,  v16.8h,  #7
+        sqshrn2        v16.16b, v17.8h, #7              // clamp(( 9*w + 63)>>7)
+        sqshrn         v19.8b,  v19.8h, #7
+        sqshrn2        v19.16b, v20.8h, #7              // clamp((18*w + 63)>>7)
+        sqshrn         v22.8b,  v22.8h, #7
+        sqshrn2        v22.16b, v23.8h, #7              // clamp((27*w + 63)>>7)
+        sqadd          v1.16b,  v1.16b,  v16.16b        // PS2 = clamp(PS2+a)
+        sqsub          v6.16b,  v6.16b,  v16.16b        // QS2 = clamp(QS2-a)
+        sqadd          v2.16b,  v2.16b,  v19.16b        // PS1 = clamp(PS1+a)
+        sqsub          v5.16b,  v5.16b,  v19.16b        // QS1 = clamp(QS1-a)
+        sqadd          v3.16b,  v3.16b,  v22.16b        // PS0 = clamp(PS0+a)
+        sqsub          v4.16b,  v4.16b,  v22.16b        // QS0 = clamp(QS0-a)
+        eor            v3.16b,  v3.16b,  v21.16b        // P0 = PS0 ^ 0x80
+        eor            v4.16b,  v4.16b,  v21.16b        // Q0 = QS0 ^ 0x80
+        eor            v2.16b,  v2.16b,  v21.16b        // P1 = PS1 ^ 0x80
+        eor            v5.16b,  v5.16b,  v21.16b        // Q1 = QS1 ^ 0x80
+        eor            v1.16b,  v1.16b,  v21.16b        // P2 = PS2 ^ 0x80
+        eor            v6.16b,  v6.16b,  v21.16b        // Q2 = QS2 ^ 0x80
+    .endif
+.endm
+
+.macro  vp8_v_loop_filter16 name, inner=0, simple=0
+function ff_vp8_v_loop_filter16\name\()_neon, export=1    // x0: dst at the Q0 row, x1: stride, w2: flim_E, w3: flim_I, w4: hev_thresh
+        sub             x0,  x0,  x1,  lsl #1+!\simple    // rewind 4 rows to P3, or 2 rows to P1 when simple
+
+        // Load pixels:
+    .if !\simple
+        ld1             {v0.16b},     [x0], x1 // P3
+        ld1             {v1.16b},     [x0], x1 // P2
+    .endif
+        ld1             {v2.16b},     [x0], x1 // P1
+        ld1             {v3.16b},     [x0], x1 // P0
+        ld1             {v4.16b},     [x0], x1 // Q0
+        ld1             {v5.16b},     [x0], x1 // Q1
+    .if !\simple
+        ld1             {v6.16b},     [x0], x1 // Q2
+        ld1             {v7.16b},     [x0]     // Q3
+        dup             v23.16b, w3                 // flim_I
+    .endif
+        dup             v22.16b, w2                 // flim_E
+
+        vp8_loop_filter inner=\inner, simple=\simple, hev_thresh=w4
+
+        // back up to P2:  dst -= stride * 6 (simple: only 4 rows, back to P1)
+        sub             x0,  x0,  x1,  lsl #2    // 4 rows back
+    .if !\simple
+        sub             x0,  x0,  x1,  lsl #1    // 2 more rows back
+
+        // Store pixels:
+        st1             {v1.16b},     [x0], x1 // P2
+    .endif
+        st1             {v2.16b},     [x0], x1 // P1
+        st1             {v3.16b},     [x0], x1 // P0
+        st1             {v4.16b},     [x0], x1 // Q0
+        st1             {v5.16b},     [x0], x1 // Q1
+    .if !\simple
+        st1             {v6.16b},     [x0]     // Q2
+    .endif
+
+        ret
+endfunc
+.endm
+
+vp8_v_loop_filter16
+vp8_v_loop_filter16 _inner,  inner=1
+vp8_v_loop_filter16 _simple, simple=1
+
+.macro  vp8_v_loop_filter8uv name, inner=0
+function ff_vp8_v_loop_filter8uv\name\()_neon, export=1    // x0: u at the Q0 row, x1: v at the Q0 row, x2: stride, w3: flim_E, w4: flim_I, w5: hev_thresh
+        sub             x0,  x0,  x2,  lsl #2    // rewind u by 4 rows to P3
+        sub             x1,  x1,  x2,  lsl #2    // rewind v by 4 rows to P3
+        // Load pixels: u rows go to the low 8 bytes, v rows to the high 8 bytes
+        ld1          {v0.d}[0],     [x0], x2  // P3
+        ld1          {v0.d}[1],     [x1], x2  // P3
+        ld1          {v1.d}[0],     [x0], x2  // P2
+        ld1          {v1.d}[1],     [x1], x2  // P2
+        ld1          {v2.d}[0],     [x0], x2  // P1
+        ld1          {v2.d}[1],     [x1], x2  // P1
+        ld1          {v3.d}[0],     [x0], x2  // P0
+        ld1          {v3.d}[1],     [x1], x2  // P0
+        ld1          {v4.d}[0],     [x0], x2  // Q0
+        ld1          {v4.d}[1],     [x1], x2  // Q0
+        ld1          {v5.d}[0],     [x0], x2  // Q1
+        ld1          {v5.d}[1],     [x1], x2  // Q1
+        ld1          {v6.d}[0],     [x0], x2  // Q2
+        ld1          {v6.d}[1],     [x1], x2  // Q2
+        ld1          {v7.d}[0],     [x0]      // Q3
+        ld1          {v7.d}[1],     [x1]      // Q3
+
+        dup          v22.16b, w3                 // flim_E
+        dup          v23.16b, w4                 // flim_I
+
+        vp8_loop_filter inner=\inner, hev_thresh=w5
+
+        // back up to P2:  u,v -= stride * 6
+        sub          x0,  x0,  x2,  lsl #2
+        sub          x1,  x1,  x2,  lsl #2
+        sub          x0,  x0,  x2,  lsl #1
+        sub          x1,  x1,  x2,  lsl #1
+
+        // Store pixels:
+
+        st1          {v1.d}[0],     [x0], x2  // P2
+        st1          {v1.d}[1],     [x1], x2  // P2
+        st1          {v2.d}[0],     [x0], x2  // P1
+        st1          {v2.d}[1],     [x1], x2  // P1
+        st1          {v3.d}[0],     [x0], x2  // P0
+        st1          {v3.d}[1],     [x1], x2  // P0
+        st1          {v4.d}[0],     [x0], x2  // Q0
+        st1          {v4.d}[1],     [x1], x2  // Q0
+        st1          {v5.d}[0],     [x0], x2  // Q1
+        st1          {v5.d}[1],     [x1], x2  // Q1
+        st1          {v6.d}[0],     [x0]      // Q2
+        st1          {v6.d}[1],     [x1]      // Q2
+
+        ret
+endfunc
+.endm
+
+vp8_v_loop_filter8uv
+vp8_v_loop_filter8uv _inner, inner=1
+
+.macro  vp8_h_loop_filter16 name, inner=0, simple=0
+function ff_vp8_h_loop_filter16\name\()_neon, export=1    // x0: dst at the Q0 column, x1: stride, w2: flim_E, w3: flim_I, w4: hev_thresh
+
+        sub             x0,  x0,  #4             // start 4 pixels left of x0
+        // Load pixels: 16 rows of 8 bytes, rows 0..7 in the low halves, rows 8..15 in the high halves
+        ld1             {v0.d}[0], [x0], x1
+        ld1             {v1.d}[0], [x0], x1
+        ld1             {v2.d}[0], [x0], x1
+        ld1             {v3.d}[0], [x0], x1
+        ld1             {v4.d}[0], [x0], x1
+        ld1             {v5.d}[0], [x0], x1
+        ld1             {v6.d}[0], [x0], x1
+        ld1             {v7.d}[0], [x0], x1
+        ld1             {v0.d}[1], [x0], x1
+        ld1             {v1.d}[1], [x0], x1
+        ld1             {v2.d}[1], [x0], x1
+        ld1             {v3.d}[1], [x0], x1
+        ld1             {v4.d}[1], [x0], x1
+        ld1             {v5.d}[1], [x0], x1
+        ld1             {v6.d}[1], [x0], x1
+        ld1             {v7.d}[1], [x0], x1
+
+        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31    // presumably leaves P3..Q3 in v0..v7 (macro is defined outside this section)
+
+        dup             v22.16b, w2                 // flim_E
+    .if !\simple
+        dup             v23.16b, w3                 // flim_I
+    .endif
+
+        vp8_loop_filter inner=\inner, simple=\simple, hev_thresh=w4
+
+        sub             x0,  x0,  x1, lsl #4    // backup 16 rows
+
+        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31    // presumably back to the row layout used by the loads
+
+        // Store pixels: all 8 bytes of every row are written back
+        st1             {v0.d}[0], [x0], x1
+        st1             {v1.d}[0], [x0], x1
+        st1             {v2.d}[0], [x0], x1
+        st1             {v3.d}[0], [x0], x1
+        st1             {v4.d}[0], [x0], x1
+        st1             {v5.d}[0], [x0], x1
+        st1             {v6.d}[0], [x0], x1
+        st1             {v7.d}[0], [x0], x1
+        st1             {v0.d}[1], [x0], x1
+        st1             {v1.d}[1], [x0], x1
+        st1             {v2.d}[1], [x0], x1
+        st1             {v3.d}[1], [x0], x1
+        st1             {v4.d}[1], [x0], x1
+        st1             {v5.d}[1], [x0], x1
+        st1             {v6.d}[1], [x0], x1
+        st1             {v7.d}[1], [x0]
+
+        ret
+endfunc
+.endm
+
+vp8_h_loop_filter16
+vp8_h_loop_filter16 _inner,  inner=1
+vp8_h_loop_filter16 _simple, simple=1
+
+.macro  vp8_h_loop_filter8uv name, inner=0
+function ff_vp8_h_loop_filter8uv\name\()_neon, export=1    // x0: u at the Q0 column, x1: v at the Q0 column, x2: stride, w3: flim_E, w4: flim_I, w5: hev_thresh
+        sub             x0,  x0,  #4             // start 4 pixels left of x0
+        sub             x1,  x1,  #4             // same for v
+
+        // Load pixels: 8 rows of 8 bytes from each plane
+        ld1          {v0.d}[0],     [x0], x2 // load u
+        ld1          {v0.d}[1],     [x1], x2 // load v
+        ld1          {v1.d}[0],     [x0], x2
+        ld1          {v1.d}[1],     [x1], x2
+        ld1          {v2.d}[0],     [x0], x2
+        ld1          {v2.d}[1],     [x1], x2
+        ld1          {v3.d}[0],     [x0], x2
+        ld1          {v3.d}[1],     [x1], x2
+        ld1          {v4.d}[0],     [x0], x2
+        ld1          {v4.d}[1],     [x1], x2
+        ld1          {v5.d}[0],     [x0], x2
+        ld1          {v5.d}[1],     [x1], x2
+        ld1          {v6.d}[0],     [x0], x2
+        ld1          {v6.d}[1],     [x1], x2
+        ld1          {v7.d}[0],     [x0], x2
+        ld1          {v7.d}[1],     [x1], x2
+
+        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31    // presumably leaves P3..Q3 in v0..v7 (macro is defined outside this section)
+
+        dup             v22.16b, w3                 // flim_E
+        dup             v23.16b, w4                 // flim_I
+
+        vp8_loop_filter inner=\inner, hev_thresh=w5
+
+        sub             x0,  x0,  x2, lsl #3    // backup u 8 rows
+        sub             x1,  x1,  x2, lsl #3    // backup v 8 rows
+
+        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31    // presumably back to the row layout used by the loads
+
+        // Store pixels:
+        st1          {v0.d}[0],     [x0], x2 // store u
+        st1          {v0.d}[1],     [x1], x2 // store v
+        st1          {v1.d}[0],     [x0], x2
+        st1          {v1.d}[1],     [x1], x2
+        st1          {v2.d}[0],     [x0], x2
+        st1          {v2.d}[1],     [x1], x2
+        st1          {v3.d}[0],     [x0], x2
+        st1          {v3.d}[1],     [x1], x2
+        st1          {v4.d}[0],     [x0], x2
+        st1          {v4.d}[1],     [x1], x2
+        st1          {v5.d}[0],     [x0], x2
+        st1          {v5.d}[1],     [x1], x2
+        st1          {v6.d}[0],     [x0], x2
+        st1          {v6.d}[1],     [x1], x2
+        st1          {v7.d}[0],     [x0]
+        st1          {v7.d}[1],     [x1]
+
+        ret
+
+endfunc
+.endm
+
+vp8_h_loop_filter8uv
+vp8_h_loop_filter8uv _inner, inner=1
+
+
+function ff_put_vp8_pixels16_neon, export=1    // copy 16-byte rows; x0: dst, x1: dst stride, x2: src, x3: src stride, w4: row count
+1:
+        subs            w4, w4, #4                  // four rows are handled per iteration
+        ld1             {v0.16b},     [x2], x3      // read 4 source rows
+        ld1             {v1.16b},     [x2], x3
+        ld1             {v2.16b},     [x2], x3
+        ld1             {v3.16b},     [x2], x3
+        st1             {v0.16b},     [x0], x1      // write them to dst
+        st1             {v1.16b},     [x0], x1
+        st1             {v2.16b},     [x0], x1
+        st1             {v3.16b},     [x0], x1
+        b.gt            1b                          // loop while the remaining count is positive
+        ret
+endfunc
+
+function ff_put_vp8_pixels8_neon, export=1    // copy 8-byte rows; x0: dst, x1: dst stride, x2: src, x3: src stride, w4: row count
+1:
+        subs            w4, w4, #4             // four rows are handled per iteration
+        ld1             {v0.8b},   [x2], x3    // rows 0 and 1 share v0 (low and high half)
+        ld1             {v0.d}[1], [x2], x3
+        ld1             {v1.8b},   [x2], x3    // rows 2 and 3 share v1
+        ld1             {v1.d}[1], [x2], x3
+        st1             {v0.8b},   [x0], x1    // write the four rows to dst
+        st1             {v0.d}[1], [x0], x1
+        st1             {v1.8b},   [x0], x1
+        st1             {v1.d}[1], [x0], x1
+        b.gt            1b                     // loop while the remaining count is positive
+        ret
+endfunc
+
+/* 4/6-tap 8th-pel MC */
+
+.macro  vp8_epel8_h6    d,   s0,   s1    // 6-tap filter over 8 pixels: d = result, s0:s1 = source bytes, weights in v0.h[0..5]; overwrites v18, v19, v21-v26
+        ext             v22.8b, \s0\().8b,  \s1\().8b,  #1    // source shifted by 1 byte
+        uxtl            v18.8h, \s0\().8b                     // source at offset 0, widened to 16 bit
+        ext             v23.8b, \s0\().8b,  \s1\().8b,  #2    // offset 2
+        uxtl            v19.8h, v22.8b                        // offset 1, widened
+        ext             v24.8b, \s0\().8b,  \s1\().8b,  #3    // offset 3
+        uxtl            v21.8h, v23.8b                        // offset 2, widened
+        ext             v25.8b, \s0\().8b,  \s1\().8b,  #4    // offset 4
+        uxtl            v22.8h, v24.8b                        // offset 3, widened
+        ext             v26.8b, \s0\().8b,  \s1\().8b,  #5    // offset 5
+        uxtl            v25.8h, v25.8b                        // offset 4, widened
+        mul             v21.8h, v21.8h, v0.h[2]               // + src[2] * v0.h[2]
+        uxtl            v26.8h, v26.8b                        // offset 5, widened
+        mul             v22.8h, v22.8h, v0.h[3]               // + src[3] * v0.h[3]
+        mls             v21.8h, v19.8h, v0.h[1]               // - src[1] * v0.h[1]
+        mls             v22.8h, v25.8h, v0.h[4]               // - src[4] * v0.h[4]
+        mla             v21.8h, v18.8h, v0.h[0]               // + src[0] * v0.h[0]
+        mla             v22.8h, v26.8h, v0.h[5]               // + src[5] * v0.h[5]
+        sqadd           v22.8h, v21.8h, v22.8h                // saturating sum of both partial sums
+        sqrshrun        \d\().8b, v22.8h, #7                  // rounding >> 7, saturated to unsigned 8 bit
+.endm
+
+.macro  vp8_epel16_h6   d0,  v0,  v1    // 16-pixel form of the 6-tap filter: d0 = result, \v0:\v1 = source bytes; plain v0.h[n] is the real v0 register (weights); overwrites v1-v3, v16-v23
+        ext             v22.16b, \v0\().16b, \v1\().16b, #3    // source at offset 3
+        ext             v23.16b, \v0\().16b, \v1\().16b, #4    // offset 4
+        uxtl            v19.8h,  v22.8b                        // offset 3, low 8 pixels widened
+        uxtl2           v22.8h,  v22.16b                       // offset 3, high 8 pixels widened
+        ext             v3.16b,  \v0\().16b, \v1\().16b, #2    // offset 2
+        uxtl            v20.8h,  v23.8b                        // offset 4, low
+        uxtl2           v23.8h,  v23.16b                       // offset 4, high
+        ext             v16.16b, \v0\().16b, \v1\().16b, #1    // offset 1
+        uxtl            v18.8h,  v3.8b                         // offset 2, low
+        uxtl2           v3.8h,   v3.16b                        // offset 2, high
+        ext             v2.16b,  \v0\().16b, \v1\().16b, #5    // offset 5
+        uxtl            v21.8h,  v2.8b                         // offset 5, low
+        uxtl2           v2.8h,   v2.16b                        // offset 5, high
+        uxtl            v17.8h,  v16.8b                        // offset 1, low
+        uxtl2           v16.8h,  v16.16b                       // offset 1, high
+        mul             v19.8h,  v19.8h, v0.h[3]               // low:  + src[3] * v0.h[3]
+        mul             v18.8h,  v18.8h, v0.h[2]               // low:  + src[2] * v0.h[2]
+        mul             v3.8h,   v3.8h,  v0.h[2]               // high: + src[2] * v0.h[2]
+        mul             v22.8h,  v22.8h, v0.h[3]               // high: + src[3] * v0.h[3]
+        mls             v19.8h,  v20.8h, v0.h[4]               // low:  - src[4] * v0.h[4]
+        uxtl            v20.8h,  \v0\().8b                     // offset 0, low
+        uxtl2           v1.8h,   \v0\().16b                    // offset 0, high (written to the real v1)
+        mls             v18.8h,  v17.8h, v0.h[1]               // low:  - src[1] * v0.h[1]
+        mls             v3.8h,   v16.8h, v0.h[1]               // high: - src[1] * v0.h[1]
+        mls             v22.8h,  v23.8h, v0.h[4]               // high: - src[4] * v0.h[4]
+        mla             v18.8h,  v20.8h, v0.h[0]               // low:  + src[0] * v0.h[0]
+        mla             v19.8h,  v21.8h, v0.h[5]               // low:  + src[5] * v0.h[5]
+        mla             v3.8h,   v1.8h,  v0.h[0]               // high: + src[0] * v0.h[0]
+        mla             v22.8h,  v2.8h,  v0.h[5]               // high: + src[5] * v0.h[5]
+        sqadd           v19.8h,  v18.8h, v19.8h                // low:  saturating sum of both partial sums
+        sqadd           v22.8h,  v3.8h,  v22.8h                // high: saturating sum of both partial sums
+        sqrshrun        \d0\().8b,  v19.8h, #7                 // rounding >> 7, saturated to unsigned 8 bit
+        sqrshrun2       \d0\().16b, v22.8h, #7
+.endm
+
+.macro  vp8_epel8_v6_y2 d0, d1, s0, s1, s2, s3, s4, s5, s6  // 6-tap vertical filter: two output rows d0, d1 from seven input rows s0..s6; inputs are widened in place, v31 is scratch
+        uxtl            \s0\().8h, \s0\().8b
+        uxtl            \s3\().8h, \s3\().8b
+        uxtl            \s6\().8h, \s6\().8b
+        uxtl            \s1\().8h, \s1\().8b
+        uxtl            \s4\().8h, \s4\().8b
+        uxtl            \s2\().8h, \s2\().8b
+        uxtl            \s5\().8h, \s5\().8b
+        mul             \s0\().8h, \s0\().8h, v0.h[0]  // s0 accumulates taps 0..2 of the first output row
+        mul             v31.8h   , \s3\().8h, v0.h[3]  // v31 accumulates taps 3..5 of the first output row
+        mul             \s3\().8h, \s3\().8h, v0.h[2]  // s3 accumulates taps 0..2 of the second output row
+        mul             \s6\().8h, \s6\().8h, v0.h[5]  // s6 accumulates taps 3..5 of the second output row
+
+        mls             \s0\().8h, \s1\().8h, v0.h[1]  // coefficients 1 and 4 are subtracted
+        mls             v31.8h   , \s4\().8h, v0.h[4]
+        mls             \s3\().8h, \s2\().8h, v0.h[1]
+        mls             \s6\().8h, \s5\().8h, v0.h[4]
+
+        mla             \s0\().8h, \s2\().8h, v0.h[2]
+        mla             v31.8h   , \s5\().8h, v0.h[5]
+        mla             \s3\().8h, \s1\().8h, v0.h[0]
+        mla             \s6\().8h, \s4\().8h, v0.h[3]
+        sqadd           v31.8h   , \s0\().8h, v31.8h  // first output row: rows s0..s5
+        sqadd           \s6\().8h, \s3\().8h, \s6\().8h  // second output row: rows s1..s6
+        sqrshrun        \d0\().8b, v31.8h,    #7  // rounding shift right by 7, saturate to unsigned bytes
+        sqrshrun        \d1\().8b, \s6\().8h, #7
+.endm
+
+.macro  vp8_epel8_h4    d,   v0,   v1  // 4-tap horizontal filter of 8 pixels; parameters v0/v1 are the two 8-byte sources, substituted only where written with a leading backslash
+        ext             v22.8b, \v0\().8b,  \v1\().8b,  #1
+        uxtl            v19.8h, \v0\().8b  // v19 = source at byte offset 0, widened to 16 bit
+        ext             v23.8b, \v0\().8b,  \v1\().8b,  #2
+        uxtl            v20.8h, v22.8b  // v20 = offset 1
+        ext             v25.8b, \v0\().8b,  \v1\().8b,  #3
+        uxtl            v22.8h, v23.8b  // v22 = offset 2
+        uxtl            v25.8h, v25.8b  // v25 = offset 3
+        mul             v20.8h, v20.8h, v0.h[2]  // plain v0 is the coefficient register; only coefficients 1..4 are used
+        mul             v22.8h, v22.8h, v0.h[3]
+        mls             v20.8h, v19.8h, v0.h[1]  // coefficients 1 and 4 are subtracted
+        mls             v22.8h, v25.8h, v0.h[4]
+        sqadd           v22.8h, v20.8h, v22.8h
+        sqrshrun        \d\().8b, v22.8h, #7  // rounding shift right by 7, saturate to unsigned bytes
+.endm
+
+.macro  vp8_epel8_v4_y2 d0, s0, s1, s2, s3, s4  // 4-tap vertical filter: two output rows packed into the 16 bytes of d0, from five input rows s0..s4
+        uxtl            \s0\().8h,  \s0\().8b
+        uxtl            \s1\().8h,  \s1\().8b
+        uxtl            \s2\().8h,  \s2\().8b
+        uxtl            \s3\().8h,  \s3\().8b
+        uxtl            \s4\().8h,  \s4\().8b
+        mul             v21.8h,     \s1\().8h, v0.h[2]  // v21/v23 build the first output row from rows s0..s3
+        mul             v23.8h,     \s2\().8h, v0.h[3]
+        mul             \s2\().8h,  \s2\().8h, v0.h[2]  // s2/v22 build the second output row from rows s1..s4
+        mul             v22.8h,     \s3\().8h, v0.h[3]
+        mls             v21.8h,     \s0\().8h, v0.h[1]  // coefficients 1 and 4 are subtracted
+        mls             v23.8h,     \s3\().8h, v0.h[4]
+        mls             \s2\().8h,  \s1\().8h, v0.h[1]
+        mls             v22.8h,     \s4\().8h, v0.h[4]
+        sqadd           v21.8h,     v21.8h,    v23.8h
+        sqadd           \s2\().8h,  \s2\().8h, v22.8h
+        sqrshrun        \d0\().8b,  v21.8h,    #7  // first output row in the low 8 bytes
+        sqrshrun2       \d0\().16b, \s2\().8h, #7  // second output row in the high 8 bytes
+.endm
+
+
+// note: worst case sum of all 6-tap filter values * 255 is 0x7f80 so 16 bit
+// arithmetic can be used to apply filters
+const   subpel_filters, align=4  // seven rows of 8 halfwords (16 bytes each); columns 0..5 are coefficient magnitudes, columns 6..7 are zero and unused by the macros in this file
+        .short     0,   6, 123,  12,   1,   0,   0,   0  // users pass an offset of -16 to movrel, so presumably index 1 selects this row
+        .short     2,  11, 108,  36,   8,   1,   0,   0
+        .short     0,   9,  93,  50,   6,   0,   0,   0
+        .short     3,  16,  77,  77,  16,   3,   0,   0
+        .short     0,   6,  50,  93,   9,   0,   0,   0
+        .short     1,   8,  36, 108,  11,   2,   0,   0
+        .short     0,   1,  12, 123,   6,   0,   0,   0
+endconst
+
+function ff_put_vp8_epel16_v6_neon, export=1  // 16 pixels wide, vertical 6-tap filter, two output rows per loop iteration
+        sub             x2,  x2,  x3,  lsl #1  // x2 -= 2 * x3: the first input row is two rows above x2
+
+        sxtw            x4,  w4  // x4 = sign-extended row count
+        sxtw            x6,  w6  // x6 = sign-extended filter index
+        movrel          x17,  subpel_filters, -16  // presumably the table address minus one 16-byte row (movrel is defined elsewhere)
+        add             x6,  x17,  x6, lsl #4  // y
+        ld1             {v0.8h},     [x6]  // v0 = the 8 halfwords of the selected filter row
+1:
+        ld1             {v1.1d - v2.1d},    [x2], x3  // seven input rows; first register = left 8 bytes, second = right 8 bytes
+        ld1             {v3.1d - v4.1d},    [x2], x3
+        ld1             {v16.1d - v17.1d},  [x2], x3
+        ld1             {v18.1d - v19.1d},  [x2], x3
+        ld1             {v20.1d - v21.1d},  [x2], x3
+        ld1             {v22.1d - v23.1d},  [x2], x3
+        ld1             {v24.1d - v25.1d},  [x2]
+        sub             x2,  x2,  x3, lsl #2  // six post-increments minus four rows: net advance of two rows
+
+        vp8_epel8_v6_y2 v1, v3, v1, v3, v16, v18, v20, v22, v24  // left halves: v1 = first output row, v3 = second
+        vp8_epel8_v6_y2 v2, v4, v2, v4, v17, v19, v21, v23, v25  // right halves: v2 = first output row, v4 = second
+
+        st1             {v1.1d - v2.1d}, [x0], x1  // store 16 bytes, then x0 += x1
+        st1             {v3.1d - v4.1d}, [x0], x1
+        subs            x4, x4, #2
+        b.ne            1b  // leaves the loop only when the count reaches exactly zero
+
+        ret
+endfunc
+
+function ff_put_vp8_epel16_h6_neon, export=1  // 16 pixels wide, horizontal 6-tap filter, one row per loop iteration
+        sub             x2,  x2,  #2  // the first input byte is two bytes left of x2
+        sxtw            x5,  w5 // x
+
+        // horizontal filter (the only pass in this function):
+        movrel          x17,  subpel_filters, -16
+        add             x5,  x17,  x5, lsl #4 // x
+        ld1             {v0.8h},  [x5]  // v0 = the 8 halfwords of the selected filter row
+1:
+        ld1             {v1.16b, v2.16b}, [x2], x3  // 32 input bytes, then x2 += x3
+        vp8_epel16_h6   v1, v1, v2  // v1 = 16 filtered pixels
+        st1             {v1.16b}, [x0], x1  // store one row, then x0 += x1
+
+        subs            w4, w4, #1
+        b.ne            1b
+        ret
+endfunc
+
+
+function ff_put_vp8_epel16_h6v6_neon, export=1  // 16 pixels wide: horizontal 6-tap pass into a stack buffer, then vertical 6-tap pass to the destination
+        sub             x2,  x2,  x3,  lsl #1  // start two rows above x2 ...
+        sub             x2,  x2,  #2  // ... and two bytes to the left
+
+        // first pass (horizontal):
+        movrel          x17,  subpel_filters, -16
+        sxtw            x5,  w5 // x
+        add             x16,  x17,  x5, lsl #4 // x
+        sub             sp,  sp,  #336+16  // 336 bytes = 21 rows of 16 bytes (h + 5 rows for h up to 16), plus 16 bytes of slack for aligning x7
+        ld1             {v0.8h},  [x16]  // v0 = horizontal filter row
+        add             x7,  sp,  #15
+        sxtw            x4,  w4
+        add             x16, x4, #5   // h + 5 intermediate rows
+        bic             x7,  x7,  #15  // x7 = sp rounded up to a multiple of 16
+1:
+        ld1             {v1.16b, v2.16b}, [x2], x3
+        vp8_epel16_h6   v1, v1, v2
+        st1             {v1.16b}, [x7], #16  // append one filtered 16-byte row to the buffer
+        subs            x16, x16, #1
+        b.ne            1b
+
+
+        // second pass (vertical):
+        sxtw            x6,  w6
+        add             x6,  x17,  x6, lsl #4  // y
+        add             x7,  sp,  #15
+        ld1             {v0.8h},     [x6]  // v0 = vertical filter row
+        bic             x7,  x7,  #15  // x7 = start of the intermediate buffer again
+2:
+        ld1             {v1.8b - v4.8b},    [x7], #32  // each 16-byte row fills two registers: v1/v2 = row 0, v3/v4 = row 1
+        ld1             {v16.8b - v19.8b},  [x7], #32  // rows 2 and 3
+        ld1             {v20.8b - v23.8b},  [x7], #32  // rows 4 and 5
+        ld1             {v24.8b - v25.8b},  [x7]  // row 6
+        sub             x7,  x7,  #64  // net advance of 32 bytes = two rows
+
+        vp8_epel8_v6_y2 v1, v3, v1, v3, v16, v18, v20, v22, v24  // left halves of two output rows
+        vp8_epel8_v6_y2 v2, v4, v2, v4, v17, v19, v21, v23, v25  // right halves of two output rows
+        trn1            v1.2d, v1.2d, v2.2d  // v1 = left and right half of the first output row
+        trn1            v3.2d, v3.2d, v4.2d  // v3 = left and right half of the second output row
+
+        st1             {v1.16b}, [x0], x1
+        st1             {v3.16b}, [x0], x1
+        subs            x4, x4, #2
+        b.ne            2b  // leaves the loop only when the count reaches exactly zero
+
+        add             sp,  sp,  #336+16  // release the buffer
+        ret
+endfunc
+
+function ff_put_vp8_epel8_v6_neon, export=1  // 8 pixels wide, vertical 6-tap filter, two output rows per loop iteration
+        sub             x2,  x2,  x3,  lsl #1  // x2 -= 2 * x3: the first input row is two rows above x2
+
+        movrel          x7,  subpel_filters, -16  // presumably the table address minus one 16-byte row (movrel is defined elsewhere)
+        add             x6,  x7,  w6, uxtw #4  // x6 = x7 + 16 * w6, with w6 zero-extended
+        ld1             {v0.8h},  [x6]  // v0 = the 8 halfwords of the selected filter row
+1:
+        ld1             {v2.8b},  [x2], x3  // seven input rows of 8 bytes
+        ld1             {v3.8b},  [x2], x3
+        ld1             {v4.8b},  [x2], x3
+        ld1             {v5.8b},  [x2], x3
+        ld1             {v6.8b},  [x2], x3
+        ld1             {v7.8b},  [x2], x3
+        ld1             {v28.8b}, [x2]
+
+        sub             x2,  x2,  x3,  lsl #2  // six post-increments minus four rows: net advance of two rows
+
+        vp8_epel8_v6_y2 v2, v3, v2, v3, v4, v5, v6, v7, v28  // v2 = first output row, v3 = second
+
+        st1             {v2.8b}, [x0], x1
+        st1             {v3.8b}, [x0], x1
+        subs            w4,  w4,  #2
+        b.ne            1b  // leaves the loop only when the count reaches exactly zero
+
+        ret
+endfunc
+
+function ff_put_vp8_epel8_h6_neon, export=1  // 8 pixels wide, horizontal 6-tap filter, one row per loop iteration
+        sub             x2,  x2,  #2  // the first input byte is two bytes left of x2
+
+        movrel          x7,  subpel_filters, -16
+        add             x5,  x7,  w5, uxtw #4  // x5 = x7 + 16 * w5, with w5 zero-extended
+        ld1             {v0.8h},        [x5]  // v0 = the 8 halfwords of the selected filter row
+1:
+        ld1             {v2.8b, v3.8b}, [x2], x3  // 16 input bytes, then x2 += x3
+
+        vp8_epel8_h6    v2,  v2,  v3  // v2 = 8 filtered pixels
+
+        st1             {v2.8b}, [x0], x1
+        subs            w4,  w4,  #1
+        b.ne            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_epel8_h6v6_neon, export=1  // 8 pixels wide: horizontal 6-tap pass into a stack buffer, then vertical 6-tap pass to the destination
+        sub             x2,  x2,  x3,  lsl #1  // start two rows above x2 ...
+        sub             x2,  x2,  #2  // ... and two bytes to the left
+        sxtw            x4,  w4
+
+        // first pass (horizontal):
+        movrel          x17,  subpel_filters, -16
+        sxtw            x5,  w5
+        add             x5,  x17,  x5, lsl #4 // x
+        sub             sp,  sp,  #168+16  // 168 bytes = 21 rows of 8 bytes, plus 16 for aligning x7; NOTE(review): 184 is not a multiple of 16, so sp is not 16-byte aligned until the matching add (the buffer is only addressed through x7)
+        ld1             {v0.8h},  [x5]  // v0 = horizontal filter row
+        add             x7,  sp,  #15
+        add             x16, x4,  #5   // h + 5 intermediate rows
+        bic             x7,  x7,  #15  // x7 = sp rounded up to a multiple of 16
+1:
+        ld1             {v1.8b, v2.8b}, [x2], x3
+
+        vp8_epel8_h6    v1, v1, v2
+
+        st1             {v1.8b}, [x7], #8  // append one filtered 8-byte row to the buffer
+        subs            x16, x16, #1
+        b.ne            1b
+
+        // second pass (vertical):
+        sxtw            x6,  w6
+        add             x6,  x17,  x6, lsl #4  // y
+        add             x7,  sp,   #15
+        ld1             {v0.8h},   [x6]  // v0 = vertical filter row
+        bic             x7,  x7,   #15  // x7 = start of the intermediate buffer again
+2:
+        ld1             {v1.8b - v4.8b}, [x7], #32  // buffer rows 0..3
+        ld1             {v5.8b - v7.8b}, [x7]  // buffer rows 4..6
+
+        sub             x7,  x7,  #16  // net advance of 16 bytes = two rows
+
+        vp8_epel8_v6_y2 v1, v2, v1, v2, v3, v4, v5, v6, v7  // v1 = first output row, v2 = second
+
+        st1             {v1.8b}, [x0], x1
+        st1             {v2.8b}, [x0], x1
+        subs            x4, x4, #2
+        b.ne            2b  // leaves the loop only when the count reaches exactly zero
+
+        add             sp,  sp,  #168+16  // release the buffer
+        ret
+endfunc
+
+function ff_put_vp8_epel8_v4_neon, export=1  // 8 pixels wide, vertical 4-tap filter, two output rows per loop iteration
+        sub             x2,  x2,  x3  // the first input row is one row above x2
+
+        movrel          x7,  subpel_filters, -16
+        add             x6,  x7,  w6, uxtw #4  // x6 = x7 + 16 * w6, with w6 zero-extended
+        ld1             {v0.8h},     [x6]  // v0 = the 8 halfwords of the selected filter row
+1:
+        ld1             {v2.8b},     [x2], x3  // five input rows of 8 bytes
+        ld1             {v3.8b},     [x2], x3
+        ld1             {v4.8b},     [x2], x3
+        ld1             {v5.8b},     [x2], x3
+        ld1             {v6.8b},     [x2]
+        sub             x2,  x2,  x3,  lsl #1  // four post-increments minus two rows: net advance of two rows
+
+        vp8_epel8_v4_y2 v2, v2, v3, v4, v5, v6  // v2 = first output row in the low half, second in the high half
+
+        st1             {v2.d}[0], [x0], x1
+        st1             {v2.d}[1], [x0], x1
+        subs            w4,  w4,  #2
+        b.ne            1b  // leaves the loop only when the count reaches exactly zero
+
+        ret
+endfunc
+
+function ff_put_vp8_epel8_h4_neon, export=1  // 8 pixels wide, horizontal 4-tap filter, one row per loop iteration
+        sub             x2,  x2,  #1  // the first input byte is one byte left of x2
+
+        movrel          x7,  subpel_filters, -16
+        add             x5,  x7,  w5, uxtw #4  // x5 = x7 + 16 * w5, with w5 zero-extended
+        ld1             {v0.8h},       [x5]  // v0 = the 8 halfwords of the selected filter row
+1:
+        ld1             {v2.8b,v3.8b}, [x2], x3  // 16 input bytes, then x2 += x3
+
+        vp8_epel8_h4    v2,  v2,  v3  // v2 = 8 filtered pixels
+
+        st1             {v2.8b}, [x0], x1
+        subs            w4,  w4,  #1
+        b.ne            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_epel8_h4v6_neon, export=1  // 8 pixels wide: horizontal 4-tap pass into a stack buffer, then vertical 6-tap pass to the destination
+        sub             x2,  x2,  x3,  lsl #1  // start two rows above x2 ...
+        sub             x2,  x2,  #1  // ... and one byte to the left
+        sxtw            x4,  w4
+
+        // first pass (horizontal):
+        movrel          x17,  subpel_filters, -16
+        sxtw            x5,  w5
+        add             x5,  x17,  x5, lsl #4 // x
+        sub             sp,  sp,  #168+16  // 168 bytes = 21 rows of 8 bytes, plus 16 for aligning x7; NOTE(review): 184 is not a multiple of 16, so sp is not 16-byte aligned until the matching add (the buffer is only addressed through x7)
+        ld1             {v0.8h},  [x5]  // v0 = horizontal filter row
+        add             x7,  sp,  #15
+        add             x16, x4, #5   // h + 5 intermediate rows
+        bic             x7,  x7,  #15  // x7 = sp rounded up to a multiple of 16
+1:
+        ld1             {v1.8b, v2.8b}, [x2], x3
+
+        vp8_epel8_h4    v1, v1, v2
+
+        st1             {v1.8b}, [x7], #8  // append one filtered 8-byte row to the buffer
+        subs            x16, x16, #1
+        b.ne            1b
+
+        // second pass (vertical):
+        sxtw            x6,  w6
+        add             x6,  x17,  x6, lsl #4  // y
+        add             x7,  sp,   #15
+        ld1             {v0.8h},   [x6]  // v0 = vertical filter row
+        bic             x7,  x7,   #15  // x7 = start of the intermediate buffer again
+2:
+        ld1             {v1.8b - v4.8b}, [x7], #32  // buffer rows 0..3
+        ld1             {v5.8b - v7.8b}, [x7]  // buffer rows 4..6
+
+        sub             x7,  x7,  #16  // net advance of 16 bytes = two rows
+
+        vp8_epel8_v6_y2 v1, v2, v1, v2, v3, v4, v5, v6, v7  // v1 = first output row, v2 = second
+
+        st1             {v1.8b}, [x0], x1
+        st1             {v2.8b}, [x0], x1
+        subs            x4, x4, #2
+        b.ne            2b  // leaves the loop only when the count reaches exactly zero
+
+        add             sp,  sp,  #168+16  // release the buffer
+        ret
+endfunc
+
+function ff_put_vp8_epel8_h4v4_neon, export=1  // 8 pixels wide: horizontal 4-tap pass into a stack buffer, then vertical 4-tap pass to the destination
+        sub             x2,  x2,  x3  // start one row above x2 ...
+        sub             x2,  x2,  #1  // ... and one byte to the left
+        sxtw            x4,  w4
+
+
+        // first pass (horizontal):
+        movrel          x17,  subpel_filters, -16
+        sxtw            x5,  w5
+        add             x5,  x17,  x5, lsl #4 // x
+        sub             sp,  sp,  #168+16  // same 184-byte frame as the 6-tap variants although only h + 3 rows are written; NOTE(review): 184 is not a multiple of 16, the buffer is only addressed through x7
+        ld1             {v0.8h},  [x5]  // v0 = horizontal filter row
+        add             x7,  sp,  #15
+        add             x16, x4, #3   // h + 3 intermediate rows
+        bic             x7,  x7,  #15  // x7 = sp rounded up to a multiple of 16
+1:
+        ld1             {v1.8b, v2.8b}, [x2], x3
+
+        vp8_epel8_h4    v1, v1, v2
+
+        st1             {v1.8b}, [x7], #8  // append one filtered 8-byte row to the buffer
+        subs            x16, x16, #1
+        b.ne            1b
+
+        // second pass (vertical):
+        sxtw            x6,  w6
+        add             x6,  x17,  x6, lsl #4  // y
+        add             x7,  sp,   #15
+        ld1             {v0.8h},   [x6]  // v0 = vertical filter row
+        bic             x7,  x7,   #15  // x7 = start of the intermediate buffer again
+2:
+        ld1             {v1.8b - v2.8b}, [x7], #16  // buffer rows 0 and 1; x7 advances two rows
+        ld1             {v3.8b - v5.8b}, [x7]  // buffer rows 2..4, no writeback
+
+        vp8_epel8_v4_y2 v1, v1, v2, v3, v4, v5  // v1 = first output row in the low half, second in the high half
+
+        st1             {v1.d}[0], [x0], x1
+        st1             {v1.d}[1], [x0], x1
+        subs            x4, x4, #2
+        b.ne            2b  // leaves the loop only when the count reaches exactly zero
+
+        add             sp,  sp,  #168+16  // release the buffer
+        ret
+endfunc
+
+function ff_put_vp8_epel8_h6v4_neon, export=1  // 8 pixels wide: horizontal 6-tap pass into a stack buffer, then vertical 4-tap pass to the destination
+        sub             x2,  x2,  x3  // start one row above x2 ...
+        sub             x2,  x2,  #2  // ... and two bytes to the left
+        sxtw            x4,  w4
+
+
+        // first pass (horizontal):
+        movrel          x17,  subpel_filters, -16
+        sxtw            x5,  w5
+        add             x5,  x17,  x5, lsl #4 // x
+        sub             sp,  sp,  #168+16  // same 184-byte frame as the 6-tap variants although only h + 3 rows are written; NOTE(review): 184 is not a multiple of 16, the buffer is only addressed through x7
+        ld1             {v0.8h},  [x5]  // v0 = horizontal filter row
+        add             x7,  sp,  #15
+        add             x16, x4, #3   // h + 3 intermediate rows
+        bic             x7,  x7,  #15  // x7 = sp rounded up to a multiple of 16
+1:
+        ld1             {v1.8b, v2.8b}, [x2], x3
+
+        vp8_epel8_h6    v1, v1, v2
+
+        st1             {v1.8b}, [x7], #8  // append one filtered 8-byte row to the buffer
+        subs            x16, x16, #1
+        b.ne            1b
+
+        // second pass (vertical):
+        sxtw            x6,  w6
+        add             x6,  x17,  x6, lsl #4  // y
+        add             x7,  sp,   #15
+        ld1             {v0.8h},   [x6]  // v0 = vertical filter row
+        bic             x7,  x7,   #15  // x7 = start of the intermediate buffer again
+2:
+        ld1             {v1.8b - v2.8b}, [x7], #16  // buffer rows 0 and 1; x7 advances two rows
+        ld1             {v3.8b - v5.8b}, [x7]  // buffer rows 2..4, no writeback
+
+        vp8_epel8_v4_y2 v1, v1, v2, v3, v4, v5  // v1 = first output row in the low half, second in the high half
+
+        st1             {v1.d}[0], [x0], x1
+        st1             {v1.d}[1], [x0], x1
+        subs            x4, x4, #2
+        b.ne            2b  // leaves the loop only when the count reaches exactly zero
+
+        add             sp,  sp,  #168+16  // release the buffer
+        ret
+endfunc
+
+function ff_put_vp8_epel4_v6_neon, export=1  // 4 pixels wide, vertical 6-tap filter, four output rows per loop iteration
+        sub             x2,  x2,  x3,  lsl #1  // x2 -= 2 * x3: the first input row is two rows above x2
+
+        movrel          x7,  subpel_filters, -16
+        add             x6,  x7,  w6, uxtw #4  // x6 = x7 + 16 * w6, with w6 zero-extended
+        ld1             {v0.8h},    [x6]  // v0 = the 8 halfwords of the selected filter row
+1:
+        ld1r            {v2.2s},    [x2], x3  // input rows 0..6, 4 bytes each, replicated into both 32-bit lanes
+        ld1r            {v3.2s},    [x2], x3
+        ld1r            {v4.2s},    [x2], x3
+        ld1r            {v5.2s},    [x2], x3
+        ld1r            {v6.2s},    [x2], x3
+        ld1r            {v7.2s},    [x2], x3
+        ld1r            {v28.2s},   [x2]
+        sub             x2,  x2,  x3,  lsl #2  // back to input row 2
+        ld1             {v2.s}[1],  [x2], x3  // lane 1 is replaced by input rows 2..8
+        ld1             {v3.s}[1],  [x2], x3
+        ld1             {v4.s}[1],  [x2], x3
+        ld1             {v5.s}[1],  [x2], x3
+        ld1             {v6.s}[1],  [x2], x3
+        ld1             {v7.s}[1],  [x2], x3
+        ld1             {v28.s}[1], [x2]
+        sub             x2,  x2,  x3,  lsl #2  // net advance of four rows per iteration
+
+        vp8_epel8_v6_y2 v2, v3, v2, v3, v4, v5, v6, v7, v28  // lane 0 yields output rows 0 (v2) and 1 (v3), lane 1 yields rows 2 and 3
+
+        st1             {v2.s}[0],  [x0], x1  // output row 0
+        st1             {v3.s}[0],  [x0], x1  // output row 1
+        st1             {v2.s}[1],  [x0], x1  // output row 2
+        st1             {v3.s}[1],  [x0], x1  // output row 3
+        subs            w4,  w4,  #4
+        b.ne            1b  // leaves the loop only when the count reaches exactly zero
+
+        ret
+endfunc
+
+function ff_put_vp8_epel4_h6_neon, export=1  // 4 pixels wide, horizontal 6-tap filter, one row per loop iteration
+        sub             x2,  x2,  #2  // the first input byte is two bytes left of x2
+
+        movrel          x7,  subpel_filters, -16
+        add             x5,  x7,  w5, uxtw #4  // x5 = x7 + 16 * w5, with w5 zero-extended
+        ld1             {v0.8h},       [x5]  // v0 = the 8 halfwords of the selected filter row
+1:
+        ld1             {v2.8b,v3.8b}, [x2], x3  // 16 input bytes, then x2 += x3
+        vp8_epel8_h6    v2,  v2,  v3  // filters 8 pixels; only the first 4 are stored
+        st1             {v2.s}[0], [x0], x1
+        subs            w4,  w4,  #1
+        b.ne            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_epel4_h6v6_neon, export=1  // 4 pixels wide: horizontal 6-tap pass into a stack buffer, then vertical 6-tap pass, four output rows per iteration
+        sub             x2,  x2,  x3,  lsl #1  // start two rows above x2 ...
+        sub             x2,  x2,  #2  // ... and two bytes to the left
+
+        movrel          x7,  subpel_filters, -16
+        add             x5,  x7,  w5, uxtw #4
+        ld1             {v0.8h},       [x5]  // v0 = horizontal filter row
+
+        sub             sp,  sp,  #52  // 52 bytes = 13 rows of 4 bytes (h + 5 rows for h up to 8); NOTE(review): not a multiple of 16, the buffer is only addressed through x9
+        add             w8,  w4,  #5  // h + 5 intermediate rows
+        mov             x9,  sp
+1:
+        ld1             {v2.8b,v3.8b}, [x2], x3
+        vp8_epel8_h6    v2,  v2,  v3
+        st1             {v2.s}[0],     [x9], #4  // append one filtered 4-byte row to the buffer
+        subs            w8,  w8,  #1
+        b.ne            1b
+
+        add             x6,  x7,  w6, uxtw #4
+        ld1             {v0.8h},       [x6]  // v0 = vertical filter row
+        mov             x9,  sp  // x9 = start of the intermediate buffer again
+2:
+        ld1             {v2.8b,v3.8b}, [x9], #16  // v2 = buffer rows 0,1; v3 = rows 2,3
+        ld1             {v6.8b},       [x9], #8  // v6 = rows 4,5
+        ld1r            {v28.2s},      [x9]  // v28 = row 6 in both lanes
+        sub             x9,  x9,  #16  // back to row 2
+        ld1             {v4.8b,v5.8b}, [x9], #16  // v4 = rows 2,3; v5 = rows 4,5
+        ld1             {v7.8b},       [x9], #8  // v7 = rows 6,7
+        ld1             {v28.s}[1],    [x9]  // v28 lane 1 = row 8
+        sub             x9,  x9,  #16  // net advance of 16 bytes = four rows
+        trn1            v1.2s, v2.2s, v4.2s  // v1 = rows 0,2
+        trn2            v4.2s, v2.2s, v4.2s  // v4 = rows 1,3
+        trn1            v2.2s, v3.2s, v5.2s  // v2 = rows 2,4
+        trn2            v5.2s, v3.2s, v5.2s  // v5 = rows 3,5
+        trn1            v3.2s, v6.2s, v7.2s  // v3 = rows 4,6
+        trn2            v7.2s, v6.2s, v7.2s  // v7 = rows 5,7
+        vp8_epel8_v6_y2 v2, v3, v1, v4, v2, v5, v3, v7, v28  // lane 0 filters rows 0..6, lane 1 filters rows 2..8
+        st1             {v2.s}[0],  [x0], x1  // output row 0
+        st1             {v3.s}[0],  [x0], x1  // output row 1
+        st1             {v2.s}[1],  [x0], x1  // output row 2
+        st1             {v3.s}[1],  [x0], x1  // output row 3
+        subs            w4,  w4,  #4
+        b.ne            2b  // leaves the loop only when the count reaches exactly zero
+
+        add             sp,  sp,  #52  // release the buffer
+        ret
+endfunc
+
+function ff_put_vp8_epel4_h4v6_neon, export=1  // 4 pixels wide: horizontal 4-tap pass into a stack buffer, then vertical 6-tap pass, four output rows per iteration
+        sub             x2,  x2,  x3,  lsl #1  // start two rows above x2 ...
+        sub             x2,  x2,  #1  // ... and one byte to the left
+
+        movrel          x7,  subpel_filters, -16
+        add             x5,  x7,  w5, uxtw #4
+        ld1             {v0.8h},       [x5]  // v0 = horizontal filter row
+
+        sub             sp,  sp,  #52  // 52 bytes = 13 rows of 4 bytes (h + 5 rows for h up to 8); NOTE(review): not a multiple of 16, the buffer is only addressed through x9
+        add             w8,  w4,  #5  // h + 5 intermediate rows
+        mov             x9,  sp
+1:
+        ld1             {v2.8b},       [x2], x3  // 8 input bytes are enough for the 4 stored pixels
+        vp8_epel8_h4    v2,  v2,  v2  // second source only affects pixels beyond the first 4, which are not stored
+        st1             {v2.s}[0],     [x9], #4  // append one filtered 4-byte row to the buffer
+        subs            w8,  w8,  #1
+        b.ne            1b
+
+        add             x6,  x7,  w6, uxtw #4
+        ld1             {v0.8h},       [x6]  // v0 = vertical filter row
+        mov             x9,  sp  // x9 = start of the intermediate buffer again
+2:
+        ld1             {v2.8b,v3.8b}, [x9], #16  // v2 = buffer rows 0,1; v3 = rows 2,3
+        ld1             {v6.8b},       [x9], #8  // v6 = rows 4,5
+        ld1r            {v28.2s},      [x9]  // v28 = row 6 in both lanes
+        sub             x9,  x9,  #16  // back to row 2
+        ld1             {v4.8b,v5.8b}, [x9], #16  // v4 = rows 2,3; v5 = rows 4,5
+        ld1             {v7.8b},       [x9], #8  // v7 = rows 6,7
+        ld1             {v28.s}[1],    [x9]  // v28 lane 1 = row 8
+        sub             x9,  x9,  #16  // net advance of 16 bytes = four rows
+        trn1            v1.2s, v2.2s, v4.2s  // v1 = rows 0,2
+        trn2            v4.2s, v2.2s, v4.2s  // v4 = rows 1,3
+        trn1            v2.2s, v3.2s, v5.2s  // v2 = rows 2,4
+        trn2            v5.2s, v3.2s, v5.2s  // v5 = rows 3,5
+        trn1            v3.2s, v6.2s, v7.2s  // v3 = rows 4,6
+        trn2            v7.2s, v6.2s, v7.2s  // v7 = rows 5,7
+        vp8_epel8_v6_y2 v2, v3, v1, v4, v2, v5, v3, v7, v28  // lane 0 filters rows 0..6, lane 1 filters rows 2..8
+        st1             {v2.s}[0],  [x0], x1  // output row 0
+        st1             {v3.s}[0],  [x0], x1  // output row 1
+        st1             {v2.s}[1],  [x0], x1  // output row 2
+        st1             {v3.s}[1],  [x0], x1  // output row 3
+        subs            w4,  w4,  #4
+        b.ne            2b  // leaves the loop only when the count reaches exactly zero
+
+        add             sp,  sp,  #52  // release the buffer
+        ret
+endfunc
+
+function ff_put_vp8_epel4_h6v4_neon, export=1  // 4 pixels wide: horizontal 6-tap pass into a stack buffer, then vertical 4-tap pass, four output rows per iteration
+        sub             x2,  x2,  x3  // start one row above x2 ...
+        sub             x2,  x2,  #2  // ... and two bytes to the left
+
+        movrel          x7,  subpel_filters, -16
+        add             x5,  x7,  w5, uxtw #4
+        ld1             {v0.8h},       [x5]  // v0 = horizontal filter row
+
+        sub             sp,  sp,  #44  // 44 bytes = 11 rows of 4 bytes (h + 3 rows for h up to 8); NOTE(review): not a multiple of 16, the buffer is only addressed through x9
+        add             w8,  w4,  #3  // h + 3 intermediate rows
+        mov             x9,  sp
+1:
+        ld1             {v2.8b,v3.8b}, [x2], x3
+        vp8_epel8_h6    v2, v2, v3
+        st1             {v2.s}[0],     [x9], #4  // append one filtered 4-byte row to the buffer
+        subs            w8,  w8,  #1
+        b.ne            1b
+
+        add             x6,  x7,  w6, uxtw #4
+        ld1             {v0.8h},       [x6]  // v0 = vertical filter row
+        mov             x9,  sp  // x9 = start of the intermediate buffer again
+2:
+        ld1             {v2.8b,v3.8b}, [x9], #16  // v2 = buffer rows 0,1; v3 = rows 2,3
+        ld1r            {v6.2s},       [x9]  // v6 = row 4 in both lanes
+        sub             x9,  x9,  #8  // back to row 2
+        ld1             {v4.8b,v5.8b}, [x9], #16  // v4 = rows 2,3; v5 = rows 4,5
+        ld1             {v6.s}[1],     [x9]  // v6 lane 1 = row 6
+        sub             x9,  x9,  #8  // net advance of 16 bytes = four rows
+        trn1            v1.2s, v2.2s, v4.2s  // v1 = rows 0,2
+        trn2            v4.2s, v2.2s, v4.2s  // v4 = rows 1,3
+        trn1            v2.2s, v3.2s, v5.2s  // v2 = rows 2,4
+        trn2            v5.2s, v3.2s, v5.2s  // v5 = rows 3,5
+        vp8_epel8_v4_y2 v1, v1, v4, v2, v5, v6  // v1 = output rows 0,2 in the low half and rows 1,3 in the high half
+        st1             {v1.s}[0],  [x0], x1  // output row 0
+        st1             {v1.s}[2],  [x0], x1  // output row 1
+        st1             {v1.s}[1],  [x0], x1  // output row 2
+        st1             {v1.s}[3],  [x0], x1  // output row 3
+        subs            w4,  w4,  #4
+        b.ne            2b  // leaves the loop only when the count reaches exactly zero
+
+        add             sp,  sp,  #44  // release the buffer
+        ret
+endfunc
+
+function ff_put_vp8_epel4_h4_neon, export=1  // 4 pixels wide, horizontal 4-tap filter, one row per loop iteration
+        sub             x2,  x2,  #1  // the first input byte is one byte left of x2
+
+        movrel          x7,  subpel_filters, -16
+        add             x5,  x7,  w5, uxtw #4  // x5 = x7 + 16 * w5, with w5 zero-extended
+        ld1             {v0.8h},    [x5]  // v0 = the 8 halfwords of the selected filter row
+1:
+        ld1             {v2.8b},    [x2], x3  // 8 input bytes are enough for the 4 stored pixels
+        vp8_epel8_h4    v2,  v2,  v2  // second source only affects pixels beyond the first 4, which are not stored
+        st1             {v2.s}[0],  [x0], x1
+        subs            w4,  w4,  #1
+        b.ne            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_epel4_v4_neon, export=1  // 4 pixels wide, vertical 4-tap filter, four output rows per loop iteration
+        sub             x2,  x2,  x3  // the first input row is one row above x2
+
+        movrel          x7,  subpel_filters, -16
+        add             x6,  x7,  w6, uxtw #4  // x6 = x7 + 16 * w6, with w6 zero-extended
+        ld1             {v0.8h},   [x6]  // v0 = the 8 halfwords of the selected filter row
+1:
+        ld1r            {v2.2s},   [x2], x3  // input rows 0..4, 4 bytes each, replicated into both 32-bit lanes
+        ld1r            {v3.2s},   [x2], x3
+        ld1r            {v4.2s},   [x2], x3
+        ld1r            {v5.2s},   [x2], x3
+        ld1r            {v6.2s},   [x2]
+        sub             x2,  x2,  x3,  lsl #1  // back to input row 2
+        ld1             {v2.s}[1], [x2], x3  // lane 1 is replaced by input rows 2..6
+        ld1             {v3.s}[1], [x2], x3
+        ld1             {v4.s}[1], [x2], x3
+        ld1             {v5.s}[1], [x2], x3
+        ld1             {v6.s}[1], [x2]
+        sub             x2,  x2,  x3,  lsl #1  // net advance of four rows per iteration
+
+        vp8_epel8_v4_y2 v2, v2, v3, v4, v5, v6  // v2 = output rows 0,2 in the low half and rows 1,3 in the high half
+
+        st1             {v2.s}[0], [x0], x1  // output row 0
+        st1             {v2.s}[2], [x0], x1  // output row 1
+        st1             {v2.s}[1], [x0], x1  // output row 2
+        st1             {v2.s}[3], [x0], x1  // output row 3
+        subs            w4,  w4,  #4
+        b.ne            1b  // leaves the loop only when the count reaches exactly zero
+
+        ret
+endfunc
+
+function ff_put_vp8_epel4_h4v4_neon, export=1  // 4 pixels wide: horizontal 4-tap pass into a stack buffer, then vertical 4-tap pass, four output rows per iteration
+        sub             x2,  x2,  x3  // start one row above x2 ...
+        sub             x2,  x2,  #1  // ... and one byte to the left
+
+        movrel          x7,  subpel_filters, -16
+        add             x5,  x7,  w5, uxtw #4
+        ld1             {v0.8h},       [x5]  // v0 = horizontal filter row
+
+        sub             sp,  sp,  #44  // 44 bytes = 11 rows of 4 bytes (h + 3 rows for h up to 8); NOTE(review): not a multiple of 16, the buffer is only addressed through x9
+        add             w8,  w4,  #3  // h + 3 intermediate rows
+        mov             x9,  sp
+1:
+        ld1             {v2.8b},       [x2], x3  // 8 input bytes are enough for the 4 stored pixels
+        vp8_epel8_h4    v2,  v2,  v2  // only v2 was loaded, so pass it as both sources (as the other 4-wide h4 functions do); the second source only affects pixels beyond the first 4
+        st1             {v2.s}[0],     [x9], #4  // append one filtered 4-byte row to the buffer
+        subs            w8,  w8,  #1
+        b.ne            1b
+
+        add             x6,  x7,  w6, uxtw #4
+        ld1             {v0.8h},       [x6]  // v0 = vertical filter row
+        mov             x9,  sp  // x9 = start of the intermediate buffer again
+2:
+        ld1             {v2.8b,v3.8b}, [x9], #16  // v2 = buffer rows 0,1; v3 = rows 2,3
+        ld1r            {v6.2s},       [x9]  // v6 = row 4 in both lanes
+        sub             x9,  x9,  #8  // back to row 2
+        ld1             {v4.8b,v5.8b}, [x9], #16  // v4 = rows 2,3; v5 = rows 4,5
+        ld1             {v6.s}[1],     [x9]  // v6 lane 1 = row 6
+        sub             x9,  x9,  #8  // net advance of 16 bytes = four rows
+        trn1            v1.2s, v2.2s, v4.2s  // v1 = rows 0,2
+        trn2            v4.2s, v2.2s, v4.2s  // v4 = rows 1,3
+        trn1            v2.2s, v3.2s, v5.2s  // v2 = rows 2,4
+        trn2            v5.2s, v3.2s, v5.2s  // v5 = rows 3,5
+        vp8_epel8_v4_y2 v1, v1, v4, v2, v5, v6  // v1 = output rows 0,2 in the low half and rows 1,3 in the high half
+        st1             {v1.s}[0], [x0], x1  // output row 0
+        st1             {v1.s}[2], [x0], x1  // output row 1
+        st1             {v1.s}[1], [x0], x1  // output row 2
+        st1             {v1.s}[3], [x0], x1  // output row 3
+        subs            w4,  w4,  #4
+        b.ne            2b  // leaves the loop only when the count reaches exactly zero
+
+        add             sp,  sp,  #44  // release the buffer
+        ret
+endfunc
+
+/* Bilinear MC */
+
+function ff_put_vp8_bilin16_h_neon, export=1  // 16 pixels wide, horizontal bilinear filter, two rows per loop iteration
+        mov             w7,     #8
+        dup             v0.8b,  w5  // v0 = w5 in every byte: weight of the pixel one byte to the right
+        sub             w5,     w7,     w5
+        dup             v1.8b,  w5  // v1 = 8 - w5 in every byte: weight of the pixel itself
+1:
+        subs            w4,     w4,     #2  // the flags are consumed by the b.gt at the end of the loop
+        ld1             {v2.8b,v3.8b,v4.8b},    [x2], x3  // 24 bytes of the first row
+        ext             v5.8b,  v3.8b,  v4.8b,  #1  // bytes 9..16
+        ext             v4.8b,  v2.8b,  v3.8b,  #1  // bytes 1..8
+        umull           v16.8h, v2.8b,  v1.8b
+        umlal           v16.8h, v4.8b,  v0.8b  // v16 = weighted sum for the left 8 pixels of the first row
+        ld1             {v18.8b,v19.8b,v20.8b}, [x2], x3  // 24 bytes of the second row
+        umull           v6.8h,  v3.8b,  v1.8b
+        umlal           v6.8h,  v5.8b,  v0.8b  // v6 = weighted sum for the right 8 pixels of the first row
+        ext             v21.8b, v19.8b, v20.8b, #1
+        ext             v20.8b, v18.8b, v19.8b, #1
+        umull           v22.8h, v18.8b, v1.8b
+        umlal           v22.8h, v20.8b, v0.8b  // second row, left 8 pixels
+        umull           v24.8h, v19.8b, v1.8b
+        umlal           v24.8h, v21.8b, v0.8b  // second row, right 8 pixels
+        rshrn           v4.8b,  v16.8h, #3  // rounding shift right by 3, narrow to bytes
+        rshrn2          v4.16b, v6.8h,  #3
+        rshrn           v6.8b,  v22.8h, #3
+        rshrn2          v6.16b, v24.8h, #3
+        st1             {v4.16b}, [x0], x1  // store the first row, then x0 += x1
+        st1             {v6.16b}, [x0], x1  // store the second row
+        b.gt            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_bilin16_v_neon, export=1  // 16 pixels wide, vertical bilinear filter, two rows per loop iteration
+        mov             w7,     #8
+        dup             v0.16b, w6  // v0 = w6 in every byte: weight of the row below
+        sub             w6,     w7,     w6
+        dup             v1.16b, w6  // v1 = 8 - w6 in every byte: weight of the row itself
+
+        ld1             {v2.16b}, [x2], x3  // v2 = first input row, loaded once before the loop
+1:
+        subs            w4,     w4,     #2  // the flags are consumed by the b.gt at the end of the loop
+        ld1             {v4.16b}, [x2], x3  // v4 = next input row
+        umull           v6.8h,  v2.8b,  v1.8b
+        umlal           v6.8h,  v4.8b,  v0.8b  // first output row, left 8 pixels
+        umull2          v16.8h, v2.16b, v1.16b
+        umlal2          v16.8h, v4.16b, v0.16b  // first output row, right 8 pixels
+        ld1             {v2.16b}, [x2], x3  // v2 = the row after v4; it is also the top row of the next iteration
+        umull           v18.8h, v4.8b,  v1.8b
+        umlal           v18.8h, v2.8b,  v0.8b  // second output row, left 8 pixels
+        umull2          v20.8h, v4.16b, v1.16b
+        umlal2          v20.8h, v2.16b, v0.16b  // second output row, right 8 pixels
+        rshrn           v4.8b,  v6.8h,  #3  // rounding shift right by 3, narrow to bytes
+        rshrn2          v4.16b, v16.8h, #3
+        rshrn           v6.8b,  v18.8h, #3
+        rshrn2          v6.16b, v20.8h, #3
+        st1             {v4.16b}, [x0], x1
+        st1             {v6.16b}, [x0], x1
+        b.gt            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_bilin16_hv_neon, export=1  // 16 pixels wide, horizontal then vertical bilinear filter, two output rows per loop iteration
+        mov             w7,      #8
+        dup             v0.8b,   w5            // mx
+        sub             w5,      w7,     w5
+        dup             v1.8b,   w5  // v1 = 8 - mx
+        dup             v2.16b,  w6            // my
+        sub             w6,      w7,     w6
+        dup             v3.16b,  w6  // v3 = 8 - my
+
+        ld1             {v4.8b,v5.8b,v6.8b},    [x2], x3  // 24 bytes of the first input row
+
+        ext             v7.8b,   v5.8b,  v6.8b, #1
+        ext             v6.8b,   v4.8b,  v5.8b, #1
+        umull           v16.8h,  v4.8b,  v1.8b
+        umlal           v16.8h,  v6.8b,  v0.8b
+        umull           v18.8h,  v5.8b,  v1.8b
+        umlal           v18.8h,  v7.8b,  v0.8b
+        rshrn           v4.8b,   v16.8h, #3
+        rshrn2          v4.16b,  v18.8h, #3  // v4 = horizontally filtered first row, carried into the loop
+1:
+        subs            w4,  w4,  #2  // the flags are consumed by the b.gt at the end of the loop
+        ld1             {v18.8b,v19.8b,v20.8b},  [x2], x3  // next input row
+        ext             v21.8b,  v19.8b, v20.8b, #1
+        ext             v20.8b,  v18.8b, v19.8b, #1
+        umull           v22.8h,  v18.8b, v1.8b
+        umlal           v22.8h,  v20.8b, v0.8b
+        ld1             {v26.8b,v27.8b,v28.8b},  [x2], x3  // the input row after that
+        umull           v24.8h,  v19.8b, v1.8b
+        umlal           v24.8h,  v21.8b, v0.8b
+        ext             v29.8b,  v27.8b, v28.8b, #1
+        ext             v28.8b,  v26.8b, v27.8b, #1
+        umull           v16.8h,  v26.8b, v1.8b
+        umlal           v16.8h,  v28.8b, v0.8b
+        umull           v18.8h,  v27.8b, v1.8b
+        umlal           v18.8h,  v29.8b, v0.8b
+        rshrn           v6.8b,   v22.8h, #3
+        rshrn2          v6.16b,  v24.8h, #3  // v6 = horizontally filtered middle row
+        umull           v24.8h,  v4.8b,  v3.8b  // vertical blend of the carried row (v4) and the middle row (v6)
+        umlal           v24.8h,  v6.8b,  v2.8b
+        umull2          v30.8h,  v4.16b, v3.16b
+        umlal2          v30.8h,  v6.16b, v2.16b
+        rshrn           v4.8b,   v16.8h, #3
+        rshrn2          v4.16b,  v18.8h, #3  // v4 = horizontally filtered last row, carried into the next iteration
+        umull           v20.8h,  v6.8b,  v3.8b  // vertical blend of the middle row (v6) and the last row (v4)
+        umlal           v20.8h,  v4.8b,  v2.8b
+        umull2          v22.8h,  v6.16b, v3.16b
+        umlal2          v22.8h,  v4.16b, v2.16b
+        rshrn           v24.8b,  v24.8h, #3
+        rshrn2          v24.16b, v30.8h, #3
+        st1             {v24.16b}, [x0], x1  // first output row
+        rshrn           v20.8b,  v20.8h, #3
+        rshrn2          v20.16b, v22.8h, #3
+        st1             {v20.16b}, [x0], x1  // second output row
+        b.gt            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_bilin8_h_neon, export=1  // 8 pixels wide, horizontal bilinear filter, two rows per loop iteration
+        mov             w7,     #8
+        dup             v0.8b,  w5  // v0 = w5 in every byte: weight of the pixel one byte to the right
+        sub             w5,     w7,     w5
+        dup             v1.8b,  w5  // v1 = 8 - w5 in every byte: weight of the pixel itself
+1:
+        subs            w4,     w4,     #2  // the flags are consumed by the b.gt at the end of the loop
+        ld1             {v2.8b,v3.8b},  [x2],  x3  // 16 bytes of the first row
+        ext             v3.8b,  v2.8b,  v3.8b, #1  // v3 = bytes 1..8
+        umull           v4.8h,  v2.8b,  v1.8b
+        umlal           v4.8h,  v3.8b,  v0.8b  // weighted sum for the first row
+        ld1             {v6.8b,v7.8b},  [x2],  x3  // 16 bytes of the second row
+        ext             v7.8b,  v6.8b,  v7.8b, #1
+        umull           v16.8h, v6.8b,  v1.8b
+        umlal           v16.8h, v7.8b,  v0.8b  // weighted sum for the second row
+        rshrn           v4.8b,  v4.8h,  #3  // rounding shift right by 3, narrow to bytes
+        rshrn           v16.8b, v16.8h, #3
+        st1             {v4.8b},  [x0], x1
+        st1             {v16.8b}, [x0], x1
+        b.gt            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_bilin8_v_neon, export=1  // 8 pixels wide, vertical bilinear filter, two rows per loop iteration
+        mov             w7,      #8
+        dup             v0.8b,   w6  // v0 = w6 in every byte: weight of the row below
+        sub             w6,      w7,    w6
+        dup             v1.8b,   w6  // v1 = 8 - w6 in every byte: weight of the row itself
+
+        ld1             {v2.8b}, [x2],  x3  // v2 = first input row, loaded once before the loop
+1:
+        subs            w4,      w4,    #2  // the flags are consumed by the b.gt at the end of the loop
+        ld1             {v3.8b}, [x2],  x3  // v3 = next input row
+        umull           v4.8h,   v2.8b, v1.8b
+        umlal           v4.8h,   v3.8b, v0.8b  // first output row
+        ld1             {v2.8b}, [x2],  x3  // v2 = the row after v3; it is also the top row of the next iteration
+        umull           v6.8h,   v3.8b, v1.8b
+        umlal           v6.8h,   v2.8b, v0.8b  // second output row
+        rshrn           v4.8b,   v4.8h, #3  // rounding shift right by 3, narrow to bytes
+        rshrn           v6.8b,   v6.8h, #3
+        st1             {v4.8b}, [x0],  x1
+        st1             {v6.8b}, [x0],  x1
+        b.gt            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_bilin8_hv_neon, export=1   // 8-pixel-wide bilinear filter, horizontal pass then vertical pass, two rows per loop pass
+        mov             w7,     #8
+        dup             v0.8b,  w5             // mx: weight of the right-hand neighbour
+        sub             w5,     w7,     w5
+        dup             v1.8b,  w5             // 8 - mx
+        dup             v2.8b,  w6             // my: weight of the row below
+        sub             w6,     w7,     w6
+        dup             v3.8b,  w6             // 8 - my
+
+        ld1             {v4.8b,v5.8b},  [x2],  x3    // first source row
+        ext             v5.8b,  v4.8b,  v5.8b, #1    // source shifted left by one byte
+        umull           v18.8h, v4.8b,  v1.8b
+        umlal           v18.8h, v5.8b,  v0.8b
+        rshrn           v22.8b, v18.8h, #3           // v22 = horizontally filtered first row, carried into the loop
+1:
+        subs            w4,     w4,     #2           // w4 = rows left, the flags set here drive b.gt at the loop end
+        ld1             {v6.8b,v7.8b},  [x2],  x3
+        ext             v7.8b,  v6.8b,  v7.8b, #1
+        umull           v16.8h, v6.8b,  v1.8b
+        umlal           v16.8h, v7.8b,  v0.8b        // horizontal filter of row n+1 (16-bit sums)
+        ld1             {v4.8b,v5.8b},  [x2],  x3
+        ext             v5.8b,  v4.8b,  v5.8b, #1
+        umull           v18.8h, v4.8b,  v1.8b
+        umlal           v18.8h, v5.8b,  v0.8b        // horizontal filter of row n+2 (16-bit sums)
+        rshrn           v16.8b, v16.8h, #3           // v16 = filtered row n+1
+        umull           v20.8h, v22.8b, v3.8b
+        umlal           v20.8h, v16.8b, v2.8b        // vertical blend of filtered rows n and n+1
+        rshrn           v22.8b, v18.8h, #3           // v22 = filtered row n+2, reused as row n of the next pass
+        umull           v24.8h, v16.8b, v3.8b
+        umlal           v24.8h, v22.8b, v2.8b        // vertical blend of filtered rows n+1 and n+2
+        rshrn           v20.8b, v20.8h, #3
+        st1             {v20.8b}, [x0], x1           // store 8 bytes per row, then x0 += x1
+        rshrn           v23.8b, v24.8h, #3
+        st1             {v23.8b}, [x0], x1
+        b.gt            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_bilin4_h_neon, export=1    // 4-pixel-wide horizontal bilinear filter, two rows packed into one vector per loop pass
+        mov             w7,      #8
+        dup             v0.8b,   w5             // v0 = weight of the right-hand neighbour (w5, labelled mx in the hv variant)
+        sub             w5,      w7,     w5
+        dup             v1.8b,   w5             // v1 = 8 - w5
+1:
+        subs            w4,      w4,     #2     // w4 = rows left, the flags set here drive b.gt at the loop end
+        ld1             {v2.8b}, [x2],   x3     // row 0: 8 source bytes
+        ext             v3.8b,   v2.8b,  v3.8b,  #1   // bytes 1..7 of row 0, the stale v3 byte lands in byte 7 which trn1 below discards
+        ld1             {v6.8b}, [x2],   x3     // row 1
+        ext             v7.8b,   v6.8b,  v7.8b,  #1   // same for row 1 (stale v7 byte also ends up unused)
+        trn1            v2.2s,   v2.2s,  v6.2s  // v2 = low 4 bytes of row 0 | low 4 bytes of row 1
+        trn1            v3.2s,   v3.2s,  v7.2s  // v3 = the matching right-hand neighbours
+        umull           v4.8h,   v2.8b,  v1.8b
+        umlal           v4.8h,   v3.8b,  v0.8b  // p[i] * (8 - w5) + p[i+1] * w5 for both rows at once
+        rshrn           v4.8b,   v4.8h,  #3     // rounding narrow: (sum + 4) >> 3
+        st1             {v4.s}[0], [x0], x1     // 4 bytes of output row 0
+        st1             {v4.s}[1], [x0], x1     // 4 bytes of output row 1
+        b.gt            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_bilin4_v_neon, export=1    // 4-pixel-wide vertical bilinear filter, two rows packed into one vector per loop pass
+        mov             w7,     #8
+        dup             v0.8b,  w6              // v0 = weight of the row below (w6, labelled my in the hv variant)
+        sub             w6,     w7,  w6
+        dup             v1.8b,  w6              // v1 = 8 - w6
+
+        ld1r            {v2.2s},    [x2], x3    // first row (4 bytes) replicated into both lanes of v2
+1:
+        ld1r            {v3.2s},   [x2]         // row n+1 into both lanes of v3, no pointer update
+        ld1             {v2.s}[1], [x2], x3     // the same row n+1 into v2 lane 1: v2 = row n | row n+1
+        ld1             {v3.s}[1], [x2], x3     // row n+2 into v3 lane 1: v3 = row n+1 | row n+2
+        umull           v4.8h,  v2.8b,  v1.8b
+        umlal           v4.8h,  v3.8b,  v0.8b   // upper * (8 - w6) + lower * w6 for both output rows
+        trn2            v2.2s,  v3.2s,  v2.2s   // v2 lane 0 = row n+2, which is row n of the next pass
+        rshrn           v4.8b,  v4.8h,  #3      // rounding narrow: (sum + 4) >> 3
+        st1             {v4.s}[0], [x0], x1
+        st1             {v4.s}[1], [x0], x1
+        subs            w4,     w4,     #2      // w4 = rows left
+        b.gt            1b
+
+        ret
+endfunc
+
+function ff_put_vp8_bilin4_hv_neon, export=1   // 4-pixel-wide bilinear filter, horizontal pass then vertical pass, two rows per loop pass
+        mov             w7,      #8
+        dup             v0.8b,   w5             // mx: weight of the right-hand neighbour
+        sub             w5,      w7,     w5
+        dup             v1.8b,   w5             // 8 - mx
+        dup             v2.8b,   w6             // my: weight of the row below
+        sub             w6,      w7,     w6
+        dup             v3.8b,   w6             // 8 - my
+
+        ld1             {v4.8b}, [x2],   x3     // first source row
+        ext             v5.8b,   v4.8b,  v4.8b,  #1   // row rotated by one byte, only the low 4 results are consumed later
+        umull           v18.8h,  v4.8b,  v1.8b
+        umlal           v18.8h,  v5.8b,  v0.8b
+        rshrn           v22.8b,  v18.8h, #3     // v22 lane 0 = horizontally filtered first row
+1:
+        subs            w4,      w4,     #2     // w4 = rows left, the flags set here drive b.gt at the loop end
+        ld1             {v6.8b}, [x2],   x3     // row n+1
+        ext             v7.8b,   v6.8b,  v6.8b,  #1
+        ld1             {v4.8b}, [x2],   x3     // row n+2
+        ext             v5.8b,   v4.8b,  v4.8b,  #1
+        trn1            v6.2s,   v6.2s,  v4.2s  // v6 = low 4 bytes of row n+1 | low 4 bytes of row n+2
+        trn1            v7.2s,   v7.2s,  v5.2s  // v7 = the matching right-hand neighbours
+        umull           v16.8h,  v6.8b,  v1.8b
+        umlal           v16.8h,  v7.8b,  v0.8b
+        rshrn           v16.8b,  v16.8h, #3     // v16 = filtered row n+1 | filtered row n+2
+        umull           v20.8h,  v16.8b, v2.8b  // lower rows * my
+        trn1            v22.2s,  v22.2s, v16.2s // v22 = filtered row n | filtered row n+1 (the upper rows)
+        umlal           v20.8h,  v22.8b, v3.8b  // + upper rows * (8 - my)
+        rev64           v22.2s,  v16.2s         // v22 lane 0 = filtered row n+2, carried into the next pass
+        rshrn           v20.8b,  v20.8h, #3     // rounding narrow: (sum + 4) >> 3
+        st1             {v20.s}[0], [x0], x1
+        st1             {v20.s}[1], [x0], x1
+        b.gt            1b
+
+        ret
+endfunc
diff --git a/media/ffvpx/libavcodec/aarch64/vp9dsp_init.h b/media/ffvpx/libavcodec/aarch64/vp9dsp_init.h
new file mode 100644
index 0000000000..9df1752c62
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9dsp_init.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AARCH64_VP9DSP_INIT_H
+#define AVCODEC_AARCH64_VP9DSP_INIT_H
+
+#include "libavcodec/vp9dsp.h"
+
+void ff_vp9dsp_init_10bpp_aarch64(VP9DSPContext *dsp); /* defined through INIT_FUNC in vp9dsp_init_10bpp_aarch64.c */
+void ff_vp9dsp_init_12bpp_aarch64(VP9DSPContext *dsp); /* defined through INIT_FUNC in vp9dsp_init_12bpp_aarch64.c */
+
+#endif /* AVCODEC_AARCH64_VP9DSP_INIT_H */
diff --git a/media/ffvpx/libavcodec/aarch64/vp9dsp_init_10bpp_aarch64.c b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_10bpp_aarch64.c
new file mode 100644
index 0000000000..0fa0d7f8c2
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_10bpp_aarch64.c
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2017 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BPP 10 /* pasted into the NEON symbol names by the template */
+#define INIT_FUNC ff_vp9dsp_init_10bpp_aarch64 /* name of the public init function the template defines */
+#include "vp9dsp_init_16bpp_aarch64_template.c" /* the whole implementation comes from the shared template */
diff --git a/media/ffvpx/libavcodec/aarch64/vp9dsp_init_12bpp_aarch64.c b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_12bpp_aarch64.c
new file mode 100644
index 0000000000..dae2232403
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_12bpp_aarch64.c
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2017 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BPP 12 /* pasted into the NEON symbol names by the template */
+#define INIT_FUNC ff_vp9dsp_init_12bpp_aarch64 /* name of the public init function the template defines */
+#include "vp9dsp_init_16bpp_aarch64_template.c" /* the whole implementation comes from the shared template */
diff --git a/media/ffvpx/libavcodec/aarch64/vp9dsp_init_16bpp_aarch64_template.c b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_16bpp_aarch64_template.c
new file mode 100644
index 0000000000..d2a4e90b3a
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_16bpp_aarch64_template.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2017 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/aarch64/cpu.h"
+#include "vp9dsp_init.h"
+
+#define declare_fpel(type, sz, suffix)                                          \
+void ff_vp9_##type##sz##suffix##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
+                                      const uint8_t *src, ptrdiff_t src_stride, \
+                                      int h, int mx, int my) /* prototype only: ff_vp9_<type><sz><suffix>_neon */
+
+#define decl_mc_func(op, filter, dir, sz, bpp)                                                   \
+void ff_vp9_##op##_##filter##sz##_##dir##_##bpp##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
+                                                       const uint8_t *src, ptrdiff_t src_stride, \
+                                                       int h, int mx, int my) /* prototype only: ff_vp9_<op>_<filter><sz>_<dir>_<bpp>_neon */
+
+#define define_8tap_2d_fn(op, filter, sz, bpp)                                      \
+static void op##_##filter##sz##_hv_##bpp##_neon(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                const uint8_t *src,                 \
+                                                ptrdiff_t src_stride,               \
+                                                int h, int mx, int my)              \
+{                                                                                   \
+    LOCAL_ALIGNED_16(uint8_t, temp, [((1 + (sz < 64)) * sz + 8) * sz * 2]);         \
+    /* We only need h + 7 lines, but the horizontal filter assumes an               \
+     * even number of rows, so filter h + 8 lines here. */                          \
+    ff_vp9_put_##filter##sz##_h_##bpp##_neon(temp, 2 * sz,                          \
+                                             src - 3 * src_stride, src_stride,      \
+                                             h + 8, mx, 0);                         \
+    ff_vp9_##op##_##filter##sz##_v_##bpp##_neon(dst, dst_stride,                    \
+                                                temp + 3 * 2 * sz, 2 * sz,          \
+                                                h, 0, my);                          \
+} /* static two-pass hv wrapper: horizontal put into temp (row pitch 2 * sz bytes, starting 3 rows above src), then the vertical op reading from row 3 of temp */
+
+#define decl_filter_funcs(op, dir, sz, bpp)  \
+    decl_mc_func(op, regular, dir, sz, bpp); \
+    decl_mc_func(op, sharp,   dir, sz, bpp); \
+    decl_mc_func(op, smooth,  dir, sz, bpp) /* one prototype per filter kind: regular, sharp, smooth */
+
+#define decl_mc_funcs(sz, bpp)           \
+    decl_filter_funcs(put, h,  sz, bpp); \
+    decl_filter_funcs(avg, h,  sz, bpp); \
+    decl_filter_funcs(put, v,  sz, bpp); \
+    decl_filter_funcs(avg, v,  sz, bpp); \
+    decl_filter_funcs(put, hv, sz, bpp); \
+    decl_filter_funcs(avg, hv, sz, bpp) /* put and avg prototypes for the h, v and hv directions of one block size */
+
+#define ff_vp9_copy32_neon  ff_vp9_copy32_aarch64 /* makes declare_fpel(copy, 32, ) declare the _aarch64 symbol */
+#define ff_vp9_copy64_neon  ff_vp9_copy64_aarch64 /* likewise for 64 */
+#define ff_vp9_copy128_neon ff_vp9_copy128_aarch64 /* likewise for 128 */
+
+declare_fpel(copy, 128, ); /* copy prototypes take an empty suffix */
+declare_fpel(copy, 64,  );
+declare_fpel(copy, 32,  );
+declare_fpel(copy, 16,  );
+declare_fpel(copy, 8,   );
+declare_fpel(avg, 64, _16); /* avg prototypes carry the _16 suffix: ff_vp9_avg<sz>_16_neon */
+declare_fpel(avg, 32, _16);
+declare_fpel(avg, 16, _16);
+declare_fpel(avg, 8,  _16);
+declare_fpel(avg, 4,  _16);
+
+decl_mc_funcs(64, BPP); /* BPP is supplied by the file that includes this template */
+decl_mc_funcs(32, BPP);
+decl_mc_funcs(16, BPP);
+decl_mc_funcs(8, BPP);
+decl_mc_funcs(4, BPP);
+
+#define define_8tap_2d_funcs(sz, bpp)        \
+    define_8tap_2d_fn(put, regular, sz, bpp) \
+    define_8tap_2d_fn(put, sharp,   sz, bpp) \
+    define_8tap_2d_fn(put, smooth,  sz, bpp) \
+    define_8tap_2d_fn(avg, regular, sz, bpp) \
+    define_8tap_2d_fn(avg, sharp,   sz, bpp) \
+    define_8tap_2d_fn(avg, smooth,  sz, bpp) /* six static hv wrappers (put/avg x regular/sharp/smooth) for one block size */
+
+define_8tap_2d_funcs(64, BPP) /* expands to function definitions, hence no trailing semicolon */
+define_8tap_2d_funcs(32, BPP)
+define_8tap_2d_funcs(16, BPP)
+define_8tap_2d_funcs(8,  BPP)
+define_8tap_2d_funcs(4,  BPP)
+
+static av_cold void vp9dsp_mc_init_aarch64(VP9DSPContext *dsp) /* fills dsp->mc[][][][][] with the aarch64/NEON routines allowed by the CPU flags */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+#define init_fpel(idx1, idx2, sz, type, suffix)      \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##suffix /* the [0][0] entry of all four filter rows shares one routine */
+
+#define init_copy(idx, sz, suffix) \
+    init_fpel(idx, 0, sz, copy, suffix) /* idx2 = 0 selects the copy slot */
+
+#define init_avg(idx, sz, suffix) \
+    init_fpel(idx, 1, sz, avg,  suffix) /* idx2 = 1 selects the avg slot */
+
+#define init_copy_avg(idx, sz1, sz2) \
+    init_copy(idx, sz2, _neon);      \
+    init_avg (idx, sz1, _16_neon) /* copy uses size sz2, avg uses size sz1 */
+
+    if (have_armv8(cpu_flags)) {
+        init_copy(0, 128, _aarch64); /* the copy size is twice the avg size installed at the same index below */
+        init_copy(1, 64,  _aarch64);
+        init_copy(2, 32,  _aarch64);
+    }
+
+    if (have_neon(cpu_flags)) {
+#define init_mc_func(idx1, idx2, op, filter, fname, dir, mx, my, sz, pfx, bpp) \
+    dsp->mc[idx1][filter][idx2][mx][my] = pfx##op##_##fname##sz##_##dir##_##bpp##_neon /* one table slot */
+
+#define init_mc_funcs(idx, dir, mx, my, sz, pfx, bpp)                                   \
+    init_mc_func(idx, 0, put, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx, bpp); \
+    init_mc_func(idx, 0, put, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx, bpp); \
+    init_mc_func(idx, 0, put, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx, bpp); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx, bpp); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx, bpp); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx, bpp) /* put and avg for the three 8-tap filters in one direction */
+
+#define init_mc_funcs_dirs(idx, sz, bpp)            \
+    init_mc_funcs(idx, v,  0, 1, sz, ff_vp9_, bpp); \
+    init_mc_funcs(idx, h,  1, 0, sz, ff_vp9_, bpp); \
+    init_mc_funcs(idx, hv, 1, 1, sz,        , bpp) /* empty prefix: hv uses the static wrappers from define_8tap_2d_fn */
+
+
+        init_avg(0, 64, _16_neon);
+        init_avg(1, 32, _16_neon);
+        init_avg(2, 16, _16_neon);
+        init_copy_avg(3, 8, 16); /* NOTE(review): copy16 paired with avg8, presumably because sizes of the copy routines are in bytes and a pixel is 2 bytes here - confirm */
+        init_copy_avg(4, 4, 8);
+
+        init_mc_funcs_dirs(0, 64, BPP); /* table index 0..4 corresponds to block sizes 64, 32, 16, 8, 4 */
+        init_mc_funcs_dirs(1, 32, BPP);
+        init_mc_funcs_dirs(2, 16, BPP);
+        init_mc_funcs_dirs(3, 8,  BPP);
+        init_mc_funcs_dirs(4, 4,  BPP);
+    }
+}
+
+#define define_itxfm2(type_a, type_b, sz, bpp)                                     \
+void ff_vp9_##type_a##_##type_b##_##sz##x##sz##_add_##bpp##_neon(uint8_t *_dst,    \
+                                                                 ptrdiff_t stride, \
+                                                                 int16_t *_block, int eob) /* prototype only: ff_vp9_<a>_<b>_<sz>x<sz>_add_<bpp>_neon */
+#define define_itxfm(type_a, type_b, sz, bpp) define_itxfm2(type_a, type_b, sz, bpp) /* extra macro level so BPP is expanded before token pasting */
+
+#define define_itxfm_funcs(sz, bpp)      \
+    define_itxfm(idct,  idct,  sz, bpp); \
+    define_itxfm(iadst, idct,  sz, bpp); \
+    define_itxfm(idct,  iadst, sz, bpp); \
+    define_itxfm(iadst, iadst, sz, bpp) /* all four idct/iadst combinations for one size */
+
+define_itxfm_funcs(4,  BPP);
+define_itxfm_funcs(8,  BPP);
+define_itxfm_funcs(16, BPP);
+define_itxfm(idct, idct, 32, BPP); /* 32x32 is declared for idct_idct only */
+define_itxfm(iwht, iwht, 4,  BPP); /* 4x4 iwht_iwht */
+
+
+static av_cold void vp9dsp_itxfm_init_aarch64(VP9DSPContext *dsp) /* fills dsp->itxfm_add[][] with NEON routines when NEON is available */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+#define init_itxfm2(tx, sz, bpp)                                               \
+    dsp->itxfm_add[tx][DCT_DCT]   = ff_vp9_idct_idct_##sz##_add_##bpp##_neon;  \
+    dsp->itxfm_add[tx][DCT_ADST]  = ff_vp9_iadst_idct_##sz##_add_##bpp##_neon; \
+    dsp->itxfm_add[tx][ADST_DCT]  = ff_vp9_idct_iadst_##sz##_add_##bpp##_neon; \
+    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_iadst_iadst_##sz##_add_##bpp##_neon /* a distinct routine per transform-type slot */
+#define init_itxfm(tx, sz, bpp) init_itxfm2(tx, sz, bpp) /* extra macro level so BPP is expanded before token pasting */
+
+#define init_idct2(tx, nm, bpp)     \
+    dsp->itxfm_add[tx][DCT_DCT]   = \
+    dsp->itxfm_add[tx][ADST_DCT]  = \
+    dsp->itxfm_add[tx][DCT_ADST]  = \
+    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_##nm##_add_##bpp##_neon /* the same routine in all four slots */
+#define init_idct(tx, nm, bpp) init_idct2(tx, nm, bpp) /* extra macro level so BPP is expanded before token pasting */
+
+        init_itxfm(TX_4X4,   4x4,   BPP);
+        init_itxfm(TX_8X8,   8x8,   BPP);
+        init_itxfm(TX_16X16, 16x16, BPP);
+        init_idct(TX_32X32, idct_idct_32x32, BPP);
+        init_idct(4,        iwht_iwht_4x4,   BPP); /* NOTE(review): row 4 is a bare literal, presumably the lossless (WHT) row - confirm against vp9dsp.h */
+    }
+}
+
+#define define_loop_filter(dir, wd, size, bpp) \
+void ff_vp9_loop_filter_##dir##_##wd##_##size##_##bpp##_neon(uint8_t *dst, ptrdiff_t stride, int E, int I, int H) /* prototype only */
+
+#define define_loop_filters(wd, size, bpp) \
+    define_loop_filter(h, wd, size, bpp);  \
+    define_loop_filter(v, wd, size, bpp) /* h and v prototypes for one wd/size pair */
+
+define_loop_filters(4,  8,  BPP); /* these three feed dsp->loop_filter_8 */
+define_loop_filters(8,  8,  BPP);
+define_loop_filters(16, 8,  BPP);
+
+define_loop_filters(16, 16, BPP); /* feeds dsp->loop_filter_16 */
+
+define_loop_filters(44, 16, BPP); /* these four feed dsp->loop_filter_mix2 */
+define_loop_filters(48, 16, BPP);
+define_loop_filters(84, 16, BPP);
+define_loop_filters(88, 16, BPP);
+
+static av_cold void vp9dsp_loopfilter_init_aarch64(VP9DSPContext *dsp) /* fills the three loop filter tables with NEON routines when NEON is available */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+#define init_lpf_func_8(idx1, idx2, dir, wd, bpp) \
+    dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_8_##bpp##_neon /* one loop_filter_8 slot */
+
+#define init_lpf_func_16(idx, dir, bpp) \
+    dsp->loop_filter_16[idx] = ff_vp9_loop_filter_##dir##_16_16_##bpp##_neon /* one loop_filter_16 slot */
+
+#define init_lpf_func_mix2(idx1, idx2, idx3, dir, wd, bpp) \
+    dsp->loop_filter_mix2[idx1][idx2][idx3] = ff_vp9_loop_filter_##dir##_##wd##_16_##bpp##_neon /* one loop_filter_mix2 slot */
+
+#define init_lpf_funcs_8_wd(idx, wd, bpp) \
+    init_lpf_func_8(idx, 0, h, wd, bpp);  \
+    init_lpf_func_8(idx, 1, v, wd, bpp) /* second index: 0 = h, 1 = v */
+
+#define init_lpf_funcs_16(bpp)   \
+    init_lpf_func_16(0, h, bpp); \
+    init_lpf_func_16(1, v, bpp) /* index: 0 = h, 1 = v */
+
+#define init_lpf_funcs_mix2_wd(idx1, idx2, wd, bpp) \
+    init_lpf_func_mix2(idx1, idx2, 0, h, wd, bpp);  \
+    init_lpf_func_mix2(idx1, idx2, 1, v, wd, bpp) /* last index: 0 = h, 1 = v */
+
+#define init_lpf_funcs_8(bpp)        \
+    init_lpf_funcs_8_wd(0, 4,  bpp); \
+    init_lpf_funcs_8_wd(1, 8,  bpp); \
+    init_lpf_funcs_8_wd(2, 16, bpp) /* first index 0, 1, 2 corresponds to wd 4, 8, 16 */
+
+#define init_lpf_funcs_mix2(bpp)           \
+    init_lpf_funcs_mix2_wd(0, 0, 44, bpp); \
+    init_lpf_funcs_mix2_wd(0, 1, 48, bpp); \
+    init_lpf_funcs_mix2_wd(1, 0, 84, bpp); \
+    init_lpf_funcs_mix2_wd(1, 1, 88, bpp) /* index pairs (0,0) (0,1) (1,0) (1,1) correspond to wd 44, 48, 84, 88 */
+
+        init_lpf_funcs_8(BPP);
+        init_lpf_funcs_16(BPP);
+        init_lpf_funcs_mix2(BPP);
+    }
+}
+
+av_cold void INIT_FUNC(VP9DSPContext *dsp) /* INIT_FUNC is defined by the including file, e.g. ff_vp9dsp_init_10bpp_aarch64 */
+{
+    vp9dsp_mc_init_aarch64(dsp);         /* motion compensation table */
+    vp9dsp_loopfilter_init_aarch64(dsp); /* loop filter tables */
+    vp9dsp_itxfm_init_aarch64(dsp);      /* inverse transform table */
+}
diff --git a/media/ffvpx/libavcodec/aarch64/vp9dsp_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_aarch64.c
new file mode 100644
index 0000000000..4d1fee62de
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_aarch64.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2016 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "vp9dsp_init.h"
+
+#define declare_fpel(type, sz)                                          \
+void ff_vp9_##type##sz##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
+                              const uint8_t *src, ptrdiff_t src_stride, \
+                              int h, int mx, int my) /* prototype only: ff_vp9_<type><sz>_neon */
+
+#define declare_copy_avg(sz) \
+    declare_fpel(copy, sz);  \
+    declare_fpel(avg , sz) /* copy and avg prototypes for one size */
+
+#define decl_mc_func(op, filter, dir, sz)                                                \
+void ff_vp9_##op##_##filter##sz##_##dir##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
+                                               const uint8_t *src, ptrdiff_t src_stride, \
+                                               int h, int mx, int my) /* prototype only: ff_vp9_<op>_<filter><sz>_<dir>_neon */
+
+#define define_8tap_2d_fn(op, filter, sz)                                         \
+static void op##_##filter##sz##_hv_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
+                                        const uint8_t *src, ptrdiff_t src_stride, \
+                                        int h, int mx, int my)                    \
+{                                                                                 \
+    LOCAL_ALIGNED_16(uint8_t, temp, [((1 + (sz < 64)) * sz + 8) * sz]);           \
+    /* We only need h + 7 lines, but the horizontal filter assumes an             \
+     * even number of rows, so filter h + 8 lines here. */                        \
+    ff_vp9_put_##filter##sz##_h_neon(temp, sz,                                    \
+                                     src - 3 * src_stride, src_stride,            \
+                                     h + 8, mx, 0);                               \
+    ff_vp9_##op##_##filter##sz##_v_neon(dst, dst_stride,                          \
+                                        temp + 3 * sz, sz,                        \
+                                        h, 0, my);                                \
+} /* static two-pass hv wrapper: horizontal put into temp (row pitch sz bytes, starting 3 rows above src), then the vertical op reading from row 3 of temp */
+
+#define decl_filter_funcs(op, dir, sz)  \
+    decl_mc_func(op, regular, dir, sz); \
+    decl_mc_func(op, sharp,   dir, sz); \
+    decl_mc_func(op, smooth,  dir, sz) /* one prototype per filter kind: regular, sharp, smooth */
+
+#define decl_mc_funcs(sz)           \
+    decl_filter_funcs(put, h,  sz); \
+    decl_filter_funcs(avg, h,  sz); \
+    decl_filter_funcs(put, v,  sz); \
+    decl_filter_funcs(avg, v,  sz); \
+    decl_filter_funcs(put, hv, sz); \
+    decl_filter_funcs(avg, hv, sz) /* put and avg prototypes for the h, v and hv directions of one block size */
+
+#define ff_vp9_copy32_neon ff_vp9_copy32_aarch64 /* makes declare_copy_avg(32) declare the _aarch64 copy symbol */
+#define ff_vp9_copy64_neon ff_vp9_copy64_aarch64 /* likewise for 64 */
+
+declare_copy_avg(64);
+declare_copy_avg(32);
+declare_copy_avg(16);
+declare_copy_avg(8);
+declare_copy_avg(4);
+
+decl_mc_funcs(64);
+decl_mc_funcs(32);
+decl_mc_funcs(16);
+decl_mc_funcs(8);
+decl_mc_funcs(4);
+
+#define define_8tap_2d_funcs(sz)        \
+    define_8tap_2d_fn(put, regular, sz) \
+    define_8tap_2d_fn(put, sharp,   sz) \
+    define_8tap_2d_fn(put, smooth,  sz) \
+    define_8tap_2d_fn(avg, regular, sz) \
+    define_8tap_2d_fn(avg, sharp,   sz) \
+    define_8tap_2d_fn(avg, smooth,  sz) /* six static hv wrappers (put/avg x regular/sharp/smooth) for one block size */
+
+define_8tap_2d_funcs(64) /* expands to function definitions, hence no trailing semicolon */
+define_8tap_2d_funcs(32)
+define_8tap_2d_funcs(16)
+define_8tap_2d_funcs(8)
+define_8tap_2d_funcs(4)
+
+static av_cold void vp9dsp_mc_init_aarch64(VP9DSPContext *dsp) /* fills dsp->mc[][][][][] with the aarch64/NEON routines allowed by the CPU flags */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+#define init_fpel(idx1, idx2, sz, type, suffix)      \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##suffix /* the [0][0] entry of all four filter rows shares one routine */
+
+#define init_copy(idx, sz, suffix) \
+    init_fpel(idx, 0, sz, copy, suffix) /* idx2 = 0 selects the copy slot */
+
+#define init_avg(idx, sz, suffix) \
+    init_fpel(idx, 1, sz, avg,  suffix) /* idx2 = 1 selects the avg slot */
+
+#define init_copy_avg(idx, sz) \
+    init_copy(idx, sz, _neon); \
+    init_avg (idx, sz, _neon) /* both slots from the _neon routines of the same size */
+
+    if (have_armv8(cpu_flags)) {
+        init_copy(0, 64, _aarch64); /* the 64 and 32 copies only need the armv8 flag, not NEON */
+        init_copy(1, 32, _aarch64);
+    }
+
+    if (have_neon(cpu_flags)) {
+#define init_mc_func(idx1, idx2, op, filter, fname, dir, mx, my, sz, pfx) \
+    dsp->mc[idx1][filter][idx2][mx][my] = pfx##op##_##fname##sz##_##dir##_neon /* one table slot */
+
+#define init_mc_funcs(idx, dir, mx, my, sz, pfx)                                   \
+    init_mc_func(idx, 0, put, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx); \
+    init_mc_func(idx, 0, put, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx); \
+    init_mc_func(idx, 0, put, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx) /* put and avg for the three 8-tap filters in one direction */
+
+#define init_mc_funcs_dirs(idx, sz)            \
+    init_mc_funcs(idx, h,  1, 0, sz, ff_vp9_); \
+    init_mc_funcs(idx, v,  0, 1, sz, ff_vp9_); \
+    init_mc_funcs(idx, hv, 1, 1, sz,) /* empty prefix: hv uses the static wrappers from define_8tap_2d_fn */
+
+        init_avg(0, 64, _neon); /* copy for indices 0 and 1 is installed in the armv8 branch above */
+        init_avg(1, 32, _neon);
+        init_copy_avg(2, 16);
+        init_copy_avg(3, 8);
+        init_copy_avg(4, 4);
+
+        init_mc_funcs_dirs(0, 64); /* table index 0..4 corresponds to block sizes 64, 32, 16, 8, 4 */
+        init_mc_funcs_dirs(1, 32);
+        init_mc_funcs_dirs(2, 16);
+        init_mc_funcs_dirs(3, 8);
+        init_mc_funcs_dirs(4, 4);
+    }
+}
+
+#define define_itxfm(type_a, type_b, sz)                                   \
+void ff_vp9_##type_a##_##type_b##_##sz##x##sz##_add_neon(uint8_t *_dst,    \
+                                                         ptrdiff_t stride, \
+                                                         int16_t *_block, int eob) /* prototype only: ff_vp9_<a>_<b>_<sz>x<sz>_add_neon */
+
+#define define_itxfm_funcs(sz)      \
+    define_itxfm(idct,  idct,  sz); \
+    define_itxfm(iadst, idct,  sz); \
+    define_itxfm(idct,  iadst, sz); \
+    define_itxfm(iadst, iadst, sz) /* all four idct/iadst combinations for one size */
+
+define_itxfm_funcs(4);
+define_itxfm_funcs(8);
+define_itxfm_funcs(16);
+define_itxfm(idct, idct, 32); /* 32x32 is declared for idct_idct only */
+define_itxfm(iwht, iwht, 4); /* 4x4 iwht_iwht */
+
+
+static av_cold void vp9dsp_itxfm_init_aarch64(VP9DSPContext *dsp) /* fills dsp->itxfm_add[][] with NEON routines when NEON is available */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+#define init_itxfm(tx, sz)                                             \
+    dsp->itxfm_add[tx][DCT_DCT]   = ff_vp9_idct_idct_##sz##_add_neon;  \
+    dsp->itxfm_add[tx][DCT_ADST]  = ff_vp9_iadst_idct_##sz##_add_neon; \
+    dsp->itxfm_add[tx][ADST_DCT]  = ff_vp9_idct_iadst_##sz##_add_neon; \
+    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_iadst_iadst_##sz##_add_neon /* a distinct routine per transform-type slot */
+
+#define init_idct(tx, nm)           \
+    dsp->itxfm_add[tx][DCT_DCT]   = \
+    dsp->itxfm_add[tx][ADST_DCT]  = \
+    dsp->itxfm_add[tx][DCT_ADST]  = \
+    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_##nm##_add_neon /* the same routine in all four slots */
+
+        init_itxfm(TX_4X4, 4x4);
+        init_itxfm(TX_8X8, 8x8);
+        init_itxfm(TX_16X16, 16x16);
+        init_idct(TX_32X32, idct_idct_32x32);
+        init_idct(4, iwht_iwht_4x4); /* NOTE(review): row 4 is a bare literal, presumably the lossless (WHT) row - confirm against vp9dsp.h */
+    }
+}
+
+#define define_loop_filter(dir, wd, len) \
+void ff_vp9_loop_filter_##dir##_##wd##_##len##_neon(uint8_t *dst, ptrdiff_t stride, int E, int I, int H) /* prototype only */
+
+#define define_loop_filters(wd, len) \
+    define_loop_filter(h, wd, len);  \
+    define_loop_filter(v, wd, len) /* h and v prototypes for one wd/len pair */
+
+define_loop_filters(4, 8); /* these three feed dsp->loop_filter_8 */
+define_loop_filters(8, 8);
+define_loop_filters(16, 8);
+
+define_loop_filters(16, 16); /* feeds dsp->loop_filter_16 */
+
+define_loop_filters(44, 16); /* these four feed dsp->loop_filter_mix2 */
+define_loop_filters(48, 16);
+define_loop_filters(84, 16);
+define_loop_filters(88, 16);
+
+static av_cold void vp9dsp_loopfilter_init_aarch64(VP9DSPContext *dsp) /* fills the three loop filter tables with NEON routines when NEON is available */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_neon; /* first index 0, 1, 2 = wd 4, 8, 16, second index 0 = h, 1 = v */
+        dsp->loop_filter_8[0][0] = ff_vp9_loop_filter_h_4_8_neon;
+        dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_neon;
+        dsp->loop_filter_8[1][0] = ff_vp9_loop_filter_h_8_8_neon;
+        dsp->loop_filter_8[2][1] = ff_vp9_loop_filter_v_16_8_neon;
+        dsp->loop_filter_8[2][0] = ff_vp9_loop_filter_h_16_8_neon;
+
+        dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_neon; /* index 0 = h, 1 = v */
+        dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_neon;
+
+        dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_neon; /* first two indices pick wd 44, 48, 84, 88, last index 0 = h, 1 = v */
+        dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_neon;
+        dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_neon;
+        dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_neon;
+        dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_neon;
+        dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_neon;
+        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_neon;
+        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_neon;
+    }
+}
+
+av_cold void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp) /* dispatches on bpp: 10 and 12 go to their own init, 8 is handled here, anything else leaves dsp untouched */
+{
+    if (bpp == 10) {
+        ff_vp9dsp_init_10bpp_aarch64(dsp);
+        return;
+    } else if (bpp == 12) {
+        ff_vp9dsp_init_12bpp_aarch64(dsp);
+        return;
+    } else if (bpp != 8)
+        return; /* unsupported depth: nothing is installed */
+
+    vp9dsp_mc_init_aarch64(dsp);         /* from here on bpp == 8 */
+    vp9dsp_loopfilter_init_aarch64(dsp);
+    vp9dsp_itxfm_init_aarch64(dsp);
+}
diff --git a/media/ffvpx/libavcodec/aarch64/vp9itxfm_16bpp_neon.S b/media/ffvpx/libavcodec/aarch64/vp9itxfm_16bpp_neon.S
new file mode 100644
index 0000000000..c5f43d36a3
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9itxfm_16bpp_neon.S
@@ -0,0 +1,2017 @@
+/*
+ * Copyright (c) 2017 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+#include "neon.S"
+
+const itxfm4_coeffs, align=4
+        .short  11585, 0, 6270, 15137  // idct4 multipliers: idct4_10/idct4_12 read elements 0, 2 and 3 from v0 after widening to 32 bit
+iadst4_coeffs:  // keep directly after the row above: the mixed idct/iadst 4x4 functions fetch both rows with one 8h load
+        .short  5283, 15212, 9929, 13377  // iadst4 multipliers: iadst4_10/iadst4_12 read them as v1.s[0]-v1.s[3]
+endconst
+
+const iadst8_coeffs, align=4
+        .short  16305, 1606, 14449, 7723, 10394, 12665, 4756, 15679  // widened into v2/v3 by the 8x8 functions that involve iadst
+idct_coeffs:  // keep directly after the row above: those 8x8 functions post-increment past it by 16 bytes and load from here next
+        .short  11585, 0, 6270, 15137, 3196, 16069, 13623, 9102  // first row: widened into v0/v1 by the 8x8 functions; the DC-only functions read just its first four values
+        .short  1606, 16305, 12665, 10394, 7723, 14449, 15679, 4756  // this and the following rows are not read by the 4x4/8x8 code; presumably for the larger transforms - TODO confirm
+        .short  804, 16364, 12140, 11003, 7005, 14811, 15426, 5520
+        .short  3981, 15893, 14053, 8423, 9760, 13160, 16207, 2404
+endconst
+
+const iadst16_coeffs, align=4
+        .short  16364, 804, 15893, 3981, 11003, 12140, 8423, 14053  // not referenced by the 4x4/8x8 code; presumably consumed by a 16-point iadst elsewhere in the file - TODO confirm
+        .short  14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
+endconst
+
+.macro transpose_4x4s r0, r1, r2, r3, r4, r5, r6, r7
+        trn1            \r4\().4s,  \r0\().4s,  \r1\().4s  // Transposes, in place, the 4x4 matrix of 32 bit elements whose rows are r0-r3; r4-r7 are overwritten as scratch
+        trn2            \r5\().4s,  \r0\().4s,  \r1\().4s
+        trn1            \r6\().4s,  \r2\().4s,  \r3\().4s
+        trn2            \r7\().4s,  \r2\().4s,  \r3\().4s  // r4-r7 now hold the row pairs interleaved per 32 bit element
+        trn1            \r0\().2d,  \r4\().2d,  \r6\().2d  // r0 = former column 0
+        trn2            \r2\().2d,  \r4\().2d,  \r6\().2d  // r2 = former column 2
+        trn1            \r1\().2d,  \r5\().2d,  \r7\().2d  // r1 = former column 1
+        trn2            \r3\().2d,  \r5\().2d,  \r7\().2d  // r3 = former column 3
+.endm
+
+// Transpose a 8x8 matrix of 32 bit elements, where each row is spread out
+// over two registers (row n is r(2n) followed by r(2n+1)); t0-t3 are overwritten as scratch.
+.macro transpose_8x8s r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, t0, t1, t2, t3
+        transpose_4x4s  \r0,  \r2,  \r4,  \r6,  \t0, \t1, \t2, \t3  // top-left quadrant, transposed in place
+        transpose_4x4s  \r9,  \r11, \r13, \r15, \t0, \t1, \t2, \t3  // bottom-right quadrant, transposed in place
+
+        // Do 4x4 transposes of r1,r3,r5,r7 and r8,r10,r12,r14
+        // while swapping the two 4x4 matrices between each other
+
+        // First step of the 4x4 transpose of r1-r7, into t0-t3
+        trn1            \t0\().4s,  \r1\().4s,  \r3\().4s
+        trn2            \t1\().4s,  \r1\().4s,  \r3\().4s
+        trn1            \t2\().4s,  \r5\().4s,  \r7\().4s
+        trn2            \t3\().4s,  \r5\().4s,  \r7\().4s
+
+        // First step of the 4x4 transpose of r8-r14, into r1-r7
+        trn1            \r1\().4s,  \r8\().4s,  \r10\().4s
+        trn2            \r3\().4s,  \r8\().4s,  \r10\().4s
+        trn1            \r5\().4s,  \r12\().4s, \r14\().4s
+        trn2            \r7\().4s,  \r12\().4s, \r14\().4s
+
+        // Second step of the 4x4 transpose of r1-r7 (now in t0-t3), into r8-r14
+        trn1            \r8\().2d,  \t0\().2d,  \t2\().2d
+        trn2            \r12\().2d, \t0\().2d,  \t2\().2d
+        trn1            \r10\().2d, \t1\().2d,  \t3\().2d
+        trn2            \r14\().2d, \t1\().2d,  \t3\().2d
+
+        // Second step of the 4x4 transpose of r8-r14 (now in r1-r7), in place as far as possible
+        trn1            \t0\().2d,  \r1\().2d,  \r5\().2d  // trn1 results go to scratch because r1 and r3 are still needed by the trn2 below
+        trn2            \r5\().2d,  \r1\().2d,  \r5\().2d
+        trn1            \t1\().2d,  \r3\().2d,  \r7\().2d
+        trn2            \r7\().2d,  \r3\().2d,  \r7\().2d
+
+        // Move the outputs of trn1 back in place
+        mov             \r1\().16b,  \t0\().16b
+        mov             \r3\().16b,  \t1\().16b
+.endm
+
+// out1 = ((in1 + in2) * d0[0] + (1 << 13)) >> 14, where d0[0] is v0.s[0] (or -v0.s[0] if neg > 0)
+// out2 = ((in1 - in2) * d0[0] + (1 << 13)) >> 14, where d0[0] is always v0.s[0]
+// in/out are .4s registers; this can do with 4 temp registers, but is
+// more efficient if 6 temp registers are available (tmp5/tmp6 may be left blank).
+.macro dmbutterfly0 out1, out2, in1, in2, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, neg=0
+.if \neg > 0
+        neg             \tmp4\().4s, v0.4s  // tmp4 temporarily holds the negated coefficients for the out1 product
+.endif
+        add             \tmp1\().4s, \in1\().4s,  \in2\().4s  // sum and difference go to temporaries first, so out1/out2 may be the same registers as in1/in2
+        sub             \tmp2\().4s, \in1\().4s,  \in2\().4s
+.if \neg > 0
+        smull           \tmp3\().2d, \tmp1\().2s, \tmp4\().s[0]
+        smull2          \tmp4\().2d, \tmp1\().4s, \tmp4\().s[0]  // reads the negated coefficient from tmp4 and then replaces tmp4 with the product
+.else
+        smull           \tmp3\().2d, \tmp1\().2s, v0.s[0]  // 32x32 -> 64 bit products: low two lanes here, high two lanes in the smull2 below
+        smull2          \tmp4\().2d, \tmp1\().4s, v0.s[0]
+.endif
+.ifb \tmp5
+        rshrn           \out1\().2s, \tmp3\().2d, #14  // 4 temp variant: out1 is narrowed first so that tmp3/tmp4 can be reused for out2
+        rshrn2          \out1\().4s, \tmp4\().2d, #14
+        smull           \tmp3\().2d, \tmp2\().2s, v0.s[0]
+        smull2          \tmp4\().2d, \tmp2\().4s, v0.s[0]
+        rshrn           \out2\().2s, \tmp3\().2d, #14
+        rshrn2          \out2\().4s, \tmp4\().2d, #14
+.else
+        smull           \tmp5\().2d, \tmp2\().2s, v0.s[0]  // 6 temp variant: all four products are issued before any narrowing
+        smull2          \tmp6\().2d, \tmp2\().4s, v0.s[0]
+        rshrn           \out1\().2s, \tmp3\().2d, #14
+        rshrn2          \out1\().4s, \tmp4\().2d, #14
+        rshrn           \out2\().2s, \tmp5\().2d, #14
+        rshrn2          \out2\().4s, \tmp6\().2d, #14
+.endif
+.endm
+
+// Same as dmbutterfly0 above, but treating the input in in2 as zero,
+// writing the same output into both out1 and out2 (in2 and tmp3-tmp6 are accepted but never used).
+.macro dmbutterfly0_h out1, out2, in1, in2, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6
+        smull           \tmp1\().2d, \in1\().2s,  v0.s[0]  // in1 * v0.s[0] as 64 bit products (low lanes, then high lanes)
+        smull2          \tmp2\().2d, \in1\().4s,  v0.s[0]
+        rshrn           \out1\().2s, \tmp1\().2d, #14
+        rshrn2          \out1\().4s, \tmp2\().2d, #14
+        rshrn           \out2\().2s, \tmp1\().2d, #14  // the same products are narrowed a second time into out2
+        rshrn2          \out2\().4s, \tmp2\().2d, #14
+.endm
+
+// out1,out2 = in1 * coef1 - in2 * coef2   (full 64 bit results, no rounding or shift is applied here)
+// out3,out4 = in1 * coef2 + in2 * coef1   (coef1/coef2 are by-element operands such as v0.s[2])
+// out are 4 x .2d registers, in are 2 x .4s registers; the outputs must not overlap the inputs
+.macro dmbutterfly_l out1, out2, out3, out4, in1, in2, coef1, coef2
+        smull           \out1\().2d, \in1\().2s, \coef1  // out1/out3 cover the low two lanes, out2/out4 the high two lanes
+        smull2          \out2\().2d, \in1\().4s, \coef1
+        smull           \out3\().2d, \in1\().2s, \coef2
+        smull2          \out4\().2d, \in1\().4s, \coef2
+        smlsl           \out1\().2d, \in2\().2s, \coef2  // in2 is first read here, after all four outputs have been written
+        smlsl2          \out2\().2d, \in2\().4s, \coef2
+        smlal           \out3\().2d, \in2\().2s, \coef1
+        smlal2          \out4\().2d, \in2\().4s, \coef1
+.endm
+
+// inout1 = (inout1 * coef1 - inout2 * coef2 + (1 << 13)) >> 14
+// inout2 = (inout1 * coef2 + inout2 * coef1 + (1 << 13)) >> 14   (uses the original inout1; negated before rounding if neg > 0)
+// inout are 2 x .4s registers; tmp1-tmp4 receive the 64 bit intermediates
+.macro dmbutterfly inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4, neg=0
+        dmbutterfly_l   \tmp1, \tmp2, \tmp3, \tmp4, \inout1, \inout2, \coef1, \coef2
+.if \neg > 0
+        neg             \tmp3\().2d, \tmp3\().2d  // only the second result (tmp3/tmp4) is negated
+        neg             \tmp4\().2d, \tmp4\().2d
+.endif
+        rshrn           \inout1\().2s, \tmp1\().2d,  #14  // rounding narrow back to 32 bit, overwriting the inputs
+        rshrn2          \inout1\().4s, \tmp2\().2d,  #14
+        rshrn           \inout2\().2s, \tmp3\().2d,  #14
+        rshrn2          \inout2\().4s, \tmp4\().2d,  #14
+.endm
+
+// Same as dmbutterfly above, but treating the input in inout2 as zero (inout2 is only written, never read)
+.macro dmbutterfly_h1 inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4
+        smull           \tmp1\().2d, \inout1\().2s, \coef1  // tmp1/tmp2 = inout1 * coef1
+        smull2          \tmp2\().2d, \inout1\().4s, \coef1
+        smull           \tmp3\().2d, \inout1\().2s, \coef2  // tmp3/tmp4 = inout1 * coef2
+        smull2          \tmp4\().2d, \inout1\().4s, \coef2
+        rshrn           \inout1\().2s, \tmp1\().2d, #14
+        rshrn2          \inout1\().4s, \tmp2\().2d, #14
+        rshrn           \inout2\().2s, \tmp3\().2d, #14
+        rshrn2          \inout2\().4s, \tmp4\().2d, #14
+.endm
+
+// Same as dmbutterfly above, but treating the input in inout1 as zero (inout1 is only written, never read)
+.macro dmbutterfly_h2 inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4
+        smull           \tmp1\().2d, \inout2\().2s, \coef2  // tmp1/tmp2 = inout2 * coef2, negated below to form the inout1 result
+        smull2          \tmp2\().2d, \inout2\().4s, \coef2
+        smull           \tmp3\().2d, \inout2\().2s, \coef1  // tmp3/tmp4 = inout2 * coef1, the inout2 result
+        smull2          \tmp4\().2d, \inout2\().4s, \coef1
+        neg             \tmp1\().2d, \tmp1\().2d
+        neg             \tmp2\().2d, \tmp2\().2d
+        rshrn           \inout2\().2s, \tmp3\().2d, #14  // inout2 is overwritten only after all four products have read it
+        rshrn2          \inout2\().4s, \tmp4\().2d, #14
+        rshrn           \inout1\().2s, \tmp1\().2d, #14
+        rshrn2          \inout1\().4s, \tmp2\().2d, #14
+.endm
+
+.macro dsmull_h out1, out2, in, coef
+        smull           \out1\().2d, \in\().2s, \coef  // out1 = 64 bit products of the low two 32 bit lanes of in with coef
+        smull2          \out2\().2d, \in\().4s, \coef  // out2 = 64 bit products of the high two lanes
+.endm
+
+.macro drshrn_h out, in1, in2, shift
+        rshrn           \out\().2s, \in1\().2d, \shift  // rounding shift right of in1 by shift, narrowed into the low two lanes of out
+        rshrn2          \out\().4s, \in2\().2d, \shift  // same for in2 into the high two lanes of out
+.endm
+
+
+// out1 = in1 + in2   (all operands are .4s registers)
+// out2 = in1 - in2   (out2 may be one of the inputs; out1 must not be, as both inputs are read again afterwards)
+.macro butterfly_4s out1, out2, in1, in2
+        add             \out1\().4s, \in1\().4s, \in2\().4s
+        sub             \out2\().4s, \in1\().4s, \in2\().4s
+.endm
+
+// out1 = in1 - in2   (same as butterfly_4s with the roles of the two outputs reversed)
+// out2 = in1 + in2   (out2 may be one of the inputs; out1 must not be, as both inputs are read again afterwards)
+.macro butterfly_4s_r out1, out2, in1, in2
+        sub             \out1\().4s, \in1\().4s, \in2\().4s
+        add             \out2\().4s, \in1\().4s, \in2\().4s
+.endm
+
+// out1 = (in1,in2 + in3,in4 + (1 << 13)) >> 14   (each register pair is the low/high half of one 4 lane value)
+// out2 = (in1,in2 - in3,in4 + (1 << 13)) >> 14
+// out are 2 x .4s registers, in are 4 x .2d registers; tmp3/tmp4 may be the same registers as in1/in2 (iadst8 relies on this)
+.macro dbutterfly_n out1, out2, in1, in2, in3, in4, tmp1, tmp2, tmp3, tmp4
+        add             \tmp1\().2d, \in1\().2d, \in3\().2d  // tmp1/tmp2 are written while every input is still needed, so they must not overlap any input
+        add             \tmp2\().2d, \in2\().2d, \in4\().2d
+        sub             \tmp3\().2d, \in1\().2d, \in3\().2d
+        sub             \tmp4\().2d, \in2\().2d, \in4\().2d
+        rshrn           \out1\().2s, \tmp1\().2d,  #14
+        rshrn2          \out1\().4s, \tmp2\().2d,  #14
+        rshrn           \out2\().2s, \tmp3\().2d,  #14
+        rshrn2          \out2\().4s, \tmp4\().2d,  #14
+.endm
+
+.macro iwht4_10 c0, c1, c2, c3
+        add             \c0\().4s, \c0\().4s, \c1\().4s  // One in-place 4-point pass over c0-c3 (.4s each) using only add, sub and one shift; overwrites v16 and v17. Presumably the inverse Walsh-Hadamard transform - TODO confirm
+        sub             v17.4s,    \c2\().4s, \c3\().4s
+        sub             v16.4s,    \c0\().4s, v17.4s
+        sshr            v16.4s,    v16.4s,    #1  // arithmetic shift right by one, without rounding
+        sub             \c2\().4s, v16.4s,    \c1\().4s
+        sub             \c1\().4s, v16.4s,    \c3\().4s
+        add             \c3\().4s, v17.4s,    \c2\().4s  // uses the c2 value written two instructions earlier
+        sub             \c0\().4s, \c0\().4s, \c1\().4s  // uses the updated c0 and the new c1
+.endm
+
+.macro iwht4_12 c0, c1, c2, c3
+        iwht4_10        \c0, \c1, \c2, \c3  // the 12 bit variant simply reuses the 10 bit code; iwht4_10 contains no multiplications, unlike idct4/iadst4 which have separate 64 bit versions for 12 bit
+.endm
+
+.macro idct4_10 c0, c1, c2, c3
+        mul             v22.4s,    \c1\().4s, v0.s[3]  // One in-place 4-point idct pass over c0-c3 using 32 bit products with v0.s[0], v0.s[2], v0.s[3]; overwrites v16-v18, v20, v22, v24
+        mul             v20.4s,    \c1\().4s, v0.s[2]
+        add             v16.4s,    \c0\().4s, \c2\().4s
+        sub             v17.4s,    \c0\().4s, \c2\().4s
+        mla             v22.4s,    \c3\().4s, v0.s[2]  // v22 = c1 * v0.s[3] + c3 * v0.s[2]
+        mul             v18.4s,    v16.4s,    v0.s[0]  // v18 = (c0 + c2) * v0.s[0]
+        mul             v24.4s,    v17.4s,    v0.s[0]  // v24 = (c0 - c2) * v0.s[0]
+        mls             v20.4s,    \c3\().4s, v0.s[3]  // v20 = c1 * v0.s[2] - c3 * v0.s[3]
+        srshr           v22.4s,    v22.4s,    #14  // every product is reduced by a rounding shift right of 14
+        srshr           v18.4s,    v18.4s,    #14
+        srshr           v24.4s,    v24.4s,    #14
+        srshr           v20.4s,    v20.4s,    #14
+        add             \c0\().4s, v18.4s,    v22.4s
+        sub             \c3\().4s, v18.4s,    v22.4s
+        add             \c1\().4s, v24.4s,    v20.4s
+        sub             \c2\().4s, v24.4s,    v20.4s
+.endm
+
+.macro idct4_12 c0, c1, c2, c3
+        smull           v22.2d,    \c1\().2s, v0.s[3]  // Same arithmetic as idct4_10, but every product is kept in 64 bit (register pairs for low/high lanes) until the rounding narrow; overwrites v16-v25
+        smull2          v23.2d,    \c1\().4s, v0.s[3]
+        smull           v20.2d,    \c1\().2s, v0.s[2]
+        smull2          v21.2d,    \c1\().4s, v0.s[2]
+        add             v16.4s,    \c0\().4s, \c2\().4s
+        sub             v17.4s,    \c0\().4s, \c2\().4s
+        smlal           v22.2d,    \c3\().2s, v0.s[2]  // v22,v23 = c1 * v0.s[3] + c3 * v0.s[2]
+        smlal2          v23.2d,    \c3\().4s, v0.s[2]
+        smull           v18.2d,    v16.2s,    v0.s[0]  // v18,v19 = (c0 + c2) * v0.s[0]
+        smull2          v19.2d,    v16.4s,    v0.s[0]
+        smull           v24.2d,    v17.2s,    v0.s[0]  // v24,v25 = (c0 - c2) * v0.s[0]
+        smull2          v25.2d,    v17.4s,    v0.s[0]
+        smlsl           v20.2d,    \c3\().2s, v0.s[3]  // v20,v21 = c1 * v0.s[2] - c3 * v0.s[3]
+        smlsl2          v21.2d,    \c3\().4s, v0.s[3]
+        rshrn           v22.2s,    v22.2d,    #14  // rounding narrow of each pair back into one .4s register
+        rshrn2          v22.4s,    v23.2d,    #14
+        rshrn           v18.2s,    v18.2d,    #14
+        rshrn2          v18.4s,    v19.2d,    #14
+        rshrn           v24.2s,    v24.2d,    #14
+        rshrn2          v24.4s,    v25.2d,    #14
+        rshrn           v20.2s,    v20.2d,    #14
+        rshrn2          v20.4s,    v21.2d,    #14
+        add             \c0\().4s, v18.4s,    v22.4s
+        sub             \c3\().4s, v18.4s,    v22.4s
+        add             \c1\().4s, v24.4s,    v20.4s
+        sub             \c2\().4s, v24.4s,    v20.4s
+.endm
+
+.macro iadst4_10 c0, c1, c2, c3
+        mul             v16.4s,    \c0\().4s, v1.s[0]  // One in-place 4-point iadst pass over c0-c3 using 32 bit products with v1.s[0]-v1.s[3]; overwrites v16, v18, v20, v22, v24, v26
+        mla             v16.4s,    \c2\().4s, v1.s[1]
+        mla             v16.4s,    \c3\().4s, v1.s[2]  // v16 = c0 * v1.s[0] + c2 * v1.s[1] + c3 * v1.s[2]
+        mul             v18.4s,    \c0\().4s, v1.s[2]
+        mls             v18.4s,    \c2\().4s, v1.s[0]
+        sub             \c0\().4s, \c0\().4s, \c2\().4s
+        mls             v18.4s,    \c3\().4s, v1.s[1]  // v18 = c0 * v1.s[2] - c2 * v1.s[0] - c3 * v1.s[1] (original c0)
+        add             \c0\().4s, \c0\().4s, \c3\().4s  // c0 = c0 - c2 + c3
+        mul             v22.4s,    \c1\().4s, v1.s[3]
+        mul             v20.4s,    \c0\().4s, v1.s[3]  // v20 = (c0 - c2 + c3) * v1.s[3]
+        add             v24.4s,    v16.4s,    v22.4s
+        add             v26.4s,    v18.4s,    v22.4s
+        srshr           \c0\().4s, v24.4s,    #14  // outputs are reduced by a rounding shift right of 14
+        add             v16.4s,    v16.4s,    v18.4s
+        srshr           \c1\().4s, v26.4s,    #14
+        sub             v16.4s,    v16.4s,    v22.4s  // v16 + v18 - v22 becomes the c3 output
+        srshr           \c2\().4s, v20.4s,    #14
+        srshr           \c3\().4s, v16.4s,    #14
+.endm
+
+.macro iadst4_12 c0, c1, c2, c3
+        smull           v16.2d,    \c0\().2s, v1.s[0]  // Same arithmetic as iadst4_10, but every product and sum is kept in 64 bit (register pairs for low/high lanes) until the rounding narrow; overwrites v16-v27
+        smull2          v17.2d,    \c0\().4s, v1.s[0]
+        smlal           v16.2d,    \c2\().2s, v1.s[1]
+        smlal2          v17.2d,    \c2\().4s, v1.s[1]
+        smlal           v16.2d,    \c3\().2s, v1.s[2]
+        smlal2          v17.2d,    \c3\().4s, v1.s[2]
+        smull           v18.2d,    \c0\().2s, v1.s[2]
+        smull2          v19.2d,    \c0\().4s, v1.s[2]
+        smlsl           v18.2d,    \c2\().2s, v1.s[0]
+        smlsl2          v19.2d,    \c2\().4s, v1.s[0]
+        sub             \c0\().4s, \c0\().4s, \c2\().4s
+        smlsl           v18.2d,    \c3\().2s, v1.s[1]
+        smlsl2          v19.2d,    \c3\().4s, v1.s[1]
+        add             \c0\().4s, \c0\().4s, \c3\().4s  // c0 = c0 - c2 + c3, still in 32 bit
+        smull           v22.2d,    \c1\().2s, v1.s[3]
+        smull2          v23.2d,    \c1\().4s, v1.s[3]
+        smull           v20.2d,    \c0\().2s, v1.s[3]
+        smull2          v21.2d,    \c0\().4s, v1.s[3]
+        add             v24.2d,    v16.2d,    v22.2d
+        add             v25.2d,    v17.2d,    v23.2d
+        add             v26.2d,    v18.2d,    v22.2d
+        add             v27.2d,    v19.2d,    v23.2d
+        rshrn           \c0\().2s, v24.2d,    #14  // c0 is overwritten only after the product reading it (v20,v21) has been formed
+        rshrn2          \c0\().4s, v25.2d,    #14
+        add             v16.2d,    v16.2d,    v18.2d
+        add             v17.2d,    v17.2d,    v19.2d
+        rshrn           \c1\().2s, v26.2d,    #14
+        rshrn2          \c1\().4s, v27.2d,    #14
+        sub             v16.2d,    v16.2d,    v22.2d
+        sub             v17.2d,    v17.2d,    v23.2d
+        rshrn           \c2\().2s, v20.2d,    #14
+        rshrn2          \c2\().4s, v21.2d,    #14
+        rshrn           \c3\().2s, v16.2d,    #14
+        rshrn2          \c3\().4s, v17.2d,    #14
+.endm
+
+// The public functions in this file have got the following signature (used below as x0 = dst, x1 = stride, x2 = block, w3 = eob):
+// void itxfm_add(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+
+.macro itxfm_func4x4 txfm1, txfm2, bpp
+function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_\bpp\()_neon, export=1
+.ifc \txfm1,\txfm2
+.ifc \txfm1,idct
+        movrel          x4,  itxfm4_coeffs
+        ld1             {v0.4h}, [x4]
+        sxtl            v0.4s,  v0.4h  // widen the 16 bit constants to 32 bit lanes
+.endif
+.ifc \txfm1,iadst
+        movrel          x4,  iadst4_coeffs
+        ld1             {v0.d}[1], [x4]  // load into the upper half of v0 so that sxtl2 can widen it into v1
+        sxtl2           v1.4s,  v0.8h
+.endif
+.else
+        movrel          x4,  itxfm4_coeffs
+        ld1             {v0.8h}, [x4]  // mixed transforms: one load fetches the idct row and the iadst row stored right after it
+        sxtl2           v1.4s,  v0.8h
+        sxtl            v0.4s,  v0.4h
+.endif
+
+        movi            v30.4s, #0  // zero vectors used to clear the coefficient block (note: iwht/iwht loads no constant table above)
+        movi            v31.4s, #0
+.ifc \txfm1\()_\txfm2,idct_idct
+        cmp             w3,  #1  // eob == 1 takes the DC-only shortcut
+        b.ne            1f
+        // DC-only for idct/idct
+        ld1             {v2.s}[0],  [x2]  // the coefficient is read as a 32 bit value
+        smull           v2.2d,  v2.2s, v0.s[0]
+        rshrn           v2.2s,  v2.2d, #14
+        smull           v2.2d,  v2.2s, v0.s[0]  // scaled by v0.s[0] a second time, i.e. what two idct4 passes do to a block with only c0 set
+        rshrn           v2.2s,  v2.2d, #14
+        st1             {v31.s}[0], [x2]  // clear the DC coefficient in memory
+        dup             v4.4s,  v2.s[0]  // broadcast the result to all four rows v4-v7
+        mov             v5.16b, v4.16b
+        mov             v6.16b, v4.16b
+        mov             v7.16b, v4.16b
+        b               2f
+.endif
+
+1:
+        ld1             {v4.4s,v5.4s,v6.4s,v7.4s},  [x2]  // all 16 coefficients, 32 bit each (64 bytes)
+        st1             {v30.4s,v31.4s}, [x2], #32  // clear the first 32 bytes of the block
+
+.ifc \txfm1,iwht
+        sshr            v4.4s,  v4.4s,  #2  // the iwht input is scaled down by 4 before the first pass
+        sshr            v5.4s,  v5.4s,  #2
+        sshr            v6.4s,  v6.4s,  #2
+        sshr            v7.4s,  v7.4s,  #2
+.endif
+
+        \txfm1\()4_\bpp v4,  v5,  v6,  v7
+
+        st1             {v30.4s,v31.4s}, [x2], #32  // clear the remaining 32 bytes of the block
+        // Transpose 4x4 with 32 bit elements
+        transpose_4x4s  v4,  v5,  v6,  v7,  v16, v17, v18, v19
+
+        \txfm2\()4_\bpp v4,  v5,  v6,  v7
+2:
+        mvni            v31.8h, #((0xff << (\bpp - 8)) & 0xff), lsl #8  // v31 = (1 << bpp) - 1 in every 16 bit lane: 0x03ff for 10 bit, 0x0fff for 12 bit
+        ld1             {v0.4h},   [x0], x1  // one row of four 16 bit pixels per load
+        ld1             {v1.4h},   [x0], x1
+.ifnc \txfm1,iwht
+        srshr           v4.4s,  v4.4s,  #4  // final rounding shift of the residual; skipped for iwht
+        srshr           v5.4s,  v5.4s,  #4
+        srshr           v6.4s,  v6.4s,  #4
+        srshr           v7.4s,  v7.4s,  #4
+.endif
+        uaddw           v4.4s,  v4.4s,  v0.4h  // residual + zero-extended pixels, in 32 bit
+        uaddw           v5.4s,  v5.4s,  v1.4h
+        ld1             {v2.4h},   [x0], x1
+        ld1             {v3.4h},   [x0], x1
+        sqxtun          v0.4h,  v4.4s  // saturating narrow to unsigned 16 bit, rows 0 and 1 packed into v0
+        sqxtun2         v0.8h,  v5.4s
+        sub             x0,  x0,  x1, lsl #2  // rewind dst by the four rows just read
+
+        uaddw           v6.4s,  v6.4s,  v2.4h
+        umin            v0.8h,  v0.8h,  v31.8h  // clamp to the largest pixel value
+        uaddw           v7.4s,  v7.4s,  v3.4h
+        st1             {v0.4h},   [x0], x1
+        sqxtun          v2.4h,  v6.4s
+        sqxtun2         v2.8h,  v7.4s
+        umin            v2.8h,  v2.8h,  v31.8h
+
+        st1             {v0.d}[1], [x0], x1
+        st1             {v2.4h},   [x0], x1
+        st1             {v2.d}[1], [x0], x1
+
+        ret
+endfunc
+.endm
+
+.macro itxfm_funcs4x4 bpp
+itxfm_func4x4 idct,  idct,  \bpp  // emits the five exported 4x4 functions for one bit depth; idct/idct is the only one with a DC-only shortcut
+itxfm_func4x4 iadst, idct,  \bpp
+itxfm_func4x4 idct,  iadst, \bpp
+itxfm_func4x4 iadst, iadst, \bpp
+itxfm_func4x4 iwht,  iwht,  \bpp
+.endm
+
+itxfm_funcs4x4 10  // instantiates the 4x4 functions with the _10_neon suffix
+itxfm_funcs4x4 12  // instantiates the 4x4 functions with the _12_neon suffix
+
+function idct8x8_dc_add_neon
+        movrel          x4,  idct_coeffs  // DC-only 8x8 idct; branched to by the 8x8 idct/idct function when eob == 1
+        ld1             {v0.4h}, [x4]
+
+        movi            v1.4h,  #0
+        sxtl            v0.4s,  v0.4h
+
+        ld1             {v2.s}[0],  [x2]  // the DC coefficient, read as a 32 bit value
+        smull           v2.2d,  v2.2s,  v0.s[0]
+        rshrn           v2.2s,  v2.2d,  #14
+        smull           v2.2d,  v2.2s,  v0.s[0]  // scaled by v0.s[0] twice, each time with a rounding shift of 14
+        rshrn           v2.2s,  v2.2d,  #14
+        st1             {v1.s}[0],  [x2]  // clear the DC coefficient in memory
+        dup             v2.4s,  v2.s[0]
+
+        srshr           v2.4s,  v2.4s,  #5  // same final rounding shift as the full 8x8 path
+
+        mov             x4,  #8  // row counter
+        mov             x3,  x0  // x0 is used for reading dst, x3 for writing it
+        dup             v31.8h, w5  // w5 = largest pixel value, set by the bit depth specific entry points
+1:
+        // Loop to add the constant from v2 into all 8x8 outputs
+        subs            x4,  x4,  #2  // two rows per iteration; the flags are consumed by the b.ne at the end
+        ld1             {v3.8h},  [x0], x1
+        ld1             {v4.8h},  [x0], x1
+        uaddw           v16.4s, v2.4s,  v3.4h
+        uaddw2          v17.4s, v2.4s,  v3.8h
+        uaddw           v18.4s, v2.4s,  v4.4h
+        uaddw2          v19.4s, v2.4s,  v4.8h
+        sqxtun          v3.4h,  v16.4s  // saturating narrow to unsigned 16 bit
+        sqxtun2         v3.8h,  v17.4s
+        sqxtun          v4.4h,  v18.4s
+        sqxtun2         v4.8h,  v19.4s
+        umin            v3.8h,  v3.8h,  v31.8h  // clamp to the largest pixel value
+        umin            v4.8h,  v4.8h,  v31.8h
+        st1             {v3.8h},  [x3], x1
+        st1             {v4.8h},  [x3], x1
+        b.ne            1b
+
+        ret
+endfunc
+
+.macro idct8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1, t2, t3, t4, t5
+        dmbutterfly0    \r0, \r4, \r0, \r4, \t0, \t1, \t2, \t3, \t4, \t5 // r0 = t0a, r4 = t1a
+        dmbutterfly     \r2, \r6, v0.s[2], v0.s[3], \t0, \t1, \t2, \t3   // r2 = t2a, r6 = t3a
+        dmbutterfly     \r1, \r7, v1.s[0], v1.s[1], \t0, \t1, \t2, \t3   // r1 = t4a, r7 = t7a
+        dmbutterfly     \r5, \r3, v1.s[2], v1.s[3], \t0, \t1, \t2, \t3   // r5 = t5a, r3 = t6a
+        // The multipliers come from v0/v1 (the 8x8 functions load the first eight idct_coeffs there, widened to 32 bit). r0-r7 are transformed in place; t0-t5 are scratch.
+        butterfly_4s    \t0, \t1, \r0, \r6 // t0 = t0, t1 = t3
+        butterfly_4s    \t2, \r5, \r1, \r5 // t2 = t4, r5 = t5a
+        butterfly_4s    \t3, \r6, \r7, \r3 // t3 = t7, r6 = t6a
+        butterfly_4s    \r7, \r4, \r4, \r2 // r7 = t1, r4 = t2
+
+        dmbutterfly0    \r6, \r5, \r6, \r5, \r0, \r1, \r2, \r3, \t4, \t5 // r6 = t6, r5 = t5
+
+        butterfly_4s    \r1, \r6, \r7, \r6 // r1 = out[1], r6 = out[6]
+        butterfly_4s    \r0, \r7, \t0, \t3 // r0 = out[0], r7 = out[7]
+        butterfly_4s    \r2, \r5, \r4, \r5 // r2 = out[2], r5 = out[5]
+        butterfly_4s    \r3, \r4, \t1, \t2 // r3 = out[3], r4 = out[4]
+.endm
+
+.macro iadst8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1, t2, t3, t4, t5
+        dmbutterfly_l   \t2, \t3, \t0, \t1, \r7, \r0, v2.s[1], v2.s[0]   // t2,t3 = t1a, t0,t1 = t0a
+        dmbutterfly_l   \r0, \r7, \t4, \t5, \r3, \r4, v3.s[1], v3.s[0]   // r0,r7 = t5a, t4,t5 = t4a
+        // Multipliers: v2/v3 (the 8x8 functions load iadst8_coeffs there) and v0 (start of idct_coeffs), all widened to 32 bit. r0-r7 are transformed in place; t0-t5 are scratch.
+        dbutterfly_n    \r3, \t0, \t0, \t1, \t4, \t5, \r3, \r4, \t0, \t1 // r3 = t0, t0 = t4
+        dbutterfly_n    \r4, \t1, \t2, \t3, \r0, \r7, \r4, \t1, \t4, \t5 // r4 = t1, t1 = t5
+
+        dmbutterfly_l   \t4, \t5, \t2, \t3, \r5, \r2, v2.s[3], v2.s[2]   // t4,t5 = t3a, t2,t3 = t2a
+        dmbutterfly_l   \r2, \r5, \r0, \r7, \r1, \r6, v3.s[3], v3.s[2]   // r2,r5 = t7a, r0,r7 = t6a
+
+        dbutterfly_n    \r1, \t2, \t2, \t3, \r0, \r7, \r1, \r6, \t2, \t3 // r1 = t2, t2 = t6
+        dbutterfly_n    \r0, \t4, \t4, \t5, \r2, \r5, \r0, \r7, \t4, \t5 // r0 = t3, t4 = t7
+
+        butterfly_4s    \r7, \r4, \r4, \r0   // r7 = -out[7], r4 = t3
+        neg             \r7\().4s, \r7\().4s // r7 = out[7]
+        butterfly_4s    \r0, \r1, \r3, \r1   // r0 = out[0],  r1 = t2
+
+        dmbutterfly_l   \r2, \r3, \t3, \t5, \t0, \t1, v0.s[2], v0.s[3]   // r2,r3 = t5a, t3,t5 = t4a
+        dmbutterfly_l   \t0, \t1, \r5, \r6, \t4, \t2, v0.s[3], v0.s[2]   // t0,t1 = t6a, r5,r6 = t7a
+
+        dbutterfly_n    \r6, \t2, \r2, \r3, \r5, \r6, \t2, \t4, \r2, \r3 // r6 = out[6],  t2 = t7
+
+        dmbutterfly0    \r3, \r4, \r1, \r4, \t4, \r5, \r1, \r2           // r3 = -out[3], r4 = out[4]
+        neg             \r3\().4s, \r3\().4s  // r3 = out[3]
+
+        dbutterfly_n    \r1, \t0, \t3, \t5, \t0, \t1, \r1, \r2, \t0, \t1 // r1 = -out[1], t0 = t6
+        neg             \r1\().4s, \r1\().4s  // r1 = out[1]
+
+        dmbutterfly0    \r2, \r5, \t0, \t2, \t1, \t3, \t4, \t5           // r2 = out[2],  r5 = -out[5]
+        neg             \r5\().4s, \r5\().4s  // r5 = out[5]
+.endm
+
+
+.macro itxfm_func8x8 txfm1, txfm2
+function vp9_\txfm1\()_\txfm2\()_8x8_add_16_neon
+.ifc \txfm1\()_\txfm2,idct_idct
+        cmp             w3,  #1  // eob == 1: hand over to the DC-only routine, which returns to our caller
+        b.eq            idct8x8_dc_add_neon
+.endif
+        // The iadst also uses a few coefficients from
+        // idct, so those always need to be loaded.
+.ifc \txfm1\()_\txfm2,idct_idct
+        movrel          x4,  idct_coeffs
+.else
+        movrel          x4,  iadst8_coeffs
+        ld1             {v1.8h}, [x4], #16  // the post-increment leaves x4 at idct_coeffs, which is stored right after this row
+        stp             d8,  d9,  [sp, #-0x10]!  // v8/v9 are used as scratch on this path; their low halves are callee-saved and restored before ret
+        sxtl2           v3.4s,  v1.8h  // v2/v3 = the eight iadst constants widened to 32 bit
+        sxtl            v2.4s,  v1.4h
+.endif
+        ld1             {v0.8h}, [x4]
+        sxtl2           v1.4s,  v0.8h  // v0/v1 = the first eight idct constants widened to 32 bit
+        sxtl            v0.4s,  v0.4h
+
+        movi            v4.4s, #0  // v4-v7 = zeros for clearing the coefficient block; reused as scratch afterwards
+        movi            v5.4s, #0
+        movi            v6.4s, #0
+        movi            v7.4s, #0
+
+1:
+        ld1             {v16.4s,v17.4s,v18.4s,v19.4s},  [x2], #64  // 64 coefficients of 32 bit each: row n ends up in v(16+2n) and v(17+2n)
+        ld1             {v20.4s,v21.4s,v22.4s,v23.4s},  [x2], #64
+        ld1             {v24.4s,v25.4s,v26.4s,v27.4s},  [x2], #64
+        ld1             {v28.4s,v29.4s,v30.4s,v31.4s},  [x2], #64
+        sub             x2,  x2,  #256  // back to the start of the block, to overwrite all 256 bytes with zeros
+        st1             {v4.4s,v5.4s,v6.4s,v7.4s},      [x2], #64
+        st1             {v4.4s,v5.4s,v6.4s,v7.4s},      [x2], #64
+        st1             {v4.4s,v5.4s,v6.4s,v7.4s},      [x2], #64
+        st1             {v4.4s,v5.4s,v6.4s,v7.4s},      [x2], #64
+
+.ifc \txfm1\()_\txfm2,idct_idct
+        idct8           v16, v18, v20, v22, v24, v26, v28, v30, v2,  v3,  v4,  v5,  v6,  v7  // first pass on the even numbered registers (v2/v3 are free as scratch here, no iadst constants are loaded)
+        idct8           v17, v19, v21, v23, v25, v27, v29, v31, v2,  v3,  v4,  v5,  v6,  v7  // first pass on the odd numbered registers
+.else
+        \txfm1\()8      v16, v18, v20, v22, v24, v26, v28, v30, v4,  v5,  v6,  v7,  v8,  v9  // v2/v3 hold iadst constants here, so the scratch set is v4-v9
+        \txfm1\()8      v17, v19, v21, v23, v25, v27, v29, v31, v4,  v5,  v6,  v7,  v8,  v9
+.endif
+
+        // Transpose 8x8 with 32 bit elements
+        transpose_8x8s  v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v4, v5, v6, v7
+
+.ifc \txfm1\()_\txfm2,idct_idct
+        idct8           v16, v18, v20, v22, v24, v26, v28, v30, v2,  v3,  v4,  v5,  v6,  v7  // second pass, same register layout as the first
+        idct8           v17, v19, v21, v23, v25, v27, v29, v31, v2,  v3,  v4,  v5,  v6,  v7
+.else
+        \txfm2\()8      v16, v18, v20, v22, v24, v26, v28, v30, v4,  v5,  v6,  v7,  v8,  v9
+        \txfm2\()8      v17, v19, v21, v23, v25, v27, v29, v31, v4,  v5,  v6,  v7,  v8,  v9
+.endif
+2:
+        mov             x3,  x0  // x0 is used for reading dst, x3 for writing it
+        // Add into the destination
+        ld1             {v0.8h},  [x0], x1  // one row of eight 16 bit pixels per load; loads, arithmetic and stores of different rows are interleaved below
+        srshr           v16.4s, v16.4s, #5  // final rounding shift of the residual
+        srshr           v17.4s, v17.4s, #5
+        ld1             {v1.8h},  [x0], x1
+        srshr           v18.4s, v18.4s, #5
+        srshr           v19.4s, v19.4s, #5
+        ld1             {v2.8h},  [x0], x1
+        srshr           v20.4s, v20.4s, #5
+        srshr           v21.4s, v21.4s, #5
+        uaddw           v16.4s, v16.4s, v0.4h  // residual + zero-extended pixels, in 32 bit
+        uaddw2          v17.4s, v17.4s, v0.8h
+        ld1             {v3.8h},  [x0], x1
+        srshr           v22.4s, v22.4s, #5
+        srshr           v23.4s, v23.4s, #5
+        uaddw           v18.4s, v18.4s, v1.4h
+        uaddw2          v19.4s, v19.4s, v1.8h
+        ld1             {v4.8h},  [x0], x1
+        srshr           v24.4s, v24.4s, #5
+        srshr           v25.4s, v25.4s, #5
+        uaddw           v20.4s, v20.4s, v2.4h
+        uaddw2          v21.4s, v21.4s, v2.8h
+        sqxtun          v0.4h,  v16.4s  // saturating narrow to unsigned 16 bit
+        sqxtun2         v0.8h,  v17.4s
+        dup             v16.8h, w5  // row 0 has been narrowed into v0, so v16 is free to hold the largest pixel value (w5, set by the entry points)
+        ld1             {v5.8h},  [x0], x1
+        srshr           v26.4s, v26.4s, #5
+        srshr           v27.4s, v27.4s, #5
+        uaddw           v22.4s, v22.4s, v3.4h
+        uaddw2          v23.4s, v23.4s, v3.8h
+        sqxtun          v1.4h,  v18.4s
+        sqxtun2         v1.8h,  v19.4s
+        umin            v0.8h,  v0.8h,  v16.8h  // clamp to the largest pixel value
+        ld1             {v6.8h},  [x0], x1
+        srshr           v28.4s, v28.4s, #5
+        srshr           v29.4s, v29.4s, #5
+        uaddw           v24.4s, v24.4s, v4.4h
+        uaddw2          v25.4s, v25.4s, v4.8h
+        sqxtun          v2.4h,  v20.4s
+        sqxtun2         v2.8h,  v21.4s
+        umin            v1.8h,  v1.8h,  v16.8h
+        ld1             {v7.8h},  [x0], x1
+        srshr           v30.4s, v30.4s, #5
+        srshr           v31.4s, v31.4s, #5
+        uaddw           v26.4s, v26.4s, v5.4h
+        uaddw2          v27.4s, v27.4s, v5.8h
+        sqxtun          v3.4h,  v22.4s
+        sqxtun2         v3.8h,  v23.4s
+        umin            v2.8h,  v2.8h,  v16.8h
+
+        st1             {v0.8h},  [x3], x1  // stores begin only after all eight rows have been loaded
+        uaddw           v28.4s, v28.4s, v6.4h
+        uaddw2          v29.4s, v29.4s, v6.8h
+        st1             {v1.8h},  [x3], x1
+        sqxtun          v4.4h,  v24.4s
+        sqxtun2         v4.8h,  v25.4s
+        umin            v3.8h,  v3.8h,  v16.8h
+        st1             {v2.8h},  [x3], x1
+        uaddw           v30.4s, v30.4s, v7.4h
+        uaddw2          v31.4s, v31.4s, v7.8h
+        st1             {v3.8h},  [x3], x1
+        sqxtun          v5.4h,  v26.4s
+        sqxtun2         v5.8h,  v27.4s
+        umin            v4.8h,  v4.8h,  v16.8h
+        st1             {v4.8h},  [x3], x1
+        sqxtun          v6.4h,  v28.4s
+        sqxtun2         v6.8h,  v29.4s
+        umin            v5.8h,  v5.8h,  v16.8h
+        st1             {v5.8h},  [x3], x1
+        sqxtun          v7.4h,  v30.4s
+        sqxtun2         v7.8h,  v31.4s
+        umin            v6.8h,  v6.8h,  v16.8h
+
+        st1             {v6.8h},  [x3], x1
+        umin            v7.8h,  v7.8h,  v16.8h
+        st1             {v7.8h},  [x3], x1
+
+.ifnc \txfm1\()_\txfm2,idct_idct
+        ldp             d8,  d9,  [sp], 0x10  // restore d8/d9 saved at function entry on this path
+.endif
+        ret
+endfunc
+
+function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_10_neon, export=1
+        mov             x5,  #0x03ff  // largest 10 bit pixel value, read as w5 by the shared body
+        b               vp9_\txfm1\()_\txfm2\()_8x8_add_16_neon
+endfunc
+
+function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_12_neon, export=1
+        mov             x5,  #0x0fff  // largest 12 bit pixel value, read as w5 by the shared body
+        b               vp9_\txfm1\()_\txfm2\()_8x8_add_16_neon
+endfunc
+.endm
+
+itxfm_func8x8 idct,  idct  // each line emits one shared body plus its 10 and 12 bit entry points
+itxfm_func8x8 iadst, idct
+itxfm_func8x8 idct,  iadst
+itxfm_func8x8 iadst, iadst
+
+
+function idct16x16_dc_add_neon
+        movrel          x4,  idct_coeffs  // DC-only 16x16 idct: adds one constant to every pixel of a 16x16 block
+        ld1             {v0.4h}, [x4]
+        sxtl            v0.4s,  v0.4h
+
+        movi            v1.4h,  #0
+
+        ld1             {v2.s}[0],  [x2]  // the DC coefficient, read as a 32 bit value
+        smull           v2.2d,  v2.2s,  v0.s[0]
+        rshrn           v2.2s,  v2.2d,  #14
+        smull           v2.2d,  v2.2s,  v0.s[0]  // scaled by v0.s[0] twice, each time with a rounding shift of 14
+        rshrn           v2.2s,  v2.2d,  #14
+        st1             {v1.s}[0],  [x2]  // clear the DC coefficient in memory
+        dup             v2.4s,  v2.s[0]
+
+        srshr           v0.4s,  v2.4s,  #6  // the constant to add ends up in v0; v2 is reused for pixels in the loop
+
+        mov             x3, x0  // x0 is used for reading dst, x3 for writing it
+        mov             x4, #16  // row counter
+        dup             v31.8h, w13  // w13 is not set in this function; presumably the caller passes the largest pixel value there - TODO confirm
+1:
+        // Loop to add the constant from v0 into all 16x16 outputs
+        subs            x4,  x4,  #2  // two rows per iteration; the flags are consumed by the b.ne at the end
+        ld1             {v1.8h,v2.8h},  [x0], x1  // one row of sixteen 16 bit pixels
+        uaddw           v16.4s, v0.4s,  v1.4h
+        uaddw2          v17.4s, v0.4s,  v1.8h
+        ld1             {v3.8h,v4.8h},  [x0], x1
+        uaddw           v18.4s, v0.4s,  v2.4h
+        uaddw2          v19.4s, v0.4s,  v2.8h
+        uaddw           v20.4s, v0.4s,  v3.4h
+        uaddw2          v21.4s, v0.4s,  v3.8h
+        uaddw           v22.4s, v0.4s,  v4.4h
+        uaddw2          v23.4s, v0.4s,  v4.8h
+        sqxtun          v1.4h,  v16.4s  // saturating narrow to unsigned 16 bit
+        sqxtun2         v1.8h,  v17.4s
+        sqxtun          v2.4h,  v18.4s
+        sqxtun2         v2.8h,  v19.4s
+        sqxtun          v3.4h,  v20.4s
+        sqxtun2         v3.8h,  v21.4s
+        sqxtun          v4.4h,  v22.4s
+        sqxtun2         v4.8h,  v23.4s
+        umin            v1.8h,  v1.8h,  v31.8h  // clamp to the value held in v31
+        umin            v2.8h,  v2.8h,  v31.8h
+        st1             {v1.8h,v2.8h},  [x3], x1
+        umin            v3.8h,  v3.8h,  v31.8h
+        umin            v4.8h,  v4.8h,  v31.8h
+        st1             {v3.8h,v4.8h},  [x3], x1
+        b.ne            1b
+
+        ret
+endfunc
+
+.macro idct16_end
+        butterfly_4s    v18, v7,  v4,  v7                // v18 = t0a,  v7  = t7a
+        butterfly_4s    v19, v22, v5,  v22               // v19 = t1a,  v22 = t6
+        butterfly_4s    v4,  v26, v20, v26               // v4  = t2a,  v26 = t5
+        butterfly_4s    v5,  v6,  v28, v6                // v5  = t3a,  v6  = t4
+        butterfly_4s    v20, v28, v16, v24               // v20 = t8a,  v28 = t11a
+        butterfly_4s    v24, v21, v23, v21               // v24 = t9,   v21 = t10
+        butterfly_4s    v23, v27, v25, v27               // v23 = t14,  v27 = t13
+        butterfly_4s    v25, v29, v29, v17               // v25 = t15a, v29 = t12a
+
+        dmbutterfly0    v8,  v9,  v27, v21, v8,  v9,  v16, v17, v30, v31 // v8  = t13a, v9  = t10a
+        dmbutterfly0    v28, v27, v29, v28, v21, v29, v16, v17, v30, v31 // v28 = t12,  v27 = t11
+
+        butterfly_4s    v16, v31, v18, v25               // v16 = out[0], v31 = out[15]
+        butterfly_4s    v17, v30, v19, v23               // v17 = out[1], v30 = out[14]
+        butterfly_4s_r  v25, v22, v22, v24               // v25 = out[9], v22 = out[6]
+        butterfly_4s    v23, v24, v7,  v20               // v23 = out[7], v24 = out[8]
+        butterfly_4s    v18, v29, v4,  v8                // v18 = out[2], v29 = out[13]
+        butterfly_4s    v19, v28, v5,  v28               // v19 = out[3], v28 = out[12]
+        butterfly_4s    v20, v27, v6,  v27               // v20 = out[4], v27 = out[11]
+        butterfly_4s    v21, v26, v26, v9                // v21 = out[5], v26 = out[10]
+        ret                                              // shared tail of idct16, idct16_half and idct16_quarter: returns from the enclosing function
+.endm
+
+function idct16
+        dmbutterfly0    v16, v24, v16, v24, v4, v5, v6, v7, v8, v9 // v16 = t0a,  v24 = t1a
+        dmbutterfly     v20, v28, v0.s[2], v0.s[3], v4, v5, v6, v7 // v20 = t2a,  v28 = t3a
+        dmbutterfly     v18, v30, v1.s[0], v1.s[1], v4, v5, v6, v7 // v18 = t4a,  v30 = t7a
+        dmbutterfly     v26, v22, v1.s[2], v1.s[3], v4, v5, v6, v7 // v26 = t5a,  v22 = t6a
+        dmbutterfly     v17, v31, v2.s[0], v2.s[1], v4, v5, v6, v7 // v17 = t8a,  v31 = t15a
+        dmbutterfly     v25, v23, v2.s[2], v2.s[3], v4, v5, v6, v7 // v25 = t9a,  v23 = t14a
+        dmbutterfly     v21, v27, v3.s[0], v3.s[1], v4, v5, v6, v7 // v21 = t10a, v27 = t13a
+        dmbutterfly     v29, v19, v3.s[2], v3.s[3], v4, v5, v6, v7 // v29 = t11a, v19 = t12a
+
+        butterfly_4s    v4,  v28, v16, v28               // v4  = t0,   v28 = t3
+        butterfly_4s    v5,  v20, v24, v20               // v5  = t1,   v20 = t2
+        butterfly_4s    v6,  v26, v18, v26               // v6  = t4,   v26 = t5
+        butterfly_4s    v7,  v22, v30, v22               // v7  = t7,   v22 = t6
+        butterfly_4s    v16, v25, v17, v25               // v16 = t8,   v25 = t9
+        butterfly_4s    v24, v21, v29, v21               // v24 = t11,  v21 = t10
+        butterfly_4s    v17, v27, v19, v27               // v17 = t12,  v27 = t13
+        butterfly_4s    v29, v23, v31, v23               // v29 = t15,  v23 = t14
+
+        dmbutterfly0    v22, v26, v22, v26, v8, v9, v18, v19, v30, v31        // v22 = t6a,  v26 = t5a
+        dmbutterfly     v23, v25, v0.s[2], v0.s[3], v18, v19, v30, v31        // v23 = t9a,  v25 = t14a
+        dmbutterfly     v27, v21, v0.s[2], v0.s[3], v18, v19, v30, v31, neg=1 // v27 = t13a, v21 = t10a
+        idct16_end                                       // last stage: leaves out[0..15] in v16-v31 and returns; v0-v3 hold the coefficients throughout
+endfunc
+
+function idct16_half
+        dmbutterfly0_h  v16, v24, v16, v24, v4, v5, v6, v7, v8, v9 // v16 = t0a,  v24 = t1a
+        dmbutterfly_h1  v20, v28, v0.s[2], v0.s[3], v4, v5, v6, v7 // v20 = t2a,  v28 = t3a
+        dmbutterfly_h1  v18, v30, v1.s[0], v1.s[1], v4, v5, v6, v7 // v18 = t4a,  v30 = t7a
+        dmbutterfly_h2  v26, v22, v1.s[2], v1.s[3], v4, v5, v6, v7 // v26 = t5a,  v22 = t6a
+        dmbutterfly_h1  v17, v31, v2.s[0], v2.s[1], v4, v5, v6, v7 // v17 = t8a,  v31 = t15a
+        dmbutterfly_h2  v25, v23, v2.s[2], v2.s[3], v4, v5, v6, v7 // v25 = t9a,  v23 = t14a
+        dmbutterfly_h1  v21, v27, v3.s[0], v3.s[1], v4, v5, v6, v7 // v21 = t10a, v27 = t13a
+        dmbutterfly_h2  v29, v19, v3.s[2], v3.s[3], v4, v5, v6, v7 // v29 = t11a, v19 = t12a
+
+        butterfly_4s    v4,  v28, v16, v28               // v4  = t0,   v28 = t3
+        butterfly_4s    v5,  v20, v24, v20               // v5  = t1,   v20 = t2
+        butterfly_4s    v6,  v26, v18, v26               // v6  = t4,   v26 = t5
+        butterfly_4s    v7,  v22, v30, v22               // v7  = t7,   v22 = t6
+        butterfly_4s    v16, v25, v17, v25               // v16 = t8,   v25 = t9
+        butterfly_4s    v24, v21, v29, v21               // v24 = t11,  v21 = t10
+        butterfly_4s    v17, v27, v19, v27               // v17 = t12,  v27 = t13
+        butterfly_4s    v29, v23, v31, v23               // v29 = t15,  v23 = t14
+
+        dmbutterfly0    v22, v26, v22, v26, v8, v9, v18, v19, v30, v31        // v22 = t6a,  v26 = t5a
+        dmbutterfly     v23, v25, v0.s[2], v0.s[3], v18, v19, v30, v31        // v23 = t9a,  v25 = t14a
+        dmbutterfly     v27, v21, v0.s[2], v0.s[3], v18, v19, v30, v31, neg=1 // v27 = t13a, v21 = t10a
+        idct16_end                                       // same tail as idct16; callers load only v16-v23, the _h/_h1/_h2 macros presumably skip the unloaded inputs (TODO confirm)
+endfunc
+
+function idct16_quarter
+        dsmull_h        v24, v25, v19, v3.s[3]           // quarter variant: the callers load only v16-v19 as input
+        dsmull_h        v4,  v5,  v17, v2.s[0]
+        dsmull_h        v7,  v6,  v18, v1.s[1]
+        dsmull_h        v30, v31, v18, v1.s[0]
+        neg             v24.2d,  v24.2d                  // negate the 64-bit lanes of v24/v25 before they are narrowed
+        neg             v25.2d,  v25.2d
+        dsmull_h        v29, v28, v17, v2.s[1]
+        dsmull_h        v26, v27, v19, v3.s[2]
+        dsmull_h        v22, v23, v16, v0.s[0]
+        drshrn_h        v24, v24, v25, #14               // each register pair is reduced to one register with a shift of 14 (drshrn_h is defined elsewhere)
+        drshrn_h        v16, v4,  v5,  #14
+        drshrn_h        v7,  v7,  v6,  #14
+        drshrn_h        v6,  v30, v31, #14
+        drshrn_h        v29, v29, v28, #14
+        drshrn_h        v17, v26, v27, #14
+        drshrn_h        v28, v22, v23, #14
+
+        dmbutterfly_l   v20, v21, v22, v23, v17, v24, v0.s[2], v0.s[3]
+        dmbutterfly_l   v18, v19, v30, v31, v29, v16, v0.s[2], v0.s[3]
+        neg             v22.2d,  v22.2d                  // negate the 64-bit lanes of v22/v23 before they are narrowed
+        neg             v23.2d,  v23.2d
+        drshrn_h        v27, v20, v21, #14
+        drshrn_h        v21, v22, v23, #14
+        drshrn_h        v23, v18, v19, #14
+        drshrn_h        v25, v30, v31, #14
+        mov             v4.16b,  v28.16b                 // v4, v5 and (below) v20 each receive a copy of v28
+        mov             v5.16b,  v28.16b
+        dmbutterfly0    v22, v26, v7,  v6,  v18, v19, v30, v31
+        mov             v20.16b, v28.16b
+        idct16_end                                       // shared tail: finishes the transform and returns
+endfunc
+
+function iadst16
+        ld1             {v0.8h,v1.8h}, [x11]             // 16 16-bit coefficients via x11 (the 16x16 entry point points it at iadst16_coeffs)
+        sxtl            v2.4s,  v1.4h                    // sign-extend them to 32 bit, spread over v0-v3
+        sxtl2           v3.4s,  v1.8h
+        sxtl2           v1.4s,  v0.8h
+        sxtl            v0.4s,  v0.4h
+
+        dmbutterfly_l   v6,  v7,  v4,  v5,  v31, v16, v0.s[1], v0.s[0]   // v6,v7   = t1,   v4,v5   = t0
+        dmbutterfly_l   v10, v11, v8,  v9,  v23, v24, v1.s[1], v1.s[0]   // v10,v11 = t9,   v8,v9   = t8
+        dbutterfly_n    v31, v24, v6,  v7,  v10, v11, v12, v13, v10, v11 // v31     = t1a,  v24     = t9a
+        dmbutterfly_l   v14, v15, v12, v13, v29, v18, v0.s[3], v0.s[2]   // v14,v15 = t3,   v12,v13 = t2
+        dbutterfly_n    v16, v23, v4,  v5,  v8,  v9,  v6,  v7,  v8,  v9  // v16     = t0a,  v23     = t8a
+
+        dmbutterfly_l   v6,  v7,  v4,  v5,  v21, v26, v1.s[3], v1.s[2]   // v6,v7   = t11,  v4,v5   = t10
+        dbutterfly_n    v29, v26, v14, v15, v6,  v7,  v8,  v9,  v6,  v7  // v29     = t3a,  v26     = t11a
+        dmbutterfly_l   v10, v11, v8,  v9,  v27, v20, v2.s[1], v2.s[0]   // v10,v11 = t5,   v8,v9   = t4
+        dbutterfly_n    v18, v21, v12, v13, v4,  v5,  v6,  v7,  v4,  v5  // v18     = t2a,  v21     = t10a
+
+        dmbutterfly_l   v14, v15, v12, v13, v19, v28, v3.s[1], v3.s[0]   // v14,v15 = t13,  v12,v13 = t12
+        dbutterfly_n    v20, v28, v10, v11, v14, v15, v4,  v5,  v14, v15 // v20     = t5a,  v28     = t13a
+        dmbutterfly_l   v6,  v7,  v4,  v5,  v25, v22, v2.s[3], v2.s[2]   // v6,v7   = t7,   v4,v5   = t6
+        dbutterfly_n    v27, v19, v8,  v9,  v12, v13, v10, v11, v12, v13 // v27     = t4a,  v19     = t12a
+
+        dmbutterfly_l   v10, v11, v8,  v9,  v17, v30, v3.s[3], v3.s[2]   // v10,v11 = t15,  v8,v9   = t14
+        ld1             {v0.8h}, [x10]                   // the iadst16 coefficients are used up; fetch 8 values via x10 (idct_coeffs in the 16x16 entry point)
+        dbutterfly_n    v22, v30, v6,  v7,  v10, v11, v12, v13, v10, v11 // v22     = t7a,  v30     = t15a
+        sxtl2           v1.4s,  v0.8h                    // sign-extend the new coefficients into v0/v1
+        sxtl            v0.4s,  v0.4h
+        dmbutterfly_l   v14, v15, v12, v13, v23, v24, v1.s[0], v1.s[1]   // v14,v15 = t9,   v12,v13 = t8
+        dbutterfly_n    v25, v17, v4,  v5,  v8,  v9,  v6,  v7,  v8,  v9  // v25     = t6a,  v17     = t14a
+
+        dmbutterfly_l   v4,  v5,  v6,  v7,  v28, v19, v1.s[1], v1.s[0]   // v4,v5   = t12,  v6,v7   = t13
+        dbutterfly_n    v23, v19, v12, v13, v4,  v5,  v8,  v9,  v4,  v5  // v23     = t8a,  v19     = t12a
+        dmbutterfly_l   v10, v11, v8,  v9,  v21, v26, v1.s[2], v1.s[3]   // v10,v11 = t11,  v8,v9   = t10
+        butterfly_4s_r  v4,  v27, v16, v27               // v4  = t4,   v27 = t0
+        dbutterfly_n    v24, v28, v14, v15, v6,  v7,  v12, v13, v6,  v7  // v24     = t9a,  v28     = t13a
+
+        dmbutterfly_l   v12, v13, v14, v15, v30, v17, v1.s[3], v1.s[2]   // v12,v13 = t14,  v14,v15 = t15
+        butterfly_4s_r  v5,  v20, v31, v20               // v5  = t5, v20 = t1
+        dbutterfly_n    v21, v17, v8,  v9,  v12, v13, v6,  v7,  v12, v13 // v21     = t10a, v17     = t14a
+        dbutterfly_n    v26, v30, v10, v11, v14, v15, v8,  v9,  v14, v15 // v26     = t11a, v30     = t15a
+
+        butterfly_4s_r  v6,  v25, v18, v25               // v6  = t6, v25 = t2
+        butterfly_4s_r  v7,  v22, v29, v22               // v7  = t7, v22 = t3
+
+        dmbutterfly_l   v10, v11, v8,  v9,  v19, v28, v0.s[2], v0.s[3]   // v10,v11 = t13,  v8,v9   = t12
+        dmbutterfly_l   v12, v13, v14, v15, v30, v17, v0.s[3], v0.s[2]   // v12,v13 = t14,  v14,v15 = t15
+
+        dbutterfly_n    v18, v30, v8,  v9,  v12, v13, v16, v17, v12, v13 // v18   = out[2], v30     = t14a
+        dbutterfly_n    v29, v17, v10, v11, v14, v15, v12, v13, v14, v15 // v29 = -out[13], v17     = t15a
+        neg             v29.4s, v29.4s                   // v29 = out[13]
+
+        dmbutterfly_l   v10, v11, v8,  v9,  v4,  v5,  v0.s[2], v0.s[3]   // v10,v11 = t5a,  v8,v9   = t4a
+        dmbutterfly_l   v12, v13, v14, v15, v7,  v6,  v0.s[3], v0.s[2]   // v12,v13 = t6a,  v14,v15 = t7a
+
+        butterfly_4s    v2,  v6,  v27, v25               // v2 = out[0], v6 = t2a
+        butterfly_4s    v3,  v7,  v23, v21               // v3 =-out[1], v7 = t10
+
+        dbutterfly_n    v19, v31, v8,  v9,  v12, v13, v4,  v5,  v8,  v9  // v19 = -out[3],  v31 = t6
+        neg             v19.4s, v19.4s                   // v19 = out[3]
+        dbutterfly_n    v28, v16, v10, v11, v14, v15, v4,  v5,  v10, v11 // v28 = out[12],  v16 = t7
+
+        butterfly_4s    v5,  v8,  v20, v22               // v5 =-out[15],v8 = t3a
+        butterfly_4s    v4,  v9,  v24, v26               // v4 = out[14],v9 = t11
+
+        dmbutterfly0    v23, v24, v6,  v8,  v10, v11, v12, v13, v14, v15, 1 // v23 = out[7], v24 = out[8]
+        dmbutterfly0    v21, v26, v30, v17, v10, v11, v12, v13, v14, v15, 1 // v21 = out[5], v26 = out[10]
+        dmbutterfly0    v20, v27, v16, v31, v10, v11, v12, v13, v14, v15    // v20 = out[4], v27 = out[11]
+        dmbutterfly0    v22, v25, v9,  v7,  v10, v11, v12, v13, v14, v15    // v22 = out[6], v25 = out[9]
+
+        neg             v31.4s,  v5.4s                    // v31 = out[15]
+        neg             v17.4s,  v3.4s                    // v17 = out[1]
+
+        mov             v16.16b, v2.16b                  // v16 = out[0], moved out of its temporary
+        mov             v30.16b, v4.16b                  // v30 = out[14], moved out of its temporary
+        ret                                              // out[0..15] are now in v16-v31; v0-v15 have been overwritten
+endfunc
+
+// Helper macros; we can't use these expressions directly within
+// e.g. .irp due to the extra concatenation \(). Therefore wrap
+// them in macros to allow using .irp below.
+.macro load i, src, inc
+        ld1             {v\i\().4s},  [\src], \inc       // load one 128-bit vector, then post-increment the pointer
+.endm
+.macro store i, dst, inc
+        st1             {v\i\().4s},  [\dst], \inc       // store one 128-bit vector, then post-increment the pointer
+.endm
+.macro movi_v i, size, imm
+        movi            v\i\()\size,  \imm               // movi with the register number given as a macro argument
+.endm
+.macro load_clear i, src, inc
+        ld1             {v\i\().4s}, [\src]              // load one 128-bit vector without moving the pointer
+        st1             {v4.4s},  [\src], \inc           // overwrite it with v4 (the callers zero v4 first), then advance
+.endm
+
+.macro load_add_store coef0, coef1, coef2, coef3, coef4, coef5, coef6, coef7
+        srshr           \coef0, \coef0, #6               // final rounding shift by 6, one per coefficient vector, interleaved with the loads
+        ld1             {v4.4h},   [x0], x1              // 4 pixels via x0 into the low half of v4
+        srshr           \coef1, \coef1, #6
+        ld1             {v4.d}[1], [x3], x1              // 4 pixels via x3 into the high half of v4
+        srshr           \coef2, \coef2, #6
+        ld1             {v5.4h},   [x0], x1
+        srshr           \coef3, \coef3, #6
+        uaddw           \coef0, \coef0, v4.4h            // widen the x0 pixels and add them to the coefficients
+        ld1             {v5.d}[1], [x3], x1
+        srshr           \coef4, \coef4, #6
+        uaddw2          \coef1, \coef1, v4.8h            // same for the x3 pixels in the high half
+        ld1             {v6.4h},   [x0], x1
+        srshr           \coef5, \coef5, #6
+        uaddw           \coef2, \coef2, v5.4h
+        ld1             {v6.d}[1], [x3], x1
+        sqxtun          v4.4h,  \coef0                   // saturating narrow to unsigned 16 bit (negative sums become 0)
+        srshr           \coef6, \coef6, #6
+        uaddw2          \coef3, \coef3, v5.8h
+        ld1             {v7.4h},   [x0], x1
+        sqxtun2         v4.8h,  \coef1
+        srshr           \coef7, \coef7, #6
+        uaddw           \coef4, \coef4, v6.4h
+        ld1             {v7.d}[1], [x3], x1
+        umin            v4.8h,  v4.8h,  v8.8h            // clamp against v8 (the callers dup w13 into it)
+        sub             x0,  x0,  x1, lsl #2             // rewind x0 and x3 by the four steps the loads took
+        sub             x3,  x3,  x1, lsl #2
+        sqxtun          v5.4h,  \coef2
+        uaddw2          \coef5, \coef5, v6.8h
+        st1             {v4.4h},   [x0], x1              // stores revisit the same addresses in the same order as the loads
+        sqxtun2         v5.8h,  \coef3
+        uaddw           \coef6, \coef6, v7.4h
+        st1             {v4.d}[1], [x3], x1
+        umin            v5.8h,  v5.8h,  v8.8h
+        sqxtun          v6.4h,  \coef4
+        uaddw2          \coef7, \coef7, v7.8h
+        st1             {v5.4h},   [x0], x1
+        sqxtun2         v6.8h,  \coef5
+        st1             {v5.d}[1], [x3], x1
+        umin            v6.8h,  v6.8h,  v8.8h
+        sqxtun          v7.4h,  \coef6
+        st1             {v6.4h},   [x0], x1
+        sqxtun2         v7.8h,  \coef7
+        st1             {v6.d}[1], [x3], x1
+        umin            v7.8h,  v7.8h,  v8.8h
+        st1             {v7.4h},   [x0], x1              // x0 and x3 end up four steps further on, ready for the next invocation
+        st1             {v7.d}[1], [x3], x1
+.endm
+
+// Read a vertical 4x16 slice out of a 16x16 matrix, do a transform on it,
+// transpose into a horizontal 16x4 slice and store.
+// x0 = dst (temp buffer)
+// x1 = slice offset
+// x2 = src
+// x9 = input stride
+.macro itxfm16_1d_funcs txfm
+function \txfm\()16_1d_4x16_pass1_neon
+        mov             x14, x30                         // keep the return address: the bl below overwrites x30
+
+        movi            v4.4s, #0                        // zero vector that load_clear writes over each loaded row
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        load_clear      \i,  x2,  x9
+.endr
+
+        bl              \txfm\()16                       // 1-D transform in place on v16-v31
+
+        // Do four 4x4 transposes. Originally, v16-v31 contain the
+        // 16 rows. Afterwards, v16-v19, v20-v23, v24-v27 and v28-v31
+        // contain the four transposed 4x4 blocks.
+        transpose_4x4s  v16, v17, v18, v19, v4, v5, v6, v7
+        transpose_4x4s  v20, v21, v22, v23, v4, v5, v6, v7
+        transpose_4x4s  v24, v25, v26, v27, v4, v5, v6, v7
+        transpose_4x4s  v28, v29, v30, v31, v4, v5, v6, v7
+
+        // Store the transposed 4x4 blocks horizontally.
+        cmp             x1,  #12
+        b.eq            1f
+.irp i, 16, 20, 24, 28, 17, 21, 25, 29, 18, 22, 26, 30, 19, 23, 27, 31
+        store           \i,  x0,  #16
+.endr
+        ret             x14                              // return through the saved link register
+1:
+        // Special case: For the last input column (x1 == 12),
+        // which would be stored as the last row in the temp buffer,
+        // don't store the first 4x4 block, but keep it in registers
+        // for the first slice of the second pass (where it is the
+        // last 4x4 block).
+        add             x0,  x0,  #16                    // skip the slot of the block that stays in registers
+        st1             {v20.4s},  [x0], #16
+        st1             {v24.4s},  [x0], #16
+        st1             {v28.4s},  [x0], #16
+        add             x0,  x0,  #16
+        st1             {v21.4s},  [x0], #16
+        st1             {v25.4s},  [x0], #16
+        st1             {v29.4s},  [x0], #16
+        add             x0,  x0,  #16
+        st1             {v22.4s},  [x0], #16
+        st1             {v26.4s},  [x0], #16
+        st1             {v30.4s},  [x0], #16
+        add             x0,  x0,  #16
+        st1             {v23.4s},  [x0], #16
+        st1             {v27.4s},  [x0], #16
+        st1             {v31.4s},  [x0], #16
+
+        mov             v28.16b, v16.16b                 // hand the unstored block to pass 2 in v28-v31
+        mov             v29.16b, v17.16b
+        mov             v30.16b, v18.16b
+        mov             v31.16b, v19.16b
+        ret             x14
+endfunc
+
+// Read a vertical 4x16 slice out of a 16x16 matrix, do a transform on it,
+// load the destination pixels (from a similar 4x16 slice), add and store back.
+// x0 = dst
+// x1 = dst stride
+// x2 = src (temp buffer)
+// x3 = slice offset
+// x9 = temp buffer stride
+function \txfm\()16_1d_4x16_pass2_neon
+        mov             x14, x30                         // keep the return address: the bl below overwrites x30
+
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
+        load            \i,  x2,  x9
+.endr
+        cbz             x3,  1f                          // slice 0: v28-v31 are already in registers, skip loading them
+.irp i, 28, 29, 30, 31
+        load            \i,  x2,  x9
+.endr
+1:
+
+        add             x3,  x0,  x1                     // x3 = dst + one row; the slice offset in x3 is no longer needed
+        lsl             x1,  x1,  #1                     // x0 and x3 each step two rows at a time in load_add_store
+        bl              \txfm\()16                       // 1-D transform in place on v16-v31
+
+        dup             v8.8h, w13                       // v8 = clamp ceiling used by load_add_store
+        load_add_store  v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s
+        load_add_store  v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
+
+        ret             x14
+endfunc
+.endm
+
+itxfm16_1d_funcs idct                                    // defines idct16_1d_4x16_pass1_neon and idct16_1d_4x16_pass2_neon
+itxfm16_1d_funcs iadst                                   // defines iadst16_1d_4x16_pass1_neon and iadst16_1d_4x16_pass2_neon
+
+// This is the minimum eob value for each subpartition, in increments of 4
+const min_eob_idct_idct_16, align=4
+        .short  0, 10, 38, 89                            // 16-bit entries, read one per 4-column slice by vp9_idct_idct_16x16_add_16_neon
+endconst
+
+.macro itxfm_func16x16 txfm1, txfm2
+function vp9_\txfm1\()_\txfm2\()_16x16_add_16_neon
+.ifc \txfm1\()_\txfm2,idct_idct
+        cmp             w3,  #1                          // w3 is the eob; a value of 1 takes the DC-only path
+        b.eq            idct16x16_dc_add_neon
+.endif
+        mov             x15, x30                         // keep the return address (the 1-D helpers use x14 for theirs)
+        // iadst16 requires clobbering v8-v15, idct16 only clobbers v8-v9.
+.ifnc \txfm1\()_\txfm2,idct_idct
+        stp             d14, d15, [sp, #-0x10]!
+        stp             d12, d13, [sp, #-0x10]!
+        stp             d10, d11, [sp, #-0x10]!
+.endif
+        stp             d8,  d9,  [sp, #-0x10]!
+
+        sub             sp,  sp,  #1024                  // temp buffer for the pass 1 output: 16 rows of 64 bytes
+
+        mov             x4,  x0                          // keep dst, stride and coefs: x0-x2 are reused as helper arguments
+        mov             x5,  x1
+        mov             x6,  x2
+
+        movrel          x10, idct_coeffs
+.ifnc \txfm1\()_\txfm2,idct_idct
+        movrel          x11, iadst16_coeffs
+.endif
+.ifc \txfm1,idct
+        ld1             {v0.8h,v1.8h}, [x10]             // idct16 expects its coefficients sign-extended to 32 bit in v0-v3
+        sxtl            v2.4s,  v1.4h
+        sxtl2           v3.4s,  v1.8h
+        sxtl2           v1.4s,  v0.8h
+        sxtl            v0.4s,  v0.4h
+.endif
+        mov             x9,  #64                         // byte stride used for both the coefficient and the temp buffer
+
+.ifc \txfm1\()_\txfm2,idct_idct
+        cmp             w3,  #10
+        b.le            idct16x16_quarter_add_16_neon    // eob <= 10: variant that reads only the first 4 rows of one slice
+        cmp             w3,  #38
+        b.le            idct16x16_half_add_16_neon       // eob <= 38: variant that reads only the first 8 rows of two slices
+
+        movrel          x12, min_eob_idct_idct_16, 2     // eob thresholds; the trailing 2 is presumably an offset skipping entry 0 (TODO confirm in movrel)
+.endif
+
+.irp i, 0, 4, 8, 12
+        add             x0,  sp,  #(\i*64)
+.ifc \txfm1\()_\txfm2,idct_idct
+.if \i > 0
+        ldrh            w1,  [x12], #2                   // next eob threshold
+        cmp             w3,  w1
+        mov             x1,  #(16 - \i)/4                // slices still to do; becomes the zero-fill loop count at 2:
+        b.le            1f                               // eob <= threshold: zero-fill the rest of the temp buffer instead
+.endif
+.endif
+        mov             x1,  #\i
+        add             x2,  x6,  #(\i*4)
+        bl              \txfm1\()16_1d_4x16_pass1_neon
+.endr
+.ifc \txfm1\()_\txfm2,iadst_idct
+        ld1             {v0.8h,v1.8h}, [x10]             // iadst16 overwrote v0-v3; load the idct coefficients for pass 2
+        sxtl            v2.4s,  v1.4h
+        sxtl2           v3.4s,  v1.8h
+        sxtl2           v1.4s,  v0.8h
+        sxtl            v0.4s,  v0.4h
+.endif
+
+.ifc \txfm1\()_\txfm2,idct_idct
+        b               3f                               // all four slices were transformed: nothing to zero-fill
+1:
+        // Set v28-v31 to zero, for the in-register passthrough of
+        // coefficients to pass 2.
+        movi            v28.4s,  #0
+        movi            v29.4s,  #0
+        movi            v30.4s,  #0
+        movi            v31.4s,  #0
+2:
+        subs            x1,  x1,  #1                     // one skipped slice (4 temp rows) per iteration
+.rept 4
+        st1             {v28.4s,v29.4s,v30.4s,v31.4s}, [x0], x9
+.endr
+        b.ne            2b
+3:
+.endif
+
+.irp i, 0, 4, 8, 12
+        add             x0,  x4,  #(\i*2)
+        mov             x1,  x5
+        add             x2,  sp,  #(\i*4)
+        mov             x3,  #\i
+        bl              \txfm2\()16_1d_4x16_pass2_neon
+.endr
+
+        add             sp,  sp,  #1024                  // release the temp buffer
+        ldp             d8,  d9,  [sp], 0x10             // restore in the reverse order of the pushes above
+.ifnc \txfm1\()_\txfm2,idct_idct
+        ldp             d10, d11, [sp], 0x10
+        ldp             d12, d13, [sp], 0x10
+        ldp             d14, d15, [sp], 0x10
+.endif
+        ret             x15
+endfunc
+
+function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_10_neon, export=1
+        mov             x13, #0x03ff                     // 10-bit pixel max, used as the clamp ceiling
+        b               vp9_\txfm1\()_\txfm2\()_16x16_add_16_neon
+endfunc
+
+function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_12_neon, export=1
+        mov             x13, #0x0fff                     // 12-bit pixel max, used as the clamp ceiling
+        b               vp9_\txfm1\()_\txfm2\()_16x16_add_16_neon
+endfunc
+.endm
+
+itxfm_func16x16 idct,  idct                              // each line defines vp9_<txfm1>_<txfm2>_16x16_add_16_neon plus the exported _10 and _12 entry points
+itxfm_func16x16 iadst, idct
+itxfm_func16x16 idct,  iadst
+itxfm_func16x16 iadst, iadst
+
+function idct16_1d_4x16_pass1_quarter_neon
+        mov             x14, x30                         // keep the return address: the bl below overwrites x30
+
+        movi            v4.4s, #0                        // zero vector that load_clear writes over each loaded row
+.irp i, 16, 17, 18, 19
+        load_clear      \i,  x2,  x9
+.endr
+
+        bl              idct16_quarter                   // 1-D transform that reads only v16-v19
+
+        // Do four 4x4 transposes. Originally, v16-v31 contain the
+        // 16 rows. Afterwards, v16-v19, v20-v23, v24-v27 and v28-v31
+        // contain the four transposed 4x4 blocks.
+        transpose_4x4s  v16, v17, v18, v19, v4, v5, v6, v7
+        transpose_4x4s  v20, v21, v22, v23, v4, v5, v6, v7
+        transpose_4x4s  v24, v25, v26, v27, v4, v5, v6, v7
+        transpose_4x4s  v28, v29, v30, v31, v4, v5, v6, v7
+
+        // Store the transposed 4x4 blocks horizontally.
+        // The first 4x4 block is kept in registers for the second pass,
+        // store the rest in the temp buffer.
+        add             x0,  x0,  #16                    // skip the slot of the block that stays in v16-v19
+        st1             {v20.4s},  [x0], #16
+        st1             {v24.4s},  [x0], #16
+        st1             {v28.4s},  [x0], #16
+        add             x0,  x0,  #16
+        st1             {v21.4s},  [x0], #16
+        st1             {v25.4s},  [x0], #16
+        st1             {v29.4s},  [x0], #16
+        add             x0,  x0,  #16
+        st1             {v22.4s},  [x0], #16
+        st1             {v26.4s},  [x0], #16
+        st1             {v30.4s},  [x0], #16
+        add             x0,  x0,  #16
+        st1             {v23.4s},  [x0], #16
+        st1             {v27.4s},  [x0], #16
+        st1             {v31.4s},  [x0], #16
+        ret             x14
+endfunc
+
+function idct16_1d_4x16_pass2_quarter_neon
+        mov             x14, x30                         // keep the return address: the bl below overwrites x30
+
+        // Only load the top 4 lines, and only do it for the later slices.
+        // For the first slice, v16-v19 is kept in registers from the first pass.
+        cbz             x3,  1f
+.irp i, 16, 17, 18, 19
+        load            \i,  x2,  x9
+.endr
+1:
+
+        add             x3,  x0,  x1                     // x3 = dst + one row; the slice offset in x3 is no longer needed
+        lsl             x1,  x1,  #1                     // x0 and x3 each step two rows at a time in load_add_store
+        bl              idct16_quarter
+
+        dup             v8.8h, w13                       // v8 = clamp ceiling used by load_add_store
+        load_add_store  v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s
+        load_add_store  v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
+
+        ret             x14
+endfunc
+
+function idct16_1d_4x16_pass1_half_neon
+        mov             x14, x30                         // keep the return address: the bl below overwrites x30
+
+        movi            v4.4s, #0                        // zero vector that load_clear writes over each loaded row
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load_clear      \i,  x2,  x9
+.endr
+
+        bl              idct16_half                      // 1-D transform for the case where only v16-v23 were loaded
+
+        // Do four 4x4 transposes. Originally, v16-v31 contain the
+        // 16 rows. Afterwards, v16-v19, v20-v23, v24-v27 and v28-v31
+        // contain the four transposed 4x4 blocks.
+        transpose_4x4s  v16, v17, v18, v19, v4, v5, v6, v7
+        transpose_4x4s  v20, v21, v22, v23, v4, v5, v6, v7
+        transpose_4x4s  v24, v25, v26, v27, v4, v5, v6, v7
+        transpose_4x4s  v28, v29, v30, v31, v4, v5, v6, v7
+
+        // Store the transposed 4x4 blocks horizontally.
+        cmp             x1,  #4
+        b.eq            1f
+.irp i, 16, 20, 24, 28, 17, 21, 25, 29, 18, 22, 26, 30, 19, 23, 27, 31
+        store           \i,  x0,  #16
+.endr
+        ret             x14
+1:
+        // Special case: For the second input column (x1 == 4),
+        // which would be stored as the second row in the temp buffer,
+        // don't store the first 4x4 block, but keep it in registers
+        // for the first slice of the second pass (where it is the
+        // second 4x4 block).
+        add             x0,  x0,  #16                    // skip the slot of the block that stays in registers
+        st1             {v20.4s},  [x0], #16
+        st1             {v24.4s},  [x0], #16
+        st1             {v28.4s},  [x0], #16
+        add             x0,  x0,  #16
+        st1             {v21.4s},  [x0], #16
+        st1             {v25.4s},  [x0], #16
+        st1             {v29.4s},  [x0], #16
+        add             x0,  x0,  #16
+        st1             {v22.4s},  [x0], #16
+        st1             {v26.4s},  [x0], #16
+        st1             {v30.4s},  [x0], #16
+        add             x0,  x0,  #16
+        st1             {v23.4s},  [x0], #16
+        st1             {v27.4s},  [x0], #16
+        st1             {v31.4s},  [x0], #16
+
+        mov             v20.16b, v16.16b                 // hand the unstored block to pass 2 in v20-v23
+        mov             v21.16b, v17.16b
+        mov             v22.16b, v18.16b
+        mov             v23.16b, v19.16b
+        ret             x14
+endfunc
+
+function idct16_1d_4x16_pass2_half_neon
+        mov             x14, x30                         // keep the return address: the bl below overwrites x30
+
+.irp i, 16, 17, 18, 19
+        load            \i,  x2,  x9
+.endr
+        cbz             x3,  1f                          // slice 0: v20-v23 were left in registers by pass 1, skip loading them
+.irp i, 20, 21, 22, 23
+        load            \i,  x2,  x9
+.endr
+1:
+
+        add             x3,  x0,  x1                     // x3 = dst + one row; the slice offset in x3 is no longer needed
+        lsl             x1,  x1,  #1                     // x0 and x3 each step two rows at a time in load_add_store
+        bl              idct16_half
+
+        dup             v8.8h, w13                       // v8 = clamp ceiling used by load_add_store
+        load_add_store  v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s
+        load_add_store  v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
+
+        ret             x14
+endfunc
+
+.macro idct16_partial size
+function idct16x16_\size\()_add_16_neon
+        add             x0,  sp,  #(0*64)                // entered by b.le from vp9_idct_idct_16x16_add_16_neon: sp, x4-x6, x9 and x15 are already set up
+        mov             x1,  #0
+        add             x2,  x6,  #(0*4)
+        bl              idct16_1d_4x16_pass1_\size\()_neon
+.ifc \size,half
+        add             x0,  sp,  #(4*64)                // the half variant also runs pass 1 on the second slice
+        mov             x1,  #4
+        add             x2,  x6,  #(4*4)
+        bl              idct16_1d_4x16_pass1_\size\()_neon
+.endif
+
+.irp i, 0, 4, 8, 12
+        add             x0,  x4,  #(\i*2)
+        mov             x1,  x5
+        add             x2,  sp,  #(\i*4)
+        mov             x3,  #\i
+        bl              idct16_1d_4x16_pass2_\size\()_neon
+.endr
+
+        add             sp,  sp,  #1024                  // undo the stack frame that the branching function set up
+        ldp             d8,  d9,  [sp], 0x10
+        ret             x15                              // return address saved by the branching function
+endfunc
+.endm
+
+idct16_partial quarter
+idct16_partial half
+
+function idct32x32_dc_add_neon
+        movrel          x4,  idct_coeffs                 // DC-only add: x0 = dst, x1 = dst stride, x2 = coefs, w13 = clamp ceiling
+        ld1             {v0.4h}, [x4]                    // first four 16-bit entries of idct_coeffs
+        sxtl            v0.4s,  v0.4h                    // sign-extend them to 32 bit
+
+        movi            v1.4h,  #0                       // zero, stored over the DC coefficient below
+
+        ld1             {v2.s}[0],  [x2]                 // load the 32-bit DC coefficient
+        smull           v2.2d,  v2.2s,  v0.s[0]          // dc * idct_coeffs[0], 64-bit product
+        rshrn           v2.2s,  v2.2d,  #14              // rounding shift right by 14, narrow to 32 bit
+        smull           v2.2d,  v2.2s,  v0.s[0]          // same scaling applied a second time
+        rshrn           v2.2s,  v2.2d,  #14
+        st1             {v1.s}[0],  [x2]                 // clear the DC coefficient in the source buffer
+        dup             v2.4s,  v2.s[0]                  // broadcast to all four lanes
+
+        srshr           v0.4s,  v2.4s,  #6               // v0 = rounded (dc >> 6), the value added to every pixel
+
+        mov             x3,  x0                          // x0 = load pointer, x3 = store pointer
+        mov             x4,  #32                         // rows left
+        sub             x1,  x1,  #32                    // the first load/store of each row already advances by 32 bytes
+        dup             v31.8h, w13                      // v31 = clamp ceiling taken from w13
+1:
+        // Loop to add the constant v0 into all 32x32 outputs
+        subs            x4,  x4,  #1                     // one row per iteration
+        ld1             {v1.8h,v2.8h},  [x0], #32        // first 16 pixels of the row
+        uaddw           v16.4s, v0.4s,  v1.4h            // widen the pixels to 32 bit and add the constant
+        uaddw2          v17.4s, v0.4s,  v1.8h
+        ld1             {v3.8h,v4.8h},  [x0], x1         // last 16 pixels, then on to the next row
+        uaddw           v18.4s, v0.4s,  v2.4h
+        uaddw2          v19.4s, v0.4s,  v2.8h
+        uaddw           v20.4s, v0.4s,  v3.4h
+        uaddw2          v21.4s, v0.4s,  v3.8h
+        uaddw           v22.4s, v0.4s,  v4.4h
+        uaddw2          v23.4s, v0.4s,  v4.8h
+        sqxtun          v1.4h,  v16.4s                   // saturating narrow to unsigned 16 bit (negative sums become 0)
+        sqxtun2         v1.8h,  v17.4s
+        sqxtun          v2.4h,  v18.4s
+        sqxtun2         v2.8h,  v19.4s
+        sqxtun          v3.4h,  v20.4s
+        sqxtun2         v3.8h,  v21.4s
+        sqxtun          v4.4h,  v22.4s
+        sqxtun2         v4.8h,  v23.4s
+        umin            v1.8h,  v1.8h,  v31.8h           // clamp to the ceiling in v31
+        umin            v2.8h,  v2.8h,  v31.8h
+        st1             {v1.8h,v2.8h},  [x3], #32
+        umin            v3.8h,  v3.8h,  v31.8h
+        umin            v4.8h,  v4.8h,  v31.8h
+        st1             {v3.8h,v4.8h},  [x3], x1
+        b.ne            1b                               // flags still come from the subs above
+
+        ret
+endfunc
+
+.macro idct32_end
+        butterfly_4s    v16, v5,  v4,  v5  // v16 = t16a, v5  = t19a
+        butterfly_4s    v17, v20, v23, v20 // v17 = t17,  v20 = t18
+        butterfly_4s    v18, v6,  v7,  v6  // v18 = t23a, v6  = t20a
+        butterfly_4s    v19, v21, v22, v21 // v19 = t22,  v21 = t21
+        butterfly_4s    v4,  v28, v28, v30 // v4  = t24a, v28 = t27a
+        butterfly_4s    v23, v26, v25, v26 // v23 = t25,  v26 = t26
+        butterfly_4s    v7,  v8,  v29, v31 // v7  = t31a, v8  = t28a
+        butterfly_4s    v22, v27, v24, v27 // v22 = t30,  v27 = t29
+
+        dmbutterfly     v27, v20, v0.s[2], v0.s[3], v24, v25, v30, v31        // v27 = t18a, v20 = t29a
+        dmbutterfly     v8,  v5,  v0.s[2], v0.s[3], v24, v25, v30, v31        // v8  = t19,  v5  = t28
+        dmbutterfly     v28, v6,  v0.s[2], v0.s[3], v24, v25, v30, v31, neg=1 // v28 = t27,  v6  = t20
+        dmbutterfly     v26, v21, v0.s[2], v0.s[3], v24, v25, v30, v31, neg=1 // v26 = t26a, v21 = t21a
+
+        butterfly_4s    v31, v24, v7,  v4  // v31 = t31,  v24 = t24
+        butterfly_4s    v30, v25, v22, v23 // v30 = t30a, v25 = t25a
+        butterfly_4s_r  v23, v16, v16, v18 // v23 = t23,  v16 = t16
+        butterfly_4s_r  v22, v17, v17, v19 // v22 = t22a, v17 = t17a
+        butterfly_4s    v18, v21, v27, v21 // v18 = t18,  v21 = t21
+        butterfly_4s_r  v27, v28, v5,  v28 // v27 = t27a, v28 = t28a
+        butterfly_4s    v29, v26, v20, v26 // v29 = t29,  v26 = t26
+        butterfly_4s    v19, v20, v8,  v6  // v19 = t19a, v20 = t20
+
+        dmbutterfly0    v27, v20, v27, v20, v4, v5, v6, v7, v8, v9 // v27 = t27,  v20 = t20
+        dmbutterfly0    v26, v21, v26, v21, v4, v5, v6, v7, v8, v9 // v26 = t26a, v21 = t21a
+        dmbutterfly0    v25, v22, v25, v22, v4, v5, v6, v7, v8, v9 // v25 = t25,  v22 = t22
+        dmbutterfly0    v24, v23, v24, v23, v4, v5, v6, v7, v8, v9 // v24 = t24a, v23 = t23a
+        ret                                // the macro ends its enclosing function: it returns with t16..t31 in v16-v31
+.endm
+
+function idct32_odd
+        dmbutterfly     v16, v31, v10.s[0], v10.s[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a
+        dmbutterfly     v24, v23, v10.s[2], v10.s[3], v4, v5, v6, v7 // v24 = t17a, v23 = t30a
+        dmbutterfly     v20, v27, v11.s[0], v11.s[1], v4, v5, v6, v7 // v20 = t18a, v27 = t29a
+        dmbutterfly     v28, v19, v11.s[2], v11.s[3], v4, v5, v6, v7 // v28 = t19a, v19 = t28a
+        dmbutterfly     v18, v29, v12.s[0], v12.s[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a
+        dmbutterfly     v26, v21, v12.s[2], v12.s[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a
+        dmbutterfly     v22, v25, v13.s[0], v13.s[1], v4, v5, v6, v7 // v22 = t22a, v25 = t25a
+        dmbutterfly     v30, v17, v13.s[2], v13.s[3], v4, v5, v6, v7 // v30 = t23a, v17 = t24a
+
+        butterfly_4s    v4,  v24, v16, v24 // v4  = t16, v24 = t17
+        butterfly_4s    v5,  v20, v28, v20 // v5  = t19, v20 = t18
+        butterfly_4s    v6,  v26, v18, v26 // v6  = t20, v26 = t21
+        butterfly_4s    v7,  v22, v30, v22 // v7  = t23, v22 = t22
+        butterfly_4s    v28, v25, v17, v25 // v28 = t24, v25 = t25
+        butterfly_4s    v30, v21, v29, v21 // v30 = t27, v21 = t26
+        butterfly_4s    v29, v23, v31, v23 // v29 = t31, v23 = t30
+        butterfly_4s    v31, v27, v19, v27 // v31 = t28, v27 = t29
+
+        dmbutterfly     v23, v24, v1.s[0], v1.s[1], v16, v17, v18, v19        // v23 = t17a, v24 = t30a
+        dmbutterfly     v27, v20, v1.s[0], v1.s[1], v16, v17, v18, v19, neg=1 // v27 = t29a, v20 = t18a
+        dmbutterfly     v21, v26, v1.s[2], v1.s[3], v16, v17, v18, v19        // v21 = t21a, v26 = t26a
+        dmbutterfly     v25, v22, v1.s[2], v1.s[3], v16, v17, v18, v19, neg=1 // v25 = t25a, v22 = t22a
+        idct32_end                         // shared tail: finishes t16..t31 in v16-v31 and returns; reads coefficients from v0, v1 and v10-v13
+endfunc
+
+function idct32_odd_half // Odd-input part for the _half pass functions, which load only v16-v23 before calling this
+        dmbutterfly_h1  v16, v31, v10.s[0], v10.s[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a
+        dmbutterfly_h2  v24, v23, v10.s[2], v10.s[3], v4, v5, v6, v7 // v24 = t17a, v23 = t30a
+        dmbutterfly_h1  v20, v27, v11.s[0], v11.s[1], v4, v5, v6, v7 // v20 = t18a, v27 = t29a
+        dmbutterfly_h2  v28, v19, v11.s[2], v11.s[3], v4, v5, v6, v7 // v28 = t19a, v19 = t28a
+        dmbutterfly_h1  v18, v29, v12.s[0], v12.s[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a
+        dmbutterfly_h2  v26, v21, v12.s[2], v12.s[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a
+        dmbutterfly_h1  v22, v25, v13.s[0], v13.s[1], v4, v5, v6, v7 // v22 = t22a, v25 = t25a
+        dmbutterfly_h2  v30, v17, v13.s[2], v13.s[3], v4, v5, v6, v7 // v30 = t23a, v17 = t24a
+
+        butterfly_4s    v4,  v24, v16, v24 // v4  = t16, v24 = t17
+        butterfly_4s    v5,  v20, v28, v20 // v5  = t19, v20 = t18
+        butterfly_4s    v6,  v26, v18, v26 // v6  = t20, v26 = t21
+        butterfly_4s    v7,  v22, v30, v22 // v7  = t23, v22 = t22
+        butterfly_4s    v28, v25, v17, v25 // v28 = t24, v25 = t25
+        butterfly_4s    v30, v21, v29, v21 // v30 = t27, v21 = t26
+        butterfly_4s    v29, v23, v31, v23 // v29 = t31, v23 = t30
+        butterfly_4s    v31, v27, v19, v27 // v31 = t28, v27 = t29
+
+        dmbutterfly     v23, v24, v1.s[0], v1.s[1], v16, v17, v18, v19        // v23 = t17a, v24 = t30a
+        dmbutterfly     v27, v20, v1.s[0], v1.s[1], v16, v17, v18, v19, neg=1 // v27 = t29a, v20 = t18a
+        dmbutterfly     v21, v26, v1.s[2], v1.s[3], v16, v17, v18, v19        // v21 = t21a, v26 = t26a
+        dmbutterfly     v25, v22, v1.s[2], v1.s[3], v16, v17, v18, v19, neg=1 // v25 = t25a, v22 = t22a
+        idct32_end // same tail as idct32_odd; from the first butterfly_4s onwards the two functions are identical
+endfunc
+
+function idct32_odd_quarter // Odd-input part for the _quarter pass functions, which load only v16-v19 before calling this
+        dsmull_h        v4,  v5,  v16, v10.s[0] // each of the four inputs is combined with two coefficients, giving eight register pairs
+        dsmull_h        v28, v29, v19, v11.s[3]
+        dsmull_h        v30, v31, v16, v10.s[1]
+        dsmull_h        v22, v23, v17, v13.s[2]
+        dsmull_h        v7,  v6,  v17, v13.s[3]
+        dsmull_h        v26, v27, v19, v11.s[2]
+        dsmull_h        v20, v21, v18, v12.s[0]
+        dsmull_h        v24, v25, v18, v12.s[1]
+
+        neg             v28.2d, v28.2d // negate the pair v28/v29 produced from v19 and v11.s[3]
+        neg             v29.2d, v29.2d
+        neg             v7.2d,  v7.2d // negate the pair v7/v6 produced from v17 and v13.s[3]
+        neg             v6.2d,  v6.2d
+
+        drshrn_h        v4,  v4,  v5,  #14 // fold each pair back into a single register with a shift of 14
+        drshrn_h        v5,  v28, v29, #14
+        drshrn_h        v29, v30, v31, #14
+        drshrn_h        v28, v22, v23, #14
+        drshrn_h        v7,  v7,  v6,  #14
+        drshrn_h        v31, v26, v27, #14
+        drshrn_h        v6,  v20, v21, #14
+        drshrn_h        v30, v24, v25, #14
+
+        dmbutterfly_l   v16, v17, v18, v19, v29, v4,  v1.s[0], v1.s[1]
+        dmbutterfly_l   v27, v26, v20, v21, v31, v5,  v1.s[0], v1.s[1]
+        drshrn_h        v23, v16, v17, #14
+        drshrn_h        v24, v18, v19, #14
+        neg             v20.2d, v20.2d // negate the second output pair (v20/v21) of the dmbutterfly_l above before narrowing
+        neg             v21.2d, v21.2d
+        drshrn_h        v27, v27, v26, #14
+        drshrn_h        v20, v20, v21, #14
+        dmbutterfly_l   v16, v17, v18, v19, v30, v6,  v1.s[2], v1.s[3]
+        drshrn_h        v21, v16, v17, #14
+        drshrn_h        v26, v18, v19, #14
+        dmbutterfly_l   v16, v17, v18, v19, v28, v7,  v1.s[2], v1.s[3]
+        drshrn_h        v25, v16, v17, #14
+        neg             v18.2d, v18.2d // negate the second output pair (v18/v19) before narrowing into v22
+        neg             v19.2d, v19.2d
+        drshrn_h        v22, v18, v19, #14
+
+        idct32_end // same shared tail as idct32_odd and idct32_odd_half
+endfunc
+
+.macro idct32_funcs suffix // emits a pass1 and a pass2 function; instantiated below with an empty suffix, _quarter and _half
+// Do a 32-point IDCT of a 4x32 slice out of a 32x32 matrix.
+// The 32-point IDCT can be decomposed into two 16-point IDCTs;
+// a normal IDCT16 with every other input component (the even ones, with
+// each output written twice), followed by a separate 16-point IDCT
+// of the odd inputs, added/subtracted onto the outputs of the first idct16.
+// x0 = dst (temp buffer)
+// x1 = unused
+// x2 = src
+// x9 = double input stride
+function idct32_1d_4x32_pass1\suffix\()_neon
+        mov             x14, x30 // keep the return address; the bl calls below overwrite x30
+
+        movi            v4.4s,  #0 // zero vector, presumably what load_clear writes back over the input (macro not visible here)
+
+        // v16 = IN(0), v17 = IN(2) ... v31 = IN(30)
+.ifb \suffix
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        load_clear      \i, x2, x9
+.endr
+.endif
+.ifc \suffix,_quarter
+.irp i, 16, 17, 18, 19
+        load_clear      \i, x2, x9
+.endr
+.endif
+.ifc \suffix,_half
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load_clear      \i, x2, x9
+.endr
+.endif
+
+        bl              idct16\suffix // 16-point IDCT of the even inputs
+
+        // Do four 4x4 transposes. Originally, v16-v31 contain the
+        // 16 rows. Afterwards, v16-v19, v20-v23, v24-v27 and v28-v31
+        // contain the four transposed 4x4 blocks.
+        transpose_4x4s  v16, v17, v18, v19, v4, v5, v6, v7
+        transpose_4x4s  v20, v21, v22, v23, v4, v5, v6, v7
+        transpose_4x4s  v24, v25, v26, v27, v4, v5, v6, v7
+        transpose_4x4s  v28, v29, v30, v31, v4, v5, v6, v7
+
+        // Store the registers a, b, c, d horizontally, followed by the
+        // same registers d, c, b, a mirrored.
+.macro store_rev a, b, c, d
+        // There's no rev128 instruction, but we reverse each 64 bit
+        // half, and then flip them using an ext with 8 bytes offset.
+        rev64           v7.4s, \d
+        st1             {\a},  [x0], #16
+        ext             v7.16b, v7.16b, v7.16b, #8
+        st1             {\b},  [x0], #16
+        rev64           v6.4s, \c
+        st1             {\c},  [x0], #16
+        ext             v6.16b, v6.16b, v6.16b, #8
+        st1             {\d},  [x0], #16
+        rev64           v5.4s, \b
+        st1             {v7.4s},  [x0], #16
+        ext             v5.16b, v5.16b, v5.16b, #8
+        st1             {v6.4s},  [x0], #16
+        rev64           v4.4s, \a
+        st1             {v5.4s},  [x0], #16
+        ext             v4.16b, v4.16b, v4.16b, #8
+        st1             {v4.4s},  [x0], #16
+.endm
+        store_rev       v16.4s, v20.4s, v24.4s, v28.4s
+        store_rev       v17.4s, v21.4s, v25.4s, v29.4s
+        store_rev       v18.4s, v22.4s, v26.4s, v30.4s
+        store_rev       v19.4s, v23.4s, v27.4s, v31.4s
+        sub             x0,  x0,  #512 // rewind dst over the 4 x 8 x 16 bytes just stored
+.purgem store_rev
+
+        // Move x2 back to the start of the input, and move
+        // to the first odd row
+.ifb \suffix
+        sub             x2,  x2,  x9, lsl #4
+.endif
+.ifc \suffix,_quarter
+        sub             x2,  x2,  x9, lsl #2
+.endif
+.ifc \suffix,_half
+        sub             x2,  x2,  x9, lsl #3
+.endif
+        add             x2,  x2,  #128 // one row further; the caller sets the double stride x9 to 256
+
+        movi            v4.4s,  #0 // re-zero v4, which the transposes and store_rev above used as scratch
+        // v16 = IN(1), v17 = IN(3) ... v31 = IN(31)
+.ifb \suffix
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        load_clear      \i, x2, x9
+.endr
+.endif
+.ifc \suffix,_quarter
+.irp i, 16, 17, 18, 19
+        load_clear      \i, x2, x9
+.endr
+.endif
+.ifc \suffix,_half
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load_clear      \i, x2, x9
+.endr
+.endif
+
+        bl              idct32_odd\suffix // 16-point transform of the odd inputs
+
+        transpose_4x4s  v31, v30, v29, v28, v4, v5, v6, v7
+        transpose_4x4s  v27, v26, v25, v24, v4, v5, v6, v7
+        transpose_4x4s  v23, v22, v21, v20, v4, v5, v6, v7
+        transpose_4x4s  v19, v18, v17, v16, v4, v5, v6, v7
+
+        // Store the registers a, b, c, d horizontally,
+        // adding into the output first, and then mirrored,
+        // subtracted from the output. a and b are reversed in place (hence their extra .16b names); c and d are reversed into v8/v9.
+.macro store_rev a, b, c, d, a16b, b16b
+        ld1             {v4.4s},  [x0]
+        rev64           v9.4s, \d
+        add             v4.4s, v4.4s, \a
+        st1             {v4.4s},  [x0], #16
+        rev64           v8.4s, \c
+        ld1             {v4.4s},  [x0]
+        ext             v9.16b, v9.16b, v9.16b, #8
+        add             v4.4s, v4.4s, \b
+        st1             {v4.4s},  [x0], #16
+        ext             v8.16b, v8.16b, v8.16b, #8
+        ld1             {v4.4s},  [x0]
+        rev64           \b, \b
+        add             v4.4s, v4.4s, \c
+        st1             {v4.4s},  [x0], #16
+        rev64           \a, \a
+        ld1             {v4.4s},  [x0]
+        ext             \b16b, \b16b, \b16b, #8
+        add             v4.4s, v4.4s, \d
+        st1             {v4.4s},  [x0], #16
+        ext             \a16b, \a16b, \a16b, #8
+        ld1             {v4.4s},  [x0]
+        sub             v4.4s, v4.4s, v9.4s
+        st1             {v4.4s},  [x0], #16
+        ld1             {v4.4s},  [x0]
+        sub             v4.4s, v4.4s, v8.4s
+        st1             {v4.4s},  [x0], #16
+        ld1             {v4.4s},  [x0]
+        sub             v4.4s, v4.4s, \b
+        st1             {v4.4s},  [x0], #16
+        ld1             {v4.4s},  [x0]
+        sub             v4.4s, v4.4s, \a
+        st1             {v4.4s},  [x0], #16
+.endm
+
+        store_rev       v31.4s, v27.4s, v23.4s, v19.4s, v31.16b, v27.16b
+        store_rev       v30.4s, v26.4s, v22.4s, v18.4s, v30.16b, v26.16b
+        store_rev       v29.4s, v25.4s, v21.4s, v17.4s, v29.16b, v25.16b
+        store_rev       v28.4s, v24.4s, v20.4s, v16.4s, v28.16b, v24.16b
+.purgem store_rev
+        ret             x14 // return through the address saved on entry
+endfunc
+
+// This is mostly the same as 4x32_pass1, but without the transpose,
+// and use the source as temp buffer between the two idct passes, and
+// add into the destination.
+// x0 = dst
+// x1 = dst stride
+// x2 = src (temp buffer)
+// x7 = negative double temp buffer stride
+// x9 = double temp buffer stride
+function idct32_1d_4x32_pass2\suffix\()_neon
+        mov             x14, x30 // keep the return address; the bl calls below overwrite x30
+
+        // v16 = IN(0), v17 = IN(2) ... v31 = IN(30)
+.ifb \suffix
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #4
+.endif
+.ifc \suffix,_quarter
+.irp i, 16, 17, 18, 19
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #2
+.endif
+.ifc \suffix,_half
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #3
+.endif
+
+        bl              idct16\suffix // 16-point IDCT of the even inputs
+
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        store           \i, x2, x9
+.endr
+
+        sub             x2,  x2,  x9, lsl #4
+        add             x2,  x2,  #128 // move to the first odd row
+
+        // v16 = IN(1), v17 = IN(3) ... v31 = IN(31)
+.ifb \suffix
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #4
+.endif
+.ifc \suffix,_quarter
+.irp i, 16, 17, 18, 19
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #2
+.endif
+.ifc \suffix,_half
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #3
+.endif
+        sub             x2,  x2,  #128 // back to the even rows, which now hold the stored idct16 outputs
+
+        bl              idct32_odd\suffix // 16-point transform of the odd inputs
+
+.macro load_acc_store a, b, c, d, neg=0
+.if \neg == 0
+        ld1             {v4.4s},  [x2], x9 // neg=0: walk the stored rows forwards and add the odd part
+        ld1             {v5.4s},  [x2], x9
+        add             v4.4s, v4.4s, \a
+        ld1             {v6.4s},  [x2], x9
+        add             v5.4s, v5.4s, \b
+        ld1             {v7.4s},  [x2], x9
+        add             v6.4s, v6.4s, \c
+        add             v7.4s, v7.4s, \d
+.else
+        ld1             {v4.4s},  [x2], x7 // neg=1: walk the stored rows backwards (x7 is the negative stride) and subtract
+        ld1             {v5.4s},  [x2], x7
+        sub             v4.4s, v4.4s, \a
+        ld1             {v6.4s},  [x2], x7
+        sub             v5.4s, v5.4s, \b
+        ld1             {v7.4s},  [x2], x7
+        sub             v6.4s, v6.4s, \c
+        sub             v7.4s, v7.4s, \d
+.endif
+        ld1             {v8.4h},   [x0], x1 // four dst rows of four 16 bit pixels, two rows per register
+        ld1             {v8.d}[1], [x0], x1
+        srshr           v4.4s, v4.4s, #6 // rounding shift right by 6
+        ld1             {v9.4h},   [x0], x1
+        srshr           v5.4s, v5.4s, #6
+        uaddw           v4.4s, v4.4s, v8.4h // add the existing dst pixels
+        ld1             {v9.d}[1], [x0], x1
+        srshr           v6.4s, v6.4s, #6
+        uaddw2          v5.4s, v5.4s, v8.8h
+        srshr           v7.4s, v7.4s, #6
+        sub             x0,  x0,  x1, lsl #2 // rewind dst by the four rows just read
+        uaddw           v6.4s, v6.4s, v9.4h
+        sqxtun          v4.4h, v4.4s // saturating narrow to unsigned 16 bit
+        uaddw2          v7.4s, v7.4s, v9.8h
+        sqxtun2         v4.8h, v5.4s
+        umin            v4.8h, v4.8h, v15.8h // clamp to the value the caller broadcast into v15
+        st1             {v4.4h},   [x0], x1
+        sqxtun          v5.4h, v6.4s
+        st1             {v4.d}[1], [x0], x1
+        sqxtun2         v5.8h, v7.4s
+        umin            v5.8h, v5.8h, v15.8h
+        st1             {v5.4h},   [x0], x1
+        st1             {v5.d}[1], [x0], x1
+.endm
+        load_acc_store  v31.4s, v30.4s, v29.4s, v28.4s
+        load_acc_store  v27.4s, v26.4s, v25.4s, v24.4s
+        load_acc_store  v23.4s, v22.4s, v21.4s, v20.4s
+        load_acc_store  v19.4s, v18.4s, v17.4s, v16.4s
+        sub             x2,  x2,  x9 // step back onto the last stored row before reading in reverse
+        load_acc_store  v16.4s, v17.4s, v18.4s, v19.4s, 1
+        load_acc_store  v20.4s, v21.4s, v22.4s, v23.4s, 1
+        load_acc_store  v24.4s, v25.4s, v26.4s, v27.4s, 1
+        load_acc_store  v28.4s, v29.4s, v30.4s, v31.4s, 1
+.purgem load_acc_store
+        ret             x14 // return through the address saved on entry
+endfunc
+.endm
+
+idct32_funcs
+idct32_funcs _quarter
+idct32_funcs _half
+
+const min_eob_idct_idct_32, align=4 // thresholds compared with w3 before each 4x32 pass 1 slice in vp9_idct_idct_32x32_add_16_neon
+        .short  0, 9, 34, 70, 135, 240, 336, 448 // the table is addressed from its second entry; 9, 34, 70 and 135 also appear as immediates in the quarter/half paths
+endconst
+
+function vp9_idct_idct_32x32_add_16_neon // Shared body of the 10 and 12 bit entry points below, which put the pixel maximum in x13
+        cmp             w3,  #1 // w3 is later compared against the min_eob table, so presumably eob - TODO confirm
+        b.eq            idct32x32_dc_add_neon // single-coefficient case is handed to the DC routine (defined outside this part of the file)
+
+        movrel          x10, idct_coeffs
+
+        mov             x15, x30 // keep the return address; every exit of this function and of the partial variants is ret x15
+        stp             d8,  d9,  [sp, #-0x10]! // save d8-d15 (callee-saved under AAPCS64); restored before returning
+        stp             d10, d11, [sp, #-0x10]!
+        stp             d12, d13, [sp, #-0x10]!
+        stp             d14, d15, [sp, #-0x10]!
+
+        sub             sp,  sp,  #4096 // temp buffer between the two passes: 32 rows, 128 bytes apart
+
+        mov             x4,  x0 // x4-x6 keep the incoming x0-x2; x0-x2 are recomputed before every call below
+        mov             x5,  x1
+        mov             x6,  x2
+
+        // Double stride of the input, since we only read every other line
+        mov             x9,  #256
+        neg             x7,  x9
+
+        ld1             {v0.8h,v1.8h},   [x10], #32 // first 16 halfword coefficients
+        sxtl            v2.4s,  v1.4h // sign-extend them to 32 bit, spread over v0-v3
+        sxtl2           v3.4s,  v1.8h
+        sxtl2           v1.4s,  v0.8h
+        sxtl            v0.4s,  v0.4h
+        ld1             {v10.8h,v11.8h}, [x10] // next 16 halfword coefficients
+        sxtl            v12.4s, v11.4h // sign-extended the same way into v10-v13
+        sxtl2           v13.4s, v11.8h
+        sxtl2           v11.4s, v10.8h
+        sxtl            v10.4s, v10.4h
+
+        dup             v15.8h, w13 // pixel maximum, used by the umin clamps in pass 2
+
+        cmp             w3,  #34
+        b.le            idct32x32_quarter_add_16_neon // w3 <= 34: continue in the quarter variant (a branch, not a call)
+        cmp             w3,  #135
+        b.le            idct32x32_half_add_16_neon // w3 <= 135: continue in the half variant
+
+        movrel          x12, min_eob_idct_idct_32, 2 // address of the second table entry (byte offset 2)
+
+.irp i, 0, 4, 8, 12, 16, 20, 24, 28
+        add             x0,  sp,  #(\i*128)
+.if \i > 0
+        ldrh            w1,  [x12], #2 // next threshold
+        cmp             w3,  w1
+        mov             x1,  #(32 - \i)/4 // number of 4-row groups not yet written, used as the zero-fill count at 2:
+        b.le            1f // w3 at or below the threshold: skip the remaining slices and zero-fill instead
+.endif
+        add             x2,  x6,  #(\i*4)
+        bl              idct32_1d_4x32_pass1_neon
+.endr
+        b               3f
+
+1:
+        // Write zeros to the temp buffer for pass 2
+        movi            v16.4s,  #0
+        movi            v17.4s,  #0
+        movi            v18.4s,  #0
+        movi            v19.4s,  #0
+2:
+        subs            x1,  x1,  #1
+.rept 4
+        st1             {v16.4s,v17.4s,v18.4s,v19.4s},  [x0], #64 // 2 x 64 bytes, repeated 4 times = 512 bytes (four rows) per loop iteration
+        st1             {v16.4s,v17.4s,v18.4s,v19.4s},  [x0], #64
+.endr
+        b.ne            2b
+3:
+.irp i, 0, 4, 8, 12, 16, 20, 24, 28
+        add             x0,  x4,  #(\i*2) // dst advances by four 2-byte pixels per slice
+        mov             x1,  x5
+        add             x2,  sp,  #(\i*4) // temp buffer advances by four 4-byte values per slice
+        bl              idct32_1d_4x32_pass2_neon
+.endr
+
+        add             sp,  sp,  #4096 // release the temp buffer
+        ldp             d14, d15, [sp], 0x10 // restore d8-d15 in reverse order of the saves
+        ldp             d12, d13, [sp], 0x10
+        ldp             d10, d11, [sp], 0x10
+        ldp             d8,  d9,  [sp], 0x10
+
+        ret             x15
+endfunc
+
+function ff_vp9_idct_idct_32x32_add_10_neon, export=1 // exported entry point for the 0x03ff (10 bit) pixel maximum
+        mov             x13, #0x03ff // broadcast into v15 by the shared body and used as the output clamp
+        b               vp9_idct_idct_32x32_add_16_neon // tail branch; the shared body returns to our caller
+endfunc
+
+function ff_vp9_idct_idct_32x32_add_12_neon, export=1 // exported entry point for the 0x0fff (12 bit) pixel maximum
+        mov             x13, #0x0fff // broadcast into v15 by the shared body and used as the output clamp
+        b               vp9_idct_idct_32x32_add_16_neon // tail branch; the shared body returns to our caller
+endfunc
+
+.macro idct32_partial size // emits idct32x32_quarter_add_16_neon and idct32x32_half_add_16_neon (see the two uses below)
+function idct32x32_\size\()_add_16_neon // reached by b.le from vp9_idct_idct_32x32_add_16_neon: runs on that stack frame, with x4-x7, x9, x15 and v15 already set up
+.irp i, 0, 4
+        add             x0,  sp,  #(\i*128)
+.ifc \size,quarter
+.if \i == 4
+        cmp             w3,  #9
+        b.le            1f // quarter only: w3 <= 9 skips the second slice and zero-fills its rows instead
+.endif
+.endif
+        add             x2,  x6,  #(\i*4)
+        bl              idct32_1d_4x32_pass1_\size\()_neon
+.endr
+
+.ifc \size,half
+.irp i, 8, 12
+        add             x0,  sp,  #(\i*128)
+.if \i == 12
+        cmp             w3,  #70
+        b.le            1f // half only: w3 <= 70 skips the fourth slice and zero-fills its rows instead
+.endif
+        add             x2,  x6,  #(\i*4)
+        bl              idct32_1d_4x32_pass1_\size\()_neon
+.endr
+.endif
+        b               3f
+
+1:
+        // Write zeros to the temp buffer for pass 2
+        movi            v16.4s,  #0
+        movi            v17.4s,  #0
+        movi            v18.4s,  #0
+        movi            v19.4s,  #0
+
+.rept 4
+        st1             {v16.4s,v17.4s,v18.4s,v19.4s},  [x0], #64 // 8 stores of 64 bytes in total = 512 bytes, the four rows of the skipped slice
+        st1             {v16.4s,v17.4s,v18.4s,v19.4s},  [x0], #64
+.endr
+
+3:
+.irp i, 0, 4, 8, 12, 16, 20, 24, 28
+        add             x0,  x4,  #(\i*2)
+        mov             x1,  x5
+        add             x2,  sp,  #(\i*4)
+        bl              idct32_1d_4x32_pass2_\size\()_neon
+.endr
+
+        add             sp,  sp,  #4096 // release the temp buffer allocated by vp9_idct_idct_32x32_add_16_neon
+        ldp             d14, d15, [sp], 0x10 // restore the d8-d15 it saved
+        ldp             d12, d13, [sp], 0x10
+        ldp             d10, d11, [sp], 0x10
+        ldp             d8,  d9,  [sp], 0x10
+
+        ret             x15 // return address saved by vp9_idct_idct_32x32_add_16_neon
+endfunc
+.endm
+
+idct32_partial quarter
+idct32_partial half
diff --git a/media/ffvpx/libavcodec/aarch64/vp9itxfm_neon.S b/media/ffvpx/libavcodec/aarch64/vp9itxfm_neon.S
new file mode 100644
index 0000000000..a27f7b8ae5
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9itxfm_neon.S
@@ -0,0 +1,1580 @@
+/*
+ * Copyright (c) 2016 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+#include "neon.S"
+
+const itxfm4_coeffs, align=4 // constants for the 4x4 transforms, used with 14 bit rounding shifts; idct4 reads v0.h[0], h[2], h[3]
+        .short  11585, 0, 6270, 15137
+iadst4_coeffs: // 8 bytes into the block, so an 8-halfword load from itxfm4_coeffs picks up both rows; iadst4 reads them as v0.h[4]-h[7]
+        .short  5283, 15212, 9929, 13377
+endconst
+
+const iadst8_coeffs, align=4 // one row of 8 halfword constants, directly followed by the idct_coeffs rows
+        .short  16305, 1606, 14449, 7723, 10394, 12665, 4756, 15679
+idct_coeffs: // 16 bytes into the block; the first four values repeat the first row of itxfm4_coeffs
+        .short  11585, 0, 6270, 15137, 3196, 16069, 13623, 9102
+        .short  1606, 16305, 12665, 10394, 7723, 14449, 15679, 4756
+        .short  804, 16364, 12140, 11003, 7005, 14811, 15426, 5520
+        .short  3981, 15893, 14053, 8423, 9760, 13160, 16207, 2404
+endconst
+
+const iadst16_coeffs, align=4 // two rows of 8 halfword constants; the code reading them is outside this part of the file
+        .short  16364, 804, 15893, 3981, 11003, 12140, 8423, 14053
+        .short  14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
+endconst
+
+// out1 = ((in1 + in2) * v0[0] + (1 << 13)) >> 14
+// out2 = ((in1 - in2) * v0[0] + (1 << 13)) >> 14
+// in/out are .8h registers; this can do with 4 temp registers, but is
+// more efficient if 6 temp registers are available. With neg > 0, out1 is computed with -v0[0] instead; out2 is unaffected.
+.macro dmbutterfly0 out1, out2, in1, in2, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, neg=0
+.if \neg > 0
+        neg             \tmp4\().4h, v0.4h // tmp4.h[0] = -v0.h[0]
+.endif
+        add             \tmp1\().8h, \in1\().8h,  \in2\().8h // tmp1 = in1 + in2
+        sub             \tmp2\().8h, \in1\().8h,  \in2\().8h // tmp2 = in1 - in2
+.if \neg > 0
+        smull           \tmp3\().4s, \tmp1\().4h, \tmp4\().h[0]
+        smull2          \tmp4\().4s, \tmp1\().8h, \tmp4\().h[0] // overwrites tmp4 after its last use as the negated coefficient
+.else
+        smull           \tmp3\().4s, \tmp1\().4h, v0.h[0]
+        smull2          \tmp4\().4s, \tmp1\().8h, v0.h[0]
+.endif
+.ifb \tmp5
+        rshrn           \out1\().4h, \tmp3\().4s, #14 // 4-temp form: narrow the sum first so tmp3/tmp4 can be reused for the difference
+        rshrn2          \out1\().8h, \tmp4\().4s, #14
+        smull           \tmp3\().4s, \tmp2\().4h, v0.h[0]
+        smull2          \tmp4\().4s, \tmp2\().8h, v0.h[0]
+        rshrn           \out2\().4h, \tmp3\().4s, #14
+        rshrn2          \out2\().8h, \tmp4\().4s, #14
+.else
+        smull           \tmp5\().4s, \tmp2\().4h, v0.h[0] // 6-temp form: both products are issued before any narrowing
+        smull2          \tmp6\().4s, \tmp2\().8h, v0.h[0]
+        rshrn           \out1\().4h, \tmp3\().4s, #14
+        rshrn2          \out1\().8h, \tmp4\().4s, #14
+        rshrn           \out2\().4h, \tmp5\().4s, #14
+        rshrn2          \out2\().8h, \tmp6\().4s, #14
+.endif
+.endm
+
+// Same as dmbutterfly0 above, but treating the input in in2 as zero,
+// writing the same output into both out1 and out2. The in2 and tmp3-tmp6 arguments are accepted but never referenced.
+.macro dmbutterfly0_h out1, out2, in1, in2, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6
+        smull           \tmp1\().4s,  \in1\().4h,  v0.h[0] // in1 * v0.h[0], low and high halves
+        smull2          \tmp2\().4s,  \in1\().8h,  v0.h[0]
+        rshrn           \out1\().4h,  \tmp1\().4s, #14
+        rshrn2          \out1\().8h,  \tmp2\().4s, #14
+        rshrn           \out2\().4h,  \tmp1\().4s, #14 // same products narrowed a second time into out2
+        rshrn2          \out2\().8h,  \tmp2\().4s, #14
+.endm
+
+// out1,out2 = in1 * coef1 - in2 * coef2
+// out3,out4 = in1 * coef2 + in2 * coef1
+// out are 4 x .4s registers, in are 2 x .8h registers; the 32 bit results are left unrounded for the caller
+.macro dmbutterfly_l out1, out2, out3, out4, in1, in2, coef1, coef2
+        smull           \out1\().4s, \in1\().4h, \coef1
+        smull2          \out2\().4s, \in1\().8h, \coef1
+        smull           \out3\().4s, \in1\().4h, \coef2
+        smull2          \out4\().4s, \in1\().8h, \coef2
+        smlsl           \out1\().4s, \in2\().4h, \coef2 // multiply-subtract: out1,out2 -= in2 * coef2
+        smlsl2          \out2\().4s, \in2\().8h, \coef2
+        smlal           \out3\().4s, \in2\().4h, \coef1 // multiply-accumulate: out3,out4 += in2 * coef1
+        smlal2          \out4\().4s, \in2\().8h, \coef1
+.endm
+
+// inout1 = (inout1 * coef1 - inout2 * coef2 + (1 << 13)) >> 14
+// inout2 = (inout1 * coef2 + inout2 * coef1 + (1 << 13)) >> 14
+// inout are 2 x .8h registers; tmp1-tmp4 hold the 32 bit intermediates. With neg > 0, the inout2 sum is negated before rounding.
+.macro dmbutterfly inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4, neg=0
+        dmbutterfly_l   \tmp1, \tmp2, \tmp3, \tmp4, \inout1, \inout2, \coef1, \coef2
+.if \neg > 0
+        neg             \tmp3\().4s, \tmp3\().4s // only the second result (tmp3,tmp4) changes sign
+        neg             \tmp4\().4s, \tmp4\().4s
+.endif
+        rshrn           \inout1\().4h, \tmp1\().4s,  #14 // both inputs were fully consumed above, so writing in place is safe
+        rshrn2          \inout1\().8h, \tmp2\().4s,  #14
+        rshrn           \inout2\().4h, \tmp3\().4s,  #14
+        rshrn2          \inout2\().8h, \tmp4\().4s,  #14
+.endm
+
+// Same as dmbutterfly above, but treating the input in inout2 as zero (inout2 is only written; there is no neg option)
+.macro dmbutterfly_h1 inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4
+        smull           \tmp1\().4s, \inout1\().4h, \coef1 // inout1 * coef1
+        smull2          \tmp2\().4s, \inout1\().8h, \coef1
+        smull           \tmp3\().4s, \inout1\().4h, \coef2 // inout1 * coef2
+        smull2          \tmp4\().4s, \inout1\().8h, \coef2
+        rshrn           \inout1\().4h, \tmp1\().4s, #14
+        rshrn2          \inout1\().8h, \tmp2\().4s, #14
+        rshrn           \inout2\().4h, \tmp3\().4s, #14
+        rshrn2          \inout2\().8h, \tmp4\().4s, #14
+.endm
+
+// Same as dmbutterfly above, but treating the input in inout1 as zero (inout1 is only written; there is no neg option)
+.macro dmbutterfly_h2 inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4
+        smull           \tmp1\().4s, \inout2\().4h, \coef2 // inout2 * coef2
+        smull2          \tmp2\().4s, \inout2\().8h, \coef2
+        smull           \tmp3\().4s, \inout2\().4h, \coef1 // inout2 * coef1
+        smull2          \tmp4\().4s, \inout2\().8h, \coef1
+        neg             \tmp1\().4s, \tmp1\().4s // the coef2 term enters inout1 with a minus sign
+        neg             \tmp2\().4s, \tmp2\().4s
+        rshrn           \inout2\().4h, \tmp3\().4s, #14 // inout2 is overwritten only after all four products are computed
+        rshrn2          \inout2\().8h, \tmp4\().4s, #14
+        rshrn           \inout1\().4h, \tmp1\().4s, #14
+        rshrn2          \inout1\().8h, \tmp2\().4s, #14
+.endm
+
+.macro dsmull_h out1, out2, in, coef // out1,out2 (2 x .4s) = in (.8h) * coef, as widening signed multiplies
+        smull           \out1\().4s, \in\().4h, \coef // low four lanes
+        smull2          \out2\().4s, \in\().8h, \coef // high four lanes
+.endm
+
+.macro drshrn_h out, in1, in2, shift // out (.8h) = in1,in2 (2 x .4s) shifted right by shift with rounding, then narrowed
+        rshrn           \out\().4h, \in1\().4s, \shift // low half from in1
+        rshrn2          \out\().8h, \in2\().4s, \shift // high half from in2
+.endm
+
+
+// out1 = in1 + in2
+// out2 = in1 - in2 (all four operands are .8h registers)
+.macro butterfly_8h out1, out2, in1, in2
+        add             \out1\().8h, \in1\().8h, \in2\().8h // written first, so out1 must not alias an input that the sub still needs
+        sub             \out2\().8h, \in1\().8h, \in2\().8h
+.endm
+
+// out1 = in1 - in2
+// out2 = in1 + in2 (all four operands are .8h registers)
+.macro butterfly_8h_r out1, out2, in1, in2
+        sub             \out1\().8h, \in1\().8h, \in2\().8h // written first, so out1 must not alias an input that the add still needs
+        add             \out2\().8h, \in1\().8h, \in2\().8h
+.endm
+
+// out1 = (in1,in2 + in3,in4 + (1 << 13)) >> 14
+// out2 = (in1,in2 - in3,in4 + (1 << 13)) >> 14
+// out are 2 x .8h registers, in are 4 x .4s registers; tmp1-tmp4 are .4s scratch registers
+.macro dbutterfly_n out1, out2, in1, in2, in3, in4, tmp1, tmp2, tmp3, tmp4
+        add             \tmp1\().4s, \in1\().4s, \in3\().4s // sums, low and high halves
+        add             \tmp2\().4s, \in2\().4s, \in4\().4s
+        sub             \tmp3\().4s, \in1\().4s, \in3\().4s // differences, low and high halves
+        sub             \tmp4\().4s, \in2\().4s, \in4\().4s
+        rshrn           \out1\().4h, \tmp1\().4s,  #14
+        rshrn2          \out1\().8h, \tmp2\().4s,  #14
+        rshrn           \out2\().4h, \tmp3\().4s,  #14
+        rshrn2          \out2\().8h, \tmp4\().4s,  #14
+.endm
+
+.macro iwht4 c0, c1, c2, c3 // in-place 4-point transform on the .4h lanes of c0-c3, used for the iwht/iwht function (presumably inverse Walsh-Hadamard); clobbers v16 and v17
+        add             \c0\().4h, \c0\().4h, \c1\().4h // c0 += c1
+        sub             v17.4h,    \c2\().4h, \c3\().4h // v17 = c2 - c3
+        sub             v16.4h,    \c0\().4h, v17.4h
+        sshr            v16.4h,    v16.4h,    #1 // v16 = (c0 - v17) >> 1, arithmetic shift without rounding
+        sub             \c2\().4h, v16.4h,    \c1\().4h
+        sub             \c1\().4h, v16.4h,    \c3\().4h
+        add             \c3\().4h, v17.4h,    \c2\().4h // uses the c2 value written two lines up
+        sub             \c0\().4h, \c0\().4h, \c1\().4h // uses the c1 value written two lines up
+.endm
+
+.macro idct4 c0, c1, c2, c3 // in-place 4-point IDCT on the .4h lanes of c0-c3; reads v0.h[0], h[2], h[3] and clobbers v16-v20 and v22
+        smull           v22.4s,    \c1\().4h, v0.h[3]
+        smull           v20.4s,    \c1\().4h, v0.h[2]
+        add             v16.4h,    \c0\().4h, \c2\().4h
+        sub             v17.4h,    \c0\().4h, \c2\().4h
+        smlal           v22.4s,    \c3\().4h, v0.h[2] // v22 = c1 * h[3] + c3 * h[2]
+        smull           v18.4s,    v16.4h,    v0.h[0] // v18 = (c0 + c2) * h[0]
+        smull           v19.4s,    v17.4h,    v0.h[0] // v19 = (c0 - c2) * h[0]
+        smlsl           v20.4s,    \c3\().4h, v0.h[3] // v20 = c1 * h[2] - c3 * h[3]
+        rshrn           v22.4h,    v22.4s,    #14 // all four products are rounded and narrowed by 14 bits
+        rshrn           v18.4h,    v18.4s,    #14
+        rshrn           v19.4h,    v19.4s,    #14
+        rshrn           v20.4h,    v20.4s,    #14
+        add             \c0\().4h, v18.4h,    v22.4h
+        sub             \c3\().4h, v18.4h,    v22.4h
+        add             \c1\().4h, v19.4h,    v20.4h
+        sub             \c2\().4h, v19.4h,    v20.4h
+.endm
+
+.macro iadst4 c0, c1, c2, c3 // in-place 4-point IADST on the .4h lanes of c0-c3; reads v0.h[4]-h[7] and clobbers v16-v21
+        smull           v16.4s,    \c0\().4h, v0.h[4]
+        smlal           v16.4s,    \c2\().4h, v0.h[5]
+        smlal           v16.4s,    \c3\().4h, v0.h[6] // v16 = c0 * h[4] + c2 * h[5] + c3 * h[6]
+        smull           v17.4s,    \c0\().4h, v0.h[6]
+        smlsl           v17.4s,    \c2\().4h, v0.h[4]
+        sub             \c0\().4h, \c0\().4h, \c2\().4h // c0 is reused as scratch from here on: c0 - c2 ...
+        smlsl           v17.4s,    \c3\().4h, v0.h[5] // v17 = c0 * h[6] - c2 * h[4] - c3 * h[5]
+        add             \c0\().4h, \c0\().4h, \c3\().4h // ... + c3
+        smull           v19.4s,    \c1\().4h, v0.h[7] // v19 = c1 * h[7]
+        smull           v18.4s,    \c0\().4h, v0.h[7] // v18 = (c0 - c2 + c3) * h[7]
+        add             v20.4s,    v16.4s,    v19.4s
+        add             v21.4s,    v17.4s,    v19.4s
+        rshrn           \c0\().4h, v20.4s,    #14 // out0 = v16 + v19
+        add             v16.4s,    v16.4s,    v17.4s
+        rshrn           \c1\().4h, v21.4s,    #14 // out1 = v17 + v19
+        sub             v16.4s,    v16.4s,    v19.4s
+        rshrn           \c2\().4h, v18.4s,    #14 // out2 = v18
+        rshrn           \c3\().4h, v16.4s,    #14 // out3 = v16 + v17 - v19
+.endm
+
+// The public functions in this file have got the following signature:
+// void itxfm_add(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+
+// Defines ff_vp9_<txfm1>_<txfm2>_4x4_add_neon for one transform pair.
+// Registers as used below: x0 = dst, x1 = dst stride, x2 = coefficient
+// block, w3 = eob. The 4-point macro named after txfm1 runs first, the
+// 4x4 result is transposed, then the 4-point macro named after txfm2 runs.
+// The result is added to four 4-pixel rows of dst with unsigned saturation.
+// Coefficients that are read from x2 are overwritten with zeros.
+.macro itxfm_func4x4 txfm1, txfm2
+function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1
+        // Load the constants into v0. When both passes use the same
+        // transform, only the table for that transform is loaded (nothing
+        // at all for iwht). Otherwise 8 halfwords starting at
+        // itxfm4_coeffs are loaded.
+        // NOTE(review): this presumably relies on the iadst4 constants
+        // directly following itxfm4_coeffs - confirm against the tables.
+.ifc \txfm1,\txfm2
+.ifc \txfm1,idct
+        movrel          x4,  itxfm4_coeffs
+        ld1             {v0.4h}, [x4]
+.endif
+.ifc \txfm1,iadst
+        movrel          x4,  iadst4_coeffs
+        ld1             {v0.d}[1], [x4]
+.endif
+.else
+        movrel          x4,  itxfm4_coeffs
+        ld1             {v0.8h}, [x4]
+.endif
+
+        // v31 = 0, used to clear the coefficients after reading them
+        movi            v31.8h, #0
+.ifc \txfm1\()_\txfm2,idct_idct
+        cmp             w3,  #1
+        b.ne            1f
+        // DC-only for idct/idct
+        // Each of the two passes is a multiply by v0.h[0] followed by a
+        // rounding narrowing shift by 14; the DC value is then cleared in
+        // the block and broadcast to all four rows (v4-v7).
+        ld1             {v2.h}[0], [x2]
+        smull           v2.4s,  v2.4h, v0.h[0]
+        rshrn           v2.4h,  v2.4s, #14
+        smull           v2.4s,  v2.4h, v0.h[0]
+        rshrn           v2.4h,  v2.4s, #14
+        st1             {v31.h}[0], [x2]
+        dup             v4.4h,  v2.h[0]
+        mov             v5.16b, v4.16b
+        mov             v6.16b, v4.16b
+        mov             v7.16b, v4.16b
+        b               2f
+.endif
+
+1:
+        // Load all 16 coefficients, then zero the first 16 bytes
+        ld1             {v4.4h,v5.4h,v6.4h,v7.4h},  [x2]
+        st1             {v31.8h}, [x2], #16
+
+        // iwht only: arithmetic shift of the input right by 2
+.ifc \txfm1,iwht
+        sshr            v4.4h,  v4.4h,  #2
+        sshr            v5.4h,  v5.4h,  #2
+        sshr            v6.4h,  v6.4h,  #2
+        sshr            v7.4h,  v7.4h,  #2
+.endif
+
+        \txfm1\()4      v4,  v5,  v6,  v7
+
+        // Zero the remaining 16 bytes of the coefficient block
+        st1             {v31.8h}, [x2], #16
+        // Transpose 4x4 with 16 bit elements
+        transpose_4x4H  v4,  v5,  v6,  v7,  v16, v17, v18, v19
+
+        \txfm2\()4      v4,  v5,  v6,  v7
+2:
+        // Add into the destination: load 4 rows of 4 pixels, apply a
+        // rounding shift by 4 to the residual (skipped for iwht), widen-add,
+        // narrow with unsigned saturation, rewind x0 by 4 rows and store.
+        ld1             {v0.s}[0],   [x0], x1
+        ld1             {v1.s}[0],   [x0], x1
+.ifnc \txfm1,iwht
+        srshr           v4.4h,  v4.4h,  #4
+        srshr           v5.4h,  v5.4h,  #4
+        srshr           v6.4h,  v6.4h,  #4
+        srshr           v7.4h,  v7.4h,  #4
+.endif
+        uaddw           v4.8h,  v4.8h,  v0.8b
+        uaddw           v5.8h,  v5.8h,  v1.8b
+        ld1             {v2.s}[0],   [x0], x1
+        ld1             {v3.s}[0],   [x0], x1
+        sqxtun          v0.8b,  v4.8h
+        sqxtun          v1.8b,  v5.8h
+        sub             x0,  x0,  x1, lsl #2
+
+        uaddw           v6.8h,  v6.8h,  v2.8b
+        uaddw           v7.8h,  v7.8h,  v3.8b
+        st1             {v0.s}[0],  [x0], x1
+        sqxtun          v2.8b,  v6.8h
+        sqxtun          v3.8b,  v7.8h
+
+        st1             {v1.s}[0],  [x0], x1
+        st1             {v2.s}[0],  [x0], x1
+        st1             {v3.s}[0],  [x0], x1
+
+        ret
+endfunc
+.endm
+
+// Instantiate the exported 4x4 add functions, one per transform pair.
+itxfm_func4x4 idct,  idct
+itxfm_func4x4 iadst, idct
+itxfm_func4x4 idct,  iadst
+itxfm_func4x4 iadst, iadst
+itxfm_func4x4 iwht,  iwht
+
+
+// 8-point inverse DCT over the rows held in v16-v23; the outputs are
+// written back to v16-v23 (out[0]..out[7], see the trailing comments).
+// Constants are taken from lanes of v0. v2-v7 are passed to the butterfly
+// helpers as scratch registers and v24-v31 hold intermediate values.
+.macro idct8
+        dmbutterfly0    v16, v20, v16, v20, v2, v3, v4, v5, v6, v7 // v16 = t0a, v20 = t1a
+        dmbutterfly     v18, v22, v0.h[2], v0.h[3], v2, v3, v4, v5 // v18 = t2a, v22 = t3a
+        dmbutterfly     v17, v23, v0.h[4], v0.h[5], v2, v3, v4, v5 // v17 = t4a, v23 = t7a
+        dmbutterfly     v21, v19, v0.h[6], v0.h[7], v2, v3, v4, v5 // v21 = t5a, v19 = t6a
+
+        butterfly_8h    v24, v25, v16, v22 // v24 = t0, v25 = t3
+        butterfly_8h    v28, v29, v17, v21 // v28 = t4, v29 = t5a
+        butterfly_8h    v30, v31, v23, v19 // v30 = t7, v31 = t6a
+        butterfly_8h    v26, v27, v20, v18 // v26 = t1, v27 = t2
+
+        dmbutterfly0    v31, v29, v31, v29, v2, v3, v4, v5, v6, v7 // v31 = t6, v29 = t5
+
+        butterfly_8h    v16, v23, v24, v30 // v16 = out[0], v23 = out[7]
+        butterfly_8h    v17, v22, v26, v31 // v17 = out[1], v22 = out[6]
+        butterfly_8h    v18, v21, v27, v29 // v18 = out[2], v21 = out[5]
+        butterfly_8h    v19, v20, v25, v28 // v19 = out[3], v20 = out[4]
+.endm
+
+// 8-point inverse ADST over the rows held in v16-v23; the outputs are
+// written back to v16-v23 (out[0]..out[7], see the trailing comments).
+// Constants are taken from all lanes of v1 and from v0.h[2]/v0.h[3].
+// v2-v7 and v24-v31 are used for intermediate values. Outputs that the
+// butterflies produce with inverted sign are negated explicitly.
+.macro iadst8
+        dmbutterfly_l   v24, v25, v26, v27, v23, v16, v1.h[1], v1.h[0]   // v24,v25 = t1a, v26,v27 = t0a
+        dmbutterfly_l   v28, v29, v30, v31, v21, v18, v1.h[3], v1.h[2]   // v28,v29 = t3a, v30,v31 = t2a
+        dmbutterfly_l   v2,  v3,  v4,  v5,  v19, v20, v1.h[5], v1.h[4]   // v2,v3   = t5a, v4,v5   = t4a
+        dmbutterfly_l   v16, v18, v21, v23, v17, v22, v1.h[7], v1.h[6]   // v16,v18 = t7a, v21,v23 = t6a
+
+        dbutterfly_n    v4,  v5,  v26, v27, v4,  v5,  v6,  v7, v26, v27  // v4  = t0, v5  = t4
+        dbutterfly_n    v2,  v3,  v24, v25, v2,  v3,  v6,  v7, v26, v27  // v2  = t1, v3  = t5
+        dbutterfly_n    v24, v25, v30, v31, v21, v23, v6,  v7, v26, v27  // v24 = t2, v25 = t6
+        dbutterfly_n    v30, v31, v28, v29, v16, v18, v6,  v7, v26, v27  // v30 = t3, v31 = t7
+
+        butterfly_8h    v16, v6,  v4, v24 // v16 = out[0],  v6 = t2
+        butterfly_8h    v23, v7,  v2, v30 // v23 = -out[7], v7 = t3
+        neg             v23.8h,   v23.8h  // v23 = out[7]
+
+        dmbutterfly0    v19, v20, v6, v7, v24, v26, v27, v28, v29, v30   // v19 = -out[3], v20 = out[4]
+        neg             v19.8h,   v19.8h  // v19 = out[3]
+
+        dmbutterfly_l   v26, v27, v28, v29, v5,  v3,  v0.h[2], v0.h[3]   // v26,v27 = t5a, v28,v29 = t4a
+        dmbutterfly_l   v2,  v3,  v4,  v5,  v31, v25, v0.h[3], v0.h[2]   // v2,v3   = t6a, v4,v5   = t7a
+
+        dbutterfly_n    v17, v30, v28, v29, v2,  v3,  v6,  v7,  v24, v25 // v17 = -out[1], v30 = t6
+        dbutterfly_n    v22, v31, v26, v27, v4,  v5,  v6,  v7,  v24, v25 // v22 = out[6],  v31 = t7
+        neg             v17.8h,   v17.8h  // v17 = out[1]
+
+        dmbutterfly0    v18, v21, v30, v31, v2,  v3,  v4,  v5,  v6,  v7  // v18 = out[2], v21 = -out[5]
+        neg             v21.8h,   v21.8h  // v21 = out[5]
+.endm
+
+
+// Defines ff_vp9_<txfm1>_<txfm2>_8x8_add_neon for one transform pair.
+// Registers as used below: x0 = dst, x1 = dst stride, x2 = coefficient
+// block, w3 = eob. The 8-point macro named after txfm1 runs on v16-v23,
+// the 8x8 block is transposed, then the 8-point macro named after txfm2
+// runs. The result gets a rounding shift by 5 and is added to eight
+// 8-pixel rows of dst with unsigned saturation. Coefficients that are
+// read from x2 are overwritten with zeros.
+.macro itxfm_func8x8 txfm1, txfm2
+function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
+        // The iadst also uses a few coefficients from
+        // idct, so those always need to be loaded.
+        // For idct/idct only v0 is loaded (from idct_coeffs). Otherwise v1
+        // is loaded from iadst8_coeffs and v0 from the 16 bytes after it.
+        // NOTE(review): this presumably relies on idct_coeffs directly
+        // following iadst8_coeffs - confirm against the tables.
+.ifc \txfm1\()_\txfm2,idct_idct
+        movrel          x4,  idct_coeffs
+.else
+        movrel          x4,  iadst8_coeffs
+        ld1             {v1.8h}, [x4], #16
+.endif
+        ld1             {v0.8h}, [x4]
+
+        // v2-v5 = 0, used to clear the coefficient block below
+        movi            v2.8h, #0
+        movi            v3.8h, #0
+        movi            v4.8h, #0
+        movi            v5.8h, #0
+
+.ifc \txfm1\()_\txfm2,idct_idct
+        cmp             w3,  #1
+        b.ne            1f
+        // DC-only for idct/idct
+        // Each of the two passes is a multiply by v0.h[0] followed by a
+        // rounding narrowing shift by 14; the DC value is then cleared in
+        // the block (v3 is zero) and broadcast to all eight rows.
+        ld1             {v2.h}[0],  [x2]
+        smull           v2.4s,  v2.4h, v0.h[0]
+        rshrn           v2.4h,  v2.4s, #14
+        smull           v2.4s,  v2.4h, v0.h[0]
+        rshrn           v2.4h,  v2.4s, #14
+        st1             {v3.h}[0],  [x2]
+        dup             v16.8h,  v2.h[0]
+        mov             v17.16b, v16.16b
+        mov             v18.16b, v16.16b
+        mov             v19.16b, v16.16b
+        mov             v20.16b, v16.16b
+        mov             v21.16b, v16.16b
+        mov             v22.16b, v16.16b
+        mov             v23.16b, v16.16b
+        b               2f
+.endif
+1:
+        // Load the 128 bytes of coefficients into v16-v23, rewind x2 and
+        // overwrite the whole block with zeros.
+        ld1             {v16.8h,v17.8h,v18.8h,v19.8h},  [x2], #64
+        ld1             {v20.8h,v21.8h,v22.8h,v23.8h},  [x2], #64
+        sub             x2,  x2,  #128
+        st1             {v2.8h,v3.8h,v4.8h,v5.8h},      [x2], #64
+        st1             {v2.8h,v3.8h,v4.8h,v5.8h},      [x2], #64
+
+        \txfm1\()8
+
+        // Transpose 8x8 with 16 bit elements
+        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v24, v25
+
+        \txfm2\()8
+2:
+        // x0 is the load pointer from here on, x3 the store pointer
+        // (eob in w3 is no longer needed).
+        mov             x3,  x0
+        // Add into the destination
+        // Loads, rounding shifts, widening adds, saturating narrows and
+        // stores for the eight rows are interleaved.
+        ld1             {v0.8b},  [x0], x1
+        srshr           v16.8h, v16.8h, #5
+        ld1             {v1.8b},  [x0], x1
+        srshr           v17.8h, v17.8h, #5
+        ld1             {v2.8b},  [x0], x1
+        srshr           v18.8h, v18.8h, #5
+        uaddw           v16.8h, v16.8h, v0.8b
+        ld1             {v3.8b},  [x0], x1
+        srshr           v19.8h, v19.8h, #5
+        uaddw           v17.8h, v17.8h, v1.8b
+        ld1             {v4.8b},  [x0], x1
+        srshr           v20.8h, v20.8h, #5
+        uaddw           v18.8h, v18.8h, v2.8b
+        sqxtun          v0.8b,  v16.8h
+        ld1             {v5.8b},  [x0], x1
+        srshr           v21.8h, v21.8h, #5
+        uaddw           v19.8h, v19.8h, v3.8b
+        sqxtun          v1.8b,  v17.8h
+        ld1             {v6.8b},  [x0], x1
+        srshr           v22.8h, v22.8h, #5
+        uaddw           v20.8h, v20.8h, v4.8b
+        sqxtun          v2.8b,  v18.8h
+        ld1             {v7.8b},  [x0], x1
+        srshr           v23.8h, v23.8h, #5
+        uaddw           v21.8h, v21.8h, v5.8b
+        sqxtun          v3.8b,  v19.8h
+
+        st1             {v0.8b},  [x3], x1
+        uaddw           v22.8h, v22.8h, v6.8b
+        st1             {v1.8b},  [x3], x1
+        sqxtun          v4.8b,  v20.8h
+        st1             {v2.8b},  [x3], x1
+        uaddw           v23.8h, v23.8h, v7.8b
+        st1             {v3.8b},  [x3], x1
+        sqxtun          v5.8b,  v21.8h
+        st1             {v4.8b},  [x3], x1
+        sqxtun          v6.8b,  v22.8h
+        st1             {v5.8b},  [x3], x1
+        sqxtun          v7.8b,  v23.8h
+
+        st1             {v6.8b},  [x3], x1
+        st1             {v7.8b},  [x3], x1
+
+        ret
+endfunc
+.endm
+
+// Instantiate the exported 8x8 add functions, one per transform pair.
+itxfm_func8x8 idct,  idct
+itxfm_func8x8 iadst, idct
+itxfm_func8x8 idct,  iadst
+itxfm_func8x8 iadst, iadst
+
+
+// DC-only 16x16 idct/idct: derive one residual value from the first
+// coefficient and add it to all 16x16 destination pixels.
+// x0 = dst, x1 = dst stride, x2 = coefficient block (as used below).
+// Reached by a branch from ff_vp9_idct_idct_16x16_add_neon when eob == 1,
+// so the plain ret returns to the caller of that function.
+function idct16x16_dc_add_neon
+        movrel          x4,  idct_coeffs
+        ld1             {v0.4h}, [x4]
+
+        // v1 = 0, used to clear the DC coefficient
+        movi            v1.4h,  #0
+
+        // Two passes: multiply by v0.h[0] with a rounding shift by 14,
+        // then broadcast the result to all lanes of v2.
+        ld1             {v2.h}[0], [x2]
+        smull           v2.4s,  v2.4h,  v0.h[0]
+        rshrn           v2.4h,  v2.4s,  #14
+        smull           v2.4s,  v2.4h,  v0.h[0]
+        rshrn           v2.4h,  v2.4s,  #14
+        dup             v2.8h,  v2.h[0]
+        st1             {v1.h}[0], [x2]
+
+        // Final rounding shift by 6
+        srshr           v2.8h,  v2.8h,  #6
+
+        // x0 = load pointer, x3 = store pointer, x4 = remaining row count
+        mov             x3,  x0
+        mov             x4,  #16
+1:
+        // Loop to add the constant from v2 into all 16x16 outputs
+        // Two rows of 16 pixels are handled per iteration; the flags set
+        // by subs are consumed by the b.ne at the end of the loop.
+        subs            x4,  x4,  #2
+        ld1             {v3.16b},  [x0], x1
+        ld1             {v4.16b},  [x0], x1
+        uaddw           v16.8h, v2.8h,  v3.8b
+        uaddw2          v17.8h, v2.8h,  v3.16b
+        uaddw           v18.8h, v2.8h,  v4.8b
+        uaddw2          v19.8h, v2.8h,  v4.16b
+        sqxtun          v3.8b,  v16.8h
+        sqxtun2         v3.16b, v17.8h
+        sqxtun          v4.8b,  v18.8h
+        sqxtun2         v4.16b, v19.8h
+        st1             {v3.16b},  [x3], x1
+        st1             {v4.16b},  [x3], x1
+        b.ne            1b
+
+        ret
+endfunc
+
+// Shared tail of idct16, idct16_half and idct16_quarter: the remaining
+// butterfly stages that leave out[0]..out[15] in v16..v31 (see the
+// trailing comments). The macro ends with ret, so it must be the last
+// thing expanded in a function.
+.macro idct16_end
+        butterfly_8h    v18, v7,  v4,  v7                // v18 = t0a,  v7  = t7a
+        butterfly_8h    v19, v22, v5,  v22               // v19 = t1a,  v22 = t6
+        butterfly_8h    v4,  v26, v20, v26               // v4  = t2a,  v26 = t5
+        butterfly_8h    v5,  v6,  v28, v6                // v5  = t3a,  v6  = t4
+        butterfly_8h    v20, v28, v16, v24               // v20 = t8a,  v28 = t11a
+        butterfly_8h    v24, v21, v23, v21               // v24 = t9,   v21 = t10
+        butterfly_8h    v23, v27, v25, v27               // v23 = t14,  v27 = t13
+        butterfly_8h    v25, v29, v29, v17               // v25 = t15a, v29 = t12a
+
+        dmbutterfly0    v2,  v3,  v27, v21, v2,  v3,  v16, v17, v30, v31 // v2  = t13a, v3  = t10a
+        dmbutterfly0    v28, v27, v29, v28, v21, v29, v16, v17, v30, v31 // v28 = t12,  v27 = t11
+
+        butterfly_8h    v16, v31, v18, v25               // v16 = out[0], v31 = out[15]
+        butterfly_8h    v17, v30, v19, v23               // v17 = out[1], v30 = out[14]
+        butterfly_8h_r  v25, v22, v22, v24               // v25 = out[9], v22 = out[6]
+        butterfly_8h    v23, v24, v7,  v20               // v23 = out[7], v24 = out[8]
+        butterfly_8h    v18, v29, v4,  v2                // v18 = out[2], v29 = out[13]
+        butterfly_8h    v19, v28, v5,  v28               // v19 = out[3], v28 = out[12]
+        butterfly_8h    v20, v27, v6,  v27               // v20 = out[4], v27 = out[11]
+        butterfly_8h    v21, v26, v26, v3                // v21 = out[5], v26 = out[10]
+        ret
+.endm
+
+// 16-point inverse DCT over the 16 rows held in v16-v31, with the outputs
+// left in v16-v31 by idct16_end. Constants are read from lanes of v0 and
+// v1, which the caller must have loaded (the 16x16 entry points load them
+// from idct_coeffs). v2-v7 are used as temporaries. Returns through the
+// ret contained in idct16_end.
+function idct16
+        dmbutterfly0    v16, v24, v16, v24, v2, v3, v4, v5, v6, v7 // v16 = t0a,  v24 = t1a
+        dmbutterfly     v20, v28, v0.h[2], v0.h[3], v2, v3, v4, v5 // v20 = t2a,  v28 = t3a
+        dmbutterfly     v18, v30, v0.h[4], v0.h[5], v2, v3, v4, v5 // v18 = t4a,  v30 = t7a
+        dmbutterfly     v26, v22, v0.h[6], v0.h[7], v2, v3, v4, v5 // v26 = t5a,  v22 = t6a
+        dmbutterfly     v17, v31, v1.h[0], v1.h[1], v2, v3, v4, v5 // v17 = t8a,  v31 = t15a
+        dmbutterfly     v25, v23, v1.h[2], v1.h[3], v2, v3, v4, v5 // v25 = t9a,  v23 = t14a
+        dmbutterfly     v21, v27, v1.h[4], v1.h[5], v2, v3, v4, v5 // v21 = t10a, v27 = t13a
+        dmbutterfly     v29, v19, v1.h[6], v1.h[7], v2, v3, v4, v5 // v29 = t11a, v19 = t12a
+
+        butterfly_8h    v4,  v28, v16, v28               // v4  = t0,   v28 = t3
+        butterfly_8h    v5,  v20, v24, v20               // v5  = t1,   v20 = t2
+        butterfly_8h    v6,  v26, v18, v26               // v6  = t4,   v26 = t5
+        butterfly_8h    v7,  v22, v30, v22               // v7  = t7,   v22 = t6
+        butterfly_8h    v16, v25, v17, v25               // v16 = t8,   v25 = t9
+        butterfly_8h    v24, v21, v29, v21               // v24 = t11,  v21 = t10
+        butterfly_8h    v17, v27, v19, v27               // v17 = t12,  v27 = t13
+        butterfly_8h    v29, v23, v31, v23               // v29 = t15,  v23 = t14
+
+        dmbutterfly0    v22, v26, v22, v26, v2, v3, v18, v19, v30, v31        // v22 = t6a,  v26 = t5a
+        dmbutterfly     v23, v25, v0.h[2], v0.h[3], v18, v19, v30, v31        // v23 = t9a,  v25 = t14a
+        dmbutterfly     v27, v21, v0.h[2], v0.h[3], v18, v19, v30, v31, neg=1 // v27 = t13a, v21 = t10a
+        idct16_end
+endfunc
+
+// Variant of idct16 whose first stage uses the _h/_h1/_h2 butterfly
+// helpers instead of the full ones. Its callers in this file
+// (idct16_1d_8x16_pass1_half_neon / pass2_half_neon) load only v16-v23
+// before calling it.
+// NOTE(review): the helpers presumably treat the missing second input of
+// each pair as zero - confirm against their definitions.
+// Outputs are left in v16-v31 by idct16_end, which also does the ret.
+function idct16_half
+        dmbutterfly0_h  v16, v24, v16, v24, v2, v3, v4, v5, v6, v7 // v16 = t0a,  v24 = t1a
+        dmbutterfly_h1  v20, v28, v0.h[2], v0.h[3], v2, v3, v4, v5 // v20 = t2a,  v28 = t3a
+        dmbutterfly_h1  v18, v30, v0.h[4], v0.h[5], v2, v3, v4, v5 // v18 = t4a,  v30 = t7a
+        dmbutterfly_h2  v26, v22, v0.h[6], v0.h[7], v2, v3, v4, v5 // v26 = t5a,  v22 = t6a
+        dmbutterfly_h1  v17, v31, v1.h[0], v1.h[1], v2, v3, v4, v5 // v17 = t8a,  v31 = t15a
+        dmbutterfly_h2  v25, v23, v1.h[2], v1.h[3], v2, v3, v4, v5 // v25 = t9a,  v23 = t14a
+        dmbutterfly_h1  v21, v27, v1.h[4], v1.h[5], v2, v3, v4, v5 // v21 = t10a, v27 = t13a
+        dmbutterfly_h2  v29, v19, v1.h[6], v1.h[7], v2, v3, v4, v5 // v29 = t11a, v19 = t12a
+
+        butterfly_8h    v4,  v28, v16, v28               // v4  = t0,   v28 = t3
+        butterfly_8h    v5,  v20, v24, v20               // v5  = t1,   v20 = t2
+        butterfly_8h    v6,  v26, v18, v26               // v6  = t4,   v26 = t5
+        butterfly_8h    v7,  v22, v30, v22               // v7  = t7,   v22 = t6
+        butterfly_8h    v16, v25, v17, v25               // v16 = t8,   v25 = t9
+        butterfly_8h    v24, v21, v29, v21               // v24 = t11,  v21 = t10
+        butterfly_8h    v17, v27, v19, v27               // v17 = t12,  v27 = t13
+        butterfly_8h    v29, v23, v31, v23               // v29 = t15,  v23 = t14
+
+        dmbutterfly0    v22, v26, v22, v26, v2, v3, v18, v19, v30, v31        // v22 = t6a,  v26 = t5a
+        dmbutterfly     v23, v25, v0.h[2], v0.h[3], v18, v19, v30, v31        // v23 = t9a,  v25 = t14a
+        dmbutterfly     v27, v21, v0.h[2], v0.h[3], v18, v19, v30, v31, neg=1 // v27 = t13a, v21 = t10a
+        idct16_end
+endfunc
+
+// Variant of idct16 that reads only v16-v19 as inputs: the first stage is
+// done with single multiplies (dsmull_h) by lanes of v0/v1 followed by
+// rounding narrowing shifts by 14, instead of full butterflies. The
+// intermediate values are then placed in the registers idct16_end
+// expects; idct16_end leaves the outputs in v16-v31 and does the ret.
+function idct16_quarter
+        dsmull_h        v24, v25, v19, v1.h[7]
+        dsmull_h        v4,  v5,  v17, v1.h[0]
+        dsmull_h        v7,  v6,  v18, v0.h[5]
+        dsmull_h        v30, v31, v18, v0.h[4]
+        // Negate the v19 * v1.h[7] product before narrowing
+        neg             v24.4s,  v24.4s
+        neg             v25.4s,  v25.4s
+        dsmull_h        v29, v28, v17, v1.h[1]
+        dsmull_h        v26, v27, v19, v1.h[6]
+        dsmull_h        v22, v23, v16, v0.h[0]
+        drshrn_h        v24, v24, v25, #14
+        drshrn_h        v16, v4,  v5,  #14
+        drshrn_h        v7,  v7,  v6,  #14
+        drshrn_h        v6,  v30, v31, #14
+        drshrn_h        v29, v29, v28, #14
+        drshrn_h        v17, v26, v27, #14
+        drshrn_h        v28, v22, v23, #14
+
+        dmbutterfly_l   v20, v21, v22, v23, v17, v24, v0.h[2], v0.h[3]
+        dmbutterfly_l   v18, v19, v30, v31, v29, v16, v0.h[2], v0.h[3]
+        neg             v22.4s,  v22.4s
+        neg             v23.4s,  v23.4s
+        drshrn_h        v27, v20, v21, #14
+        drshrn_h        v21, v22, v23, #14
+        drshrn_h        v23, v18, v19, #14
+        drshrn_h        v25, v30, v31, #14
+        // The value derived from v16 * v0.h[0] (in v28) is copied to
+        // v4, v5 and v20, which idct16_end reads next to v28 itself.
+        mov             v4.16b,  v28.16b
+        mov             v5.16b,  v28.16b
+        dmbutterfly0    v22, v26, v7,  v6,  v18, v19, v30, v31
+        mov             v20.16b, v28.16b
+        idct16_end
+endfunc
+
+// 16-point inverse ADST over the 16 rows held in v16-v31; the outputs
+// out[0]..out[15] are left in v16..v31 (see the trailing comments and the
+// final moves). x11 must point to the constants loaded into v0/v1 at the
+// start and x10 to the constants loaded into v0 part way through (the
+// 16x16 entry points set x11 = iadst16_coeffs and x10 = idct_coeffs).
+// Clobbers v0-v15, so callers must preserve d8-d15 themselves.
+function iadst16
+        ld1             {v0.8h,v1.8h}, [x11]
+
+        dmbutterfly_l   v6,  v7,  v4,  v5,  v31, v16, v0.h[1], v0.h[0]   // v6,v7   = t1,   v4,v5   = t0
+        dmbutterfly_l   v10, v11, v8,  v9,  v23, v24, v0.h[5], v0.h[4]   // v10,v11 = t9,   v8,v9   = t8
+        dbutterfly_n    v31, v24, v6,  v7,  v10, v11, v12, v13, v10, v11 // v31     = t1a,  v24     = t9a
+        dmbutterfly_l   v14, v15, v12, v13, v29, v18, v0.h[3], v0.h[2]   // v14,v15 = t3,   v12,v13 = t2
+        dbutterfly_n    v16, v23, v4,  v5,  v8,  v9,  v6,  v7,  v8,  v9  // v16     = t0a,  v23     = t8a
+
+        dmbutterfly_l   v6,  v7,  v4,  v5,  v21, v26, v0.h[7], v0.h[6]   // v6,v7   = t11,  v4,v5   = t10
+        dbutterfly_n    v29, v26, v14, v15, v6,  v7,  v8,  v9,  v6,  v7  // v29     = t3a,  v26     = t11a
+        dmbutterfly_l   v10, v11, v8,  v9,  v27, v20, v1.h[1], v1.h[0]   // v10,v11 = t5,   v8,v9   = t4
+        dbutterfly_n    v18, v21, v12, v13, v4,  v5,  v6,  v7,  v4,  v5  // v18     = t2a,  v21     = t10a
+
+        dmbutterfly_l   v14, v15, v12, v13, v19, v28, v1.h[5], v1.h[4]   // v14,v15 = t13,  v12,v13 = t12
+        dbutterfly_n    v20, v28, v10, v11, v14, v15, v4,  v5,  v14, v15 // v20     = t5a,  v28     = t13a
+        dmbutterfly_l   v6,  v7,  v4,  v5,  v25, v22, v1.h[3], v1.h[2]   // v6,v7   = t7,   v4,v5   = t6
+        dbutterfly_n    v27, v19, v8,  v9,  v12, v13, v10, v11, v12, v13 // v27     = t4a,  v19     = t12a
+
+        dmbutterfly_l   v10, v11, v8,  v9,  v17, v30, v1.h[7], v1.h[6]   // v10,v11 = t15,  v8,v9   = t14
+        // The constants from x11 are no longer needed after the line
+        // above; switch v0 to the constants at x10.
+        ld1             {v0.8h}, [x10]
+        dbutterfly_n    v22, v30, v6,  v7,  v10, v11, v12, v13, v10, v11 // v22     = t7a,  v30     = t15a
+        dmbutterfly_l   v14, v15, v12, v13, v23, v24, v0.h[4], v0.h[5]   // v14,v15 = t9,   v12,v13 = t8
+        dbutterfly_n    v25, v17, v4,  v5,  v8,  v9,  v6,  v7,  v8,  v9  // v25     = t6a,  v17     = t14a
+
+        dmbutterfly_l   v4,  v5,  v6,  v7,  v28, v19, v0.h[5], v0.h[4]   // v4,v5   = t12,  v6,v7   = t13
+        dbutterfly_n    v23, v19, v12, v13, v4,  v5,  v8,  v9,  v4,  v5  // v23     = t8a,  v19     = t12a
+        dmbutterfly_l   v10, v11, v8,  v9,  v21, v26, v0.h[6], v0.h[7]   // v10,v11 = t11,  v8,v9   = t10
+        butterfly_8h_r  v4,  v27, v16, v27               // v4  = t4,   v27 = t0
+        dbutterfly_n    v24, v28, v14, v15, v6,  v7,  v12, v13, v6,  v7  // v24     = t9a,  v28     = t13a
+
+        dmbutterfly_l   v12, v13, v14, v15, v30, v17, v0.h[7], v0.h[6]   // v12,v13 = t14,  v14,v15 = t15
+        butterfly_8h_r  v5,  v20, v31, v20               // v5  = t5, v20 = t1
+        dbutterfly_n    v21, v17, v8,  v9,  v12, v13, v6,  v7,  v12, v13 // v21     = t10a, v17     = t14a
+        dbutterfly_n    v26, v30, v10, v11, v14, v15, v8,  v9,  v14, v15 // v26     = t11a, v30     = t15a
+
+        butterfly_8h_r  v6,  v25, v18, v25               // v6  = t6, v25 = t2
+        butterfly_8h_r  v7,  v22, v29, v22               // v7  = t7, v22 = t3
+
+        dmbutterfly_l   v10, v11, v8,  v9,  v19, v28, v0.h[2], v0.h[3]   // v10,v11 = t13,  v8,v9   = t12
+        dmbutterfly_l   v12, v13, v14, v15, v30, v17, v0.h[3], v0.h[2]   // v12,v13 = t14,  v14,v15 = t15
+
+        dbutterfly_n    v18, v30, v8,  v9,  v12, v13, v16, v17, v12, v13 // v18   = out[2], v30     = t14a
+        dbutterfly_n    v29, v17, v10, v11, v14, v15, v12, v13, v14, v15 // v29 = -out[13], v17     = t15a
+        neg             v29.8h, v29.8h                   // v29 = out[13]
+
+        dmbutterfly_l   v10, v11, v8,  v9,  v4,  v5,  v0.h[2], v0.h[3]   // v10,v11 = t5a,  v8,v9   = t4a
+        dmbutterfly_l   v12, v13, v14, v15, v7,  v6,  v0.h[3], v0.h[2]   // v12,v13 = t6a,  v14,v15 = t7a
+
+        butterfly_8h    v2,  v6,  v27, v25               // v2 = out[0], v6 = t2a
+        butterfly_8h    v3,  v7,  v23, v21               // v3 =-out[1], v7 = t10
+
+        dbutterfly_n    v19, v31, v8,  v9,  v12, v13, v4,  v5,  v8,  v9  // v19 = -out[3],  v31 = t6
+        neg             v19.8h, v19.8h                   // v19 = out[3]
+        dbutterfly_n    v28, v16, v10, v11, v14, v15, v4,  v5,  v10, v11 // v28 = out[12],  v16 = t7
+
+        butterfly_8h    v5,  v8,  v20, v22               // v5 =-out[15],v8 = t3a
+        butterfly_8h    v4,  v9,  v24, v26               // v4 = out[14],v9 = t11
+
+        dmbutterfly0    v23, v24, v6,  v8,  v10, v11, v12, v13, v14, v15, 1 // v23 = out[7], v24 = out[8]
+        dmbutterfly0    v21, v26, v30, v17, v10, v11, v12, v13, v14, v15, 1 // v21 = out[5], v26 = out[10]
+        dmbutterfly0    v20, v27, v16, v31, v10, v11, v12, v13, v14, v15    // v20 = out[4], v27 = out[11]
+        dmbutterfly0    v22, v25, v9,  v7,  v10, v11, v12, v13, v14, v15    // v22 = out[6], v25 = out[9]
+
+        neg             v31.8h,  v5.8h                    // v31 = out[15]
+        neg             v17.8h,  v3.8h                    // v17 = out[1]
+
+        // out[0] and out[14] were parked in v2/v4 while v16/v30 were
+        // still in use; move them to their final registers.
+        mov             v16.16b, v2.16b
+        mov             v30.16b, v4.16b
+        ret
+endfunc
+
+// Helper macros; we can't use these expressions directly within
+// e.g. .irp due to the extra concatenation \(). Therefore wrap
+// them in macros to allow using .irp below.
+// Load 8 halfwords into vector register number i from [src], then
+// post-increment src by inc.
+.macro load i, src, inc
+        ld1             {v\i\().8h},  [\src], \inc
+.endm
+// Store the 8 halfwords of vector register number i to [dst], then
+// post-increment dst by inc.
+.macro store i, dst, inc
+        st1             {v\i\().8h},  [\dst], \inc
+.endm
+// movi into vector register number i; size is the arrangement suffix
+// including the leading dot (e.g. .16b), imm the immediate.
+.macro movi_v i, size, imm
+        movi            v\i\()\size,  \imm
+.endm
+// Load 8 halfwords into vector register number i from [src], then
+// overwrite the same memory with v2 and post-increment src by inc.
+// The users in this file set v2 to zero first, which clears the
+// coefficients as they are read.
+.macro load_clear i, src, inc
+        ld1             {v\i\().8h}, [\src]
+        st1             {v2.8h},  [\src], \inc
+.endm
+
+// Apply a rounding shift right by 6 to eight rows of 8 residuals
+// (coef0-coef7, given as .8h operands), add them to eight 8-pixel
+// destination rows and store the result with unsigned saturation.
+// Rows alternate between the pointers x0 (coef0, 2, 4, 6) and x3
+// (coef1, 3, 5, 7), each advanced by x1 per access; the callers in this
+// file set x3 = x0 + stride and x1 = 2 * stride. Both pointers are
+// rewound by four steps before storing, so each ends up four steps
+// further than it started.
+// tmp1/tmp2 are .8b operands used for the last two destination rows; they
+// may alias coef0/coef1, which are narrowed before tmp1/tmp2 are loaded.
+// Clobbers v2-v7 and the coef registers.
+.macro load_add_store coef0, coef1, coef2, coef3, coef4, coef5, coef6, coef7, tmp1, tmp2
+        srshr           \coef0, \coef0, #6
+        ld1             {v2.8b},  [x0], x1
+        srshr           \coef1, \coef1, #6
+        ld1             {v3.8b},  [x3], x1
+        srshr           \coef2, \coef2, #6
+        ld1             {v4.8b},  [x0], x1
+        srshr           \coef3, \coef3, #6
+        uaddw           \coef0, \coef0, v2.8b
+        ld1             {v5.8b},  [x3], x1
+        uaddw           \coef1, \coef1, v3.8b
+        srshr           \coef4, \coef4, #6
+        ld1             {v6.8b},  [x0], x1
+        srshr           \coef5, \coef5, #6
+        ld1             {v7.8b},  [x3], x1
+        sqxtun          v2.8b,  \coef0
+        srshr           \coef6, \coef6, #6
+        sqxtun          v3.8b,  \coef1
+        srshr           \coef7, \coef7, #6
+        uaddw           \coef2, \coef2, v4.8b
+        ld1             {\tmp1},  [x0], x1
+        uaddw           \coef3, \coef3, v5.8b
+        ld1             {\tmp2},  [x3], x1
+        sqxtun          v4.8b,  \coef2
+        // All eight rows are loaded; rewind both pointers for the stores
+        sub             x0,  x0,  x1, lsl #2
+        sub             x3,  x3,  x1, lsl #2
+        sqxtun          v5.8b,  \coef3
+        uaddw           \coef4, \coef4, v6.8b
+        st1             {v2.8b},  [x0], x1
+        uaddw           \coef5, \coef5, v7.8b
+        st1             {v3.8b},  [x3], x1
+        sqxtun          v6.8b,  \coef4
+        st1             {v4.8b},  [x0], x1
+        sqxtun          v7.8b,  \coef5
+        st1             {v5.8b},  [x3], x1
+        uaddw           \coef6, \coef6, \tmp1
+        st1             {v6.8b},  [x0], x1
+        uaddw           \coef7, \coef7, \tmp2
+        st1             {v7.8b},  [x3], x1
+        sqxtun          \tmp1,  \coef6
+        sqxtun          \tmp2,  \coef7
+        st1             {\tmp1},  [x0], x1
+        st1             {\tmp2},  [x3], x1
+.endm
+
+// Defines the two 1-D 16-point pass functions for one transform (txfm is
+// idct or iadst): <txfm>16_1d_8x16_pass1_neon and
+// <txfm>16_1d_8x16_pass2_neon. Both call the function <txfm>16 with bl
+// and therefore keep their own return address in x14.
+//
+// Pass 1:
+// Read a vertical 8x16 slice out of a 16x16 matrix, do a transform on it,
+// transpose into a horizontal 16x8 slice and store.
+// The input coefficients are overwritten with zeros as they are read.
+// x0 = dst (temp buffer)
+// x1 = slice offset
+// x2 = src
+// x9 = input stride
+.macro itxfm16_1d_funcs txfm
+function \txfm\()16_1d_8x16_pass1_neon
+        mov             x14, x30
+
+        // v2 = 0 is what load_clear writes back over the coefficients
+        movi            v2.8h, #0
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        load_clear      \i,  x2,  x9
+.endr
+
+        bl              \txfm\()16
+
+        // Do two 8x8 transposes. Originally, v16-v31 contain the
+        // 16 rows. Afterwards, v16-v23 and v24-v31 contain the two
+        // transposed 8x8 blocks.
+        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
+        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v2, v3
+
+        // Store the transposed 8x8 blocks horizontally.
+        // Registers from the two blocks are interleaved so that each pair
+        // of 16-byte stores forms one row of the temp buffer.
+        cmp             x1,  #8
+        b.eq            1f
+.irp i, 16, 24, 17, 25, 18, 26, 19, 27, 20, 28, 21, 29, 22, 30, 23, 31
+        store           \i,  x0,  #16
+.endr
+        ret             x14
+1:
+        // Special case: For the last input column (x1 == 8),
+        // which would be stored as the last row in the temp buffer,
+        // don't store the first 8x8 block, but keep it in registers
+        // for the first slice of the second pass (where it is the
+        // last 8x8 block).
+        // The add skips the first 16 bytes of each row, the store then
+        // fills the second 16 bytes.
+.irp i, 24, 25, 26, 27, 28, 29, 30, 31
+        add             x0,  x0,  #16
+        store           \i,  x0,  #16
+.endr
+        // Move the block that was not stored into v24-v31, where the
+        // first pass 2 slice expects it.
+        mov             v24.16b, v16.16b
+        mov             v25.16b, v17.16b
+        mov             v26.16b, v18.16b
+        mov             v27.16b, v19.16b
+        mov             v28.16b, v20.16b
+        mov             v29.16b, v21.16b
+        mov             v30.16b, v22.16b
+        mov             v31.16b, v23.16b
+        ret             x14
+endfunc
+
+// Read a vertical 8x16 slice out of a 16x16 matrix, do a transform on it,
+// load the destination pixels (from a similar 8x16 slice), add and store back.
+// x0 = dst
+// x1 = dst stride
+// x2 = src (temp buffer)
+// x3 = slice offset
+// x9 = temp buffer stride
+function \txfm\()16_1d_8x16_pass2_neon
+        mov             x14, x30
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load            \i,  x2,  x9
+.endr
+        // For slice offset 0 the lower eight rows are not loaded; they
+        // are expected to be in v24-v31 already, left there by the last
+        // pass 1 slice.
+        cbz             x3,  1f
+.irp i, 24, 25, 26, 27, 28, 29, 30, 31
+        load            \i,  x2,  x9
+.endr
+1:
+
+        // Set up the two row pointers for load_add_store: x0 and
+        // x3 = x0 + stride, both stepping by twice the stride.
+        add             x3,  x0,  x1
+        lsl             x1,  x1,  #1
+        bl              \txfm\()16
+
+        load_add_store  v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
+        load_add_store  v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
+
+        ret             x14
+endfunc
+.endm
+
+// Instantiate the pass 1 / pass 2 helpers for both 16-point transforms.
+itxfm16_1d_funcs idct
+itxfm16_1d_funcs iadst
+
+// Defines ff_vp9_<txfm1>_<txfm2>_16x16_add_neon for one transform pair.
+// On entry (as used below): x0 = dst, x1 = dst stride, x2 = coefficient
+// block, w3 = eob.
+// The transform runs as two passes over 8-column slices, with a 512-byte
+// temp buffer on the stack in between. State kept across the helper
+// calls: x4 = dst, x5 = stride, x6 = coefficient block, x9 = 32 (row
+// stride in bytes of both the input and the temp buffer), x10 =
+// idct_coeffs, x11 = iadst16_coeffs (not set for idct/idct), x15 = return
+// address.
+// For idct/idct, small eob values branch to specialised variants
+// (DC-only, quarter, half), which return to the caller directly.
+.macro itxfm_func16x16 txfm1, txfm2
+function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
+.ifc \txfm1\()_\txfm2,idct_idct
+        cmp             w3,  #1
+        b.eq            idct16x16_dc_add_neon
+.endif
+        mov             x15, x30
+        // iadst16 requires clobbering v8-v15, but idct16 doesn't need to.
+.ifnc \txfm1\()_\txfm2,idct_idct
+        stp             d8,  d9,  [sp, #-0x40]!
+        stp             d14, d15, [sp, #0x30]
+        stp             d12, d13, [sp, #0x20]
+        stp             d10, d11, [sp, #0x10]
+.endif
+
+        // Temp buffer for the intermediate 16x16 block of 16 bit values
+        sub             sp,  sp,  #512
+
+        mov             x4,  x0
+        mov             x5,  x1
+        mov             x6,  x2
+
+        movrel          x10, idct_coeffs
+.ifnc \txfm1\()_\txfm2,idct_idct
+        movrel          x11, iadst16_coeffs
+.endif
+        // idct16 expects its constants in v0/v1; iadst16 loads its own
+.ifc \txfm1,idct
+        ld1             {v0.8h,v1.8h}, [x10]
+.endif
+        mov             x9,  #32
+
+        // These are branches, not calls: the partial variants release
+        // the temp buffer and return via x15 themselves.
+.ifc \txfm1\()_\txfm2,idct_idct
+        cmp             w3,  #10
+        b.le            idct16x16_quarter_add_neon
+        cmp             w3,  #38
+        b.le            idct16x16_half_add_neon
+.endif
+
+        // Pass 1: one call per 8-column slice of the input. x0 points at
+        // temp buffer row 0 or 8, x2 at input column 0 or 8.
+        // NOTE(review): for idct/idct, eob <= 38 has already branched to
+        // idct16x16_half_add_neon above, so the cmp/b.le below appears to
+        // be never taken on this path - confirm before relying on it.
+.irp i, 0, 8
+        add             x0,  sp,  #(\i*32)
+.ifc \txfm1\()_\txfm2,idct_idct
+.if \i == 8
+        cmp             w3,  #38
+        b.le            1f
+.endif
+.endif
+        mov             x1,  #\i
+        add             x2,  x6,  #(\i*2)
+        bl              \txfm1\()16_1d_8x16_pass1_neon
+.endr
+        // iadst16 overwrote v0/v1 during pass 1; reload the idct
+        // constants for the idct16 calls in pass 2.
+.ifc \txfm1\()_\txfm2,iadst_idct
+        ld1             {v0.8h,v1.8h}, [x10]
+.endif
+
+.ifc \txfm1\()_\txfm2,idct_idct
+        b               3f
+1:
+        // Set v24-v31 to zero, for the in-register passthrough of
+        // coefficients to pass 2. Since we only do two slices, this can
+        // only ever happen for the second slice. So we only need to store
+        // zeros to the temp buffer for the second half of the buffer.
+        // Move x0 to the second half, and use x9 == 32 as increment.
+        add             x0,  x0,  #16
+.irp i, 24, 25, 26, 27, 28, 29, 30, 31
+        movi_v          \i,  .16b, #0
+        st1             {v24.8h},  [x0], x9
+.endr
+3:
+.endif
+
+        // Pass 2: one call per 8-column slice. x0 = dst column 0 or 8,
+        // x2 = temp buffer column 0 or 8, x3 = slice offset.
+.irp i, 0, 8
+        add             x0,  x4,  #(\i)
+        mov             x1,  x5
+        add             x2,  sp,  #(\i*2)
+        mov             x3,  #\i
+        bl              \txfm2\()16_1d_8x16_pass2_neon
+.endr
+
+        // Release the temp buffer and restore the saved registers
+        add             sp,  sp,  #512
+.ifnc \txfm1\()_\txfm2,idct_idct
+        ldp             d10, d11, [sp, #0x10]
+        ldp             d12, d13, [sp, #0x20]
+        ldp             d14, d15, [sp, #0x30]
+        ldp             d8,  d9,  [sp], #0x40
+.endif
+        ret             x15
+endfunc
+.endm
+
+// Instantiate the exported 16x16 add functions, one per transform pair.
+itxfm_func16x16 idct,  idct
+itxfm_func16x16 iadst, idct
+itxfm_func16x16 idct,  iadst
+itxfm_func16x16 iadst, iadst
+
+// Pass 1 of the quarter idct16 variant: load (and clear) only the first
+// four rows of the 8-column slice at x2, run idct16_quarter and transpose.
+// x0 = temp buffer, x2 = src, x9 = stride in bytes of src and temp buffer.
+// The return address is kept in x14 because of the bl below.
+function idct16_1d_8x16_pass1_quarter_neon
+        mov             x14, x30
+        // v2 = 0 is what load_clear writes back over the coefficients
+        movi            v2.8h, #0
+.irp i, 16, 17, 18, 19
+        load_clear      \i,  x2,  x9
+.endr
+
+        bl              idct16_quarter
+
+        // Do two 8x8 transposes. Originally, v16-v31 contain the
+        // 16 rows. Afterwards, v16-v23 and v24-v31 contain the two
+        // transposed 8x8 blocks.
+        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
+        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v2, v3
+
+        // Store the transposed 8x8 blocks horizontally.
+        // The first 8x8 block is kept in registers for the second pass,
+        // store the rest in the temp buffer.
+        // Since only a 4x4 part of the input was nonzero, this means that
+        // only 4 rows are nonzero after transposing, and the second pass
+        // only reads the topmost 4 rows. Therefore only store the topmost
+        // 4 rows.
+        // The add selects the second 16 bytes of each temp buffer row.
+        add             x0,  x0,  #16
+.irp i, 24, 25, 26, 27
+        store           \i,  x0,  x9
+.endr
+        ret             x14
+endfunc
+
+// Pass 2 of the quarter idct16 variant: transform four rows of an
+// 8-column slice with idct16_quarter and add the result into the
+// destination.
+// x0 = dst, x1 = dst stride, x2 = src (temp buffer), x3 = slice offset,
+// x9 = temp buffer stride.
+function idct16_1d_8x16_pass2_quarter_neon
+        mov             x14, x30
+        // For slice offset 0 nothing is loaded: v16-v19 are expected to
+        // hold the block that pass 1 kept in registers.
+        cbz             x3,  1f
+.irp i, 16, 17, 18, 19
+        load            \i,  x2,  x9
+.endr
+1:
+
+        // Set up the two row pointers for load_add_store: x0 and
+        // x3 = x0 + stride, both stepping by twice the stride.
+        add             x3,  x0,  x1
+        lsl             x1,  x1,  #1
+        bl              idct16_quarter
+
+        load_add_store  v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
+        load_add_store  v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
+
+        ret             x14
+endfunc
+
+// Pass 1 of the half idct16 variant: load (and clear) only the first
+// eight rows of the 8-column slice at x2, run idct16_half and transpose.
+// x0 = temp buffer, x2 = src, x9 = stride in bytes of src and temp buffer.
+// The return address is kept in x14 because of the bl below.
+function idct16_1d_8x16_pass1_half_neon
+        mov             x14, x30
+        // v2 = 0 is what load_clear writes back over the coefficients
+        movi            v2.8h, #0
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load_clear      \i,  x2,  x9
+.endr
+
+        bl              idct16_half
+
+        // Do two 8x8 transposes. Originally, v16-v31 contain the
+        // 16 rows. Afterwards, v16-v23 and v24-v31 contain the two
+        // transposed 8x8 blocks.
+        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
+        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v2, v3
+
+        // Store the transposed 8x8 blocks horizontally.
+        // The first 8x8 block is kept in registers for the second pass,
+        // store the rest in the temp buffer.
+        // The add selects the second 16 bytes of each temp buffer row.
+        add             x0,  x0,  #16
+.irp i, 24, 25, 26, 27, 28, 29, 30, 31
+        store           \i,  x0,  x9
+.endr
+        ret             x14
+endfunc
+
+// Pass 2 of the half idct16 variant: transform eight rows of an 8-column
+// slice with idct16_half and add the result into the destination.
+// x0 = dst, x1 = dst stride, x2 = src (temp buffer), x3 = slice offset,
+// x9 = temp buffer stride.
+function idct16_1d_8x16_pass2_half_neon
+        mov             x14, x30
+        // For slice offset 0 nothing is loaded: v16-v23 are expected to
+        // hold the block that pass 1 kept in registers.
+        cbz             x3,  1f
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load            \i,  x2,  x9
+.endr
+1:
+
+        // Set up the two row pointers for load_add_store: x0 and
+        // x3 = x0 + stride, both stepping by twice the stride.
+        add             x3,  x0,  x1
+        lsl             x1,  x1,  #1
+        bl              idct16_half
+
+        load_add_store  v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
+        load_add_store  v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
+
+        ret             x14
+endfunc
+
+// Defines idct16x16_<size>_add_neon (size is quarter or half), the body
+// of the reduced idct/idct 16x16 transform. It is reached by a branch
+// from ff_vp9_idct_idct_16x16_add_neon, not by a call, and depends on the
+// state set up there: a 512-byte temp buffer at sp, x4 = dst,
+// x5 = dst stride, x6 = coefficient block, x9 = 32, v0/v1 = idct
+// constants and x15 = return address. It releases the temp buffer and
+// returns to the original caller through x15.
+// Pass 1 runs on the first 8-column slice only; pass 2 runs on both.
+.macro idct16_partial size
+function idct16x16_\size\()_add_neon
+        add             x0,  sp,  #(0*32)
+        add             x2,  x6,  #(0*2)
+        bl              idct16_1d_8x16_pass1_\size\()_neon
+.irp i, 0, 8
+        add             x0,  x4,  #(\i)
+        mov             x1,  x5
+        add             x2,  sp,  #(\i*2)
+        mov             x3,  #\i
+        bl              idct16_1d_8x16_pass2_\size\()_neon
+.endr
+
+        add             sp,  sp,  #512
+        ret             x15
+endfunc
+.endm
+
+idct16_partial quarter
+idct16_partial half
+
+function idct32x32_dc_add_neon // DC-only path: adds one constant to 32 rows of 32 bytes (x0 = pixel pointer, x1 = row stride, x2 = coefficient pointer)
+        movrel          x4,  idct_coeffs
+        ld1             {v0.4h}, [x4] // first four entries of idct_coeffs; only lane 0 is used below
+
+        movi            v1.4h,  #0
+
+        ld1             {v2.h}[0], [x2] // the single 16-bit coefficient at x2
+        smull           v2.4s,  v2.4h,  v0.h[0]
+        rshrn           v2.4h,  v2.4s,  #14 // first multiply by coefficient 0, rounding shift right by 14
+        smull           v2.4s,  v2.4h,  v0.h[0]
+        rshrn           v2.4h,  v2.4s,  #14 // second multiply and rounding shift; presumably one per 1-D pass
+        dup             v2.8h,  v2.h[0] // broadcast lane 0 to all 8 lanes
+        st1             {v1.h}[0], [x2] // write zero back over the coefficient that was just read
+
+        srshr           v0.8h,  v2.8h,  #6 // final rounding shift by 6; v0 is the per-pixel addend
+
+        mov             x3,  x0 // x3 = store pointer, x0 stays the load pointer
+        mov             x4,  #32 // rows remaining
+1:
+        // Loop to add the constant v0 into all 32x32 outputs
+        subs            x4,  x4,  #2 // two rows per iteration; these flags are consumed by the b.ne at the bottom
+        ld1             {v1.16b,v2.16b},  [x0], x1 // first row, 32 bytes
+        uaddw           v16.8h, v0.8h,  v1.8b // widen the bytes to 16 bits and add the constant
+        uaddw2          v17.8h, v0.8h,  v1.16b
+        ld1             {v3.16b,v4.16b},  [x0], x1 // second row, 32 bytes
+        uaddw           v18.8h, v0.8h,  v2.8b
+        uaddw2          v19.8h, v0.8h,  v2.16b
+        uaddw           v20.8h, v0.8h,  v3.8b
+        uaddw2          v21.8h, v0.8h,  v3.16b
+        uaddw           v22.8h, v0.8h,  v4.8b
+        uaddw2          v23.8h, v0.8h,  v4.16b
+        sqxtun          v1.8b,  v16.8h // saturating narrow of the signed sums to unsigned bytes
+        sqxtun2         v1.16b, v17.8h
+        sqxtun          v2.8b,  v18.8h
+        sqxtun2         v2.16b, v19.8h
+        sqxtun          v3.8b,  v20.8h
+        sqxtun2         v3.16b, v21.8h
+        st1             {v1.16b,v2.16b},  [x3], x1 // write back the first row
+        sqxtun          v4.8b,  v22.8h
+        sqxtun2         v4.16b, v23.8h
+        st1             {v3.16b,v4.16b},  [x3], x1 // write back the second row
+        b.ne            1b // loop until the row counter reaches zero
+
+        ret
+endfunc
+
+.macro idct32_end // Common tail expanded at the end of idct32_odd, idct32_odd_half and idct32_odd_quarter
+        butterfly_8h    v16, v5,  v4,  v5  // v16 = t16a, v5  = t19a
+        butterfly_8h    v17, v20, v23, v20 // v17 = t17,  v20 = t18
+        butterfly_8h    v18, v6,  v7,  v6  // v18 = t23a, v6  = t20a
+        butterfly_8h    v19, v21, v22, v21 // v19 = t22,  v21 = t21
+        butterfly_8h    v4,  v28, v28, v30 // v4  = t24a, v28 = t27a
+        butterfly_8h    v23, v26, v25, v26 // v23 = t25,  v26 = t26
+        butterfly_8h    v7,  v3,  v29, v31 // v7  = t31a, v3  = t28a
+        butterfly_8h    v22, v27, v24, v27 // v22 = t30,  v27 = t29
+
+        dmbutterfly     v27, v20, v0.h[2], v0.h[3], v24, v25, v30, v31        // v27 = t18a, v20 = t29a
+        dmbutterfly     v3,  v5,  v0.h[2], v0.h[3], v24, v25, v30, v31        // v3  = t19,  v5  = t28
+        dmbutterfly     v28, v6,  v0.h[2], v0.h[3], v24, v25, v30, v31, neg=1 // v28 = t27,  v6  = t20
+        dmbutterfly     v26, v21, v0.h[2], v0.h[3], v24, v25, v30, v31, neg=1 // v26 = t26a, v21 = t21a
+
+        butterfly_8h    v31, v24, v7,  v4  // v31 = t31,  v24 = t24
+        butterfly_8h    v30, v25, v22, v23 // v30 = t30a, v25 = t25a
+        butterfly_8h_r  v23, v16, v16, v18 // v23 = t23,  v16 = t16
+        butterfly_8h_r  v22, v17, v17, v19 // v22 = t22a, v17 = t17a
+        butterfly_8h    v18, v21, v27, v21 // v18 = t18,  v21 = t21
+        butterfly_8h_r  v27, v28, v5,  v28 // v27 = t27a, v28 = t28a
+        butterfly_8h    v29, v26, v20, v26 // v29 = t29,  v26 = t26
+        butterfly_8h    v19, v20, v3,  v6  // v19 = t19a, v20 = t20
+
+        dmbutterfly0    v27, v20, v27, v20, v2, v3, v4, v5, v6, v7 // v27 = t27,  v20 = t20
+        dmbutterfly0    v26, v21, v26, v21, v2, v3, v4, v5, v6, v7 // v26 = t26a, v21 = t21a
+        dmbutterfly0    v25, v22, v25, v22, v2, v3, v4, v5, v6, v7 // v25 = t25,  v22 = t22
+        dmbutterfly0    v24, v23, v24, v23, v2, v3, v4, v5, v6, v7 // v24 = t24a, v23 = t23a
+        ret // returns from the function that expanded this macro, so the expansion must be that function's last statement
+.endm
+
+function idct32_odd // Odd half of the 32-point IDCT on v16-v31 (callers load IN(1), IN(3) ... IN(31)); coefficients come from lanes of v8, v9 and v0
+        dmbutterfly     v16, v31, v8.h[0], v8.h[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a
+        dmbutterfly     v24, v23, v8.h[2], v8.h[3], v4, v5, v6, v7 // v24 = t17a, v23 = t30a
+        dmbutterfly     v20, v27, v8.h[4], v8.h[5], v4, v5, v6, v7 // v20 = t18a, v27 = t29a
+        dmbutterfly     v28, v19, v8.h[6], v8.h[7], v4, v5, v6, v7 // v28 = t19a, v19 = t28a
+        dmbutterfly     v18, v29, v9.h[0], v9.h[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a
+        dmbutterfly     v26, v21, v9.h[2], v9.h[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a
+        dmbutterfly     v22, v25, v9.h[4], v9.h[5], v4, v5, v6, v7 // v22 = t22a, v25 = t25a
+        dmbutterfly     v30, v17, v9.h[6], v9.h[7], v4, v5, v6, v7 // v30 = t23a, v17 = t24a
+
+        butterfly_8h    v4,  v24, v16, v24 // v4  = t16, v24 = t17
+        butterfly_8h    v5,  v20, v28, v20 // v5  = t19, v20 = t18
+        butterfly_8h    v6,  v26, v18, v26 // v6  = t20, v26 = t21
+        butterfly_8h    v7,  v22, v30, v22 // v7  = t23, v22 = t22
+        butterfly_8h    v28, v25, v17, v25 // v28 = t24, v25 = t25
+        butterfly_8h    v30, v21, v29, v21 // v30 = t27, v21 = t26
+        butterfly_8h    v29, v23, v31, v23 // v29 = t31, v23 = t30
+        butterfly_8h    v31, v27, v19, v27 // v31 = t28, v27 = t29
+
+        dmbutterfly     v23, v24, v0.h[4], v0.h[5], v16, v17, v18, v19        // v23 = t17a, v24 = t30a
+        dmbutterfly     v27, v20, v0.h[4], v0.h[5], v16, v17, v18, v19, neg=1 // v27 = t29a, v20 = t18a
+        dmbutterfly     v21, v26, v0.h[6], v0.h[7], v16, v17, v18, v19        // v21 = t21a, v26 = t26a
+        dmbutterfly     v25, v22, v0.h[6], v0.h[7], v16, v17, v18, v19, neg=1 // v25 = t25a, v22 = t22a
+        idct32_end // shared final stages; the macro ends with the ret for this function
+endfunc
+
+function idct32_odd_half // Variant of idct32_odd for the _half case, where callers load only v16-v23 before calling
+        dmbutterfly_h1  v16, v31, v8.h[0], v8.h[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a
+        dmbutterfly_h2  v24, v23, v8.h[2], v8.h[3], v4, v5, v6, v7 // v24 = t17a, v23 = t30a
+        dmbutterfly_h1  v20, v27, v8.h[4], v8.h[5], v4, v5, v6, v7 // v20 = t18a, v27 = t29a
+        dmbutterfly_h2  v28, v19, v8.h[6], v8.h[7], v4, v5, v6, v7 // v28 = t19a, v19 = t28a
+        dmbutterfly_h1  v18, v29, v9.h[0], v9.h[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a
+        dmbutterfly_h2  v26, v21, v9.h[2], v9.h[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a
+        dmbutterfly_h1  v22, v25, v9.h[4], v9.h[5], v4, v5, v6, v7 // v22 = t22a, v25 = t25a
+        dmbutterfly_h2  v30, v17, v9.h[6], v9.h[7], v4, v5, v6, v7 // v30 = t23a, v17 = t24a
+
+        butterfly_8h    v4,  v24, v16, v24 // v4  = t16, v24 = t17
+        butterfly_8h    v5,  v20, v28, v20 // v5  = t19, v20 = t18
+        butterfly_8h    v6,  v26, v18, v26 // v6  = t20, v26 = t21
+        butterfly_8h    v7,  v22, v30, v22 // v7  = t23, v22 = t22
+        butterfly_8h    v28, v25, v17, v25 // v28 = t24, v25 = t25
+        butterfly_8h    v30, v21, v29, v21 // v30 = t27, v21 = t26
+        butterfly_8h    v29, v23, v31, v23 // v29 = t31, v23 = t30
+        butterfly_8h    v31, v27, v19, v27 // v31 = t28, v27 = t29
+
+        dmbutterfly     v23, v24, v0.h[4], v0.h[5], v16, v17, v18, v19        // v23 = t17a, v24 = t30a
+        dmbutterfly     v27, v20, v0.h[4], v0.h[5], v16, v17, v18, v19, neg=1 // v27 = t29a, v20 = t18a
+        dmbutterfly     v21, v26, v0.h[6], v0.h[7], v16, v17, v18, v19        // v21 = t21a, v26 = t26a
+        dmbutterfly     v25, v22, v0.h[6], v0.h[7], v16, v17, v18, v19, neg=1 // v25 = t25a, v22 = t22a
+        idct32_end // shared final stages; the macro ends with the ret for this function
+endfunc
+
+function idct32_odd_quarter // Variant of idct32_odd for the _quarter case, where callers load only v16-v19 before calling
+        dsmull_h        v4,  v5,  v16, v8.h[0] // dsmull_h is defined outside this view; presumably a widening multiply into a register pair
+        dsmull_h        v28, v29, v19, v8.h[7]
+        dsmull_h        v30, v31, v16, v8.h[1]
+        dsmull_h        v22, v23, v17, v9.h[6]
+        dsmull_h        v7,  v6,  v17, v9.h[7]
+        dsmull_h        v26, v27, v19, v8.h[6]
+        dsmull_h        v20, v21, v18, v9.h[0]
+        dsmull_h        v24, v25, v18, v9.h[1]
+
+        neg             v28.4s, v28.4s // negate the 32-bit products in v28/v29 before they are narrowed
+        neg             v29.4s, v29.4s
+        neg             v7.4s,  v7.4s // likewise for the products in v7/v6
+        neg             v6.4s,  v6.4s
+
+        drshrn_h        v4,  v4,  v5,  #14 // drshrn_h is defined outside this view; presumably a rounding narrowing shift of a register pair
+        drshrn_h        v5,  v28, v29, #14
+        drshrn_h        v29, v30, v31, #14
+        drshrn_h        v28, v22, v23, #14
+        drshrn_h        v7,  v7,  v6,  #14
+        drshrn_h        v31, v26, v27, #14
+        drshrn_h        v6,  v20, v21, #14
+        drshrn_h        v30, v24, v25, #14
+
+        dmbutterfly_l   v16, v17, v18, v19, v29, v4,  v0.h[4], v0.h[5]
+        dmbutterfly_l   v27, v26, v20, v21, v31, v5,  v0.h[4], v0.h[5]
+        drshrn_h        v23, v16, v17, #14
+        drshrn_h        v24, v18, v19, #14
+        neg             v20.4s, v20.4s // negate the second 32-bit result pair before narrowing it
+        neg             v21.4s, v21.4s
+        drshrn_h        v27, v27, v26, #14
+        drshrn_h        v20, v20, v21, #14
+        dmbutterfly_l   v16, v17, v18, v19, v30, v6,  v0.h[6], v0.h[7]
+        drshrn_h        v21, v16, v17, #14
+        drshrn_h        v26, v18, v19, #14
+        dmbutterfly_l   v16, v17, v18, v19, v28, v7,  v0.h[6], v0.h[7]
+        drshrn_h        v25, v16, v17, #14
+        neg             v18.4s, v18.4s // negate the second 32-bit result pair before narrowing it
+        neg             v19.4s, v19.4s
+        drshrn_h        v22, v18, v19, #14
+
+        idct32_end // values are now in v4-v7 and v20-v31, the same registers idct32_odd fills before this macro; it ends with ret
+endfunc
+
+.macro idct32_funcs suffix
+// Do a 32-point IDCT of an 8x32 slice out of a 32x32 matrix.
+// The 32-point IDCT can be decomposed into two 16-point IDCTs;
+// a normal IDCT16 with every other input component (the even ones, with
+// each output written twice), followed by a separate 16-point IDCT
+// of the odd inputs, added/subtracted onto the outputs of the first idct16.
+// x0 = dst (temp buffer)
+// x1 = unused
+// x2 = src
+// x9 = double input stride
+function idct32_1d_8x32_pass1\suffix\()_neon
+        mov             x14, x30 // keep the return address; the bl instructions below overwrite x30
+        movi            v2.8h,  #0 // zero vector; presumably what load_clear writes back to the source (macro defined outside this view)
+
+        // v16 = IN(0), v17 = IN(2) ... v31 = IN(30)
+.ifb \suffix
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        load_clear      \i, x2, x9
+.endr
+.endif
+.ifc \suffix,_quarter
+.irp i, 16, 17, 18, 19
+        load_clear      \i, x2, x9
+.endr
+.endif
+.ifc \suffix,_half
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load_clear      \i, x2, x9
+.endr
+.endif
+
+        bl              idct16\suffix
+
+        // Do two 8x8 transposes. Originally, v16-v31 contain the
+        // 16 rows. Afterwards, v16-v23 and v24-v31 contain the
+        // two transposed 8x8 blocks.
+        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
+        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v2, v3
+
+        // Store the registers a, b horizontally, followed by the
+        // same registers b, a mirrored.
+.macro store_rev a, b
+        // There's no rev128 instruction, but we reverse each 64 bit
+        // half, and then flip them using an ext with 8 bytes offset.
+        rev64           v3.8h, \b
+        st1             {\a},  [x0], #16
+        rev64           v2.8h, \a
+        ext             v3.16b, v3.16b, v3.16b, #8
+        st1             {\b},  [x0], #16
+        ext             v2.16b, v2.16b, v2.16b, #8
+        st1             {v3.8h},  [x0], #16
+        st1             {v2.8h},  [x0], #16
+.endm
+        store_rev       v16.8h, v24.8h
+        store_rev       v17.8h, v25.8h
+        store_rev       v18.8h, v26.8h
+        store_rev       v19.8h, v27.8h
+        store_rev       v20.8h, v28.8h
+        store_rev       v21.8h, v29.8h
+        store_rev       v22.8h, v30.8h
+        store_rev       v23.8h, v31.8h
+        sub             x0,  x0,  #512 // rewind x0: 8 store_rev expansions of 64 bytes each
+.purgem store_rev
+
+        // Move x2 back to the start of the input, and move
+        // to the first odd row
+.ifb \suffix
+        sub             x2,  x2,  x9, lsl #4
+.endif
+.ifc \suffix,_quarter
+        sub             x2,  x2,  x9, lsl #2
+.endif
+.ifc \suffix,_half
+        sub             x2,  x2,  x9, lsl #3
+.endif
+        add             x2,  x2,  #64 // 64 bytes is half of the 128 that the entry point puts in x9, i.e. one input row
+
+        movi            v2.8h,  #0 // v2 was used as scratch above; make it zero again for load_clear
+        // v16 = IN(1), v17 = IN(3) ... v31 = IN(31)
+.ifb \suffix
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        load_clear      \i, x2, x9
+.endr
+.endif
+.ifc \suffix,_quarter
+.irp i, 16, 17, 18, 19
+        load_clear      \i, x2, x9
+.endr
+.endif
+.ifc \suffix,_half
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load_clear      \i, x2, x9
+.endr
+.endif
+
+        bl              idct32_odd\suffix
+
+        transpose_8x8H  v31, v30, v29, v28, v27, v26, v25, v24, v2, v3
+        transpose_8x8H  v23, v22, v21, v20, v19, v18, v17, v16, v2, v3
+
+        // Store the registers a, b horizontally,
+        // adding into the output first, and the mirrored,
+        // subtracted from the output.
+.macro store_rev a, b
+        ld1             {v4.8h},  [x0]
+        rev64           v3.8h, \b
+        add             v4.8h, v4.8h, \a
+        rev64           v2.8h, \a
+        st1             {v4.8h},  [x0], #16
+        ext             v3.16b, v3.16b, v3.16b, #8
+        ld1             {v5.8h},  [x0]
+        ext             v2.16b, v2.16b, v2.16b, #8
+        add             v5.8h, v5.8h, \b
+        st1             {v5.8h},  [x0], #16
+        ld1             {v6.8h},  [x0]
+        sub             v6.8h, v6.8h, v3.8h
+        st1             {v6.8h},  [x0], #16
+        ld1             {v7.8h},  [x0]
+        sub             v7.8h, v7.8h, v2.8h
+        st1             {v7.8h},  [x0], #16
+.endm
+
+        store_rev       v31.8h, v23.8h
+        store_rev       v30.8h, v22.8h
+        store_rev       v29.8h, v21.8h
+        store_rev       v28.8h, v20.8h
+        store_rev       v27.8h, v19.8h
+        store_rev       v26.8h, v18.8h
+        store_rev       v25.8h, v17.8h
+        store_rev       v24.8h, v16.8h
+.purgem store_rev
+        ret             x14 // return via the address saved on entry
+endfunc
+
+// This is mostly the same as 8x32_pass1, but without the transpose,
+// and use the source as temp buffer between the two idct passes, and
+// add into the destination.
+// x0 = dst
+// x1 = dst stride
+// x2 = src (temp buffer)
+// x7 = negative double temp buffer stride
+// x9 = double temp buffer stride
+function idct32_1d_8x32_pass2\suffix\()_neon
+        mov             x14, x30 // keep the return address; the bl instructions below overwrite x30
+        // v16 = IN(0), v17 = IN(2) ... v31 = IN(30)
+.ifb \suffix
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #4
+.endif
+.ifc \suffix,_quarter
+.irp i, 16, 17, 18, 19
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #2
+.endif
+.ifc \suffix,_half
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #3
+.endif
+
+        bl              idct16\suffix
+
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        store           \i, x2, x9 // park the 16 idct16 outputs in the temp buffer, one per even row
+.endr
+
+        sub             x2,  x2,  x9, lsl #4 // rewind over the 16 rows just stored
+        add             x2,  x2,  #64 // 64 bytes is half of the 128 that the entry point puts in x9: step onto the odd rows
+
+        // v16 = IN(1), v17 = IN(3) ... v31 = IN(31)
+.ifb \suffix
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #4
+.endif
+.ifc \suffix,_quarter
+.irp i, 16, 17, 18, 19
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #2
+.endif
+.ifc \suffix,_half
+.irp i, 16, 17, 18, 19, 20, 21, 22, 23
+        load            \i, x2, x9
+.endr
+        sub             x2,  x2,  x9, lsl #3
+.endif
+        sub             x2,  x2,  #64 // back onto the even rows, where the idct16 outputs were parked
+
+        bl              idct32_odd\suffix
+
+.macro load_acc_store a, b, c, d, neg=0
+.if \neg == 0
+        ld1             {v4.8h},  [x2], x9
+        ld1             {v5.8h},  [x2], x9
+        add             v4.8h, v4.8h, \a
+        ld1             {v6.8h},  [x2], x9
+        add             v5.8h, v5.8h, \b
+        ld1             {v7.8h},  [x2], x9
+        add             v6.8h, v6.8h, \c
+        add             v7.8h, v7.8h, \d
+.else
+        ld1             {v4.8h},  [x2], x7
+        ld1             {v5.8h},  [x2], x7
+        sub             v4.8h, v4.8h, \a
+        ld1             {v6.8h},  [x2], x7
+        sub             v5.8h, v5.8h, \b
+        ld1             {v7.8h},  [x2], x7
+        sub             v6.8h, v6.8h, \c
+        sub             v7.8h, v7.8h, \d
+.endif
+        ld1             {v10.8b}, [x0], x1 // v10 and v11 are clobbered here; ff_vp9_idct_idct_32x32_add_neon saves and restores d10/d11
+        ld1             {v11.8b}, [x0], x1
+        srshr           v4.8h, v4.8h, #6
+        ld1             {v2.8b}, [x0], x1
+        srshr           v5.8h, v5.8h, #6
+        uaddw           v4.8h, v4.8h, v10.8b
+        ld1             {v3.8b}, [x0], x1
+        srshr           v6.8h, v6.8h, #6
+        uaddw           v5.8h, v5.8h, v11.8b
+        srshr           v7.8h, v7.8h, #6
+        sub             x0,  x0,  x1, lsl #2 // rewind dst over the four rows just loaded so the stores hit the same rows
+        uaddw           v6.8h, v6.8h, v2.8b
+        sqxtun          v4.8b, v4.8h
+        uaddw           v7.8h, v7.8h, v3.8b
+        sqxtun          v5.8b, v5.8h
+        st1             {v4.8b}, [x0], x1
+        sqxtun          v6.8b, v6.8h
+        st1             {v5.8b}, [x0], x1
+        sqxtun          v7.8b, v7.8h
+        st1             {v6.8b}, [x0], x1
+        st1             {v7.8b}, [x0], x1
+.endm
+        load_acc_store  v31.8h, v30.8h, v29.8h, v28.8h
+        load_acc_store  v27.8h, v26.8h, v25.8h, v24.8h
+        load_acc_store  v23.8h, v22.8h, v21.8h, v20.8h
+        load_acc_store  v19.8h, v18.8h, v17.8h, v16.8h
+        sub             x2,  x2,  x9 // step back onto the last parked row; the neg=1 expansions below walk backwards using x7
+        load_acc_store  v16.8h, v17.8h, v18.8h, v19.8h, 1
+        load_acc_store  v20.8h, v21.8h, v22.8h, v23.8h, 1
+        load_acc_store  v24.8h, v25.8h, v26.8h, v27.8h, 1
+        load_acc_store  v28.8h, v29.8h, v30.8h, v31.8h, 1
+.purgem load_acc_store
+        ret             x14 // return via the address saved on entry
+endfunc
+.endm
+
+idct32_funcs
+idct32_funcs _quarter
+idct32_funcs _half
+
+const min_eob_idct_idct_32, align=4
+        .short  0, 34, 135, 336 // thresholds compared against w3 in ff_vp9_idct_idct_32x32_add_neon, which starts reading at the second entry
+endconst
+
+function ff_vp9_idct_idct_32x32_add_neon, export=1
+        cmp             w3,  #1 // w3 is presumably the eob count (compare the min_eob table name); 1 selects the DC-only path
+        b.eq            idct32x32_dc_add_neon // tail branch: x30 is untouched, so its ret goes straight back to our caller
+
+        movrel          x10, idct_coeffs
+
+        mov             x15, x30 // keep the return address; the bl calls below overwrite x30
+
+        stp             d8,  d9,  [sp, #-0x20]! // save d8-d11: v8/v9 receive coefficients below and load_acc_store uses v10/v11
+        stp             d10, d11, [sp, #0x10]
+
+        sub             sp,  sp,  #2048 // temp buffer between the two passes
+
+        mov             x4,  x0 // keep the incoming x0-x2 (dst, stride and src, per the helper comments) in x4-x6,
+        mov             x5,  x1 // because x0-x2 are rewritten for every helper call
+        mov             x6,  x2
+
+        // Double stride of the input, since we only read every other line
+        mov             x9,  #128
+        neg             x7,  x9 // negative double stride, used by pass 2 to walk backwards
+
+        ld1             {v0.8h,v1.8h}, [x10], #32 // first 32 bytes of idct_coeffs
+        ld1             {v8.8h,v9.8h}, [x10] // next 32 bytes; lanes of v8/v9 are used by the idct32_odd functions
+
+        cmp             w3,  #34
+        b.le            idct32x32_quarter_add_neon // branch, not call: that function releases this stack frame and returns via x15
+        cmp             w3,  #135
+        b.le            idct32x32_half_add_neon // likewise
+
+        movrel          x12, min_eob_idct_idct_32, 2 // presumably table address plus 2 bytes, i.e. starting at the entry 34
+
+.irp i, 0, 8, 16, 24
+        add             x0,  sp,  #(\i*64) // output rows for this slice in the temp buffer, 64 bytes per row
+.if \i > 0
+        ldrh            w1,  [x12], #2 // next threshold from the table
+        cmp             w3,  w1
+        mov             x1,  #(32 - \i)/4 // count of 256-byte chunks left to zero if we bail out; mov leaves the flags alone
+        b.le            1f // w3 at or below the threshold: skip the remaining slices
+.endif
+        add             x2,  x6,  #(\i*2) // input for this slice, offset in 16-bit coefficients
+        bl              idct32_1d_8x32_pass1_neon
+.endr
+        b               3f // all four slices done, nothing to zero
+
+1:
+        // Write zeros to the temp buffer for pass 2
+        movi            v16.8h,  #0
+        movi            v17.8h,  #0
+        movi            v18.8h,  #0
+        movi            v19.8h,  #0
+2:
+        subs            x1,  x1,  #1
+.rept 4
+        st1             {v16.8h,v17.8h,v18.8h,v19.8h},  [x0], #64 // 4 stores of 64 bytes, 256 bytes per loop iteration
+.endr
+        b.ne            2b // flags still come from the subs above; st1 does not change them
+3:
+.irp i, 0, 8, 16, 24
+        add             x0,  x4,  #(\i) // dst plus the slice offset in bytes
+        mov             x1,  x5 // dst stride
+        add             x2,  sp,  #(\i*2) // temp buffer plus the slice offset in 16-bit units
+        bl              idct32_1d_8x32_pass2_neon
+.endr
+
+        add             sp,  sp,  #2048 // release the temp buffer
+
+        ldp             d10, d11, [sp, #0x10] // restore the saved d8-d11
+        ldp             d8,  d9,  [sp], #0x20
+
+        ret             x15 // return via the address saved on entry
+endfunc
+
+.macro idct32_partial size
+function idct32x32_\size\()_add_neon // Reached by b.le from ff_vp9_idct_idct_32x32_add_neon with its stack frame and x4-x7, x9, x15, v0/v1, v8/v9 already set up
+        add             x0,  sp,  #(0*64) // pass 1 output: start of the temp buffer
+        add             x2,  x6,  #(0*2) // pass 1 input: the saved src pointer
+        bl              idct32_1d_8x32_pass1_\size\()_neon
+.ifc \size,half
+        add             x0,  sp,  #(8*64) // the half variant runs a second pass 1 slice, 8 rows further into the temp buffer
+        add             x2,  x6,  #(8*2)
+        bl              idct32_1d_8x32_pass1_\size\()_neon
+.endif
+.irp i, 0, 8, 16, 24
+        add             x0,  x4,  #(\i) // dst plus the slice offset in bytes
+        mov             x1,  x5 // dst stride
+        add             x2,  sp,  #(\i*2) // temp buffer plus the slice offset in 16-bit units
+        bl              idct32_1d_8x32_pass2_\size\()_neon
+.endr
+
+        add             sp,  sp,  #2048 // release the temp buffer allocated by ff_vp9_idct_idct_32x32_add_neon
+
+        ldp             d10, d11, [sp, #0x10] // restore the d8-d11 saved by ff_vp9_idct_idct_32x32_add_neon
+        ldp             d8,  d9,  [sp], #0x20
+
+        ret             x15 // return to the original caller via the address saved in x15
+endfunc
+.endm
+
+idct32_partial quarter
+idct32_partial half
diff --git a/media/ffvpx/libavcodec/aarch64/vp9lpf_16bpp_neon.S b/media/ffvpx/libavcodec/aarch64/vp9lpf_16bpp_neon.S
new file mode 100644
index 0000000000..e3e70491c6
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9lpf_16bpp_neon.S
@@ -0,0 +1,861 @@
+/*
+ * Copyright (c) 2017 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+#include "neon.S"
+
+
+// The input to and output from this macro is in the registers v16-v31,
+// and v0-v7 are used as scratch registers.
+// p7 = v16 .. p3 = v20, p0 = v23, q0 = v24, q3 = v27, q7 = v31
+// Depending on the width of the loop filter, we either use v16-v19
+// and v28-v31 as temp registers, or v8-v15.
+.macro loop_filter wd, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8
+        dup             v0.8h,  w2                   // E
+        dup             v2.8h,  w3                   // I
+        dup             v3.8h,  w4                   // H
+
+        uabd            v4.8h,  v20.8h, v21.8h       // abs(p3 - p2)
+        uabd            v5.8h,  v21.8h, v22.8h       // abs(p2 - p1)
+        uabd            v6.8h,  v22.8h, v23.8h       // abs(p1 - p0)
+        uabd            v7.8h,  v24.8h, v25.8h       // abs(q0 - q1)
+        uabd            \tmp1\().8h,  v25.8h, v26.8h // abs(q1 - q2)
+        uabd            \tmp2\().8h,  v26.8h, v27.8h // abs(q2 - q3)
+        umax            v4.8h,  v4.8h,  v5.8h
+        umax            v5.8h,  v6.8h,  v7.8h
+        umax            \tmp1\().8h,  \tmp1\().8h, \tmp2\().8h
+        uabd            v6.8h,  v23.8h, v24.8h       // abs(p0 - q0)
+        umax            v4.8h,  v4.8h,  v5.8h
+        add             v6.8h,  v6.8h,  v6.8h        // abs(p0 - q0) * 2
+        uabd            v5.8h,  v22.8h, v25.8h       // abs(p1 - q1)
+        umax            v4.8h,  v4.8h,  \tmp1\().8h  // max(abs(p3 - p2), ..., abs(q2 - q3))
+        ushr            v5.8h,  v5.8h,  #1
+        cmhs            v4.8h,  v2.8h,  v4.8h        // max(abs()) <= I
+        add             v6.8h,  v6.8h,  v5.8h        // abs(p0 - q0) * 2 + abs(p1 - q1) >> 1
+        cmhs            v6.8h,  v0.8h,  v6.8h
+        and             v4.16b, v4.16b, v6.16b       // fm
+
+        // If no pixels need filtering, just exit as soon as possible
+        mov             x11, v4.d[0]
+        mov             x12, v4.d[1]
+        adds            x11, x11, x12
+        b.ne            1f
+        ret             x10
+1:
+
+.if \wd >= 8
+        dup             v0.8h,  w5
+
+        uabd            v6.8h,  v20.8h, v23.8h       // abs(p3 - p0)
+        uabd            v2.8h,  v21.8h, v23.8h       // abs(p2 - p0)
+        uabd            v1.8h,  v22.8h, v23.8h       // abs(p1 - p0)
+        uabd            \tmp1\().8h,  v25.8h, v24.8h // abs(q1 - q0)
+        uabd            \tmp2\().8h,  v26.8h, v24.8h // abs(q2 - q0)
+        uabd            \tmp3\().8h,  v27.8h, v24.8h // abs(q3 - q0)
+        umax            v6.8h,  v6.8h,  v2.8h
+        umax            v1.8h,  v1.8h,  \tmp1\().8h
+        umax            \tmp2\().8h,  \tmp2\().8h,  \tmp3\().8h
+.if \wd == 16
+        uabd            v7.8h,  v16.8h, v23.8h       // abs(p7 - p0)
+        umax            v6.8h,  v6.8h,  v1.8h
+        uabd            v2.8h,  v17.8h, v23.8h       // abs(p6 - p0)
+        umax            v6.8h,  v6.8h,  \tmp2\().8h
+        uabd            v1.8h,  v18.8h, v23.8h       // abs(p5 - p0)
+        cmhs            v6.8h,  v0.8h,  v6.8h        // flat8in
+        uabd            v8.8h,  v19.8h, v23.8h       // abs(p4 - p0)
+        and             v6.16b, v6.16b, v4.16b       // flat8in && fm
+        uabd            v9.8h,  v28.8h, v24.8h       // abs(q4 - q0)
+        bic             v4.16b, v4.16b, v6.16b       // fm && !flat8in
+        uabd            v10.8h, v29.8h, v24.8h       // abs(q5 - q0)
+        uabd            v11.8h, v30.8h, v24.8h       // abs(q6 - q0)
+        uabd            v12.8h, v31.8h, v24.8h       // abs(q7 - q0)
+
+        umax            v7.8h,  v7.8h,  v2.8h
+        umax            v1.8h,  v1.8h,  v8.8h
+        umax            v9.8h,  v9.8h,  v10.8h
+        umax            v11.8h, v11.8h, v12.8h
+        // The rest of the calculation of flat8out is interleaved below
+.else
+        // The rest of the calculation of flat8in is interleaved below
+.endif
+.endif
+
+        // Calculate the normal inner loop filter for 2 or 4 pixels
+        uabd            v5.8h,  v22.8h, v23.8h                  // abs(p1 - p0)
+.if \wd == 16
+        umax            v7.8h,  v7.8h,  v1.8h
+        umax            v9.8h,  v9.8h,  v11.8h
+.elseif \wd == 8
+        umax            v6.8h,  v6.8h,  v1.8h
+.endif
+        uabd            v1.8h,  v25.8h, v24.8h                  // abs(q1 - q0)
+.if \wd == 16
+        umax            v7.8h,  v7.8h,  v9.8h
+.elseif \wd == 8
+        umax            v6.8h,  v6.8h,  \tmp2\().8h
+.endif
+        dup             \tmp2\().8h,  w6                        // left shift for saturation
+        sub             \tmp1\().8h,  v22.8h,  v25.8h           // p1 - q1
+        neg             \tmp6\().8h,  \tmp2\().8h               // negative left shift after saturation
+        umax            v5.8h,  v5.8h,  v1.8h                   // max(abs(p1 - p0), abs(q1 - q0))
+        sub             \tmp3\().8h,  v24.8h,  v23.8h           // q0 - p0
+        movi            \tmp5\().8h,  #3
+.if \wd == 8
+        cmhs            v6.8h,  v0.8h,  v6.8h                   // flat8in
+.endif
+        cmhs            v5.8h,  v3.8h,  v5.8h                   // !hev
+.if \wd == 8
+        and             v6.16b, v6.16b, v4.16b                  // flat8in && fm
+.endif
+        sqshl           \tmp1\().8h,  \tmp1\().8h,  \tmp2\().8h
+.if \wd == 16
+        cmhs            v7.8h,  v0.8h,  v7.8h                   // flat8out
+.elseif \wd == 8
+        bic             v4.16b, v4.16b, v6.16b                  // fm && !flat8in
+.endif
+        and             v5.16b,  v5.16b,  v4.16b                // !hev && fm && !flat8in
+.if \wd == 16
+        and             v7.16b, v7.16b, v6.16b                  // flat8out && flat8in && fm
+.endif
+        sshl            \tmp1\().8h,  \tmp1\().8h,  \tmp6\().8h // av_clip_int2p(p1 - q1, BIT_DEPTH - 1)
+
+        mul             \tmp3\().8h,  \tmp3\().8h,  \tmp5\().8h // 3 * (q0 - p0)
+        bic             \tmp1\().16b, \tmp1\().16b, v5.16b      // if (!hev) av_clip_int8 = 0
+        movi            v2.8h,  #4
+        add             \tmp3\().8h,  \tmp3\().8h,  \tmp1\().8h // 3 * (q0 - p0) [+ av_clip_int8(p1 - q1)]
+        movi            v3.8h,  #3
+        sqshl           \tmp1\().8h,  \tmp3\().8h,  \tmp2\().8h
+        movi            \tmp5\().8h,  #0
+        sshl            \tmp1\().8h,  \tmp1\().8h,  \tmp6\().8h // av_clip_int2p(3 * (q0 - p0) [+ av_clip_int2p(p1 - q1)], BIT_DEPTH - 1) = f
+        dup             \tmp6\().8h,  w7                        // max pixel value
+.if \wd == 16
+        bic             v6.16b, v6.16b, v7.16b                  // fm && flat8in && !flat8out
+.endif
+
+        ushr            \tmp2\().8h,  \tmp6\().8h,  #1          // (1 << (BIT_DEPTH - 1)) - 1
+
+        add             \tmp3\().8h,  \tmp1\().8h,  v2.8h       // f + 4
+        add             \tmp4\().8h,  \tmp1\().8h,  v3.8h       // f + 3
+        smin            \tmp3\().8h,  \tmp3\().8h,  \tmp2\().8h // FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1)
+        smin            \tmp4\().8h,  \tmp4\().8h,  \tmp2\().8h // FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1)
+        sshr            \tmp3\().8h,  \tmp3\().8h,  #3          // f1
+        sshr            \tmp4\().8h,  \tmp4\().8h,  #3          // f2
+
+        add             v0.8h,   v23.8h,  \tmp4\().8h           // p0 + f2
+        sub             v2.8h,   v24.8h,  \tmp3\().8h           // q0 - f1
+        smin            v0.8h,   v0.8h,   \tmp6\().8h
+        smin            v2.8h,   v2.8h,   \tmp6\().8h
+        srshr           \tmp3\().8h, \tmp3\().8h, #1            // f = (f1 + 1) >> 1
+        smax            v0.8h,   v0.8h,   \tmp5\().8h           // out p0
+        smax            v2.8h,   v2.8h,   \tmp5\().8h           // out q0
+        bit             v23.16b, v0.16b,  v4.16b                // if (fm && !flat8in)
+        bit             v24.16b, v2.16b,  v4.16b
+
+        add             v0.8h,  v22.8h,  \tmp3\().8h            // p1 + f
+        sub             v2.8h,  v25.8h,  \tmp3\().8h            // q1 - f
+.if \wd >= 8
+        mov             x11, v6.d[0]
+.endif
+        smin            v0.8h,  v0.8h,  \tmp6\().8h
+        smin            v2.8h,  v2.8h,  \tmp6\().8h
+.if \wd >= 8
+        mov             x12, v6.d[1]
+.endif
+        smax            v0.8h,  v0.8h,  \tmp5\().8h             // out p1
+        smax            v2.8h,  v2.8h,  \tmp5\().8h             // out q1
+.if \wd >= 8
+        adds            x11, x11, x12
+.endif
+        bit             v22.16b, v0.16b,  v5.16b                // if (!hev && fm && !flat8in)
+        bit             v25.16b, v2.16b,  v5.16b
+
+        // If no pixels need flat8in, jump to flat8out
+        // (or to a writeout of the inner 4 pixels, for wd=8)
+.if \wd >= 8
+.if \wd == 16
+        b.eq            6f
+.else
+        b.ne            1f
+        ret             x13
+1:
+.endif
+
+        // flat8in
+        add             \tmp1\().8h, v20.8h, v21.8h
+        add             \tmp3\().8h, v22.8h, v25.8h
+        add             \tmp5\().8h, v20.8h, v22.8h
+        add             \tmp7\().8h, v23.8h, v26.8h
+        add             v0.8h,  \tmp1\().8h, \tmp1\().8h
+        add             v0.8h,  v0.8h,  v23.8h
+        add             v0.8h,  v0.8h,  v24.8h
+        add             v0.8h,  v0.8h,  \tmp5\().8h
+        sub             \tmp3\().8h, \tmp3\().8h, \tmp1\().8h
+        sub             \tmp7\().8h, \tmp7\().8h, \tmp5\().8h
+        urshr           v2.8h,  v0.8h,  #3                      // out p2
+
+        add             v0.8h,  v0.8h,  \tmp3\().8h
+        add             \tmp1\().8h, v20.8h,  v23.8h
+        add             \tmp3\().8h, v24.8h,  v27.8h
+        urshr           v3.8h,  v0.8h,  #3                      // out p1
+
+        add             v0.8h,  v0.8h,  \tmp7\().8h
+        sub             \tmp3\().8h, \tmp3\().8h, \tmp1\().8h
+        add             \tmp5\().8h, v21.8h,  v24.8h
+        add             \tmp7\().8h, v25.8h,  v27.8h
+        urshr           v4.8h,  v0.8h,  #3                      // out p0
+
+        add             v0.8h,  v0.8h,  \tmp3\().8h
+        sub             \tmp7\().8h, \tmp7\().8h, \tmp5\().8h
+        add             \tmp1\().8h, v22.8h,  v25.8h
+        add             \tmp3\().8h, v26.8h,  v27.8h
+        urshr           v5.8h,  v0.8h,  #3                      // out q0
+
+        add             v0.8h,  v0.8h,  \tmp7\().8h
+        sub             \tmp3\().8h, \tmp3\().8h, \tmp1\().8h
+        urshr           \tmp5\().8h, v0.8h,  #3                 // out q1
+
+        add             v0.8h,  v0.8h,  \tmp3\().8h
+        // The output here is written back into the input registers. This doesn't
+        // matter for the flat8out part below, since we only update those pixels
+        // which won't be touched below.
+        bit             v21.16b, v2.16b,  v6.16b
+        bit             v22.16b, v3.16b,  v6.16b
+        bit             v23.16b, v4.16b,  v6.16b
+        urshr           \tmp6\().8h,  v0.8h,  #3                // out q2
+        bit             v24.16b, v5.16b,  v6.16b
+        bit             v25.16b, \tmp5\().16b,  v6.16b
+        bit             v26.16b, \tmp6\().16b,  v6.16b
+.endif
+.if \wd == 16
+6:
+        orr             v2.16b,  v6.16b,  v7.16b
+        mov             x11, v2.d[0]
+        mov             x12, v2.d[1]
+        adds            x11, x11, x12
+        b.ne            1f
+        // If no pixels needed flat8in nor flat8out, jump to a
+        // writeout of the inner 4 pixels
+        ret             x14
+1:
+
+        mov             x11, v7.d[0]
+        mov             x12, v7.d[1]
+        adds            x11, x11, x12
+        b.ne            1f
+        // If no pixels need flat8out, jump to a writeout of the inner 6 pixels
+        ret             x15
+
+1:
+        // flat8out
+        // This writes all outputs into v2-v17 (skipping v7 and v16).
+        // If this part is skipped, the output is read from v21-v26 (which is the input
+        // to this section).
+        shl             v0.8h,   v16.8h,  #3     // 8 * v16
+        sub             v0.8h,   v0.8h,   v16.8h // 7 * v16
+        add             v0.8h,   v0.8h,   v17.8h
+        add             v8.8h,   v17.8h,  v18.8h
+        add             v10.8h,  v19.8h,  v20.8h
+        add             v0.8h,   v0.8h,   v8.8h
+        add             v8.8h,   v16.8h,  v17.8h
+        add             v12.8h,  v21.8h,  v22.8h
+        add             v0.8h,   v0.8h,   v10.8h
+        add             v10.8h,  v18.8h,  v25.8h
+        add             v14.8h,  v23.8h,  v24.8h
+        sub             v10.8h,  v10.8h,  v8.8h
+        add             v0.8h,   v0.8h,   v12.8h
+        add             v0.8h,   v0.8h,   v14.8h
+        add             v12.8h,  v16.8h,  v18.8h
+        add             v14.8h,  v19.8h,  v26.8h
+        urshr           v2.8h,   v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v10.8h
+        add             v8.8h,   v16.8h,  v19.8h
+        add             v10.8h,  v20.8h,  v27.8h
+        sub             v14.8h,  v14.8h,  v12.8h
+        bif             v2.16b,  v17.16b, v7.16b
+        urshr           v3.8h ,  v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v14.8h
+        add             v12.8h,  v16.8h,  v20.8h
+        add             v14.8h,  v21.8h,  v28.8h
+        sub             v10.8h,  v10.8h,  v8.8h
+        bif             v3.16b,  v18.16b, v7.16b
+        urshr           v4.8h,   v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v10.8h
+        add             v8.8h,   v16.8h,  v21.8h
+        add             v10.8h,  v22.8h,  v29.8h
+        sub             v14.8h,  v14.8h,  v12.8h
+        bif             v4.16b,  v19.16b, v7.16b
+        urshr           v5.8h,   v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v14.8h
+        add             v12.8h,  v16.8h,  v22.8h
+        add             v14.8h,  v23.8h,  v30.8h
+        sub             v10.8h,  v10.8h,  v8.8h
+        bif             v5.16b,  v20.16b, v7.16b
+        urshr           v6.8h,   v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v10.8h
+        add             v10.8h,  v16.8h,  v23.8h
+        sub             v14.8h,  v14.8h,  v12.8h
+        add             v12.8h,  v24.8h,  v31.8h
+        bif             v6.16b,  v21.16b, v7.16b
+        urshr           v8.8h,   v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v14.8h
+        sub             v10.8h,  v12.8h,  v10.8h
+        add             v12.8h,  v17.8h,  v24.8h
+        add             v14.8h,  v25.8h,  v31.8h
+        bif             v8.16b,  v22.16b, v7.16b
+        urshr           v9.8h,   v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v10.8h
+        sub             v14.8h,  v14.8h,  v12.8h
+        add             v12.8h,  v26.8h,  v31.8h
+        bif             v9.16b,  v23.16b, v7.16b
+        urshr           v10.8h,  v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v14.8h
+        add             v14.8h,  v18.8h,  v25.8h
+        add             v18.8h,  v19.8h,  v26.8h
+        sub             v12.8h,  v12.8h,  v14.8h
+        add             v14.8h,  v27.8h,  v31.8h
+        bif             v10.16b, v24.16b, v7.16b
+        urshr           v11.8h,  v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v12.8h
+        add             v12.8h,  v20.8h,  v27.8h
+        sub             v14.8h,  v14.8h,  v18.8h
+        add             v18.8h,  v28.8h,  v31.8h
+        bif             v11.16b, v25.16b, v7.16b
+        sub             v18.8h,  v18.8h,  v12.8h
+        urshr           v12.8h,  v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v14.8h
+        add             v14.8h,  v21.8h,  v28.8h
+        add             v20.8h,  v29.8h,  v31.8h
+        bif             v12.16b, v26.16b, v7.16b
+        urshr           v13.8h,  v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v18.8h
+        sub             v20.8h,  v20.8h,  v14.8h
+        add             v18.8h,  v22.8h,  v29.8h
+        add             v22.8h,  v30.8h,  v31.8h
+        bif             v13.16b, v27.16b, v7.16b
+        urshr           v14.8h,  v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v20.8h
+        sub             v22.8h,  v22.8h,  v18.8h
+        bif             v14.16b, v28.16b, v7.16b
+        urshr           v15.8h,  v0.8h,   #4
+
+        add             v0.8h,   v0.8h,   v22.8h
+        bif             v15.16b, v29.16b, v7.16b
+        urshr           v17.8h,  v0.8h,   #4
+        bif             v17.16b, v30.16b, v7.16b
+.endif
+.endm
+
+// For wd <= 8, we use v16-v19 and v28-v31 for temp registers,
+// while we need those for inputs/outputs in wd=16 and use v8-v15
+// for temp registers there instead.
+// Local helper (no export=1): the loop_filter macro expanded with wd=4,
+// using v16-v19 and v28-v31 as its temporaries. Called with bl from the
+// loop_filter_4 macro and returns through x30.
+function vp9_loop_filter_4
+        loop_filter     4,  v16, v17, v18, v19, v28, v29, v30, v31
+        ret
+endfunc
+
+// Local helper (no export=1): the loop_filter macro expanded with wd=8,
+// using v16-v19 and v28-v31 as its temporaries. Besides the plain ret below,
+// the macro body can also leave through "ret x13" when no pixel needs the
+// flat8in path, so x13 must hold a valid address on entry (see loop_filter_8).
+function vp9_loop_filter_8
+        loop_filter     8,  v16, v17, v18, v19, v28, v29, v30, v31
+        ret
+endfunc
+
+// Local helper (no export=1): the loop_filter macro expanded with wd=16.
+// v16-v19 and v28-v31 carry pixel data here, so v8-v15 serve as temporaries;
+// exported callers therefore have to preserve d8-d15 themselves. The macro
+// body can also leave through "ret x14" or "ret x15" (see loop_filter_16).
+function vp9_loop_filter_16
+        loop_filter     16, v8,  v9,  v10, v11, v12, v13, v14, v15
+        ret
+endfunc
+
+// Call site wrapper for the wd=4 filter. bl overwrites x30, so the invoking
+// function must have saved its own return address beforehand.
+.macro loop_filter_4
+        bl              vp9_loop_filter_4
+.endm
+
+// Call site wrapper for the wd=8 filter. x13 receives the address of the
+// next local label 6 in the invoking function; the filter body returns there
+// (ret x13) instead of to the instruction after bl when no pixel needs the
+// flat8in path, so only the inner 4 pixels have to be written back.
+// The invoking function must define "6:" after using this macro.
+.macro loop_filter_8
+        // calculate alternative 'return' targets
+        adr             x13, 6f
+        bl              vp9_loop_filter_8
+.endm
+
+// Call site wrapper for the wd=16 filter. Two alternative return addresses
+// are prepared for the filter body:
+//   x14 -> label 7: neither flat8in nor flat8out applied (write inner 4 pixels)
+//   x15 -> label 8: flat8out not applied (write inner 6 pixels)
+// The invoking function must define "7:" and "8:" after using this macro.
+.macro loop_filter_16
+        // calculate alternative 'return' targets
+        adr             x14, 7f
+        adr             x15, 8f
+        bl              vp9_loop_filter_16
+.endm
+
+
+// The public functions in this file have got the following signature:
+// void loop_filter(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr);
+
+// Emits the exported entry point ff_<func>_<bpp>_neon, which prepares the
+// bit-depth dependent arguments and then runs the shared <func>_16_neon code.
+//   func  base name of the internal implementation (without _16_neon)
+//   bpp   bit depth of the pixels (instantiated with 10 and 12)
+//   push  nonzero: save and restore d8-d15 around a bl to the implementation
+//         (needed by the wd=16 filter, which uses v8-v15 as temporaries);
+//         zero: tail-branch to the implementation without touching the stack
+.macro bpp_frontend func, bpp, push
+function ff_\func\()_\bpp\()_neon, export=1
+.if \push
+        mov             x16, x30                // bl below overwrites x30
+        stp             d8,  d9,  [sp, #-0x40]! // reserve 0x40 bytes for d8-d15
+        stp             d14, d15, [sp, #0x30]
+        stp             d12, d13, [sp, #0x20]
+        stp             d10, d11, [sp, #0x10]
+.endif
+        // Scale the three threshold arguments (w2-w4, see the signature
+        // comment above) from 8 bit range up to the target bit depth.
+        lsl             w2,  w2,  #\bpp - 8
+        lsl             w3,  w3,  #\bpp - 8
+        lsl             w4,  w4,  #\bpp - 8
+        // Bit-depth constants handed to the implementation in x5-x7; their
+        // consumer is in the part of loop_filter not shown in this section.
+        mov             x5,  #1 << (\bpp - 8)
+        mov             x6,  #16 - \bpp
+        mov             x7,  #((1 << \bpp) - 1) // largest pixel value at this depth
+.if \push
+        bl              \func\()_16_neon
+        ldp             d10, d11, [sp, #0x10]
+        ldp             d12, d13, [sp, #0x20]
+        ldp             d14, d15, [sp, #0x30]
+        ldp             d8,  d9,  [sp], #0x40   // pop the 0x40 byte save area
+        ret             x16
+.else
+        b               \func\()_16_neon
+.endif
+endfunc
+.endm
+
+// Instantiates bpp_frontend for both supported bit depths (10 and 12).
+// push defaults to 0, i.e. no d8-d15 save/restore.
+.macro bpp_frontends func, push=0
+        bpp_frontend    \func, 10, \push
+        bpp_frontend    \func, 12, \push
+.endm
+
+// Emits the exported entry point ff_<func>_<suffix>_<bpp>_neon, which runs
+// the 8 pixel implementation <func>_<int_suffix>_16_neon twice to cover an
+// edge of 16 pixels.
+//   func        base name of the filter
+//   suffix      size suffix used in the exported symbol name
+//   int_suffix  size suffix of the internal implementation that is called
+//   dir         h: second call is 8 rows further down (x0 += 8 * stride)
+//               otherwise: second call is 8 pixels to the right (x0 += 16 bytes)
+//   bpp         bit depth of the pixels
+//   push        nonzero to save and restore d8-d15 around the calls
+// The second call relies on the implementation returning with x0 restored to
+// the value it was entered with.
+.macro bpp_frontend_rep func, suffix, int_suffix, dir, bpp, push
+function ff_\func\()_\suffix\()_\bpp\()_neon, export=1
+        mov             x16, x30                // bl below overwrites x30
+.if \push
+        stp             d8,  d9,  [sp, #-0x40]! // reserve 0x40 bytes for d8-d15
+        stp             d14, d15, [sp, #0x30]
+        stp             d12, d13, [sp, #0x20]
+        stp             d10, d11, [sp, #0x10]
+.endif
+        // Scale the three threshold arguments (w2-w4) to the target bit depth.
+        lsl             w2,  w2,  #\bpp - 8
+        lsl             w3,  w3,  #\bpp - 8
+        lsl             w4,  w4,  #\bpp - 8
+        // Bit-depth constants handed to the implementation in x5-x7.
+        mov             x5,  #1 << (\bpp - 8)
+        mov             x6,  #16 - \bpp
+        mov             x7,  #((1 << \bpp) - 1)
+        bl              \func\()_\int_suffix\()_16_neon
+.ifc \dir,h
+        add             x0,  x0,  x1, lsl #3
+.else
+        add             x0,  x0,  #16
+.endif
+        bl              \func\()_\int_suffix\()_16_neon
+.if \push
+        ldp             d10, d11, [sp, #0x10]
+        ldp             d12, d13, [sp, #0x20]
+        ldp             d14, d15, [sp, #0x30]
+        // Immediate written with '#', matching the stp above and bpp_frontend.
+        ldp             d8,  d9,  [sp], #0x40
+.endif
+        ret             x16
+endfunc
+.endm
+
+// Instantiates bpp_frontend_rep for both supported bit depths (10 and 12).
+// push defaults to 0, i.e. no d8-d15 save/restore.
+.macro bpp_frontends_rep func, suffix, int_suffix, dir, push=0
+        bpp_frontend_rep \func, \suffix, \int_suffix, \dir, 10, \push
+        bpp_frontend_rep \func, \suffix, \int_suffix, \dir, 12, \push
+.endm
+
+// Emits the exported entry point
+// ff_vp9_loop_filter_<dir>_<wd1><wd2>_16_<bpp>_neon, which filters two
+// adjacent 8 pixel edges with (possibly) different filter widths.
+// Each of w2, w3 and w4 packs two values: bits 0-7 are used for the first
+// edge (filter width wd1) and the bits from 8 upwards for the second edge
+// (filter width wd2).
+//   dir  h: second edge is 8 rows further down (x0 += 8 * stride)
+//        otherwise: second edge is 8 pixels to the right (x0 += 16 bytes)
+//   bpp  bit depth of the pixels
+// The second call relies on the first callee leaving x0 at its entry value
+// and on x5-x8, x14 and x15 surviving the call.
+.macro bpp_frontend_mix2 wd1, wd2, dir, bpp
+function ff_vp9_loop_filter_\dir\()_\wd1\()\wd2\()_16_\bpp\()_neon, export=1
+        mov             x16, x30                // bl below overwrites x30
+        // Set aside the thresholds of the second edge.
+        lsr             w8,  w2,  #8
+        lsr             w14, w3,  #8
+        lsr             w15, w4,  #8
+        // Keep only the thresholds of the first edge and scale them to bpp.
+        and             w2,  w2,  #0xff
+        and             w3,  w3,  #0xff
+        and             w4,  w4,  #0xff
+        lsl             w2,  w2,  #\bpp - 8
+        lsl             w3,  w3,  #\bpp - 8
+        lsl             w4,  w4,  #\bpp - 8
+        // Bit-depth constants handed to the implementation in x5-x7.
+        mov             x5,  #1 << (\bpp - 8)
+        mov             x6,  #16 - \bpp
+        mov             x7,  #((1 << \bpp) - 1)
+        bl              vp9_loop_filter_\dir\()_\wd1\()_8_16_neon
+.ifc \dir,h
+        add             x0,  x0,  x1, lsl #3
+.else
+        add             x0,  x0,  #16
+.endif
+        // Load the scaled thresholds of the second edge.
+        lsl             w2,  w8,  #\bpp - 8
+        lsl             w3,  w14, #\bpp - 8
+        lsl             w4,  w15, #\bpp - 8
+        bl              vp9_loop_filter_\dir\()_\wd2\()_8_16_neon
+        ret             x16
+endfunc
+.endm
+
+// Instantiates bpp_frontend_mix2 for one width pair in both directions
+// (v and h) and both bit depths (10 and 12), giving four entry points.
+.macro bpp_frontends_mix2 wd1, wd2
+        bpp_frontend_mix2 \wd1, \wd2, v, 10
+        bpp_frontend_mix2 \wd1, \wd2, v, 12
+        bpp_frontend_mix2 \wd1, \wd2, h, 10
+        bpp_frontend_mix2 \wd1, \wd2, h, 12
+.endm
+
+// Internal 4-wide filter, 8 pixels long, with the pixel rows addressed
+// through the stride (p rows before x0, q rows from x0 onwards).
+// In:  x0 = address of the q0 row, x1 = stride in bytes, w2-w7 as set up by
+//      the bpp frontends.
+// Out: p1, p0, q0, q1 rows rewritten; x0 is back at its entry value.
+// Uses x9 as the p-side pointer and x10 for the return address.
+function vp9_loop_filter_v_4_8_16_neon
+        mov             x10, x30           // loop_filter_4 does a bl
+        sub             x9,  x0,  x1, lsl #2
+        ld1             {v20.8h}, [x9], x1 // p3
+        ld1             {v24.8h}, [x0], x1 // q0
+        ld1             {v21.8h}, [x9], x1 // p2
+        ld1             {v25.8h}, [x0], x1 // q1
+        ld1             {v22.8h}, [x9], x1 // p1
+        ld1             {v26.8h}, [x0], x1 // q2
+        ld1             {v23.8h}, [x9], x1 // p0
+        ld1             {v27.8h}, [x0], x1 // q3
+        sub             x0,  x0,  x1, lsl #2 // back to the q0 row
+        sub             x9,  x9,  x1, lsl #1 // back to the p1 row
+
+        loop_filter_4
+
+        st1             {v22.8h}, [x9], x1 // p1
+        st1             {v24.8h}, [x0], x1 // q0
+        st1             {v23.8h}, [x9], x1 // p0
+        st1             {v25.8h}, [x0], x1 // q1
+        sub             x0,  x0,  x1, lsl #1 // restore x0 for the caller
+
+        ret             x10
+endfunc
+
+// Exported 10 and 12 bit entry points for the function above.
+bpp_frontends vp9_loop_filter_v_4_8
+
+// Internal 4-wide filter, 8 rows long, with the pixels of one row adjacent
+// in memory (p pixels before x0, q pixels from x0 onwards).
+// In:  x0 = address of q0 in the first row, x1 = stride in bytes, w2-w7 as
+//      set up by the bpp frontends.
+// Out: the middle 4 pixels of each of the 8 rows rewritten; x0 is back at
+//      its entry value.
+// Uses x9 as the pointer for rows 0-3 (x0 handles rows 4-7) and x10 for the
+// return address.
+function vp9_loop_filter_h_4_8_16_neon
+        mov             x10, x30           // loop_filter_4 does a bl
+        sub             x9,  x0,  #8       // 4 pixels (8 bytes) before q0
+        add             x0,  x9,  x1, lsl #2
+        ld1             {v20.8h}, [x9], x1
+        ld1             {v24.8h}, [x0], x1
+        ld1             {v21.8h}, [x9], x1
+        ld1             {v25.8h}, [x0], x1
+        ld1             {v22.8h}, [x9], x1
+        ld1             {v26.8h}, [x0], x1
+        ld1             {v23.8h}, [x9], x1
+        ld1             {v27.8h}, [x0], x1
+
+        // Rewind: x9 to the start of row 0, x0 to its entry value.
+        sub             x9,  x9,  x1, lsl #2
+        sub             x0,  x0,  x1, lsl #3
+        add             x0,  x0,  #8
+
+        // Rows to columns, so that v20-v27 hold p3..q3 as the filter expects.
+        transpose_8x8H  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        loop_filter_4
+
+        // Move x9 forward by 2 pixels; we don't need to rewrite the
+        // outermost 2 pixels since they aren't changed.
+        add             x9,  x9,  #4
+        add             x0,  x9,  x1, lsl #2
+
+        // We will only write the mid 4 pixels back; after the loop filter,
+        // these are in v22, v23, v24, v25, ordered as rows (8x4 pixels).
+        // We need to transpose them to columns, done with a 4x8 transpose
+        // (which in practice is two 4x4 transposes of the two 4x4 halves
+        // of the 8x4 pixels; into 4x8 pixels).
+        transpose_4x8H  v22, v23, v24, v25, v26, v27, v28, v29
+        st1             {v22.d}[0], [x9], x1
+        st1             {v22.d}[1], [x0], x1
+        st1             {v23.d}[0], [x9], x1
+        st1             {v23.d}[1], [x0], x1
+        st1             {v24.d}[0], [x9], x1
+        st1             {v24.d}[1], [x0], x1
+        st1             {v25.d}[0], [x9], x1
+        st1             {v25.d}[1], [x0], x1
+        // Restore x0 for the caller.
+        sub             x0,  x0,  x1, lsl #3
+        add             x0,  x0,  #4
+
+        ret             x10
+endfunc
+
+// Exported 10 and 12 bit entry points for the function above.
+bpp_frontends vp9_loop_filter_h_4_8
+
+// Internal 8-wide filter, 8 pixels long, with the pixel rows addressed
+// through the stride (p rows before x0, q rows from x0 onwards).
+// In:  x0 = address of the q0 row, x1 = stride in bytes, w2-w7 as set up by
+//      the bpp frontends.
+// Out: p2..q2 rows rewritten, or only p1..q1 when the filter leaves through
+//      label 6; x0 is back at its entry value on both paths.
+// Uses x9 as the p-side pointer, x10 for the return address and x13 for the
+// alternative return target (set by loop_filter_8).
+function vp9_loop_filter_v_8_8_16_neon
+        mov             x10, x30           // loop_filter_8 does a bl
+        sub             x9,  x0,  x1, lsl #2
+        ld1             {v20.8h}, [x9], x1 // p3
+        ld1             {v24.8h}, [x0], x1 // q0
+        ld1             {v21.8h}, [x9], x1 // p2
+        ld1             {v25.8h}, [x0], x1 // q1
+        ld1             {v22.8h}, [x9], x1 // p1
+        ld1             {v26.8h}, [x0], x1 // q2
+        ld1             {v23.8h}, [x9], x1 // p0
+        ld1             {v27.8h}, [x0], x1 // q3
+        sub             x9,  x9,  x1, lsl #2
+        sub             x0,  x0,  x1, lsl #2 // back to the q0 row
+        add             x9,  x9,  x1         // x9 now at the p2 row
+
+        loop_filter_8
+
+        st1             {v21.8h}, [x9], x1 // p2
+        st1             {v24.8h}, [x0], x1 // q0
+        st1             {v22.8h}, [x9], x1 // p1
+        st1             {v25.8h}, [x0], x1 // q1
+        st1             {v23.8h}, [x9], x1 // p0
+        st1             {v26.8h}, [x0], x1 // q2
+        // Restore x0 for the caller (3 rows back).
+        sub             x0,  x0,  x1, lsl #1
+        sub             x0,  x0,  x1
+
+        ret             x10
+6:
+        // Reached via x13 when the flat8in part was skipped: only the inner
+        // 4 rows were changed.
+        sub             x9,  x0,  x1, lsl #1
+        st1             {v22.8h}, [x9], x1 // p1
+        st1             {v24.8h}, [x0], x1 // q0
+        st1             {v23.8h}, [x9], x1 // p0
+        st1             {v25.8h}, [x0], x1 // q1
+        sub             x0,  x0,  x1, lsl #1
+        ret             x10
+endfunc
+
+// Exported 10 and 12 bit entry points for the function above.
+bpp_frontends vp9_loop_filter_v_8_8
+
+// Internal 8-wide filter, 8 rows long, with the pixels of one row adjacent
+// in memory (p pixels before x0, q pixels from x0 onwards).
+// In:  x0 = address of q0 in the first row, x1 = stride in bytes, w2-w7 as
+//      set up by the bpp frontends.
+// Out: all 8 loaded pixels of each row written back, or only the middle 4
+//      when the filter leaves through label 6; x0 is back at its entry value
+//      on both paths.
+// Uses x9 as the pointer for rows 0-3 (x0 handles rows 4-7), x10 for the
+// return address and x13 for the alternative return target.
+function vp9_loop_filter_h_8_8_16_neon
+        mov             x10, x30           // loop_filter_8 does a bl
+        sub             x9,  x0,  #8       // 4 pixels (8 bytes) before q0
+        add             x0,  x9,  x1, lsl #2
+        ld1             {v20.8h}, [x9], x1
+        ld1             {v24.8h}, [x0], x1
+        ld1             {v21.8h}, [x9], x1
+        ld1             {v25.8h}, [x0], x1
+        ld1             {v22.8h}, [x9], x1
+        ld1             {v26.8h}, [x0], x1
+        ld1             {v23.8h}, [x9], x1
+        ld1             {v27.8h}, [x0], x1
+
+        // Rewind: x9 to the start of row 0, x0 to its entry value.
+        sub             x9,  x9,  x1, lsl #2
+        sub             x0,  x0,  x1, lsl #3
+        add             x0,  x0,  #8
+
+        // Rows to columns, so that v20-v27 hold p3..q3 as the filter expects.
+        transpose_8x8H  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        loop_filter_8
+
+        add             x0,  x9,  x1, lsl #2
+
+        // Even though only 6 pixels per row have been changed, we write the
+        // full 8 pixel registers.
+        transpose_8x8H  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        st1             {v20.8h}, [x9], x1
+        st1             {v24.8h}, [x0], x1
+        st1             {v21.8h}, [x9], x1
+        st1             {v25.8h}, [x0], x1
+        st1             {v22.8h}, [x9], x1
+        st1             {v26.8h}, [x0], x1
+        st1             {v23.8h}, [x9], x1
+        st1             {v27.8h}, [x0], x1
+        // Restore x0 for the caller.
+        sub             x0,  x0,  x1, lsl #3
+        add             x0,  x0,  #8
+
+        ret             x10
+6:
+        // If we didn't need to do the flat8in part, we use the same writeback
+        // as in loop_filter_h_4_8.
+        add             x9,  x9,  #4
+        add             x0,  x9,  x1, lsl #2
+        transpose_4x8H  v22, v23, v24, v25, v26, v27, v28, v29
+        st1             {v22.d}[0], [x9], x1
+        st1             {v22.d}[1], [x0], x1
+        st1             {v23.d}[0], [x9], x1
+        st1             {v23.d}[1], [x0], x1
+        st1             {v24.d}[0], [x9], x1
+        st1             {v24.d}[1], [x0], x1
+        st1             {v25.d}[0], [x9], x1
+        st1             {v25.d}[1], [x0], x1
+        // Restore x0 for the caller.
+        sub             x0,  x0,  x1, lsl #3
+        add             x0,  x0,  #4
+        ret             x10
+endfunc
+
+// Exported 10 and 12 bit entry points for the function above.
+bpp_frontends vp9_loop_filter_h_8_8
+
+// Exported entry points for two adjacent 8 pixel edges, for every
+// combination of filter widths 4 and 8 (v and h, 10 and 12 bit each).
+bpp_frontends_mix2 4, 4
+bpp_frontends_mix2 4, 8
+bpp_frontends_mix2 8, 4
+bpp_frontends_mix2 8, 8
+
+// Internal 16-wide filter, 8 pixels long, with the pixel rows addressed
+// through the stride (p rows before x0, q rows from x0 onwards).
+// In:  x0 = address of the q0 row, x1 = stride in bytes, w2-w7 as set up by
+//      the bpp frontends.
+// Out: p6..q6 rows rewritten on the main path, p2..q2 via label 8, p1..q1
+//      via label 7; x0 is back at its entry value on all paths.
+// Uses x9 as the p-side pointer, x10 for the return address and x14/x15 for
+// the alternative return targets. v8-v15 are overwritten, so the exported
+// frontends are instantiated with push=1.
+function vp9_loop_filter_v_16_8_16_neon
+        mov             x10, x30           // loop_filter_16 does a bl
+        sub             x9,  x0,  x1, lsl #3
+        ld1             {v16.8h}, [x9], x1 // p7
+        ld1             {v24.8h}, [x0], x1 // q0
+        ld1             {v17.8h}, [x9], x1 // p6
+        ld1             {v25.8h}, [x0], x1 // q1
+        ld1             {v18.8h}, [x9], x1 // p5
+        ld1             {v26.8h}, [x0], x1 // q2
+        ld1             {v19.8h}, [x9], x1 // p4
+        ld1             {v27.8h}, [x0], x1 // q3
+        ld1             {v20.8h}, [x9], x1 // p3
+        ld1             {v28.8h}, [x0], x1 // q4
+        ld1             {v21.8h}, [x9], x1 // p2
+        ld1             {v29.8h}, [x0], x1 // q5
+        ld1             {v22.8h}, [x9], x1 // p1
+        ld1             {v30.8h}, [x0], x1 // q6
+        ld1             {v23.8h}, [x9], x1 // p0
+        ld1             {v31.8h}, [x0], x1 // q7
+        sub             x9,  x9,  x1, lsl #3
+        sub             x0,  x0,  x1, lsl #3 // back to the q0 row
+        add             x9,  x9,  x1         // x9 now at the p6 row
+
+        loop_filter_16
+
+        // If we did the flat8out part, we get the output in
+        // v2-v17 (skipping v7 and v16). x9 points to x0 - 7 * stride,
+        // store v2-v9 there, and v10-v17 into x0.
+        st1             {v2.8h},  [x9], x1
+        st1             {v10.8h}, [x0], x1
+        st1             {v3.8h},  [x9], x1
+        st1             {v11.8h}, [x0], x1
+        st1             {v4.8h},  [x9], x1
+        st1             {v12.8h}, [x0], x1
+        st1             {v5.8h},  [x9], x1
+        st1             {v13.8h}, [x0], x1
+        st1             {v6.8h},  [x9], x1
+        st1             {v14.8h}, [x0], x1
+        st1             {v8.8h},  [x9], x1
+        st1             {v15.8h}, [x0], x1
+        st1             {v9.8h},  [x9], x1
+        st1             {v17.8h}, [x0], x1
+        // Restore x0 for the caller (7 rows back).
+        sub             x0,  x0,  x1, lsl #3
+        add             x0,  x0,  x1
+
+        ret             x10
+8:
+        // Reached via x15: x9 still points at the p6 row, move it to p2.
+        add             x9,  x9,  x1, lsl #2
+        // If we didn't do the flat8out part, the output is left in the
+        // input registers.
+        st1             {v21.8h}, [x9], x1
+        st1             {v24.8h}, [x0], x1
+        st1             {v22.8h}, [x9], x1
+        st1             {v25.8h}, [x0], x1
+        st1             {v23.8h}, [x9], x1
+        st1             {v26.8h}, [x0], x1
+        sub             x0,  x0,  x1, lsl #1
+        sub             x0,  x0,  x1
+        ret             x10
+7:
+        // Reached via x14: only the inner 4 rows were changed.
+        sub             x9,  x0,  x1, lsl #1
+        st1             {v22.8h}, [x9], x1
+        st1             {v24.8h}, [x0], x1
+        st1             {v23.8h}, [x9], x1
+        st1             {v25.8h}, [x0], x1
+        sub             x0,  x0,  x1, lsl #1
+        ret             x10
+endfunc
+
+// Exported 10 and 12 bit entry points: one for a single 8 pixel edge and one
+// that runs the function above twice for 16 pixels. Both save d8-d15.
+bpp_frontends vp9_loop_filter_v_16_8, push=1
+bpp_frontends_rep vp9_loop_filter_v_16, 16, 8, v, push=1
+
+// Internal 16-wide filter, 8 rows long, with the pixels of one row adjacent
+// in memory (p pixels before x0, q pixels from x0 onwards).
+// In:  x0 = address of q0 in the first row, x1 = stride in bytes, w2-w7 as
+//      set up by the bpp frontends.
+// Out: all 16 loaded pixels of each row written back on the main path, the
+//      middle 8 via label 8, the middle 4 via label 7; x0 is back at its
+//      entry value on all paths.
+// Uses x9 as the pointer for the left 8 pixels of each row, x10 for the
+// return address and x14/x15 for the alternative return targets. v8-v15 are
+// overwritten, so the exported frontends are instantiated with push=1.
+function vp9_loop_filter_h_16_8_16_neon
+        mov             x10, x30           // loop_filter_16 does a bl
+        sub             x9,  x0,  #16      // 8 pixels (16 bytes) before q0
+        ld1             {v16.8h}, [x9], x1
+        ld1             {v24.8h}, [x0], x1
+        ld1             {v17.8h}, [x9], x1
+        ld1             {v25.8h}, [x0], x1
+        ld1             {v18.8h}, [x9], x1
+        ld1             {v26.8h}, [x0], x1
+        ld1             {v19.8h}, [x9], x1
+        ld1             {v27.8h}, [x0], x1
+        ld1             {v20.8h}, [x9], x1
+        ld1             {v28.8h}, [x0], x1
+        ld1             {v21.8h}, [x9], x1
+        ld1             {v29.8h}, [x0], x1
+        ld1             {v22.8h}, [x9], x1
+        ld1             {v30.8h}, [x0], x1
+        ld1             {v23.8h}, [x9], x1
+        ld1             {v31.8h}, [x0], x1
+        // Rewind both pointers to row 0.
+        sub             x0,  x0,  x1, lsl #3
+        sub             x9,  x9,  x1, lsl #3
+
+        // The 16x8 pixels read above are in two 8x8 blocks; the left
+        // half in v16-v23, and the right half in v24-v31. Do two 8x8 transposes
+        // of this, to get one column per register.
+        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
+        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v0, v1
+
+        loop_filter_16
+
+        // Columns back to rows. The filtered columns are in v2-v6, v8-v15 and
+        // v17; v16 and v31 are included so that complete rows can be stored.
+        transpose_8x8H  v16, v2,  v3,  v4,  v5,  v6,  v8,  v9,  v0, v1
+        transpose_8x8H  v10, v11, v12, v13, v14, v15, v17, v31, v0, v1
+
+        st1             {v16.8h}, [x9], x1
+        st1             {v10.8h}, [x0], x1
+        st1             {v2.8h},  [x9], x1
+        st1             {v11.8h}, [x0], x1
+        st1             {v3.8h},  [x9], x1
+        st1             {v12.8h}, [x0], x1
+        st1             {v4.8h},  [x9], x1
+        st1             {v13.8h}, [x0], x1
+        st1             {v5.8h},  [x9], x1
+        st1             {v14.8h}, [x0], x1
+        st1             {v6.8h},  [x9], x1
+        st1             {v15.8h}, [x0], x1
+        st1             {v8.8h},  [x9], x1
+        st1             {v17.8h}, [x0], x1
+        st1             {v9.8h},  [x9], x1
+        st1             {v31.8h}, [x0], x1
+        sub             x0,  x0,  x1, lsl #3 // restore x0 for the caller
+
+        ret             x10
+8:
+        // The same writeback as in loop_filter_h_8_8
+        sub             x9,  x0,  #8
+        add             x0,  x9,  x1, lsl #2
+        transpose_8x8H  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        st1             {v20.8h}, [x9], x1
+        st1             {v24.8h}, [x0], x1
+        st1             {v21.8h}, [x9], x1
+        st1             {v25.8h}, [x0], x1
+        st1             {v22.8h}, [x9], x1
+        st1             {v26.8h}, [x0], x1
+        st1             {v23.8h}, [x9], x1
+        st1             {v27.8h}, [x0], x1
+        // Restore x0 for the caller.
+        sub             x0,  x0,  x1, lsl #3
+        add             x0,  x0,  #8
+        ret             x10
+7:
+        // The same writeback as in loop_filter_h_4_8
+        sub             x9,  x0,  #4
+        add             x0,  x9,  x1, lsl #2
+        transpose_4x8H  v22, v23, v24, v25, v26, v27, v28, v29
+        st1             {v22.d}[0], [x9], x1
+        st1             {v22.d}[1], [x0], x1
+        st1             {v23.d}[0], [x9], x1
+        st1             {v23.d}[1], [x0], x1
+        st1             {v24.d}[0], [x9], x1
+        st1             {v24.d}[1], [x0], x1
+        st1             {v25.d}[0], [x9], x1
+        st1             {v25.d}[1], [x0], x1
+        // Restore x0 for the caller.
+        sub             x0,  x0,  x1, lsl #3
+        add             x0,  x0,  #4
+        ret             x10
+endfunc
+
+// Exported 10 and 12 bit entry points: one for a single 8 row edge and one
+// that runs the function above twice for 16 rows. Both save d8-d15.
+bpp_frontends vp9_loop_filter_h_16_8, push=1
+bpp_frontends_rep vp9_loop_filter_h_16, 16, 8, h, push=1
diff --git a/media/ffvpx/libavcodec/aarch64/vp9lpf_neon.S b/media/ffvpx/libavcodec/aarch64/vp9lpf_neon.S
new file mode 100644
index 0000000000..9a79f48df3
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9lpf_neon.S
@@ -0,0 +1,1334 @@
+/*
+ * Copyright (c) 2016 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+#include "neon.S"
+
+
+// The main loop filter macro is templated and can produce filters for
+// vectors of 8 or 16 bytes. The register mapping throughout the filter
+// is close to identical to the arm version (please try to maintain this,
+// if either is changed!). When the arm version uses e.g. d20 for the
+// input variable p3, the aarch64 version uses v20.8b or v20.16b, depending
+// on vector length.
+//
+// The number of elements in the vector is passed in via the macro parameter
+// \sz, which is either .8b or .16b. For simple instructions that don't
+// lengthen or narrow things, this can easily be templated like this:
+//      uabd            v4\sz,  v20\sz, v21\sz
+//
+// For instructions that lengthen or narrow content, the arm version would
+// have used q registers. For these instructions, we have macros that expand
+// into either a single e.g. uaddl instruction, or into a uaddl + uaddl2
+// pair, depending on the \sz parameter. Wherever the arm version would have
+// used a q register, these macros instead take two v registers, i.e. q3
+// is mapped to v6+v7. For the case with 8 byte input vectors, such a
+// lengthening operation is only stored in v6.8h (what was in q3 in the arm
+// case), while the 16 byte input vectors will use v6.8h + v7.8h.
+// Such a macro invocation would look like this:
+//      uaddl_sz        v8.8h,  v9.8h,  v17, v18, \sz
+//
+// That is, in the 8 byte input vector case, the second register in these
+// register pairs will be unused.
+// Unfortunately, this makes the code quite hard to read. For readability,
+// see the arm version instead.
+
+
+// dst1 = in1 + in3; when sz is .16b also dst2 = in2 + in4.
+// All register operands are passed complete with their arrangement suffix.
+.macro add_sz dst1, dst2, in1, in2, in3, in4, sz
+        add             \dst1,  \in1,  \in3
+.ifc \sz, .16b
+        add             \dst2,  \in2,  \in4
+.endif
+.endm
+
+// dst1 = in1 - in3; when sz is .16b also dst2 = in2 - in4.
+// All register operands are passed complete with their arrangement suffix.
+.macro sub_sz dst1, dst2, in1, in2, in3, in4, sz
+        sub             \dst1,  \in1,  \in3
+.ifc \sz, .16b
+        sub             \dst2,  \in2,  \in4
+.endif
+.endm
+
+// Unsigned widening add: dst1 = in1 + low 8 bytes of in3; when sz is .16b
+// also dst2 = in2 + high 8 bytes of in3. in3 is a bare register name.
+.macro uaddw_sz dst1, dst2, in1, in2, in3, sz
+        uaddw           \dst1,  \in1, \in3\().8b
+.ifc \sz, .16b
+        uaddw2          \dst2,  \in2, \in3\().16b
+.endif
+.endm
+
+// Unsigned widening subtract: dst1 = in1 - low 8 bytes of in3; when sz is
+// .16b also dst2 = in2 - high 8 bytes of in3. in3 is a bare register name.
+.macro usubw_sz dst1, dst2, in1, in2, in3, sz
+        usubw           \dst1,  \in1, \in3\().8b
+.ifc \sz, .16b
+        usubw2          \dst2,  \in2, \in3\().16b
+.endif
+.endm
+
+// Unsigned lengthening subtract: dst1 = in1 - in2 on the low 8 bytes; when
+// sz is .16b also dst2 = in1 - in2 on the high 8 bytes.
+// in1 and in2 are bare register names.
+.macro usubl_sz dst1, dst2, in1, in2, sz
+        usubl           \dst1,  \in1\().8b,  \in2\().8b
+.ifc \sz, .16b
+        usubl2          \dst2,  \in1\().16b, \in2\().16b
+.endif
+.endm
+
+// Signed saturating narrow: low 8 bytes of dst from in1; when sz is .16b the
+// high 8 bytes of dst from in2. dst is a bare register name.
+.macro sqxtn_sz dst, in1, in2, sz
+        sqxtn           \dst\().8b,  \in1
+.ifc \sz, .16b
+        sqxtn2          \dst\().16b, \in2
+.endif
+.endm
+
+// Signed to unsigned saturating narrow: low 8 bytes of dst from in1; when sz
+// is .16b the high 8 bytes of dst from in2. dst is a bare register name.
+.macro sqxtun_sz dst, in1, in2, sz
+        sqxtun          \dst\().8b,  \in1
+.ifc \sz, .16b
+        sqxtun2         \dst\().16b, \in2
+.endif
+.endm
+
+// dst1 = in1 * in3; when sz is .16b also dst2 = in2 * in4.
+// All register operands are passed complete with their arrangement suffix.
+.macro mul_sz dst1, dst2, in1, in2, in3, in4, sz
+        mul             \dst1,  \in1,  \in3
+.ifc \sz, .16b
+        mul             \dst2,  \in2,  \in4
+.endif
+.endm
+
+// Signed widening add: dst1 = in1 + low 8 bytes of in3; when sz is .16b
+// also dst2 = in2 + high 8 bytes of in3. in3 is a bare register name.
+.macro saddw_sz dst1, dst2, in1, in2, in3, sz
+        saddw           \dst1,  \in1, \in3\().8b
+.ifc \sz, .16b
+        saddw2          \dst2,  \in2, \in3\().16b
+.endif
+.endm
+
+// Signed widening subtract: dst1 = in1 - low 8 bytes of in3; when sz is
+// .16b also dst2 = in2 - high 8 bytes of in3. in3 is a bare register name.
+.macro ssubw_sz dst1, dst2, in1, in2, in3, sz
+        ssubw           \dst1,  \in1, \in3\().8b
+.ifc \sz, .16b
+        ssubw2          \dst2,  \in2, \in3\().16b
+.endif
+.endm
+
+// Unsigned extend: dst1 = widened low 8 bytes of in; when sz is .16b also
+// dst2 = widened high 8 bytes of in. in is a bare register name.
+.macro uxtl_sz dst1, dst2, in, sz
+        uxtl            \dst1,  \in\().8b
+.ifc \sz, .16b
+        uxtl2           \dst2,  \in\().16b
+.endif
+.endm
+
+// Unsigned lengthening add: dst1 = in1 + in2 on the low 8 bytes; when sz is
+// .16b also dst2 = in1 + in2 on the high 8 bytes.
+// in1 and in2 are bare register names.
+.macro uaddl_sz dst1, dst2, in1, in2, sz
+        uaddl           \dst1,  \in1\().8b,  \in2\().8b
+.ifc \sz, .16b
+        uaddl2          \dst2,  \in1\().16b, \in2\().16b
+.endif
+.endm
+
+// Rounding shift right and narrow: low 8 bytes of dst from in1 >> shift; when
+// sz is .16b the high 8 bytes of dst from in2 >> shift.
+// dst is a bare register name; shift is passed as an immediate operand.
+.macro rshrn_sz dst, in1, in2, shift, sz
+        rshrn           \dst\().8b,  \in1, \shift
+.ifc \sz, .16b
+        rshrn2          \dst\().16b, \in2, \shift
+.endif
+.endm
+
+// Unsigned lengthening shift left: dst1 = low 8 bytes of in << shift; when sz
+// is .16b also dst2 = high 8 bytes of in << shift.
+// in is a bare register name; shift is passed as an immediate operand.
+.macro ushll_sz dst1, dst2, in, shift, sz
+        ushll           \dst1,  \in\().8b,  \shift
+.ifc \sz, .16b
+        ushll2          \dst2,  \in\().16b, \shift
+.endif
+.endm
+
+// The input to and output from this macro is in the registers v16-v31,
+// and v0-v7 are used as scratch registers.
+// p7 = v16 .. p3 = v20, p0 = v23, q0 = v24, q3 = v27, q7 = v31
+// Core filter macro: wd = filter width (4, 8 or 16), sz = .8b or .16b, mix != 0 when
+// w2-w4 each pack separate E/I/H bytes for pixels 0-7 and 8-15. Depending on wd, we use
+// either v16-v19 and v28-v31 as temp registers, or v8-v15.
+// When comparing to the arm version, tmpq1 == tmp1 + tmp2, tmpq2 == tmp3 + tmp4, etc.
+.macro loop_filter wd, sz, mix, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8
+.if \mix == 0
+        dup             v0\sz,  w2        // E
+        dup             v2\sz,  w3        // I
+        dup             v3\sz,  w4        // H
+.else
+        dup             v0.8h,  w2        // E
+        dup             v2.8h,  w3        // I
+        dup             v3.8h,  w4        // H
+        rev16           v1.16b, v0.16b    // E
+        rev16           v4.16b, v2.16b    // I
+        rev16           v5.16b, v3.16b    // H
+        uzp1            v0.16b, v0.16b, v1.16b // lanes 0-7: bits 0-7 of w2, lanes 8-15: bits 8-15
+        uzp1            v2.16b, v2.16b, v4.16b // same split for I (w3)
+        uzp1            v3.16b, v3.16b, v5.16b // same split for H (w4)
+.endif
+        // Build the filter mask fm from the differences of p3-p0 (v20-v23) and q0-q3 (v24-v27)
+        uabd            v4\sz,  v20\sz, v21\sz        // abs(p3 - p2)
+        uabd            v5\sz,  v21\sz, v22\sz        // abs(p2 - p1)
+        uabd            v6\sz,  v22\sz, v23\sz        // abs(p1 - p0)
+        uabd            v7\sz,  v24\sz, v25\sz        // abs(q0 - q1)
+        uabd            \tmp1\sz,  v25\sz, v26\sz     // abs(q1 - q2)
+        uabd            \tmp2\sz,  v26\sz, v27\sz     // abs(q2 - q3)
+        umax            v4\sz,  v4\sz,  v5\sz
+        umax            v5\sz,  v6\sz,  v7\sz
+        umax            \tmp1\sz, \tmp1\sz, \tmp2\sz
+        uabd            v6\sz,  v23\sz, v24\sz        // abs(p0 - q0)
+        umax            v4\sz,  v4\sz,  v5\sz
+        uqadd           v6\sz,  v6\sz,  v6\sz         // abs(p0 - q0) * 2
+        uabd            v5\sz,  v22\sz, v25\sz        // abs(p1 - q1)
+        umax            v4\sz,  v4\sz,  \tmp1\sz      // max(abs(p3 - p2), ..., abs(q2 - q3))
+        ushr            v5\sz,  v5\sz,  #1            // abs(p1 - q1) >> 1
+        cmhs            v4\sz,  v2\sz,  v4\sz         // max(abs()) <= I
+        uqadd           v6\sz,  v6\sz,  v5\sz         // abs(p0 - q0) * 2 + abs(p1 - q1) >> 1
+        cmhs            v5\sz,  v0\sz,  v6\sz         // abs(p0 - q0) * 2 + abs(p1 - q1) >> 1 <= E
+        and             v4\sz,  v4\sz,  v5\sz         // fm
+
+        // If no pixels need filtering, just exit as soon as possible
+        mov             x5,  v4.d[0]
+.ifc \sz, .16b
+        mov             x6,  v4.d[1]
+        adds            x5,  x5,  x6
+        b.eq            9f // fm is zero everywhere; label 9 is provided by the including function
+.else
+        cbz             x5,  9f // fm is zero everywhere; label 9 is provided by the including function
+.endif
+
+.if \wd >= 8
+        movi            v0\sz,  #1 // threshold for the flat8in/flat8out tests below
+
+        uabd            v6\sz,  v20\sz, v23\sz    // abs(p3 - p0)
+        uabd            v2\sz,  v21\sz, v23\sz    // abs(p2 - p0)
+        uabd            v1\sz,  v22\sz, v23\sz    // abs(p1 - p0)
+        uabd            \tmp1\sz,  v25\sz, v24\sz // abs(q1 - q0)
+        uabd            \tmp2\sz,  v26\sz, v24\sz // abs(q2 - q0)
+        uabd            \tmp3\sz,  v27\sz, v24\sz // abs(q3 - q0)
+        umax            v6\sz,  v6\sz,  v2\sz // max(abs(p3 - p0), abs(p2 - p0))
+        umax            v1\sz,  v1\sz,  \tmp1\sz // max(abs(p1 - p0), abs(q1 - q0))
+        umax            \tmp2\sz,  \tmp2\sz,  \tmp3\sz // max(abs(q2 - q0), abs(q3 - q0))
+.if \wd == 16
+        uabd            v7\sz,  v16\sz, v23\sz    // abs(p7 - p0)
+        umax            v6\sz,  v6\sz,  v1\sz
+        uabd            v2\sz,  v17\sz, v23\sz    // abs(p6 - p0)
+        umax            v6\sz,  v6\sz,  \tmp2\sz
+        uabd            v1\sz,  v18\sz, v23\sz    // abs(p5 - p0)
+        cmhs            v6\sz,  v0\sz,  v6\sz     // flat8in
+        uabd            v8\sz,  v19\sz, v23\sz    // abs(p4 - p0)
+        and             v6\sz,  v6\sz,  v4\sz     // flat8in && fm
+        uabd            v9\sz,  v28\sz, v24\sz    // abs(q4 - q0)
+        bic             v4\sz,  v4\sz,  v6\sz     // fm && !flat8in
+        uabd            v10\sz, v29\sz, v24\sz    // abs(q5 - q0)
+        uabd            v11\sz, v30\sz, v24\sz    // abs(q6 - q0)
+        uabd            v12\sz, v31\sz, v24\sz    // abs(q7 - q0)
+
+        umax            v7\sz,  v7\sz,  v2\sz
+        umax            v1\sz,  v1\sz,  v8\sz
+        umax            v9\sz,  v9\sz,  v10\sz
+        umax            v11\sz, v11\sz, v12\sz
+        // The rest of the calculation of flat8out is interleaved below
+.else
+        // The rest of the calculation of flat8in is interleaved below
+.endif
+.endif
+
+        // Calculate the normal inner loop filter for 2 or 4 pixels
+        uabd            v5\sz,  v22\sz, v23\sz // abs(p1 - p0)
+.if \wd == 16
+        umax            v7\sz,  v7\sz,  v1\sz
+        umax            v9\sz,  v9\sz,  v11\sz
+.elseif \wd == 8
+        umax            v6\sz,  v6\sz,  v1\sz
+.endif
+        uabd            v1\sz,  v25\sz, v24\sz // abs(q1 - q0)
+.if \wd == 16
+        umax            v7\sz,  v7\sz,  v9\sz
+.elseif \wd == 8
+        umax            v6\sz,  v6\sz,  \tmp2\sz
+.endif
+        usubl_sz        \tmp1\().8h,  \tmp2\().8h,  v22,  v25, \sz // p1 - q1
+        umax            v5\sz,  v5\sz,  v1\sz  // max(abs(p1 - p0), abs(q1 - q0))
+.if \mix != 0
+        mov             v1.d[0], x11 // x11 = flat8in enable mask loaded by loop_filter_8_16b_mix
+.endif
+        usubl_sz        \tmp3\().8h,  \tmp4\().8h,  v24,  v23, \sz // q0 - p0
+        movi            \tmp5\().8h,  #3
+.if \wd == 8
+        cmhs            v6\sz,  v0\sz,  v6\sz  // flat8in
+.endif
+.if \mix != 0
+        sxtl            v1.8h,  v1.8b // widen the 8 mask bytes to 16 lanes: each half of x11 covers 8 pixels
+.endif
+        cmhs            v5\sz,  v3\sz,  v5\sz  // !hev
+.if \wd == 8
+        // If a 4/8 or 8/4 mix is used, clear the relevant half of v6
+.if \mix != 0
+        and             v6\sz,  v6\sz,  v1.16b
+.endif
+        and             v6\sz,  v6\sz,  v4\sz  // flat8in && fm
+.endif
+        sqxtn_sz        \tmp1,        \tmp1\().8h,  \tmp2\().8h, \sz // av_clip_int8(p1 - q1)
+.if \wd == 16
+        cmhs            v7\sz,  v0\sz,  v7\sz  // flat8out
+.elseif \wd == 8
+        bic             v4\sz,  v4\sz,  v6\sz  // fm && !flat8in
+.endif
+        and             v5\sz,  v5\sz,  v4\sz  // !hev && fm && !flat8in
+.if \wd == 16
+        and             v7\sz,  v7\sz,  v6\sz  // flat8out && flat8in && fm
+.endif
+
+        mul_sz          \tmp3\().8h,  \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  \tmp5\().8h,  \tmp5\().8h, \sz // 3 * (q0 - p0)
+        bic             \tmp1\sz,  \tmp1\sz,  v5\sz    // if (!hev) av_clip_int8 = 0
+        movi            v2\sz,  #4
+        saddw_sz        \tmp3\().8h,  \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  \tmp1, \sz // 3 * (q0 - p0) [+ av_clip_int8(p1 - q1)]
+        movi            v3\sz,  #3
+        sqxtn_sz        \tmp1,        \tmp3\().8h,  \tmp4\().8h, \sz       // f
+.if \wd == 16
+        bic             v6\sz,  v6\sz,  v7\sz  // fm && flat8in && !flat8out
+.endif
+
+        sqadd           \tmp3\sz,  \tmp1\sz,  v2\sz // FFMIN(f + 4, 127)
+        sqadd           \tmp4\sz,  \tmp1\sz,  v3\sz // FFMIN(f + 3, 127)
+        uxtl_sz         v0.8h,  v1.8h,  v23, \sz    // p0
+        sshr            \tmp3\sz,  \tmp3\sz,  #3    // f1
+        sshr            \tmp4\sz,  \tmp4\sz,  #3    // f2
+
+        uxtl_sz         v2.8h,  v3.8h,  v24, \sz    // q0
+        saddw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp4, \sz // p0 + f2
+        ssubw_sz        v2.8h,  v3.8h,  v2.8h,  v3.8h,  \tmp3, \sz // q0 - f1
+        sqxtun_sz       v0,  v0.8h,  v1.8h,  \sz    // out p0
+        sqxtun_sz       v1,  v2.8h,  v3.8h,  \sz    // out q0
+        srshr           \tmp3\sz, \tmp3\sz, #1      // f = (f1 + 1) >> 1
+        bit             v23\sz, v0\sz,  v4\sz       // if (fm && !flat8in)
+        bit             v24\sz, v1\sz,  v4\sz
+
+        uxtl_sz         v0.8h,  v1.8h,  v22, \sz    // p1
+        uxtl_sz         v2.8h,  v3.8h,  v25, \sz    // q1
+.if \wd >= 8
+        mov             x5,  v6.d[0] // start testing the flat8in mask; the branch on it follows further down
+.ifc \sz, .16b
+        mov             x6,  v6.d[1]
+.endif
+.endif
+        saddw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp3, \sz // p1 + f
+        ssubw_sz        v2.8h,  v3.8h,  v2.8h,  v3.8h,  \tmp3, \sz // q1 - f
+        sqxtun_sz       v0,  v0.8h,  v1.8h, \sz     // out p1
+        sqxtun_sz       v2,  v2.8h,  v3.8h, \sz     // out q1
+.if \wd >= 8
+.ifc \sz, .16b
+        adds            x5,  x5,  x6 // sets the flags consumed by the b.eq 6f below
+.endif
+.endif
+        bit             v22\sz, v0\sz,  v5\sz       // if (!hev && fm && !flat8in)
+        bit             v25\sz, v2\sz,  v5\sz
+
+        // If no pixels need flat8in, jump to flat8out
+        // (or to a writeout of the inner 4 pixels, for wd=8)
+.if \wd >= 8
+.ifc \sz, .16b
+        b.eq            6f
+.else
+        cbz             x5,  6f
+.endif
+
+        // flat8in
+        uaddl_sz        \tmp1\().8h, \tmp2\().8h,  v20, v21, \sz
+        uaddl_sz        \tmp3\().8h, \tmp4\().8h,  v22, v25, \sz
+        uaddl_sz        \tmp5\().8h, \tmp6\().8h,  v20, v22, \sz
+        uaddl_sz        \tmp7\().8h, \tmp8\().8h,  v23, v26, \sz
+        add_sz          v0.8h,  v1.8h,  \tmp1\().8h, \tmp2\().8h, \tmp1\().8h, \tmp2\().8h, \sz
+        uaddw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  v23, \sz
+        uaddw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  v24, \sz
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp5\().8h, \tmp6\().8h, \sz // 3*p3 + 2*p2 + p1 + p0 + q0
+        sub_sz          \tmp3\().8h, \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  \tmp1\().8h, \tmp2\().8h, \sz
+        sub_sz          \tmp7\().8h, \tmp8\().8h,  \tmp7\().8h, \tmp8\().8h,  \tmp5\().8h, \tmp6\().8h, \sz
+        rshrn_sz        v2,  v0.8h,  v1.8h,  #3,  \sz // out p2
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp3\().8h, \tmp4\().8h, \sz
+        uaddl_sz        \tmp1\().8h, \tmp2\().8h,  v20,  v23, \sz
+        uaddl_sz        \tmp3\().8h, \tmp4\().8h,  v24,  v27, \sz
+        rshrn_sz        v3,  v0.8h,  v1.8h,  #3,  \sz // out p1
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp7\().8h, \tmp8\().8h, \sz
+        sub_sz          \tmp3\().8h, \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  \tmp1\().8h, \tmp2\().8h, \sz
+        uaddl_sz        \tmp5\().8h, \tmp6\().8h,  v21,  v24, \sz
+        uaddl_sz        \tmp7\().8h, \tmp8\().8h,  v25,  v27, \sz
+        rshrn_sz        v4,  v0.8h,  v1.8h,  #3,  \sz // out p0
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp3\().8h, \tmp4\().8h, \sz
+        sub_sz          \tmp7\().8h, \tmp8\().8h,  \tmp7\().8h, \tmp8\().8h,  \tmp5\().8h, \tmp6\().8h, \sz
+        uaddl_sz        \tmp1\().8h, \tmp2\().8h,  v22,  v25, \sz
+        uaddl_sz        \tmp3\().8h, \tmp4\().8h,  v26,  v27, \sz
+        rshrn_sz        v5,  v0.8h,  v1.8h,  #3,  \sz // out q0
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp7\().8h, \tmp8\().8h, \sz
+        sub_sz          \tmp3\().8h, \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  \tmp1\().8h, \tmp2\().8h, \sz
+        rshrn_sz        \tmp5,  v0.8h,  v1.8h,  #3,  \sz // out q1
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp3\().8h, \tmp4\().8h, \sz
+        // The output here is written back into the input registers. This doesn't
+        // matter for the flat8out part below, since we only update those pixels
+        // which won't be touched below.
+        bit             v21\sz, v2\sz,  v6\sz
+        bit             v22\sz, v3\sz,  v6\sz
+        bit             v23\sz, v4\sz,  v6\sz
+        rshrn_sz        \tmp6,  v0.8h,  v1.8h,  #3,  \sz // out q2
+        bit             v24\sz, v5\sz,  v6\sz
+        bit             v25\sz, \tmp5\sz,  v6\sz
+        bit             v26\sz, \tmp6\sz,  v6\sz
+.endif
+.if \wd == 16
+6:
+        orr             v2\sz,  v6\sz,  v7\sz // lanes that used flat8in or need flat8out
+        mov             x5,  v2.d[0]
+.ifc \sz, .16b
+        mov             x6,  v2.d[1]
+        adds            x5,  x5,  x6
+        b.ne            1f
+.else
+        cbnz            x5,  1f
+.endif
+        // If no pixels needed flat8in nor flat8out, jump to a
+        // writeout of the inner 4 pixels
+        ret             x14 // x14 is loaded with that target by the loop_filter_16 macros
+1:
+
+        mov             x5,  v7.d[0]
+.ifc \sz, .16b
+        mov             x6,  v7.d[1]
+        adds            x5,  x5,  x6
+        b.ne            1f
+.else
+        cbnz            x5,  1f
+.endif
+        // If no pixels need flat8out, jump to a writeout of the inner 6 pixels
+        ret             x15 // x15 is loaded with that target by the loop_filter_16 macros
+
+1:
+        // flat8out
+        // This writes all outputs into v2-v17 (skipping v7 and v16).
+        // If this part is skipped, the output is read from v21-v26 (which is the input
+        // to this section).
+        ushll_sz        v0.8h,  v1.8h,  v16,  #3,  \sz           // 8 * v16
+        usubw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  v16, \sz // 7 * v16
+        uaddw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  v17, \sz
+        uaddl_sz        v8.8h,  v9.8h,  v17, v18, \sz
+        uaddl_sz        v10.8h, v11.8h, v19, v20, \sz
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v8.8h,  v9.8h,  \sz
+        uaddl_sz        v8.8h,  v9.8h,  v16, v17, \sz
+        uaddl_sz        v12.8h, v13.8h, v21, v22, \sz
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v10.8h, v11.8h, \sz
+        uaddl_sz        v10.8h, v11.8h, v18, v25, \sz
+        uaddl_sz        v14.8h, v15.8h, v23, v24, \sz
+        sub_sz          v10.8h, v11.8h, v10.8h, v11.8h, v8.8h,  v9.8h,  \sz
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v12.8h, v13.8h, \sz
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
+        uaddl_sz        v12.8h, v13.8h, v16, v18, \sz
+        uaddl_sz        v14.8h, v15.8h, v19, v26, \sz
+        rshrn_sz        v2,  v0.8h,  v1.8h,  #4,  \sz // out p6
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v10.8h, v11.8h, \sz
+        uaddl_sz        v8.8h,  v9.8h,  v16, v19, \sz
+        uaddl_sz        v10.8h, v11.8h, v20, v27, \sz
+        sub_sz          v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
+        bif             v2\sz,  v17\sz, v7\sz // keep the input p6 where !flat8out (same pattern for every output below)
+        rshrn_sz        v3,  v0.8h,  v1.8h,  #4,  \sz // out p5
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
+        uaddl_sz        v12.8h, v13.8h, v16, v20, \sz
+        uaddl_sz        v14.8h, v15.8h, v21, v28, \sz
+        sub_sz          v10.8h, v11.8h, v10.8h, v11.8h, v8.8h,  v9.8h,  \sz
+        bif             v3\sz,  v18\sz, v7\sz
+        rshrn_sz        v4,  v0.8h,  v1.8h,  #4,  \sz // out p4
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v10.8h, v11.8h, \sz
+        uaddl_sz        v8.8h,  v9.8h,  v16, v21, \sz
+        uaddl_sz        v10.8h, v11.8h, v22, v29, \sz
+        sub_sz          v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
+        bif             v4\sz,  v19\sz, v7\sz
+        rshrn_sz        v5,  v0.8h,  v1.8h,  #4,  \sz // out p3
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
+        uaddl_sz        v12.8h, v13.8h, v16, v22, \sz
+        uaddl_sz        v14.8h, v15.8h, v23, v30, \sz
+        sub_sz          v10.8h, v11.8h, v10.8h, v11.8h, v8.8h,  v9.8h,  \sz
+        bif             v5\sz,  v20\sz, v7\sz
+        rshrn_sz        v6,  v0.8h,  v1.8h,  #4,  \sz // out p2 (v6 is reused here; v7 stays the flat8out mask)
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v10.8h, v11.8h, \sz
+        uaddl_sz        v10.8h, v11.8h, v16, v23, \sz
+        sub_sz          v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
+        uaddl_sz        v12.8h, v13.8h, v24, v31, \sz
+        bif             v6\sz,  v21\sz, v7\sz
+        rshrn_sz        v8,  v0.8h,  v1.8h,  #4,  \sz // out p1
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
+        sub_sz          v10.8h, v11.8h, v12.8h, v13.8h, v10.8h, v11.8h, \sz
+        uaddl_sz        v12.8h, v13.8h, v17, v24, \sz
+        uaddl_sz        v14.8h, v15.8h, v25, v31, \sz
+        bif             v8\sz,  v22\sz, v7\sz
+        rshrn_sz        v9,  v0.8h,  v1.8h,  #4,  \sz // out p0
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v10.8h, v11.8h, \sz
+        sub_sz          v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
+        uaddl_sz        v12.8h, v13.8h, v26, v31, \sz
+        bif             v9\sz,  v23\sz, v7\sz
+        rshrn_sz        v10, v0.8h,  v1.8h,  #4,  \sz // out q0
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
+        uaddl_sz        v14.8h, v15.8h, v18, v25, \sz
+        uaddl_sz        v18.8h, v19.8h, v19, v26, \sz // from here on input registers are reused as temporaries
+        sub_sz          v12.8h, v13.8h, v12.8h, v13.8h, v14.8h, v15.8h, \sz
+        uaddl_sz        v14.8h, v15.8h, v27, v31, \sz
+        bif             v10\sz, v24\sz, v7\sz
+        rshrn_sz        v11, v0.8h,  v1.8h,  #4,  \sz // out q1
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v12.8h, v13.8h, \sz
+        uaddl_sz        v12.8h, v13.8h, v20, v27, \sz
+        sub_sz          v14.8h, v15.8h, v14.8h, v15.8h, v18.8h, v19.8h, \sz
+        uaddl_sz        v18.8h, v19.8h, v28, v31, \sz
+        bif             v11\sz, v25\sz, v7\sz
+        sub_sz          v18.8h, v19.8h, v18.8h, v19.8h, v12.8h, v13.8h, \sz
+        rshrn_sz        v12, v0.8h,  v1.8h,  #4,  \sz // out q2
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
+        uaddl_sz        v14.8h, v15.8h, v21, v28, \sz
+        uaddl_sz        v20.8h, v21.8h, v29, v31, \sz
+        bif             v12\sz, v26\sz, v7\sz
+        rshrn_sz        v13, v0.8h,  v1.8h,  #4,  \sz // out q3
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v18.8h, v19.8h, \sz
+        sub_sz          v20.8h, v21.8h, v20.8h, v21.8h, v14.8h, v15.8h, \sz
+        uaddl_sz        v18.8h, v19.8h, v22, v29, \sz
+        uaddl_sz        v22.8h, v23.8h, v30, v31, \sz
+        bif             v13\sz, v27\sz, v7\sz
+        rshrn_sz        v14, v0.8h,  v1.8h,  #4,  \sz // out q4
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v20.8h, v21.8h, \sz
+        sub_sz          v22.8h, v23.8h, v22.8h, v23.8h, v18.8h, v19.8h, \sz
+        bif             v14\sz, v28\sz, v7\sz
+        rshrn_sz        v15, v0.8h,  v1.8h,  #4,  \sz // out q5
+
+        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v22.8h, v23.8h, \sz
+        bif             v15\sz, v29\sz, v7\sz
+        rshrn_sz        v17, v0.8h,  v1.8h,  #4,  \sz // out q6
+        bif             v17\sz, v30\sz, v7\sz
+.endif
+.endm
+
+// For wd <= 8, we use v16-v19 and v28-v31 for temp registers,
+// while we need those for inputs/outputs in wd=16 and use v8-v15
+// for temp registers there instead.
+function vp9_loop_filter_4 // shared wd=4 filter body for 8 pixels (.8b), reached via bl from the loop_filter_4 macro
+        loop_filter     4,  .8b,  0,    v16, v17, v18, v19, v28, v29, v30, v31
+        ret // filtered: return to the instruction after the bl
+9:
+        ret             x10 // nothing to filter: leave the public function directly (x10 holds its saved x30)
+endfunc
+
+function vp9_loop_filter_4_16b_mix_44 // shared wd=4 filter body for 16 pixels with per-half E/I/H (mix != 0)
+        loop_filter     4,  .16b, 44,   v16, v17, v18, v19, v28, v29, v30, v31
+        ret // filtered: return to the instruction after the bl
+9:
+        ret             x10 // nothing to filter: leave the public function directly (x10 holds its saved x30)
+endfunc
+
+function vp9_loop_filter_8 // shared wd=8 filter body for 8 pixels (.8b), reached via bl from the loop_filter_8 macro
+        loop_filter     8,  .8b,  0,    v16, v17, v18, v19, v28, v29, v30, v31
+        ret // flat8in was applied: return to the 6 pixel writeout after the bl
+6:
+        ret             x13 // no flat8in pixels: continue at the 4 pixel writeout whose address is in x13
+9:
+        ret             x10 // nothing to filter: leave the public function directly (x10 holds its saved x30)
+endfunc
+
+function vp9_loop_filter_8_16b_mix // shared wd=8 filter body for 16 pixels; x11 selects which half may use flat8in
+        loop_filter     8,  .16b, 88,   v16, v17, v18, v19, v28, v29, v30, v31
+        ret // flat8in was applied: return to the 6 pixel writeout after the bl
+6:
+        ret             x13 // no flat8in pixels: continue at the 4 pixel writeout whose address is in x13
+9:
+        ret             x10 // nothing to filter: leave the public function directly (x10 holds its saved x30)
+endfunc
+
+function vp9_loop_filter_16 // shared wd=16 filter body for 8 pixels (.8b); uses v8-v15 as temporaries
+        loop_filter     16, .8b,  0,    v8,  v9,  v10, v11, v12, v13, v14, v15
+        ret // flat8out was applied: return to the full writeout after the bl
+9:
+        ldp             d10, d11, [sp, #0x10] // nothing to filter: restore the d8-d15 values the public function pushed
+        ldp             d12, d13, [sp, #0x20]
+        ldp             d14, d15, [sp, #0x30]
+        ldp             d8,  d9,  [sp], #0x40 // post-index pops the whole 0x40 byte save area
+        ret             x10 // leave the public function directly (x10 holds its saved x30)
+endfunc
+
+function vp9_loop_filter_16_16b // shared wd=16 filter body for 16 pixels (.16b); uses v8-v15 as temporaries
+        loop_filter     16, .16b, 0,    v8,  v9,  v10, v11, v12, v13, v14, v15
+        ret // flat8out was applied: return to the full writeout after the bl
+9:
+        ldp             d10, d11, [sp, #0x10] // nothing to filter: reload d8-d15 from the 0x40 byte area at sp
+        ldp             d12, d13, [sp, #0x20]
+        ldp             d14, d15, [sp, #0x30]
+        ldp             d8,  d9,  [sp], #0x40 // post-index pops the whole 0x40 byte save area
+        ret             x10 // leave through the address held in x10
+endfunc
+
+.macro loop_filter_4
+        bl              vp9_loop_filter_4 // run the shared wd=4 body; it returns here only if some pixel was filtered
+.endm
+
+.macro loop_filter_4_16b_mix mix
+        bl              vp9_loop_filter_4_16b_mix_\mix // run the shared 16 pixel wd=4 body named after the mix argument
+.endm
+
+.macro loop_filter_8
+        // calculate alternative 'return' targets
+        adr             x13, 6f // label 6 of the including function: writeout of the inner 4 pixels only
+        bl              vp9_loop_filter_8 // returns here when flat8in was applied
+.endm
+
+.macro loop_filter_8_16b_mix mix
+        // calculate alternative 'return' targets
+        adr             x13, 6f // label 6 of the including function: writeout of the inner 4 pixels only
+.if \mix == 48
+        mov             x11, #0xffffffff00000000 // flat8in allowed for pixels 8-15 only
+.elseif \mix == 84
+        mov             x11, #0x00000000ffffffff // flat8in allowed for pixels 0-7 only
+.else
+        mov             x11, #0xffffffffffffffff // flat8in allowed for all 16 pixels
+.endif
+        bl              vp9_loop_filter_8_16b_mix // returns here when flat8in was applied
+.endm
+
+.macro loop_filter_16
+        // calculate alternative 'return' targets
+        adr             x14, 7f // used when neither flat8in nor flat8out applies (inner 4 pixel writeout)
+        adr             x15, 8f // used when flat8out does not apply (inner 6 pixel writeout)
+        bl              vp9_loop_filter_16 // returns here when flat8out was applied
+.endm
+
+.macro loop_filter_16_16b
+        // calculate alternative 'return' targets
+        adr             x14, 7f // used when neither flat8in nor flat8out applies (inner 4 pixel writeout)
+        adr             x15, 8f // used when flat8out does not apply (inner 6 pixel writeout)
+        bl              vp9_loop_filter_16_16b // returns here when flat8out was applied
+.endm
+
+
+// The public functions in this file have got the following signature:
+// void loop_filter(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr);
+
+function ff_vp9_loop_filter_v_4_8_neon, export=1 // wd=4 filter across rows: 8 pixels wide, 4 rows above dst and 4 from dst down
+        mov             x10, x30 // keep the return address; the bl in loop_filter_4 overwrites x30
+        sub             x9,  x0,  x1, lsl #2 // x9 = dst - 4 * stride
+        ld1             {v20.8b}, [x9], x1 // p3
+        ld1             {v24.8b}, [x0], x1 // q0
+        ld1             {v21.8b}, [x9], x1 // p2
+        ld1             {v25.8b}, [x0], x1 // q1
+        ld1             {v22.8b}, [x9], x1 // p1
+        ld1             {v26.8b}, [x0], x1 // q2
+        ld1             {v23.8b}, [x9], x1 // p0
+        ld1             {v27.8b}, [x0], x1 // q3
+        sub             x0,  x0,  x1, lsl #2 // x0 back to the q0 row
+        sub             x9,  x9,  x1, lsl #1 // x9 to the p1 row
+
+        loop_filter_4
+
+        st1             {v22.8b}, [x9], x1 // p1
+        st1             {v24.8b}, [x0], x1 // q0
+        st1             {v23.8b}, [x9], x1 // p0
+        st1             {v25.8b}, [x0], x1 // q1
+
+        ret             x10
+endfunc
+
+function ff_vp9_loop_filter_v_44_16_neon, export=1 // wd=4 filter across rows, 16 pixels wide, limits packed per 8 pixel half
+        mov             x10, x30 // keep the return address; the bl in the filter macro overwrites x30
+        sub             x9,  x0,  x1, lsl #2 // x9 = dst - 4 * stride
+        ld1             {v20.16b}, [x9], x1 // p3
+        ld1             {v24.16b}, [x0], x1 // q0
+        ld1             {v21.16b}, [x9], x1 // p2
+        ld1             {v25.16b}, [x0], x1 // q1
+        ld1             {v22.16b}, [x9], x1 // p1
+        ld1             {v26.16b}, [x0], x1 // q2
+        ld1             {v23.16b}, [x9], x1 // p0
+        ld1             {v27.16b}, [x0], x1 // q3
+        sub             x0,  x0,  x1, lsl #2 // x0 back to the q0 row
+        sub             x9,  x9,  x1, lsl #1 // x9 to the p1 row
+
+        loop_filter_4_16b_mix 44
+
+        st1             {v22.16b}, [x9], x1 // p1
+        st1             {v24.16b}, [x0], x1 // q0
+        st1             {v23.16b}, [x9], x1 // p0
+        st1             {v25.16b}, [x0], x1 // q1
+
+        ret             x10
+endfunc
+
+function ff_vp9_loop_filter_h_4_8_neon, export=1 // wd=4 filter across columns: 8 rows, 4 pixels on each side of dst
+        mov             x10, x30 // keep the return address; the bl in loop_filter_4 overwrites x30
+        sub             x9,  x0,  #4 // x9 = row 0, 4 pixels left of dst
+        add             x0,  x9,  x1, lsl #2 // x0 = row 4, same column
+        ld1             {v20.8b}, [x9], x1
+        ld1             {v24.8b}, [x0], x1
+        ld1             {v21.8b}, [x9], x1
+        ld1             {v25.8b}, [x0], x1
+        ld1             {v22.8b}, [x9], x1
+        ld1             {v26.8b}, [x0], x1
+        ld1             {v23.8b}, [x9], x1
+        ld1             {v27.8b}, [x0], x1
+
+        sub             x9,  x9,  x1, lsl #2
+        sub             x0,  x0,  x1, lsl #2
+        // Move x0/x9 forward by 2 pixels; we don't need to rewrite the
+        // outermost 2 pixels since they aren't changed.
+        add             x9,  x9,  #2
+        add             x0,  x0,  #2
+
+        transpose_8x8B  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        loop_filter_4
+
+        // We only will write the mid 4 pixels back; after the loop filter,
+        // these are in v22, v23, v24, v25, ordered as rows (8x4 pixels).
+        // We need to transpose them to columns, done with a 4x8 transpose
+        // (which in practice is two 4x4 transposes of the two 4x4 halves
+        // of the 8x4 pixels; into 4x8 pixels).
+        transpose_4x8B  v22, v23, v24, v25, v26, v27, v28, v29
+        st1             {v22.s}[0], [x9], x1 // lane 0 goes to rows 0-3 (x9), lane 1 to rows 4-7 (x0)
+        st1             {v22.s}[1], [x0], x1
+        st1             {v23.s}[0], [x9], x1
+        st1             {v23.s}[1], [x0], x1
+        st1             {v24.s}[0], [x9], x1
+        st1             {v24.s}[1], [x0], x1
+        st1             {v25.s}[0], [x9], x1
+        st1             {v25.s}[1], [x0], x1
+
+        ret             x10
+endfunc
+
+function ff_vp9_loop_filter_h_44_16_neon, export=1 // wd=4 filter across columns for 16 rows, limits packed per 8 row half
+        mov             x10, x30 // keep the return address; the bl in the filter macro overwrites x30
+        sub             x9,  x0,  #4 // x9 = row 0, 4 pixels left of dst
+        add             x0,  x9,  x1, lsl #3 // x0 = row 8, same column
+        ld1             {v20.8b},   [x9], x1 // low half of each register: rows 0-7
+        ld1             {v20.d}[1], [x0], x1 // high half of each register: rows 8-15
+        ld1             {v21.8b},   [x9], x1
+        ld1             {v21.d}[1], [x0], x1
+        ld1             {v22.8b},   [x9], x1
+        ld1             {v22.d}[1], [x0], x1
+        ld1             {v23.8b},   [x9], x1
+        ld1             {v23.d}[1], [x0], x1
+        ld1             {v24.8b},   [x9], x1
+        ld1             {v24.d}[1], [x0], x1
+        ld1             {v25.8b},   [x9], x1
+        ld1             {v25.d}[1], [x0], x1
+        ld1             {v26.8b},   [x9], x1
+        ld1             {v26.d}[1], [x0], x1
+        ld1             {v27.8b},   [x9], x1
+        ld1             {v27.d}[1], [x0], x1
+
+        sub             x9,  x9,  x1, lsl #3 // rewind both pointers to their first row
+        sub             x0,  x0,  x1, lsl #3
+        add             x9,  x9,  #2 // skip the 2 outermost pixels, only the mid 4 are written back
+        add             x0,  x0,  #2
+
+        transpose_8x16B v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        loop_filter_4_16b_mix 44
+
+        transpose_4x16B v22, v23, v24, v25, v26, v27, v28, v29
+
+        st1             {v22.s}[0], [x9], x1 // lanes 0 and 1 go to rows 0-7 (x9), lanes 2 and 3 to rows 8-15 (x0)
+        st1             {v22.s}[2], [x0], x1
+        st1             {v23.s}[0], [x9], x1
+        st1             {v23.s}[2], [x0], x1
+        st1             {v24.s}[0], [x9], x1
+        st1             {v24.s}[2], [x0], x1
+        st1             {v25.s}[0], [x9], x1
+        st1             {v25.s}[2], [x0], x1
+        st1             {v22.s}[1], [x9], x1
+        st1             {v22.s}[3], [x0], x1
+        st1             {v23.s}[1], [x9], x1
+        st1             {v23.s}[3], [x0], x1
+        st1             {v24.s}[1], [x9], x1
+        st1             {v24.s}[3], [x0], x1
+        st1             {v25.s}[1], [x9], x1
+        st1             {v25.s}[3], [x0], x1
+
+        ret             x10
+endfunc
+
+function ff_vp9_loop_filter_v_8_8_neon, export=1 // wd=8 filter across rows: 8 pixels wide, 4 rows above dst and 4 from dst down
+        mov             x10, x30 // keep the return address; the bl in loop_filter_8 overwrites x30
+        sub             x9,  x0,  x1, lsl #2 // x9 = dst - 4 * stride
+        ld1             {v20.8b}, [x9], x1 // p3
+        ld1             {v24.8b}, [x0], x1 // q0
+        ld1             {v21.8b}, [x9], x1 // p2
+        ld1             {v25.8b}, [x0], x1 // q1
+        ld1             {v22.8b}, [x9], x1 // p1
+        ld1             {v26.8b}, [x0], x1 // q2
+        ld1             {v23.8b}, [x9], x1 // p0
+        ld1             {v27.8b}, [x0], x1 // q3
+        sub             x9,  x9,  x1, lsl #2
+        sub             x0,  x0,  x1, lsl #2 // x0 back to the q0 row
+        add             x9,  x9,  x1 // x9 to the p2 row
+
+        loop_filter_8
+
+        st1             {v21.8b}, [x9], x1 // p2
+        st1             {v24.8b}, [x0], x1 // q0
+        st1             {v22.8b}, [x9], x1 // p1
+        st1             {v25.8b}, [x0], x1 // q1
+        st1             {v23.8b}, [x9], x1 // p0
+        st1             {v26.8b}, [x0], x1 // q2
+
+        ret             x10
+6:
+        sub             x9,  x0,  x1, lsl #1 // flat8in not needed: only p1, p0, q0, q1 are written, x9 = p1 row
+        st1             {v22.8b}, [x9], x1
+        st1             {v24.8b}, [x0], x1
+        st1             {v23.8b}, [x9], x1
+        st1             {v25.8b}, [x0], x1
+        ret             x10
+endfunc
+
+.macro mix_v_16 mix
+function ff_vp9_loop_filter_v_\mix\()_16_neon, export=1 // across-rows filter, 16 pixels wide; mix picks wd 4 or 8 per 8 pixel half
+        mov             x10, x30 // keep the return address; the bl in the filter macro overwrites x30
+        sub             x9,  x0,  x1, lsl #2 // x9 = dst - 4 * stride
+        ld1             {v20.16b}, [x9], x1 // p3
+        ld1             {v24.16b}, [x0], x1 // q0
+        ld1             {v21.16b}, [x9], x1 // p2
+        ld1             {v25.16b}, [x0], x1 // q1
+        ld1             {v22.16b}, [x9], x1 // p1
+        ld1             {v26.16b}, [x0], x1 // q2
+        ld1             {v23.16b}, [x9], x1 // p0
+        ld1             {v27.16b}, [x0], x1 // q3
+        sub             x9,  x9,  x1, lsl #2
+        sub             x0,  x0,  x1, lsl #2 // x0 back to the q0 row
+        add             x9,  x9,  x1 // x9 to the p2 row
+
+        loop_filter_8_16b_mix \mix
+
+        st1             {v21.16b}, [x9], x1 // p2
+        st1             {v24.16b}, [x0], x1 // q0
+        st1             {v22.16b}, [x9], x1 // p1
+        st1             {v25.16b}, [x0], x1 // q1
+        st1             {v23.16b}, [x9], x1 // p0
+        st1             {v26.16b}, [x0], x1 // q2
+
+        ret             x10
+6:
+        sub             x9,  x0,  x1, lsl #1 // flat8in not needed: only p1, p0, q0, q1 are written, x9 = p1 row
+        st1             {v22.16b}, [x9], x1
+        st1             {v24.16b}, [x0], x1
+        st1             {v23.16b}, [x9], x1
+        st1             {v25.16b}, [x0], x1
+        ret             x10
+endfunc
+.endm
+
+mix_v_16 48 // pixels 0-7 limited to the 4-wide filter, pixels 8-15 may use flat8in
+mix_v_16 84 // pixels 0-7 may use flat8in, pixels 8-15 limited to the 4-wide filter
+mix_v_16 88 // all 16 pixels may use flat8in
+
+function ff_vp9_loop_filter_h_8_8_neon, export=1 // wd=8 filter across columns: 8 rows, 4 pixels on each side of dst
+        mov             x10, x30 // keep the return address; the bl in loop_filter_8 overwrites x30
+        sub             x9,  x0,  #4 // x9 = row 0, 4 pixels left of dst
+        add             x0,  x9,  x1, lsl #2 // x0 = row 4, same column
+        ld1             {v20.8b}, [x9], x1
+        ld1             {v24.8b}, [x0], x1
+        ld1             {v21.8b}, [x9], x1
+        ld1             {v25.8b}, [x0], x1
+        ld1             {v22.8b}, [x9], x1
+        ld1             {v26.8b}, [x0], x1
+        ld1             {v23.8b}, [x9], x1
+        ld1             {v27.8b}, [x0], x1
+
+        sub             x9,  x9,  x1, lsl #2 // rewind both pointers to their first row
+        sub             x0,  x0,  x1, lsl #2
+
+        transpose_8x8B  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        loop_filter_8
+
+        // Even though only 6 pixels per row have been changed, we write the
+        // full 8 pixel registers.
+        transpose_8x8B  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        st1             {v20.8b}, [x9], x1
+        st1             {v24.8b}, [x0], x1
+        st1             {v21.8b}, [x9], x1
+        st1             {v25.8b}, [x0], x1
+        st1             {v22.8b}, [x9], x1
+        st1             {v26.8b}, [x0], x1
+        st1             {v23.8b}, [x9], x1
+        st1             {v27.8b}, [x0], x1
+
+        ret             x10
+6:
+        // If we didn't need to do the flat8in part, we use the same writeback
+        // as in loop_filter_h_4_8.
+        add             x9,  x9,  #2 // skip the 2 outermost pixels, only the mid 4 are written back
+        add             x0,  x0,  #2
+        transpose_4x8B  v22, v23, v24, v25, v26, v27, v28, v29
+        st1             {v22.s}[0], [x9], x1
+        st1             {v22.s}[1], [x0], x1
+        st1             {v23.s}[0], [x9], x1
+        st1             {v23.s}[1], [x0], x1
+        st1             {v24.s}[0], [x9], x1
+        st1             {v24.s}[1], [x0], x1
+        st1             {v25.s}[0], [x9], x1
+        st1             {v25.s}[1], [x0], x1
+        ret             x10
+endfunc
+
+.macro mix_h_16 mix
+function ff_vp9_loop_filter_h_\mix\()_16_neon, export=1 // across-columns filter for 16 rows; mix picks wd 4 or 8 per 8 row half
+        mov             x10, x30 // keep the return address; the bl in the filter macro overwrites x30
+        sub             x9,  x0,  #4 // x9 = row 0, 4 pixels left of dst
+        add             x0,  x9,  x1, lsl #3 // x0 = row 8, same column
+        ld1             {v20.8b},   [x9], x1 // low half of each register: rows 0-7
+        ld1             {v20.d}[1], [x0], x1 // high half of each register: rows 8-15
+        ld1             {v21.8b},   [x9], x1
+        ld1             {v21.d}[1], [x0], x1
+        ld1             {v22.8b},   [x9], x1
+        ld1             {v22.d}[1], [x0], x1
+        ld1             {v23.8b},   [x9], x1
+        ld1             {v23.d}[1], [x0], x1
+        ld1             {v24.8b},   [x9], x1
+        ld1             {v24.d}[1], [x0], x1
+        ld1             {v25.8b},   [x9], x1
+        ld1             {v25.d}[1], [x0], x1
+        ld1             {v26.8b},   [x9], x1
+        ld1             {v26.d}[1], [x0], x1
+        ld1             {v27.8b},   [x9], x1
+        ld1             {v27.d}[1], [x0], x1
+
+        sub             x9,  x9,  x1, lsl #3 // rewind both pointers to their first row
+        sub             x0,  x0,  x1, lsl #3
+
+        transpose_8x16B v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        loop_filter_8_16b_mix \mix
+
+        transpose_8x16B v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        st1             {v20.8b},   [x9], x1 // flat8in was applied: write all 8 pixels of every row
+        st1             {v20.d}[1], [x0], x1
+        st1             {v21.8b},   [x9], x1
+        st1             {v21.d}[1], [x0], x1
+        st1             {v22.8b},   [x9], x1
+        st1             {v22.d}[1], [x0], x1
+        st1             {v23.8b},   [x9], x1
+        st1             {v23.d}[1], [x0], x1
+        st1             {v24.8b},   [x9], x1
+        st1             {v24.d}[1], [x0], x1
+        st1             {v25.8b},   [x9], x1
+        st1             {v25.d}[1], [x0], x1
+        st1             {v26.8b},   [x9], x1
+        st1             {v26.d}[1], [x0], x1
+        st1             {v27.8b},   [x9], x1
+        st1             {v27.d}[1], [x0], x1
+
+        ret             x10
+6:
+        add             x9,  x9,  #2 // flat8in not needed: write back only the mid 4 pixels of every row
+        add             x0,  x0,  #2
+        transpose_4x16B v22, v23, v24, v25, v26, v27, v28, v29
+        st1             {v22.s}[0], [x9], x1
+        st1             {v22.s}[2], [x0], x1
+        st1             {v23.s}[0], [x9], x1
+        st1             {v23.s}[2], [x0], x1
+        st1             {v24.s}[0], [x9], x1
+        st1             {v24.s}[2], [x0], x1
+        st1             {v25.s}[0], [x9], x1
+        st1             {v25.s}[2], [x0], x1
+        st1             {v22.s}[1], [x9], x1
+        st1             {v22.s}[3], [x0], x1
+        st1             {v23.s}[1], [x9], x1
+        st1             {v23.s}[3], [x0], x1
+        st1             {v24.s}[1], [x9], x1
+        st1             {v24.s}[3], [x0], x1
+        st1             {v25.s}[1], [x9], x1
+        st1             {v25.s}[3], [x0], x1
+        ret             x10
+endfunc
+.endm
+
+mix_h_16 48 // rows 0-7 limited to the 4-wide filter, rows 8-15 may use flat8in
+mix_h_16 84 // rows 0-7 may use flat8in, rows 8-15 limited to the 4-wide filter
+mix_h_16 88 // all 16 rows may use flat8in
+
+function ff_vp9_loop_filter_v_16_8_neon, export=1 // filter across a horizontal edge, 8 pixels wide; x0 = q0 row, x1 = stride
+        mov             x10, x30 // keep the return address in x10 (every exit is "ret x10")
+        stp             d8,  d9,  [sp, #-0x40]! // allocate 0x40 bytes of stack and spill d8-d15 into it
+        stp             d14, d15, [sp, #0x30]
+        stp             d12, d13, [sp, #0x20]
+        stp             d10, d11, [sp, #0x10]
+        sub             x9,  x0,  x1, lsl #3 // x9 = x0 - 8 * stride, the p7 row
+        ld1             {v16.8b}, [x9], x1 // p7
+        ld1             {v24.8b}, [x0], x1 // q0
+        ld1             {v17.8b}, [x9], x1 // p6
+        ld1             {v25.8b}, [x0], x1 // q1
+        ld1             {v18.8b}, [x9], x1 // p5
+        ld1             {v26.8b}, [x0], x1 // q2
+        ld1             {v19.8b}, [x9], x1 // p4
+        ld1             {v27.8b}, [x0], x1 // q3
+        ld1             {v20.8b}, [x9], x1 // p3
+        ld1             {v28.8b}, [x0], x1 // q4
+        ld1             {v21.8b}, [x9], x1 // p2
+        ld1             {v29.8b}, [x0], x1 // q5
+        ld1             {v22.8b}, [x9], x1 // p1
+        ld1             {v30.8b}, [x0], x1 // q6
+        ld1             {v23.8b}, [x9], x1 // p0
+        ld1             {v31.8b}, [x0], x1 // q7
+        sub             x9,  x9,  x1, lsl #3 // rewind both pointers by the 8 rows just read
+        sub             x0,  x0,  x1, lsl #3
+        add             x9,  x9,  x1 // x9 = x0 - 7 * stride (p7 itself is never written back)
+
+        loop_filter_16 // defined outside this view; presumably branches to 7/8/9 below for the narrower outcomes - confirm
+
+        // If we did the flat8out part, we get the output in
+        // v2-v17 (skipping v7 and v16). x9 points to x0 - 7 * stride,
+        // store v2-v9 there, and v10-v17 into x0.
+        st1             {v2.8b},  [x9], x1
+        st1             {v10.8b}, [x0], x1
+        st1             {v3.8b},  [x9], x1
+        st1             {v11.8b}, [x0], x1
+        st1             {v4.8b},  [x9], x1
+        st1             {v12.8b}, [x0], x1
+        st1             {v5.8b},  [x9], x1
+        st1             {v13.8b}, [x0], x1
+        st1             {v6.8b},  [x9], x1
+        st1             {v14.8b}, [x0], x1
+        st1             {v8.8b},  [x9], x1
+        st1             {v15.8b}, [x0], x1
+        st1             {v9.8b},  [x9], x1
+        st1             {v17.8b}, [x0], x1
+9:
+        ldp             d10, d11, [sp, #0x10] // common exit: reload d8-d15 and release the 0x40 bytes
+        ldp             d12, d13, [sp, #0x20]
+        ldp             d14, d15, [sp, #0x30]
+        ldp             d8,  d9,  [sp], #0x40
+        ret             x10
+8:
+        add             x9,  x9,  x1, lsl #2 // x9 = x0 - 3 * stride, assuming loop_filter_16 left x9/x0 untouched
+        // If we didn't do the flat8out part, the output is left in the
+        // input registers.
+        st1             {v21.8b}, [x9], x1 // writes back p2, p1, p0 above the edge and q0, q1, q2 below it
+        st1             {v24.8b}, [x0], x1
+        st1             {v22.8b}, [x9], x1
+        st1             {v25.8b}, [x0], x1
+        st1             {v23.8b}, [x9], x1
+        st1             {v26.8b}, [x0], x1
+        b               9b
+7:
+        sub             x9,  x0,  x1, lsl #1 // x9 = x0 - 2 * stride; only p1, p0, q0, q1 are written back
+        st1             {v22.8b}, [x9], x1
+        st1             {v24.8b}, [x0], x1
+        st1             {v23.8b}, [x9], x1
+        st1             {v25.8b}, [x0], x1
+        b               9b
+endfunc
+
+function ff_vp9_loop_filter_v_16_16_neon, export=1 // filter across a horizontal edge, 16 pixels wide; x0 = q0 row, x1 = stride
+        mov             x10, x30 // keep the return address in x10 (every exit is "ret x10")
+        stp             d8,  d9,  [sp, #-0x40]! // allocate 0x40 bytes of stack and spill d8-d15 into it
+        stp             d14, d15, [sp, #0x30]
+        stp             d12, d13, [sp, #0x20]
+        stp             d10, d11, [sp, #0x10]
+        sub             x9,  x0,  x1, lsl #3 // x9 = x0 - 8 * stride, the p7 row
+        ld1             {v16.16b}, [x9], x1 // p7
+        ld1             {v24.16b}, [x0], x1 // q0
+        ld1             {v17.16b}, [x9], x1 // p6
+        ld1             {v25.16b}, [x0], x1 // q1
+        ld1             {v18.16b}, [x9], x1 // p5
+        ld1             {v26.16b}, [x0], x1 // q2
+        ld1             {v19.16b}, [x9], x1 // p4
+        ld1             {v27.16b}, [x0], x1 // q3
+        ld1             {v20.16b}, [x9], x1 // p3
+        ld1             {v28.16b}, [x0], x1 // q4
+        ld1             {v21.16b}, [x9], x1 // p2
+        ld1             {v29.16b}, [x0], x1 // q5
+        ld1             {v22.16b}, [x9], x1 // p1
+        ld1             {v30.16b}, [x0], x1 // q6
+        ld1             {v23.16b}, [x9], x1 // p0
+        ld1             {v31.16b}, [x0], x1 // q7
+        sub             x9,  x9,  x1, lsl #3 // rewind both pointers by the 8 rows just read
+        sub             x0,  x0,  x1, lsl #3
+        add             x9,  x9,  x1 // x9 = x0 - 7 * stride (p7 itself is never written back)
+
+        loop_filter_16_16b // defined outside this view; presumably branches to 7/8/9 below for the narrower outcomes - confirm
+
+        st1             {v2.16b},  [x9], x1 // widest outcome: 7 rows above the edge (v2-v6, v8, v9) and 7 below (v10-v15, v17)
+        st1             {v10.16b}, [x0], x1
+        st1             {v3.16b},  [x9], x1
+        st1             {v11.16b}, [x0], x1
+        st1             {v4.16b},  [x9], x1
+        st1             {v12.16b}, [x0], x1
+        st1             {v5.16b},  [x9], x1
+        st1             {v13.16b}, [x0], x1
+        st1             {v6.16b},  [x9], x1
+        st1             {v14.16b}, [x0], x1
+        st1             {v8.16b},  [x9], x1
+        st1             {v15.16b}, [x0], x1
+        st1             {v9.16b},  [x9], x1
+        st1             {v17.16b}, [x0], x1
+9:
+        ldp             d10, d11, [sp, #0x10] // common exit: reload d8-d15 and release the 0x40 bytes
+        ldp             d12, d13, [sp, #0x20]
+        ldp             d14, d15, [sp, #0x30]
+        ldp             d8,  d9,  [sp], #0x40
+        ret             x10
+8:
+        add             x9,  x9,  x1, lsl #2 // x9 = x0 - 3 * stride, assuming the filter macro left x9/x0 untouched
+        st1             {v21.16b}, [x9], x1 // results are in the input registers: p2, p1, p0 above and q0, q1, q2 below
+        st1             {v24.16b}, [x0], x1
+        st1             {v22.16b}, [x9], x1
+        st1             {v25.16b}, [x0], x1
+        st1             {v23.16b}, [x9], x1
+        st1             {v26.16b}, [x0], x1
+        b               9b
+7:
+        sub             x9,  x0,  x1, lsl #1 // x9 = x0 - 2 * stride; only p1, p0, q0, q1 are written back
+        st1             {v22.16b}, [x9], x1
+        st1             {v24.16b}, [x0], x1
+        st1             {v23.16b}, [x9], x1
+        st1             {v25.16b}, [x0], x1
+        b               9b
+endfunc
+
+function ff_vp9_loop_filter_h_16_8_neon, export=1 // filter across a vertical edge over 8 rows; x0 = first pixel right of the edge, x1 = stride
+        mov             x10, x30 // keep the return address in x10 (every exit is "ret x10")
+        stp             d8,  d9,  [sp, #-0x40]! // allocate 0x40 bytes of stack and spill d8-d15 into it
+        stp             d14, d15, [sp, #0x30]
+        stp             d12, d13, [sp, #0x20]
+        stp             d10, d11, [sp, #0x10]
+        sub             x9,  x0,  #8 // x9 = 8 bytes to the left of x0
+        ld1             {v16.8b}, [x9], x1
+        ld1             {v24.8b}, [x0], x1
+        ld1             {v17.8b}, [x9], x1
+        ld1             {v25.8b}, [x0], x1
+        ld1             {v18.8b}, [x9], x1
+        ld1             {v26.8b}, [x0], x1
+        ld1             {v19.8b}, [x9], x1
+        ld1             {v27.8b}, [x0], x1
+        ld1             {v20.8b}, [x9], x1
+        ld1             {v28.8b}, [x0], x1
+        ld1             {v21.8b}, [x9], x1
+        ld1             {v29.8b}, [x0], x1
+        ld1             {v22.8b}, [x9], x1
+        ld1             {v30.8b}, [x0], x1
+        ld1             {v23.8b}, [x9], x1
+        ld1             {v31.8b}, [x0], x1
+        sub             x0,  x0,  x1, lsl #3 // rewind both pointers by the 8 rows just read
+        sub             x9,  x9,  x1, lsl #3
+
+        // The 16x8 pixels read above are in two 8x8 blocks; the left
+        // half in v16-v23, and the right half in v24-v31. Do two 8x8 transposes
+        // of this, to get one column per register.
+        transpose_8x8B  v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
+        transpose_8x8B  v24, v25, v26, v27, v28, v29, v30, v31, v0, v1
+
+        loop_filter_16 // defined outside this view; presumably branches to 7/8/9 below for the narrower outcomes - confirm
+
+        transpose_8x8B  v16, v2,  v3,  v4,  v5,  v6,  v8,  v9,  v0, v1 // back to one row per register: left half
+        transpose_8x8B  v10, v11, v12, v13, v14, v15, v17, v31, v0, v1 // and right half
+
+        st1             {v16.8b}, [x9], x1 // write all 8 rows, 8 bytes left of the edge via x9 and 8 bytes right via x0
+        st1             {v10.8b}, [x0], x1
+        st1             {v2.8b},  [x9], x1
+        st1             {v11.8b}, [x0], x1
+        st1             {v3.8b},  [x9], x1
+        st1             {v12.8b}, [x0], x1
+        st1             {v4.8b},  [x9], x1
+        st1             {v13.8b}, [x0], x1
+        st1             {v5.8b},  [x9], x1
+        st1             {v14.8b}, [x0], x1
+        st1             {v6.8b},  [x9], x1
+        st1             {v15.8b}, [x0], x1
+        st1             {v8.8b},  [x9], x1
+        st1             {v17.8b}, [x0], x1
+        st1             {v9.8b},  [x9], x1
+        st1             {v31.8b}, [x0], x1
+9:
+        ldp             d10, d11, [sp, #0x10] // common exit: reload d8-d15 and release the 0x40 bytes
+        ldp             d12, d13, [sp, #0x20]
+        ldp             d14, d15, [sp, #0x30]
+        ldp             d8,  d9,  [sp], #0x40
+        ret             x10
+8:
+        // The same writeback as in loop_filter_h_8_8
+        sub             x9,  x0,  #4 // x9 = rows 0-3, starting 4 bytes left of the edge
+        add             x0,  x9,  x1, lsl #2 // x0 = rows 4-7 at the same column
+        transpose_8x8B  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        st1             {v20.8b}, [x9], x1
+        st1             {v24.8b}, [x0], x1
+        st1             {v21.8b}, [x9], x1
+        st1             {v25.8b}, [x0], x1
+        st1             {v22.8b}, [x9], x1
+        st1             {v26.8b}, [x0], x1
+        st1             {v23.8b}, [x9], x1
+        st1             {v27.8b}, [x0], x1
+        b               9b
+7:
+        // The same writeback as in loop_filter_h_4_8
+        sub             x9,  x0,  #2 // x9 = rows 0-3, starting 2 bytes left of the edge
+        add             x0,  x9,  x1, lsl #2 // x0 = rows 4-7 at the same column
+        transpose_4x8B  v22, v23, v24, v25, v26, v27, v28, v29
+        st1             {v22.s}[0], [x9], x1 // 4 bytes per row: lane 0 goes to rows 0-3, lane 1 to rows 4-7
+        st1             {v22.s}[1], [x0], x1
+        st1             {v23.s}[0], [x9], x1
+        st1             {v23.s}[1], [x0], x1
+        st1             {v24.s}[0], [x9], x1
+        st1             {v24.s}[1], [x0], x1
+        st1             {v25.s}[0], [x9], x1
+        st1             {v25.s}[1], [x0], x1
+        b               9b
+endfunc
+
+function ff_vp9_loop_filter_h_16_16_neon, export=1 // filter across a vertical edge over 16 rows; x0 = first pixel right of the edge, x1 = stride
+        mov             x10, x30 // keep the return address in x10 (every exit is "ret x10")
+        stp             d8,  d9,  [sp, #-0x40]! // allocate 0x40 bytes of stack and spill d8-d15 into it
+        stp             d14, d15, [sp, #0x30]
+        stp             d12, d13, [sp, #0x20]
+        stp             d10, d11, [sp, #0x10]
+        sub             x9,  x0,  #8 // x9 = 8 bytes to the left of x0
+        ld1             {v16.8b},   [x9], x1 // rows 0-7 fill the low 64 bits of v16-v23 (left) and v24-v31 (right)
+        ld1             {v24.8b},   [x0], x1
+        ld1             {v17.8b},   [x9], x1
+        ld1             {v25.8b},   [x0], x1
+        ld1             {v18.8b},   [x9], x1
+        ld1             {v26.8b},   [x0], x1
+        ld1             {v19.8b},   [x9], x1
+        ld1             {v27.8b},   [x0], x1
+        ld1             {v20.8b},   [x9], x1
+        ld1             {v28.8b},   [x0], x1
+        ld1             {v21.8b},   [x9], x1
+        ld1             {v29.8b},   [x0], x1
+        ld1             {v22.8b},   [x9], x1
+        ld1             {v30.8b},   [x0], x1
+        ld1             {v23.8b},   [x9], x1
+        ld1             {v31.8b},   [x0], x1
+        ld1             {v16.d}[1], [x9], x1 // rows 8-15 fill the high 64 bits of the same registers
+        ld1             {v24.d}[1], [x0], x1
+        ld1             {v17.d}[1], [x9], x1
+        ld1             {v25.d}[1], [x0], x1
+        ld1             {v18.d}[1], [x9], x1
+        ld1             {v26.d}[1], [x0], x1
+        ld1             {v19.d}[1], [x9], x1
+        ld1             {v27.d}[1], [x0], x1
+        ld1             {v20.d}[1], [x9], x1
+        ld1             {v28.d}[1], [x0], x1
+        ld1             {v21.d}[1], [x9], x1
+        ld1             {v29.d}[1], [x0], x1
+        ld1             {v22.d}[1], [x9], x1
+        ld1             {v30.d}[1], [x0], x1
+        ld1             {v23.d}[1], [x9], x1
+        ld1             {v31.d}[1], [x0], x1
+        sub             x0,  x0,  x1, lsl #4 // rewind both pointers by the 16 rows just read
+        sub             x9,  x9,  x1, lsl #4
+
+        transpose_8x16B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1 // v0/v1 are passed as the last two (temporary) operands
+        transpose_8x16B v24, v25, v26, v27, v28, v29, v30, v31, v0, v1
+
+        loop_filter_16_16b // defined outside this view; presumably branches to 7/8/9 below for the narrower outcomes - confirm
+
+        transpose_8x16B v16, v2,  v3,  v4,  v5,  v6,  v8,  v9,  v0, v1 // transpose the left-half results back for the row stores
+        transpose_8x16B v10, v11, v12, v13, v14, v15, v17, v31, v0, v1 // and the right-half results
+
+        st1             {v16.8b},   [x9], x1 // rows 0-7 come from the low halves
+        st1             {v10.8b},   [x0], x1
+        st1             {v2.8b},    [x9], x1
+        st1             {v11.8b},   [x0], x1
+        st1             {v3.8b},    [x9], x1
+        st1             {v12.8b},   [x0], x1
+        st1             {v4.8b},    [x9], x1
+        st1             {v13.8b},   [x0], x1
+        st1             {v5.8b},    [x9], x1
+        st1             {v14.8b},   [x0], x1
+        st1             {v6.8b},    [x9], x1
+        st1             {v15.8b},   [x0], x1
+        st1             {v8.8b},    [x9], x1
+        st1             {v17.8b},   [x0], x1
+        st1             {v9.8b},    [x9], x1
+        st1             {v31.8b},   [x0], x1
+        st1             {v16.d}[1], [x9], x1 // rows 8-15 come from the high halves
+        st1             {v10.d}[1], [x0], x1
+        st1             {v2.d}[1],  [x9], x1
+        st1             {v11.d}[1], [x0], x1
+        st1             {v3.d}[1],  [x9], x1
+        st1             {v12.d}[1], [x0], x1
+        st1             {v4.d}[1],  [x9], x1
+        st1             {v13.d}[1], [x0], x1
+        st1             {v5.d}[1],  [x9], x1
+        st1             {v14.d}[1], [x0], x1
+        st1             {v6.d}[1],  [x9], x1
+        st1             {v15.d}[1], [x0], x1
+        st1             {v8.d}[1],  [x9], x1
+        st1             {v17.d}[1], [x0], x1
+        st1             {v9.d}[1],  [x9], x1
+        st1             {v31.d}[1], [x0], x1
+9:
+        ldp             d10, d11, [sp, #0x10] // common exit: reload d8-d15 and release the 0x40 bytes
+        ldp             d12, d13, [sp, #0x20]
+        ldp             d14, d15, [sp, #0x30]
+        ldp             d8,  d9,  [sp], #0x40
+        ret             x10
+8:
+        sub             x9,  x0,  #4 // x9 = rows 0-7, starting 4 bytes left of the edge
+        add             x0,  x9,  x1, lsl #3 // x0 = rows 8-15 at the same column
+        transpose_8x16B v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
+
+        st1             {v20.8b},   [x9], x1 // low half of each register goes to rows 0-7, high half to rows 8-15
+        st1             {v20.d}[1], [x0], x1
+        st1             {v21.8b},   [x9], x1
+        st1             {v21.d}[1], [x0], x1
+        st1             {v22.8b},   [x9], x1
+        st1             {v22.d}[1], [x0], x1
+        st1             {v23.8b},   [x9], x1
+        st1             {v23.d}[1], [x0], x1
+        st1             {v24.8b},   [x9], x1
+        st1             {v24.d}[1], [x0], x1
+        st1             {v25.8b},   [x9], x1
+        st1             {v25.d}[1], [x0], x1
+        st1             {v26.8b},   [x9], x1
+        st1             {v26.d}[1], [x0], x1
+        st1             {v27.8b},   [x9], x1
+        st1             {v27.d}[1], [x0], x1
+        b               9b
+7:
+        sub             x9,  x0,  #2 // x9 = rows 0-7, starting 2 bytes left of the edge
+        add             x0,  x9,  x1, lsl #3 // x0 = rows 8-15 at the same column
+        transpose_4x16B v22, v23, v24, v25, v26, v27, v28, v29
+        st1             {v22.s}[0], [x9], x1 // lanes 0 and 2 hold rows 0-3 and 8-11
+        st1             {v22.s}[2], [x0], x1
+        st1             {v23.s}[0], [x9], x1
+        st1             {v23.s}[2], [x0], x1
+        st1             {v24.s}[0], [x9], x1
+        st1             {v24.s}[2], [x0], x1
+        st1             {v25.s}[0], [x9], x1
+        st1             {v25.s}[2], [x0], x1
+        st1             {v22.s}[1], [x9], x1 // lanes 1 and 3 hold rows 4-7 and 12-15
+        st1             {v22.s}[3], [x0], x1
+        st1             {v23.s}[1], [x9], x1
+        st1             {v23.s}[3], [x0], x1
+        st1             {v24.s}[1], [x9], x1
+        st1             {v24.s}[3], [x0], x1
+        st1             {v25.s}[1], [x9], x1
+        st1             {v25.s}[3], [x0], x1
+        b               9b
+endfunc
diff --git a/media/ffvpx/libavcodec/aarch64/vp9mc_16bpp_neon.S b/media/ffvpx/libavcodec/aarch64/vp9mc_16bpp_neon.S
new file mode 100644
index 0000000000..53b372c262
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9mc_16bpp_neon.S
@@ -0,0 +1,606 @@
+/*
+ * Copyright (c) 2017 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// All public functions in this file have the following signature:
+// typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+//                            const uint8_t *ref, ptrdiff_t ref_stride,
+//                            int h, int mx, int my);
+
+function ff_vp9_avg64_16_neon, export=1 // dst = rounding average of dst and ref; 64 pixels (128 bytes) per row, one row per pass
+        mov             x5,  x0 // x5 = store pointer; x0 is used for the dst loads
+        sub             x1,  x1,  #64 // each row is accessed as 64 + 64 bytes, so the second access
+        sub             x3,  x3,  #64 // post-increments by stride - 64 to land on the next row
+1:
+        ld1             {v4.8h,  v5.8h,  v6.8h,  v7.8h},  [x2], #64 // ref, first 32 pixels
+        ld1             {v0.8h,  v1.8h,  v2.8h,  v3.8h},  [x0], #64 // dst, first 32 pixels
+        ld1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x2], x3 // ref, last 32 pixels
+        urhadd          v0.8h,  v0.8h,  v4.8h
+        urhadd          v1.8h,  v1.8h,  v5.8h
+        ld1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], x1 // dst, last 32 pixels
+        urhadd          v2.8h,  v2.8h,  v6.8h
+        urhadd          v3.8h,  v3.8h,  v7.8h
+        subs            w4,  w4,  #1 // w4 = rows left; the flags are consumed by b.ne below
+        urhadd          v16.8h, v16.8h, v20.8h
+        urhadd          v17.8h, v17.8h, v21.8h
+        st1             {v0.8h,  v1.8h,  v2.8h,  v3.8h},  [x5], #64
+        urhadd          v18.8h, v18.8h, v22.8h
+        urhadd          v19.8h, v19.8h, v23.8h
+        st1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x5], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_avg32_16_neon, export=1 // dst = rounding average of dst and ref; 32 pixels (64 bytes) per row, two rows per pass
+        mov             x5,  x0 // x5 = store pointer; x0 is used for the dst loads
+1:
+        ld1             {v4.8h,  v5.8h,  v6.8h,  v7.8h},  [x2], x3 // ref, first row
+        ld1             {v0.8h,  v1.8h,  v2.8h,  v3.8h},  [x0], x1 // dst, first row
+        ld1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x2], x3 // ref, second row
+        urhadd          v0.8h,  v0.8h,  v4.8h
+        urhadd          v1.8h,  v1.8h,  v5.8h
+        ld1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], x1 // dst, second row
+        urhadd          v2.8h,  v2.8h,  v6.8h
+        urhadd          v3.8h,  v3.8h,  v7.8h
+        subs            w4,  w4,  #2 // two rows done; the loop only ends when w4 hits exactly 0, so h must be even
+        urhadd          v16.8h, v16.8h, v20.8h
+        urhadd          v17.8h, v17.8h, v21.8h
+        st1             {v0.8h,  v1.8h,  v2.8h,  v3.8h},  [x5], x1
+        urhadd          v18.8h, v18.8h, v22.8h
+        urhadd          v19.8h, v19.8h, v23.8h
+        st1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x5], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_avg16_16_neon, export=1 // dst = rounding average of dst and ref; 16 pixels (32 bytes) per row, one row per pass
+1:
+        ld1             {v2.8h, v3.8h},  [x2], x3 // ref row
+        ld1             {v0.8h, v1.8h},  [x0] // dst row; no post-increment because the store below advances x0
+        urhadd          v0.8h,  v0.8h,  v2.8h
+        urhadd          v1.8h,  v1.8h,  v3.8h
+        subs            w4,  w4,  #1 // w4 = rows left; the flags are consumed by b.ne below
+        st1             {v0.8h, v1.8h},  [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_avg8_16_neon, export=1 // dst = rounding average of dst and ref; 8 pixels (16 bytes) per row, two rows per pass
+        mov             x5,  x0 // x5 = store pointer; x0 is used for the dst loads
+1:
+        ld1             {v2.8h},  [x2], x3 // ref, first row
+        ld1             {v0.8h},  [x0], x1 // dst, first row
+        ld1             {v3.8h},  [x2], x3 // ref, second row
+        urhadd          v0.8h,  v0.8h,  v2.8h
+        ld1             {v1.8h},  [x0], x1 // dst, second row
+        urhadd          v1.8h,  v1.8h,  v3.8h
+        subs            w4,  w4,  #2 // two rows done; the loop only ends when w4 hits exactly 0, so h must be even
+        st1             {v0.8h},  [x5], x1
+        st1             {v1.8h},  [x5], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_avg4_16_neon, export=1 // dst = rounding average of dst and ref; 4 pixels (8 bytes) per row, two rows per pass
+        mov             x5,  x0 // x5 = store pointer; x0 is used for the dst loads
+1:
+        ld1             {v2.4h},  [x2], x3 // ref, first row
+        ld1             {v0.4h},  [x0], x1 // dst, first row
+        ld1             {v3.4h},  [x2], x3 // ref, second row
+        urhadd          v0.4h,  v0.4h,  v2.4h
+        ld1             {v1.4h},  [x0], x1 // dst, second row
+        urhadd          v1.4h,  v1.4h,  v3.4h
+        subs            w4,  w4,  #2 // two rows done; the loop only ends when w4 hits exactly 0, so h must be even
+        st1             {v0.4h},  [x5], x1
+        st1             {v1.4h},  [x5], x1 // .4h (was .8b) to match the loads: same bytes on little-endian, no per-pixel byte swap on big-endian
+        b.ne            1b
+        ret
+endfunc
+
+
+// Extract a vector from src1-src2 and src4-src5 (src1-src3 and src4-src6
+// for size >= 16), and multiply-accumulate into dst1 and dst5 (or
+// dst1-dst2 and dst5-dst6 for size >= 8 and dst1-dst4 and dst5-dst8
+// for size >= 16)
+.macro extmlal dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, src1, src2, src3, src4, src5, src6, offset, size
+        ext             v20.16b, \src1\().16b, \src2\().16b, #(2*\offset) // v20 = first row shifted left by offset 16-bit pixels; v20-v23 are overwritten as scratch
+        ext             v22.16b, \src4\().16b, \src5\().16b, #(2*\offset) // v22 = the same for the second row
+        smlal           \dst1\().4s, v20.4h, v0.h[\offset] // accumulate pixel * coefficient number offset (coefficients are read from v0)
+        smlal           \dst5\().4s, v22.4h, v0.h[\offset]
+.if \size >= 16
+        ext             v21.16b, \src2\().16b, \src3\().16b, #(2*\offset) // pixels 8-15 additionally need the third source register
+        ext             v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
+.endif
+.if \size >= 8
+        smlal2          \dst2\().4s, v20.8h, v0.h[\offset] // pixels 4-7 come from the upper halves of v20/v22
+        smlal2          \dst6\().4s, v22.8h, v0.h[\offset]
+.endif
+.if \size >= 16
+        smlal           \dst3\().4s, v21.4h, v0.h[\offset] // pixels 8-11
+        smlal           \dst7\().4s, v23.4h, v0.h[\offset]
+        smlal2          \dst4\().4s, v21.8h, v0.h[\offset] // pixels 12-15
+        smlal2          \dst8\().4s, v23.8h, v0.h[\offset]
+.endif
+.endm
+
+
+// Instantiate a horizontal filter function for the given size.
+// This can work on 4, 8 or 16 pixels in parallel; for larger
+// widths it will do 16 pixels at a time and loop horizontally.
+// The actual width (in bytes) is passed in x5, the height in w4, the
+// filter coefficients in x9 and the max pixel value (for umin) in v31.
+.macro do_8tap_h type, size
+function \type\()_8tap_\size\()h
+        sub             x2,  x2,  #6 // start reading 3 pixels (6 bytes) left of the output position
+        add             x6,  x0,  x1 // x6 = dst pointer for the second row
+        add             x7,  x2,  x3 // x7 = src pointer for the second row
+        add             x1,  x1,  x1 // double both strides, since every vertical pass handles two rows
+        add             x3,  x3,  x3
+        // Only size >= 16 loops horizontally and needs
+        // reduced dst stride
+.if \size >= 16
+        sub             x1,  x1,  x5
+.endif
+        // size >= 16 loads two qwords and increments x2,
+        // for size 4/8 it's enough with one qword and no
+        // postincrement
+.if \size >= 16
+        sub             x3,  x3,  x5 // per row the src pointer advances 48 bytes up front plus 32 per
+        sub             x3,  x3,  #16 // further 16-pixel step, i.e. width + 16 bytes in total
+.endif
+        // Load the filter vector
+        ld1             {v0.8h},  [x9]
+1:
+.if \size >= 16
+        mov             x9,  x5 // x9 is free once the filter is loaded; reuse it as the remaining width in bytes
+.endif
+        // Load src
+.if \size >= 16
+        ld1             {v5.8h,  v6.8h,  v7.8h},  [x2], #48
+        ld1             {v16.8h, v17.8h, v18.8h}, [x7], #48
+.else
+        ld1             {v5.8h,  v6.8h},  [x2]
+        ld1             {v16.8h, v17.8h}, [x7]
+.endif
+2:
+
+        smull           v1.4s,  v5.4h,  v0.h[0] // tap 0 initialises the 32-bit accumulators: v1-v4 for row 0, v24-v27 for row 1
+        smull           v24.4s, v16.4h, v0.h[0]
+.if \size >= 8
+        smull2          v2.4s,  v5.8h,  v0.h[0]
+        smull2          v25.4s, v16.8h, v0.h[0]
+.endif
+.if \size >= 16
+        smull           v3.4s,  v6.4h,  v0.h[0]
+        smull           v26.4s, v17.4h, v0.h[0]
+        smull2          v4.4s,  v6.8h,  v0.h[0]
+        smull2          v27.4s, v17.8h, v0.h[0]
+.endif
+        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 1, \size // taps 1-7 are accumulated on top
+        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 2, \size
+        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 3, \size
+        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 4, \size
+        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 5, \size
+        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 6, \size
+        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 7, \size
+
+        // Round, shift and saturate
+        // The sqrshrun takes care of clamping negative values to zero, but
+        // we manually need to do umin with the max pixel value.
+        sqrshrun        v1.4h,  v1.4s,  #7
+        sqrshrun        v24.4h, v24.4s, #7
+.if \size >= 8
+        sqrshrun2       v1.8h,  v2.4s,  #7
+        sqrshrun2       v24.8h, v25.4s, #7
+        umin            v1.8h,  v1.8h,  v31.8h
+        umin            v24.8h, v24.8h, v31.8h
+.if \size >= 16
+        sqrshrun        v2.4h,  v3.4s,  #7 // v2/v25 are reused for the narrowed pixels 8-15
+        sqrshrun        v25.4h, v26.4s, #7
+        sqrshrun2       v2.8h,  v4.4s,  #7
+        sqrshrun2       v25.8h, v27.4s, #7
+        umin            v2.8h,  v2.8h,  v31.8h
+        umin            v25.8h, v25.8h, v31.8h
+.endif
+.else
+        umin            v1.4h,  v1.4h,  v31.4h
+        umin            v24.4h, v24.4h, v31.4h
+.endif
+        // Average
+.ifc \type,avg
+.if \size >= 16
+        ld1             {v3.8h,  v4.8h},  [x0] // avg only: read the existing dst pixels and take the rounding average
+        ld1             {v29.8h, v30.8h}, [x6]
+        urhadd          v1.8h,  v1.8h,  v3.8h
+        urhadd          v2.8h,  v2.8h,  v4.8h
+        urhadd          v24.8h, v24.8h, v29.8h
+        urhadd          v25.8h, v25.8h, v30.8h
+.elseif \size >= 8
+        ld1             {v3.8h},  [x0]
+        ld1             {v4.8h},  [x6]
+        urhadd          v1.8h,  v1.8h,  v3.8h
+        urhadd          v24.8h, v24.8h, v4.8h
+.else
+        ld1             {v3.4h},  [x0]
+        ld1             {v4.4h},  [x6]
+        urhadd          v1.4h,  v1.4h,  v3.4h
+        urhadd          v24.4h, v24.4h, v4.4h
+.endif
+.endif
+        // Store and loop horizontally (for size >= 16)
+.if \size >= 16
+        subs            x9,  x9,  #32 // 16 pixels (32 bytes) done; the flags are consumed by b.eq below
+        st1             {v1.8h,  v2.8h},  [x0], #32
+        st1             {v24.8h, v25.8h}, [x6], #32
+        b.eq            3f
+        mov             v5.16b,  v7.16b // slide the window: the last register becomes the first,
+        mov             v16.16b, v18.16b
+        ld1             {v6.8h,  v7.8h},  [x2], #32 // then 16 more pixels are appended behind it
+        ld1             {v17.8h, v18.8h}, [x7], #32
+        b               2b
+.elseif \size == 8
+        st1             {v1.8h},  [x0]
+        st1             {v24.8h}, [x6]
+.else // \size == 4
+        st1             {v1.4h},  [x0]
+        st1             {v24.4h}, [x6]
+.endif
+3:
+        // Loop vertically
+        add             x0,  x0,  x1
+        add             x6,  x6,  x1
+        add             x2,  x2,  x3
+        add             x7,  x7,  x3
+        subs            w4,  w4,  #2 // two rows done; the loop only ends when w4 hits exactly 0, so the height must be even
+        b.ne            1b
+        ret
+endfunc
+.endm
+
+.macro do_8tap_h_size size
+do_8tap_h put, \size // emits put_8tap_<size>h
+do_8tap_h avg, \size // emits avg_8tap_<size>h
+.endm
+
+do_8tap_h_size 4 // workers exist for 4, 8 and 16 pixels only; wider blocks go through the 16-pixel one
+do_8tap_h_size 8
+do_8tap_h_size 16
+
+.macro do_8tap_h_func type, filter, offset, size, bpp
+function ff_vp9_\type\()_\filter\()\size\()_h_\bpp\()_neon, export=1
+        mvni            v31.8h, #((0xff << (\bpp - 8)) & 0xff), lsl #8 // v31 = max pixel value in every lane: 0x03ff for bpp 10, 0x0fff for bpp 12
+        movrel          x6,  X(ff_vp9_subpel_filters), 256*\offset // x6 = ff_vp9_subpel_filters + 256 * offset bytes (via the movrel macro from asm.S)
+        cmp             w5,  #8 // NOTE(review): no visible code reads these flags before a subs in do_8tap_h overwrites them - confirm whether this is dead
+        add             x9,  x6,  w5, uxtw #4 // x9 = x6 + 16 * w5 (mx): the 8 coefficients handed to the worker
+        mov             x5,  #2*\size // x5 = block width in bytes (2 bytes per pixel)
+.if \size >= 16
+        b               \type\()_8tap_16h // tail call; widths of 16 and above share the 16-pixel worker, which loops over x5
+.else
+        b               \type\()_8tap_\size\()h // tail call into the size-specific worker
+.endif
+endfunc
+.endm
+
+.macro do_8tap_h_filters size, bpp
+do_8tap_h_func put, regular, 1, \size, \bpp // third argument = filter bank index, scaled by 256 bytes in do_8tap_h_func
+do_8tap_h_func avg, regular, 1, \size, \bpp
+do_8tap_h_func put, sharp,   2, \size, \bpp
+do_8tap_h_func avg, sharp,   2, \size, \bpp
+do_8tap_h_func put, smooth,  0, \size, \bpp
+do_8tap_h_func avg, smooth,  0, \size, \bpp
+.endm
+
+.macro do_8tap_h_filters_bpp bpp
+do_8tap_h_filters 64, \bpp // one set of put/avg x regular/sharp/smooth entry points per block width
+do_8tap_h_filters 32, \bpp
+do_8tap_h_filters 16, \bpp
+do_8tap_h_filters 8,  \bpp
+do_8tap_h_filters 4,  \bpp
+.endm
+
+do_8tap_h_filters_bpp 10 // exported horizontal entry points for 10-bit content
+do_8tap_h_filters_bpp 12 // and for 12-bit content
+
+
+// Vertical filters
+
+// Round, shift and saturate and store reg1-reg4
+.macro do_store4 reg1, reg2, reg3, reg4, tmp1, tmp2, tmp3, tmp4, minreg, type
+        sqrshrun        \reg1\().4h,  \reg1\().4s, #7 // round, shift right by 7 and narrow 32 -> 16 bit with unsigned saturation, in place
+        sqrshrun        \reg2\().4h,  \reg2\().4s, #7
+        sqrshrun        \reg3\().4h,  \reg3\().4s, #7
+        sqrshrun        \reg4\().4h,  \reg4\().4s, #7
+.ifc \type,avg
+        ld1             {\tmp1\().4h},  [x7], x1 // avg only: fetch four existing rows through x7 (presumably the caller keeps x7 tracking dst - confirm)
+        ld1             {\tmp2\().4h},  [x7], x1
+        ld1             {\tmp3\().4h},  [x7], x1
+        ld1             {\tmp4\().4h},  [x7], x1
+.endif
+        umin            \reg1\().4h,  \reg1\().4h,  \minreg\().4h // clamp to the upper bound held in minreg
+        umin            \reg2\().4h,  \reg2\().4h,  \minreg\().4h
+        umin            \reg3\().4h,  \reg3\().4h,  \minreg\().4h
+        umin            \reg4\().4h,  \reg4\().4h,  \minreg\().4h
+.ifc \type,avg
+        urhadd          \reg1\().4h,  \reg1\().4h,  \tmp1\().4h // avg only: rounding average with the rows fetched above
+        urhadd          \reg2\().4h,  \reg2\().4h,  \tmp2\().4h
+        urhadd          \reg3\().4h,  \reg3\().4h,  \tmp3\().4h
+        urhadd          \reg4\().4h,  \reg4\().4h,  \tmp4\().4h
+.endif
+        st1             {\reg1\().4h},  [x0], x1 // write four rows of 4 pixels, advancing x0 by x1 after each
+        st1             {\reg2\().4h},  [x0], x1
+        st1             {\reg3\().4h},  [x0], x1
+        st1             {\reg4\().4h},  [x0], x1
+.endm
+
+// Round, shift and saturate and store reg1-8, where
+// reg1-2, reg3-4 etc pairwise correspond to 4 rows.
+.macro do_store8 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, minreg, type
+        sqrshrun        \reg1\().4h,  \reg1\().4s, #7 // row 0: narrow reg1 (low 4 pixels) and reg2 (high 4 pixels) into reg1
+        sqrshrun2       \reg1\().8h,  \reg2\().4s, #7
+        sqrshrun        \reg2\().4h,  \reg3\().4s, #7 // row 1 ends up in reg2, which is free now that row 0 is packed
+        sqrshrun2       \reg2\().8h,  \reg4\().4s, #7
+        sqrshrun        \reg3\().4h,  \reg5\().4s, #7 // row 2 ends up in reg3
+        sqrshrun2       \reg3\().8h,  \reg6\().4s, #7
+        sqrshrun        \reg4\().4h,  \reg7\().4s, #7 // row 3 ends up in reg4
+        sqrshrun2       \reg4\().8h,  \reg8\().4s, #7
+.ifc \type,avg
+        ld1             {\reg5\().8h},  [x7], x1 // avg only: reg5-reg8 are free by now and receive four existing rows read through x7
+        ld1             {\reg6\().8h},  [x7], x1
+        ld1             {\reg7\().8h},  [x7], x1
+        ld1             {\reg8\().8h},  [x7], x1
+.endif
+        umin            \reg1\().8h,  \reg1\().8h,  \minreg\().8h // clamp to the upper bound held in minreg
+        umin            \reg2\().8h,  \reg2\().8h,  \minreg\().8h
+        umin            \reg3\().8h,  \reg3\().8h,  \minreg\().8h
+        umin            \reg4\().8h,  \reg4\().8h,  \minreg\().8h
+.ifc \type,avg
+        urhadd          \reg1\().8h,  \reg1\().8h,  \reg5\().8h // avg only: rounding average with the rows fetched above
+        urhadd          \reg2\().8h,  \reg2\().8h,  \reg6\().8h
+        urhadd          \reg3\().8h,  \reg3\().8h,  \reg7\().8h
+        urhadd          \reg4\().8h,  \reg4\().8h,  \reg8\().8h
+.endif
+        st1             {\reg1\().8h},  [x0], x1 // write four rows of 8 pixels, advancing x0 by x1 after each
+        st1             {\reg2\().8h},  [x0], x1
+        st1             {\reg3\().8h},  [x0], x1
+        st1             {\reg4\().8h},  [x0], x1
+.endm
+
+// Evaluate the filter twice in parallel, from the inputs src1-src9 into dst1-dst2
+// (src1-src8 into dst1, src2-src9 into dst2).
+.macro convolve4 dst1, dst2, src1, src2, src3, src4, src5, src6, src7, src8, src9, tmp1, tmp2
+        smull           \dst1\().4s, \src1\().4h, v0.h[0] // even taps (0, 2, 4, 6) accumulate in dst1/dst2
+        smull           \dst2\().4s, \src2\().4h, v0.h[0]
+        smull           \tmp1\().4s, \src2\().4h, v0.h[1] // odd taps (1, 3, 5, 7) accumulate separately in tmp1/tmp2
+        smull           \tmp2\().4s, \src3\().4h, v0.h[1]
+        smlal           \dst1\().4s, \src3\().4h, v0.h[2]
+        smlal           \dst2\().4s, \src4\().4h, v0.h[2]
+        smlal           \tmp1\().4s, \src4\().4h, v0.h[3]
+        smlal           \tmp2\().4s, \src5\().4h, v0.h[3]
+        smlal           \dst1\().4s, \src5\().4h, v0.h[4]
+        smlal           \dst2\().4s, \src6\().4h, v0.h[4]
+        smlal           \tmp1\().4s, \src6\().4h, v0.h[5]
+        smlal           \tmp2\().4s, \src7\().4h, v0.h[5]
+        smlal           \dst1\().4s, \src7\().4h, v0.h[6]
+        smlal           \dst2\().4s, \src8\().4h, v0.h[6]
+        smlal           \tmp1\().4s, \src8\().4h, v0.h[7]
+        smlal           \tmp2\().4s, \src9\().4h, v0.h[7]
+        add             \dst1\().4s, \dst1\().4s, \tmp1\().4s // merge the even and odd partial sums into the final 32-bit results
+        add             \dst2\().4s, \dst2\().4s, \tmp2\().4s
+.endm
+
+// Evaluate the filter twice in parallel, from the inputs src1-src9 into dst1-dst4
+// (src1-src8 into dst1-dst2, src2-src9 into dst3-dst4).
+.macro convolve8 dst1, dst2, dst3, dst4, src1, src2, src3, src4, src5, src6, src7, src8, src9
+        smull           \dst1\().4s, \src1\().4h, v0.h[0] // tap 0 initialises all four accumulators; the plain forms use pixels 0-3
+        smull2          \dst2\().4s, \src1\().8h, v0.h[0] // and the "2" forms use pixels 4-7 of the same source row
+        smull           \dst3\().4s, \src2\().4h, v0.h[0] // dst3/dst4 are the second output row, one source row further down
+        smull2          \dst4\().4s, \src2\().8h, v0.h[0]
+        smlal           \dst1\().4s, \src2\().4h, v0.h[1] // taps 1-7 follow the same four-instruction pattern
+        smlal2          \dst2\().4s, \src2\().8h, v0.h[1]
+        smlal           \dst3\().4s, \src3\().4h, v0.h[1]
+        smlal2          \dst4\().4s, \src3\().8h, v0.h[1]
+        smlal           \dst1\().4s, \src3\().4h, v0.h[2]
+        smlal2          \dst2\().4s, \src3\().8h, v0.h[2]
+        smlal           \dst3\().4s, \src4\().4h, v0.h[2]
+        smlal2          \dst4\().4s, \src4\().8h, v0.h[2]
+        smlal           \dst1\().4s, \src4\().4h, v0.h[3]
+        smlal2          \dst2\().4s, \src4\().8h, v0.h[3]
+        smlal           \dst3\().4s, \src5\().4h, v0.h[3]
+        smlal2          \dst4\().4s, \src5\().8h, v0.h[3]
+        smlal           \dst1\().4s, \src5\().4h, v0.h[4]
+        smlal2          \dst2\().4s, \src5\().8h, v0.h[4]
+        smlal           \dst3\().4s, \src6\().4h, v0.h[4]
+        smlal2          \dst4\().4s, \src6\().8h, v0.h[4]
+        smlal           \dst1\().4s, \src6\().4h, v0.h[5]
+        smlal2          \dst2\().4s, \src6\().8h, v0.h[5]
+        smlal           \dst3\().4s, \src7\().4h, v0.h[5]
+        smlal2          \dst4\().4s, \src7\().8h, v0.h[5]
+        smlal           \dst1\().4s, \src7\().4h, v0.h[6]
+        smlal2          \dst2\().4s, \src7\().8h, v0.h[6]
+        smlal           \dst3\().4s, \src8\().4h, v0.h[6]
+        smlal2          \dst4\().4s, \src8\().8h, v0.h[6]
+        smlal           \dst1\().4s, \src8\().4h, v0.h[7]
+        smlal2          \dst2\().4s, \src8\().8h, v0.h[7]
+        smlal           \dst3\().4s, \src9\().4h, v0.h[7]
+        smlal2          \dst4\().4s, \src9\().8h, v0.h[7]
+.endm
+
+// Instantiate a vertical filter function for filtering 8 pixels at a time.
+// The height is passed in x4, the width in x5 and a pointer to the filter
+// coefficients in x6. Rows are produced 4 at a time, columns 8 pixels at a time.
+.macro do_8tap_8v type
+function \type\()_8tap_8v
+        sub             x2,  x2,  x3, lsl #1
+        sub             x2,  x2,  x3  // x2 -= 3 * src_stride in total: the first tap reads 3 rows above the output row
+        ld1             {v0.8h},  [x6]  // v0 = the 8 filter coefficients, 16 bit each
+1:  // start of one 8 pixel wide column slice
+.ifc \type,avg
+        mov             x7,  x0  // copy of the dst pointer (NOTE(review): presumably the read pointer used by do_store8 for averaging; confirm)
+.endif
+        mov             x6,  x4  // x6 = rows left in this slice; the coefficients already live in v0
+
+        ld1             {v17.8h}, [x2], x3  // preload the first 7 source rows into v17-v23
+        ld1             {v18.8h}, [x2], x3
+        ld1             {v19.8h}, [x2], x3
+        ld1             {v20.8h}, [x2], x3
+        ld1             {v21.8h}, [x2], x3
+        ld1             {v22.8h}, [x2], x3
+        ld1             {v23.8h}, [x2], x3
+2:  // the loop body is unrolled 3 times so the 11-row window rotates through v16-v27 without moves
+        ld1             {v24.8h}, [x2], x3  // 4 new rows: v17-v27 now hold 11 consecutive rows
+        ld1             {v25.8h}, [x2], x3
+        ld1             {v26.8h}, [x2], x3
+        ld1             {v27.8h}, [x2], x3
+
+        convolve8       v2,  v3,  v4,  v5,  v17, v18, v19, v20, v21, v22, v23, v24, v25
+        convolve8       v6,  v7,  v30, v31, v19, v20, v21, v22, v23, v24, v25, v26, v27
+        do_store8       v2,  v3,  v4,  v5,  v6,  v7,  v30, v31, v1,  \type
+
+        subs            x6,  x6,  #4  // 4 output rows done
+        b.eq            8f
+
+        ld1             {v16.8h}, [x2], x3  // next 4 rows; the window is now v21-v27 followed by v16-v19
+        ld1             {v17.8h}, [x2], x3
+        ld1             {v18.8h}, [x2], x3
+        ld1             {v19.8h}, [x2], x3
+        convolve8       v2,  v3,  v4,  v5,  v21, v22, v23, v24, v25, v26, v27, v16, v17
+        convolve8       v6,  v7,  v20, v21, v23, v24, v25, v26, v27, v16, v17, v18, v19
+        do_store8       v2,  v3,  v4,  v5,  v6,  v7,  v20, v21, v1,  \type
+
+        subs            x6,  x6,  #4
+        b.eq            8f
+
+        ld1             {v20.8h}, [x2], x3  // next 4 rows; the window is now v25-v27, v16-v19, v20-v23
+        ld1             {v21.8h}, [x2], x3
+        ld1             {v22.8h}, [x2], x3
+        ld1             {v23.8h}, [x2], x3
+        convolve8       v2,  v3,  v4,  v5,  v25, v26, v27, v16, v17, v18, v19, v20, v21
+        convolve8       v6,  v7,  v24, v25, v27, v16, v17, v18, v19, v20, v21, v22, v23
+        do_store8       v2,  v3,  v4,  v5,  v6,  v7,  v24, v25, v1,  \type
+
+        subs            x6,  x6,  #4
+        b.ne            2b  // v17-v23 again hold the 7 newest rows, as label 2 expects
+
+8:
+        subs            x5,  x5,  #8  // one 8 pixel wide slice finished
+        b.eq            9f
+        // x0 -= h * dst_stride
+        msub            x0,  x1,  x4, x0
+        // x2 -= h * src_stride
+        msub            x2,  x3,  x4, x2
+        // x2 -= 8 * src_stride
+        sub             x2,  x2,  x3, lsl #3
+        // x2 += 1 * src_stride
+        add             x2,  x2,  x3  // net effect of the last two steps: rewind the 7 preloaded rows
+        add             x2,  x2,  #16  // move 8 pixels right (16 bytes, as pixels are loaded as 16-bit lanes)
+        add             x0,  x0,  #16
+        b               1b
+9:
+        ret
+endfunc
+.endm
+
+do_8tap_8v put
+do_8tap_8v avg
+
+
+// Instantiate a vertical filter function for filtering a 4 pixels wide slice (height in x4,
+// filter coefficient pointer in x6). This is only designed to work for 4 or 8 output lines.
+.macro do_8tap_4v type
+function \type\()_8tap_4v
+        sub             x2,  x2,  x3, lsl #1
+        sub             x2,  x2,  x3  // x2 -= 3 * src_stride in total: the first tap reads 3 rows above the output row
+        ld1             {v0.8h},  [x6]  // v0 = the 8 filter coefficients, 16 bit each
+.ifc \type,avg
+        mov             x7,  x0  // copy of the dst pointer (NOTE(review): presumably the read pointer used by do_store4 for averaging; confirm)
+.endif
+
+        ld1             {v16.4h}, [x2], x3  // load 11 rows of 4 pixels into v16-v26: enough for 4 output rows
+        ld1             {v17.4h}, [x2], x3
+        ld1             {v18.4h}, [x2], x3
+        ld1             {v19.4h}, [x2], x3
+        ld1             {v20.4h}, [x2], x3
+        ld1             {v21.4h}, [x2], x3
+        ld1             {v22.4h}, [x2], x3
+        ld1             {v23.4h}, [x2], x3
+        ld1             {v24.4h}, [x2], x3
+        ld1             {v25.4h}, [x2], x3
+        ld1             {v26.4h}, [x2], x3
+
+        convolve4       v2,  v3,  v16, v17, v18, v19, v20, v21, v22, v23, v24, v30, v31
+        convolve4       v4,  v5,  v18, v19, v20, v21, v22, v23, v24, v25, v26, v30, v31
+        do_store4       v2,  v3,  v4,  v5,  v28, v29, v30, v31, v1,  \type
+
+        subs            x4,  x4,  #4  // finished if only 4 lines were requested
+        b.eq            9f
+
+        ld1             {v27.4h}, [x2], x3  // 4 more rows for output lines 4-7
+        ld1             {v28.4h}, [x2], x3
+        ld1             {v29.4h}, [x2], x3
+        ld1             {v30.4h}, [x2], x3
+
+        convolve4       v2,  v3,  v20, v21, v22, v23, v24, v25, v26, v27, v28, v16, v17
+        convolve4       v4,  v5,  v22, v23, v24, v25, v26, v27, v28, v29, v30, v16, v17
+        do_store4       v2,  v3,  v4,  v5,  v16, v17, v18, v19, v1,  \type
+
+9:
+        ret
+endfunc
+.endm
+
+do_8tap_4v put
+do_8tap_4v avg
+
+
+.macro do_8tap_v_func type, filter, offset, size, bpp  // exported entry point: set up registers, then tail-branch to a shared kernel
+function ff_vp9_\type\()_\filter\()\size\()_v_\bpp\()_neon, export=1
+        uxtw            x4,  w4  // zero-extend the 32-bit height so the kernels can use x4 in 64-bit arithmetic
+        mvni            v1.8h, #((0xff << (\bpp - 8)) & 0xff), lsl #8  // every lane = 0x03ff for bpp 10, 0x0fff for bpp 12 (NOTE(review): presumably the clamp limit for do_store4/do_store8; confirm)
+        movrel          x5,  X(ff_vp9_subpel_filters), 256*\offset  // x5 = start of the table for this filter type
+        add             x6,  x5,  w6, uxtw #4  // x6 = x5 + 16 * w6: one 16-byte coefficient set (w6 is presumably my; confirm)
+        mov             x5,  #\size  // the kernels expect the width in x5
+.if \size >= 8
+        b               \type\()_8tap_8v
+.else
+        b               \type\()_8tap_4v
+.endif
+endfunc
+.endm
+
+.macro do_8tap_v_filters size, bpp  // emit put and avg entry points for each filter type at one size and bit depth
+do_8tap_v_func put, regular, 1, \size, \bpp  // third argument: table index, scaled by 256 bytes in do_8tap_v_func
+do_8tap_v_func avg, regular, 1, \size, \bpp
+do_8tap_v_func put, sharp,   2, \size, \bpp
+do_8tap_v_func avg, sharp,   2, \size, \bpp
+do_8tap_v_func put, smooth,  0, \size, \bpp
+do_8tap_v_func avg, smooth,  0, \size, \bpp
+.endm
+
+.macro do_8tap_v_filters_bpp bpp  // emit the vertical entry points for every block width at one bit depth
+do_8tap_v_filters 64, \bpp
+do_8tap_v_filters 32, \bpp
+do_8tap_v_filters 16, \bpp
+do_8tap_v_filters 8,  \bpp
+do_8tap_v_filters 4,  \bpp  // the only width that branches to the 4v kernel (size < 8)
+.endm
+
+do_8tap_v_filters_bpp 10
+do_8tap_v_filters_bpp 12
diff --git a/media/ffvpx/libavcodec/aarch64/vp9mc_aarch64.S b/media/ffvpx/libavcodec/aarch64/vp9mc_aarch64.S
new file mode 100644
index 0000000000..f17a8cf04a
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9mc_aarch64.S
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// All public functions in this file have the following signature:
+// typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+//                            const uint8_t *ref, ptrdiff_t ref_stride,
+//                            int h, int mx, int my);
+
+function ff_vp9_copy128_aarch64, export=1  // copy w4 rows of 128 bytes from x2 (stride x3) to x0 (stride x1) using only x5-x12
+1:  // one row per iteration; the count is tested after the row, so h must be nonzero
+        ldp             x5,  x6,  [x2]
+        ldp             x7,  x8,  [x2, #16]
+        stp             x5,  x6,  [x0]
+        ldp             x9,  x10, [x2, #32]
+        stp             x7,  x8,  [x0, #16]
+        subs            w4,  w4,  #1  // sets the flags for b.ne below; none of the ldp/stp/add in between change them
+        ldp             x11, x12, [x2, #48]
+        stp             x9,  x10, [x0, #32]
+        stp             x11, x12, [x0, #48]
+        ldp             x5,  x6,  [x2, #64]  // second 64 bytes of the row, reusing the same registers
+        ldp             x7,  x8,  [x2, #80]
+        stp             x5,  x6,  [x0, #64]
+        ldp             x9,  x10, [x2, #96]
+        stp             x7,  x8,  [x0, #80]
+        ldp             x11, x12, [x2, #112]
+        stp             x9,  x10, [x0, #96]
+        stp             x11, x12, [x0, #112]
+        add             x2,  x2,  x3  // next source row
+        add             x0,  x0,  x1  // next destination row
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_copy64_aarch64, export=1  // copy w4 rows of 64 bytes from x2 (stride x3) to x0 (stride x1) using only x5-x12
+1:  // one row per iteration; the count is tested after the row, so h must be nonzero
+        ldp             x5,  x6,  [x2]
+        ldp             x7,  x8,  [x2, #16]
+        stp             x5,  x6,  [x0]
+        ldp             x9,  x10, [x2, #32]
+        stp             x7,  x8,  [x0, #16]
+        subs            w4,  w4,  #1  // sets the flags for b.ne below; none of the ldp/stp/add in between change them
+        ldp             x11, x12, [x2, #48]
+        stp             x9,  x10, [x0, #32]
+        stp             x11, x12, [x0, #48]
+        add             x2,  x2,  x3  // next source row
+        add             x0,  x0,  x1  // next destination row
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_copy32_aarch64, export=1  // copy w4 rows of 32 bytes from x2 (stride x3) to x0 (stride x1) using only x5-x8
+1:  // one row per iteration; the count is tested after the row, so h must be nonzero
+        ldp             x5,  x6,  [x2]
+        ldp             x7,  x8,  [x2, #16]
+        stp             x5,  x6,  [x0]
+        subs            w4,  w4,  #1  // sets the flags for b.ne below; the stp/add in between do not change them
+        stp             x7,  x8,  [x0, #16]
+        add             x2,  x2,  x3  // next source row
+        add             x0,  x0,  x1  // next destination row
+        b.ne            1b
+        ret
+endfunc
diff --git a/media/ffvpx/libavcodec/aarch64/vp9mc_neon.S b/media/ffvpx/libavcodec/aarch64/vp9mc_neon.S
new file mode 100644
index 0000000000..abf2bae9db
--- /dev/null
+++ b/media/ffvpx/libavcodec/aarch64/vp9mc_neon.S
@@ -0,0 +1,657 @@
+/*
+ * Copyright (c) 2016 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// All public functions in this file have the following signature:
+// typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+//                            const uint8_t *ref, ptrdiff_t ref_stride,
+//                            int h, int mx, int my);
+
+function ff_vp9_avg64_neon, export=1  // dst = rounded average of dst and ref for w4 rows of 64 bytes, two rows per iteration
+        mov             x5,  x0  // x5 = dst store pointer; x0 runs ahead as the dst load pointer
+1:
+        ld1             {v4.16b,  v5.16b,  v6.16b,  v7.16b},  [x2], x3  // ref row 0
+        ld1             {v0.16b,  v1.16b,  v2.16b,  v3.16b},  [x0], x1  // dst row 0
+        ld1             {v20.16b, v21.16b, v22.16b, v23.16b}, [x2], x3  // ref row 1
+        urhadd          v0.16b,  v0.16b,  v4.16b  // unsigned rounding halving add: (a + b + 1) >> 1 per byte
+        urhadd          v1.16b,  v1.16b,  v5.16b
+        ld1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x0], x1  // dst row 1
+        urhadd          v2.16b,  v2.16b,  v6.16b
+        urhadd          v3.16b,  v3.16b,  v7.16b
+        subs            w4,  w4,  #2  // two rows per iteration, so h must be a nonzero multiple of 2
+        urhadd          v16.16b, v16.16b, v20.16b
+        urhadd          v17.16b, v17.16b, v21.16b
+        st1             {v0.16b,  v1.16b,  v2.16b,  v3.16b},  [x5], x1
+        urhadd          v18.16b, v18.16b, v22.16b
+        urhadd          v19.16b, v19.16b, v23.16b
+        st1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x5], x1
+        b.ne            1b  // flags still come from the subs above; the NEON instructions do not change them
+        ret
+endfunc
+
+function ff_vp9_avg32_neon, export=1  // dst = rounded average of dst and ref for w4 rows of 32 bytes, one row per iteration
+1:
+        ld1             {v2.16b, v3.16b},  [x2], x3  // ref row
+        ld1             {v0.16b, v1.16b},  [x0]  // dst row, no post-increment: the store below advances x0
+        urhadd          v0.16b,  v0.16b,  v2.16b  // unsigned rounding halving add: (a + b + 1) >> 1 per byte
+        urhadd          v1.16b,  v1.16b,  v3.16b
+        subs            w4,  w4,  #1  // the count is tested after the row, so h must be nonzero
+        st1             {v0.16b, v1.16b},  [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_copy16_neon, export=1  // copy w4 rows of 16 bytes from x2 to x0, four rows per iteration
+        add             x5,  x0,  x1  // x5 = dst + dst_stride: pointer for the odd rows
+        lsl             x1,  x1,  #1  // both strides are doubled because each pointer skips every other row
+        add             x6,  x2,  x3  // x6 = ref + ref_stride: pointer for the odd rows
+        lsl             x3,  x3,  #1
+1:
+        ld1             {v0.16b},  [x2], x3  // row 0
+        ld1             {v1.16b},  [x6], x3  // row 1
+        ld1             {v2.16b},  [x2], x3  // row 2
+        ld1             {v3.16b},  [x6], x3  // row 3
+        subs            w4,  w4,  #4  // four rows per iteration, so h must be a nonzero multiple of 4
+        st1             {v0.16b},  [x0], x1
+        st1             {v1.16b},  [x5], x1
+        st1             {v2.16b},  [x0], x1
+        st1             {v3.16b},  [x5], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_avg16_neon, export=1  // dst = rounded average of dst and ref for w4 rows of 16 bytes, two rows per iteration
+        mov             x5,  x0  // x5 = dst store pointer; x0 runs ahead as the dst load pointer
+1:
+        ld1             {v2.16b},  [x2], x3  // ref row 0
+        ld1             {v0.16b},  [x0], x1  // dst row 0
+        ld1             {v3.16b},  [x2], x3  // ref row 1
+        urhadd          v0.16b,  v0.16b,  v2.16b  // unsigned rounding halving add: (a + b + 1) >> 1 per byte
+        ld1             {v1.16b},  [x0], x1  // dst row 1
+        urhadd          v1.16b,  v1.16b,  v3.16b
+        subs            w4,  w4,  #2  // two rows per iteration, so h must be a nonzero multiple of 2
+        st1             {v0.16b},  [x5], x1
+        st1             {v1.16b},  [x5], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_copy8_neon, export=1  // copy w4 rows of 8 bytes from x2 (stride x3) to x0 (stride x1), two rows per iteration
+1:
+        ld1             {v0.8b},  [x2], x3
+        ld1             {v1.8b},  [x2], x3
+        subs            w4,  w4,  #2  // two rows per iteration, so h must be a nonzero multiple of 2
+        st1             {v0.8b},  [x0], x1
+        st1             {v1.8b},  [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_avg8_neon, export=1  // dst = rounded average of dst and ref for w4 rows of 8 bytes, two rows per iteration
+        mov             x5,  x0  // x5 = dst store pointer; x0 runs ahead as the dst load pointer
+1:
+        ld1             {v2.8b},  [x2], x3  // ref row 0
+        ld1             {v0.8b},  [x0], x1  // dst row 0
+        ld1             {v3.8b},  [x2], x3  // ref row 1
+        urhadd          v0.8b,  v0.8b,  v2.8b  // unsigned rounding halving add: (a + b + 1) >> 1 per byte
+        ld1             {v1.8b},  [x0], x1  // dst row 1
+        urhadd          v1.8b,  v1.8b,  v3.8b
+        subs            w4,  w4,  #2  // two rows per iteration, so h must be a nonzero multiple of 2
+        st1             {v0.8b},  [x5], x1
+        st1             {v1.8b},  [x5], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_copy4_neon, export=1  // copy w4 rows of 4 bytes from x2 (stride x3) to x0 (stride x1), four rows per iteration
+1:
+        ld1             {v0.s}[0], [x2], x3  // each row travels in lane 0 of its own register
+        ld1             {v1.s}[0], [x2], x3
+        st1             {v0.s}[0], [x0], x1
+        ld1             {v2.s}[0], [x2], x3
+        st1             {v1.s}[0], [x0], x1
+        ld1             {v3.s}[0], [x2], x3
+        subs            w4,  w4,  #4  // four rows per iteration, so h must be a nonzero multiple of 4
+        st1             {v2.s}[0], [x0], x1
+        st1             {v3.s}[0], [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_avg4_neon, export=1  // dst = rounded average of dst and ref for w4 rows of 4 bytes, four rows per iteration
+        mov             x5,  x0  // x5 = dst store pointer; x0 runs ahead as the dst load pointer
+1:
+        ld1             {v2.s}[0], [x2], x3  // ref rows 0-1 are packed into v2, ref rows 2-3 into v3
+        ld1             {v0.s}[0], [x0], x1  // dst rows 0-1 are packed into v0, dst rows 2-3 into v1
+        ld1             {v2.s}[1], [x2], x3
+        ld1             {v0.s}[1], [x0], x1
+        ld1             {v3.s}[0], [x2], x3
+        ld1             {v1.s}[0], [x0], x1
+        ld1             {v3.s}[1], [x2], x3
+        ld1             {v1.s}[1], [x0], x1
+        subs            w4,  w4,  #4  // four rows per iteration, so h must be a nonzero multiple of 4
+        urhadd          v0.8b,  v0.8b,  v2.8b  // unsigned rounding halving add on two packed rows at once
+        urhadd          v1.8b,  v1.8b,  v3.8b
+        st1             {v0.s}[0], [x5], x1
+        st1             {v0.s}[1], [x5], x1
+        st1             {v1.s}[0], [x5], x1
+        st1             {v1.s}[1], [x5], x1
+        b.ne            1b
+        ret
+endfunc
+
+
+// Extract a vector from src1-src2 and src4-src5 (src1-src3 and src4-src6
+// for size >= 16), and multiply-accumulate into dst1 and dst3 (or
+// dst1-dst2 and dst3-dst4 for size >= 16). Clobbers v20-v23; multiplies by v0.h[offset].
+.macro extmla dst1, dst2, dst3, dst4, src1, src2, src3, src4, src5, src6, offset, size
+        ext             v20.16b, \src1\().16b, \src2\().16b, #(2*\offset)  // 16-bit lanes, so shift by 2 bytes per pixel of offset
+        ext             v22.16b, \src4\().16b, \src5\().16b, #(2*\offset)
+.if \size >= 16
+        mla             \dst1\().8h, v20.8h, v0.h[\offset]
+        ext             v21.16b, \src2\().16b, \src3\().16b, #(2*\offset)  // same shift for the second group of 8 pixels
+        mla             \dst3\().8h, v22.8h, v0.h[\offset]
+        ext             v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
+        mla             \dst2\().8h, v21.8h, v0.h[\offset]
+        mla             \dst4\().8h, v23.8h, v0.h[\offset]
+.elseif \size == 8
+        mla             \dst1\().8h, v20.8h, v0.h[\offset]
+        mla             \dst3\().8h, v22.8h, v0.h[\offset]
+.else
+        mla             \dst1\().4h, v20.4h, v0.h[\offset]  // size 4: only the low 4 lanes are accumulated
+        mla             \dst3\().4h, v22.4h, v0.h[\offset]
+.endif
+.endm
+// The same as above, but don't accumulate straight into the
+// destination, but use a temp register and accumulate with saturation.
+.macro extmulqadd dst1, dst2, dst3, dst4, src1, src2, src3, src4, src5, src6, offset, size
+        ext             v20.16b, \src1\().16b, \src2\().16b, #(2*\offset)  // v20-v23 serve as the temp registers
+        ext             v22.16b, \src4\().16b, \src5\().16b, #(2*\offset)
+.if \size >= 16
+        mul             v20.8h, v20.8h, v0.h[\offset]  // the product stays in the temp register
+        ext             v21.16b, \src2\().16b, \src3\().16b, #(2*\offset)
+        mul             v22.8h, v22.8h, v0.h[\offset]
+        ext             v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
+        mul             v21.8h, v21.8h, v0.h[\offset]
+        mul             v23.8h, v23.8h, v0.h[\offset]
+.elseif \size == 8
+        mul             v20.8h, v20.8h, v0.h[\offset]
+        mul             v22.8h, v22.8h, v0.h[\offset]
+.else
+        mul             v20.4h, v20.4h, v0.h[\offset]
+        mul             v22.4h, v22.4h, v0.h[\offset]
+.endif
+.if \size == 4
+        sqadd           \dst1\().4h, \dst1\().4h, v20.4h  // signed saturating add of the product into the accumulator
+        sqadd           \dst3\().4h, \dst3\().4h, v22.4h
+.else
+        sqadd           \dst1\().8h, \dst1\().8h, v20.8h
+        sqadd           \dst3\().8h, \dst3\().8h, v22.8h
+.if \size >= 16
+        sqadd           \dst2\().8h, \dst2\().8h, v21.8h
+        sqadd           \dst4\().8h, \dst4\().8h, v23.8h
+.endif
+.endif
+.endm
+
+
+// Instantiate a horizontal filter function for the given size.
+// This can work on 4, 8 or 16 pixels in parallel; for larger
+// widths it will do 16 pixels at a time and loop horizontally.
+// The actual width is passed in x5, the height in w4 and the
+// filter coefficients in x9. idx2 is the index of the largest
+// filter coefficient (3 or 4) and idx1 is the other one of them.
+.macro do_8tap_h type, size, idx1, idx2
+function \type\()_8tap_\size\()h_\idx1\idx2
+        sub             x2,  x2,  #3  // the first tap reads 3 pixels left of the output pixel
+        add             x6,  x0,  x1  // x6 = dst pointer for the second row of each pair
+        add             x7,  x2,  x3  // x7 = src pointer for the second row of each pair
+        add             x1,  x1,  x1  // two rows are handled per iteration, so both strides are doubled
+        add             x3,  x3,  x3
+        // Only size >= 16 loops horizontally and needs
+        // reduced dst stride
+.if \size >= 16
+        sub             x1,  x1,  x5
+.endif
+        // size >= 16 loads two qwords and increments x2,
+        // for size 4/8 it's enough with one qword and no
+        // postincrement
+.if \size >= 16
+        sub             x3,  x3,  x5
+        sub             x3,  x3,  #8  // a row reads width + 8 bytes: 24 up front, then 16 for each further step
+.endif
+        // Load the filter vector
+        ld1             {v0.8h},  [x9]
+1:
+.if \size >= 16
+        mov             x9,  x5  // x9 = pixels left in this row pair; the coefficients already live in v0
+.endif
+        // Load src
+.if \size >= 16
+        ld1             {v4.8b,  v5.8b,  v6.8b},  [x2], #24
+        ld1             {v16.8b, v17.8b, v18.8b}, [x7], #24
+.else
+        ld1             {v4.8b,  v5.8b},  [x2]
+        ld1             {v16.8b, v17.8b}, [x7]
+.endif
+        uxtl            v4.8h,  v4.8b  // widen the pixels to 16 bit for the multiplies
+        uxtl            v5.8h,  v5.8b
+        uxtl            v16.8h, v16.8b
+        uxtl            v17.8h, v17.8b
+.if \size >= 16
+        uxtl            v6.8h,  v6.8b
+        uxtl            v18.8h, v18.8b
+.endif
+2:
+
+        // Accumulate, adding idx2 last with a separate
+        // saturating add. The positive filter coefficients
+        // for all indices except idx2 must add up to less
+        // than 127 for this not to overflow.
+        mul             v1.8h,  v4.8h,  v0.h[0]  // tap 0 needs no ext; first row in v1 (and v2), second row in v24 (and v25)
+        mul             v24.8h, v16.8h, v0.h[0]
+.if \size >= 16
+        mul             v2.8h,  v5.8h,  v0.h[0]
+        mul             v25.8h, v17.8h, v0.h[0]
+.endif
+        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, 1,     \size
+        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, 2,     \size
+        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, \idx1, \size
+        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, 5,     \size
+        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, 6,     \size
+        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, 7,     \size
+        extmulqadd      v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, \idx2, \size
+
+        // Round, shift and saturate
+        sqrshrun        v1.8b,   v1.8h,  #7
+        sqrshrun        v24.8b,  v24.8h, #7
+.if \size >= 16
+        sqrshrun2       v1.16b,  v2.8h,  #7  // pixels 8-15 land in the high half, which frees v2
+        sqrshrun2       v24.16b, v25.8h, #7
+.endif
+        // Average
+.ifc \type,avg
+.if \size >= 16
+        ld1             {v2.16b}, [x0]  // existing dst pixels for the two rows
+        ld1             {v3.16b}, [x6]
+        urhadd          v1.16b,  v1.16b,  v2.16b
+        urhadd          v24.16b, v24.16b, v3.16b
+.elseif \size == 8
+        ld1             {v2.8b},  [x0]
+        ld1             {v3.8b},  [x6]
+        urhadd          v1.8b,  v1.8b,  v2.8b
+        urhadd          v24.8b, v24.8b, v3.8b
+.else
+        ld1             {v2.s}[0], [x0]
+        ld1             {v3.s}[0], [x6]
+        urhadd          v1.8b,  v1.8b,  v2.8b
+        urhadd          v24.8b, v24.8b, v3.8b
+.endif
+.endif
+        // Store and loop horizontally (for size >= 16)
+.if \size >= 16
+        subs            x9,  x9,  #16
+        st1             {v1.16b},  [x0], #16
+        st1             {v24.16b}, [x6], #16
+        b.eq            3f  // flags come from the subs above; the stores do not change them
+        mov             v4.16b,  v6.16b  // slide the window: the last 8 widened pixels become the first 8
+        mov             v16.16b, v18.16b
+        ld1             {v6.16b},  [x2], #16  // 16 new source bytes per row
+        ld1             {v18.16b}, [x7], #16
+        uxtl            v5.8h,  v6.8b  // widen the low half first, since uxtl2 then overwrites v6
+        uxtl2           v6.8h,  v6.16b
+        uxtl            v17.8h, v18.8b
+        uxtl2           v18.8h, v18.16b
+        b               2b
+.elseif \size == 8
+        st1             {v1.8b},    [x0]
+        st1             {v24.8b},   [x6]
+.else // \size == 4
+        st1             {v1.s}[0],  [x0]
+        st1             {v24.s}[0], [x6]
+.endif
+3:
+        // Loop vertically
+        add             x0,  x0,  x1
+        add             x6,  x6,  x1
+        add             x2,  x2,  x3
+        add             x7,  x7,  x3
+        subs            w4,  w4,  #2  // two rows per iteration, so the height must be a nonzero multiple of 2
+        b.ne            1b
+        ret
+endfunc
+.endm
+
+.macro do_8tap_h_size size  // emit the put and avg kernels for one size, in both coefficient orders
+do_8tap_h put, \size, 3, 4  // idx2 = 4: the largest coefficient is at index 4
+do_8tap_h avg, \size, 3, 4
+do_8tap_h put, \size, 4, 3  // idx2 = 3: the largest coefficient is at index 3
+do_8tap_h avg, \size, 4, 3
+.endm
+
+do_8tap_h_size 4
+do_8tap_h_size 8
+do_8tap_h_size 16
+
+.macro do_8tap_h_func type, filter, offset, size  // exported entry point: set up registers, then tail-branch to a shared kernel
+function ff_vp9_\type\()_\filter\()\size\()_h_neon, export=1
+        movrel          x6,  X(ff_vp9_subpel_filters), 256*\offset  // x6 = start of the table for this filter type
+        cmp             w5,  #8  // w5 is mx; the flags survive the add and mov below and are used by b.ge
+        add             x9,  x6,  w5, uxtw #4  // x9 = x6 + 16 * mx: one 16-byte coefficient set, as the kernels expect
+        mov             x5,  #\size  // the kernels expect the width in x5
+.if \size >= 16
+        b.ge            \type\()_8tap_16h_34
+        b               \type\()_8tap_16h_43
+.else
+        b.ge            \type\()_8tap_\size\()h_34
+        b               \type\()_8tap_\size\()h_43
+.endif
+endfunc
+.endm
+
+.macro do_8tap_h_filters size  // emit put and avg entry points for each filter type at one size
+do_8tap_h_func put, regular, 1, \size  // third argument: table index, scaled by 256 bytes in do_8tap_h_func
+do_8tap_h_func avg, regular, 1, \size
+do_8tap_h_func put, sharp,   2, \size
+do_8tap_h_func avg, sharp,   2, \size
+do_8tap_h_func put, smooth,  0, \size
+do_8tap_h_func avg, smooth,  0, \size
+.endm
+
+do_8tap_h_filters 64
+do_8tap_h_filters 32
+do_8tap_h_filters 16
+do_8tap_h_filters 8
+do_8tap_h_filters 4
+
+
+// Vertical filters
+
+// Round, shift and saturate and store reg1-reg2 over 4 lines (reg1 = lines 0 and 2, reg2 = lines 1 and 3)
+.macro do_store4 reg1, reg2, tmp1, tmp2, type
+        sqrshrun        \reg1\().8b,  \reg1\().8h, #7
+        sqrshrun        \reg2\().8b,  \reg2\().8h, #7
+.ifc \type,avg
+        ld1             {\tmp1\().s}[0],  [x7], x1  // existing dst lines, read through x7 in the same interleaved layout
+        ld1             {\tmp2\().s}[0],  [x7], x1
+        ld1             {\tmp1\().s}[1],  [x7], x1
+        ld1             {\tmp2\().s}[1],  [x7], x1
+        urhadd          \reg1\().8b,  \reg1\().8b,  \tmp1\().8b  // unsigned rounding halving add: (a + b + 1) >> 1 per byte
+        urhadd          \reg2\().8b,  \reg2\().8b,  \tmp2\().8b
+.endif
+        st1             {\reg1\().s}[0],  [x0], x1
+        st1             {\reg2\().s}[0],  [x0], x1
+        st1             {\reg1\().s}[1],  [x0], x1
+        st1             {\reg2\().s}[1],  [x0], x1
+.endm
+
+// Round, shift and saturate and store reg1-4 (one line of 8 pixels each; tmp1-4 are only used for avg)
+.macro do_store reg1, reg2, reg3, reg4, tmp1, tmp2, tmp3, tmp4, type
+        sqrshrun        \reg1\().8b,  \reg1\().8h, #7
+        sqrshrun        \reg2\().8b,  \reg2\().8h, #7
+        sqrshrun        \reg3\().8b,  \reg3\().8h, #7
+        sqrshrun        \reg4\().8b,  \reg4\().8h, #7
+.ifc \type,avg
+        ld1             {\tmp1\().8b},  [x7], x1  // existing dst lines, read through x7
+        ld1             {\tmp2\().8b},  [x7], x1
+        ld1             {\tmp3\().8b},  [x7], x1
+        ld1             {\tmp4\().8b},  [x7], x1
+        urhadd          \reg1\().8b,  \reg1\().8b,  \tmp1\().8b  // unsigned rounding halving add: (a + b + 1) >> 1 per byte
+        urhadd          \reg2\().8b,  \reg2\().8b,  \tmp2\().8b
+        urhadd          \reg3\().8b,  \reg3\().8b,  \tmp3\().8b
+        urhadd          \reg4\().8b,  \reg4\().8b,  \tmp4\().8b
+.endif
+        st1             {\reg1\().8b},  [x0], x1
+        st1             {\reg2\().8b},  [x0], x1
+        st1             {\reg3\().8b},  [x0], x1
+        st1             {\reg4\().8b},  [x0], x1
+.endm
+
+// Evaluate the filter twice in parallel, from the inputs src1-src9 into dst1-dst2
+// (src1-src8 into dst1, src2-src9 into dst2), adding idx2 separately
+// at the end with saturation. Indices 0 and 7 always have negative or zero
+// coefficients, so they can be accumulated into tmp1-tmp2 together with the
+// largest coefficient.
+.macro convolve dst1, dst2, src1, src2, src3, src4, src5, src6, src7, src8, src9, idx1, idx2, tmp1, tmp2
+        mul             \dst1\().8h, \src2\().8h, v0.h[1]  // dst1/dst2 start with tap 1
+        mul             \dst2\().8h, \src3\().8h, v0.h[1]
+        mul             \tmp1\().8h, \src1\().8h, v0.h[0]  // tmp1/tmp2 start with tap 0
+        mul             \tmp2\().8h, \src2\().8h, v0.h[0]
+        mla             \dst1\().8h, \src3\().8h, v0.h[2]
+        mla             \dst2\().8h, \src4\().8h, v0.h[2]
+.if \idx1 == 3
+        mla             \dst1\().8h, \src4\().8h, v0.h[3]  // the idx1 tap goes into dst1/dst2
+        mla             \dst2\().8h, \src5\().8h, v0.h[3]
+.else
+        mla             \dst1\().8h, \src5\().8h, v0.h[4]
+        mla             \dst2\().8h, \src6\().8h, v0.h[4]
+.endif
+        mla             \dst1\().8h, \src6\().8h, v0.h[5]
+        mla             \dst2\().8h, \src7\().8h, v0.h[5]
+        mla             \tmp1\().8h, \src8\().8h, v0.h[7]  // tap 7 joins tap 0 in tmp1/tmp2
+        mla             \tmp2\().8h, \src9\().8h, v0.h[7]
+        mla             \dst1\().8h, \src7\().8h, v0.h[6]
+        mla             \dst2\().8h, \src8\().8h, v0.h[6]
+.if \idx2 == 3
+        mla             \tmp1\().8h, \src4\().8h, v0.h[3]  // the idx2 (largest) tap goes into tmp1/tmp2
+        mla             \tmp2\().8h, \src5\().8h, v0.h[3]
+.else
+        mla             \tmp1\().8h, \src5\().8h, v0.h[4]
+        mla             \tmp2\().8h, \src6\().8h, v0.h[4]
+.endif
+        sqadd           \dst1\().8h, \dst1\().8h, \tmp1\().8h  // signed saturating merge of the two partial sums
+        sqadd           \dst2\().8h, \dst2\().8h, \tmp2\().8h
+.endm
+
+// Load pixels and extend them to 16 bit (3 rows, or 4 if dst4 is given; clobbers v1-v3/v4, advances x2)
+.macro loadl dst1, dst2, dst3, dst4
+        ld1             {v1.8b}, [x2], x3  // one row of 8 pixels per load
+        ld1             {v2.8b}, [x2], x3
+        ld1             {v3.8b}, [x2], x3
+.ifnb \dst4
+        ld1             {v4.8b}, [x2], x3
+.endif
+        uxtl            \dst1\().8h, v1.8b  // zero-extend each byte to a 16-bit lane
+        uxtl            \dst2\().8h, v2.8b
+        uxtl            \dst3\().8h, v3.8b
+.ifnb \dst4
+        uxtl            \dst4\().8h, v4.8b
+.endif
+.endm
+
+// Instantiate a vertical filter function for filtering 8 pixels at a time.
+// The height is passed in x4, the width in x5 and the filter coefficients
+// in x6. idx2 is the index of the largest filter coefficient (3 or 4)
+// and idx1 is the other one of them.
+.macro do_8tap_8v type, idx1, idx2
+function \type\()_8tap_8v_\idx1\idx2
+        sub             x2,  x2,  x3, lsl #1
+        sub             x2,  x2,  x3  // x2 -= 3 * src_stride in total: the first tap reads 3 rows above the output row
+        ld1             {v0.8h},  [x6]  // v0 = the 8 filter coefficients, 16 bit each
+1:  // start of one 8 pixel wide column slice
+.ifc \type,avg
+        mov             x7,  x0  // x7 = dst read pointer used by do_store for averaging
+.endif
+        mov             x6,  x4  // x6 = rows left in this slice; the coefficients already live in v0
+
+        loadl           v17, v18, v19  // preload the first 7 source rows into v17-v23
+
+        loadl           v20, v21, v22, v23
+2:  // the loop body is unrolled 3 times so the 11-row window rotates through v16-v27 without moves
+        loadl           v24, v25, v26, v27
+        convolve        v1,  v2,  v17, v18, v19, v20, v21, v22, v23, v24, v25, \idx1, \idx2, v5,  v6
+        convolve        v3,  v4,  v19, v20, v21, v22, v23, v24, v25, v26, v27, \idx1, \idx2, v5,  v6
+        do_store        v1,  v2,  v3,  v4,  v5,  v6,  v7,  v28, \type
+
+        subs            x6,  x6,  #4  // 4 output rows done
+        b.eq            8f
+
+        loadl           v16, v17, v18, v19  // next 4 rows; the window is now v21-v27 followed by v16-v19
+        convolve        v1,  v2,  v21, v22, v23, v24, v25, v26, v27, v16, v17, \idx1, \idx2, v5,  v6
+        convolve        v3,  v4,  v23, v24, v25, v26, v27, v16, v17, v18, v19, \idx1, \idx2, v5,  v6
+        do_store        v1,  v2,  v3,  v4,  v5,  v6,  v7,  v28, \type
+
+        subs            x6,  x6,  #4
+        b.eq            8f
+
+        loadl           v20, v21, v22, v23  // next 4 rows; the window is now v25-v27, v16-v19, v20-v23
+        convolve        v1,  v2,  v25, v26, v27, v16, v17, v18, v19, v20, v21, \idx1, \idx2, v5,  v6
+        convolve        v3,  v4,  v27, v16, v17, v18, v19, v20, v21, v22, v23, \idx1, \idx2, v5,  v6
+        do_store        v1,  v2,  v3,  v4,  v5,  v6,  v7,  v28, \type
+
+        subs            x6,  x6,  #4
+        b.ne            2b  // v17-v23 again hold the 7 newest rows, as label 2 expects
+
+8:
+        subs            x5,  x5,  #8  // one 8 pixel wide slice finished
+        b.eq            9f
+        // x0 -= h * dst_stride
+        msub            x0,  x1,  x4, x0
+        // x2 -= h * src_stride
+        msub            x2,  x3,  x4, x2
+        // x2 -= 8 * src_stride
+        sub             x2,  x2,  x3, lsl #3
+        // x2 += 1 * src_stride
+        add             x2,  x2,  x3  // net effect of the last two steps: rewind the 7 preloaded rows
+        add             x2,  x2,  #8  // move 8 pixels (8 bytes) to the right
+        add             x0,  x0,  #8
+        b               1b
+9:
+        ret
+endfunc
+.endm
+
+do_8tap_8v put, 3, 4
+do_8tap_8v put, 4, 3
+do_8tap_8v avg, 3, 4
+do_8tap_8v avg, 4, 3
+
+
+// Instantiate a vertical filter function for filtering a 4 pixels wide
+// slice. The first half of the registers contain one row, while the second
+// half of a register contains the second-next row (also stored in the first
+// half of the register two steps ahead). The convolution does two outputs
+// at a time; the output of v17-v24 into one, and v18-v25 into another one.
+// The first half of first output is the first output row, the first half
+// of the other output is the second output row. The second halves of the
+// registers are rows 3 and 4.
+// This is only designed to work for 4 or 8 output lines.
+.macro do_8tap_4v type, idx1, idx2
+function \type\()_8tap_4v_\idx1\idx2
+        sub             x2,  x2,  x3, lsl #1
+        sub             x2,  x2,  x3  // x2 -= 3 * src_stride in total: the first tap reads 3 rows above the output row
+        ld1             {v0.8h},  [x6]  // v0 = the 8 filter coefficients, 16 bit each
+.ifc \type,avg
+        mov             x7,  x0  // x7 = dst read pointer used by do_store4 for averaging
+.endif
+
+        ld1             {v1.s}[0],  [x2], x3  // 11 rows of 4 pixels are loaded, each into lane 0 of its own register
+        ld1             {v2.s}[0],  [x2], x3
+        ld1             {v3.s}[0],  [x2], x3
+        ld1             {v4.s}[0],  [x2], x3
+        ld1             {v5.s}[0],  [x2], x3
+        ld1             {v6.s}[0],  [x2], x3
+        trn1            v1.2s,  v1.2s,  v3.2s  // each trn1 pairs row n (low half) with row n+2 (high half)
+        ld1             {v7.s}[0],  [x2], x3
+        trn1            v2.2s,  v2.2s,  v4.2s
+        ld1             {v26.s}[0], [x2], x3
+        uxtl            v17.8h, v1.8b  // widen each row pair to 16 bit; the results form the window v17-v25
+        trn1            v3.2s,  v3.2s,  v5.2s
+        ld1             {v27.s}[0], [x2], x3
+        uxtl            v18.8h, v2.8b
+        trn1            v4.2s,  v4.2s,  v6.2s
+        ld1             {v28.s}[0], [x2], x3
+        uxtl            v19.8h, v3.8b
+        trn1            v5.2s,  v5.2s,  v7.2s
+        ld1             {v29.s}[0], [x2], x3
+        uxtl            v20.8h, v4.8b
+        trn1            v6.2s,  v6.2s,  v26.2s
+        uxtl            v21.8h, v5.8b
+        trn1            v7.2s,  v7.2s,  v27.2s
+        uxtl            v22.8h, v6.8b
+        trn1            v26.2s, v26.2s, v28.2s
+        uxtl            v23.8h, v7.8b
+        trn1            v27.2s, v27.2s, v29.2s
+        uxtl            v24.8h, v26.8b
+        uxtl            v25.8h, v27.8b
+
+        convolve        v1,  v2,  v17, v18, v19, v20, v21, v22, v23, v24, v25, \idx1, \idx2, v3,  v4
+        do_store4       v1,  v2,  v5,  v6,  \type
+
+        subs            x4,  x4,  #4  // finished if only 4 lines were requested
+        b.eq            9f
+
+        ld1             {v1.s}[0],  [x2], x3  // 4 more rows; lane 0 of v28/v29 still holds the last two rows loaded above
+        ld1             {v2.s}[0],  [x2], x3
+        trn1            v28.2s, v28.2s, v1.2s  // pair those older rows with the two rows just loaded
+        trn1            v29.2s, v29.2s, v2.2s
+        ld1             {v1.s}[1],  [x2], x3  // the last two rows go straight into the high halves of v1/v2
+        uxtl            v26.8h, v28.8b
+        ld1             {v2.s}[1],  [x2], x3
+        uxtl            v27.8h, v29.8b
+        uxtl            v28.8h, v1.8b
+        uxtl            v29.8h, v2.8b
+
+        convolve        v1,  v2,  v21, v22, v23, v24, v25, v26, v27, v28, v29, \idx1, \idx2, v3,  v4
+        do_store4       v1,  v2,  v5,  v6,  \type
+
+9:
+        ret
+endfunc
+.endm
+
+do_8tap_4v put, 3, 4
+do_8tap_4v put, 4, 3
+do_8tap_4v avg, 3, 4
+do_8tap_4v avg, 4, 3
+
+
+.macro do_8tap_v_func type, filter, offset, size  // exported entry point: set up registers, then tail-branch to a shared kernel
+function ff_vp9_\type\()_\filter\()\size\()_v_neon, export=1
+        uxtw            x4,  w4  // zero-extend the 32-bit height so the kernels can use x4 in 64-bit arithmetic
+        movrel          x5,  X(ff_vp9_subpel_filters), 256*\offset  // x5 = start of the table for this filter type
+        cmp             w6,  #8  // w6 is my; the flags survive the add and mov below and are used by b.ge
+        add             x6,  x5,  w6, uxtw #4  // x6 = x5 + 16 * my: one 16-byte coefficient set, as the kernels expect
+        mov             x5,  #\size  // the kernels expect the width in x5
+.if \size >= 8
+        b.ge            \type\()_8tap_8v_34
+        b               \type\()_8tap_8v_43
+.else
+        b.ge            \type\()_8tap_4v_34
+        b               \type\()_8tap_4v_43
+.endif
+endfunc
+.endm
+
+.macro do_8tap_v_filters size  // emit put and avg entry points for each filter type at one size
+do_8tap_v_func put, regular, 1, \size  // third argument: table index, scaled by 256 bytes in do_8tap_v_func
+do_8tap_v_func avg, regular, 1, \size
+do_8tap_v_func put, sharp,   2, \size
+do_8tap_v_func avg, sharp,   2, \size
+do_8tap_v_func put, smooth,  0, \size
+do_8tap_v_func avg, smooth,  0, \size
+.endm
+
+do_8tap_v_filters 64
+do_8tap_v_filters 32
+do_8tap_v_filters 16
+do_8tap_v_filters 8
+do_8tap_v_filters 4
diff --git a/media/ffvpx/libavcodec/allcodecs.c b/media/ffvpx/libavcodec/allcodecs.c
new file mode 100644
index 0000000000..e593ad19af
--- /dev/null
+++ b/media/ffvpx/libavcodec/allcodecs.c
@@ -0,0 +1,995 @@
+/*
+ * Provide registration of all codecs, parsers and bitstream filters for libavcodec.
+ * Copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Provide registration of all codecs, parsers and bitstream filters for libavcodec.
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "config.h"
+#include "config_components.h"
+#include "libavutil/thread.h"
+#include "codec.h"
+#include "codec_id.h"
+#include "codec_internal.h"
+
+extern const FFCodec ff_a64multi_encoder;
+extern const FFCodec ff_a64multi5_encoder;
+extern const FFCodec ff_aasc_decoder;
+extern const FFCodec ff_aic_decoder;
+extern const FFCodec ff_alias_pix_encoder;
+extern const FFCodec ff_alias_pix_decoder;
+extern const FFCodec ff_agm_decoder;
+extern const FFCodec ff_amv_encoder;
+extern const FFCodec ff_amv_decoder;
+extern const FFCodec ff_anm_decoder;
+extern const FFCodec ff_ansi_decoder;
+extern const FFCodec ff_apng_encoder;
+extern const FFCodec ff_apng_decoder;
+extern const FFCodec ff_arbc_decoder;
+extern const FFCodec ff_argo_decoder;
+extern const FFCodec ff_asv1_encoder;
+extern const FFCodec ff_asv1_decoder;
+extern const FFCodec ff_asv2_encoder;
+extern const FFCodec ff_asv2_decoder;
+extern const FFCodec ff_aura_decoder;
+extern const FFCodec ff_aura2_decoder;
+extern const FFCodec ff_avrp_encoder;
+extern const FFCodec ff_avrp_decoder;
+extern const FFCodec ff_avrn_decoder;
+extern const FFCodec ff_avs_decoder;
+extern const FFCodec ff_avui_encoder;
+extern const FFCodec ff_avui_decoder;
+#if FF_API_AYUV_CODECID
+extern const FFCodec ff_ayuv_encoder;
+extern const FFCodec ff_ayuv_decoder;
+#endif
+extern const FFCodec ff_bethsoftvid_decoder;
+extern const FFCodec ff_bfi_decoder;
+extern const FFCodec ff_bink_decoder;
+extern const FFCodec ff_bitpacked_decoder;
+extern const FFCodec ff_bitpacked_encoder;
+extern const FFCodec ff_bmp_encoder;
+extern const FFCodec ff_bmp_decoder;
+extern const FFCodec ff_bmv_video_decoder;
+extern const FFCodec ff_brender_pix_decoder;
+extern const FFCodec ff_c93_decoder;
+extern const FFCodec ff_cavs_decoder;
+extern const FFCodec ff_cdgraphics_decoder;
+extern const FFCodec ff_cdtoons_decoder;
+extern const FFCodec ff_cdxl_decoder;
+extern const FFCodec ff_cfhd_encoder;
+extern const FFCodec ff_cfhd_decoder;
+extern const FFCodec ff_cinepak_encoder;
+extern const FFCodec ff_cinepak_decoder;
+extern const FFCodec ff_clearvideo_decoder;
+extern const FFCodec ff_cljr_encoder;
+extern const FFCodec ff_cljr_decoder;
+extern const FFCodec ff_cllc_decoder;
+extern const FFCodec ff_comfortnoise_encoder;
+extern const FFCodec ff_comfortnoise_decoder;
+extern const FFCodec ff_cpia_decoder;
+extern const FFCodec ff_cri_decoder;
+extern const FFCodec ff_cscd_decoder;
+extern const FFCodec ff_cyuv_decoder;
+extern const FFCodec ff_dds_decoder;
+extern const FFCodec ff_dfa_decoder;
+extern const FFCodec ff_dirac_decoder;
+extern const FFCodec ff_dnxhd_encoder;
+extern const FFCodec ff_dnxhd_decoder;
+extern const FFCodec ff_dpx_encoder;
+extern const FFCodec ff_dpx_decoder;
+extern const FFCodec ff_dsicinvideo_decoder;
+extern const FFCodec ff_dvaudio_decoder;
+extern const FFCodec ff_dvvideo_encoder;
+extern const FFCodec ff_dvvideo_decoder;
+extern const FFCodec ff_dxa_decoder;
+extern const FFCodec ff_dxtory_decoder;
+extern const FFCodec ff_dxv_decoder;
+extern const FFCodec ff_eacmv_decoder;
+extern const FFCodec ff_eamad_decoder;
+extern const FFCodec ff_eatgq_decoder;
+extern const FFCodec ff_eatgv_decoder;
+extern const FFCodec ff_eatqi_decoder;
+extern const FFCodec ff_eightbps_decoder;
+extern const FFCodec ff_eightsvx_exp_decoder;
+extern const FFCodec ff_eightsvx_fib_decoder;
+extern const FFCodec ff_escape124_decoder;
+extern const FFCodec ff_escape130_decoder;
+extern const FFCodec ff_exr_encoder;
+extern const FFCodec ff_exr_decoder;
+extern const FFCodec ff_ffv1_encoder;
+extern const FFCodec ff_ffv1_decoder;
+extern const FFCodec ff_ffvhuff_encoder;
+extern const FFCodec ff_ffvhuff_decoder;
+extern const FFCodec ff_fic_decoder;
+extern const FFCodec ff_fits_encoder;
+extern const FFCodec ff_fits_decoder;
+extern const FFCodec ff_flashsv_encoder;
+extern const FFCodec ff_flashsv_decoder;
+extern const FFCodec ff_flashsv2_encoder;
+extern const FFCodec ff_flashsv2_decoder;
+extern const FFCodec ff_flic_decoder;
+extern const FFCodec ff_flv_encoder;
+extern const FFCodec ff_flv_decoder;
+extern const FFCodec ff_fmvc_decoder;
+extern const FFCodec ff_fourxm_decoder;
+extern const FFCodec ff_fraps_decoder;
+extern const FFCodec ff_frwu_decoder;
+extern const FFCodec ff_g2m_decoder;
+extern const FFCodec ff_gdv_decoder;
+extern const FFCodec ff_gem_decoder;
+extern const FFCodec ff_gif_encoder;
+extern const FFCodec ff_gif_decoder;
+extern const FFCodec ff_h261_encoder;
+extern const FFCodec ff_h261_decoder;
+extern const FFCodec ff_h263_encoder;
+extern const FFCodec ff_h263_decoder;
+extern const FFCodec ff_h263i_decoder;
+extern const FFCodec ff_h263p_encoder;
+extern const FFCodec ff_h263p_decoder;
+extern const FFCodec ff_h263_v4l2m2m_decoder;
+extern const FFCodec ff_h264_decoder;
+extern const FFCodec ff_h264_crystalhd_decoder;
+extern const FFCodec ff_h264_v4l2m2m_decoder;
+extern const FFCodec ff_h264_mediacodec_decoder;
+extern const FFCodec ff_h264_mediacodec_encoder;
+extern const FFCodec ff_h264_mmal_decoder;
+extern const FFCodec ff_h264_qsv_decoder;
+extern const FFCodec ff_h264_rkmpp_decoder;
+extern const FFCodec ff_hap_encoder;
+extern const FFCodec ff_hap_decoder;
+extern const FFCodec ff_hevc_decoder;
+extern const FFCodec ff_hevc_qsv_decoder;
+extern const FFCodec ff_hevc_rkmpp_decoder;
+extern const FFCodec ff_hevc_v4l2m2m_decoder;
+extern const FFCodec ff_hnm4_video_decoder;
+extern const FFCodec ff_hq_hqa_decoder;
+extern const FFCodec ff_hqx_decoder;
+extern const FFCodec ff_huffyuv_encoder;
+extern const FFCodec ff_huffyuv_decoder;
+extern const FFCodec ff_hymt_decoder;
+extern const FFCodec ff_idcin_decoder;
+extern const FFCodec ff_iff_ilbm_decoder;
+extern const FFCodec ff_imm4_decoder;
+extern const FFCodec ff_imm5_decoder;
+extern const FFCodec ff_indeo2_decoder;
+extern const FFCodec ff_indeo3_decoder;
+extern const FFCodec ff_indeo4_decoder;
+extern const FFCodec ff_indeo5_decoder;
+extern const FFCodec ff_interplay_video_decoder;
+extern const FFCodec ff_ipu_decoder;
+extern const FFCodec ff_jpeg2000_encoder;
+extern const FFCodec ff_jpeg2000_decoder;
+extern const FFCodec ff_jpegls_encoder;
+extern const FFCodec ff_jpegls_decoder;
+extern const FFCodec ff_jv_decoder;
+extern const FFCodec ff_kgv1_decoder;
+extern const FFCodec ff_kmvc_decoder;
+extern const FFCodec ff_lagarith_decoder;
+extern const FFCodec ff_ljpeg_encoder;
+extern const FFCodec ff_loco_decoder;
+extern const FFCodec ff_lscr_decoder;
+extern const FFCodec ff_m101_decoder;
+extern const FFCodec ff_magicyuv_encoder;
+extern const FFCodec ff_magicyuv_decoder;
+extern const FFCodec ff_mdec_decoder;
+extern const FFCodec ff_media100_decoder;
+extern const FFCodec ff_mimic_decoder;
+extern const FFCodec ff_mjpeg_encoder;
+extern const FFCodec ff_mjpeg_decoder;
+extern const FFCodec ff_mjpegb_decoder;
+extern const FFCodec ff_mmvideo_decoder;
+extern const FFCodec ff_mobiclip_decoder;
+extern const FFCodec ff_motionpixels_decoder;
+extern const FFCodec ff_mpeg1video_encoder;
+extern const FFCodec ff_mpeg1video_decoder;
+extern const FFCodec ff_mpeg2video_encoder;
+extern const FFCodec ff_mpeg2video_decoder;
+extern const FFCodec ff_mpeg4_encoder;
+extern const FFCodec ff_mpeg4_decoder;
+extern const FFCodec ff_mpeg4_crystalhd_decoder;
+extern const FFCodec ff_mpeg4_v4l2m2m_decoder;
+extern const FFCodec ff_mpeg4_mmal_decoder;
+extern const FFCodec ff_mpegvideo_decoder;
+extern const FFCodec ff_mpeg1_v4l2m2m_decoder;
+extern const FFCodec ff_mpeg2_mmal_decoder;
+extern const FFCodec ff_mpeg2_crystalhd_decoder;
+extern const FFCodec ff_mpeg2_v4l2m2m_decoder;
+extern const FFCodec ff_mpeg2_qsv_decoder;
+extern const FFCodec ff_mpeg2_mediacodec_decoder;
+extern const FFCodec ff_msa1_decoder;
+extern const FFCodec ff_mscc_decoder;
+extern const FFCodec ff_msmpeg4v1_decoder;
+extern const FFCodec ff_msmpeg4v2_encoder;
+extern const FFCodec ff_msmpeg4v2_decoder;
+extern const FFCodec ff_msmpeg4v3_encoder;
+extern const FFCodec ff_msmpeg4v3_decoder;
+extern const FFCodec ff_msmpeg4_crystalhd_decoder;
+extern const FFCodec ff_msp2_decoder;
+extern const FFCodec ff_msrle_decoder;
+extern const FFCodec ff_mss1_decoder;
+extern const FFCodec ff_mss2_decoder;
+extern const FFCodec ff_msvideo1_encoder;
+extern const FFCodec ff_msvideo1_decoder;
+extern const FFCodec ff_mszh_decoder;
+extern const FFCodec ff_mts2_decoder;
+extern const FFCodec ff_mv30_decoder;
+extern const FFCodec ff_mvc1_decoder;
+extern const FFCodec ff_mvc2_decoder;
+extern const FFCodec ff_mvdv_decoder;
+extern const FFCodec ff_mvha_decoder;
+extern const FFCodec ff_mwsc_decoder;
+extern const FFCodec ff_mxpeg_decoder;
+extern const FFCodec ff_notchlc_decoder;
+extern const FFCodec ff_nuv_decoder;
+extern const FFCodec ff_paf_video_decoder;
+extern const FFCodec ff_pam_encoder;
+extern const FFCodec ff_pam_decoder;
+extern const FFCodec ff_pbm_encoder;
+extern const FFCodec ff_pbm_decoder;
+extern const FFCodec ff_pcx_encoder;
+extern const FFCodec ff_pcx_decoder;
+extern const FFCodec ff_pfm_encoder;
+extern const FFCodec ff_pfm_decoder;
+extern const FFCodec ff_pgm_encoder;
+extern const FFCodec ff_pgm_decoder;
+extern const FFCodec ff_pgmyuv_encoder;
+extern const FFCodec ff_pgmyuv_decoder;
+extern const FFCodec ff_pgx_decoder;
+extern const FFCodec ff_phm_encoder;
+extern const FFCodec ff_phm_decoder;
+extern const FFCodec ff_photocd_decoder;
+extern const FFCodec ff_pictor_decoder;
+extern const FFCodec ff_pixlet_decoder;
+extern const FFCodec ff_png_encoder;
+extern const FFCodec ff_png_decoder;
+extern const FFCodec ff_ppm_encoder;
+extern const FFCodec ff_ppm_decoder;
+extern const FFCodec ff_prores_encoder;
+extern const FFCodec ff_prores_decoder;
+extern const FFCodec ff_prores_aw_encoder;
+extern const FFCodec ff_prores_ks_encoder;
+extern const FFCodec ff_prosumer_decoder;
+extern const FFCodec ff_psd_decoder;
+extern const FFCodec ff_ptx_decoder;
+extern const FFCodec ff_qdraw_decoder;
+extern const FFCodec ff_qoi_encoder;
+extern const FFCodec ff_qoi_decoder;
+extern const FFCodec ff_qpeg_decoder;
+extern const FFCodec ff_qtrle_encoder;
+extern const FFCodec ff_qtrle_decoder;
+extern const FFCodec ff_r10k_encoder;
+extern const FFCodec ff_r10k_decoder;
+extern const FFCodec ff_r210_encoder;
+extern const FFCodec ff_r210_decoder;
+extern const FFCodec ff_rasc_decoder;
+extern const FFCodec ff_rawvideo_encoder;
+extern const FFCodec ff_rawvideo_decoder;
+extern const FFCodec ff_rka_decoder;
+extern const FFCodec ff_rl2_decoder;
+extern const FFCodec ff_roq_encoder;
+extern const FFCodec ff_roq_decoder;
+extern const FFCodec ff_rpza_encoder;
+extern const FFCodec ff_rpza_decoder;
+extern const FFCodec ff_rscc_decoder;
+extern const FFCodec ff_rv10_encoder;
+extern const FFCodec ff_rv10_decoder;
+extern const FFCodec ff_rv20_encoder;
+extern const FFCodec ff_rv20_decoder;
+extern const FFCodec ff_rv30_decoder;
+extern const FFCodec ff_rv40_decoder;
+extern const FFCodec ff_s302m_encoder;
+extern const FFCodec ff_s302m_decoder;
+extern const FFCodec ff_sanm_decoder;
+extern const FFCodec ff_scpr_decoder;
+extern const FFCodec ff_screenpresso_decoder;
+extern const FFCodec ff_sga_decoder;
+extern const FFCodec ff_sgi_encoder;
+extern const FFCodec ff_sgi_decoder;
+extern const FFCodec ff_sgirle_decoder;
+extern const FFCodec ff_sheervideo_decoder;
+extern const FFCodec ff_simbiosis_imx_decoder;
+extern const FFCodec ff_smacker_decoder;
+extern const FFCodec ff_smc_encoder;
+extern const FFCodec ff_smc_decoder;
+extern const FFCodec ff_smvjpeg_decoder;
+extern const FFCodec ff_snow_encoder;
+extern const FFCodec ff_snow_decoder;
+extern const FFCodec ff_sp5x_decoder;
+extern const FFCodec ff_speedhq_decoder;
+extern const FFCodec ff_speedhq_encoder;
+extern const FFCodec ff_speex_decoder;
+extern const FFCodec ff_srgc_decoder;
+extern const FFCodec ff_sunrast_encoder;
+extern const FFCodec ff_sunrast_decoder;
+extern const FFCodec ff_svq1_encoder;
+extern const FFCodec ff_svq1_decoder;
+extern const FFCodec ff_svq3_decoder;
+extern const FFCodec ff_targa_encoder;
+extern const FFCodec ff_targa_decoder;
+extern const FFCodec ff_targa_y216_decoder;
+extern const FFCodec ff_tdsc_decoder;
+extern const FFCodec ff_theora_decoder;
+extern const FFCodec ff_thp_decoder;
+extern const FFCodec ff_tiertexseqvideo_decoder;
+extern const FFCodec ff_tiff_encoder;
+extern const FFCodec ff_tiff_decoder;
+extern const FFCodec ff_tmv_decoder;
+extern const FFCodec ff_truemotion1_decoder;
+extern const FFCodec ff_truemotion2_decoder;
+extern const FFCodec ff_truemotion2rt_decoder;
+extern const FFCodec ff_tscc_decoder;
+extern const FFCodec ff_tscc2_decoder;
+extern const FFCodec ff_txd_decoder;
+extern const FFCodec ff_ulti_decoder;
+extern const FFCodec ff_utvideo_encoder;
+extern const FFCodec ff_utvideo_decoder;
+extern const FFCodec ff_v210_encoder;
+extern const FFCodec ff_v210_decoder;
+extern const FFCodec ff_v210x_decoder;
+extern const FFCodec ff_v308_encoder;
+extern const FFCodec ff_v308_decoder;
+extern const FFCodec ff_v408_encoder;
+extern const FFCodec ff_v408_decoder;
+extern const FFCodec ff_v410_encoder;
+extern const FFCodec ff_v410_decoder;
+extern const FFCodec ff_vb_decoder;
+extern const FFCodec ff_vbn_encoder;
+extern const FFCodec ff_vbn_decoder;
+extern const FFCodec ff_vble_decoder;
+extern const FFCodec ff_vc1_decoder;
+extern const FFCodec ff_vc1_crystalhd_decoder;
+extern const FFCodec ff_vc1image_decoder;
+extern const FFCodec ff_vc1_mmal_decoder;
+extern const FFCodec ff_vc1_qsv_decoder;
+extern const FFCodec ff_vc1_v4l2m2m_decoder;
+extern const FFCodec ff_vc2_encoder;
+extern const FFCodec ff_vcr1_decoder;
+extern const FFCodec ff_vmdvideo_decoder;
+extern const FFCodec ff_vmnc_decoder;
+extern const FFCodec ff_vp3_decoder;
+extern const FFCodec ff_vp4_decoder;
+extern const FFCodec ff_vp5_decoder;
+extern const FFCodec ff_vp6_decoder;
+extern const FFCodec ff_vp6a_decoder;
+extern const FFCodec ff_vp6f_decoder;
+extern const FFCodec ff_vp7_decoder;
+extern const FFCodec ff_vp8_decoder;
+extern const FFCodec ff_vp8_rkmpp_decoder;
+extern const FFCodec ff_vp8_v4l2m2m_decoder;
+extern const FFCodec ff_vp9_decoder;
+extern const FFCodec ff_vp9_rkmpp_decoder;
+extern const FFCodec ff_vp9_v4l2m2m_decoder;
+extern const FFCodec ff_vqa_decoder;
+extern const FFCodec ff_vqc_decoder;
+extern const FFCodec ff_wbmp_decoder;
+extern const FFCodec ff_wbmp_encoder;
+extern const FFCodec ff_webp_decoder;
+extern const FFCodec ff_wcmv_decoder;
+extern const FFCodec ff_wrapped_avframe_encoder;
+extern const FFCodec ff_wrapped_avframe_decoder;
+extern const FFCodec ff_wmv1_encoder;
+extern const FFCodec ff_wmv1_decoder;
+extern const FFCodec ff_wmv2_encoder;
+extern const FFCodec ff_wmv2_decoder;
+extern const FFCodec ff_wmv3_decoder;
+extern const FFCodec ff_wmv3_crystalhd_decoder;
+extern const FFCodec ff_wmv3image_decoder;
+extern const FFCodec ff_wnv1_decoder;
+extern const FFCodec ff_xan_wc3_decoder;
+extern const FFCodec ff_xan_wc4_decoder;
+extern const FFCodec ff_xbm_encoder;
+extern const FFCodec ff_xbm_decoder;
+extern const FFCodec ff_xface_encoder;
+extern const FFCodec ff_xface_decoder;
+extern const FFCodec ff_xl_decoder;
+extern const FFCodec ff_xpm_decoder;
+extern const FFCodec ff_xwd_encoder;
+extern const FFCodec ff_xwd_decoder;
+extern const FFCodec ff_y41p_encoder;
+extern const FFCodec ff_y41p_decoder;
+extern const FFCodec ff_ylc_decoder;
+extern const FFCodec ff_yop_decoder;
+extern const FFCodec ff_yuv4_encoder;
+extern const FFCodec ff_yuv4_decoder;
+extern const FFCodec ff_zero12v_decoder;
+extern const FFCodec ff_zerocodec_decoder;
+extern const FFCodec ff_zlib_encoder;
+extern const FFCodec ff_zlib_decoder;
+extern const FFCodec ff_zmbv_encoder;
+extern const FFCodec ff_zmbv_decoder;
+
+/* audio codecs */
+extern const FFCodec ff_aac_encoder;
+extern const FFCodec ff_aac_decoder;
+extern const FFCodec ff_aac_fixed_decoder;
+extern const FFCodec ff_aac_latm_decoder;
+extern const FFCodec ff_ac3_encoder;
+extern const FFCodec ff_ac3_decoder;
+extern const FFCodec ff_ac3_fixed_encoder;
+extern const FFCodec ff_ac3_fixed_decoder;
+extern const FFCodec ff_acelp_kelvin_decoder;
+extern const FFCodec ff_alac_encoder;
+extern const FFCodec ff_alac_decoder;
+extern const FFCodec ff_als_decoder;
+extern const FFCodec ff_amrnb_decoder;
+extern const FFCodec ff_amrwb_decoder;
+extern const FFCodec ff_apac_decoder;
+extern const FFCodec ff_ape_decoder;
+extern const FFCodec ff_aptx_encoder;
+extern const FFCodec ff_aptx_decoder;
+extern const FFCodec ff_aptx_hd_encoder;
+extern const FFCodec ff_aptx_hd_decoder;
+extern const FFCodec ff_atrac1_decoder;
+extern const FFCodec ff_atrac3_decoder;
+extern const FFCodec ff_atrac3al_decoder;
+extern const FFCodec ff_atrac3p_decoder;
+extern const FFCodec ff_atrac3pal_decoder;
+extern const FFCodec ff_atrac9_decoder;
+extern const FFCodec ff_binkaudio_dct_decoder;
+extern const FFCodec ff_binkaudio_rdft_decoder;
+extern const FFCodec ff_bmv_audio_decoder;
+extern const FFCodec ff_bonk_decoder;
+extern const FFCodec ff_cook_decoder;
+extern const FFCodec ff_dca_encoder;
+extern const FFCodec ff_dca_decoder;
+extern const FFCodec ff_dfpwm_encoder;
+extern const FFCodec ff_dfpwm_decoder;
+extern const FFCodec ff_dolby_e_decoder;
+extern const FFCodec ff_dsd_lsbf_decoder;
+extern const FFCodec ff_dsd_msbf_decoder;
+extern const FFCodec ff_dsd_lsbf_planar_decoder;
+extern const FFCodec ff_dsd_msbf_planar_decoder;
+extern const FFCodec ff_dsicinaudio_decoder;
+extern const FFCodec ff_dss_sp_decoder;
+extern const FFCodec ff_dst_decoder;
+extern const FFCodec ff_eac3_encoder;
+extern const FFCodec ff_eac3_decoder;
+extern const FFCodec ff_evrc_decoder;
+extern const FFCodec ff_fastaudio_decoder;
+extern const FFCodec ff_ffwavesynth_decoder;
+extern const FFCodec ff_flac_encoder;
+extern const FFCodec ff_flac_decoder;
+extern const FFCodec ff_ftr_decoder;
+extern const FFCodec ff_g723_1_encoder;
+extern const FFCodec ff_g723_1_decoder;
+extern const FFCodec ff_g729_decoder;
+extern const FFCodec ff_gsm_decoder;
+extern const FFCodec ff_gsm_ms_decoder;
+extern const FFCodec ff_hca_decoder;
+extern const FFCodec ff_hcom_decoder;
+extern const FFCodec ff_hdr_encoder;
+extern const FFCodec ff_hdr_decoder;
+extern const FFCodec ff_iac_decoder;
+extern const FFCodec ff_ilbc_decoder;
+extern const FFCodec ff_imc_decoder;
+extern const FFCodec ff_interplay_acm_decoder;
+extern const FFCodec ff_mace3_decoder;
+extern const FFCodec ff_mace6_decoder;
+extern const FFCodec ff_metasound_decoder;
+extern const FFCodec ff_misc4_decoder;
+extern const FFCodec ff_mlp_encoder;
+extern const FFCodec ff_mlp_decoder;
+extern const FFCodec ff_mp1_decoder;
+extern const FFCodec ff_mp1float_decoder;
+extern const FFCodec ff_mp2_encoder;
+extern const FFCodec ff_mp2_decoder;
+extern const FFCodec ff_mp2float_decoder;
+extern const FFCodec ff_mp2fixed_encoder;
+extern const FFCodec ff_mp3float_decoder;
+extern const FFCodec ff_mp3_decoder;
+extern const FFCodec ff_mp3adufloat_decoder;
+extern const FFCodec ff_mp3adu_decoder;
+extern const FFCodec ff_mp3on4float_decoder;
+extern const FFCodec ff_mp3on4_decoder;
+extern const FFCodec ff_mpc7_decoder;
+extern const FFCodec ff_mpc8_decoder;
+extern const FFCodec ff_msnsiren_decoder;
+extern const FFCodec ff_nellymoser_encoder;
+extern const FFCodec ff_nellymoser_decoder;
+extern const FFCodec ff_on2avc_decoder;
+extern const FFCodec ff_opus_encoder;
+extern const FFCodec ff_opus_decoder;
+extern const FFCodec ff_paf_audio_decoder;
+extern const FFCodec ff_qcelp_decoder;
+extern const FFCodec ff_qdm2_decoder;
+extern const FFCodec ff_qdmc_decoder;
+extern const FFCodec ff_ra_144_encoder;
+extern const FFCodec ff_ra_144_decoder;
+extern const FFCodec ff_ra_288_decoder;
+extern const FFCodec ff_ralf_decoder;
+extern const FFCodec ff_sbc_encoder;
+extern const FFCodec ff_sbc_decoder;
+extern const FFCodec ff_shorten_decoder;
+extern const FFCodec ff_sipr_decoder;
+extern const FFCodec ff_siren_decoder;
+extern const FFCodec ff_smackaud_decoder;
+extern const FFCodec ff_sonic_encoder;
+extern const FFCodec ff_sonic_decoder;
+extern const FFCodec ff_sonic_ls_encoder;
+extern const FFCodec ff_tak_decoder;
+extern const FFCodec ff_truehd_encoder;
+extern const FFCodec ff_truehd_decoder;
+extern const FFCodec ff_truespeech_decoder;
+extern const FFCodec ff_tta_encoder;
+extern const FFCodec ff_tta_decoder;
+extern const FFCodec ff_twinvq_decoder;
+extern const FFCodec ff_vmdaudio_decoder;
+extern const FFCodec ff_vorbis_encoder;
+extern const FFCodec ff_vorbis_decoder;
+extern const FFCodec ff_wavarc_decoder;
+extern const FFCodec ff_wavpack_encoder;
+extern const FFCodec ff_wavpack_decoder;
+extern const FFCodec ff_wmalossless_decoder;
+extern const FFCodec ff_wmapro_decoder;
+extern const FFCodec ff_wmav1_encoder;
+extern const FFCodec ff_wmav1_decoder;
+extern const FFCodec ff_wmav2_encoder;
+extern const FFCodec ff_wmav2_decoder;
+extern const FFCodec ff_wmavoice_decoder;
+extern const FFCodec ff_ws_snd1_decoder;
+extern const FFCodec ff_xma1_decoder;
+extern const FFCodec ff_xma2_decoder;
+
+/* PCM codecs */
+extern const FFCodec ff_pcm_alaw_encoder;
+extern const FFCodec ff_pcm_alaw_decoder;
+extern const FFCodec ff_pcm_bluray_encoder;
+extern const FFCodec ff_pcm_bluray_decoder;
+extern const FFCodec ff_pcm_dvd_encoder;
+extern const FFCodec ff_pcm_dvd_decoder;
+extern const FFCodec ff_pcm_f16le_decoder;
+extern const FFCodec ff_pcm_f24le_decoder;
+extern const FFCodec ff_pcm_f32be_encoder;
+extern const FFCodec ff_pcm_f32be_decoder;
+extern const FFCodec ff_pcm_f32le_encoder;
+extern const FFCodec ff_pcm_f32le_decoder;
+extern const FFCodec ff_pcm_f64be_encoder;
+extern const FFCodec ff_pcm_f64be_decoder;
+extern const FFCodec ff_pcm_f64le_encoder;
+extern const FFCodec ff_pcm_f64le_decoder;
+extern const FFCodec ff_pcm_lxf_decoder;
+extern const FFCodec ff_pcm_mulaw_encoder;
+extern const FFCodec ff_pcm_mulaw_decoder;
+extern const FFCodec ff_pcm_s8_encoder;
+extern const FFCodec ff_pcm_s8_decoder;
+extern const FFCodec ff_pcm_s8_planar_encoder;
+extern const FFCodec ff_pcm_s8_planar_decoder;
+extern const FFCodec ff_pcm_s16be_encoder;
+extern const FFCodec ff_pcm_s16be_decoder;
+extern const FFCodec ff_pcm_s16be_planar_encoder;
+extern const FFCodec ff_pcm_s16be_planar_decoder;
+extern const FFCodec ff_pcm_s16le_encoder;
+extern const FFCodec ff_pcm_s16le_decoder;
+extern const FFCodec ff_pcm_s16le_planar_encoder;
+extern const FFCodec ff_pcm_s16le_planar_decoder;
+extern const FFCodec ff_pcm_s24be_encoder;
+extern const FFCodec ff_pcm_s24be_decoder;
+extern const FFCodec ff_pcm_s24daud_encoder;
+extern const FFCodec ff_pcm_s24daud_decoder;
+extern const FFCodec ff_pcm_s24le_encoder;
+extern const FFCodec ff_pcm_s24le_decoder;
+extern const FFCodec ff_pcm_s24le_planar_encoder;
+extern const FFCodec ff_pcm_s24le_planar_decoder;
+extern const FFCodec ff_pcm_s32be_encoder;
+extern const FFCodec ff_pcm_s32be_decoder;
+extern const FFCodec ff_pcm_s32le_encoder;
+extern const FFCodec ff_pcm_s32le_decoder;
+extern const FFCodec ff_pcm_s32le_planar_encoder;
+extern const FFCodec ff_pcm_s32le_planar_decoder;
+extern const FFCodec ff_pcm_s64be_encoder;
+extern const FFCodec ff_pcm_s64be_decoder;
+extern const FFCodec ff_pcm_s64le_encoder;
+extern const FFCodec ff_pcm_s64le_decoder;
+extern const FFCodec ff_pcm_sga_decoder;
+extern const FFCodec ff_pcm_u8_encoder;
+extern const FFCodec ff_pcm_u8_decoder;
+extern const FFCodec ff_pcm_u16be_encoder;
+extern const FFCodec ff_pcm_u16be_decoder;
+extern const FFCodec ff_pcm_u16le_encoder;
+extern const FFCodec ff_pcm_u16le_decoder;
+extern const FFCodec ff_pcm_u24be_encoder;
+extern const FFCodec ff_pcm_u24be_decoder;
+extern const FFCodec ff_pcm_u24le_encoder;
+extern const FFCodec ff_pcm_u24le_decoder;
+extern const FFCodec ff_pcm_u32be_encoder;
+extern const FFCodec ff_pcm_u32be_decoder;
+extern const FFCodec ff_pcm_u32le_encoder;
+extern const FFCodec ff_pcm_u32le_decoder;
+extern const FFCodec ff_pcm_vidc_encoder;
+extern const FFCodec ff_pcm_vidc_decoder;
+
+/* DPCM codecs */
+extern const FFCodec ff_cbd2_dpcm_decoder;
+extern const FFCodec ff_derf_dpcm_decoder;
+extern const FFCodec ff_gremlin_dpcm_decoder;
+extern const FFCodec ff_interplay_dpcm_decoder;
+extern const FFCodec ff_roq_dpcm_encoder;
+extern const FFCodec ff_roq_dpcm_decoder;
+extern const FFCodec ff_sdx2_dpcm_decoder;
+extern const FFCodec ff_sol_dpcm_decoder;
+extern const FFCodec ff_xan_dpcm_decoder;
+extern const FFCodec ff_wady_dpcm_decoder;
+
+/* ADPCM codecs */
+extern const FFCodec ff_adpcm_4xm_decoder;
+extern const FFCodec ff_adpcm_adx_encoder;
+extern const FFCodec ff_adpcm_adx_decoder;
+extern const FFCodec ff_adpcm_afc_decoder;
+extern const FFCodec ff_adpcm_agm_decoder;
+extern const FFCodec ff_adpcm_aica_decoder;
+extern const FFCodec ff_adpcm_argo_decoder;
+extern const FFCodec ff_adpcm_argo_encoder;
+extern const FFCodec ff_adpcm_ct_decoder;
+extern const FFCodec ff_adpcm_dtk_decoder;
+extern const FFCodec ff_adpcm_ea_decoder;
+extern const FFCodec ff_adpcm_ea_maxis_xa_decoder;
+extern const FFCodec ff_adpcm_ea_r1_decoder;
+extern const FFCodec ff_adpcm_ea_r2_decoder;
+extern const FFCodec ff_adpcm_ea_r3_decoder;
+extern const FFCodec ff_adpcm_ea_xas_decoder;
+extern const FFCodec ff_adpcm_g722_encoder;
+extern const FFCodec ff_adpcm_g722_decoder;
+extern const FFCodec ff_adpcm_g726_encoder;
+extern const FFCodec ff_adpcm_g726_decoder;
+extern const FFCodec ff_adpcm_g726le_encoder;
+extern const FFCodec ff_adpcm_g726le_decoder;
+extern const FFCodec ff_adpcm_ima_acorn_decoder;
+extern const FFCodec ff_adpcm_ima_amv_decoder;
+extern const FFCodec ff_adpcm_ima_amv_encoder;
+extern const FFCodec ff_adpcm_ima_alp_decoder;
+extern const FFCodec ff_adpcm_ima_alp_encoder;
+extern const FFCodec ff_adpcm_ima_apc_decoder;
+extern const FFCodec ff_adpcm_ima_apm_decoder;
+extern const FFCodec ff_adpcm_ima_apm_encoder;
+extern const FFCodec ff_adpcm_ima_cunning_decoder;
+extern const FFCodec ff_adpcm_ima_dat4_decoder;
+extern const FFCodec ff_adpcm_ima_dk3_decoder;
+extern const FFCodec ff_adpcm_ima_dk4_decoder;
+extern const FFCodec ff_adpcm_ima_ea_eacs_decoder;
+extern const FFCodec ff_adpcm_ima_ea_sead_decoder;
+extern const FFCodec ff_adpcm_ima_iss_decoder;
+extern const FFCodec ff_adpcm_ima_moflex_decoder;
+extern const FFCodec ff_adpcm_ima_mtf_decoder;
+extern const FFCodec ff_adpcm_ima_oki_decoder;
+extern const FFCodec ff_adpcm_ima_qt_encoder;
+extern const FFCodec ff_adpcm_ima_qt_decoder;
+extern const FFCodec ff_adpcm_ima_rad_decoder;
+extern const FFCodec ff_adpcm_ima_ssi_decoder;
+extern const FFCodec ff_adpcm_ima_ssi_encoder;
+extern const FFCodec ff_adpcm_ima_smjpeg_decoder;
+extern const FFCodec ff_adpcm_ima_wav_encoder;
+extern const FFCodec ff_adpcm_ima_wav_decoder;
+extern const FFCodec ff_adpcm_ima_ws_encoder;
+extern const FFCodec ff_adpcm_ima_ws_decoder;
+extern const FFCodec ff_adpcm_ms_encoder;
+extern const FFCodec ff_adpcm_ms_decoder;
+extern const FFCodec ff_adpcm_mtaf_decoder;
+extern const FFCodec ff_adpcm_psx_decoder;
+extern const FFCodec ff_adpcm_sbpro_2_decoder;
+extern const FFCodec ff_adpcm_sbpro_3_decoder;
+extern const FFCodec ff_adpcm_sbpro_4_decoder;
+extern const FFCodec ff_adpcm_swf_encoder;
+extern const FFCodec ff_adpcm_swf_decoder;
+extern const FFCodec ff_adpcm_thp_decoder;
+extern const FFCodec ff_adpcm_thp_le_decoder;
+extern const FFCodec ff_adpcm_vima_decoder;
+extern const FFCodec ff_adpcm_xa_decoder;
+extern const FFCodec ff_adpcm_xmd_decoder;
+extern const FFCodec ff_adpcm_yamaha_encoder;
+extern const FFCodec ff_adpcm_yamaha_decoder;
+extern const FFCodec ff_adpcm_zork_decoder;
+
+/* subtitles */
+extern const FFCodec ff_ssa_encoder;
+extern const FFCodec ff_ssa_decoder;
+extern const FFCodec ff_ass_encoder;
+extern const FFCodec ff_ass_decoder;
+extern const FFCodec ff_ccaption_decoder;
+extern const FFCodec ff_dvbsub_encoder;
+extern const FFCodec ff_dvbsub_decoder;
+extern const FFCodec ff_dvdsub_encoder;
+extern const FFCodec ff_dvdsub_decoder;
+extern const FFCodec ff_jacosub_decoder;
+extern const FFCodec ff_microdvd_decoder;
+extern const FFCodec ff_movtext_encoder;
+extern const FFCodec ff_movtext_decoder;
+extern const FFCodec ff_mpl2_decoder;
+extern const FFCodec ff_pgssub_decoder;
+extern const FFCodec ff_pjs_decoder;
+extern const FFCodec ff_realtext_decoder;
+extern const FFCodec ff_sami_decoder;
+extern const FFCodec ff_srt_encoder;
+extern const FFCodec ff_srt_decoder;
+extern const FFCodec ff_stl_decoder;
+extern const FFCodec ff_subrip_encoder;
+extern const FFCodec ff_subrip_decoder;
+extern const FFCodec ff_subviewer_decoder;
+extern const FFCodec ff_subviewer1_decoder;
+extern const FFCodec ff_text_encoder;
+extern const FFCodec ff_text_decoder;
+extern const FFCodec ff_ttml_encoder;
+extern const FFCodec ff_vplayer_decoder;
+extern const FFCodec ff_webvtt_encoder;
+extern const FFCodec ff_webvtt_decoder;
+extern const FFCodec ff_xsub_encoder;
+extern const FFCodec ff_xsub_decoder;
+
+/* external libraries */
+extern const FFCodec ff_aac_at_encoder;
+extern const FFCodec ff_aac_at_decoder;
+extern const FFCodec ff_ac3_at_decoder;
+extern const FFCodec ff_adpcm_ima_qt_at_decoder;
+extern const FFCodec ff_alac_at_encoder;
+extern const FFCodec ff_alac_at_decoder;
+extern const FFCodec ff_amr_nb_at_decoder;
+extern const FFCodec ff_eac3_at_decoder;
+extern const FFCodec ff_gsm_ms_at_decoder;
+extern const FFCodec ff_ilbc_at_encoder;
+extern const FFCodec ff_ilbc_at_decoder;
+extern const FFCodec ff_mp1_at_decoder;
+extern const FFCodec ff_mp2_at_decoder;
+extern const FFCodec ff_mp3_at_decoder;
+extern const FFCodec ff_pcm_alaw_at_encoder;
+extern const FFCodec ff_pcm_alaw_at_decoder;
+extern const FFCodec ff_pcm_mulaw_at_encoder;
+extern const FFCodec ff_pcm_mulaw_at_decoder;
+extern const FFCodec ff_qdmc_at_decoder;
+extern const FFCodec ff_qdm2_at_decoder;
+extern FFCodec ff_libaom_av1_encoder;
+extern const FFCodec ff_libaribb24_decoder;
+extern const FFCodec ff_libcelt_decoder;
+extern const FFCodec ff_libcodec2_encoder;
+extern const FFCodec ff_libcodec2_decoder;
+extern const FFCodec ff_libdav1d_decoder;
+extern const FFCodec ff_libdavs2_decoder;
+extern const FFCodec ff_libfdk_aac_encoder;
+extern const FFCodec ff_libfdk_aac_decoder;
+extern const FFCodec ff_libgsm_encoder;
+extern const FFCodec ff_libgsm_decoder;
+extern const FFCodec ff_libgsm_ms_encoder;
+extern const FFCodec ff_libgsm_ms_decoder;
+extern const FFCodec ff_libilbc_encoder;
+extern const FFCodec ff_libilbc_decoder;
+extern const FFCodec ff_libjxl_decoder;
+extern const FFCodec ff_libjxl_encoder;
+extern const FFCodec ff_libmp3lame_encoder;
+extern const FFCodec ff_libopencore_amrnb_encoder;
+extern const FFCodec ff_libopencore_amrnb_decoder;
+extern const FFCodec ff_libopencore_amrwb_decoder;
+extern const FFCodec ff_libopenjpeg_encoder;
+extern const FFCodec ff_libopenjpeg_decoder;
+extern const FFCodec ff_libopus_encoder;
+extern const FFCodec ff_libopus_decoder;
+extern const FFCodec ff_librav1e_encoder;
+extern const FFCodec ff_librsvg_decoder;
+extern const FFCodec ff_libshine_encoder;
+extern const FFCodec ff_libspeex_encoder;
+extern const FFCodec ff_libspeex_decoder;
+extern const FFCodec ff_libsvtav1_encoder;
+extern const FFCodec ff_libtheora_encoder;
+extern const FFCodec ff_libtwolame_encoder;
+extern const FFCodec ff_libuavs3d_decoder;
+extern const FFCodec ff_libvo_amrwbenc_encoder;
+extern const FFCodec ff_libvorbis_encoder;
+extern const FFCodec ff_libvorbis_decoder;
+extern const FFCodec ff_libvpx_vp8_encoder;
+extern const FFCodec ff_libvpx_vp8_decoder;
+extern FFCodec ff_libvpx_vp9_encoder;
+extern FFCodec ff_libvpx_vp9_decoder;
+/* preferred over libwebp */
+extern const FFCodec ff_libwebp_anim_encoder;
+extern const FFCodec ff_libwebp_encoder;
+extern const FFCodec ff_libx262_encoder;
+#if CONFIG_LIBX264_ENCODER
+#include <x264.h>
+#if X264_BUILD < 153
+#define LIBX264_CONST
+#else
+#define LIBX264_CONST const
+#endif
+extern LIBX264_CONST FFCodec ff_libx264_encoder;
+#endif
+extern const FFCodec ff_libx264rgb_encoder;
+extern FFCodec ff_libx265_encoder;
+extern const FFCodec ff_libxavs_encoder;
+extern const FFCodec ff_libxavs2_encoder;
+extern const FFCodec ff_libxvid_encoder;
+extern const FFCodec ff_libzvbi_teletext_decoder;
+
+/* text */
+extern const FFCodec ff_bintext_decoder;
+extern const FFCodec ff_xbin_decoder;
+extern const FFCodec ff_idf_decoder;
+
+/* external libraries, that shouldn't be used by default if one of the
+ * above is available */
+extern const FFCodec ff_aac_mf_encoder;
+extern const FFCodec ff_ac3_mf_encoder;
+extern const FFCodec ff_h263_v4l2m2m_encoder;
+extern const FFCodec ff_libaom_av1_decoder;
+/* hwaccel hooks only, so prefer external decoders */
+extern const FFCodec ff_av1_decoder;
+extern const FFCodec ff_av1_cuvid_decoder;
+extern const FFCodec ff_av1_mediacodec_decoder;
+extern const FFCodec ff_av1_nvenc_encoder;
+extern const FFCodec ff_av1_qsv_decoder;
+extern const FFCodec ff_av1_qsv_encoder;
+extern const FFCodec ff_av1_amf_encoder;
+extern const FFCodec ff_libopenh264_encoder;
+extern const FFCodec ff_libopenh264_decoder;
+extern const FFCodec ff_h264_amf_encoder;
+extern const FFCodec ff_h264_cuvid_decoder;
+extern const FFCodec ff_h264_mf_encoder;
+extern const FFCodec ff_h264_nvenc_encoder;
+extern const FFCodec ff_h264_omx_encoder;
+extern const FFCodec ff_h264_qsv_encoder;
+extern const FFCodec ff_h264_v4l2m2m_encoder;
+extern const FFCodec ff_h264_vaapi_encoder;
+extern const FFCodec ff_h264_videotoolbox_encoder;
+extern const FFCodec ff_hevc_amf_encoder;
+extern const FFCodec ff_hevc_cuvid_decoder;
+extern const FFCodec ff_hevc_mediacodec_decoder;
+extern const FFCodec ff_hevc_mediacodec_encoder;
+extern const FFCodec ff_hevc_mf_encoder;
+extern const FFCodec ff_hevc_nvenc_encoder;
+extern const FFCodec ff_hevc_qsv_encoder;
+extern const FFCodec ff_hevc_v4l2m2m_encoder;
+extern const FFCodec ff_hevc_vaapi_encoder;
+extern const FFCodec ff_hevc_videotoolbox_encoder;
+extern const FFCodec ff_libkvazaar_encoder;
+extern const FFCodec ff_mjpeg_cuvid_decoder;
+extern const FFCodec ff_mjpeg_qsv_encoder;
+extern const FFCodec ff_mjpeg_qsv_decoder;
+extern const FFCodec ff_mjpeg_vaapi_encoder;
+extern const FFCodec ff_mp3_mf_encoder;
+extern const FFCodec ff_mpeg1_cuvid_decoder;
+extern const FFCodec ff_mpeg2_cuvid_decoder;
+extern const FFCodec ff_mpeg2_qsv_encoder;
+extern const FFCodec ff_mpeg2_vaapi_encoder;
+extern const FFCodec ff_mpeg4_cuvid_decoder;
+extern const FFCodec ff_mpeg4_mediacodec_decoder;
+extern const FFCodec ff_mpeg4_omx_encoder;
+extern const FFCodec ff_mpeg4_v4l2m2m_encoder;
+extern const FFCodec ff_prores_videotoolbox_encoder;
+extern const FFCodec ff_vc1_cuvid_decoder;
+extern const FFCodec ff_vp8_cuvid_decoder;
+extern const FFCodec ff_vp8_mediacodec_decoder;
+extern const FFCodec ff_vp8_qsv_decoder;
+extern const FFCodec ff_vp8_v4l2m2m_encoder;
+extern const FFCodec ff_vp8_vaapi_encoder;
+extern const FFCodec ff_vp9_cuvid_decoder;
+extern const FFCodec ff_vp9_mediacodec_decoder;
+extern const FFCodec ff_vp9_qsv_decoder;
+extern const FFCodec ff_vp9_vaapi_encoder;
+extern const FFCodec ff_vp9_qsv_encoder;
+
+// null codecs
+extern const FFCodec ff_vnull_decoder;
+extern const FFCodec ff_vnull_encoder;
+extern const FFCodec ff_anull_decoder;
+extern const FFCodec ff_anull_encoder;
+
+// The iterate API is not usable with ossfuzz due to the excessive size of binaries created
+#if CONFIG_OSSFUZZ
+const FFCodec * codec_list[] = {   // stub table: every slot is NULL, the slots themselves are not const
+    NULL,
+    NULL,
+    NULL
+};
+#else   // !CONFIG_OSSFUZZ
+#include "libavcodec/codec_list.c" // presumably supplies the real codec_list[] -- TODO confirm
+#endif  // CONFIG_OSSFUZZ
+
+static AVOnce av_codec_static_init = AV_ONCE_INIT; // once-guard handed to ff_thread_once() in av_codec_iterate()
+static void av_codec_init_static(void)             // calls the optional init_static_data hook of every listed codec
+{
+    for (int i = 0; codec_list[i]; i++) {           // stop at the first NULL entry
+        if (codec_list[i]->init_static_data)        // the hook is optional
+            codec_list[i]->init_static_data((FFCodec*)codec_list[i]); // cast drops the const qualifier for the call
+    }
+}
+
+const AVCodec *av_codec_iterate(void **opaque)  // *opaque carries the index of the next codec_list[] entry
+{
+    uintptr_t i = (uintptr_t)*opaque;            // the cursor is an integer stored in the pointer value
+    const FFCodec *c = codec_list[i];
+
+    ff_thread_once(&av_codec_static_init, av_codec_init_static); // init hooks, guarded by av_codec_static_init
+
+    if (c) {
+        *opaque = (void*)(i + 1);                // advance only when an entry was found
+        return &c->p;                            // hand out the AVCodec member of the FFCodec
+    }
+    return NULL;                                 // NULL entry reached: *opaque is left unchanged
+}
+
+static enum AVCodecID remap_deprecated_codec_id(enum AVCodecID id)
+{
+    switch(id){
+        //This is for future deprecated codec ids, it's empty since
+        //last major bump but will fill up again over time, please don't remove it
+        default                                         : return id;
+    }
+}
+
+static const AVCodec *find_codec(enum AVCodecID id, int (*x)(const AVCodec *)) // x: filter, non-zero keeps the codec
+{
+    const AVCodec *p, *experimental = NULL;     // experimental: first experimental match, kept as fallback
+    void *i = 0;                                // cursor for av_codec_iterate()
+
+    id = remap_deprecated_codec_id(id);
+
+    while ((p = av_codec_iterate(&i))) {
+        if (!x(p))
+            continue;
+        if (p->id == id) {
+            if (p->capabilities & AV_CODEC_CAP_EXPERIMENTAL && !experimental) {
+                experimental = p;               // remember it and keep scanning
+            } else
+                return p;                       // non-experimental match, or a second experimental one
+        }
+    }
+
+    return experimental;                        // NULL when no codec matched id at all
+}
+
+const AVCodec *avcodec_find_encoder(enum AVCodecID id)
+{
+    return find_codec(id, av_codec_is_encoder);  // id lookup limited to codecs accepted by av_codec_is_encoder()
+}
+
+const AVCodec *avcodec_find_decoder(enum AVCodecID id)
+{
+    return find_codec(id, av_codec_is_decoder);  // id lookup limited to codecs accepted by av_codec_is_decoder()
+}
+
+static const AVCodec *find_codec_by_name(const char *name, int (*x)(const AVCodec *)) // x: filter, non-zero keeps the codec
+{
+    void *i = 0;                        // cursor for av_codec_iterate()
+    const AVCodec *p;
+
+    if (!name)                          // strcmp() below must never see a NULL name
+        return NULL;
+
+    while ((p = av_codec_iterate(&i))) {
+        if (!x(p))
+            continue;
+        if (strcmp(name, p->name) == 0) // exact, case-sensitive comparison
+            return p;                   // first match in list order wins
+    }
+
+    return NULL;                        // no codec of the requested kind carries this name
+}
+
+const AVCodec *avcodec_find_encoder_by_name(const char *name)
+{
+    return find_codec_by_name(name, av_codec_is_encoder); // name lookup limited to av_codec_is_encoder() matches
+}
+
+const AVCodec *avcodec_find_decoder_by_name(const char *name)
+{
+    return find_codec_by_name(name, av_codec_is_decoder); // name lookup limited to av_codec_is_decoder() matches
+}
diff --git a/media/ffvpx/libavcodec/arm/fft_init_arm.c b/media/ffvpx/libavcodec/arm/fft_init_arm.c
new file mode 100644
index 0000000000..8ae22dfb4e
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/fft_init_arm.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+
+#include "libavcodec/fft.h"
+
+void ff_fft_calc_vfp(FFTContext *s, FFTComplex *z);
+
+void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
+
+void ff_imdct_half_vfp(FFTContext *s, FFTSample *output, const FFTSample *input);
+
+void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+
+av_cold void ff_fft_init_arm(FFTContext *s) /* install the VFP/NEON FFT and MDCT routines this CPU supports */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_vfp_vm(cpu_flags)) {
+        if (s->nbits < 17)                  /* fft_tab_vfp ends at fft65536 (nbits 16) and is indexed unchecked */
+            s->fft_calc = ff_fft_calc_vfp;
+#if CONFIG_MDCT
+        s->imdct_half   = ff_imdct_half_vfp;
+#endif
+    }
+    if (have_neon(cpu_flags)) {             /* tested after VFP, so NEON pointers win when both flags are set */
+#if CONFIG_FFT
+        if (s->nbits < 17) {                /* fft_tab_neon also ends at fft65536 */
+            s->fft_permute = ff_fft_permute_neon;
+            s->fft_calc    = ff_fft_calc_neon;
+        }
+#endif
+#if CONFIG_MDCT
+        s->imdct_calc   = ff_imdct_calc_neon;
+        s->imdct_half   = ff_imdct_half_neon;
+        s->mdct_calc    = ff_mdct_calc_neon;
+        s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
+#endif
+    }
+}
diff --git a/media/ffvpx/libavcodec/arm/fft_neon.S b/media/ffvpx/libavcodec/arm/fft_neon.S
new file mode 100644
index 0000000000..48f8dfc424
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/fft_neon.S
@@ -0,0 +1,375 @@
+/*
+ * ARM NEON optimised FFT
+ *
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2009 Naotoshi Nojiri
+ *
+ * This algorithm (though not any of the implementation details) is
+ * based on libdjbfft by D. J. Bernstein.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+#define M_SQRT1_2 0.70710678118654752440
+
+
+function fft4_neon
+        vld1.32         {d0-d3}, [r0,:128]      @ d0..d3 = z[0..3] as re,im pairs
+
+        vext.32         q8,  q1,  q1,  #1       @ i2,r3,i3,r2
+        vsub.f32        d6,  d0,  d1            @ r0-r1,i0-i1
+        vsub.f32        d7,  d16, d17           @ i2-i3,r3-r2
+        vadd.f32        d4,  d0,  d1            @ r0+r1,i0+i1
+        vadd.f32        d5,  d2,  d3            @ r2+r3,i2+i3
+        vadd.f32        d1,  d6,  d7            @ z[1]
+        vsub.f32        d3,  d6,  d7            @ z[3]
+        vadd.f32        d0,  d4,  d5            @ z[0]
+        vsub.f32        d2,  d4,  d5            @ z[2]
+
+        vst1.32         {d0-d3}, [r0,:128]      @ results overwrite the input
+
+        bx              lr
+endfunc
+
+function fft8_neon
+        mov             r1,  r0
+        vld1.32         {d0-d3},   [r1,:128]!   @ z[0..3], r1 advances by 32 bytes
+        vld1.32         {d16-d19}, [r1,:128]    @ z[4..7], r1 stays at z[4]
+
+        movw            r2,  #0x04f3            @ sqrt(1/2)
+        movt            r2,  #0x3f35
+        eor             r3,  r2,  #1<<31        @ -sqrt(1/2): same bits with the sign flipped
+        vdup.32         d31, r2                 @ d31 = w,w
+
+        vext.32         q11, q1,  q1,  #1       @ i2,r3,i3,r2
+        vadd.f32        d4,  d16, d17           @ r4+r5,i4+i5
+        vmov            d28, r3,  r2            @ d28 = -w,w
+        vadd.f32        d5,  d18, d19           @ r6+r7,i6+i7
+        vsub.f32        d17, d16, d17           @ r4-r5,i4-i5
+        vsub.f32        d19, d18, d19           @ r6-r7,i6-i7
+        vrev64.32       d29, d28                @ d29 = w,-w
+        vadd.f32        d20, d0,  d1            @ r0+r1,i0+i1
+        vadd.f32        d21, d2,  d3            @ r2+r3,i2+i3
+        vmul.f32        d26, d17, d28           @ -a2r*w,a2i*w
+        vext.32         q3,  q2,  q2,  #1
+        vmul.f32        d27, d19, d29           @ a3r*w,-a3i*w
+        vsub.f32        d23, d22, d23           @ i2-i3,r3-r2
+        vsub.f32        d22, d0,  d1            @ r0-r1,i0-i1
+        vmul.f32        d24, d17, d31           @ a2r*w,a2i*w
+        vmul.f32        d25, d19, d31           @ a3r*w,a3i*w
+        vadd.f32        d0,  d20, d21
+        vsub.f32        d2,  d20, d21
+        vadd.f32        d1,  d22, d23
+        vrev64.32       q13, q13
+        vsub.f32        d3,  d22, d23
+        vsub.f32        d6,  d6,  d7
+        vadd.f32        d24, d24, d26           @ a2r+a2i,a2i-a2r   t1,t2
+        vadd.f32        d25, d25, d27           @ a3r-a3i,a3i+a3r   t5,t6
+        vadd.f32        d7,  d4,  d5
+        vsub.f32        d18, d2,  d6
+        vext.32         q13, q12, q12, #1
+        vadd.f32        d2,  d2,  d6
+        vsub.f32        d16, d0,  d7
+        vadd.f32        d5,  d25, d24
+        vsub.f32        d4,  d26, d27
+        vadd.f32        d0,  d0,  d7
+        vsub.f32        d17, d1,  d5
+        vsub.f32        d19, d3,  d4
+        vadd.f32        d3,  d3,  d4
+        vadd.f32        d1,  d1,  d5
+
+        vst1.32         {d16-d19}, [r1,:128]    @ store to z[4..7]
+        vst1.32         {d0-d3},   [r0,:128]    @ store to z[0..3]
+
+        bx              lr
+endfunc
+
+function fft16_neon
+        movrel          r1, mppm
+        vld1.32         {d16-d19}, [r0,:128]!   @ q8{r0,i0,r1,i1} q9{r2,i2,r3,i3}
+        pld             [r0, #32]               @ preload hint for data read further down
+        vld1.32         {d2-d3}, [r1,:128]      @ q1 = the four mppm constants
+        vext.32         q13, q9,  q9,  #1       @ q13{i2,r3,i3,r2}
+        vld1.32         {d22-d25}, [r0,:128]!   @ q11{r4,i4,r5,i5} q12{r6,i6,r7,i7}
+        vadd.f32        d4,  d16, d17
+        vsub.f32        d5,  d16, d17
+        vadd.f32        d18, d18, d19
+        vsub.f32        d19, d26, d27
+
+        vadd.f32        d20, d22, d23
+        vsub.f32        d22, d22, d23
+        vsub.f32        d23, d24, d25
+        vadd.f32        q8,  q2,  q9            @ {r0,i0,r1,i1}
+        vadd.f32        d21, d24, d25
+        vmul.f32        d24, d22, d2
+        vsub.f32        q9,  q2,  q9            @ {r2,i2,r3,i3}
+        vmul.f32        d25, d23, d3
+        vuzp.32         d16, d17                @ {r0,r1,i0,i1}
+        vmul.f32        q1,  q11, d2[1]
+        vuzp.32         d18, d19                @ {r2,r3,i2,i3}
+        vrev64.32       q12, q12
+        vadd.f32        q11, q12, q1            @ {t1a,t2a,t5,t6}
+        vld1.32         {d24-d27}, [r0,:128]!   @ q12{r8,i8,r9,i9} q13{r10,i10,r11,i11}
+        vzip.32         q10, q11
+        vld1.32         {d28-d31}, [r0,:128]    @ q14{r12,i12,r13,i13} q15{r14,i14,r15,i15}
+        vadd.f32        d0,  d22, d20
+        vadd.f32        d1,  d21, d23
+        vsub.f32        d2,  d21, d23
+        vsub.f32        d3,  d22, d20
+        sub             r0,  r0,  #96           @ back to z[0] after three 32-byte post-increments
+        vext.32         q13, q13, q13, #1
+        vsub.f32        q10, q8,  q0            @ {r4,r5,i4,i5}
+        vadd.f32        q8,  q8,  q0            @ {r0,r1,i0,i1}
+        vext.32         q15, q15, q15, #1
+        vsub.f32        q11, q9,  q1            @ {r6,r7,i6,i7}
+        vswp            d25, d26                @ q12{r8,i8,i10,r11} q13{r9,i9,i11,r10}
+        vadd.f32        q9,  q9,  q1            @ {r2,r3,i2,i3}
+        vswp            d29, d30                @ q14{r12,i12,i14,r15} q15{r13,i13,i15,r14}
+        vadd.f32        q0,  q12, q13           @ {t1,t2,t5,t6}
+        vadd.f32        q1,  q14, q15           @ {t1a,t2a,t5a,t6a}
+        movrelx         r2,  X(ff_cos_16)
+        vsub.f32        q13, q12, q13           @ {t3,t4,t7,t8}
+        vrev64.32       d1,  d1
+        vsub.f32        q15, q14, q15           @ {t3a,t4a,t7a,t8a}
+        vrev64.32       d3,  d3
+        movrel          r3,  pmmp
+        vswp            d1,  d26                @ q0{t1,t2,t3,t4} q13{t6,t5,t7,t8}
+        vswp            d3,  d30                @ q1{t1a,t2a,t3a,t4a} q15{t6a,t5a,t7a,t8a}
+        vadd.f32        q12, q0,  q13           @ {r8,i8,r9,i9}
+        vadd.f32        q14, q1,  q15           @ {r12,i12,r13,i13}
+        vld1.32         {d4-d5},  [r2,:64]      @ q2 = first four floats of the ff_cos_16 table
+        vsub.f32        q13, q0,  q13           @ {r10,i10,r11,i11}
+        vsub.f32        q15, q1,  q15           @ {r14,i14,r15,i15}
+        vswp            d25, d28                @ q12{r8,i8,r12,i12} q14{r9,i9,r13,i13}
+        vld1.32         {d6-d7},  [r3,:128]     @ q3 = pmmp sign pattern
+        vrev64.32       q1,  q14
+        vmul.f32        q14, q14, d4[1]
+        vmul.f32        q1,  q1,  q3
+        vmla.f32        q14, q1,  d5[1]         @ {t1a,t2a,t5a,t6a}
+        vswp            d27, d30                @ q13{r10,i10,r14,i14} q15{r11,i11,r15,i15}
+        vzip.32         q12, q14
+        vadd.f32        d0,  d28, d24
+        vadd.f32        d1,  d25, d29
+        vsub.f32        d2,  d25, d29
+        vsub.f32        d3,  d28, d24
+        vsub.f32        q12, q8,  q0            @ {r8,r9,i8,i9}
+        vadd.f32        q8,  q8,  q0            @ {r0,r1,i0,i1}
+        vsub.f32        q14, q10, q1            @ {r12,r13,i12,i13}
+        mov             r1,  #32                @ byte stride between the vst2 stores below
+        vadd.f32        q10, q10, q1            @ {r4,r5,i4,i5}
+        vrev64.32       q0,  q13
+        vmul.f32        q13, q13, d5[0]
+        vrev64.32       q1,  q15
+        vmul.f32        q15, q15, d5[1]
+        vst2.32         {d16-d17},[r0,:128], r1 @ z[0],z[1]
+        vmul.f32        q0,  q0,  q3
+        vst2.32         {d20-d21},[r0,:128], r1 @ z[4],z[5]
+        vmul.f32        q1,  q1,  q3
+        vmla.f32        q13, q0,  d5[0]         @ {t1,t2,t5,t6}
+        vmla.f32        q15, q1,  d4[1]         @ {t1a,t2a,t5a,t6a}
+        vst2.32         {d24-d25},[r0,:128], r1 @ z[8],z[9]
+        vst2.32         {d28-d29},[r0,:128]     @ z[12],z[13], no writeback
+        vzip.32         q13, q15
+        sub             r0, r0, #80             @ r0 was at z[12], now at z[2]
+        vadd.f32        d0,  d30, d26
+        vadd.f32        d1,  d27, d31
+        vsub.f32        d2,  d27, d31
+        vsub.f32        d3,  d30, d26
+        vsub.f32        q13, q9,  q0            @ {r10,r11,i10,i11}
+        vadd.f32        q9,  q9,  q0            @ {r2,r3,i2,i3}
+        vsub.f32        q15, q11, q1            @ {r14,r15,i14,i15}
+        vadd.f32        q11, q11, q1            @ {r6,r7,i6,i7}
+        vst2.32         {d18-d19},[r0,:128], r1 @ z[2],z[3]
+        vst2.32         {d22-d23},[r0,:128], r1 @ z[6],z[7]
+        vst2.32         {d26-d27},[r0,:128], r1 @ z[10],z[11]
+        vst2.32         {d30-d31},[r0,:128]     @ z[14],z[15]
+        bx              lr
+endfunc
+
+function fft_pass_neon
+        push            {r4-r6,lr}              @ r0 = z, r1 = wre, r2 = n on entry
+        mov             r6,  r2                 @ n
+        lsl             r5,  r2,  #3            @ 2 * n * sizeof FFTSample
+        lsl             r4,  r2,  #4            @ 2 * n * sizeof FFTComplex
+        lsl             r2,  r2,  #5            @ 4 * n * sizeof FFTComplex
+        add             r3,  r2,  r4            @ 6 * n * sizeof FFTComplex
+        add             r4,  r4,  r0            @ &z[o1]
+        add             r2,  r2,  r0            @ &z[o2]
+        add             r3,  r3,  r0            @ &z[o3]
+        vld1.32         {d20-d21},[r2,:128]     @ {z[o2],z[o2+1]}
+        movrel          r12, pmmp
+        vld1.32         {d22-d23},[r3,:128]     @ {z[o3],z[o3+1]}
+        add             r5,  r5,  r1            @ wim
+        vld1.32         {d6-d7},  [r12,:128]    @ pmmp
+        vswp            d21, d22
+        vld1.32         {d4},     [r1,:64]!     @ {wre[0],wre[1]}
+        sub             r5,  r5,  #4            @ wim--
+        vrev64.32       q1,  q11
+        vmul.f32        q11, q11, d4[1]
+        vmul.f32        q1,  q1,  q3
+        vld1.32         {d5[0]},  [r5,:32]      @ d5[0] = wim[-1]
+        vmla.f32        q11, q1,  d5[0]         @ {t1a,t2a,t5a,t6a}
+        vld2.32         {d16-d17},[r0,:128]     @ {z[0],z[1]}
+        sub             r6, r6, #1              @ n--
+        vld2.32         {d18-d19},[r4,:128]     @ {z[o1],z[o1+1]}
+        vzip.32         q10, q11
+        vadd.f32        d0,  d22, d20
+        vadd.f32        d1,  d21, d23
+        vsub.f32        d2,  d21, d23
+        vsub.f32        d3,  d22, d20
+        vsub.f32        q10, q8,  q0
+        vadd.f32        q8,  q8,  q0
+        vsub.f32        q11, q9,  q1
+        vadd.f32        q9,  q9,  q1
+        vst2.32         {d20-d21},[r2,:128]!    @ {z[o2],z[o2+1]}
+        vst2.32         {d16-d17},[r0,:128]!    @ {z[0],z[1]}
+        vst2.32         {d22-d23},[r3,:128]!    @ {z[o3],z[o3+1]}
+        vst2.32         {d18-d19},[r4,:128]!    @ {z[o1],z[o1+1]}
+        sub             r5,  r5,  #8            @ wim -= 2
+1:                                              @ remaining iterations, two complex values per pointer each
+        vld1.32         {d20-d21},[r2,:128]     @ {z[o2],z[o2+1]}
+        vld1.32         {d22-d23},[r3,:128]     @ {z[o3],z[o3+1]}
+        vswp            d21, d22
+        vld1.32         {d4}, [r1]!             @ {wre[0],wre[1]}
+        vrev64.32       q0,  q10
+        vmul.f32        q10, q10, d4[0]
+        vrev64.32       q1,  q11
+        vmul.f32        q11, q11, d4[1]
+        vld1.32         {d5}, [r5]              @ {wim[-1],wim[0]}
+        vmul.f32        q0,  q0,  q3
+        sub             r5,  r5,  #8            @ wim -= 2
+        vmul.f32        q1,  q1,  q3
+        vmla.f32        q10, q0,  d5[1]         @ {t1,t2,t5,t6}
+        vmla.f32        q11, q1,  d5[0]         @ {t1a,t2a,t5a,t6a}
+        vld2.32         {d16-d17},[r0,:128]     @ {z[0],z[1]}
+        subs            r6,  r6,  #1            @ n--
+        vld2.32         {d18-d19},[r4,:128]     @ {z[o1],z[o1+1]}
+        vzip.32         q10, q11
+        vadd.f32        d0,  d22, d20
+        vadd.f32        d1,  d21, d23
+        vsub.f32        d2,  d21, d23
+        vsub.f32        d3,  d22, d20
+        vsub.f32        q10, q8,  q0
+        vadd.f32        q8,  q8,  q0
+        vsub.f32        q11, q9,  q1
+        vadd.f32        q9,  q9,  q1
+        vst2.32         {d20-d21}, [r2,:128]!   @ {z[o2],z[o2+1]}
+        vst2.32         {d16-d17}, [r0,:128]!   @ {z[0],z[1]}
+        vst2.32         {d22-d23}, [r3,:128]!   @ {z[o3],z[o3+1]}
+        vst2.32         {d18-d19}, [r4,:128]!   @ {z[o1],z[o1+1]}
+        bne             1b                      @ loop until the subs above brings n to zero
+
+        pop             {r4-r6,pc}              @ restore registers and return
+endfunc
+
+.macro  def_fft n, n2, n4
+        .align 6
+function fft\n\()_neon
+        push            {r4, lr}                @ r4 keeps z across the three calls
+        mov             r4,  r0
+        bl              fft\n2\()_neon          @ transform of size n2 starting at z
+        add             r0,  r4,  #\n4*2*8      @ r0 = &z[2*n4], 8 bytes per complex value
+        bl              fft\n4\()_neon          @ transform of size n4 on that quarter
+        add             r0,  r4,  #\n4*3*8      @ r0 = &z[3*n4]
+        bl              fft\n4\()_neon          @ transform of size n4 on the last quarter
+        mov             r0,  r4                 @ z again, first argument of the combining pass
+        pop             {r4, lr}                @ lr restored so the pass returns to our caller
+        movrelx         r1,  X(ff_cos_\n)
+        mov             r2,  #\n4/2             @ iteration count n for fft_pass_neon
+        b               fft_pass_neon           @ tail call
+endfunc
+.endm
+        @ one function per size, arguments are n, n/2 and n/4
+        def_fft    32,    16,     8
+        def_fft    64,    32,    16
+        def_fft   128,    64,    32
+        def_fft   256,   128,    64
+        def_fft   512,   256,   128
+        def_fft  1024,   512,   256
+        def_fft  2048,  1024,   512
+        def_fft  4096,  2048,  1024
+        def_fft  8192,  4096,  2048
+        def_fft 16384,  8192,  4096
+        def_fft 32768, 16384,  8192
+        def_fft 65536, 32768, 16384
+
+function ff_fft_calc_neon, export=1
+        ldr             r2,  [r0]               @ nbits, first word of the context in r0
+        sub             r2,  r2,  #2            @ table index, entry 0 is fft4 (nbits 2)
+        movrel          r3,  fft_tab_neon
+        ldr             r3,  [r3, r2, lsl #2]   @ fft_tab_neon[nbits - 2], no range check here
+        mov             r0,  r1                 @ z becomes the first argument
+        bx              r3                      @ tail call into the selected routine
+endfunc
+
+function ff_fft_permute_neon, export=1
+        push            {r4,lr}
+        mov             r12, #1
+        ldr             r2,  [r0]       @ nbits
+        ldr             r3,  [r0, #12]  @ tmp_buf
+        ldr             r0,  [r0, #8]   @ revtab
+        lsl             r12, r12, r2    @ r12 = 1 << nbits, number of complex values
+        mov             r2,  r12        @ keep a copy of the count for the copy-back loop
+1:
+        vld1.32         {d0-d1}, [r1,:128]!     @ next two complex values from z
+        ldr             r4,  [r0], #4           @ two 16-bit revtab entries in one word
+        uxth            lr,  r4                 @ low halfword
+        uxth            r4,  r4,  ror #16       @ high halfword
+        add             lr,  r3,  lr,  lsl #3   @ tmp_buf + 8 * first index
+        add             r4,  r3,  r4,  lsl #3   @ tmp_buf + 8 * second index
+        vst1.32         {d0}, [lr,:64]
+        vst1.32         {d1}, [r4,:64]
+        subs            r12, r12, #2            @ two values scattered per iteration
+        bgt             1b
+
+        sub             r1,  r1,  r2,  lsl #3   @ rewind r1 to the start of z
+1:
+        vld1.32         {d0-d3}, [r3,:128]!     @ copy tmp_buf back over z,
+        vst1.32         {d0-d3}, [r1,:128]!     @ four complex values at a time
+        subs            r2,  r2,  #4
+        bgt             1b
+
+        pop             {r4,pc}
+endfunc
+
+const   fft_tab_neon, relocate=1
+        .word fft4_neon         @ index 0, reached with nbits = 2 (see ff_fft_calc_neon)
+        .word fft8_neon
+        .word fft16_neon
+        .word fft32_neon
+        .word fft64_neon
+        .word fft128_neon
+        .word fft256_neon
+        .word fft512_neon
+        .word fft1024_neon
+        .word fft2048_neon
+        .word fft4096_neon
+        .word fft8192_neon
+        .word fft16384_neon
+        .word fft32768_neon
+        .word fft65536_neon     @ index 14, nbits = 16, last valid entry
+endconst
+
+const   pmmp, align=4
+        .float          +1.0, -1.0, -1.0, +1.0  @ sign pattern loaded into q3 by fft16_neon and fft_pass_neon
+endconst
+
+const   mppm, align=4
+        .float          -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2    @ signed sqrt(1/2) factors loaded by fft16_neon
+endconst
diff --git a/media/ffvpx/libavcodec/arm/fft_vfp.S b/media/ffvpx/libavcodec/arm/fft_vfp.S
new file mode 100644
index 0000000000..ac601325f2
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/fft_vfp.S
@@ -0,0 +1,530 @@
+/*
+ * Copyright (c) 2013 RISC OS Open Ltd
+ * Author: Ben Avison <bavison@riscosopen.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+@ The fftx_internal_vfp versions of the functions obey a modified AAPCS:
+@ VFP is in RunFast mode, vector length 4, stride 1 throughout, and
+@ all single-precision VFP registers may be corrupted on exit. The a2
+@ register may not be clobbered in these functions, as it holds the
+@ stored original FPSCR.
+
+function ff_fft_calc_vfp, export=1
+        ldr     ip, [a1, #0]    @ nbits
+        mov     a1, a2          @ z becomes the first argument
+        movrel  a2, (fft_tab_vfp - 8)
+        ldr     pc, [a2, ip, lsl #2]    @ jump to fft_tab_vfp[nbits - 2], the -8 bias cancels nbits = 2, no range check
+endfunc
+const   fft_tab_vfp, relocate=1
+        .word   fft4_vfp            @ index 0, reached with nbits = 2
+        .word   fft8_vfp
+        .word   X(ff_fft16_vfp)     @ this one alone is exported
+        .word   fft32_vfp
+        .word   fft64_vfp
+        .word   fft128_vfp
+        .word   fft256_vfp
+        .word   fft512_vfp
+        .word   fft1024_vfp
+        .word   fft2048_vfp
+        .word   fft4096_vfp
+        .word   fft8192_vfp
+        .word   fft16384_vfp
+        .word   fft32768_vfp
+        .word   fft65536_vfp        @ index 14, nbits = 16, last valid entry
+endconst
+
+function fft4_vfp
+        vldr    d0, [a1, #0*2*4]   @ s0,s1   = z[0]
+        vldr    d4, [a1, #1*2*4]   @ s8,s9   = z[1]
+        vldr    d1, [a1, #2*2*4]   @ s2,s3   = z[2]
+        vldr    d5, [a1, #3*2*4]   @ s10,s11 = z[3]
+        @ stall
+        vadd.f  s12, s0, s8        @ i0
+        vadd.f  s13, s1, s9        @ i1
+        vadd.f  s14, s2, s10       @ i2
+        vadd.f  s15, s3, s11       @ i3
+        vsub.f  s8, s0, s8         @ i4
+        vsub.f  s9, s1, s9         @ i5
+        vsub.f  s10, s2, s10       @ i6
+        vsub.f  s11, s3, s11       @ i7
+        @ stall
+        @ stall
+        vadd.f  s0, s12, s14       @ z[0].re
+        vsub.f  s4, s12, s14       @ z[2].re
+        vadd.f  s1, s13, s15       @ z[0].im
+        vsub.f  s5, s13, s15       @ z[2].im
+        vadd.f  s7, s9, s10        @ z[3].im
+        vsub.f  s3, s9, s10        @ z[1].im
+        vadd.f  s2, s8, s11        @ z[1].re
+        vsub.f  s6, s8, s11        @ z[3].re
+        @ stall
+        @ stall
+        vstr    d0, [a1, #0*2*4]   @ z[0] = s0,s1
+        vstr    d2, [a1, #2*2*4]   @ z[2] = s4,s5
+        @ stall
+        @ stall
+        vstr    d1, [a1, #1*2*4]   @ z[1] = s2,s3
+        vstr    d3, [a1, #3*2*4]   @ z[3] = s6,s7
+
+        bx      lr
+endfunc
+
+.macro macro_fft8_head
+        @ FFT4
+        vldr    d4, [a1, #0 * 2*4]  @ s8,s9   = z[0]
+        vldr    d6, [a1, #1 * 2*4]  @ s12,s13 = z[1]
+        vldr    d5, [a1, #2 * 2*4]  @ s10,s11 = z[2]
+        vldr    d7, [a1, #3 * 2*4]  @ s14,s15 = z[3]
+            @ BF
+            vldr    d12, [a1, #4 * 2*4] @ s24,s25 = z[4]
+        vadd.f  s16, s8, s12    @ vector op
+            vldr    d14, [a1, #5 * 2*4] @ s28,s29 = z[5]
+            vldr    d13, [a1, #6 * 2*4] @ s26,s27 = z[6]
+            vldr    d15, [a1, #7 * 2*4] @ s30,s31 = z[7]
+        vsub.f  s20, s8, s12    @ vector op
+        vadd.f  s0, s16, s18
+        vsub.f  s2, s16, s18
+        vadd.f  s1, s17, s19
+        vsub.f  s3, s17, s19
+        vadd.f  s7, s21, s22
+        vsub.f  s5, s21, s22
+        vadd.f  s4, s20, s23
+        vsub.f  s6, s20, s23
+            vsub.f  s20, s24, s28   @ vector op
+        vstr    d0, [a1, #0 * 2*4]  @ transfer s0-s7 to s24-s31 via memory
+        vstr    d1, [a1, #1 * 2*4]
+        vldr    s0, cos1pi4         @ s0 = value stored at the cos1pi4 label
+            vadd.f  s16, s24, s28   @ vector op
+        vstr    d2, [a1, #2 * 2*4]
+        vstr    d3, [a1, #3 * 2*4]
+        vldr    d12, [a1, #0 * 2*4]
+            @ TRANSFORM
+            vmul.f  s20, s20, s0    @ vector x scalar op
+        vldr    d13, [a1, #1 * 2*4]
+        vldr    d14, [a1, #2 * 2*4]
+        vldr    d15, [a1, #3 * 2*4]
+        @ BUTTERFLIES
+        vadd.f  s0, s18, s16
+        vadd.f  s1, s17, s19
+        vsub.f  s2, s17, s19
+        vsub.f  s3, s18, s16
+            vadd.f  s4, s21, s20
+            vsub.f  s5, s21, s20
+            vadd.f  s6, s22, s23
+            vsub.f  s7, s22, s23
+        vadd.f  s8, s0, s24         @ vector op
+        vstr    d0, [a1, #0 * 2*4]  @ transfer s0-s3 to s12-s15 via memory
+        vstr    d1, [a1, #1 * 2*4]
+        vldr    d6, [a1, #0 * 2*4]
+        vldr    d7, [a1, #1 * 2*4]
+            vadd.f  s1, s5, s6
+            vadd.f  s0, s7, s4
+            vsub.f  s2, s5, s6
+            vsub.f  s3, s7, s4
+        vsub.f  s12, s24, s12       @ vector op
+            vsub.f  s5, s29, s1
+            vsub.f  s4, s28, s0
+            vsub.f  s6, s30, s2
+            vsub.f  s7, s31, s3
+            vadd.f  s16, s0, s28    @ vector op
+        vstr    d6, [a1, #4 * 2*4]  @ store to z[4]
+        vstr    d7, [a1, #6 * 2*4]  @ store to z[6]
+        vstr    d4, [a1, #0 * 2*4]  @ store to z[0]
+        vstr    d5, [a1, #2 * 2*4]  @ store to z[2]
+             vstr    d2, [a1, #5 * 2*4] @ store to z[5]
+             vstr    d3, [a1, #7 * 2*4] @ store to z[7]
+.endm
+
+.macro macro_fft8_tail
+             vstr    d8, [a1, #1 * 2*4] @ store s16,s17 to z[1], written by the last vector add of macro_fft8_head
+             vstr    d9, [a1, #3 * 2*4] @ store s18,s19 to z[3]
+.endm
+
+function .Lfft8_internal_vfp
+        macro_fft8_head
+        macro_fft8_tail
+        bx      lr              @ plain return, FPSCR and s16-s31 are restored by the caller (see fft8_vfp)
+endfunc
+
+function fft8_vfp
+        ldr     a3, =0x03030000     @ RunFast mode, vector length 4, stride 1
+        fmrx    a2, FPSCR           @ keep the incoming FPSCR in a2
+        fmxr    FPSCR, a3           @ switch to the mode loaded above
+        vpush   {s16-s31}           @ preserve s16-s31 around the internal routine
+        mov     ip, lr              @ the bl below overwrites lr
+        bl      .Lfft8_internal_vfp
+        vpop    {s16-s31}
+        fmxr    FPSCR, a2           @ put the original FPSCR back
+        bx      ip                  @ return through the saved lr
+endfunc
+
+.align 3
+cos1pi4:    @ cos(1*pi/4) = sqrt(2)/2
+        .float  0.707106769084930419921875
+cos1pi8:    @ cos(1*pi/8) = sqrt(2+sqrt(2))/2
+        .float  0.92387950420379638671875
+cos3pi8:    @ cos(3*pi/8) = sqrt(2-sqrt(2))/2
+        .float  0.3826834261417388916015625
+
+function .Lfft16_internal_vfp
+        macro_fft8_head
+        @ FFT4(z+8)
+        vldr    d10, [a1, #8 * 2*4]     @ s20,s21
+        vldr    d12, [a1, #9 * 2*4]     @ s24,s25
+        vldr    d11, [a1, #10 * 2*4]    @ s22,s23
+        vldr    d13, [a1, #11 * 2*4]    @ s26,s27
+        macro_fft8_tail
+        vadd.f  s16, s20, s24   @ vector op
+            @ FFT4(z+12)
+            vldr    d4, [a1, #12 * 2*4]     @ s8,s9
+            vldr    d6, [a1, #13 * 2*4]     @ s12,s13
+            vldr    d5, [a1, #14 * 2*4]     @ s10,s11
+        vsub.f  s20, s20, s24   @ vector op
+            vldr    d7, [a1, #15 * 2*4]     @ s14,s15
+        vadd.f  s0, s16, s18
+        vsub.f  s4, s16, s18
+        vadd.f  s1, s17, s19
+        vsub.f  s5, s17, s19
+        vadd.f  s7, s21, s22
+        vsub.f  s3, s21, s22
+        vadd.f  s2, s20, s23
+        vsub.f  s6, s20, s23
+            vadd.f  s16, s8, s12    @ vector op
+        vstr    d0, [a1, #8 * 2*4]
+        vstr    d2, [a1, #10 * 2*4]
+        vstr    d1, [a1, #9 * 2*4]
+            vsub.f  s20, s8, s12
+        vstr    d3, [a1, #11 * 2*4]
+        @ TRANSFORM(z[2],z[6],z[10],z[14],cos1pi4,cos1pi4)
+        vldr    d12, [a1, #10 * 2*4]
+            vadd.f  s0, s16, s18
+            vadd.f  s1, s17, s19
+            vsub.f  s6, s16, s18
+            vsub.f  s7, s17, s19
+            vsub.f  s3, s21, s22
+            vadd.f  s2, s20, s23
+            vadd.f  s5, s21, s22
+            vsub.f  s4, s20, s23
+            vstr    d0, [a1, #12 * 2*4]
+        vmov    s0, s6
+          @ TRANSFORM(z[1],z[5],z[9],z[13],cos1pi8,cos3pi8)
+          vldr    d6, [a1, #9 * 2*4]
+            vstr    d1, [a1, #13 * 2*4]
+        vldr    d1, cos1pi4 @ s2 = cos1pi4, s3 = cos1pi8
+            vstr    d2, [a1, #15 * 2*4]
+          vldr    d7, [a1, #13 * 2*4]
+        vadd.f  s4, s25, s24
+        vsub.f  s5, s25, s24
+        vsub.f  s6, s0, s7
+        vadd.f  s7, s0, s7
+          vmul.f  s20, s12, s3  @ vector op
+            @ TRANSFORM(z[3],z[7],z[11],z[15],cos3pi8,cos1pi8)
+            vldr    d4, [a1, #11 * 2*4]
+            vldr    d5, [a1, #15 * 2*4]
+            vldr    s1, cos3pi8
+        vmul.f  s24, s4, s2     @ vector * scalar op
+          vmul.f  s28, s12, s1  @ vector * scalar op
+            vmul.f  s12, s8, s1 @ vector * scalar op
+          vadd.f  s4, s20, s29
+          vsub.f  s5, s21, s28
+          vsub.f  s6, s22, s31
+          vadd.f  s7, s23, s30
+            vmul.f  s8, s8, s3  @ vector * scalar op
+          vldr    d8, [a1, #1 * 2*4]
+          vldr    d9, [a1, #5 * 2*4]
+            vldr    d10, [a1, #3 * 2*4]
+            vldr    d11, [a1, #7 * 2*4]
+        vldr    d14, [a1, #2 * 2*4]
+          vadd.f  s0, s6, s4
+          vadd.f  s1, s5, s7
+          vsub.f  s2, s5, s7
+          vsub.f  s3, s6, s4
+            vadd.f  s4, s12, s9
+            vsub.f  s5, s13, s8
+            vsub.f  s6, s14, s11
+            vadd.f  s7, s15, s10
+          vadd.f  s12, s0, s16  @ vector op
+          vstr    d0, [a1, #1 * 2*4]
+          vstr    d1, [a1, #5 * 2*4]
+          vldr    d4, [a1, #1 * 2*4]
+          vldr    d5, [a1, #5 * 2*4]
+            vadd.f  s0, s6, s4
+            vadd.f  s1, s5, s7
+            vsub.f  s2, s5, s7
+            vsub.f  s3, s6, s4
+          vsub.f  s8, s16, s8   @ vector op
+          vstr    d6, [a1, #1 * 2*4]
+          vstr    d7, [a1, #5 * 2*4]
+        vldr    d15, [a1, #6 * 2*4]
+            vsub.f  s4, s20, s0
+            vsub.f  s5, s21, s1
+            vsub.f  s6, s22, s2
+            vsub.f  s7, s23, s3
+            vadd.f  s20, s0, s20    @ vector op
+          vstr    d4, [a1, #9 * 2*4]
+              @ TRANSFORM_ZERO(z[0],z[4],z[8],z[12])
+              vldr    d6, [a1, #8 * 2*4]
+          vstr    d5, [a1, #13 * 2*4]
+              vldr    d7, [a1, #12 * 2*4]
+          vstr    d2, [a1, #11 * 2*4]
+              vldr    d8, [a1, #0 * 2*4]
+          vstr    d3, [a1, #15 * 2*4]
+              vldr    d9, [a1, #4 * 2*4]
+        vadd.f  s0, s26, s24
+        vadd.f  s1, s25, s27
+        vsub.f  s2, s25, s27
+        vsub.f  s3, s26, s24
+              vadd.f  s4, s14, s12
+              vadd.f  s5, s13, s15
+              vsub.f  s6, s13, s15
+              vsub.f  s7, s14, s12
+        vadd.f  s8, s0, s28 @ vector op
+        vstr    d0, [a1, #3 * 2*4]
+        vstr    d1, [a1, #7 * 2*4]
+        vldr    d6, [a1, #3 * 2*4]
+        vldr    d7, [a1, #7 * 2*4]
+              vsub.f  s0, s16, s4
+              vsub.f  s1, s17, s5
+              vsub.f  s2, s18, s6
+              vsub.f  s3, s19, s7
+        vsub.f  s12, s28, s12       @ vector op
+              vadd.f  s16, s4, s16  @ vector op
+            vstr    d10, [a1, #3 * 2*4]
+            vstr    d11, [a1, #7 * 2*4]
+        vstr    d4, [a1, #2 * 2*4]
+        vstr    d5, [a1, #6 * 2*4]
+              vstr    d0, [a1, #8 * 2*4]
+              vstr    d1, [a1, #12 * 2*4]
+        vstr    d6, [a1, #10 * 2*4]
+        vstr    d7, [a1, #14 * 2*4]
+              vstr    d8, [a1, #0 * 2*4]
+              vstr    d9, [a1, #4 * 2*4]
+
+        bx      lr                  @ return; FPSCR and s16-s31 are handled by the wrapper (see ff_fft16_vfp)
+endfunc
+
+function ff_fft16_vfp, export=1
+        ldr     a3, =0x03030000     @ RunFast mode, vector length 4, stride 1
+        fmrx    a2, FPSCR           @ a2 = FPSCR on entry, written back before returning
+        fmxr    FPSCR, a3           @ switch to the mode loaded into a3
+        vpush   {s16-s31}           @ save s16-s31 around the call
+        mov     ip, lr              @ keep the return address: bl overwrites lr
+        bl      .Lfft16_internal_vfp
+        vpop    {s16-s31}           @ restore s16-s31
+        fmxr    FPSCR, a2           @ restore the FPSCR value saved on entry
+        bx      ip                  @ return through the saved address
+endfunc
+
+.macro pass n, z0, z1, z2, z3
+        add     v6, v5, #4*2*\n     @ v6 = v5 + 8*n bytes; v5 is read upwards (vldmia), v6 downwards (vldmdb)
+        @ TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3])
+            @ TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1])
+                @ TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0])
+                    @ TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1])
+            vldr    d8, [\z2, #8*(o2+1)]        @ s16,s17
+            vldmdb  v6!, {s2}
+            vldr    d9, [\z3, #8*(o3+1)]        @ s18,s19
+            vldmia  v5!, {s0,s1}                @ s0 is unused
+        vldr    s7, [\z2, #8*o2]            @ t1
+            vmul.f  s20, s16, s2                @ vector * scalar
+        vldr    s0, [\z3, #8*o3]            @ t5
+        vldr    s6, [\z2, #8*o2+4]          @ t2
+        vldr    s3, [\z3, #8*o3+4]          @ t6
+            vmul.f  s16, s16, s1                @ vector * scalar
+        ldr     a4, =\n-1                   @ a4 = n-1 trips through the loop at 1:, the final pass is unrolled after it
+1:      add     \z0, \z0, #8*2
+ .if \n*4*2 >= 512
+        add     \z1, \z1, #8*2
+ .endif
+ .if \n*4*2 >= 256
+        add     \z2, \z2, #8*2
+ .endif
+ .if \n*4*2 >= 512
+        add     \z3, \z3, #8*2
+ .endif
+        @ up to 2 stalls (VFP vector issuing / waiting for s0)
+        @ depending upon whether this is the first iteration and
+        @ how many add instructions are inserted above
+        vadd.f  s4, s0, s7                  @ t5
+        vadd.f  s5, s6, s3                  @ t6
+        vsub.f  s6, s6, s3                  @ t4
+        vsub.f  s7, s0, s7                  @ t3
+        vldr    d6, [\z0, #8*0-8*2]         @ s12,s13
+            vadd.f  s0, s16, s21                @ t1
+        vldr    d7, [\z1, #8*o1-8*2]        @ s14,s15
+            vsub.f  s1, s18, s23                @ t5
+        vadd.f  s8, s4, s12                 @ vector + vector
+        @ stall (VFP vector issuing)
+        @ stall (VFP vector issuing)
+        @ stall (VFP vector issuing)
+        vsub.f  s4, s12, s4
+        vsub.f  s5, s13, s5
+        vsub.f  s6, s14, s6
+        vsub.f  s7, s15, s7
+            vsub.f  s2, s17, s20                @ t2
+            vadd.f  s3, s19, s22                @ t6
+        vstr    d4, [\z0, #8*0-8*2]         @ s8,s9
+        vstr    d5, [\z1, #8*o1-8*2]        @ s10,s11
+        @ stall (waiting for s5)
+        vstr    d2, [\z2, #8*o2-8*2]        @ s4,s5
+            vadd.f  s4, s1, s0                  @ t5
+        vstr    d3, [\z3, #8*o3-8*2]        @ s6,s7
+            vsub.f  s7, s1, s0                  @ t3
+            vadd.f  s5, s2, s3                  @ t6
+            vsub.f  s6, s2, s3                  @ t4
+            vldr    d6, [\z0, #8*1-8*2]         @ s12,s13
+            vldr    d7, [\z1, #8*(o1+1)-8*2]    @ s14,s15
+                vldr    d4, [\z2, #8*o2]            @ s8,s9
+                vldmdb  v6!, {s2,s3}
+                vldr    d5, [\z3, #8*o3]            @ s10,s11
+            vadd.f  s20, s4, s12                @ vector + vector
+                vldmia  v5!, {s0,s1}
+                    vldr    d8, [\z2, #8*(o2+1)]        @ s16,s17
+            @ stall (VFP vector issuing)
+            vsub.f  s4, s12, s4
+            vsub.f  s5, s13, s5
+            vsub.f  s6, s14, s6
+            vsub.f  s7, s15, s7
+                vmul.f  s12, s8, s3                 @ vector * scalar
+            vstr    d10, [\z0, #8*1-8*2]        @ s20,s21
+                    vldr    d9, [\z3, #8*(o3+1)]        @ s18,s19
+            vstr    d11, [\z1, #8*(o1+1)-8*2]   @ s22,s23
+                vmul.f  s8, s8, s0                  @ vector * scalar
+            vstr    d2, [\z2, #8*(o2+1)-8*2]    @ s4,s5
+            @ stall (waiting for s7)
+            vstr    d3, [\z3, #8*(o3+1)-8*2]    @ s6,s7
+                    vmul.f  s20, s16, s2                @ vector * scalar
+                @ stall (VFP vector issuing)
+                @ stall (VFP vector issuing)
+                @ stall (VFP vector issuing)
+                vadd.f  s7, s8, s13                 @ t1
+                vsub.f  s6, s9, s12                 @ t2
+                vsub.f  s0, s10, s15                @ t5
+                vadd.f  s3, s11, s14                @ t6
+                    vmul.f  s16, s16, s1                @ vector * scalar
+        subs    a4, a4, #1                  @ one loop trip done
+        bne     1b                          @ repeat until a4 reaches zero
+        @ What remains is identical to the first two indentations of
+        @ the above, but without the increment of z
+        vadd.f  s4, s0, s7                  @ t5
+        vadd.f  s5, s6, s3                  @ t6
+        vsub.f  s6, s6, s3                  @ t4
+        vsub.f  s7, s0, s7                  @ t3
+        vldr    d6, [\z0, #8*0]             @ s12,s13
+            vadd.f  s0, s16, s21                @ t1
+        vldr    d7, [\z1, #8*o1]            @ s14,s15
+            vsub.f  s1, s18, s23                @ t5
+        vadd.f  s8, s4, s12                 @ vector + vector
+        vsub.f  s4, s12, s4
+        vsub.f  s5, s13, s5
+        vsub.f  s6, s14, s6
+        vsub.f  s7, s15, s7
+            vsub.f  s2, s17, s20                @ t2
+            vadd.f  s3, s19, s22                @ t6
+        vstr    d4, [\z0, #8*0]             @ s8,s9
+        vstr    d5, [\z1, #8*o1]            @ s10,s11
+        vstr    d2, [\z2, #8*o2]            @ s4,s5
+            vadd.f  s4, s1, s0                  @ t5
+        vstr    d3, [\z3, #8*o3]            @ s6,s7
+            vsub.f  s7, s1, s0                  @ t3
+            vadd.f  s5, s2, s3                  @ t6
+            vsub.f  s6, s2, s3                  @ t4
+            vldr    d6, [\z0, #8*1]             @ s12,s13
+            vldr    d7, [\z1, #8*(o1+1)]        @ s14,s15
+            vadd.f  s20, s4, s12                @ vector + vector
+            vsub.f  s4, s12, s4
+            vsub.f  s5, s13, s5
+            vsub.f  s6, s14, s6
+            vsub.f  s7, s15, s7
+            vstr    d10, [\z0, #8*1]            @ s20,s21
+            vstr    d11, [\z1, #8*(o1+1)]       @ s22,s23
+            vstr    d2, [\z2, #8*(o2+1)]        @ s4,s5
+            vstr    d3, [\z3, #8*(o3+1)]        @ s6,s7
+.endm
+
+.macro  def_fft n, n2, n4
+function .Lfft\n\()_internal_vfp
+ .if \n >= 512
+        push    {v1-v6,lr}              @ v1-v4 are the four pass pointers, v5-v6 walk the ff_cos table
+ .elseif \n >= 256
+        push    {v1-v2,v5-v6,lr}        @ only v1 and v2 serve as pass pointers at this size
+ .else
+        push    {v1,v5-v6,lr}           @ v1 alone serves as pass pointer at this size
+ .endif
+        mov     v1, a1                  @ v1 = base pointer, kept across the calls below
+        bl      .Lfft\n2\()_internal_vfp
+        add     a1, v1, #8*(\n/4)*2     @ a1 = base + n/2 elements of 8 bytes
+        bl      .Lfft\n4\()_internal_vfp
+        movrelx v5, X(ff_cos_\n), a1
+        add     a1, v1, #8*(\n/4)*3     @ a1 = base + 3n/4 elements of 8 bytes
+        bl      .Lfft\n4\()_internal_vfp
+ .if \n >= 512
+  .set o1, 0*(\n/4/2)
+  .set o2, 0*(\n/4/2)
+  .set o3, 0*(\n/4/2)
+        add     v2, v1, #8*2*(\n/4/2)   @ v2 = base + n/4 elements, so o1-o3 stay zero
+        add     v3, v1, #8*4*(\n/4/2)   @ v3 = base + n/2 elements
+        add     v4, v1, #8*6*(\n/4/2)   @ v4 = base + 3n/4 elements
+        pass    (\n/4/2), v1, v2, v3, v4
+        pop     {v1-v6,pc}
+ .elseif \n >= 256
+  .set o1, 2*(\n/4/2)
+  .set o2, 0*(\n/4/2)
+  .set o3, 2*(\n/4/2)
+        add     v2, v1, #8*4*(\n/4/2)   @ v2 = base + n/2 elements; o1 and o3 add the n/4 on top of v1 and v2
+        pass    (\n/4/2), v1, v1, v2, v2
+        pop     {v1-v2,v5-v6,pc}
+ .else
+  .set o1, 2*(\n/4/2)
+  .set o2, 4*(\n/4/2)
+  .set o3, 6*(\n/4/2)
+        pass    (\n/4/2), v1, v1, v1, v1
+        pop     {v1,v5-v6,pc}           @ restore and return in one instruction
+ .endif
+endfunc
+
+function fft\n\()_vfp
+        ldr     a3, =0x03030000 /* RunFast mode, vector length 4, stride 1 */
+        fmrx    a2, FPSCR               @ a2 = FPSCR on entry, written back before returning
+        fmxr    FPSCR, a3               @ switch to the mode loaded into a3
+        vpush   {s16-s31}               @ save s16-s31 around the call
+        mov     ip, lr                  @ keep the return address: bl overwrites lr
+        bl      .Lfft\n\()_internal_vfp
+        vpop    {s16-s31}               @ restore s16-s31
+        fmxr    FPSCR, a2               @ restore the FPSCR value saved on entry
+        bx      ip                      @ return through the saved address
+endfunc
+
+.ltorg                                  @ place the literal pool for the ldr = loads of this expansion here
+.endm
+
+        def_fft    32,    16,     8
+        def_fft    64,    32,    16
+        def_fft   128,    64,    32
+        def_fft   256,   128,    64
+        def_fft   512,   256,   128
+        def_fft  1024,   512,   256
+        def_fft  2048,  1024,   512
+        def_fft  4096,  2048,  1024
+        def_fft  8192,  4096,  2048
+        def_fft 16384,  8192,  4096
+        def_fft 32768, 16384,  8192
+        def_fft 65536, 32768, 16384
diff --git a/media/ffvpx/libavcodec/arm/flacdsp_arm.S b/media/ffvpx/libavcodec/arm/flacdsp_arm.S
new file mode 100644
index 0000000000..f8861c5967
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/flacdsp_arm.S
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function flac_lpc_16_1_arm
+        ldr             r12, [sp]               @ r12 = first stack argument (len in the C prototype)
+        push            {r4, lr}
+        ldr             r1,  [r1]               @ r1 = coeffs[0], the only coefficient read here
+        subs            r12, r12, #2            @ flags feed the beq/poplt below
+        ldr             lr,  [r0], #4           @ lr = first word, r0 advances one word
+        beq             2f                      @ r12 was exactly 2: one word to update
+        it              lt
+        poplt           {r4, pc}                @ r12 was below 2: return
+1:
+        mul             r4,  lr,  r1            @ r4 = previous word * coefficient
+        ldm             r0,  {r2, lr}           @ next two words to update
+        add_sh          r2,  r2,  r4,  asr r3
+        mul             r4,  r2,  r1            @ uses r2 as left by add_sh
+        subs            r12, r12, #2            @ two words handled per iteration
+        add_sh          lr,  lr,  r4,  asr r3
+        stm             r0!, {r2, lr}           @ write both back, r0 advances two words
+        bgt             1b
+        it              lt
+        poplt           {r4, pc}                @ counter went negative: done
+2:
+        mul             r4,  lr,  r1            @ counter hit zero: one last word remains
+        ldr             r2,  [r0]
+        add_sh          r2,  r2,  r4,  asr r3
+        str             r2,  [r0]
+        pop             {r4, pc}
+endfunc
+
+function flac_lpc_16_2_arm
+        ldr             r12, [sp]               @ r12 = first stack argument (len in the C prototype)
+        subs            r12, r12, r2            @ r12 -= r2 (order in the C prototype)
+        it              le
+        bxle            lr                      @ result <= 0: return without touching memory
+
+        push            {r4-r9, lr}
+        ldm             r0!, {r6, r7}           @ r6, r7 = first two words, r0 advances past them
+        ldm             r1,  {r8, r9}           @ r8, r9 = coeffs[0], coeffs[1]
+        subs            r12, r12, #1
+        beq             2f                      @ exactly one word to update
+1:
+        mul             r4,  r6,  r8
+        mul             r5,  r7,  r8
+        mla             r4,  r7,  r9,  r4       @ r4 = r6*r8 + r7*r9
+        ldm             r0,  {r6, r7}           @ next two words to update
+        add_sh          r6,  r6,  r4,  asr r3
+        mla             r5,  r6,  r9,  r5       @ r5 = old r7*r8 + r6*r9, r6 as left by add_sh
+        add_sh          r7,  r7,  r5,  asr r3
+        stm             r0!, {r6, r7}           @ write both back, r0 advances two words
+        subs            r12, r12, #2            @ two words handled per iteration
+        bgt             1b
+        it              lt
+        poplt           {r4-r9, pc}             @ counter went negative: done
+2:
+        mul             r4,  r6,  r8            @ counter hit zero: one last word remains
+        mla             r4,  r7,  r9,  r4
+        ldr             r5,  [r0]
+        add_sh          r5,  r5,  r4,  asr r3
+        str             r5,  [r0]
+        pop             {r4-r9, pc}
+endfunc
+
+function ff_flac_lpc_16_arm, export=1
+        cmp             r2,  #2                 @ r2 = order in the C prototype
+        blt             flac_lpc_16_1_arm       @ below 2: hand over to the one-coefficient routine
+        beq             flac_lpc_16_2_arm       @ exactly 2: hand over to the two-coefficient routine
+
+        ldr             r12, [sp]               @ r12 = first stack argument (len in the C prototype)
+        subs            r12, r12, r2            @ r12 = len - order
+        it              le
+        bxle            lr                      @ result <= 0: return without touching memory
+
+        push            {r4-r9, lr}
+
+        subs            r12, r12, #1
+        beq             3f                      @ exactly one word to update
+1:
+        sub             lr,  r2,  #2            @ inner loop counter derived from order
+        mov             r4,  #0                 @ r4 = accumulator for the first word of the pair
+        mov             r5,  #0                 @ r5 = accumulator for the second word of the pair
+
+        ldr             r7,  [r0], #4
+        ldr             r9,  [r1], #4
+2:
+        mla             r4,  r7,  r9,  r4
+        ldm             r0!, {r6, r7}
+        mla             r5,  r6,  r9,  r5
+        ldm             r1!, {r8, r9}
+        mla             r4,  r6,  r8,  r4
+        subs            lr,  lr,  #2            @ two coefficients consumed per iteration
+        mla             r5,  r7,  r8,  r5
+        bgt             2b
+        blt             6f                      @ counter went negative: skip the extra step below
+
+        mla             r4,  r7,  r9,  r4
+        ldr             r7,  [r0], #4
+        mla             r5,  r7,  r9,  r5
+        ldr             r9,  [r1], #4
+6:
+        mla             r4,  r7,  r9,  r4
+        ldm             r0,  {r6, r7}           @ the two words to update
+        add_sh          r6,  r6,  r4,  asr r3
+        mla             r5,  r6,  r9,  r5       @ second accumulator also needs r6 as left by add_sh
+        add_sh          r7,  r7,  r5,  asr r3
+        stm             r0!, {r6, r7}           @ write both back, r0 advances two words
+        sub             r0,  r0,  r2,  lsl #2   @ step r0 back by order words
+        sub             r1,  r1,  r2,  lsl #2   @ step r1 back by order words
+
+        subs            r12, r12, #2            @ two words handled per outer iteration
+        bgt             1b
+        it              lt
+        poplt           {r4-r9, pc}             @ counter went negative: done
+3:
+        mov             r4,  #0                 @ counter hit zero: one last word, accumulate in r4
+4:
+        ldr             r5,  [r1], #4
+        ldr             r6,  [r0], #4
+        mla             r4,  r5,  r6,  r4
+        subs            r2,  r2,  #1            @ r2 (order) counts the remaining terms
+        bgt             4b
+        ldr             r5,  [r0]
+        add_sh          r5,  r5,  r4,  asr r3
+        str             r5,  [r0]
+        pop             {r4-r9, pc}
+endfunc
diff --git a/media/ffvpx/libavcodec/arm/flacdsp_init_arm.c b/media/ffvpx/libavcodec/arm/flacdsp_init_arm.c
new file mode 100644
index 0000000000..9962cc89f4
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/flacdsp_init_arm.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavcodec/flacdsp.h"
+
+void ff_flac_lpc_16_arm(int32_t *samples, const int coeffs[32], int order,
+                        int qlevel, int len);
+
+av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels)
+{
+    c->lpc16 = ff_flac_lpc_16_arm; /* installed unconditionally; fmt and channels are not consulted */
+}
diff --git a/media/ffvpx/libavcodec/arm/idct.h b/media/ffvpx/libavcodec/arm/idct.h
new file mode 100644
index 0000000000..6c79a69c5f
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/idct.h
@@ -0,0 +1,41 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_IDCT_H
+#define AVCODEC_ARM_IDCT_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_j_rev_dct_arm(int16_t *data);
+
+void ff_simple_idct_arm(int16_t *data);
+
+void ff_simple_idct_armv5te(int16_t *data);
+void ff_simple_idct_put_armv5te(uint8_t *dest, ptrdiff_t line_size, int16_t *data);
+void ff_simple_idct_add_armv5te(uint8_t *dest, ptrdiff_t line_size, int16_t *data);
+
+void ff_simple_idct_armv6(int16_t *data);
+void ff_simple_idct_put_armv6(uint8_t *dest, ptrdiff_t line_size, int16_t *data);
+void ff_simple_idct_add_armv6(uint8_t *dest, ptrdiff_t line_size, int16_t *data);
+
+void ff_simple_idct_neon(int16_t *data);
+void ff_simple_idct_put_neon(uint8_t *dest, ptrdiff_t line_size, int16_t *data);
+void ff_simple_idct_add_neon(uint8_t *dest, ptrdiff_t line_size, int16_t *data);
+
+#endif /* AVCODEC_ARM_IDCT_H */
diff --git a/media/ffvpx/libavcodec/arm/idctdsp_arm.S b/media/ffvpx/libavcodec/arm/idctdsp_arm.S
new file mode 100644
index 0000000000..057eff9be8
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/idctdsp_arm.S
@@ -0,0 +1,120 @@
+@
+@ ARMv4-optimized IDCT functions
+@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
+@
+@ This file is part of FFmpeg.
+@
+@ FFmpeg is free software; you can redistribute it and/or
+@ modify it under the terms of the GNU Lesser General Public
+@ License as published by the Free Software Foundation; either
+@ version 2.1 of the License, or (at your option) any later version.
+@
+@ FFmpeg is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY; without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+@ Lesser General Public License for more details.
+@
+@ You should have received a copy of the GNU Lesser General Public
+@ License along with FFmpeg; if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+@
+
+#include "config.h"
+#include "libavutil/arm/asm.S"
+
+@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, ptrdiff_t stride)
+function ff_add_pixels_clamped_arm, export=1, align=5
+        push            {r4-r10}
+        mov             r10, #8                 /* loop counter: eight iterations */
+1:
+        ldr             r4,  [r1]               /* load dest */
+        /* block[0] and block[1]*/
+        ldrsh           r5,  [r0]
+        ldrsh           r7,  [r0, #2]
+        and             r6,  r4,  #0xFF
+        and             r8,  r4,  #0xFF00
+        add             r6,  r6,  r5
+        add             r8,  r7,  r8,  lsr #8
+        mvn             r5,  r5                 /* ~block[0]: top byte 0xFF if it was >= 0, 0x00 if < 0 */
+        mvn             r7,  r7
+        tst             r6,  #0x100             /* bit 8 of the sum is the out-of-range indicator */
+        it              ne
+        movne           r6,  r5,  lsr #24       /* clamp: 0xFF or 0x00 according to the sign above */
+        tst             r8,  #0x100
+        it              ne
+        movne           r8,  r7,  lsr #24
+        mov             r9,  r6
+        ldrsh           r5,  [r0, #4]           /* moved from [A] */
+        orr             r9,  r9,  r8,  lsl #8
+        /* block[2] and block[3] */
+        /* [A] */
+        ldrsh           r7,  [r0, #6]
+        and             r6,  r4,  #0xFF0000
+        and             r8,  r4,  #0xFF000000
+        add             r6,  r5,  r6,  lsr #16
+        add             r8,  r7,  r8,  lsr #24
+        mvn             r5,  r5
+        mvn             r7,  r7
+        tst             r6,  #0x100
+        it              ne
+        movne           r6,  r5,  lsr #24
+        tst             r8,  #0x100
+        it              ne
+        movne           r8,  r7,  lsr #24
+        orr             r9,  r9,  r6,  lsl #16
+        ldr             r4,  [r1, #4]           /* moved from [B] */
+        orr             r9,  r9,  r8,  lsl #24
+        /* store dest */
+        ldrsh           r5,  [r0, #8]           /* moved from [C] */
+        str             r9,  [r1]
+
+        /* load dest */
+        /* [B] */
+        /* block[4] and block[5] */
+        /* [C] */
+        ldrsh           r7,  [r0, #10]
+        and             r6,  r4,  #0xFF
+        and             r8,  r4,  #0xFF00
+        add             r6,  r6,  r5
+        add             r8,  r7,  r8,  lsr #8
+        mvn             r5,  r5
+        mvn             r7,  r7
+        tst             r6,  #0x100
+        it              ne
+        movne           r6,  r5,  lsr #24
+        tst             r8,  #0x100
+        it              ne
+        movne           r8,  r7,  lsr #24
+        mov             r9,  r6
+        ldrsh           r5,  [r0, #12]          /* moved from [D] */
+        orr             r9,  r9,  r8,  lsl #8
+        /* block[6] and block[7] */
+        /* [D] */
+        ldrsh           r7,  [r0, #14]
+        and             r6,  r4,  #0xFF0000
+        and             r8,  r4,  #0xFF000000
+        add             r6,  r5,  r6,  lsr #16
+        add             r8,  r7,  r8,  lsr #24
+        mvn             r5,  r5
+        mvn             r7,  r7
+        tst             r6,  #0x100
+        it              ne
+        movne           r6,  r5,  lsr #24
+        tst             r8,  #0x100
+        it              ne
+        movne           r8,  r7,  lsr #24
+        orr             r9,  r9,  r6,  lsl #16
+        add             r0,  r0,  #16           /* moved from [E] */
+        orr             r9,  r9,  r8,  lsl #24
+        subs            r10, r10, #1            /* moved from [F] */
+        /* store dest */
+        str             r9,  [r1, #4]
+
+        /* [E] */
+        /* [F] */
+        add             r1,  r1,  r2            /* advance dest by the stride in r2 */
+        bne             1b                      /* flags still come from the subs above */
+
+        pop             {r4-r10}
+        bx              lr
+endfunc
diff --git a/media/ffvpx/libavcodec/arm/idctdsp_arm.h b/media/ffvpx/libavcodec/arm/idctdsp_arm.h
new file mode 100644
index 0000000000..d7bc5cd02a
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/idctdsp_arm.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_IDCTDSP_ARM_H
+#define AVCODEC_ARM_IDCTDSP_ARM_H
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+
+void ff_idctdsp_init_armv5te(IDCTDSPContext *c, AVCodecContext *avctx,
+                             unsigned high_bit_depth);
+void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx,
+                           unsigned high_bit_depth);
+void ff_idctdsp_init_neon(IDCTDSPContext *c, AVCodecContext *avctx,
+                          unsigned high_bit_depth);
+
+#endif /* AVCODEC_ARM_IDCTDSP_ARM_H */
diff --git a/media/ffvpx/libavcodec/arm/idctdsp_armv6.S b/media/ffvpx/libavcodec/arm/idctdsp_armv6.S
new file mode 100644
index 0000000000..a6e77d6da1
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/idctdsp_armv6.S
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_add_pixels_clamped_armv6, export=1
+        push            {r4-r8,lr}
+        mov             r3,  #8                     @ loop counter: eight iterations
+1:
+        ldm             r0!, {r4,r5,r12,lr}         @ four words = eight 16-bit values, r0 advances 16 bytes
+        ldrd            r6,  r7,  [r1]              @ eight bytes at r1
+        pkhbt           r8,  r4,  r5,  lsl #16      @ r8 = low halfwords of r4 and r5
+        pkhtb           r5,  r5,  r4,  asr #16      @ r5 = high halfwords of r4 and r5
+        pkhbt           r4,  r12, lr,  lsl #16      @ r4 = low halfwords of r12 and lr
+        pkhtb           lr,  lr,  r12, asr #16      @ lr = high halfwords of r12 and lr
+        pld             [r1, r2]                    @ prefetch the address the next iteration reads
+        uxtab16         r8,  r8,  r6                @ add bytes 0 and 2 of r6 to the two halfwords
+        uxtab16         r5,  r5,  r6,  ror #8       @ add bytes 1 and 3 of r6
+        uxtab16         r4,  r4,  r7                @ add bytes 0 and 2 of r7
+        uxtab16         lr,  lr,  r7,  ror #8       @ add bytes 1 and 3 of r7
+        usat16          r8,  #8,  r8                @ saturate each halfword to 0..255
+        usat16          r5,  #8,  r5
+        usat16          r4,  #8,  r4
+        usat16          lr,  #8,  lr
+        orr             r6,  r8,  r5,  lsl #8       @ interleave back into four bytes
+        orr             r7,  r4,  lr,  lsl #8
+        subs            r3,  r3,  #1
+        strd_post       r6,  r7,  r1,  r2
+        bgt             1b                          @ flags come from the subs above
+        pop             {r4-r8,pc}
+endfunc
diff --git a/media/ffvpx/libavcodec/arm/idctdsp_init_arm.c b/media/ffvpx/libavcodec/arm/idctdsp_init_arm.c
new file mode 100644
index 0000000000..ebc90e4b49
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/idctdsp_init_arm.c
@@ -0,0 +1,94 @@
+/*
+ * ARM-optimized IDCT functions
+ * Copyright (c) 2001 Lionel Ulmer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+#include "idct.h"
+#include "idctdsp_arm.h"
+
+void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest,
+                               ptrdiff_t line_size);
+
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+ * converted */
+static void j_rev_dct_arm_put(uint8_t *dest, ptrdiff_t line_size,
+                              int16_t *block)
+{
+    ff_j_rev_dct_arm(block);                         /* IDCT on block (presumably in place) */
+    ff_put_pixels_clamped_c(block, dest, line_size); /* then put the clamped result into dest */
+}
+
+static void j_rev_dct_arm_add(uint8_t *dest, ptrdiff_t line_size,
+                              int16_t *block)
+{
+    ff_j_rev_dct_arm(block);                           /* IDCT on block (presumably in place) */
+    ff_add_pixels_clamped_arm(block, dest, line_size); /* then add the clamped result onto dest */
+}
+
+static void simple_idct_arm_put(uint8_t *dest, ptrdiff_t line_size,
+                                int16_t *block)
+{
+    ff_simple_idct_arm(block);                       /* IDCT on block (presumably in place) */
+    ff_put_pixels_clamped_c(block, dest, line_size); /* then put the clamped result into dest */
+}
+
+static void simple_idct_arm_add(uint8_t *dest, ptrdiff_t line_size,
+                                int16_t *block)
+{
+    ff_simple_idct_arm(block);                         /* IDCT on block (presumably in place) */
+    ff_add_pixels_clamped_arm(block, dest, line_size); /* then add the clamped result onto dest */
+}
+
+av_cold void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,
+                                 unsigned high_bit_depth)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (!avctx->lowres && !high_bit_depth) { /* IDCT pointers are only touched in this case */
+        if ((avctx->idct_algo == FF_IDCT_AUTO && !(avctx->flags & AV_CODEC_FLAG_BITEXACT)) ||
+            avctx->idct_algo == FF_IDCT_ARM) { /* AUTO selects this IDCT only when BITEXACT is off */
+            c->idct_put  = j_rev_dct_arm_put;
+            c->idct_add  = j_rev_dct_arm_add;
+            c->idct      = ff_j_rev_dct_arm;
+            c->perm_type = FF_IDCT_PERM_LIBMPEG2;
+        } else if (avctx->idct_algo == FF_IDCT_SIMPLEARM) { /* only on explicit request */
+            c->idct_put  = simple_idct_arm_put;
+            c->idct_add  = simple_idct_arm_add;
+            c->idct      = ff_simple_idct_arm;
+            c->perm_type = FF_IDCT_PERM_NONE;
+        }
+    }
+
+    c->add_pixels_clamped = ff_add_pixels_clamped_arm; /* set regardless of the checks above */
+
+    if (have_armv5te(cpu_flags))
+        ff_idctdsp_init_armv5te(c, avctx, high_bit_depth); /* may replace the IDCT pointers set above */
+    if (have_armv6(cpu_flags))
+        ff_idctdsp_init_armv6(c, avctx, high_bit_depth);   /* NOTE(review): presumably overrides likewise; body not in view */
+    if (have_neon(cpu_flags))
+        ff_idctdsp_init_neon(c, avctx, high_bit_depth);    /* NOTE(review): presumably overrides likewise; body not in view */
+}
diff --git a/media/ffvpx/libavcodec/arm/idctdsp_init_armv5te.c b/media/ffvpx/libavcodec/arm/idctdsp_init_armv5te.c
new file mode 100644
index 0000000000..3d881e1f18
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/idctdsp_init_armv5te.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+#include "idct.h"
+#include "idctdsp_arm.h"
+
+av_cold void ff_idctdsp_init_armv5te(IDCTDSPContext *c, AVCodecContext *avctx,
+                                     unsigned high_bit_depth)
+{
+    const int algo = avctx->idct_algo;
+    /* The ARMv5TE simple IDCT is limited to full-resolution, non-high-bit-depth use. */
+    if (avctx->lowres || high_bit_depth ||
+        (algo != FF_IDCT_AUTO && algo != FF_IDCT_SIMPLEAUTO && algo != FF_IDCT_SIMPLEARMV5TE))
+        return;
+    c->idct_put  = ff_simple_idct_put_armv5te;
+    c->idct_add  = ff_simple_idct_add_armv5te;
+    c->idct      = ff_simple_idct_armv5te;
+    c->perm_type = FF_IDCT_PERM_NONE;
+}
diff --git a/media/ffvpx/libavcodec/arm/idctdsp_init_armv6.c b/media/ffvpx/libavcodec/arm/idctdsp_init_armv6.c
new file mode 100644
index 0000000000..edf3070e15
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/idctdsp_init_armv6.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+#include "idct.h"
+#include "idctdsp_arm.h"
+
+void ff_add_pixels_clamped_armv6(const int16_t *block, uint8_t *pixels,
+                                 ptrdiff_t line_size); /* body is not in this file; presumably idctdsp_armv6.S -- verify */
+
+av_cold void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx,
+                                   unsigned high_bit_depth)
+{
+    const int algo = avctx->idct_algo;
+    /* add_pixels_clamped is always replaced; the IDCT hooks only when selected below. */
+    c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
+    if (avctx->lowres || high_bit_depth || (algo != FF_IDCT_SIMPLEARMV6 &&
+        (algo != FF_IDCT_AUTO || (avctx->flags & AV_CODEC_FLAG_BITEXACT))))
+        return;
+    c->idct_put  = ff_simple_idct_put_armv6;
+    c->idct_add  = ff_simple_idct_add_armv6;
+    c->idct      = ff_simple_idct_armv6;
+    c->perm_type = FF_IDCT_PERM_LIBMPEG2;
+}
diff --git a/media/ffvpx/libavcodec/arm/idctdsp_init_neon.c b/media/ffvpx/libavcodec/arm/idctdsp_init_neon.c
new file mode 100644
index 0000000000..b70c5b0d44
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/idctdsp_init_neon.c
@@ -0,0 +1,51 @@
+/*
+ * ARM-NEON-optimized IDCT functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+#include "idct.h"
+#include "idctdsp_arm.h"
+
+void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, ptrdiff_t);        /* 64 coefficients, 8-bit pixels (read and written), row stride */
+void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, ptrdiff_t);        /* 64 coefficients, 8-bit destination, row stride */
+void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, ptrdiff_t); /* as above, but the output is biased by +128 */
+
+av_cold void ff_idctdsp_init_neon(IDCTDSPContext *c, AVCodecContext *avctx,
+                                  unsigned high_bit_depth)
+{
+    const int algo = avctx->idct_algo;
+    /* The clamped pixel helpers are always taken from NEON; the IDCT only if selected. */
+    c->add_pixels_clamped        = ff_add_pixels_clamped_neon;
+    c->put_pixels_clamped        = ff_put_pixels_clamped_neon;
+    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
+
+    if (avctx->lowres || high_bit_depth)
+        return;
+    if (algo == FF_IDCT_AUTO || algo == FF_IDCT_SIMPLEAUTO || algo == FF_IDCT_SIMPLENEON) {
+        c->idct_put  = ff_simple_idct_put_neon;
+        c->idct_add  = ff_simple_idct_add_neon;
+        c->idct      = ff_simple_idct_neon;
+        c->perm_type = FF_IDCT_PERM_PARTTRANS;
+    }
+}
diff --git a/media/ffvpx/libavcodec/arm/idctdsp_neon.S b/media/ffvpx/libavcodec/arm/idctdsp_neon.S
new file mode 100644
index 0000000000..1911a33468
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/idctdsp_neon.S
@@ -0,0 +1,128 @@
+/*
+ * ARM-NEON-optimized IDCT functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+@ ff_put_pixels_clamped_neon: r0 = 64 int16 coefficients, r1 = 8-bit destination, r2 = destination row stride in bytes
+function ff_put_pixels_clamped_neon, export=1
+        vld1.16         {d16-d19}, [r0,:128]!      @ rows 0-1 -> q8, q9 (r0 advances by 32 bytes)
+        vqmovun.s16     d0, q8                     @ row 0: saturate signed 16-bit to unsigned 8-bit
+        vld1.16         {d20-d23}, [r0,:128]!      @ rows 2-3 -> q10, q11
+        vqmovun.s16     d1, q9                     @ row 1
+        vld1.16         {d24-d27}, [r0,:128]!      @ rows 4-5 -> q12, q13
+        vqmovun.s16     d2, q10                    @ row 2
+        vld1.16         {d28-d31}, [r0,:128]!      @ rows 6-7 -> q14, q15
+        vqmovun.s16     d3, q11                    @ row 3
+        vst1.8          {d0},      [r1,:64], r2    @ store row 0 (8 bytes), then r1 += r2
+        vqmovun.s16     d4, q12                    @ row 4
+        vst1.8          {d1},      [r1,:64], r2    @ store row 1
+        vqmovun.s16     d5, q13                    @ row 5
+        vst1.8          {d2},      [r1,:64], r2    @ store row 2
+        vqmovun.s16     d6, q14                    @ row 6
+        vst1.8          {d3},      [r1,:64], r2    @ store row 3
+        vqmovun.s16     d7, q15                    @ row 7
+        vst1.8          {d4},      [r1,:64], r2    @ store row 4
+        vst1.8          {d5},      [r1,:64], r2    @ store row 5
+        vst1.8          {d6},      [r1,:64], r2    @ store row 6
+        vst1.8          {d7},      [r1,:64], r2    @ store row 7
+        bx              lr
+endfunc
+@ ff_put_signed_pixels_clamped_neon: as the unsigned put, but each value is saturated to signed 8-bit and then biased by 128
+function ff_put_signed_pixels_clamped_neon, export=1
+        vmov.u8         d31, #128                  @ d31 = 128 in every byte lane (the bias)
+        vld1.16         {d16-d17}, [r0,:128]!      @ row 0 -> q8
+        vqmovn.s16      d0, q8                     @ row 0: saturate signed 16-bit to signed 8-bit
+        vld1.16         {d18-d19}, [r0,:128]!      @ row 1 -> q9
+        vqmovn.s16      d1, q9                     @ row 1
+        vld1.16         {d16-d17}, [r0,:128]!      @ row 2 -> q8 (register reused)
+        vqmovn.s16      d2, q8                     @ row 2
+        vld1.16         {d18-d19}, [r0,:128]!      @ row 3 -> q9 (register reused)
+        vadd.u8         d0, d0, d31                @ row 0 += 128 (wrapping byte add)
+        vld1.16         {d20-d21}, [r0,:128]!      @ row 4 -> q10
+        vadd.u8         d1, d1, d31                @ row 1 += 128
+        vld1.16         {d22-d23}, [r0,:128]!      @ row 5 -> q11
+        vadd.u8         d2, d2, d31                @ row 2 += 128
+        vst1.8          {d0},      [r1,:64], r2    @ store row 0, then r1 += r2
+        vqmovn.s16      d3, q9                     @ row 3
+        vst1.8          {d1},      [r1,:64], r2    @ store row 1
+        vqmovn.s16      d4, q10                    @ row 4
+        vst1.8          {d2},      [r1,:64], r2    @ store row 2
+        vqmovn.s16      d5, q11                    @ row 5
+        vld1.16         {d24-d25}, [r0,:128]!      @ row 6 -> q12
+        vadd.u8         d3, d3, d31                @ row 3 += 128
+        vld1.16         {d26-d27}, [r0,:128]!      @ row 7 -> q13
+        vadd.u8         d4, d4, d31                @ row 4 += 128
+        vadd.u8         d5, d5, d31                @ row 5 += 128
+        vst1.8          {d3},      [r1,:64], r2    @ store row 3
+        vqmovn.s16      d6, q12                    @ row 6
+        vst1.8          {d4},      [r1,:64], r2    @ store row 4
+        vqmovn.s16      d7, q13                    @ row 7
+        vst1.8          {d5},      [r1,:64], r2    @ store row 5
+        vadd.u8         d6, d6, d31                @ row 6 += 128
+        vadd.u8         d7, d7, d31                @ row 7 += 128
+        vst1.8          {d6},      [r1,:64], r2    @ store row 6
+        vst1.8          {d7},      [r1,:64], r2    @ store row 7
+        bx              lr
+endfunc
+@ ff_add_pixels_clamped_neon: r0 = 64 int16 coefficients, r1 = 8-bit pixels updated in place, r2 = row stride in bytes
+function ff_add_pixels_clamped_neon, export=1
+        mov             r3, r1                     @ r3 = store pointer; r1 stays the (run-ahead) load pointer
+        vld1.8          {d16},   [r1,:64], r2      @ pixels of row 0
+        vld1.16         {d0-d1}, [r0,:128]!        @ coefficients of row 0 -> q0
+        vaddw.u8        q0, q0, d16                @ row 0: coefficients + zero-extended pixels
+        vld1.8          {d17},   [r1,:64], r2      @ pixels of row 1
+        vld1.16         {d2-d3}, [r0,:128]!        @ coefficients of row 1 -> q1
+        vqmovun.s16     d0, q0                     @ row 0: saturate to unsigned 8-bit
+        vld1.8          {d18},   [r1,:64], r2      @ pixels of row 2
+        vaddw.u8        q1, q1, d17                @ row 1 sum
+        vld1.16         {d4-d5}, [r0,:128]!        @ coefficients of row 2 -> q2
+        vaddw.u8        q2, q2, d18                @ row 2 sum
+        vst1.8          {d0},    [r3,:64], r2      @ store row 0, then r3 += r2
+        vqmovun.s16     d2, q1                     @ row 1 narrow
+        vld1.8          {d19},   [r1,:64], r2      @ pixels of row 3
+        vld1.16         {d6-d7}, [r0,:128]!        @ coefficients of row 3 -> q3
+        vaddw.u8        q3, q3, d19                @ row 3 sum
+        vqmovun.s16     d4, q2                     @ row 2 narrow
+        vst1.8          {d2},    [r3,:64], r2      @ store row 1
+        vld1.8          {d16},   [r1,:64], r2      @ pixels of row 4
+        vqmovun.s16     d6, q3                     @ row 3 narrow
+        vld1.16         {d0-d1}, [r0,:128]!        @ coefficients of row 4 -> q0 (row 0 already stored)
+        vaddw.u8        q0, q0, d16                @ row 4 sum
+        vst1.8          {d4},    [r3,:64], r2      @ store row 2
+        vld1.8          {d17},   [r1,:64], r2      @ pixels of row 5
+        vld1.16         {d2-d3}, [r0,:128]!        @ coefficients of row 5 -> q1
+        vaddw.u8        q1, q1, d17                @ row 5 sum
+        vst1.8          {d6},    [r3,:64], r2      @ store row 3
+        vqmovun.s16     d0, q0                     @ row 4 narrow
+        vld1.8          {d18},   [r1,:64], r2      @ pixels of row 6
+        vld1.16         {d4-d5}, [r0,:128]!        @ coefficients of row 6 -> q2
+        vaddw.u8        q2, q2, d18                @ row 6 sum
+        vst1.8          {d0},    [r3,:64], r2      @ store row 4
+        vqmovun.s16     d2, q1                     @ row 5 narrow
+        vld1.8          {d19},   [r1,:64], r2      @ pixels of row 7
+        vqmovun.s16     d4, q2                     @ row 6 narrow
+        vld1.16         {d6-d7}, [r0,:128]!        @ coefficients of row 7 -> q3
+        vaddw.u8        q3, q3, d19                @ row 7 sum
+        vst1.8          {d2},    [r3,:64], r2      @ store row 5
+        vqmovun.s16     d6, q3                     @ row 7 narrow
+        vst1.8          {d4},    [r3,:64], r2      @ store row 6
+        vst1.8          {d6},    [r3,:64], r2      @ store row 7
+        bx              lr
+endfunc
diff --git a/media/ffvpx/libavcodec/arm/jrevdct_arm.S b/media/ffvpx/libavcodec/arm/jrevdct_arm.S
new file mode 100644
index 0000000000..f951e2af34
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/jrevdct_arm.S
@@ -0,0 +1,383 @@
+/*
+   C-like prototype :
+        void j_rev_dct_arm(DCTBLOCK data)
+
+   With DCTBLOCK being a pointer to an array of 64 'signed shorts'
+
+   Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+   COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+   IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+#include "libavutil/arm/asm.S"
+/* Fixed-point multipliers: FIX_x_y is x.y scaled by 2^13 and rounded (FIX_M_ = negative value); FIX_0xFFFF is a 16-bit mask. */
+#define FIX_0_298631336 2446
+#define FIX_0_541196100 4433
+#define FIX_0_765366865 6270
+#define FIX_1_175875602 9633
+#define FIX_1_501321110 12299
+#define FIX_2_053119869 16819
+#define FIX_3_072711026 25172
+#define FIX_M_0_390180644 -3196
+#define FIX_M_0_899976223 -7373
+#define FIX_M_1_847759065 -15137
+#define FIX_M_1_961570560 -16069
+#define FIX_M_2_562915447 -20995
+#define FIX_0xFFFF 0xFFFF
+/* Byte offset of each constant inside const_array (one 32-bit word per entry, same order as above). */
+#define FIX_0_298631336_ID      0
+#define FIX_0_541196100_ID      4
+#define FIX_0_765366865_ID      8
+#define FIX_1_175875602_ID     12
+#define FIX_1_501321110_ID     16
+#define FIX_2_053119869_ID     20
+#define FIX_3_072711026_ID     24
+#define FIX_M_0_390180644_ID   28
+#define FIX_M_0_899976223_ID   32
+#define FIX_M_1_847759065_ID   36
+#define FIX_M_1_961570560_ID   40
+#define FIX_M_2_562915447_ID   44
+#define FIX_0xFFFF_ID          48
+@ ff_j_rev_dct_arm: r0 = block of 64 int16; in-place 2-D inverse DCT done as a row pass followed by a column pass
+function ff_j_rev_dct_arm, export=1
+        push {r0, r4 - r11, lr}         @ r0 is saved as well: it is popped back into lr before the column pass
+
+        mov lr, r0                      @ lr = pointer to the current row
+        mov r12, #8                     @ r12 = row-counter
+        movrel r11, const_array         @ r11 = base pointer to the constants array
+row_loop:
+        ldrsh r0, [lr, # 0]             @ r0 = 'd0'
+        ldrsh r2, [lr, # 2]             @ r2 = 'd2'
+
+        @ Optimization for row that have all items except the first set to 0
+        @ (this works as the int16_t are always 4-byte aligned)
+        ldr r5, [lr, # 0]
+        ldr r6, [lr, # 4]
+        ldr r3, [lr, # 8]
+        ldr r4, [lr, #12]
+        orr r3, r3, r4
+        orr r3, r3, r6                  @ r3 = OR of elements 2..7 of the row
+        orrs r5, r3, r5                 @ r5 = OR of all eight elements
+        beq end_of_row_loop             @ nothing to be done as ALL of them are '0'
+        orrs r3, r3, r2                 @ r3 = OR of elements 1..7
+        beq empty_row                   @ only element 0 is non-zero
+
+        ldrsh r1, [lr, # 8]             @ r1 = 'd1'
+        ldrsh r4, [lr, # 4]             @ r4 = 'd4'
+        ldrsh r6, [lr, # 6]             @ r6 = 'd6'
+
+        ldr r3, [r11, #FIX_0_541196100_ID]
+        add r7, r2, r6
+        ldr r5, [r11, #FIX_M_1_847759065_ID]
+        mul r7, r3, r7                      @ r7 = z1
+        ldr r3, [r11, #FIX_0_765366865_ID]
+        mla r6, r5, r6, r7                  @ r6 = tmp2
+        add r5, r0, r4                      @ r5 = tmp0
+        mla r2, r3, r2, r7                  @ r2 = tmp3
+        sub r3, r0, r4                      @ r3 = tmp1
+
+        add r0, r2, r5, lsl #13             @ r0 = tmp10
+        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
+        add r4, r6, r3, lsl #13             @ r4 = tmp11
+        rsb r3, r6, r3, lsl #13             @ r3 = tmp12
+
+        push {r0, r2, r3, r4} @ save on the stack tmp10, tmp13, tmp12, tmp11
+
+        ldrsh r3, [lr, #10]             @ r3 = 'd3'
+        ldrsh r5, [lr, #12]             @ r5 = 'd5'
+        ldrsh r7, [lr, #14]             @ r7 = 'd7'
+
+        add r0, r3, r5                        @ r0 = 'z2'
+        add r2, r1, r7                  @ r2 = 'z1'
+        add r4, r3, r7                  @ r4 = 'z3'
+        add r6, r1, r5                  @ r6 = 'z4'
+        ldr r9, [r11, #FIX_1_175875602_ID]
+        add r8, r4, r6                  @ r8 = z3 + z4
+        ldr r10, [r11, #FIX_M_0_899976223_ID]
+        mul r8, r9, r8                  @ r8 = 'z5'
+        ldr r9, [r11, #FIX_M_2_562915447_ID]
+        mul r2, r10, r2                 @ r2 = 'z1'
+        ldr r10, [r11, #FIX_M_1_961570560_ID]
+        mul r0, r9, r0                  @ r0 = 'z2'
+        ldr r9, [r11, #FIX_M_0_390180644_ID]
+        mla r4, r10, r4, r8             @ r4 = 'z3'
+        ldr r10, [r11, #FIX_0_298631336_ID]
+        mla r6, r9, r6, r8              @ r6 = 'z4'
+        ldr r9, [r11, #FIX_2_053119869_ID]
+        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
+        ldr r10, [r11, #FIX_3_072711026_ID]
+        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
+        ldr r9, [r11, #FIX_1_501321110_ID]
+        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
+        add r7, r7, r4                  @ r7 = tmp0
+        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
+        add r5,        r5, r6                  @ r5 = tmp1
+        add r3, r3, r4                  @ r3 = tmp2
+        add r1, r1, r6                  @ r1 = tmp3
+
+        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
+                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
+
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
+        add r8, r0, r1
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 0]
+
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
+        sub r8, r0, r1
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #14]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
+        add r8, r6, r3
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 2]
+
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
+        sub r8, r6, r3
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #12]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
+        add r8, r4, r5
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 4]
+
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
+        sub r8, r4, r5
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #10]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
+        add r8, r2, r7
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 6]
+
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
+        sub r8, r2, r7
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 8]
+
+        @ End of row loop
+        add lr, lr, #16
+        subs r12, r12, #1
+        bne row_loop
+        beq start_column_loop           @ always taken here: Z is set once the counter reached 0
+
+empty_row:
+        ldr r1, [r11, #FIX_0xFFFF_ID]   @ r1 = 16-bit mask
+        mov r0, r0, lsl #2              @ r0 = d0 << 2
+        and r0, r0, r1                  @ keep the low 16 bits
+        add r0, r0, r0, lsl #16         @ replicate the value into both halfwords
+        str r0, [lr, # 0]               @ four word stores fill all eight elements of the row
+        str r0, [lr, # 4]
+        str r0, [lr, # 8]
+        str r0, [lr, #12]
+
+end_of_row_loop:
+        @ End of loop
+        add lr, lr, #16
+        subs r12, r12, #1
+        bne row_loop
+
+start_column_loop:
+        @ Start of column loop
+        pop {lr}                        @ lr = block pointer (the r0 pushed on entry)
+        mov r12, #8
+column_loop:
+        ldrsh r0, [lr, #( 0*8)]             @ r0 = 'd0'
+        ldrsh r2, [lr, #( 4*8)]             @ r2 = 'd2'
+        ldrsh r4, [lr, #( 8*8)]             @ r4 = 'd4'
+        ldrsh r6, [lr, #(12*8)]             @ r6 = 'd6'
+
+        ldr r3, [r11, #FIX_0_541196100_ID]
+        add r1, r2, r6
+        ldr r5, [r11, #FIX_M_1_847759065_ID]
+        mul r1, r3, r1                      @ r1 = z1
+        ldr r3, [r11, #FIX_0_765366865_ID]
+        mla r6, r5, r6, r1                  @ r6 = tmp2
+        add r5, r0, r4                      @ r5 = tmp0
+        mla r2, r3, r2, r1                  @ r2 = tmp3
+        sub r3, r0, r4                      @ r3 = tmp1
+
+        add r0, r2, r5, lsl #13             @ r0 = tmp10
+        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
+        add r4, r6, r3, lsl #13             @ r4 = tmp11
+        rsb r6, r6, r3, lsl #13             @ r6 = tmp12
+
+        ldrsh r1, [lr, #( 2*8)]             @ r1 = 'd1'
+        ldrsh r3, [lr, #( 6*8)]             @ r3 = 'd3'
+        ldrsh r5, [lr, #(10*8)]             @ r5 = 'd5'
+        ldrsh r7, [lr, #(14*8)]             @ r7 = 'd7'
+
+        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
+        orr r9, r1, r3
+        orr r10, r5, r7
+        orrs r10, r9, r10
+        beq empty_odd_column
+
+        push {r0, r2, r4, r6} @ save on the stack tmp10, tmp13, tmp11, tmp12
+
+        add r0, r3, r5                  @ r0 = 'z2'
+        add r2, r1, r7                  @ r2 = 'z1'
+        add r4, r3, r7                  @ r4 = 'z3'
+        add r6, r1, r5                  @ r6 = 'z4'
+        ldr r9, [r11, #FIX_1_175875602_ID]
+        add r8, r4, r6
+        ldr r10, [r11, #FIX_M_0_899976223_ID]
+        mul r8, r9, r8                  @ r8 = 'z5'
+        ldr r9, [r11, #FIX_M_2_562915447_ID]
+        mul r2, r10, r2                 @ r2 = 'z1'
+        ldr r10, [r11, #FIX_M_1_961570560_ID]
+        mul r0, r9, r0                  @ r0 = 'z2'
+        ldr r9, [r11, #FIX_M_0_390180644_ID]
+        mla r4, r10, r4, r8             @ r4 = 'z3'
+        ldr r10, [r11, #FIX_0_298631336_ID]
+        mla r6, r9, r6, r8              @ r6 = 'z4'
+        ldr r9, [r11, #FIX_2_053119869_ID]
+        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
+        ldr r10, [r11, #FIX_3_072711026_ID]
+        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
+        ldr r9, [r11, #FIX_1_501321110_ID]
+        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
+        add r7, r7, r4                  @ r7 = tmp0
+        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
+        add r5,        r5, r6                  @ r5 = tmp1
+        add r3, r3, r4                  @ r3 = tmp2
+        add r1, r1, r6                  @ r1 = tmp3
+
+        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
+                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
+
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
+        add r8, r0, r1
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 0*8)]
+
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
+        sub r8, r0, r1
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(14*8)]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
+        add r8, r4, r3
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 2*8)]
+
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
+        sub r8, r4, r3
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(12*8)]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
+        add r8, r6, r5
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 4*8)]
+
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
+        sub r8, r6, r5
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(10*8)]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
+        add r8, r2, r7
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 6*8)]
+
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
+        sub r8, r2, r7
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 8*8)]
+
+        @ End of column loop
+        add lr, lr, #2
+        subs r12, r12, #1
+        bne column_loop
+        beq the_end                     @ always taken here: Z is set once the counter reached 0
+
+empty_odd_column:
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
+        add r0, r0, #(1<<17)
+        mov r0, r0, asr #18
+        strh r0, [lr, #( 0*8)]
+        strh r0, [lr, #(14*8)]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
+        add r4, r4, #(1<<17)
+        mov r4, r4, asr #18
+        strh r4, [lr, #( 2*8)]
+        strh r4, [lr, #(12*8)]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
+        add r6, r6, #(1<<17)
+        mov r6, r6, asr #18
+        strh r6, [lr, #( 4*8)]
+        strh r6, [lr, #(10*8)]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
+        add r2, r2, #(1<<17)
+        mov r2, r2, asr #18
+        strh r2, [lr, #( 6*8)]
+        strh r2, [lr, #( 8*8)]
+
+        @ End of column loop
+        add lr, lr, #2
+        subs r12, r12, #1
+        bne column_loop
+
+the_end:
+        @ The end....
+        pop {r4 - r11, pc}
+endfunc
+@ Word table read through r11 with the FIX_..._ID byte offsets; the entry order here must match those offsets.
+const const_array
+        .word FIX_0_298631336
+        .word FIX_0_541196100
+        .word FIX_0_765366865
+        .word FIX_1_175875602
+        .word FIX_1_501321110
+        .word FIX_2_053119869
+        .word FIX_3_072711026
+        .word FIX_M_0_390180644
+        .word FIX_M_0_899976223
+        .word FIX_M_1_847759065
+        .word FIX_M_1_961570560
+        .word FIX_M_2_562915447
+        .word FIX_0xFFFF
+endconst
diff --git a/media/ffvpx/libavcodec/arm/mathops.h b/media/ffvpx/libavcodec/arm/mathops.h
new file mode 100644
index 0000000000..dc57c5571c
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/mathops.h
@@ -0,0 +1,108 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_MATHOPS_H
+#define AVCODEC_ARM_MATHOPS_H
+
+#include <stdint.h>
+#include "config.h"
+#include "libavutil/common.h"
+
+#if HAVE_INLINE_ASM
+
+#if HAVE_ARMV6_INLINE
+#define MULH MULH /* marks MULH as provided here; presumably tested by a generic fallback header -- verify */
+static inline av_const int MULH(int a, int b)
+{
+    int r;
+    __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); /* r = upper 32 bits of the signed 64-bit product a * b */
+    return r;
+}
+
+#define FASTDIV FASTDIV /* ARMv6 variant: multiplies a by the ff_inverse[b] table entry (table contents not visible here) */
+static av_always_inline av_const int FASTDIV(int a, int b)
+{
+    int r;
+    __asm__ ("cmp     %2, #2               \n\t" /* set flags from b - 2 */
+             "ldr     %0, [%3, %2, lsl #2] \n\t" /* r = word at ff_inverse + 4*b (loaded on both paths) */
+             "ite     le                   \n\t" /* if-then-else block covering the next two instructions */
+             "lsrle   %0, %1, #1           \n\t" /* b <= 2: r = a >> 1 (logical shift) */
+             "smmulgt %0, %0, %1           \n\t" /* b >  2: r = upper 32 bits of the signed product r * a */
+             : "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc"); /* early-clobber: r is written before a is last read */
+    return r;
+}
+
+#else /* HAVE_ARMV6_INLINE */
+
+#define FASTDIV FASTDIV /* variant used when HAVE_ARMV6_INLINE is not set */
+static av_always_inline av_const int FASTDIV(int a, int b)
+{
+    int r, t; /* t only receives the low word of the product and is otherwise unused */
+    __asm__ ("umull %1, %0, %2, %3" /* unsigned 64-bit product a * ff_inverse[b]: low word -> t, high word -> r */
+             : "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b]));
+    return r;
+}
+#endif
+
+#define MLS64(d, a, b) MAC64(d, -(a), b) /* expressed as MAC64 with a negated; MAC64 itself is defined outside this chunk */
+
+#if HAVE_ARMV5TE_INLINE
+
+/* signed 16x16 -> 32 multiply-accumulate: rt += (low 16 bits of ra) * (low 16 bits of rb) */
+#   define MAC16(rt, ra, rb)                                            \
+    __asm__ ("smlabb %0, %1, %2, %0" : "+r"(rt) : "r"(ra), "r"(rb)); /* note: the expansion already ends in ';' */
+
+/* signed 16x16 -> 32 multiply of the low halfwords of ra and rb */
+#   define MUL16 MUL16
+static inline av_const int MUL16(int ra, int rb)
+{
+    int rt;
+    __asm__ ("smulbb %0, %1, %2" : "=r"(rt) : "r"(ra), "r"(rb)); /* rt = low16(ra) * low16(rb), both taken as signed */
+    return rt;
+}
+
+#endif
+
+#define mid_pred mid_pred /* returns the median of a, b and c, computed with conditional moves */
+static inline av_const int mid_pred(int a, int b, int c)
+{
+    int m;
+    __asm__ (
+        "mov   %0, %2  \n\t" /* m = b */
+        "cmp   %1, %2  \n\t"
+        "itt   gt      \n\t"
+        "movgt %0, %1  \n\t" /* if a > b: m = a ... */
+        "movgt %1, %2  \n\t" /* ... and a = b, so now m = max(a, b) and a = min(a, b) */
+        "cmp   %1, %3  \n\t"
+        "it    le      \n\t"
+        "movle %1, %3  \n\t" /* a = max(min(a, b), c) */
+        "cmp   %0, %1  \n\t"
+        "it    gt      \n\t"
+        "movgt %0, %1  \n\t" /* m = smaller of the two maxima, i.e. the median */
+        : "=&r"(m), "+r"(a)
+        : "r"(b), "r"(c)
+        : "cc");
+    return m;
+}
+
+#endif /* HAVE_INLINE_ASM */
+
+#endif /* AVCODEC_ARM_MATHOPS_H */
diff --git a/media/ffvpx/libavcodec/arm/moz.build b/media/ffvpx/libavcodec/arm/moz.build
new file mode 100644
index 0000000000..dafeab21d8
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/moz.build
@@ -0,0 +1,33 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+# 32-bit ARM libavcodec sources: C init files plus their assembly (.S) implementations.
+SOURCES += [
+    'fft_init_arm.c',
+    'fft_neon.S',
+    'fft_vfp.S',
+    'flacdsp_arm.S',
+    'flacdsp_init_arm.c',
+    'idctdsp_arm.S',
+    'idctdsp_armv6.S',
+    'idctdsp_init_arm.c',
+    'idctdsp_init_armv5te.c',
+    'idctdsp_init_armv6.c',
+    'idctdsp_init_neon.c',
+    'idctdsp_neon.S',
+    'jrevdct_arm.S',
+    'mpegaudiodsp_fixed_armv6.S',
+    'mpegaudiodsp_init_arm.c',
+    'rdft_init_arm.c',
+    'rdft_neon.S',
+    'simple_idct_arm.S',
+    'simple_idct_armv5te.S',
+    'simple_idct_armv6.S',
+    'simple_idct_neon.S',
+]
+
+FINAL_LIBRARY = 'mozavcodec'  # objects built here are linked into the mozavcodec library
+
+include('/media/ffvpx/ffvpxcommon.mozbuild')  # shared ffvpx build settings (contents not visible here)
diff --git a/media/ffvpx/libavcodec/arm/mpegaudiodsp_fixed_armv6.S b/media/ffvpx/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
new file mode 100644
index 0000000000..977abb6939
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+.macro  skip            args:vararg
+.endm
+@ skip expands to nothing (default for the rsb parameter below). sum8 adds eight products to the 64-bit accumulator hi:lo: coefficient w[offs + 64*k] times sample p[1 + 64*k], k = 0..7, with p taken on entry; passing rsb=rsb negates each coefficient first.
+.macro  sum8            lo,  hi,  w, p, t1, t2, t3, t4, rsb=skip, offs=0
+        ldr             \t1, [\w, #4*\offs]             @ coefficient for k = 0
+        ldr             \t2, [\p, #4]!                  @ step p forward by one word (written back), then load the sample for k = 0
+        \rsb            \t1, \t1, #0
+  .irpc i, 135
+        ldr             \t3, [\w, #4*64*\i+4*\offs]     @ coefficient for an odd k (1, 3, 5)
+        ldr             \t4, [\p, #4*64*\i]             @ matching sample, 64 words further on per step of k
+        smlal           \lo, \hi, \t1, \t2              @ accumulate the previous even-k product while the new loads are in flight
+        \rsb            \t3, \t3, #0
+        ldr             \t1, [\w, #4*64*(\i+1)+4*\offs] @ coefficient for the next even k (2, 4, 6)
+        ldr             \t2, [\p, #4*64*(\i+1)]         @ matching sample
+        smlal           \lo, \hi, \t3, \t4              @ accumulate the odd-k product
+        \rsb            \t1, \t1, #0
+  .endr
+        ldr             \t3, [\w, #4*64*7+4*\offs]      @ coefficient for k = 7
+        ldr             \t4, [\p, #4*64*7]              @ sample for k = 7
+        smlal           \lo, \hi, \t1, \t2              @ product for k = 6
+        \rsb            \t3, \t3, #0
+        smlal           \lo, \hi, \t3, \t4              @ product for k = 7
+.endm
+
+.macro  round           rd,  lo,  hi
+        lsr             \rd, \lo, #24           @ start of rd = (hi:lo >> 24): take the top 8 bits of lo
+        bic             \lo, \lo, #0xff000000   @ lo keeps only its low 24 bits (the part shifted out of rd)
+        orr             \rd, \rd, \hi, lsl #8   @ merge in hi << 8, giving the low 32 bits of hi:lo >> 24
+        mov             \hi, #0                 @ hi:lo now holds just the 24-bit remainder
+        ssat            \rd, #16, \rd           @ clamp rd to the signed 16-bit range
+.endm
+
+function ff_mpadsp_apply_window_fixed_armv6, export=1
+        push            {r2,r4-r11,lr}          @ 10 words pushed: r2 lands at [sp], the stack argument moves to [sp, #40]
+        @ Assuming AAPCS and the C prototype in mpegaudiodsp_init_arm.c: r0 = synth_buf, r1 = window, r2 = dither, r3 = out, stack = incr.
+        add             r4,  r0,  #4*512        @ synth_buf + 512
+    .rept 4
+        ldm             r0!, {r5-r12}           @ read 8 words from the start of synth_buf, advancing r0
+        stm             r4!, {r5-r12}           @ write them at synth_buf + 512 onwards
+    .endr
+        @ In total 32 words were copied from synth_buf[0..31] to synth_buf[512..543]; r0 is now synth_buf + 32.
+        ldr             r4,  [sp, #40]          @ incr
+        sub             r0,  r0,  #4*17         @ synth_buf + 15; the pre-increment in sum8 makes the first sample synth_buf[16]
+        ldr             r8,  [r2]               @ sum:low
+        add             r2,  r0,  #4*32         @ synth_buf + 47; becomes synth_buf + 48 after the pre-increment in sum8
+        rsb             r5,  r4,  r4,  lsl #5   @ 31 * incr
+        lsl             r4,  r4,  #1            @ 2 * incr: the step in bytes for 16-bit output samples
+        asr             r9,  r8,  #31           @ sum:high
+        add             r5,  r3,  r5,  lsl #1   @ samples2
+        add             r6,  r1,  #4*32         @ w2
+        str             r4,  [sp, #40]          @ the stack slot now holds the byte step (reloaded inside the loop)
+        @ First output sample: add 8 taps from w/r0, subtract 8 taps at window offset 32 from r2, then round and store.
+        sum8            r8,  r9,  r1,  r0,  r10, r11, r12, lr
+        sum8            r8,  r9,  r1,  r2,  r10, r11, r12, lr, rsb, 32
+        round           r10, r8,  r9
+        strh_post       r10, r3,  r4
+        @ strh_post / strh_dpost are defined in libavutil/arm/asm.S (not shown); presumably halfword stores with post-increment / post-decrement -- TODO confirm.
+        mov             lr,  #15                @ loop counter: 15 passes, each ending in two halfword stores
+1:
+        ldr             r12, [r0, #4]!          @ step the first sample pointer forward, load the sample
+        ldr             r11, [r6, #-4]!         @ step w2 backward, load its coefficient
+        ldr             r10, [r1, #4]!          @ step w forward, load its coefficient
+  .irpc i, 0246
+    .if \i
+        ldr             r11, [r6, #4*64*\i]
+        ldr             r10, [r1, #4*64*\i]
+    .endif
+        rsb             r11, r11, #0            @ negate the w2 coefficient
+        smlal           r8,  r9,  r10, r12      @ main accumulator r9:r8 += w coefficient * sample
+        ldr             r10, [r0, #4*64*(\i+1)]
+    .ifeq \i
+        smull           r4, r7, r11, r12        @ first tap initialises the second accumulator r7:r4
+    .else
+        smlal           r4, r7, r11, r12        @ later taps accumulate into r7:r4
+    .endif
+        ldr             r11, [r6, #4*64*(\i+1)]
+        ldr             r12, [r1, #4*64*(\i+1)]
+        rsb             r11, r11, #0
+        smlal           r8,  r9,  r12, r10
+    .iflt \i-6
+        ldr             r12, [r0, #4*64*(\i+2)]
+    .else
+        ldr             r12, [r2, #-4]!         @ last pass: step the second sample pointer backward and load from it
+    .endif
+        smlal           r4,  r7,  r11, r10
+  .endr
+  .irpc i, 0246
+        ldr             r10, [r1, #4*64*\i+4*32]
+        rsb             r12, r12, #0            @ negate the sample taken through r2
+        ldr             r11, [r6, #4*64*\i+4*32]
+        smlal           r8,  r9,  r10, r12
+        ldr             r10, [r2, #4*64*(\i+1)]
+        smlal           r4,  r7,  r11, r12
+        ldr             r12, [r1, #4*64*(\i+1)+4*32]
+        rsb             r10, r10, #0
+        ldr             r11, [r6, #4*64*(\i+1)+4*32]
+        smlal           r8,  r9,  r12, r10
+    .iflt \i-6
+        ldr             r12, [r2, #4*64*(\i+2)]
+    .else
+        ldr             r12, [sp, #40]          @ last pass: r12 = byte step saved on the stack above
+    .endif
+        smlal           r4,  r7,  r11, r10
+  .endr
+        round           r10, r8,  r9
+        adds            r8,  r8,  r4            @ add the second accumulator onto the remainder left by round (low word)
+        adc             r9,  r9,  r7            @ high word, with carry
+        strh_post       r10, r3,  r12
+        round           r11, r8,  r9
+        subs            lr,  lr,  #1            @ count down; flags feed the bgt below
+        strh_dpost      r11, r5, r12
+        bgt             1b
+        @ Final sample: subtract 8 taps taken at offset 33 from the advanced w pointer, then round.
+        sum8            r8,  r9,  r1,  r0,  r10, r11, r12, lr, rsb, 33
+        pop             {r4}                    @ r4 = the r2 value pushed on entry (dither pointer)
+        round           r10, r8,  r9
+        str             r8,  [r4]               @ store the remaining low 24 bits back through the dither pointer
+        strh            r10, [r3]               @ store the final sample
+
+        pop             {r4-r11,pc}             @ restore callee-saved registers and return
+endfunc
diff --git a/media/ffvpx/libavcodec/arm/mpegaudiodsp_init_arm.c b/media/ffvpx/libavcodec/arm/mpegaudiodsp_init_arm.c
new file mode 100644
index 0000000000..d87bd27ad8
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/mpegaudiodsp_init_arm.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/mpegaudiodsp.h"
+#include "config.h"
+
+void ff_mpadsp_apply_window_fixed_armv6(int32_t *synth_buf, int32_t *window,
+                                        int *dither, int16_t *out, ptrdiff_t incr);
+
+av_cold void ff_mpadsp_init_arm(MPADSPContext *s)
+{
+    /* Select the ARMv6 window routine only when the CPU reports ARMv6. */
+    const int flags = av_get_cpu_flags();
+    if (!have_armv6(flags))
+        return;
+    s->apply_window_fixed = ff_mpadsp_apply_window_fixed_armv6;
+}
diff --git a/media/ffvpx/libavcodec/arm/rdft_init_arm.c b/media/ffvpx/libavcodec/arm/rdft_init_arm.c
new file mode 100644
index 0000000000..1c5d8beb61
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/rdft_init_arm.c
@@ -0,0 +1,33 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+
+#include "libavcodec/rdft.h"
+
+void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z);
+
+av_cold void ff_rdft_init_arm(RDFTContext *s)
+{
+    const int flags = av_get_cpu_flags();
+    if (!have_neon(flags))
+        return; /* no NEON reported: leave the existing rdft_calc untouched */
+    s->rdft_calc = ff_rdft_calc_neon; /* NEON routine implemented in rdft_neon.S */
+}
diff --git a/media/ffvpx/libavcodec/arm/rdft_neon.S b/media/ffvpx/libavcodec/arm/rdft_neon.S
new file mode 100644
index 0000000000..eabb92b4bd
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/rdft_neon.S
@@ -0,0 +1,155 @@
+/*
+ * ARM NEON optimised RDFT
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_rdft_calc_neon, export=1
+        push            {r4-r8,lr}
+        @ Assuming AAPCS and the C prototype in rdft_init_arm.c: r0 = RDFTContext pointer, r1 = data pointer z; kept in r4 and r5.
+        ldr             r6,  [r0, #4]           @ inverse
+        mov             r4,  r0
+        mov             r5,  r1
+        @ When bit 0 of inverse is clear, run the complex FFT on z first; context + 24 is what the FFT routines receive.
+        lsls            r6,  r6,  #31           @ move bit 0 of inverse into the sign bit; Z is set when it was clear
+        bne             1f
+        add             r0,  r4,  #24
+        bl              X(ff_fft_permute_neon)
+        add             r0,  r4,  #24
+        mov             r1,  r5
+        bl              X(ff_fft_calc_neon)
+1:
+        ldr             r12, [r4, #0]           @ nbits
+        mov             r2,  #1
+        ldr             r8,  [r4, #20]          @ negative_sin
+        lsl             r12, r2,  r12           @ n = 1 << nbits
+        add             r0,  r5,  #8            @ forward read pointer: z + 2 floats
+        lsl             r8,  r8,  #31           @ bit 0 of negative_sin moved into the sign-bit position
+        add             r1,  r5,  r12, lsl #2   @ z + n floats
+        lsr             r12, r12, #2            @ n / 4
+        vdup.32         d26, r8                 @ sign mask that is XORed into tsin below
+        ldr             r2,  [r4, #12]          @ tcos
+        sub             r12, r12, #2            @ loop counter; the code after the loop handles the last pair
+        ldr             r3,  [r4, #16]          @ tsin
+        mov             r7,  r0                 @ forward store pointer
+        sub             r1,  r1,  #8            @ backward read pointer: last two floats of z
+        mov             lr,  r1                 @ backward store pointer
+        mov             r8,  #-8                @ post-index step for the backward pointers
+        vld1.32         {d0},     [r0,:64]!     @ d1[0,1]
+        vld1.32         {d1},     [r1,:64], r8  @ d2[0,1]
+        vld1.32         {d4},     [r2,:64]!     @ tcos[i]
+        vld1.32         {d5},     [r3,:64]!     @ tsin[i]
+        vmov.f32        d18, #0.5               @ k1
+        vdup.32         d19, r6                 @ r6 carries the inverse flag in the sign bit
+        veor            d5,  d26, d5            @ flip the sign of tsin when the negative_sin bit is set
+        pld             [r0, #32]
+        veor            d19, d18, d19           @ k2
+        vmov.i32        d16, #0
+        vmov.i32        d17, #1<<31
+        pld             [r1, #-32]
+        vtrn.32         d16, d17
+        pld             [r2, #32]
+        vrev64.32       d16, d16                @ d16=1,0 d17=0,1
+        pld             [r3, #32]
+2:
+        veor            q1,  q0,  q8            @ -d1[0],d1[1], d2[0],-d2[1]
+        vld1.32         {d24},    [r0,:64]!     @  d1[0,1]
+        vadd.f32        d0,  d0,  d3            @  d1[0]+d2[0], d1[1]-d2[1]
+        vld1.32         {d25},    [r1,:64], r8  @  d2[0,1]
+        vadd.f32        d1,  d2,  d1            @ -d1[0]+d2[0], d1[1]+d2[1]
+        veor            q3,  q12, q8            @ -d1[0],d1[1], d2[0],-d2[1]
+        pld             [r0, #32]
+        vmul.f32        q10, q0,  q9            @  ev.re, ev.im, od.im, od.re
+        pld             [r1, #-32]
+        vadd.f32        d0,  d24, d7            @  d1[0]+d2[0], d1[1]-d2[1]
+        vadd.f32        d1,  d6,  d25           @ -d1[0]+d2[0], d1[1]+d2[1]
+        vmul.f32        q11, q0,  q9            @  ev.re, ev.im, od.im, od.re
+        veor            d7,  d21, d16           @ -od.im, od.re
+        vrev64.32       d3,  d21                @  od.re, od.im
+        veor            d6,  d20, d17           @  ev.re,-ev.im
+        veor            d2,  d3,  d16           @ -od.re, od.im
+        vmla.f32        d20, d3,  d4[1]
+        vmla.f32        d20, d7,  d5[1]
+        vmla.f32        d6,  d2,  d4[1]
+        vmla.f32        d6,  d21, d5[1]
+        vld1.32         {d4},     [r2,:64]!     @  tcos[i]
+        veor            d7,  d23, d16           @ -od.im, od.re
+        vld1.32         {d5},     [r3,:64]!     @  tsin[i]
+        veor            d24, d22, d17           @  ev.re,-ev.im
+        vrev64.32       d3,  d23                @  od.re, od.im
+        veor            d5, d26, d5             @ apply the negative_sin sign mask to the new tsin pair
+        pld             [r2, #32]
+        veor            d2,  d3,  d16           @ -od.re, od.im
+        pld             [r3, #32]
+        vmla.f32        d22, d3,  d4[0]
+        vmla.f32        d22, d7,  d5[0]
+        vmla.f32        d24, d2,  d4[0]
+        vmla.f32        d24, d23, d5[0]
+        vld1.32         {d0},     [r0,:64]!     @  d1[0,1]
+        vld1.32         {d1},     [r1,:64], r8  @  d2[0,1]
+        vst1.32         {d20},    [r7,:64]!
+        vst1.32         {d6},     [lr,:64], r8
+        vst1.32         {d22},    [r7,:64]!
+        vst1.32         {d24},    [lr,:64], r8
+        subs            r12, r12, #2            @ two pairs are processed per pass
+        bgt             2b
+        @ Tail: finish the pair already loaded into d0/d1, fix the sign of one float at r0 + 4, and combine z[0] with z[1].
+        veor            q1,  q0,  q8            @ -d1[0],d1[1], d2[0],-d2[1]
+        vadd.f32        d0,  d0,  d3            @  d1[0]+d2[0], d1[1]-d2[1]
+        vadd.f32        d1,  d2,  d1            @ -d1[0]+d2[0], d1[1]+d2[1]
+        ldr             r2,  [r4, #8]           @  sign_convention
+        vmul.f32        q10, q0,  q9            @  ev.re, ev.im, od.im, od.re
+        add             r0,  r0,  #4
+        bfc             r2,  #0,  #31           @ keep only bit 31 of sign_convention
+        vld1.32         {d0[0]},  [r0,:32]      @ single float at the advanced r0
+        veor            d7,  d21, d16           @ -od.im, od.re
+        vrev64.32       d3,  d21                @  od.re, od.im
+        veor            d6,  d20, d17           @  ev.re,-ev.im
+        vld1.32         {d22},    [r5,:64]      @ z[0], z[1]
+        vdup.32         d1,  r2
+        vmov            d23, d22
+        veor            d2,  d3,  d16           @ -od.re, od.im
+        vtrn.32         d22, d23                @ d22 = z[0],z[0]  d23 = z[1],z[1]
+        veor            d0,  d0,  d1            @ flip the sign of that float when bit 31 of sign_convention is set
+        veor            d23, d23, d17           @ d23 = z[1],-z[1]
+        vmla.f32        d20, d3,  d4[1]
+        vmla.f32        d20, d7,  d5[1]
+        vmla.f32        d6,  d2,  d4[1]
+        vmla.f32        d6,  d21, d5[1]
+        vadd.f32        d22, d22, d23           @ z[0]+z[1], z[0]-z[1]
+        vst1.32         {d20},    [r7,:64]
+        vst1.32         {d6},     [lr,:64]
+        vst1.32         {d0[0]},  [r0,:32]
+        vst1.32         {d22},    [r5,:64]
+        @ r6 is zero when bit 0 of inverse was clear: that case is finished and returns here.
+        cmp             r6,  #0
+        it              eq
+        popeq           {r4-r8,pc}
+        @ Inverse case: halve z[0] and z[1], then run the complex FFT on z.
+        vmul.f32        d22, d22, d18           @ multiply by k1 = 0.5
+        vst1.32         {d22},    [r5,:64]
+        add             r0,  r4,  #24
+        mov             r1,  r5
+        bl              X(ff_fft_permute_neon)
+        add             r0,  r4,  #24
+        mov             r1,  r5
+        pop             {r4-r8,lr}
+        b               X(ff_fft_calc_neon)     @ tail call: the FFT routine returns straight to our caller
+endfunc
diff --git a/media/ffvpx/libavcodec/arm/simple_idct_arm.S b/media/ffvpx/libavcodec/arm/simple_idct_arm.S
new file mode 100644
index 0000000000..42d79ab95e
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/simple_idct_arm.S
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2002 Frederic 'dilb' Boulay
+ *
+ * Author: Frederic Boulay <dilb@handhelds.org>
+ *
+ * The function defined in this file is derived from the simple_idct function
+ * from the libavcodec library part of the FFmpeg project.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+/* Constants for ff_simple_idct_arm below: integer multipliers W1..W7, a halfword mask, and shift/rounding values. */
+#define W1  22725
+#define W2  21407
+#define W3  19266
+#define W4  16383
+#define W5  12873
+#define W6  8867
+#define W7  4520
+#define MASK_MSHW 0xFFFF0000 /* selects the most significant halfword of a 32-bit word */
+
+#define ROW_SHIFT 11 /* right shift applied to each row result */
+#define ROW_SHIFT2MSHW (16-11) /* 16 - ROW_SHIFT: left shift that places a row result in the upper halfword */
+#define COL_SHIFT 20
+#define ROW_SHIFTED_1 1024 /* 1 << (ROW_SHIFT-1): rounding term added to a0 in the row pass */
+#define COL_SHIFTED_1 524288 /* 1 << (COL_SHIFT-1): rounding term added to a0 in the column pass */
+
+
+function ff_simple_idct_arm, export=1
+        @@ void simple_idct_arm(int16_t *block)
+        @@ save stack for reg needed (take all of them),
+        @@ R0-R3 are scratch regs, so no need to save them, but R0 contains the pointer to block
+        @@ so it must not be overwritten, if it is not saved!!
+        @@ R12 is another scratch register, so it should not be saved too
+        @@ save all registers
+        stmfd sp!, {r4-r11, r14} @ R14 is also called LR
+        @@ at this point, R0=block, other registers are free.
+        add r14, r0, #112        @ R14=&block[8*7], better start from the last row, and decrease the value until row=0, i.e. R12=block.
+        @@ add 2 temporary variables in the stack: R0 and R14
+        sub sp, sp, #8          @ allow 2 local variables
+        str r0, [sp, #0]        @ save block in sp[0]
+        @@ stack status
+        @@ sp+4   free
+        @@ sp+0   R0  (block)
+
+
+        @@ at this point, R0=block, R14=&block[56], R12=__const_ptr_, R1-R11 free
+
+
+__row_loop:
+        @@ read the row and check if it is null, almost null, or not, according to strongarm specs, it is not necessary to optimize ldr accesses (i.e. split 32 bits in two 16-bit words), at least it gives more usable registers :)
+        ldr r1, [r14, #0]        @ R1=(int32)(R12)[0]=ROWr32[0] (relative row cast to a 32b pointer)
+        ldr r2, [r14, #4]        @ R2=(int32)(R12)[1]=ROWr32[1]
+        ldr r3, [r14, #8]        @ R3=ROWr32[2]
+        ldr r4, [r14, #12]       @ R4=ROWr32[3]
+        @@ check if the words are null, if all of them are null, then proceed with next row (branch __end_row_loop),
+        @@ if ROWr16[0] is the only one not null, then proceed with this special case (branch __almost_empty_row)
+        @@ else follow the complete algorithm.
+        @@ at this point, R0=block, R14=&block[n], R12=__const_ptr_, R1=ROWr32[0], R2=ROWr32[1],
+        @@                R3=ROWr32[2], R4=ROWr32[3], R5-R11 free
+        orr r5, r4, r3           @ R5=R4 | R3
+        orr r5, r5, r2           @ R5=R4 | R3 | R2
+        orrs r6, r5, r1          @ Test R5 | R1 (the aim is to check if everything is null)
+        beq __end_row_loop
+        mov r7, r1, asr #16      @ R7=R1>>16=ROWr16[1] (evaluate it now, as it could be useful later)
+        ldrsh r6, [r14, #0]      @ R6=ROWr16[0]
+        orrs r5, r5, r7          @ R5=R4 | R3 | R2 | R7
+        beq __almost_empty_row
+
+@@ __b_evaluation:
+        @@ at this point, R0=block (temp),  R1(free), R2=ROWr32[1], R3=ROWr32[2], R4=ROWr32[3],
+        @@     R5=(temp), R6=ROWr16[0], R7=ROWr16[1], R8-R11 free,
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ to save some registers/calls, proceed with b0-b3 first, followed by a0-a3
+
+        @@ MUL16(b0, W1, row[1]);
+        @@ MUL16(b1, W3, row[1]);
+        @@ MUL16(b2, W5, row[1]);
+        @@ MUL16(b3, W7, row[1]);
+        @@ MAC16(b0, W3, row[3]);
+        @@ MAC16(b1, -W7, row[3]);
+        @@ MAC16(b2, -W1, row[3]);
+        @@ MAC16(b3, -W5, row[3]);
+        ldr r8, =W1              @ R8=W1
+        mov r2, r2, asr #16      @ R2=ROWr16[3]
+        mul r0, r8, r7           @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r9, =W3              @ R9=W3
+        ldr r10, =W5             @ R10=W5
+        mul r1, r9, r7           @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r11, =W7             @ R11=W7
+        mul r5, r10, r7          @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        mul r7, r11, r7          @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        teq r2, #0               @ if null avoid muls
+        itttt ne
+        mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        rsbne r2, r2, #0         @ R2=-ROWr16[3]
+        mlane r1, r11, r2, r1    @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        mlane r5, r8, r2, r5     @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        it    ne
+        mlane r7, r10, r2, r7    @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+
+        @@ at this point, R0=b0,  R1=b1, R2 (free), R3=ROWr32[2], R4=ROWr32[3],
+        @@     R5=b2, R6=ROWr16[0], R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
+        @@ if (temp != 0) {}
+        orrs r2, r3, r4          @ R2=ROWr32[2] | ROWr32[3]
+        beq __end_b_evaluation
+
+        @@ at this point, R0=b0,  R1=b1, R2 (free), R3=ROWr32[2], R4=ROWr32[3],
+        @@     R5=b2, R6=ROWr16[0], R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ MAC16(b0, W5, row[5]);
+        @@ MAC16(b2, W7, row[5]);
+        @@ MAC16(b3, W3, row[5]);
+        @@ MAC16(b1, -W1, row[5]);
+        @@ MAC16(b0, W7, row[7]);
+        @@ MAC16(b2, W3, row[7]);
+        @@ MAC16(b3, -W1, row[7]);
+        @@ MAC16(b1, -W5, row[7]);
+        mov r3, r3, asr #16      @ R3=ROWr16[5]
+        teq r3, #0               @ if null avoid muls
+        it    ne
+        mlane r0, r10, r3, r0    @ R0+=W5*ROWr16[5]=b0
+        mov r4, r4, asr #16      @ R4=ROWr16[7]
+        itttt ne
+        mlane r5, r11, r3, r5    @ R5+=W7*ROWr16[5]=b2
+        mlane r7, r9, r3, r7     @ R7+=W3*ROWr16[5]=b3
+        rsbne r3, r3, #0         @ R3=-ROWr16[5]
+        mlane r1, r8, r3, r1     @ R7-=W1*ROWr16[5]=b1
+        @@ R3 is free now
+        teq r4, #0               @ if null avoid muls
+        itttt ne
+        mlane r0, r11, r4, r0    @ R0+=W7*ROWr16[7]=b0
+        mlane r5, r9, r4, r5     @ R5+=W3*ROWr16[7]=b2
+        rsbne r4, r4, #0         @ R4=-ROWr16[7]
+        mlane r7, r8, r4, r7     @ R7-=W1*ROWr16[7]=b3
+        it    ne
+        mlane r1, r10, r4, r1    @ R1-=W5*ROWr16[7]=b1
+        @@ R4 is free now
+__end_b_evaluation:
+        @@ at this point, R0=b0,  R1=b1, R2=ROWr32[2] | ROWr32[3] (tmp), R3 (free), R4 (free),
+        @@     R5=b2, R6=ROWr16[0], R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+
+@@ __a_evaluation:
+        @@ a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
+        @@ a1 = a0 + W6 * row[2];
+        @@ a2 = a0 - W6 * row[2];
+        @@ a3 = a0 - W2 * row[2];
+        @@ a0 = a0 + W2 * row[2];
+        ldr r9, =W4              @ R9=W4
+        mul r6, r9, r6           @ R6=W4*ROWr16[0]
+        ldr r10, =W6             @ R10=W6
+        ldrsh r4, [r14, #4]      @ R4=ROWr16[2] (a3 not defined yet)
+        add r6, r6, #ROW_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(ROW_SHIFT-1) (a0)
+
+        mul r11, r10, r4         @ R11=W6*ROWr16[2]
+        ldr r8, =W2              @ R8=W2
+        sub r3, r6, r11          @ R3=a0-W6*ROWr16[2] (a2)
+        @@ temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
+        @@ if (temp != 0) {}
+        teq r2, #0
+        beq __end_bef_a_evaluation
+
+        add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
+        mul r11, r8, r4          @ R11=W2*ROWr16[2]
+        sub r4, r6, r11          @ R4=a0-W2*ROWr16[2] (a3)
+        add r6, r6, r11          @ R6=a0+W2*ROWr16[2] (a0)
+
+
+        @@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
+        @@     R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+
+
+        @@ a0 += W4*row[4]
+        @@ a1 -= W4*row[4]
+        @@ a2 -= W4*row[4]
+        @@ a3 += W4*row[4]
+        ldrsh r11, [r14, #8]     @ R11=ROWr16[4]
+        teq r11, #0              @ if null avoid muls
+        it    ne
+        mulne r11, r9, r11       @ R11=W4*ROWr16[4]
+        @@ R9 is free now
+        ldrsh r9, [r14, #12]     @ R9=ROWr16[6]
+        itttt ne
+        addne r6, r6, r11        @ R6+=W4*ROWr16[4] (a0)
+        subne r2, r2, r11        @ R2-=W4*ROWr16[4] (a1)
+        subne r3, r3, r11        @ R3-=W4*ROWr16[4] (a2)
+        addne r4, r4, r11        @ R4+=W4*ROWr16[4] (a3)
+        @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
+        teq r9, #0               @ if null avoid muls
+        itttt ne
+        mulne r11, r10, r9       @ R11=W6*ROWr16[6]
+        addne r6, r6, r11        @ R6+=W6*ROWr16[6] (a0)
+        mulne r10, r8, r9        @ R10=W2*ROWr16[6]
+        @@ a0 += W6*row[6];
+        @@ a3 -= W6*row[6];
+        @@ a1 -= W2*row[6];
+        @@ a2 += W2*row[6];
+        subne r4, r4, r11        @ R4-=W6*ROWr16[6] (a3)
+        itt   ne
+        subne r2, r2, r10        @ R2-=W2*ROWr16[6] (a1)
+        addne r3, r3, r10        @ R3+=W2*ROWr16[6] (a2)
+
+__end_a_evaluation:
+        @@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
+        @@     R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ row[0] = (a0 + b0) >> ROW_SHIFT;
+        @@ row[1] = (a1 + b1) >> ROW_SHIFT;
+        @@ row[2] = (a2 + b2) >> ROW_SHIFT;
+        @@ row[3] = (a3 + b3) >> ROW_SHIFT;
+        @@ row[4] = (a3 - b3) >> ROW_SHIFT;
+        @@ row[5] = (a2 - b2) >> ROW_SHIFT;
+        @@ row[6] = (a1 - b1) >> ROW_SHIFT;
+        @@ row[7] = (a0 - b0) >> ROW_SHIFT;
+        add r8, r6, r0           @ R8=a0+b0
+        add r9, r2, r1           @ R9=a1+b1
+        @@ put two 16-bit half-words in a 32-bit word
+        @@ ROWr32[0]=ROWr16[0] | (ROWr16[1]<<16) (only little-endian compliant then!!!)
+        ldr r10, =MASK_MSHW      @ R10=0xFFFF0000
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a1+b1)<<5)
+        mvn r11, r10             @ R11= NOT R10= 0x0000FFFF
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a0+b0)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #0]
+
+        add r8, r3, r5           @ R8=a2+b2
+        add r9, r4, r7           @ R9=a3+b3
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a3+b3)<<5)
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a2+b2)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #4]
+
+        sub r8, r4, r7           @ R8=a3-b3
+        sub r9, r3, r5           @ R9=a2-b2
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a2-b2)<<5)
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a3-b3)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #8]
+
+        sub r8, r2, r1           @ R8=a1-b1
+        sub r9, r6, r0           @ R9=a0-b0
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a0-b0)<<5)
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a1-b1)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #12]
+
+        bal __end_row_loop
+
+__almost_empty_row:
+        @@ the row was empty, except ROWr16[0], now, management of this special case
+        @@ at this point, R0=block, R14=&block[n], R12=__const_ptr_, R1=ROWr32[0], R2=ROWr32[1],
+        @@                R3=ROWr32[2], R4=ROWr32[3], R5=(temp), R6=ROWr16[0], R7=ROWr16[1],
+        @@                R8=0xFFFF (temp), R9-R11 free
+        mov r8, #0x10000         @ R8=0xFFFF (2 steps needed!) it saves a ldr call (because of delay run).
+        sub r8, r8, #1           @ R8 is now ready.
+        and r5, r8, r6, lsl #3   @ R5=R8 & (R6<<3)= (ROWr16[0]<<3) & 0xFFFF
+        orr r5, r5, r5, lsl #16  @ R5=R5 | (R5<<16)
+        str r5, [r14, #0]        @ R14[0]=ROWr32[0]=R5
+        str r5, [r14, #4]        @ R14[4]=ROWr32[1]=R5
+        str r5, [r14, #8]        @ R14[8]=ROWr32[2]=R5
+        str r5, [r14, #12]       @ R14[12]=ROWr32[3]=R5
+
+__end_row_loop:
+        @@ at this point, R0-R11 (free)
+        @@     R12=__const_ptr_, R14=&block[n]
+        ldr r0, [sp, #0]         @ R0=block
+        teq r0, r14              @ compare current &block[8*n] to block, when block is reached, the loop is finished.
+        sub r14, r14, #16
+        bne __row_loop
+
+
+
+        @@ at this point, R0=block, R1-R11 (free)
+        @@     R12=__const_ptr_, R14=&block[n]
+        add r14, r0, #14        @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
+__col_loop:
+
+@@ __b_evaluation2:
+        @@ at this point, R0=block (temp),  R1-R11 (free)
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ proceed with b0-b3 first, followed by a0-a3
+        @@ MUL16(b0, W1, col[8x1]);
+        @@ MUL16(b1, W3, col[8x1]);
+        @@ MUL16(b2, W5, col[8x1]);
+        @@ MUL16(b3, W7, col[8x1]);
+        @@ MAC16(b0, W3, col[8x3]);
+        @@ MAC16(b1, -W7, col[8x3]);
+        @@ MAC16(b2, -W1, col[8x3]);
+        @@ MAC16(b3, -W5, col[8x3]);
+        ldr r8, =W1              @ R8=W1
+        ldrsh r7, [r14, #16]
+        mul r0, r8, r7           @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r9, =W3              @ R9=W3
+        ldr r10, =W5             @ R10=W5
+        mul r1, r9, r7           @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r11, =W7             @ R11=W7
+        mul r5, r10, r7          @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldrsh r2, [r14, #48]
+        mul r7, r11, r7          @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        teq r2, #0               @ if 0, then avoid muls
+        itttt ne
+        mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        rsbne r2, r2, #0         @ R2=-ROWr16[3]
+        mlane r1, r11, r2, r1    @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        mlane r5, r8, r2, r5     @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        it    ne
+        mlane r7, r10, r2, r7    @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+
+        @@ at this point, R0=b0,  R1=b1, R2 (free), R3 (free), R4 (free),
+        @@     R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ MAC16(b0, W5, col[5x8]);
+        @@ MAC16(b2, W7, col[5x8]);
+        @@ MAC16(b3, W3, col[5x8]);
+        @@ MAC16(b1, -W1, col[5x8]);
+        @@ MAC16(b0, W7, col[7x8]);
+        @@ MAC16(b2, W3, col[7x8]);
+        @@ MAC16(b3, -W1, col[7x8]);
+        @@ MAC16(b1, -W5, col[7x8]);
+        ldrsh r3, [r14, #80]     @ R3=COLr16[5x8]
+        teq r3, #0               @ if 0 then avoid muls
+        itttt ne
+        mlane r0, r10, r3, r0    @ R0+=W5*ROWr16[5x8]=b0
+        mlane r5, r11, r3, r5    @ R5+=W7*ROWr16[5x8]=b2
+        mlane r7, r9, r3, r7     @ R7+=W3*ROWr16[5x8]=b3
+        rsbne r3, r3, #0         @ R3=-ROWr16[5x8]
+        ldrsh r4, [r14, #112]    @ R4=COLr16[7x8]
+        it    ne
+        mlane r1, r8, r3, r1     @ R1-=W1*ROWr16[5x8]=b1
+        @@ R3 is free now
+        teq r4, #0               @ if 0 then avoid muls
+        itttt ne
+        mlane r0, r11, r4, r0    @ R0+=W7*ROWr16[7x8]=b0
+        mlane r5, r9, r4, r5     @ R5+=W3*ROWr16[7x8]=b2
+        rsbne r4, r4, #0         @ R4=-ROWr16[7x8]
+        mlane r7, r8, r4, r7     @ R7-=W1*ROWr16[7x8]=b3
+        it    ne
+        mlane r1, r10, r4, r1    @ R1-=W5*ROWr16[7x8]=b1
+        @@ R4 is free now
+@@ __end_b_evaluation2:
+        @@ at this point, R0=b0,  R1=b1, R2 (free), R3 (free), R4 (free),
+        @@     R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+
+@@ __a_evaluation2:
+        @@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
+        @@ a1 = a0 + W6 * row[2];
+        @@ a2 = a0 - W6 * row[2];
+        @@ a3 = a0 - W2 * row[2];
+        @@ a0 = a0 + W2 * row[2];
+        ldrsh r6, [r14, #0]
+        ldr r9, =W4              @ R9=W4
+        mul r6, r9, r6           @ R6=W4*ROWr16[0]
+        ldr r10, =W6             @ R10=W6
+        ldrsh r4, [r14, #32]     @ R4=ROWr16[2] (a3 not defined yet)
+        add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0)
+        mul r11, r10, r4         @ R11=W6*ROWr16[2]
+        ldr r8, =W2              @ R8=W2
+        add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
+        sub r3, r6, r11          @ R3=a0-W6*ROWr16[2] (a2)
+        mul r11, r8, r4          @ R11=W2*ROWr16[2]
+        sub r4, r6, r11          @ R4=a0-W2*ROWr16[2] (a3)
+        add r6, r6, r11          @ R6=a0+W2*ROWr16[2] (a0)
+
+        @@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
+        @@     R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ a0 += W4*row[4]
+        @@ a1 -= W4*row[4]
+        @@ a2 -= W4*row[4]
+        @@ a3 += W4*row[4]
+        ldrsh r11, [r14, #64]    @ R11=ROWr16[4]
+        teq r11, #0              @ if null avoid muls
+        itttt ne
+        mulne r11, r9, r11       @ R11=W4*ROWr16[4]
+        @@ R9 is free now
+        addne r6, r6, r11        @ R6+=W4*ROWr16[4] (a0)
+        subne r2, r2, r11        @ R2-=W4*ROWr16[4] (a1)
+        subne r3, r3, r11        @ R3-=W4*ROWr16[4] (a2)
+        ldrsh r9, [r14, #96]     @ R9=ROWr16[6]
+        it    ne
+        addne r4, r4, r11        @ R4+=W4*ROWr16[4] (a3)
+        @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
+        teq r9, #0               @ if null avoid muls
+        itttt ne
+        mulne r11, r10, r9       @ R11=W6*ROWr16[6]
+        addne r6, r6, r11        @ R6+=W6*ROWr16[6] (a0)
+        mulne r10, r8, r9        @ R10=W2*ROWr16[6]
+        @@ a0 += W6*row[6];
+        @@ a3 -= W6*row[6];
+        @@ a1 -= W2*row[6];
+        @@ a2 += W2*row[6];
+        subne r4, r4, r11        @ R4-=W6*ROWr16[6] (a3)
+        itt   ne
+        subne r2, r2, r10        @ R2-=W2*ROWr16[6] (a1)
+        addne r3, r3, r10        @ R3+=W2*ROWr16[6] (a2)
+@@ __end_a_evaluation2:
+        @@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
+        @@     R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ col[0 ] = ((a0 + b0) >> COL_SHIFT);
+        @@ col[8 ] = ((a1 + b1) >> COL_SHIFT);
+        @@ col[16] = ((a2 + b2) >> COL_SHIFT);
+        @@ col[24] = ((a3 + b3) >> COL_SHIFT);
+        @@ col[32] = ((a3 - b3) >> COL_SHIFT);
+        @@ col[40] = ((a2 - b2) >> COL_SHIFT);
+        @@ col[48] = ((a1 - b1) >> COL_SHIFT);
+        @@ col[56] = ((a0 - b0) >> COL_SHIFT);
+        @@@@@ no optimization here @@@@@
+        add r8, r6, r0           @ R8=a0+b0
+        add r9, r2, r1           @ R9=a1+b1
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #0]
+        strh r9, [r14, #16]
+        add r8, r3, r5           @ R8=a2+b2
+        add r9, r4, r7           @ R9=a3+b3
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #32]
+        strh r9, [r14, #48]
+        sub r8, r4, r7           @ R8=a3-b3
+        sub r9, r3, r5           @ R9=a2-b2
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #64]
+        strh r9, [r14, #80]
+        sub r8, r2, r1           @ R8=a1-b1
+        sub r9, r6, r0           @ R9=a0-b0
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #96]
+        strh r9, [r14, #112]
+
+@@ __end_col_loop:
+        @@ at this point, R0-R11 (free)
+        @@     R12=__const_ptr_, R14=&block[n]
+        ldr r0, [sp, #0]         @ R0=block
+        teq r0, r14              @ compare current &block[n] to block, when block is reached, the loop is finished.
+        sub r14, r14, #2
+        bne __col_loop
+
+
+
+
+@@ __end_simple_idct_arm:
+        @@ restore registers to previous status!
+        add sp, sp, #8 @@ the local variables!
+        ldmfd sp!, {r4-r11, r15} @@ update PC with LR content.
+
+
+
+@@ kind of sub-function, here not to overload the common case.
+__end_bef_a_evaluation:
+        add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
+        mul r11, r8, r4          @ R11=W2*ROWr16[2]
+        sub r4, r6, r11          @ R4=a0-W2*ROWr16[2] (a3)
+        add r6, r6, r11          @ R6=a0+W2*ROWr16[2] (a0)
+        bal __end_a_evaluation
+endfunc
diff --git a/media/ffvpx/libavcodec/arm/simple_idct_armv5te.S b/media/ffvpx/libavcodec/arm/simple_idct_armv5te.S
new file mode 100644
index 0000000000..a8d03469ab
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/simple_idct_armv5te.S
@@ -0,0 +1,613 @@
+/*
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+#define W1  22725   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W2  21407   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W3  19266   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W4  16383   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5; NOTE(review): value is (1<<14)-1, one less than the formula gives for i=4 -- presumably intentional, confirm */
+#define W5  12873   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W6  8867    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W7  4520    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define ROW_SHIFT 11 /* row pass: rounding bias is 1<<(ROW_SHIFT-1) */
+#define COL_SHIFT 20 /* column pass: rounding bias is derived from 1<<(COL_SHIFT-1) */
+
+#define W13 (W1 | (W3 << 16)) /* packed pair: W1 in the low halfword, W3 in the high halfword */
+#define W26 (W2 | (W6 << 16)) /* packed pair: W2 low, W6 high */
+#define W57 (W5 | (W7 << 16)) /* packed pair: W5 low, W7 high */
+
+function idct_row_armv5te
+        str    lr, [sp, #-4]!        /* row pass, in place on the 8 halfwords at a1; push return address */
+
+        ldrd   v1, v2, [a1, #8]      /* v1 = row[5:4], v2 = row[7:6] */
+        ldrd   a3, a4, [a1]          /* a3 = row[1:0], a4 = row[3:2] */
+        orrs   v1, v1, v2            /* Z set when row[4..7] are all zero */
+        itt    eq
+        cmpeq  v1, a4                /* v1 is 0 here, so this tests row[3:2] == 0 */
+        cmpeq  v1, a3, lsr #16       /* ... and then row[1] == 0 */
+        beq    row_dc_only           /* everything except row[0] is zero: shortcut */
+
+        mov    v1, #(1<<(ROW_SHIFT-1)) /* rounding bias */
+        mov    ip, #16384
+        sub    ip, ip, #1            /* ip = W4 */
+        smlabb v1, ip, a3, v1        /* v1 = W4*row[0]+(1<<(RS-1)) */
+        ldr    ip, =W26              /* ip = W2 | (W6 << 16) */
+        smultb a2, ip, a4            /* a2 = W6*row[2] */
+        smulbb lr, ip, a4            /* lr = W2*row[2] */
+        add    v2, v1, a2            /* v2 = A1 = a0 + W6*row[2] */
+        sub    v3, v1, a2            /* v3 = A2 = a0 - W6*row[2] */
+        sub    v4, v1, lr            /* v4 = A3 = a0 - W2*row[2] */
+        add    v1, v1, lr            /* v1 = A0 = a0 + W2*row[2] */
+
+        ldr    ip, =W13              /* ip = W1 | (W3 << 16) */
+        ldr    lr, =W57              /* lr = W5 | (W7 << 16) */
+        smulbt v5, ip, a3            /* v5 = W1*row[1] */
+        smultt v6, lr, a4            /* v6 = W7*row[3] */
+        smlatt v5, ip, a4, v5        /* v5 = B0 = W1*row[1] + W3*row[3] */
+        smultt a2, ip, a3            /* a2 = W3*row[1] */
+        smulbt v7, lr, a3            /* v7 = W5*row[1] */
+        sub    v6, v6, a2            /* v6 = W7*row[3] - W3*row[1], i.e. the negated B1 */
+        smulbt a2, ip, a4            /* a2 = W1*row[3] */
+        smultt fp, lr, a3            /* fp = W7*row[1] */
+        sub    v7, v7, a2            /* v7 = B2 = W5*row[1] - W1*row[3] */
+        smulbt a2, lr, a4            /* a2 = W5*row[3] */
+        ldrd   a3, a4, [a1, #8]     /* a3=row[5:4] a4=row[7:6] */
+        sub    fp, fp, a2            /* fp = B3 = W7*row[1] - W5*row[3] */
+
+        orrs   a2, a3, a4
+        beq    1f                    /* row[4..7] all zero: skip their contributions */
+
+        smlabt v5, lr, a3, v5        /* B0 += W5*row[5] */
+        smlabt v6, ip, a3, v6        /* v6 += W1*row[5] (v6 holds -B1) */
+        smlatt v5, lr, a4, v5        /* B0 += W7*row[7] */
+        smlabt v6, lr, a4, v6        /* v6 += W5*row[7] */
+        smlatt v7, lr, a3, v7        /* B2 += W7*row[5] */
+        smlatt fp, ip, a3, fp        /* B3 += W3*row[5] */
+        smulbt a2, ip, a4            /* a2 = W1*row[7] */
+        smlatt v7, ip, a4, v7        /* B2 += W3*row[7] */
+        sub    fp, fp, a2            /* B3 -= W1*row[7] */
+
+        ldr    ip, =W26              /* ip = W2 | (W6 << 16) */
+        mov    a2, #16384
+        sub    a2, a2, #1            /* a2 =  W4 */
+        smulbb a2, a2, a3            /* a2 =  W4*row[4] */
+        smultb lr, ip, a4            /* lr =  W6*row[6] */
+        add    v1, v1, a2            /* v1 += W4*row[4] */
+        add    v1, v1, lr            /* v1 += W6*row[6] */
+        add    v4, v4, a2            /* v4 += W4*row[4] */
+        sub    v4, v4, lr            /* v4 -= W6*row[6] */
+        smulbb lr, ip, a4            /* lr =  W2*row[6] */
+        sub    v2, v2, a2            /* v2 -= W4*row[4] */
+        sub    v2, v2, lr            /* v2 -= W2*row[6] */
+        sub    v3, v3, a2            /* v3 -= W4*row[4] */
+        add    v3, v3, lr            /* v3 += W2*row[6] */
+
+1:      add    a2, v1, v5            /* a2 = A0 + B0 */
+        mov    a3, a2, lsr #11       /* shift right by 11 (the value of ROW_SHIFT) */
+        bic    a3, a3, #0x1f0000     /* drop bits 16..20 so only the low halfword remains */
+        sub    a2, v2, v6            /* a2 = A1 + B1 (v6 holds -B1) */
+        mov    a2, a2, lsr #11
+        add    a3, a3, a2, lsl #16   /* a3 = packed outputs 1:0 */
+        add    a2, v3, v7            /* a2 = A2 + B2 */
+        mov    a4, a2, lsr #11
+        bic    a4, a4, #0x1f0000
+        add    a2, v4, fp            /* a2 = A3 + B3 */
+        mov    a2, a2, lsr #11
+        add    a4, a4, a2, lsl #16   /* a4 = packed outputs 3:2 */
+        strd   a3, a4, [a1]          /* store outputs 0..3 */
+
+        sub    a2, v4, fp            /* a2 = A3 - B3 */
+        mov    a3, a2, lsr #11
+        bic    a3, a3, #0x1f0000
+        sub    a2, v3, v7            /* a2 = A2 - B2 */
+        mov    a2, a2, lsr #11
+        add    a3, a3, a2, lsl #16   /* a3 = packed outputs 5:4 */
+        add    a2, v2, v6            /* a2 = A1 - B1 (v6 holds -B1) */
+        mov    a4, a2, lsr #11
+        bic    a4, a4, #0x1f0000
+        sub    a2, v1, v5            /* a2 = A0 - B0 */
+        mov    a2, a2, lsr #11
+        add    a4, a4, a2, lsl #16   /* a4 = packed outputs 7:6 */
+        strd   a3, a4, [a1, #8]      /* store outputs 4..7 */
+
+        ldr    pc, [sp], #4          /* return: pop the saved lr into pc */
+
+row_dc_only:
+        orr    a3, a3, a3, lsl #16   /* high halfword of a3 is 0 on this path: copy row[0] into both halves */
+        bic    a3, a3, #0xe000       /* clear bits 13..15 so the shift below cannot spill into the high half */
+        mov    a3, a3, lsl #3        /* each halfword = row[0] << 3 */
+        mov    a4, a3
+        strd   a3, a4, [a1]          /* write the same value to outputs 0..3 */
+        strd   a3, a4, [a1, #8]      /* ... and to outputs 4..7 */
+
+        ldr    pc, [sp], #4          /* return: pop the saved lr into pc */
+endfunc
+
+        .macro idct_col
+        ldr    a4, [a1]              /* a4 = col[1:0]; this macro processes two adjacent columns (low/high halfword) at once */
+        mov    ip, #16384
+        sub    ip, ip, #1            /* ip = W4 */
+        mov    v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
+        add    v2, v1, a4, asr #16   /* v2 = bias + row-0 entry of the high column */
+        rsb    v2, v2, v2, lsl #14   /* v2 *= 16383 (W4), computed as (v2 << 14) - v2 */
+        mov    a4, a4, lsl #16
+        add    v1, v1, a4, asr #16   /* v1 = bias + sign-extended row-0 entry of the low column */
+        ldr    a4, [a1, #(16*4)]     /* a4 = row-4 pair (rows are 16 bytes apart) */
+        rsb    v1, v1, v1, lsl #14   /* v1 *= 16383 (W4) */
+
+        smulbb lr, ip, a4            /* lr = W4*row4 (low column) */
+        smulbt a3, ip, a4            /* a3 = W4*row4 (high column) */
+        sub    v3, v1, lr            /* low column: v3 = A1 */
+        sub    v5, v1, lr            /* low column: v5 = A2 */
+        add    v7, v1, lr            /* low column: v7 = A3 */
+        add    v1, v1, lr            /* low column: v1 = A0 */
+        sub    v4, v2, a3            /* high column: v4 = A1 */
+        sub    v6, v2, a3            /* high column: v6 = A2 */
+        add    fp, v2, a3            /* high column: fp = A3 */
+        ldr    ip, =W26              /* ip = W2 | (W6 << 16) */
+        ldr    a4, [a1, #(16*2)]     /* a4 = row-2 pair */
+        add    v2, v2, a3            /* high column: v2 = A0 */
+
+        smulbb lr, ip, a4            /* lr = W2*row2 (low column) */
+        smultb a3, ip, a4            /* a3 = W6*row2 (low column) */
+        add    v1, v1, lr            /* A0 += W2*row2 */
+        sub    v7, v7, lr            /* A3 -= W2*row2 */
+        add    v3, v3, a3            /* A1 += W6*row2 */
+        sub    v5, v5, a3            /* A2 -= W6*row2 */
+        smulbt lr, ip, a4            /* same four updates for the high column */
+        smultt a3, ip, a4
+        add    v2, v2, lr
+        sub    fp, fp, lr
+        add    v4, v4, a3
+        ldr    a4, [a1, #(16*6)]     /* a4 = row-6 pair */
+        sub    v6, v6, a3
+
+        smultb lr, ip, a4            /* lr = W6*row6 (low column) */
+        smulbb a3, ip, a4            /* a3 = W2*row6 (low column) */
+        add    v1, v1, lr            /* A0 += W6*row6 */
+        sub    v7, v7, lr            /* A3 -= W6*row6 */
+        sub    v3, v3, a3            /* A1 -= W2*row6 */
+        add    v5, v5, a3            /* A2 += W2*row6 */
+        smultt lr, ip, a4            /* same four updates for the high column */
+        smulbt a3, ip, a4
+        add    v2, v2, lr
+        sub    fp, fp, lr
+        sub    v4, v4, a3
+        add    v6, v6, a3
+
+        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp} /* push A0..A3 as (low, high) pairs; the macro leaves these 8 words on the stack */
+
+        ldr    ip, =W13              /* ip = W1 | (W3 << 16) */
+        ldr    a4, [a1, #(16*1)]     /* a4 = row-1 pair */
+        ldr    lr, =W57              /* lr = W5 | (W7 << 16) */
+        smulbb v1, ip, a4            /* low column: v1 = B0 = W1*row1 */
+        smultb v3, ip, a4            /* low column: v3 = W3*row1 (negated below) */
+        smulbb v5, lr, a4            /* low column: v5 = B2 = W5*row1 */
+        smultb v7, lr, a4            /* low column: v7 = B3 = W7*row1 */
+        smulbt v2, ip, a4            /* high column: v2, v4, v6, fp mirror v1, v3, v5, v7 */
+        smultt v4, ip, a4
+        smulbt v6, lr, a4
+        smultt fp, lr, a4
+        rsb    v4, v4, #0            /* high column: v4 = -W3*row1 */
+        ldr    a4, [a1, #(16*3)]     /* a4 = row-3 pair */
+        rsb    v3, v3, #0            /* low column: v3 = -W3*row1, so v3/v4 accumulate the negated B1 */
+
+        smlatb v1, ip, a4, v1        /* B0 += W3*row3 */
+        smlatb v3, lr, a4, v3        /* v3 += W7*row3 */
+        smulbb a3, ip, a4            /* a3 = W1*row3 */
+        smulbb a2, lr, a4            /* a2 = W5*row3 */
+        sub    v5, v5, a3            /* B2 -= W1*row3 */
+        sub    v7, v7, a2            /* B3 -= W5*row3 */
+        smlatt v2, ip, a4, v2        /* same updates for the high column */
+        smlatt v4, lr, a4, v4
+        smulbt a3, ip, a4
+        smulbt a2, lr, a4
+        sub    v6, v6, a3
+        ldr    a4, [a1, #(16*5)]     /* a4 = row-5 pair */
+        sub    fp, fp, a2
+
+        smlabb v1, lr, a4, v1        /* B0 += W5*row5 */
+        smlabb v3, ip, a4, v3        /* v3 += W1*row5 */
+        smlatb v5, lr, a4, v5        /* B2 += W7*row5 */
+        smlatb v7, ip, a4, v7        /* B3 += W3*row5 */
+        smlabt v2, lr, a4, v2        /* same updates for the high column */
+        smlabt v4, ip, a4, v4
+        smlatt v6, lr, a4, v6
+        ldr    a3, [a1, #(16*7)]     /* a3 = row-7 pair */
+        smlatt fp, ip, a4, fp
+
+        smlatb v1, lr, a3, v1        /* B0 += W7*row7 */
+        smlabb v3, lr, a3, v3        /* v3 += W5*row7 */
+        smlatb v5, ip, a3, v5        /* B2 += W3*row7 */
+        smulbb a4, ip, a3            /* a4 = W1*row7 (low column) */
+        smlatt v2, lr, a3, v2        /* high column: B0 += W7*row7 */
+        sub    v7, v7, a4            /* B3 -= W1*row7 */
+        smlabt v4, lr, a3, v4        /* high column: v4 += W5*row7 */
+        smulbt a4, ip, a3            /* a4 = W1*row7 (high column) */
+        smlatt v6, ip, a3, v6        /* high column: B2 += W3*row7 */
+        sub    fp, fp, a4            /* high column: B3 -= W1*row7 */
+        .endm
+
+function idct_col_armv5te
+        str    lr, [sp, #-4]!        /* column pass for two adjacent columns at a1, results written back in place; push return address */
+
+        idct_col
+
+        ldmfd  sp!, {a3, a4}         /* pop A0 for the low and high column */
+        adds   a2, a3, v1            /* a2 = A0 + B0 (low column), flags set for the sign test */
+        mov    a2, a2, lsr #20       /* logical shift by 20 (the value of COL_SHIFT) leaves a 12-bit result */
+        it     mi
+        orrmi  a2, a2, #0xf000       /* negative sum: fill bits 12..15 so the halfword is sign-extended */
+        add    ip, a4, v2            /* ip = A0 + B0 (high column) */
+        mov    ip, ip, asr #20
+        orr    a2, a2, ip, lsl #16   /* pack: low column in bits 0..15, high column in bits 16..31 */
+        str    a2, [a1]              /* row 0 */
+        subs   a3, a3, v1            /* A0 - B0 (low column) */
+        mov    a2, a3, lsr #20
+        it     mi
+        orrmi  a2, a2, #0xf000
+        sub    a4, a4, v2            /* A0 - B0 (high column) */
+        mov    a4, a4, asr #20
+        orr    a2, a2, a4, lsl #16
+        ldmfd  sp!, {a3, a4}         /* pop A1 pair */
+        str    a2, [a1, #(16*7)]     /* row 7 */
+
+        subs   a2, a3, v3            /* A1 + B1: v3/v4 hold the negated B1, hence the subtract */
+        mov    a2, a2, lsr #20
+        it     mi
+        orrmi  a2, a2, #0xf000
+        sub    ip, a4, v4
+        mov    ip, ip, asr #20
+        orr    a2, a2, ip, lsl #16
+        str    a2, [a1, #(16*1)]     /* row 1 */
+        adds   a3, a3, v3            /* A1 - B1 */
+        mov    a2, a3, lsr #20
+        it     mi
+        orrmi  a2, a2, #0xf000
+        add    a4, a4, v4
+        mov    a4, a4, asr #20
+        orr    a2, a2, a4, lsl #16
+        ldmfd  sp!, {a3, a4}         /* pop A2 pair */
+        str    a2, [a1, #(16*6)]     /* row 6 */
+
+        adds   a2, a3, v5            /* A2 + B2 */
+        mov    a2, a2, lsr #20
+        it     mi
+        orrmi  a2, a2, #0xf000
+        add    ip, a4, v6
+        mov    ip, ip, asr #20
+        orr    a2, a2, ip, lsl #16
+        str    a2, [a1, #(16*2)]     /* row 2 */
+        subs   a3, a3, v5            /* A2 - B2 */
+        mov    a2, a3, lsr #20
+        it     mi
+        orrmi  a2, a2, #0xf000
+        sub    a4, a4, v6
+        mov    a4, a4, asr #20
+        orr    a2, a2, a4, lsl #16
+        ldmfd  sp!, {a3, a4}         /* pop A3 pair; all 8 words pushed by idct_col are now consumed */
+        str    a2, [a1, #(16*5)]     /* row 5 */
+
+        adds   a2, a3, v7            /* A3 + B3 */
+        mov    a2, a2, lsr #20
+        it     mi
+        orrmi  a2, a2, #0xf000
+        add    ip, a4, fp
+        mov    ip, ip, asr #20
+        orr    a2, a2, ip, lsl #16
+        str    a2, [a1, #(16*3)]     /* row 3 */
+        subs   a3, a3, v7            /* A3 - B3 */
+        mov    a2, a3, lsr #20
+        it     mi
+        orrmi  a2, a2, #0xf000
+        sub    a4, a4, fp
+        mov    a4, a4, asr #20
+        orr    a2, a2, a4, lsl #16
+        str    a2, [a1, #(16*4)]     /* row 4 */
+
+        ldr    pc, [sp], #4          /* return: pop the saved lr into pc */
+endfunc
+
+.macro  clip   dst, src:vararg
+        movs   \dst, \src            /* dst = src (src may carry a shift), flags set; macro clamps dst to 0..255 */
+        it     mi
+        movmi  \dst, #0              /* negative -> 0 */
+        cmp    \dst, #255
+        it     gt
+        movgt  \dst, #255            /* above 255 -> 255 */
+.endm
+
+.macro  aclip  dst, src:vararg
+        adds   \dst, \src            /* dst = sum of the operands given in src, flags set; macro clamps dst to 0..255 */
+        it     mi
+        movmi  \dst, #0              /* negative -> 0 */
+        cmp    \dst, #255
+        it     gt
+        movgt  \dst, #255            /* above 255 -> 255 */
+.endm
+
+function idct_col_put_armv5te
+        str    lr, [sp, #-4]!        /* column pass for two columns; results clamped to 0..255 and stored as byte pairs; push return address */
+
+        idct_col
+
+        ldmfd  sp!, {a3, a4}         /* pop A0 for the low and high column */
+        ldr    lr, [sp, #32]         /* lr = a2 slot pushed by ff_simple_idct_put_armv5te (used below as the line stride) */
+        add    a2, a3, v1            /* A0 + B0 (low column) */
+        clip   a2, a2, asr #20       /* shift by 20 (the value of COL_SHIFT), clamp to 0..255 */
+        add    ip, a4, v2            /* A0 + B0 (high column) */
+        clip   ip, ip, asr #20
+        orr    a2, a2, ip, lsl #8    /* pack the two bytes for row 0 */
+        sub    a3, a3, v1            /* A0 - B0 (low column) */
+        clip   a3, a3, asr #20
+        sub    a4, a4, v2            /* A0 - B0 (high column) */
+        clip   a4, a4, asr #20
+        ldr    v1, [sp, #28]         /* v1 = a1 slot pushed by ff_simple_idct_put_armv5te (destination pointer) */
+        strh   a2, [v1]              /* store row 0 */
+        add    a2, v1, #2
+        str    a2, [sp, #28]         /* advance the saved destination by 2 bytes for the next call */
+        orr    a2, a3, a4, lsl #8    /* packed bytes for row 7 */
+        rsb    v2, lr, lr, lsl #3    /* v2 = 7 * stride */
+        ldmfd  sp!, {a3, a4}         /* pop A1 pair */
+        strh_pre a2, v2, v1          /* strh_pre is defined outside this chunk; presumably stores at v2 + v1 with writeback (row 7) -- confirm */
+
+        sub    a2, a3, v3            /* A1 + B1: v3/v4 hold the negated B1 */
+        clip   a2, a2, asr #20
+        sub    ip, a4, v4
+        clip   ip, ip, asr #20
+        orr    a2, a2, ip, lsl #8
+        strh_pre a2, v1, lr          /* presumably stores at v1 + lr with writeback (row 1) -- confirm */
+        add    a3, a3, v3            /* A1 - B1 */
+        clip   a2, a3, asr #20
+        add    a4, a4, v4
+        clip   a4, a4, asr #20
+        orr    a2, a2, a4, lsl #8
+        ldmfd  sp!, {a3, a4}         /* pop A2 pair */
+        strh_dpre a2, v2, lr         /* strh_dpre is defined outside this chunk; presumably stores at v2 - lr with writeback (row 6) -- confirm */
+
+        add    a2, a3, v5            /* A2 + B2 */
+        clip   a2, a2, asr #20
+        add    ip, a4, v6
+        clip   ip, ip, asr #20
+        orr    a2, a2, ip, lsl #8
+        strh_pre a2, v1, lr          /* presumably row 2 */
+        sub    a3, a3, v5            /* A2 - B2 */
+        clip   a2, a3, asr #20
+        sub    a4, a4, v6
+        clip   a4, a4, asr #20
+        orr    a2, a2, a4, lsl #8
+        ldmfd  sp!, {a3, a4}         /* pop A3 pair; all 8 words pushed by idct_col are now consumed */
+        strh_dpre a2, v2, lr         /* presumably row 5 */
+
+        add    a2, a3, v7            /* A3 + B3 */
+        clip   a2, a2, asr #20
+        add    ip, a4, fp
+        clip   ip, ip, asr #20
+        orr    a2, a2, ip, lsl #8
+        strh   a2, [v1, lr]          /* store one stride past v1, no writeback (presumably row 3) */
+        sub    a3, a3, v7            /* A3 - B3 */
+        clip   a2, a3, asr #20
+        sub    a4, a4, fp
+        clip   a4, a4, asr #20
+        orr    a2, a2, a4, lsl #8
+        strh_dpre a2, v2, lr         /* presumably row 4 */
+
+        ldr    pc, [sp], #4          /* return: pop the saved lr into pc */
+endfunc
+
+function idct_col_add_armv5te
+        str    lr, [sp, #-4]!        /* column pass for two columns; results added to the existing destination bytes and clamped to 0..255; push return address */
+
+        idct_col
+
+        ldr    lr, [sp, #36]         /* lr = a1 slot pushed by ff_simple_idct_add_armv5te (destination pointer) */
+
+        ldmfd  sp!, {a3, a4}         /* pop A0 for the low and high column */
+        ldrh   ip, [lr]              /* ip = the two existing destination bytes of row 0 */
+        add    a2, a3, v1            /* A0 + B0 (low column) */
+        sub    a3, a3, v1            /* A0 - B0 (low column) */
+        and    v1, ip, #255          /* v1 = first existing byte */
+        aclip  a2, v1, a2, asr #20   /* a2 = clamp(byte + (sum >> 20)) */
+        add    v1, a4, v2            /* A0 + B0 (high column) */
+        mov    v1, v1, asr #20
+        aclip  v1, v1, ip, lsr #8    /* add the second existing byte and clamp */
+        orr    a2, a2, v1, lsl #8    /* packed result bytes for row 0 */
+        ldr    v1, [sp, #32]         /* v1 = a2 slot pushed by ff_simple_idct_add_armv5te (used below as the line stride) */
+        sub    a4, a4, v2            /* A0 - B0 (high column) */
+        rsb    v2, v1, v1, lsl #3    /* v2 = 7 * stride */
+        ldrh_pre ip, v2, lr          /* ldrh_pre is defined outside this chunk; presumably loads from v2 + lr with writeback (row 7) -- confirm */
+        strh   a2, [lr]              /* store row 0 */
+        and    a2, ip, #255
+        aclip  a3, a2, a3, asr #20
+        mov    a4, a4, asr #20
+        aclip  a4, a4, ip, lsr #8
+        add    a2, lr, #2
+        str    a2, [sp, #28]         /* advance the saved destination by 2 bytes for the next call */
+        orr    a2, a3, a4, lsl #8
+        strh   a2, [v2]              /* store at v2 (presumably row 7) */
+
+        ldmfd  sp!, {a3, a4}         /* pop A1 pair */
+        ldrh_pre ip, lr, v1          /* presumably loads from lr + v1 with writeback (row 1) -- confirm */
+        sub    a2, a3, v3            /* A1 + B1: v3/v4 hold the negated B1 */
+        add    a3, a3, v3            /* A1 - B1 */
+        and    v3, ip, #255
+        aclip  a2, v3, a2, asr #20
+        sub    v3, a4, v4
+        mov    v3, v3, asr #20
+        aclip  v3, v3, ip, lsr #8
+        orr    a2, a2, v3, lsl #8
+        add    a4, a4, v4
+        ldrh_dpre ip, v2, v1         /* ldrh_dpre is defined outside this chunk; presumably loads from v2 - v1 with writeback (row 6) -- confirm */
+        strh   a2, [lr]              /* store at lr (presumably row 1) */
+        and    a2, ip, #255
+        aclip  a3, a2, a3, asr #20
+        mov    a4, a4, asr #20
+        aclip  a4, a4, ip, lsr #8
+        orr    a2, a3, a4, lsl #8
+        strh   a2, [v2]              /* store at v2 (presumably row 6) */
+
+        ldmfd  sp!, {a3, a4}         /* pop A2 pair */
+        ldrh_pre ip, lr, v1          /* presumably row 2 */
+        add    a2, a3, v5            /* A2 + B2 */
+        sub    a3, a3, v5            /* A2 - B2 */
+        and    v3, ip, #255
+        aclip  a2, v3, a2, asr #20
+        add    v3, a4, v6
+        mov    v3, v3, asr #20
+        aclip  v3, v3, ip, lsr #8
+        orr    a2, a2, v3, lsl #8
+        sub    a4, a4, v6
+        ldrh_dpre ip, v2, v1         /* presumably row 5 */
+        strh   a2, [lr]
+        and    a2, ip, #255
+        aclip  a3, a2, a3, asr #20
+        mov    a4, a4, asr #20
+        aclip  a4, a4, ip, lsr #8
+        orr    a2, a3, a4, lsl #8
+        strh   a2, [v2]
+
+        ldmfd  sp!, {a3, a4}         /* pop A3 pair; all 8 words pushed by idct_col are now consumed */
+        ldrh_pre ip, lr, v1          /* presumably row 3 */
+        add    a2, a3, v7            /* A3 + B3 */
+        sub    a3, a3, v7            /* A3 - B3 */
+        and    v3, ip, #255
+        aclip  a2, v3, a2, asr #20
+        add    v3, a4, fp
+        mov    v3, v3, asr #20
+        aclip  v3, v3, ip, lsr #8
+        orr    a2, a2, v3, lsl #8
+        sub    a4, a4, fp
+        ldrh_dpre ip, v2, v1         /* presumably row 4 */
+        strh   a2, [lr]
+        and    a2, ip, #255
+        aclip  a3, a2, a3, asr #20
+        mov    a4, a4, asr #20
+        aclip  a4, a4, ip, lsr #8
+        orr    a2, a3, a4, lsl #8
+        strh   a2, [v2]
+
+        ldr    pc, [sp], #4          /* return: pop the saved lr into pc */
+endfunc
+
+function ff_simple_idct_armv5te, export=1
+        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr} /* in-place IDCT of the block at a1: 8 row passes, then 4 two-column passes; save the registers the helpers overwrite */
+
+        bl     idct_row_armv5te      /* row 0 */
+        add    a1, a1, #16           /* next row: 16 bytes further */
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te      /* row 7 */
+
+        sub    a1, a1, #(16*7)       /* rewind a1 to the start of the block */
+
+        bl     idct_col_armv5te      /* columns 0 and 1 */
+        add    a1, a1, #4            /* each call handles two columns, so step 4 bytes */
+        bl     idct_col_armv5te
+        add    a1, a1, #4
+        bl     idct_col_armv5te
+        add    a1, a1, #4
+        bl     idct_col_armv5te      /* columns 6 and 7 */
+
+        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} /* restore registers and return (saved lr goes to pc) */
+endfunc
+
+function ff_simple_idct_add_armv5te, export=1
+        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} /* a1/a2 land at [sp] and [sp, #4]; idct_col_add_armv5te reads them back as destination pointer and stride */
+
+        mov    a1, a3                /* a3 is the coefficient block; the row/column helpers expect it in a1 */
+
+        bl     idct_row_armv5te      /* row 0 */
+        add    a1, a1, #16           /* next row: 16 bytes further */
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te      /* row 7 */
+
+        sub    a1, a1, #(16*7)       /* rewind a1 to the start of the block */
+
+        bl     idct_col_add_armv5te  /* columns 0 and 1; the helper also advances the saved destination by 2 bytes */
+        add    a1, a1, #4            /* each call handles two columns, so step 4 bytes */
+        bl     idct_col_add_armv5te
+        add    a1, a1, #4
+        bl     idct_col_add_armv5te
+        add    a1, a1, #4
+        bl     idct_col_add_armv5te  /* columns 6 and 7 */
+
+        add    sp, sp, #8            /* drop the saved a1/a2 slots */
+        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} /* restore registers and return (saved lr goes to pc) */
+endfunc
+
+function ff_simple_idct_put_armv5te, export=1
+        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} /* a1/a2 land at [sp] and [sp, #4]; idct_col_put_armv5te reads them back as destination pointer and stride */
+
+        mov    a1, a3                /* a3 is the coefficient block; the row/column helpers expect it in a1 */
+
+        bl     idct_row_armv5te      /* row 0 */
+        add    a1, a1, #16           /* next row: 16 bytes further */
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te      /* row 7 */
+
+        sub    a1, a1, #(16*7)       /* rewind a1 to the start of the block */
+
+        bl     idct_col_put_armv5te  /* columns 0 and 1; the helper also advances the saved destination by 2 bytes */
+        add    a1, a1, #4            /* each call handles two columns, so step 4 bytes */
+        bl     idct_col_put_armv5te
+        add    a1, a1, #4
+        bl     idct_col_put_armv5te
+        add    a1, a1, #4
+        bl     idct_col_put_armv5te  /* columns 6 and 7 */
+
+        add    sp, sp, #8            /* drop the saved a1/a2 slots */
+        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} /* restore registers and return (saved lr goes to pc) */
+endfunc
diff --git a/media/ffvpx/libavcodec/arm/simple_idct_armv6.S b/media/ffvpx/libavcodec/arm/simple_idct_armv6.S
new file mode 100644
index 0000000000..f95c20d295
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/simple_idct_armv6.S
@@ -0,0 +1,425 @@
+/*
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2007 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+#define W1  22725   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W2  21407   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W3  19266   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W4  16383   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5; NOTE(review): value is (1<<14)-1, one less than the formula gives for i=4 -- presumably intentional, confirm */
+#define W5  12873   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W6  8867    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W7  4520    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define ROW_SHIFT 11 /* shift amount passed to the row-pass macros */
+#define COL_SHIFT 20 /* shift amount passed to the column-pass macros */
+
+#define W13 (W1 | (W3 << 16)) /* packed pair: W1 in the low halfword, W3 in the high halfword */
+#define W26 (W2 | (W6 << 16)) /* packed pair: W2 low, W6 high */
+#define W42 (W4 | (W2 << 16)) /* packed pair: W4 low, W2 high */
+#define W42n (-W4&0xffff | (-W2 << 16)) /* packed pair of negated values: -W4 low (masked to 16 bits), -W2 high */
+#define W46 (W4 | (W6 << 16)) /* packed pair: W4 low, W6 high */
+#define W57 (W5 | (W7 << 16)) /* packed pair: W5 low, W7 high */
+
+/*
+  Compute partial IDCT of single row.
+  shift = shift amount of the finishing step; used here only for the rounding bias 1<<(shift-1)
+  r0 = source address
+  r2 = row[2,0] <= 2 cycles
+  r3 = row[3,1]
+  ip = w42      <= 2 cycles
+
+  Output in registers r4--r11: A0..A3 in r4..r7; B0, -B1, B2, B3 in r8..r11. Overwrites r1, r2, r3, ip, lr.
+*/
+        .macro idct_row shift
+        ldr    lr, =W46              /* lr  = W4 | (W6 << 16) */
+        mov    r1, #(1<<(\shift-1))  /* r1  = rounding bias */
+        smlad  r4, r2, ip, r1        /* r4  =  A0 = W4*row[0] + W2*row[2] + bias */
+        smlsd  r7, r2, ip, r1        /* r7  =  A3 = W4*row[0] - W2*row[2] + bias */
+        ldr    ip, =W13              /* ip  = W1 | (W3 << 16) */
+        ldr    r10,=W57              /* r10 = W5 | (W7 << 16) */
+        smlad  r5, r2, lr, r1        /* r5  =  A1 = W4*row[0] + W6*row[2] + bias */
+        smlsd  r6, r2, lr, r1        /* r6  =  A2 = W4*row[0] - W6*row[2] + bias */
+
+        smuad  r8, r3, ip            /* r8  =  B0 = W1*row[1] + W3*row[3] */
+        smusdx r11,r3, r10           /* r11 =  B3 = W7*row[1] - W5*row[3] */
+        ldr    lr, [r0, #12]         /* lr  =  row[7,5] */
+        pkhtb  r2, ip, r10,asr #16   /* r2  =  W7 | (W3 << 16) */
+        pkhbt  r1, ip, r10,lsl #16   /* r1  =  W1 | (W5 << 16) */
+        smusdx r9, r2, r3            /* r9  = -B1 = W7*row[3] - W3*row[1] */
+        smlad  r8, lr, r10,r8        /* B0  +=      W5*row[5] + W7*row[7] */
+        smusdx r10,r3, r1            /* r10 =  B2 = W5*row[1] - W1*row[3] */
+
+        ldr    r3, =W42n             /* r3 =  -W4 | (-W2 << 16) */
+        smlad  r10,lr, r2, r10       /* B2 +=  W7*row[5] + W3*row[7] */
+        ldr    r2, [r0, #4]          /* r2 =   row[6,4] */
+        smlsdx r11,lr, ip, r11       /* B3 +=  W3*row[5] - W1*row[7] */
+        ldr    ip, =W46              /* ip =   W4 | (W6 << 16) */
+        smlad  r9, lr, r1, r9        /* B1 -=  W1*row[5] + W5*row[7] */
+
+        smlad  r5, r2, r3, r5        /* A1 += -W4*row[4] - W2*row[6] */
+        smlsd  r6, r2, r3, r6        /* A2 += -W4*row[4] + W2*row[6] */
+        smlad  r4, r2, ip, r4        /* A0 +=  W4*row[4] + W6*row[6] */
+        smlsd  r7, r2, ip, r7        /* A3 +=  W4*row[4] - W6*row[6] */
+        .endm
+
+/*
+  Compute partial IDCT of half row: only row[0..3] contribute, no loads from memory.
+  shift = shift amount of the finishing step; used here only for the rounding bias 1<<(shift-1)
+  r2 = row[2,0]
+  r3 = row[3,1]
+  ip = w42
+
+  Output in registers r4--r11: A0..A3 in r4..r7; B0, -B1, B2, B3 in r8..r11. Overwrites r1, r2, ip, lr.
+*/
+        .macro idct_row4 shift
+        ldr    lr, =W46              /* lr =  W4 | (W6 << 16) */
+        ldr    r10,=W57              /* r10 = W5 | (W7 << 16) */
+        mov    r1, #(1<<(\shift-1))  /* r1  = rounding bias */
+        smlad  r4, r2, ip, r1        /* r4  =  A0 = W4*row[0] + W2*row[2] + bias */
+        smlsd  r7, r2, ip, r1        /* r7  =  A3 = W4*row[0] - W2*row[2] + bias */
+        ldr    ip, =W13              /* ip =  W1 | (W3 << 16) */
+        smlad  r5, r2, lr, r1        /* r5  =  A1 = W4*row[0] + W6*row[2] + bias */
+        smlsd  r6, r2, lr, r1        /* r6  =  A2 = W4*row[0] - W6*row[2] + bias */
+        smusdx r11,r3, r10           /* r11 =  B3 = W7*row[1] - W5*row[3] */
+        smuad  r8, r3, ip            /* r8  =  B0 = W1*row[1] + W3*row[3] */
+        pkhtb  r2, ip, r10,asr #16   /* r2  =  W7 | (W3 << 16) */
+        pkhbt  r1, ip, r10,lsl #16   /* r1  =  W1 | (W5 << 16) */
+        smusdx r9, r2, r3            /* r9  = -B1 = W7*row[3] - W3*row[1] */
+        smusdx r10,r3, r1            /* r10 =  B2 = W5*row[1] - W1*row[3] */
+        .endm
+
+/*
+  Compute final part of IDCT single row without shift.
+  Input in registers r4--r11 (r9 holds -B1, so A1 + B1 is formed by subtracting r9)
+  Output in registers ip, r4--r6, lr, r8--r10
+*/
+        .macro idct_finish
+        add    ip, r4, r8            /* ip  = A0 + B0 */
+        sub    lr, r4, r8            /* lr  = A0 - B0 */
+        sub    r4, r5, r9            /* r4  = A1 + B1 */
+        add    r8, r5, r9            /* r8  = A1 - B1 */
+        add    r5, r6, r10           /* r5  = A2 + B2 */
+        sub    r9, r6, r10           /* r9  = A2 - B2 */
+        add    r6, r7, r11           /* r6  = A3 + B3 */
+        sub    r10,r7, r11           /* r10 = A3 - B3 */
+        .endm
+
+/*
+  Compute final part of IDCT single row.
+  shift = right-shift amount
+  Input/output in registers r4--r11; r2 and r3 are used as scratch
+*/
+        .macro idct_finish_shift shift
+        add    r3, r4, r8            /* r3 = A0 + B0 */
+        sub    r2, r4, r8            /* r2 = A0 - B0 */
+        mov    r4, r3, asr #\shift   /* r4 = (A0 + B0) >> shift */
+        mov    r8, r2, asr #\shift   /* r8 = (A0 - B0) >> shift */
+
+        sub    r3, r5, r9            /* r3 = A1 + B1 (input r9 holds -B1) */
+        add    r2, r5, r9            /* r2 = A1 - B1 */
+        mov    r5, r3, asr #\shift   /* r5 = (A1 + B1) >> shift */
+        mov    r9, r2, asr #\shift   /* r9 = (A1 - B1) >> shift */
+
+        add    r3, r6, r10           /* r3 = A2 + B2 */
+        sub    r2, r6, r10           /* r2 = A2 - B2 */
+        mov    r6, r3, asr #\shift   /* r6  = (A2 + B2) >> shift */
+        mov    r10,r2, asr #\shift   /* r10 = (A2 - B2) >> shift */
+
+        add    r3, r7, r11           /* r3 = A3 + B3 */
+        sub    r2, r7, r11           /* r2 = A3 - B3 */
+        mov    r7, r3, asr #\shift   /* r7  = (A3 + B3) >> shift */
+        mov    r11,r2, asr #\shift   /* r11 = (A3 - B3) >> shift */
+        .endm
+
+/*
+  Compute final part of IDCT single row, saturating results at 8 bits.
+  shift = right-shift amount
+  Input/output in registers r4--r11; r3 and ip are used as scratch
+*/
+        .macro idct_finish_shift_sat shift
+        add    r3, r4, r8            /* r3 = A0 + B0 */
+        sub    ip, r4, r8            /* ip = A0 - B0 */
+        usat   r4, #8, r3, asr #\shift /* r4 = (A0 + B0) >> shift, saturated to unsigned 8 bits */
+        usat   r8, #8, ip, asr #\shift /* r8 = (A0 - B0) >> shift, saturated */
+
+        sub    r3, r5, r9            /* r3 = A1 + B1 (input r9 holds -B1) */
+        add    ip, r5, r9            /* ip = A1 - B1 */
+        usat   r5, #8, r3, asr #\shift /* r5 = (A1 + B1) >> shift, saturated */
+        usat   r9, #8, ip, asr #\shift /* r9 = (A1 - B1) >> shift, saturated */
+
+        add    r3, r6, r10           /* r3 = A2 + B2 */
+        sub    ip, r6, r10           /* ip = A2 - B2 */
+        usat   r6, #8, r3, asr #\shift /* r6  = (A2 + B2) >> shift, saturated */
+        usat   r10,#8, ip, asr #\shift /* r10 = (A2 - B2) >> shift, saturated */
+
+        add    r3, r7, r11           /* r3 = A3 + B3 */
+        sub    ip, r7, r11           /* ip = A3 - B3 */
+        usat   r7, #8, r3, asr #\shift /* r7  = (A3 + B3) >> shift, saturated */
+        usat   r11,#8, ip, asr #\shift /* r11 = (A3 - B3) >> shift, saturated */
+        .endm
+
+/*
+  Compute IDCT of single row, storing as column.
+  r0 = source (halfwords read in the order row[0], row[2], row[4], row[6], row[1], row[3], row[5], row[7])
+  r1 = dest (results stored 16 bytes apart)
+*/
+function idct_row_armv6
+        push   {lr}
+
+        ldr    lr, [r0, #12]         /* lr = row[7,5] */
+        ldr    ip, [r0, #4]          /* ip = row[6,4] */
+        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
+        ldr    r2, [r0]              /* r2 = row[2,0] */
+        orrs   lr, lr, ip            /* lr = row[4..7] OR-ed together; Z set when all are zero */
+        itt    eq
+        cmpeq  lr, r3                /* lr is 0 here, so this tests row[3,1] == 0 */
+        cmpeq  lr, r2, lsr #16       /* ... and then row[2] == 0 */
+        beq    1f                    /* everything except row[0] is zero: shortcut */
+        push   {r1}                  /* the idct_row macros overwrite r1 */
+        ldr    ip, =W42              /* ip = W4 | (W2 << 16) */
+        cmp    lr, #0
+        beq    2f                    /* row[4..7] all zero: use the half-row variant */
+
+        idct_row   ROW_SHIFT
+        b      3f
+
+2:      idct_row4  ROW_SHIFT
+
+3:      pop    {r1}
+        idct_finish_shift ROW_SHIFT
+
+        strh   r4, [r1]              /* (A0 + B0) >> ROW_SHIFT */
+        strh   r5, [r1, #(16*2)]     /* (A1 + B1) >> ROW_SHIFT */
+        strh   r6, [r1, #(16*4)]     /* (A2 + B2) >> ROW_SHIFT */
+        strh   r7, [r1, #(16*6)]     /* (A3 + B3) >> ROW_SHIFT */
+        strh   r11,[r1, #(16*1)]     /* (A3 - B3) >> ROW_SHIFT */
+        strh   r10,[r1, #(16*3)]     /* (A2 - B2) >> ROW_SHIFT */
+        strh   r9, [r1, #(16*5)]     /* (A1 - B1) >> ROW_SHIFT */
+        strh   r8, [r1, #(16*7)]     /* (A0 - B0) >> ROW_SHIFT */
+
+        pop    {pc}
+
+1:      mov    r2, r2, lsl #3        /* shortcut: row[0] << 3 (strh keeps the low halfword) */
+        strh   r2, [r1]              /* the same value goes to all eight outputs */
+        strh   r2, [r1, #(16*2)]
+        strh   r2, [r1, #(16*4)]
+        strh   r2, [r1, #(16*6)]
+        strh   r2, [r1, #(16*1)]
+        strh   r2, [r1, #(16*3)]
+        strh   r2, [r1, #(16*5)]
+        strh   r2, [r1, #(16*7)]
+        pop    {pc}
+endfunc
+
+/*
+  Compute IDCT of single column, read as row.
+  r0 = source
+  r1 = dest
+*/
+function idct_col_armv6
+        push   {r1, lr}              /* r1 (dest) is popped again before the stores */
+
+        ldr    r2, [r0]              /* r2 = row[2,0] */
+        ldr    ip, =W42              /* ip = W4 | (W2 << 16) */
+        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
+        idct_row COL_SHIFT
+        pop    {r1}                  /* restore dest; lr stays stacked for the final pop {pc} */
+        idct_finish_shift COL_SHIFT
+
+        strh   r4, [r1]              /* r4..r7 go to output slots 0..3 (16 bytes per slot) */
+        strh   r5, [r1, #(16*1)]
+        strh   r6, [r1, #(16*2)]
+        strh   r7, [r1, #(16*3)]
+        strh   r11,[r1, #(16*4)]     /* r11..r8 go to output slots 4..7 */
+        strh   r10,[r1, #(16*5)]
+        strh   r9, [r1, #(16*6)]
+        strh   r8, [r1, #(16*7)]
+
+        pop    {pc}
+endfunc
+
+/*
+  Compute IDCT of single column, read as row, store saturated 8-bit.
+  r0 = source
+  r1 = dest
+  r2 = line size
+*/
+function idct_col_put_armv6
+        push   {r1, r2, lr}          /* r2 is reused for row data below, so dest and line size are saved */
+
+        ldr    r2, [r0]              /* r2 = row[2,0] */
+        ldr    ip, =W42              /* ip = W4 | (W2 << 16) */
+        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
+        idct_row COL_SHIFT
+        pop    {r1, r2}              /* r1 = dest, r2 = line size again */
+        idct_finish_shift_sat COL_SHIFT
+
+        strb_post r4, r1, r2         /* presumably: store byte at [r1], then r1 += r2 (macro defined elsewhere) */
+        strb_post r5, r1, r2
+        strb_post r6, r1, r2
+        strb_post r7, r1, r2
+        strb_post r11,r1, r2         /* second half is written from r11 down to r8 */
+        strb_post r10,r1, r2
+        strb_post r9, r1, r2
+        strb_post r8, r1, r2
+
+        sub    r1, r1, r2, lsl #3    /* r1 -= 8 * line size: back to the first line if each store advanced by r2 */
+
+        pop    {pc}
+endfunc
+
+/*
+  Compute IDCT of single column, read as row, add/store saturated 8-bit.
+  r0 = source
+  r1 = dest
+  r2 = line size
+*/
+function idct_col_add_armv6
+        push   {r1, r2, lr}          /* r2 is reused for row data below, so dest and line size are saved */
+
+        ldr    r2, [r0]              /* r2 = row[2,0] */
+        ldr    ip, =W42              /* ip = W4 | (W2 << 16) */
+        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
+        idct_row COL_SHIFT
+        pop    {r1, r2}              /* r1 = dest, r2 = line size again */
+        idct_finish                  /* defined elsewhere; the code below consumes ip, r4, r5, r6, r10, r9, r8, lr in that order */
+
+        ldrb   r3, [r1]              /* existing pixel, line 0 */
+        ldrb   r7, [r1, r2]          /* existing pixel, line 1 */
+        ldrb   r11,[r1, r2, lsl #2]  /* NOTE(review): r11 is reloaded below before any use, so this load looks dead */
+        add    ip, r3, ip, asr #COL_SHIFT   /* pixel + (result >> COL_SHIFT) */
+        usat   ip, #8, ip            /* clamp to 0..255 */
+        add    r4, r7, r4, asr #COL_SHIFT
+        strb_post ip, r1, r2         /* presumably: store byte at [r1], then r1 += r2 (macro defined elsewhere) */
+        ldrb   ip, [r1, r2]          /* line 2, assuming r1 now points at line 1 */
+        usat   r4, #8, r4
+        ldrb   r11,[r1, r2, lsl #2]  /* line 5 under the same assumption; consumed by the r9 add below */
+        add    r5, ip, r5, asr #COL_SHIFT
+        usat   r5, #8, r5
+        strb_post r4, r1, r2
+        ldrb   r3, [r1, r2]          /* line 3 */
+        ldrb   ip, [r1, r2, lsl #2]  /* line 6 */
+        strb_post r5, r1, r2
+        ldrb   r7, [r1, r2]          /* line 4 */
+        ldrb   r4, [r1, r2, lsl #2]  /* line 7 */
+        add    r6, r3, r6, asr #COL_SHIFT
+        usat   r6, #8, r6
+        add    r10,r7, r10,asr #COL_SHIFT
+        usat   r10,#8, r10
+        add    r9, r11,r9, asr #COL_SHIFT
+        usat   r9, #8, r9
+        add    r8, ip, r8, asr #COL_SHIFT
+        usat   r8, #8, r8
+        add    lr, r4, lr, asr #COL_SHIFT
+        usat   lr, #8, lr
+        strb_post r6, r1, r2         /* lines 3..7 are written back in order */
+        strb_post r10,r1, r2
+        strb_post r9, r1, r2
+        strb_post r8, r1, r2
+        strb_post lr, r1, r2
+
+        sub    r1, r1, r2, lsl #3    /* r1 -= 8 * line size: back to the first line if each store advanced by r2 */
+
+        pop    {pc}
+endfunc
+
+/*
+  Compute 8 IDCT row transforms.
+  func = IDCT row->col function
+  width = width of columns in bytes
+*/
+        .macro idct_rows func width
+        bl     \func                 /* source offset 0 */
+        add    r0, r0, #(16*2)
+        add    r1, r1, #\width
+        bl     \func                 /* source offset 32 */
+        add    r0, r0, #(16*2)
+        add    r1, r1, #\width
+        bl     \func                 /* source offset 64 */
+        add    r0, r0, #(16*2)
+        add    r1, r1, #\width
+        bl     \func                 /* source offset 96 */
+        sub    r0, r0, #(16*5)       /* 96 - 80 = 16: switch to the odd 16-byte rows */
+        add    r1, r1, #\width
+        bl     \func                 /* source offset 16 */
+        add    r0, r0, #(16*2)
+        add    r1, r1, #\width
+        bl     \func                 /* source offset 48 */
+        add    r0, r0, #(16*2)
+        add    r1, r1, #\width
+        bl     \func                 /* source offset 80 */
+        add    r0, r0, #(16*2)
+        add    r1, r1, #\width
+        bl     \func                 /* source offset 112 */
+
+        sub    r0, r0, #(16*7)       /* r0 is back at its entry value; r1 is left advanced by 7 * width */
+        .endm
+
+/* void ff_simple_idct_armv6(int16_t *data); */
+function ff_simple_idct_armv6, export=1
+        push   {r4-r11, lr}
+        sub    sp, sp, #128          /* 128-byte scratch block on the stack */
+
+        mov    r1, sp                /* pass 1: data (r0) -> scratch */
+        idct_rows idct_row_armv6, 2
+        mov    r1, r0                /* idct_rows leaves r0 at its entry value, so r1 = data */
+        mov    r0, sp                /* pass 2: scratch -> data, i.e. the transform is in place */
+        idct_rows idct_col_armv6, 2
+
+        add    sp, sp, #128
+        pop    {r4-r11, pc}
+endfunc
+
+/* ff_simple_idct_add_armv6(uint8_t *dest, ptrdiff_t line_size, int16_t *data); */
+function ff_simple_idct_add_armv6, export=1
+        push   {r0, r1, r4-r11, lr}  /* r0 (dest) and r1 (line_size) end up lowest on the stack */
+        sub    sp, sp, #128          /* 128-byte scratch block below them */
+
+        mov    r0, r2                /* pass 1: data -> scratch */
+        mov    r1, sp
+        idct_rows idct_row_armv6, 2
+        mov    r0, sp                /* pass 2 reads the scratch block */
+        ldr    r1, [sp, #128]        /* reload saved dest */
+        ldr    r2, [sp, #(128+4)]    /* reload saved line_size */
+        idct_rows idct_col_add_armv6, 1
+
+        add    sp, sp, #(128+8)      /* drop the scratch block and the saved r0/r1 */
+        pop    {r4-r11, pc}
+endfunc
+
+/* ff_simple_idct_put_armv6(uint8_t *dest, ptrdiff_t line_size, int16_t *data); */
+function ff_simple_idct_put_armv6, export=1
+        push   {r0, r1, r4-r11, lr}  /* r0 (dest) and r1 (line_size) end up lowest on the stack */
+        sub    sp, sp, #128          /* 128-byte scratch block below them */
+
+        mov    r0, r2                /* pass 1: data -> scratch */
+        mov    r1, sp
+        idct_rows idct_row_armv6, 2
+        mov    r0, sp                /* pass 2 reads the scratch block */
+        ldr    r1, [sp, #128]        /* reload saved dest */
+        ldr    r2, [sp, #(128+4)]    /* reload saved line_size */
+        idct_rows idct_col_put_armv6, 1
+
+        add    sp, sp, #(128+8)      /* drop the scratch block and the saved r0/r1 */
+        pop    {r4-r11, pc}
+endfunc
diff --git a/media/ffvpx/libavcodec/arm/simple_idct_neon.S b/media/ffvpx/libavcodec/arm/simple_idct_neon.S
new file mode 100644
index 0000000000..726d4cbefa
--- /dev/null
+++ b/media/ffvpx/libavcodec/arm/simple_idct_neon.S
@@ -0,0 +1,375 @@
+/*
+ * ARM NEON IDCT
+ *
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * Based on Simple IDCT
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+#define W1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 gives 16384 for i=4; this file uses 16383
+#define W5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4c ((1<<(COL_SHIFT-1))/W4)  // column rounding bias divided by W4; 32 with the values here
+#define ROW_SHIFT 11
+#define COL_SHIFT 20
+
+#define w1 d0[0]  // w1..w4c name the lanes of d0/d1, which idct_start loads from idct_coeff_neon
+#define w2 d0[1]
+#define w3 d0[2]
+#define w4 d0[3]
+#define w5 d1[0]
+#define w6 d1[1]
+#define w7 d1[2]
+#define w4c d1[3]
+
+        .macro idct_col4_top
+        vmull.s16       q7,  d6,  w2    /* q7   = W2 * col[2] */
+        vmull.s16       q8,  d6,  w6    /* q8   = W6 * col[2] */
+        vmull.s16       q9,  d4,  w1    /* q9   = W1 * col[1] */
+        vadd.i32        q11, q15, q7    /* q11  = q15 + W2 * col[2]; q15 is prepared by the caller */
+        vmull.s16       q10, d4,  w3    /* q10  = W3 * col[1] */
+        vadd.i32        q12, q15, q8    /* q12  = q15 + W6 * col[2] */
+        vmull.s16       q5,  d4,  w5    /* q5   = W5 * col[1] */
+        vsub.i32        q13, q15, q8    /* q13  = q15 - W6 * col[2] */
+        vmull.s16       q6,  d4,  w7    /* q6   = W7 * col[1] */
+        vsub.i32        q14, q15, q7    /* q14  = q15 - W2 * col[2] */
+
+        vmlal.s16       q9,  d8,  w3    /* q9  += W3 * col[3] */
+        vmlsl.s16       q10, d8,  w7    /* q10 -= W7 * col[3] */
+        vmlsl.s16       q5,  d8,  w1    /* q5  -= W1 * col[3] */
+        vmlsl.s16       q6,  d8,  w5    /* q6  -= W5 * col[3] */
+        .endm
+
+        .text
+        .align 6
+
+function idct_row4_pld_neon
+        pld             [r0]                    /* prefetch r0 + 0*r1 */
+        add             r3,  r0,  r1,  lsl #2   /* r3 = r0 + 4*r1 */
+        pld             [r0, r1]                /* r0 + 1*r1 */
+        pld             [r0, r1, lsl #1]        /* r0 + 2*r1 */
+A       pld             [r3, -r1]               /* r0 + 3*r1; the A prefix is presumably an ARM-mode-only marker from asm.S */
+        pld             [r3]                    /* r0 + 4*r1 */
+        pld             [r3, r1]                /* r0 + 5*r1 */
+        add             r3,  r3,  r1,  lsl #1   /* r3 = r0 + 6*r1 */
+        pld             [r3]                    /* r0 + 6*r1 */
+        pld             [r3, r1]                /* r0 + 7*r1; no return here, so execution runs on into idct_row4_neon */
+endfunc
+
+function idct_row4_neon
+        vmov.i32        q15, #(1<<(ROW_SHIFT-1))   /* rounding bias for the final >> ROW_SHIFT */
+        vld1.64         {d2-d5},  [r2,:128]!
+        vmlal.s16       q15, d2,  w4    /* q15  += W4 * col[0] */
+        vld1.64         {d6,d7},  [r2,:128]!
+        vorr            d10, d3,  d5
+        vld1.64         {d8,d9},  [r2,:128]!
+        add             r2,  r2,  #-64  /* the three loads advanced r2 by 64 bytes; rewind for the stores */
+
+        vorr            d11, d7,  d9
+        vorr            d10, d10, d11   /* d10 = d3 | d5 | d7 | d9, the inputs used as col[4..7] below */
+        vmov            r3,  r4,  d10
+
+        idct_col4_top
+
+        orrs            r3,  r3,  r4    /* zero when col[4..7] are all zero */
+        beq             1f              /* then their contributions can be skipped */
+
+        vmull.s16       q7,  d3,  w4    /* q7   = W4 * col[4] */
+        vmlal.s16       q9,  d5,  w5    /* q9  += W5 * col[5] */
+        vmlsl.s16       q10, d5,  w1    /* q10 -= W1 * col[5] */
+        vmull.s16       q8,  d7,  w2    /* q8   = W2 * col[6] */
+        vmlal.s16       q5,  d5,  w7    /* q5  += W7 * col[5] */
+        vadd.i32        q11, q11, q7
+        vsub.i32        q12, q12, q7
+        vsub.i32        q13, q13, q7
+        vadd.i32        q14, q14, q7
+        vmlal.s16       q6,  d5,  w3    /* q6  += W3 * col[5] */
+        vmull.s16       q7,  d7,  w6    /* q7   = W6 * col[6] */
+        vmlal.s16       q9,  d9,  w7    /* q9  += W7 * col[7] */
+        vmlsl.s16       q10, d9,  w5    /* q10 -= W5 * col[7] */
+        vmlal.s16       q5,  d9,  w3    /* q5  += W3 * col[7] */
+        vmlsl.s16       q6,  d9,  w1    /* q6  -= W1 * col[7] */
+        vadd.i32        q11, q11, q7
+        vsub.i32        q12, q12, q8
+        vadd.i32        q13, q13, q8
+        vsub.i32        q14, q14, q7
+
+1:      vadd.i32        q3,  q11, q9    /* sums of the q11..q14 and q9/q10/q5/q6 terms... */
+        vadd.i32        q4,  q12, q10
+        vshrn.i32       d2,  q3,  #ROW_SHIFT   /* ...narrowed to 16 bits after >> ROW_SHIFT */
+        vshrn.i32       d4,  q4,  #ROW_SHIFT
+        vadd.i32        q7,  q13, q5
+        vadd.i32        q8,  q14, q6
+        vtrn.16         d2,  d4         /* the vtrn.16/vtrn.32 pairs transpose each 4x4 group */
+        vshrn.i32       d6,  q7,  #ROW_SHIFT
+        vshrn.i32       d8,  q8,  #ROW_SHIFT
+        vsub.i32        q14, q14, q6    /* differences of the same terms give the other four outputs */
+        vsub.i32        q11, q11, q9
+        vtrn.16         d6,  d8
+        vsub.i32        q13, q13, q5
+        vshrn.i32       d3,  q14, #ROW_SHIFT
+        vtrn.32         d2,  d6
+        vsub.i32        q12, q12, q10
+        vtrn.32         d4,  d8
+        vshrn.i32       d5,  q13, #ROW_SHIFT
+        vshrn.i32       d7,  q12, #ROW_SHIFT
+        vshrn.i32       d9,  q11, #ROW_SHIFT
+
+        vtrn.16         d3,  d5
+        vtrn.16         d7,  d9
+        vtrn.32         d3,  d7
+        vtrn.32         d5,  d9
+
+        vst1.64         {d2-d5},  [r2,:128]!
+        vst1.64         {d6-d9},  [r2,:128]!   /* r2 ends 64 bytes past its entry value */
+
+        bx              lr
+endfunc
+
+function idct_col4_neon
+        mov             ip,  #16        /* post-increment stride for the loads: 16 bytes */
+        vld1.64         {d2}, [r2,:64], ip /* d2 = col[0] */
+        vdup.16         d30, w4c
+        vld1.64         {d4}, [r2,:64], ip /* d4 = col[1] */
+        vadd.i16        d30, d30, d2
+        vld1.64         {d6}, [r2,:64], ip /* d6 = col[2] */
+        vmull.s16       q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/
+        vld1.64         {d8}, [r2,:64], ip /* d8 = col[3] */
+
+        ldrd            r4,  r5,  [r2]        /* peek at col[4] */
+        ldrd            r6,  r7,  [r2, #16]   /* peek at col[5] */
+        orrs            r4,  r4,  r5          /* Z set when col[4] is all zero */
+
+        idct_col4_top                         /* NEON only, so the flags from orrs survive */
+        it              eq
+        addeq           r2,  r2,  #16         /* zero input: step over it instead of loading */
+        beq             1f
+
+        vld1.64         {d3}, [r2,:64], ip /* d3 = col[4] */
+        vmull.s16       q7,  d3,  w4    /* q7   = W4 * col[4] */
+        vadd.i32        q11, q11, q7
+        vsub.i32        q12, q12, q7
+        vsub.i32        q13, q13, q7
+        vadd.i32        q14, q14, q7
+
+1:      orrs            r6,  r6,  r7          /* Z set when col[5] is all zero */
+        ldrd            r4,  r5,  [r2, #16]   /* r2 points at col[5] here, so this peeks at col[6] */
+        it              eq
+        addeq           r2,  r2,  #16
+        beq             2f
+
+        vld1.64         {d5}, [r2,:64], ip /* d5 = col[5] */
+        vmlal.s16       q9,  d5,  w5    /* q9  += W5 * col[5] */
+        vmlsl.s16       q10, d5,  w1    /* q10 -= W1 * col[5] */
+        vmlal.s16       q5,  d5,  w7    /* q5  += W7 * col[5] */
+        vmlal.s16       q6,  d5,  w3    /* q6  += W3 * col[5] */
+
+2:      orrs            r4,  r4,  r5          /* Z set when col[6] is all zero */
+        ldrd            r4,  r5,  [r2, #16]   /* peek at col[7]; ldrd leaves the flags alone */
+        it              eq
+        addeq           r2,  r2,  #16
+        beq             3f
+
+        vld1.64         {d7}, [r2,:64], ip /* d7 = col[6] */
+        vmull.s16       q7,  d7,  w6    /* q7   = W6 * col[6] */
+        vmull.s16       q8,  d7,  w2    /* q8   = W2 * col[6] */
+        vadd.i32        q11, q11, q7
+        vsub.i32        q14, q14, q7
+        vsub.i32        q12, q12, q8
+        vadd.i32        q13, q13, q8
+
+3:      orrs            r4,  r4,  r5          /* Z set when col[7] is all zero */
+        it              eq
+        addeq           r2,  r2,  #16
+        beq             4f
+
+        vld1.64         {d9}, [r2,:64], ip /* d9 = col[7] */
+        vmlal.s16       q9,  d9,  w7    /* q9  += W7 * col[7] */
+        vmlsl.s16       q10, d9,  w5    /* q10 -= W5 * col[7] */
+        vmlal.s16       q5,  d9,  w3    /* q5  += W3 * col[7] */
+        vmlsl.s16       q6,  d9,  w1    /* q6  -= W1 * col[7] */
+
+4:      vaddhn.i32      d2,  q11, q9    /* keep the high 16 bits of each sum (i.e. >> 16) */
+        vaddhn.i32      d3,  q12, q10
+        vaddhn.i32      d4,  q13, q5
+        vaddhn.i32      d5,  q14, q6
+        vsubhn.i32      d9,  q11, q9    /* differences fill d9 down to d6 */
+        vsubhn.i32      d8,  q12, q10
+        vsubhn.i32      d7,  q13, q5
+        vsubhn.i32      d6,  q14, q6
+
+        bx              lr              /* on every path r2 has advanced by 8 * 16 = 128 bytes */
+endfunc
+
+        .align 6
+
+function idct_col4_st8_neon
+        vqshrun.s16     d2,  q1,  #COL_SHIFT-16   /* finish the shift (16 bits were dropped by v{add,sub}hn) and saturate to u8 */
+        vqshrun.s16     d3,  q2,  #COL_SHIFT-16
+        vqshrun.s16     d4,  q3,  #COL_SHIFT-16
+        vqshrun.s16     d5,  q4,  #COL_SHIFT-16
+        vst1.32         {d2[0]}, [r0,:32], r1     /* 4 bytes per line, then r0 += r1; eight lines in total */
+        vst1.32         {d2[1]}, [r0,:32], r1
+        vst1.32         {d3[0]}, [r0,:32], r1
+        vst1.32         {d3[1]}, [r0,:32], r1
+        vst1.32         {d4[0]}, [r0,:32], r1
+        vst1.32         {d4[1]}, [r0,:32], r1
+        vst1.32         {d5[0]}, [r0,:32], r1
+        vst1.32         {d5[1]}, [r0,:32], r1
+
+        bx              lr                        /* r0 is left 8 * r1 past its entry value */
+endfunc
+
+const   idct_coeff_neon, align=4
+        .short W1, W2, W3, W4, W5, W6, W7, W4c   /* eight halfwords, in the lane order of the w1..w4c defines */
+endconst
+
+        .macro idct_start data
+        push            {r4-r7, lr}     /* r4-r7 are used as scratch by the row/column helpers */
+        pld             [\data]         /* prefetch the first 64 bytes of the coefficient block... */
+        pld             [\data, #64]    /* ...and the next 64 */
+        vpush           {d8-d15}        /* saved here, restored by idct_end */
+        movrel          r3,  idct_coeff_neon
+        vld1.64         {d0,d1}, [r3,:128]   /* d0/d1 = W1..W7, W4c (the w1..w4c lanes) */
+        .endm
+
+        .macro idct_end
+        vpop            {d8-d15}        /* undo the vpush in idct_start */
+        pop             {r4-r7, pc}     /* restore r4-r7 and return through the saved lr */
+        .endm
+
+/* void ff_simple_idct_put_neon(uint8_t *dst, ptrdiff_t line_size, int16_t *data); */
+function ff_simple_idct_put_neon, export=1
+        idct_start      r2
+
+        bl              idct_row4_pld_neon   /* prefetch dst lines, then fall through into the first row pass */
+        bl              idct_row4_neon       /* second row pass: the next 64 bytes of data */
+        add             r2,  r2,  #-128      /* both row passes advanced r2 by 64; rewind to the block start */
+        bl              idct_col4_neon
+        bl              idct_col4_st8_neon
+        sub             r0,  r0,  r1, lsl #3 /* the eight stores advanced r0 by 8 * line_size */
+        add             r0,  r0,  #4         /* next four pixels to the right */
+        add             r2,  r2,  #-120      /* column pass advanced r2 by 128: net +8 bytes from the block start */
+        bl              idct_col4_neon
+        bl              idct_col4_st8_neon
+
+        idct_end
+endfunc
+
+        .align 6
+
+function idct_col4_add8_neon
+        mov             ip,  r0                    /* ip is the store pointer; r0 walks ahead as the load pointer */
+
+        vld1.32         {d10[0]}, [r0,:32], r1     /* loads of existing pixels are interleaved with the arithmetic */
+        vshr.s16        q1,  q1,  #COL_SHIFT-16    /* finish the shift (16 bits were dropped by v{add,sub}hn) */
+        vld1.32         {d10[1]}, [r0,:32], r1
+        vshr.s16        q2,  q2,  #COL_SHIFT-16
+        vld1.32         {d11[0]}, [r0,:32], r1
+        vshr.s16        q3,  q3,  #COL_SHIFT-16
+        vld1.32         {d11[1]}, [r0,:32], r1
+        vshr.s16        q4,  q4,  #COL_SHIFT-16
+        vld1.32         {d12[0]}, [r0,:32], r1
+        vaddw.u8        q1,  q1,  d10              /* add the widened u8 pixels to the 16-bit results */
+        vld1.32         {d12[1]}, [r0,:32], r1
+        vaddw.u8        q2,  q2,  d11
+        vld1.32         {d13[0]}, [r0,:32], r1
+        vqmovun.s16     d2,  q1                    /* saturate back to u8 */
+        vld1.32         {d13[1]}, [r0,:32], r1
+        vaddw.u8        q3,  q3,  d12
+        vst1.32         {d2[0]},  [ip,:32], r1
+        vqmovun.s16     d3,  q2
+        vst1.32         {d2[1]},  [ip,:32], r1
+        vaddw.u8        q4,  q4,  d13
+        vst1.32         {d3[0]},  [ip,:32], r1
+        vqmovun.s16     d4,  q3
+        vst1.32         {d3[1]},  [ip,:32], r1
+        vqmovun.s16     d5,  q4
+        vst1.32         {d4[0]},  [ip,:32], r1
+        vst1.32         {d4[1]},  [ip,:32], r1
+        vst1.32         {d5[0]},  [ip,:32], r1
+        vst1.32         {d5[1]},  [ip,:32], r1
+
+        bx              lr                         /* r0 is left 8 * r1 past its entry value */
+endfunc
+
+/* void ff_simple_idct_add_neon(uint8_t *dst, ptrdiff_t line_size, int16_t *data); */
+function ff_simple_idct_add_neon, export=1
+        idct_start      r2
+
+        bl              idct_row4_pld_neon   /* prefetch dst lines, then fall through into the first row pass */
+        bl              idct_row4_neon       /* second row pass: the next 64 bytes of data */
+        add             r2,  r2,  #-128      /* both row passes advanced r2 by 64; rewind to the block start */
+        bl              idct_col4_neon
+        bl              idct_col4_add8_neon
+        sub             r0,  r0,  r1, lsl #3 /* the eight loads advanced r0 by 8 * line_size */
+        add             r0,  r0,  #4         /* next four pixels to the right */
+        add             r2,  r2,  #-120      /* column pass advanced r2 by 128: net +8 bytes from the block start */
+        bl              idct_col4_neon
+        bl              idct_col4_add8_neon
+
+        idct_end
+endfunc
+
+        .align 6
+
+function idct_col4_st16_neon
+        mov             ip,  #16                  /* store stride: 16 bytes */
+
+        vshr.s16        q1,  q1,  #COL_SHIFT-16   /* finish the shift (16 bits were dropped by v{add,sub}hn) */
+        vshr.s16        q2,  q2,  #COL_SHIFT-16
+        vst1.64         {d2}, [r2,:64], ip        /* 8 bytes per store, d2..d9 in order */
+        vshr.s16        q3,  q3,  #COL_SHIFT-16
+        vst1.64         {d3}, [r2,:64], ip
+        vshr.s16        q4,  q4,  #COL_SHIFT-16
+        vst1.64         {d4}, [r2,:64], ip
+        vst1.64         {d5}, [r2,:64], ip
+        vst1.64         {d6}, [r2,:64], ip
+        vst1.64         {d7}, [r2,:64], ip
+        vst1.64         {d8}, [r2,:64], ip
+        vst1.64         {d9}, [r2,:64], ip
+
+        bx              lr                        /* r2 is left 128 bytes past its entry value */
+endfunc
+
+/* void ff_simple_idct_neon(int16_t *data); */
+function ff_simple_idct_neon, export=1
+        idct_start      r0
+
+        mov             r2,  r0              /* the helpers take the coefficient pointer in r2 */
+        bl              idct_row4_neon       /* first 64 bytes */
+        bl              idct_row4_neon       /* second 64 bytes */
+        add             r2,  r2,  #-128      /* rewind to the block start */
+        bl              idct_col4_neon       /* advances r2 by 128 */
+        add             r2,  r2,  #-128      /* rewind: results are stored over the input */
+        bl              idct_col4_st16_neon  /* advances r2 by 128 */
+        add             r2,  r2,  #-120      /* net +8 bytes from the block start: the other four columns */
+        bl              idct_col4_neon
+        add             r2,  r2,  #-128
+        bl              idct_col4_st16_neon
+
+        idct_end
+endfunc
diff --git a/media/ffvpx/libavcodec/atsc_a53.c b/media/ffvpx/libavcodec/atsc_a53.c
new file mode 100644
index 0000000000..29ec71bc5f
--- /dev/null
+++ b/media/ffvpx/libavcodec/atsc_a53.c
@@ -0,0 +1,119 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "atsc_a53.h"
+#include "get_bits.h"
+
+int ff_alloc_a53_sei(const AVFrame *frame, size_t prefix_len,
+                     void **data, size_t *sei_size)
+{
+    /* A53 closed-caption side data attached to the frame, if there is any. */
+    AVFrameSideData *cc = frame
+        ? av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC) : NULL;
+    uint8_t *payload;
+
+    /* Nothing to emit: report success with *data = NULL; *sei_size is not written. */
+    if (cc == NULL) {
+        *data = NULL;
+        return 0;
+    }
+
+    /* 10 header bytes + the caption bytes + 1 trailing byte. */
+    *sei_size = cc->size + 11;
+    *data     = av_mallocz(*sei_size + prefix_len);
+    if (*data == NULL)
+        return AVERROR(ENOMEM);
+    payload = (uint8_t*)*data + prefix_len;
+
+    /* country code */
+    payload[0] = 181;
+    payload[1] = 0;
+    payload[2] = 49;
+
+    /*
+     * 'GA94' is the usual North American ATSC identifier.  It is hard-coded
+     * because the side data does not say which format produced it, even
+     * though other formats exist.
+     */
+    AV_WL32(&payload[3], MKTAG('G', 'A', '9', '4'));
+    payload[7] = 3;                               /* matches the 0x3 type code ff_parse_a53_cc() expects */
+    payload[8] = 0x40 | ((cc->size / 3) & 0x1f);  /* bit 6 set + 5-bit count of 3-byte entries */
+    payload[9] = 0;
+
+    /* The caption bytes follow the 10-byte header unchanged. */
+    memcpy(&payload[10], cc->data, cc->size);
+    payload[cc->size + 10] = 255;                 /* last byte of the message */
+
+    return 0;
+}
+
+int ff_parse_a53_cc(AVBufferRef **pbuf, const uint8_t *data, int size)
+{
+    GetBitContext reader;
+    /* New captions are appended after whatever *pbuf already holds. */
+    size_t wr_pos = *pbuf ? (*pbuf)->size : 0;
+    size_t total;
+    int err, nb_cc;
+
+    /* The fixed header parsed below is 24 bits long. */
+    if (size < 3)
+        return AVERROR_INVALIDDATA;
+
+    err = init_get_bits8(&reader, data, size);
+    if (err < 0)
+        return err;
+
+    /* user_data_type_code: anything other than 0x3 is ignored, not an error. */
+    if (get_bits(&reader, 8) != 0x3)
+        return 0;
+
+    skip_bits(&reader, 1);            /* reserved */
+    if (get_bits(&reader, 1) == 0)    /* process_cc_data_flag */
+        return 0;
+
+    skip_bits(&reader, 1);            /* zero bit */
+    nb_cc = get_bits(&reader, 5);
+    if (nb_cc == 0)
+        return 0;
+
+    skip_bits(&reader, 8);            /* reserved */
+
+    /* Each CC takes 3 bytes, and one marker_bits byte must follow them. */
+    if ((get_bits_left(&reader) >> 3) <= nb_cc * 3)
+        return AVERROR_INVALIDDATA;
+
+    total = wr_pos + nb_cc * 3;
+
+    if (total > INT_MAX)
+        return AVERROR_INVALIDDATA;
+
+    /* Growing the existing buffer merges the cc data from two fields. */
+    err = av_buffer_realloc(pbuf, total);
+    if (err < 0)
+        return err;
+
+    /* Relies on av_buffer_realloc() having left the buffer writable. */
+    for (uint8_t *dst = (*pbuf)->data + wr_pos, *end = (*pbuf)->data + total;
+         dst < end; dst++)
+        *dst = get_bits(&reader, 8);
+
+    return nb_cc;
+}
diff --git a/media/ffvpx/libavcodec/atsc_a53.h b/media/ffvpx/libavcodec/atsc_a53.h
new file mode 100644
index 0000000000..0622a55549
--- /dev/null
+++ b/media/ffvpx/libavcodec/atsc_a53.h
@@ -0,0 +1,56 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ATSC_A53_H
+#define AVCODEC_ATSC_A53_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/buffer.h"
+#include "libavutil/frame.h"
+
+/**
+ * Check AVFrame for A53 side data and allocate and fill SEI message with A53 info
+ *
+ * @param frame      Raw frame to get A53 side data from
+ * @param prefix_len Number of bytes to allocate before SEI message
+ * @param data       Pointer to a variable to store allocated memory
+ *                   Upon return the variable will hold NULL on error or if frame has no A53 info.
+ *                   Otherwise it will point to prefix_len uninitialized bytes followed by
+ *                   *sei_size SEI message
+ * @param sei_size   Pointer to a variable to store generated SEI message length (not written if frame has no A53 info)
+ * @return           Zero on success, negative error code on failure
+ */
+int ff_alloc_a53_sei(const AVFrame *frame, size_t prefix_len,
+                     void **data, size_t *sei_size);
+
+/**
+ * Parse a data array for ATSC A53 Part 4 Closed Captions and store them in an AVBufferRef.
+ *
+ * @param pbuf Pointer to an AVBufferRef to append the closed captions. *pbuf may be NULL, in
+ *             which case a new buffer will be allocated and put in it.
+ * @param data The data array containing the raw A53 data.
+ * @param size Size of the data array in bytes.
+ *
+ * @return Number of closed captions parsed on success, negative error code on failure.
+ *         If no Closed Captions are parsed, *pbuf is untouched.
+ */
+int ff_parse_a53_cc(AVBufferRef **pbuf, const uint8_t *data, int size);
+
+#endif /* AVCODEC_ATSC_A53_H */
diff --git a/media/ffvpx/libavcodec/av1.h b/media/ffvpx/libavcodec/av1.h
new file mode 100644
index 0000000000..384f7cddc7
--- /dev/null
+++ b/media/ffvpx/libavcodec/av1.h
@@ -0,0 +1,184 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AV1 common definitions
+ */
+
+#ifndef AVCODEC_AV1_H
+#define AVCODEC_AV1_H
+
+// OBU types (section 6.2.2).
+typedef enum {
+    // 0 reserved.
+    AV1_OBU_SEQUENCE_HEADER        = 1,
+    AV1_OBU_TEMPORAL_DELIMITER     = 2,
+    AV1_OBU_FRAME_HEADER           = 3,
+    AV1_OBU_TILE_GROUP             = 4,
+    AV1_OBU_METADATA               = 5,
+    AV1_OBU_FRAME                  = 6,
+    AV1_OBU_REDUNDANT_FRAME_HEADER = 7,
+    AV1_OBU_TILE_LIST              = 8,
+    // 9-14 reserved.
+    AV1_OBU_PADDING                = 15,
+} AV1_OBU_Type; // the only enum in this header that has a type name
+
+// Metadata types (section 6.7.1).
+enum { // anonymous: plain int constants; value 0 has no name here
+    AV1_METADATA_TYPE_HDR_CLL     = 1,
+    AV1_METADATA_TYPE_HDR_MDCV    = 2,
+    AV1_METADATA_TYPE_SCALABILITY = 3,
+    AV1_METADATA_TYPE_ITUT_T35    = 4,
+    AV1_METADATA_TYPE_TIMECODE    = 5,
+};
+
+// Frame types (section 6.8.2).
+enum { // anonymous: plain int constants, values 0..3
+    AV1_FRAME_KEY        = 0,
+    AV1_FRAME_INTER      = 1,
+    AV1_FRAME_INTRA_ONLY = 2,
+    AV1_FRAME_SWITCH     = 3,
+};
+
+// Reference frames (section 6.10.24).
+enum { // anonymous: plain int constants, values 0..7 with no gaps
+    AV1_REF_FRAME_INTRA   = 0,
+    AV1_REF_FRAME_LAST    = 1,
+    AV1_REF_FRAME_LAST2   = 2,
+    AV1_REF_FRAME_LAST3   = 3,
+    AV1_REF_FRAME_GOLDEN  = 4,
+    AV1_REF_FRAME_BWDREF  = 5,
+    AV1_REF_FRAME_ALTREF2 = 6,
+    AV1_REF_FRAME_ALTREF  = 7,
+};
+
+// Constants (section 3).
+enum { // anonymous: unrelated limits and selectors grouped together, so values repeat across groups
+    AV1_MAX_OPERATING_POINTS = 32,
+
+    AV1_MAX_SB_SIZE    = 128,
+    AV1_MI_SIZE        = 4,
+
+    AV1_MAX_TILE_WIDTH = 4096,
+    AV1_MAX_TILE_AREA  = 4096 * 2304, // = 9437184
+    AV1_MAX_TILE_ROWS  = 64,
+    AV1_MAX_TILE_COLS  = 64,
+
+    AV1_NUM_REF_FRAMES       = 8,
+    AV1_REFS_PER_FRAME       = 7,
+    AV1_TOTAL_REFS_PER_FRAME = 8, // = AV1_REFS_PER_FRAME + 1
+    AV1_PRIMARY_REF_NONE     = 7,
+
+    AV1_MAX_SEGMENTS = 8,
+    AV1_SEG_LVL_MAX  = 8,
+
+    AV1_SEG_LVL_ALT_Q      = 0,
+    AV1_SEG_LVL_ALT_LF_Y_V = 1, // values 2-4 have no name in this enum
+    AV1_SEG_LVL_REF_FRAME  = 5,
+    AV1_SEG_LVL_SKIP       = 6,
+    AV1_SEG_LVL_GLOBAL_MV  = 7,
+
+    AV1_SELECT_SCREEN_CONTENT_TOOLS = 2,
+    AV1_SELECT_INTEGER_MV           = 2,
+
+    AV1_SUPERRES_NUM       = 8,
+    AV1_SUPERRES_DENOM_MIN = 9,
+
+    AV1_INTERPOLATION_FILTER_SWITCHABLE = 4,
+
+    AV1_GM_ABS_ALPHA_BITS       = 12,
+    AV1_GM_ALPHA_PREC_BITS      = 15,
+    AV1_GM_ABS_TRANS_ONLY_BITS  = 9,
+    AV1_GM_TRANS_ONLY_PREC_BITS = 3,
+    AV1_GM_ABS_TRANS_BITS       = 12,
+    AV1_GM_TRANS_PREC_BITS      = 6,
+    AV1_WARPEDMODEL_PREC_BITS   = 16,
+
+    AV1_WARP_MODEL_IDENTITY    = 0,
+    AV1_WARP_MODEL_TRANSLATION = 1,
+    AV1_WARP_MODEL_ROTZOOM     = 2,
+    AV1_WARP_MODEL_AFFINE      = 3,
+    AV1_WARP_PARAM_REDUCE_BITS = 6,
+
+    AV1_DIV_LUT_BITS      = 8,
+    AV1_DIV_LUT_PREC_BITS = 14,
+    AV1_DIV_LUT_NUM       = 257, // = (1 << AV1_DIV_LUT_BITS) + 1
+
+    AV1_MAX_LOOP_FILTER = 63, // = (1 << 6) - 1
+};
+
+
+// The main colour configuration information uses the same ISO/IEC 23001-8
+// (H.273) enums as FFmpeg does, so separate definitions are not required.
+
+// Chroma sample position.
+enum { // anonymous: plain int constants; the arrows name the matching AVChromaLocation value
+    AV1_CSP_UNKNOWN   = 0,
+    AV1_CSP_VERTICAL  = 1, // -> AVCHROMA_LOC_LEFT.
+    AV1_CSP_COLOCATED = 2, // -> AVCHROMA_LOC_TOPLEFT.
+};
+
+// Scalability modes (section 6.7.5)
+enum { // anonymous: plain int constants, values 0..28 with no gaps
+    AV1_SCALABILITY_L1T2           = 0,
+    AV1_SCALABILITY_L1T3           = 1,
+    AV1_SCALABILITY_L2T1           = 2,
+    AV1_SCALABILITY_L2T2           = 3,
+    AV1_SCALABILITY_L2T3           = 4,
+    AV1_SCALABILITY_S2T1           = 5,
+    AV1_SCALABILITY_S2T2           = 6,
+    AV1_SCALABILITY_S2T3           = 7,
+    AV1_SCALABILITY_L2T1h          = 8,
+    AV1_SCALABILITY_L2T2h          = 9,
+    AV1_SCALABILITY_L2T3h          = 10,
+    AV1_SCALABILITY_S2T1h          = 11,
+    AV1_SCALABILITY_S2T2h          = 12,
+    AV1_SCALABILITY_S2T3h          = 13,
+    AV1_SCALABILITY_SS             = 14,
+    AV1_SCALABILITY_L3T1           = 15,
+    AV1_SCALABILITY_L3T2           = 16,
+    AV1_SCALABILITY_L3T3           = 17,
+    AV1_SCALABILITY_S3T1           = 18,
+    AV1_SCALABILITY_S3T2           = 19,
+    AV1_SCALABILITY_S3T3           = 20,
+    AV1_SCALABILITY_L3T2_KEY       = 21,
+    AV1_SCALABILITY_L3T3_KEY       = 22,
+    AV1_SCALABILITY_L4T5_KEY       = 23,
+    AV1_SCALABILITY_L4T7_KEY       = 24,
+    AV1_SCALABILITY_L3T2_KEY_SHIFT = 25,
+    AV1_SCALABILITY_L3T3_KEY_SHIFT = 26,
+    AV1_SCALABILITY_L4T5_KEY_SHIFT = 27,
+    AV1_SCALABILITY_L4T7_KEY_SHIFT = 28,
+};
+
+// Frame Restoration types (section 6.10.15)
+enum { // anonymous: plain int constants, values 0..3
+    AV1_RESTORE_NONE       = 0,
+    AV1_RESTORE_WIENER     = 1,
+    AV1_RESTORE_SGRPROJ    = 2,
+    AV1_RESTORE_SWITCHABLE = 3,
+};
+
+// Sequence Headers are actually unbounded because one can use
+// an arbitrary number of leading zeroes when encoding via uvlc.
+// The following estimate is based around using the lowest number
+// of bits for uvlc encoding.
+#define AV1_SANE_SEQUENCE_HEADER_MAX_BITS           3138 // in bits; 393 bytes when rounded up
+
+#endif /* AVCODEC_AV1_H */
diff --git a/media/ffvpx/libavcodec/av1_frame_split_bsf.c b/media/ffvpx/libavcodec/av1_frame_split_bsf.c
new file mode 100644
index 0000000000..5f6a40316c
--- /dev/null
+++ b/media/ffvpx/libavcodec/av1_frame_split_bsf.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2019 James Almer <jamrial@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * This bitstream filter splits AV1 Temporal Units into packets containing
+ * just one frame, plus any leading and trailing OBUs that may be present at
+ * the beginning or end, respectively.
+ *
+ * Temporal Units already containing only one frame will be passed through
+ * unchanged. When splitting can't be performed, the Temporal Unit will be
+ * passed through containing only the remaining OBUs starting from the first
+ * one after the last successfully split frame.
+ */
+
+#include "libavutil/avassert.h"
+
+#include "bsf.h"
+#include "bsf_internal.h"
+#include "cbs.h"
+#include "cbs_av1.h"
+
+typedef struct AV1FSplitContext {
+    AVPacket *buffer_pkt;                 // input packet being split; data != NULL while a split is in progress
+    CodedBitstreamContext *cbc;           // CBS context used to parse packets and extradata
+    CodedBitstreamFragment temporal_unit; // parsed units of buffer_pkt
+    // Splitting state, reset when a temporal unit holding more than one frame is read.
+    int nb_frames;                        // FRAME + FRAME_HEADER units counted in the temporal unit
+    int cur_frame;                        // frames consumed so far by the splitter
+    int cur_frame_idx;                    // index of the unit where the next scan starts
+    int last_frame_idx;                   // index of the unit where the packet being emitted starts
+} AV1FSplitContext;
+
+static int av1_frame_split_filter(AVBSFContext *ctx, AVPacket *out)
+{ // Emits one packet per call: a single frame split off the buffered temporal unit, or the packet passed through.
+    AV1FSplitContext *s = ctx->priv_data;
+    CodedBitstreamFragment *td = &s->temporal_unit;
+    int i, ret;
+    int split = !!s->buffer_pkt->data;
+    // Non-NULL buffer_pkt->data means an earlier call left part of a temporal unit still to be split.
+    if (!s->buffer_pkt->data) {
+        int nb_frames = 0;
+
+        ret = ff_bsf_get_packet_ref(ctx, s->buffer_pkt);
+        if (ret < 0)
+            return ret;
+
+        ret = ff_cbs_read_packet(s->cbc, td, s->buffer_pkt);
+        if (ret < 0) {
+            av_log(ctx, AV_LOG_WARNING, "Failed to parse temporal unit.\n");
+            goto passthrough;
+        }
+        // Count frames: every FRAME or FRAME_HEADER unit counts as one; a tile list forces passthrough.
+        for (i = 0; i < td->nb_units; i++) {
+            CodedBitstreamUnit *unit = &td->units[i];
+
+            if (unit->type == AV1_OBU_FRAME ||
+                unit->type == AV1_OBU_FRAME_HEADER)
+                nb_frames++;
+            else if (unit->type == AV1_OBU_TILE_LIST) {
+                av_log(ctx, AV_LOG_VERBOSE, "Large scale tiles are unsupported.\n");
+                goto passthrough;
+            }
+        }
+        if (nb_frames > 1) {
+            s->cur_frame = 0;
+            s->cur_frame_idx = s->last_frame_idx = 0;
+            s->nb_frames = nb_frames;
+            split = 1;
+        }
+    }
+
+    if (split) {
+        AV1RawFrameHeader *frame = NULL;
+        int cur_frame_type = -1, size = 0;
+        // Scan from the first unit not yet emitted, summing unit sizes up to the split point.
+        for (i = s->cur_frame_idx; i < td->nb_units; i++) {
+            CodedBitstreamUnit *unit = &td->units[i];
+
+            size += unit->data_size;
+            if (unit->type == AV1_OBU_FRAME) {
+                AV1RawOBU *obu = unit->content;
+
+                if (frame) {
+                    av_log(ctx, AV_LOG_WARNING, "Frame OBU found when Tile data for a "
+                                                "previous frame was expected.\n");
+                    goto passthrough;
+                }
+
+                frame = &obu->obu.frame.header;
+                cur_frame_type = obu->header.obu_type;
+                s->last_frame_idx = s->cur_frame_idx;
+                s->cur_frame_idx  = i + 1;
+                s->cur_frame++;
+
+                // split here unless it's the last frame, in which case
+                // include every trailing OBU
+                if (s->cur_frame < s->nb_frames)
+                    break;
+            } else if (unit->type == AV1_OBU_FRAME_HEADER) {
+                AV1RawOBU *obu = unit->content;
+
+                if (frame) {
+                    av_log(ctx, AV_LOG_WARNING, "Frame Header OBU found when Tile data for a "
+                                                "previous frame was expected.\n");
+                    goto passthrough;
+                }
+
+                frame = &obu->obu.frame_header;
+                cur_frame_type = obu->header.obu_type;
+                s->last_frame_idx = s->cur_frame_idx;
+                s->cur_frame++;
+
+                // split here if show_existing_frame unless it's the last
+                // frame, in which case include every trailing OBU
+                if (frame->show_existing_frame &&
+                    s->cur_frame < s->nb_frames) {
+                    s->cur_frame_idx = i + 1;
+                    break;
+                }
+            } else if (unit->type == AV1_OBU_TILE_GROUP) {
+                AV1RawOBU *obu = unit->content;
+                AV1RawTileGroup *group = &obu->obu.tile_group;
+
+                if (!frame || cur_frame_type != AV1_OBU_FRAME_HEADER) {
+                    av_log(ctx, AV_LOG_WARNING, "Unexpected Tile Group OBU found before a "
+                                                "Frame Header.\n");
+                    goto passthrough;
+                }
+                // tg_end equal to the last tile index (tile_cols * tile_rows - 1) ends the frame's tile data.
+                if ((group->tg_end == (frame->tile_cols * frame->tile_rows) - 1) &&
+                    // include every trailing OBU with the last frame
+                    s->cur_frame < s->nb_frames) {
+                    s->cur_frame_idx = i + 1;
+                    break;
+                }
+            }
+        }
+        av_assert0(frame && s->cur_frame <= s->nb_frames);
+        // The output references the buffered packet, then is narrowed to the units scanned above.
+        ret = av_packet_ref(out, s->buffer_pkt);
+        if (ret < 0)
+            goto fail;
+
+        out->data = (uint8_t *)td->units[s->last_frame_idx].data;
+        out->size = size;
+
+        // skip the frame in the buffer packet if it's split successfully, so it's not present
+        // if the packet is passed through in case of failure when splitting another frame.
+        s->buffer_pkt->data += size;
+        s->buffer_pkt->size -= size;
+        // A frame that is neither shown nor a show_existing_frame is output without a pts.
+        if (!frame->show_existing_frame && !frame->show_frame)
+            out->pts = AV_NOPTS_VALUE;
+        // Last frame emitted: drop the buffered packet so that the next call fetches new input.
+        if (s->cur_frame == s->nb_frames) {
+            av_packet_unref(s->buffer_pkt);
+            ff_cbs_fragment_reset(td);
+        }
+
+        return 0;
+    }
+    // Passthrough: hand whatever remains in the buffered packet to the caller and return 0.
+passthrough:
+    av_packet_move_ref(out, s->buffer_pkt);
+
+    ret = 0;
+fail:
+    if (ret < 0) {
+        av_packet_unref(out);
+        av_packet_unref(s->buffer_pkt);
+    }
+    ff_cbs_fragment_reset(td);
+
+    return ret;
+}
+
+static const CodedBitstreamUnitType decompose_unit_types[] = { // installed on s->cbc by av1_frame_split_init()
+    AV1_OBU_TEMPORAL_DELIMITER,
+    AV1_OBU_SEQUENCE_HEADER,
+    AV1_OBU_FRAME_HEADER,
+    AV1_OBU_TILE_GROUP,
+    AV1_OBU_FRAME,
+};
+
+static int av1_frame_split_init(AVBSFContext *ctx)
+{ // Allocates the buffer packet and the CBS context; returns 0, AVERROR(ENOMEM) or the ff_cbs_init() error.
+    AV1FSplitContext *s = ctx->priv_data;
+    CodedBitstreamFragment *td = &s->temporal_unit;
+    int ret;
+
+    s->buffer_pkt = av_packet_alloc();
+    if (!s->buffer_pkt)
+        return AVERROR(ENOMEM);
+
+    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, ctx);
+    if (ret < 0)
+        return ret; // NOTE(review): buffer_pkt is not freed here; presumably av1_frame_split_close() runs afterwards - confirm
+
+    s->cbc->decompose_unit_types    = decompose_unit_types;
+    s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(decompose_unit_types);
+    // Extradata is optional: without it there is nothing left to do.
+    if (!ctx->par_in->extradata_size)
+        return 0;
+
+    ret = ff_cbs_read_extradata(s->cbc, td, ctx->par_in);
+    if (ret < 0)
+        av_log(ctx, AV_LOG_WARNING, "Failed to parse extradata.\n");
+    // A parse failure is only logged; init still returns 0 below.
+    ff_cbs_fragment_reset(td);
+
+    return 0;
+}
+
+static void av1_frame_split_flush(AVBSFContext *ctx)
+{
+    AV1FSplitContext *s = ctx->priv_data;
+    // Drop the buffered packet and its parsed units; the allocations themselves are kept.
+    av_packet_unref(s->buffer_pkt);
+    ff_cbs_fragment_reset(&s->temporal_unit);
+}
+
+static void av1_frame_split_close(AVBSFContext *ctx)
+{
+    AV1FSplitContext *s = ctx->priv_data;
+    // Release everything held by the context: buffered packet, parsed fragment and CBS context.
+    av_packet_free(&s->buffer_pkt);
+    ff_cbs_fragment_free(&s->temporal_unit);
+    ff_cbs_close(&s->cbc);
+}
+
+static const enum AVCodecID av1_frame_split_codec_ids[] = {
+    AV_CODEC_ID_AV1, AV_CODEC_ID_NONE, // AV1 only; AV_CODEC_ID_NONE presumably terminates the list - confirm
+};
+
+const FFBitStreamFilter ff_av1_frame_split_bsf = { // descriptor tying the filter name to the callbacks of this file
+    .p.name         = "av1_frame_split",
+    .p.codec_ids    = av1_frame_split_codec_ids,
+    .priv_data_size = sizeof(AV1FSplitContext),
+    .init           = av1_frame_split_init,
+    .flush          = av1_frame_split_flush,
+    .close          = av1_frame_split_close,
+    .filter         = av1_frame_split_filter,
+};
diff --git a/media/ffvpx/libavcodec/av1_parse.h b/media/ffvpx/libavcodec/av1_parse.h
new file mode 100644
index 0000000000..f4a5d2830e
--- /dev/null
+++ b/media/ffvpx/libavcodec/av1_parse.h
@@ -0,0 +1,184 @@
+/*
+ * AV1 common parsing code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AV1_PARSE_H
+#define AVCODEC_AV1_PARSE_H
+
+#include <limits.h>
+#include <stdint.h>
+
+#include "libavutil/error.h"
+#include "libavutil/intmath.h"
+#include "libavutil/macros.h"
+
+#include "av1.h"
+#include "get_bits.h"
+
+// Up to 2 bytes of OBU header (base byte + optional extension byte) + at most 8 bytes of leb128 size
+#define MAX_OBU_HEADER_SIZE (2 + 8)
+
+typedef struct AV1OBU {
+    /** Size of payload */
+    int size;
+    const uint8_t *data;
+
+    /**
+     * Size, in bits, of just the data, excluding the trailing_one_bit and
+     * any trailing padding.
+     */
+    int size_bits;
+
+    /** Size of entire OBU, including header */
+    int raw_size;
+    const uint8_t *raw_data;
+
+    /** GetBitContext initialized to the start of the payload */
+    GetBitContext gb;
+
+    int type; /* NOTE(review): presumably the OBU type as reported by parse_obu_header() - confirm in the extractor */
+
+    int temporal_id; /* NOTE(review): presumably from the OBU extension header, 0 when absent - confirm */
+    int spatial_id;  /* NOTE(review): presumably from the OBU extension header, 0 when absent - confirm */
+} AV1OBU;
+
+/** An input packet split into OBUs. The buffers are presumably released by ff_av1_packet_uninit() - confirm. */
+typedef struct AV1Packet {
+    AV1OBU *obus;                 /* NOTE(review): presumably the array of extracted OBUs - confirm */
+    int nb_obus;                  /* presumably the number of valid entries in obus */
+    int obus_allocated;           /* presumably the number of entries obus has room for */
+    unsigned obus_allocated_size; /* presumably the size in bytes of the obus allocation - confirm */
+} AV1Packet;
+
+/**
+ * Extract an OBU from a raw bitstream.
+ *
+ * @note This function does not copy or store any bitstream data. All
+ *       the pointers in the AV1OBU structure will be valid as long
+ *       as the input buffer also is.
+ */
+int ff_av1_extract_obu(AV1OBU *obu, const uint8_t *buf, int length,
+                       void *logctx);
+
+/**
+ * Split an input packet into OBUs.
+ *
+ * @note This function does not copy or store any bitstream data. All
+ *       the pointers in the AV1Packet structure will be valid as
+ *       long as the input buffer also is.
+ */
+int ff_av1_packet_split(AV1Packet *pkt, const uint8_t *buf, int length,
+                        void *logctx);
+
+/**
+ * Free all the allocated memory in the packet.
+ */
+void ff_av1_packet_uninit(AV1Packet *pkt);
+
+static inline int64_t leb128(GetBitContext *gb) {
+    int64_t ret = 0;
+    int i;
+    // Reads at most 8 bytes, 7 value bits each, least significant group first; a byte with bit 7 clear ends it.
+    for (i = 0; i < 8; i++) {
+        int byte = get_bits(gb, 8);
+        ret |= (int64_t)(byte & 0x7f) << (i * 7);
+        if (!(byte & 0x80))
+            break;
+    }
+    return ret; // at most 56 bits are set, so the result is never negative
+}
+
+static inline int parse_obu_header(const uint8_t *buf, int buf_size,
+                                   int64_t *obu_size, int *start_pos, int *type,
+                                   int *temporal_id, int *spatial_id)
+{ // Parses one OBU header at buf; returns header + payload size in bytes, or a negative AVERROR code.
+    GetBitContext gb;
+    int ret, extension_flag, has_size_flag;
+    int64_t size;
+    // Only the first MAX_OBU_HEADER_SIZE bytes (or buf_size, if smaller) are given to the bit reader.
+    ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_OBU_HEADER_SIZE));
+    if (ret < 0)
+        return ret;
+
+    if (get_bits1(&gb) != 0) // obu_forbidden_bit
+        return AVERROR_INVALIDDATA;
+
+    *type      = get_bits(&gb, 4);
+    extension_flag = get_bits1(&gb);
+    has_size_flag  = get_bits1(&gb);
+    skip_bits1(&gb); // obu_reserved_1bit
+
+    if (extension_flag) {
+        *temporal_id = get_bits(&gb, 3);
+        *spatial_id  = get_bits(&gb, 2);
+        skip_bits(&gb, 3); // extension_header_reserved_3bits
+    } else {
+        *temporal_id = *spatial_id = 0;
+    }
+    // Without a size field the payload is taken to fill the rest of buf after the header byte(s).
+    *obu_size  = has_size_flag ? leb128(&gb)
+                               : buf_size - 1 - extension_flag;
+    // Reject headers whose fields ran past the bytes given to the bit reader.
+    if (get_bits_left(&gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    *start_pos = get_bits_count(&gb) / 8;
+
+    size = *obu_size + *start_pos;
+
+    if (size > buf_size)
+        return AVERROR_INVALIDDATA;
+    // size <= buf_size at this point, so the int64_t value fits the int return type.
+    return size;
+}
+
+static inline int get_obu_bit_length(const uint8_t *buf, int size, int type)
+{ // Returns the payload length in bits without trailing bits, or AVERROR(ERANGE) if size * 8 would overflow int.
+    int v;
+
+    /* There are no trailing bits on these */
+    if (type == AV1_OBU_TILE_GROUP ||
+        type == AV1_OBU_TILE_LIST ||
+        type == AV1_OBU_FRAME) {
+        if (size > INT_MAX / 8)
+            return AVERROR(ERANGE);
+        else
+            return size * 8;
+    }
+    /* Drop trailing zero bytes; an empty or all-zero payload yields 0 bits */
+    while (size > 0 && buf[size - 1] == 0)
+        size--;
+
+    if (!size)
+        return 0;
+
+    v = buf[size - 1];
+    /* Guard the multiplication by 8 against int overflow */
+    if (size > INT_MAX / 8)
+        return AVERROR(ERANGE);
+    size *= 8;
+
+    /* Remove the trailing_one_bit and following trailing zeros */
+    if (v)
+        size -= ff_ctz(v) + 1;
+
+    return size;
+}
+
+#endif /* AVCODEC_AV1_PARSE_H */
diff --git a/media/ffvpx/libavcodec/av1_parser.c b/media/ffvpx/libavcodec/av1_parser.c
new file mode 100644
index 0000000000..14dae92fe9
--- /dev/null
+++ b/media/ffvpx/libavcodec/av1_parser.c
@@ -0,0 +1,216 @@
+/*
+ * AV1 parser
+ *
+ * Copyright (C) 2018 James Almer <jamrial@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "cbs.h"
+#include "cbs_av1.h"
+#include "parser.h"
+
+typedef struct AV1ParseContext {
+    CodedBitstreamContext *cbc;           // CBS context created in av1_parser_init()
+    CodedBitstreamFragment temporal_unit; // reused for every parse call and reset afterwards
+    int parsed_extradata;                 // set to 1 once extradata parsing has been attempted
+} AV1ParseContext;
+
+static const enum AVPixelFormat pix_fmts_8bit[2][2] = { // indexed [subsampling_x][subsampling_y]
+    { AV_PIX_FMT_YUV444P, AV_PIX_FMT_NONE },    // x = 0: 4:4:4 for y = 0, no format for y = 1
+    { AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P }, // x = 1: 4:2:2 for y = 0, 4:2:0 for y = 1
+};
+static const enum AVPixelFormat pix_fmts_10bit[2][2] = { // indexed [subsampling_x][subsampling_y]
+    { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_NONE },      // x = 0: 4:4:4 for y = 0, no format for y = 1
+    { AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV420P10 }, // x = 1: 4:2:2 for y = 0, 4:2:0 for y = 1
+};
+static const enum AVPixelFormat pix_fmts_12bit[2][2] = { // indexed [subsampling_x][subsampling_y]
+    { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_NONE },      // x = 0: 4:4:4 for y = 0, no format for y = 1
+    { AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV420P12 }, // x = 1: 4:2:2 for y = 0, 4:2:0 for y = 1
+};
+
+static const enum AVPixelFormat pix_fmts_rgb[3] = { // indexed by high_bitdepth + twelve_bit
+    AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12,
+};
+
+static int av1_parser_parse(AVCodecParserContext *ctx,
+                            AVCodecContext *avctx,
+                            const uint8_t **out_data, int *out_size,
+                            const uint8_t *data, int size)
+{ // Returns the input unchanged and fills ctx/avctx properties from the parsed OBUs; always returns size.
+    AV1ParseContext *s = ctx->priv_data;
+    CodedBitstreamFragment *td = &s->temporal_unit;
+    const CodedBitstreamAV1Context *av1 = s->cbc->priv_data;
+    const AV1RawSequenceHeader *seq;
+    const AV1RawColorConfig *color;
+    int ret;
+    // The whole input buffer is handed back as the output.
+    *out_data = data;
+    *out_size = size;
+
+    ctx->key_frame         = -1;
+    ctx->pict_type         = AV_PICTURE_TYPE_NONE;
+    ctx->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
+
+    s->cbc->log_ctx = avctx;
+    // Extradata is parsed at most once; a failure only produces a warning.
+    if (avctx->extradata_size && !s->parsed_extradata) {
+        s->parsed_extradata = 1;
+
+        ret = ff_cbs_read_extradata_from_codec(s->cbc, td, avctx);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_WARNING, "Failed to parse extradata.\n");
+        }
+
+        ff_cbs_fragment_reset(td);
+    }
+
+    ret = ff_cbs_read(s->cbc, td, data, size);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to parse temporal unit.\n");
+        goto end;
+    }
+
+    if (!av1->sequence_header) {
+        av_log(avctx, AV_LOG_ERROR, "No sequence header available\n");
+        goto end;
+    }
+
+    seq = av1->sequence_header;
+    color = &seq->color_config;
+    // Frame properties: the last shown frame header with spatial_id == 0 in the unit list wins.
+    for (int i = 0; i < td->nb_units; i++) {
+        const CodedBitstreamUnit *unit = &td->units[i];
+        const AV1RawOBU *obu = unit->content;
+        const AV1RawFrameHeader *frame;
+
+        if (unit->type == AV1_OBU_FRAME)
+            frame = &obu->obu.frame.header;
+        else if (unit->type == AV1_OBU_FRAME_HEADER)
+            frame = &obu->obu.frame_header;
+        else
+            continue;
+
+        if (obu->header.spatial_id > 0)
+            continue;
+
+        if (!frame->show_frame && !frame->show_existing_frame)
+            continue;
+
+        ctx->width  = frame->frame_width_minus_1 + 1;
+        ctx->height = frame->frame_height_minus_1 + 1;
+
+        ctx->key_frame = frame->frame_type == AV1_FRAME_KEY && !frame->show_existing_frame;
+
+        switch (frame->frame_type) {
+        case AV1_FRAME_KEY:
+        case AV1_FRAME_INTRA_ONLY:
+            ctx->pict_type = AV_PICTURE_TYPE_I;
+            break;
+        case AV1_FRAME_INTER:
+            ctx->pict_type = AV_PICTURE_TYPE_P;
+            break;
+        case AV1_FRAME_SWITCH:
+            ctx->pict_type = AV_PICTURE_TYPE_SP;
+            break;
+        }
+        ctx->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
+    }
+    // Stream properties from the sequence header. NOTE(review): bit depths other than 8/10/12 leave ctx->format untouched.
+    switch (av1->bit_depth) {
+    case 8:
+        ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY8
+                                         : pix_fmts_8bit [color->subsampling_x][color->subsampling_y];
+        break;
+    case 10:
+        ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY10
+                                         : pix_fmts_10bit[color->subsampling_x][color->subsampling_y];
+        break;
+    case 12:
+        ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY12
+                                         : pix_fmts_12bit[color->subsampling_x][color->subsampling_y];
+        break;
+    }
+    av_assert2(ctx->format != AV_PIX_FMT_NONE);
+    // No subsampling + RGB matrix + BT.709 primaries + IEC 61966-2-1 transfer selects a planar GBR format.
+    if (!color->subsampling_x && !color->subsampling_y &&
+        color->matrix_coefficients       == AVCOL_SPC_RGB &&
+        color->color_primaries           == AVCOL_PRI_BT709 &&
+        color->transfer_characteristics  == AVCOL_TRC_IEC61966_2_1)
+        ctx->format = pix_fmts_rgb[color->high_bitdepth + color->twelve_bit];
+
+    avctx->profile = seq->seq_profile;
+    avctx->level   = seq->seq_level_idx[0];
+
+    avctx->colorspace = (enum AVColorSpace) color->matrix_coefficients;
+    avctx->color_primaries = (enum AVColorPrimaries) color->color_primaries;
+    avctx->color_trc = (enum AVColorTransferCharacteristic) color->transfer_characteristics;
+    avctx->color_range = color->color_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
+    // den/num are passed swapped on purpose: framerate = time_scale / num_units_in_display_tick.
+    if (seq->timing_info_present_flag) {
+        const AV1RawTimingInfo *timing = &seq->timing_info;
+        av_reduce(&avctx->framerate.den, &avctx->framerate.num,
+                  timing->num_units_in_display_tick, timing->time_scale, INT_MAX);
+    }
+    // Reached on success and on parse errors alike: errors are logged but never returned.
+end:
+    ff_cbs_fragment_reset(td);
+
+    s->cbc->log_ctx = NULL;
+
+    return size;
+}
+
+static const CodedBitstreamUnitType decompose_unit_types[] = { // installed on s->cbc by av1_parser_init()
+    AV1_OBU_TEMPORAL_DELIMITER,
+    AV1_OBU_SEQUENCE_HEADER,
+    AV1_OBU_FRAME_HEADER,
+    AV1_OBU_TILE_GROUP,
+    AV1_OBU_FRAME,
+};
+
+static av_cold int av1_parser_init(AVCodecParserContext *ctx)
+{ // Creates the CBS context and installs decompose_unit_types; returns 0 or the ff_cbs_init() error.
+    AV1ParseContext *s = ctx->priv_data;
+    int ret;
+    // No log context is passed here; av1_parser_parse() sets s->cbc->log_ctx for the duration of each call.
+    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, NULL);
+    if (ret < 0)
+        return ret;
+
+    s->cbc->decompose_unit_types    = decompose_unit_types;
+    s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(decompose_unit_types);
+
+    return 0;
+}
+
+static void av1_parser_close(AVCodecParserContext *ctx)
+{
+    AV1ParseContext *s = ctx->priv_data;
+    // Free the parsed fragment first, then the CBS context.
+    ff_cbs_fragment_free(&s->temporal_unit);
+    ff_cbs_close(&s->cbc);
+}
+
+const AVCodecParser ff_av1_parser = { // parser descriptor for AV_CODEC_ID_AV1, wired to the callbacks of this file
+    .codec_ids      = { AV_CODEC_ID_AV1 },
+    .priv_data_size = sizeof(AV1ParseContext),
+    .parser_init    = av1_parser_init,
+    .parser_close   = av1_parser_close,
+    .parser_parse   = av1_parser_parse,
+};
diff --git a/media/ffvpx/libavcodec/av1dec.c b/media/ffvpx/libavcodec/av1dec.c
new file mode 100644
index 0000000000..d83c902f1f
--- /dev/null
+++ b/media/ffvpx/libavcodec/av1dec.c
@@ -0,0 +1,1286 @@
+/*
+ * AV1 video decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+
+#include "libavutil/film_grain_params.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "av1dec.h"
+#include "bytestream.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "hwconfig.h"
+#include "profiles.h"
+#include "thread.h"
+
+/** Same as Div_Lut defined in spec section 7.11.3.7; indexed by the value computed in resolve_divisor(). */
+static const uint16_t div_lut[AV1_DIV_LUT_NUM] = {
+  16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
+  15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
+  15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
+  14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
+  13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
+  13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
+  13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
+  12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
+  12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
+  11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
+  11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
+  11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
+  10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
+  10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
+  10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
+  9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
+  9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
+  9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
+  9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
+  9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
+  8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
+  8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
+  8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
+  8240,  8224,  8208,  8192
+};
+
+static uint32_t inverse_recenter(int r, uint32_t v)
+{ // Undoes recentering around r: v > 2*r is returned as is, odd v map below r, even v map to r and above.
+    if (v > 2 * r)
+        return v;
+    else if (v & 1)
+        return r - ((v + 1) >> 1);
+    else
+        return r + (v >> 1);
+}
+
+static uint32_t decode_unsigned_subexp_with_ref(uint32_t sub_exp,
+                                                int mx, int r)
+{ // Maps sub_exp to a value relative to reference r; the mapping is mirrored when 2*r exceeds mx.
+    if ((r << 1) <= mx) {
+        return inverse_recenter(r, sub_exp);
+    } else {
+        return mx - 1 - inverse_recenter(mx - 1 - r, sub_exp);
+    }
+}
+
+static int32_t decode_signed_subexp_with_ref(uint32_t sub_exp, int low,
+                                             int high, int r)
+{ // Shifts the range to start at 0, decodes as unsigned, then moves the result back by low.
+    int32_t x = decode_unsigned_subexp_with_ref(sub_exp, high - low, r - low);
+    return x + low;
+}
+
+static void read_global_param(AV1DecContext *s, int type, int ref, int idx)
+{ // Computes cur_frame.gm_params[ref][idx] from the coded value and the corresponding previous parameter.
+    uint8_t primary_frame, prev_frame;
+    uint32_t abs_bits, prec_bits, round, prec_diff, sub, mx;
+    int32_t r, prev_gm_param;
+    // NOTE(review): ref_frame_idx[] is indexed before the AV1_PRIMARY_REF_NONE check below - confirm the index is in bounds.
+    primary_frame = s->raw_frame_header->primary_ref_frame;
+    prev_frame = s->raw_frame_header->ref_frame_idx[primary_frame];
+    abs_bits = AV1_GM_ABS_ALPHA_BITS;
+    prec_bits = AV1_GM_ALPHA_PREC_BITS;
+
+    /* setup_past_independence() sets PrevGmParams to default values. We can
+     * simply point to the current's frame gm_params as they will be initialized
+     * with defaults at this point.
+     */
+    if (s->raw_frame_header->primary_ref_frame == AV1_PRIMARY_REF_NONE)
+        prev_gm_param = s->cur_frame.gm_params[ref][idx];
+    else
+        prev_gm_param = s->ref[prev_frame].gm_params[ref][idx];
+    // idx 0 and 1 use the translation bit widths; the other indices keep the alpha widths set above.
+    if (idx < 2) {
+        if (type == AV1_WARP_MODEL_TRANSLATION) {
+            abs_bits = AV1_GM_ABS_TRANS_ONLY_BITS -
+                !s->raw_frame_header->allow_high_precision_mv;
+            prec_bits = AV1_GM_TRANS_ONLY_PREC_BITS -
+                !s->raw_frame_header->allow_high_precision_mv;
+        } else {
+            abs_bits = AV1_GM_ABS_TRANS_BITS;
+            prec_bits = AV1_GM_TRANS_PREC_BITS;
+        }
+    }
+    round = (idx % 3) == 2 ? (1 << AV1_WARPEDMODEL_PREC_BITS) : 0;
+    prec_diff = AV1_WARPEDMODEL_PREC_BITS - prec_bits;
+    sub = (idx % 3) == 2 ? (1 << prec_bits) : 0;
+    mx = 1 << abs_bits;
+    r = (prev_gm_param >> prec_diff) - sub;
+    // Decode at reduced precision relative to r, then scale back up by prec_diff bits and add the offset.
+    s->cur_frame.gm_params[ref][idx] =
+        (decode_signed_subexp_with_ref(s->raw_frame_header->gm_params[ref][idx],
+                                       -mx, mx + 1, r) << prec_diff) + round;
+}
+
+static uint64_t round_two(uint64_t x, uint16_t n)
+{ // x / 2^n rounded to nearest, halves rounding up; n == 0 returns x unchanged.
+    if (n == 0)
+        return x;
+    return ((x + ((uint64_t)1 << (n - 1))) >> n);
+}
+
+static int64_t round_two_signed(int64_t x, uint16_t n)
+{ // Applies round_two() to the magnitude of x and restores the sign, so rounding is symmetric around zero.
+    return ((x<0) ? -((int64_t)round_two(-x, n)) : (int64_t)round_two(x, n));
+}
+
+/**
+ * Resolve divisor process, see spec 7.11.3.7. Writes the shift to *shift and returns the div_lut entry.
+ * NOTE(review): for d == 0 the value e is negative and so is the table index; callers must pass d > 0.
+ */
+static int16_t resolve_divisor(uint32_t d, uint16_t *shift)
+{
+    int32_t e, f;
+    // shift is the position of the highest set bit of d; e is what remains of d above that power of two.
+    *shift = av_log2(d);
+    e = d - (1 << (*shift));
+    if (*shift > AV1_DIV_LUT_BITS)
+        f = round_two(e, *shift - AV1_DIV_LUT_BITS);
+    else
+        f = e << (AV1_DIV_LUT_BITS - (*shift));
+    // Add the precision of the table entries to the reported shift.
+    *shift += AV1_DIV_LUT_PREC_BITS;
+
+    return div_lut[f];
+}
+
+/**
+ * Check whether the global motion params of reference idx are valid (returns 1) or not (returns 0).
+ * param[2] must be > 0 because it is the divisor handed to resolve_divisor(). See spec 7.11.3.6.
+ */
+static uint8_t get_shear_params_valid(AV1DecContext *s, int idx)
+{
+    int16_t alpha, beta, gamma, delta, divf, divs;
+    int64_t v, w;
+    int32_t *param = &s->cur_frame.gm_params[idx][0];
+    if (param[2] <= 0)
+        return 0;
+    // param[2] > 0 from here on, so resolve_divisor() never sees 0 and its table index stays non-negative.
+    alpha = av_clip_int16(param[2] - (1 << AV1_WARPEDMODEL_PREC_BITS));
+    beta  = av_clip_int16(param[3]);
+    divf  = resolve_divisor(abs(param[2]), &divs);
+    v     = (int64_t)param[4] * (1 << AV1_WARPEDMODEL_PREC_BITS);
+    w     = (int64_t)param[3] * param[4];
+    gamma = av_clip_int16((int)round_two_signed((v * divf), divs));
+    delta = av_clip_int16(param[5] - (int)round_two_signed((w * divf), divs) - (1 << AV1_WARPEDMODEL_PREC_BITS));
+    // Quantise the four shear terms to multiples of 1 << AV1_WARP_PARAM_REDUCE_BITS.
+    alpha = round_two_signed(alpha, AV1_WARP_PARAM_REDUCE_BITS) << AV1_WARP_PARAM_REDUCE_BITS;
+    beta  = round_two_signed(beta,  AV1_WARP_PARAM_REDUCE_BITS) << AV1_WARP_PARAM_REDUCE_BITS;
+    gamma = round_two_signed(gamma, AV1_WARP_PARAM_REDUCE_BITS) << AV1_WARP_PARAM_REDUCE_BITS;
+    delta = round_two_signed(delta, AV1_WARP_PARAM_REDUCE_BITS) << AV1_WARP_PARAM_REDUCE_BITS;
+    // Reject models whose weighted shear magnitudes reach 1 << AV1_WARPEDMODEL_PREC_BITS.
+    if ((4 * abs(alpha) + 7 * abs(beta)) >= (1 << AV1_WARPEDMODEL_PREC_BITS) ||
+        (4 * abs(gamma) + 4 * abs(delta)) >= (1 << AV1_WARPEDMODEL_PREC_BITS))
+        return 0;
+
+    return 1;
+}
+
+/**
+* Update the gm type/params of the current frame. CBS has already parsed the raw
+* syntax elements, so only the remaining part of the spec process is implemented here.
+*/
+static void global_motion_params(AV1DecContext *s)
+{
+    const AV1RawFrameHeader *header = s->raw_frame_header;
+    int type, ref;
+    // Start every reference from the identity model: params 2 and 5 are 1 << AV1_WARPEDMODEL_PREC_BITS, the rest 0.
+    for (ref = AV1_REF_FRAME_LAST; ref <= AV1_REF_FRAME_ALTREF; ref++) {
+        s->cur_frame.gm_type[ref] = AV1_WARP_MODEL_IDENTITY;
+        for (int i = 0; i < 6; i++)
+            s->cur_frame.gm_params[ref][i] = (i % 3 == 2) ?
+                                             1 << AV1_WARPEDMODEL_PREC_BITS : 0;
+    }
+    if (header->frame_type == AV1_FRAME_KEY ||
+        header->frame_type == AV1_FRAME_INTRA_ONLY)
+        return;
+    // Derive the model type from the coded flags, then decode the parameters that type carries.
+    for (ref = AV1_REF_FRAME_LAST; ref <= AV1_REF_FRAME_ALTREF; ref++) {
+        if (header->is_global[ref]) {
+            if (header->is_rot_zoom[ref]) {
+                type = AV1_WARP_MODEL_ROTZOOM;
+            } else {
+                type = header->is_translation[ref] ? AV1_WARP_MODEL_TRANSLATION
+                                                   : AV1_WARP_MODEL_AFFINE;
+            }
+        } else {
+            type = AV1_WARP_MODEL_IDENTITY;
+        }
+        s->cur_frame.gm_type[ref] = type;
+
+        if (type >= AV1_WARP_MODEL_ROTZOOM) {
+            read_global_param(s, type, ref, 2);
+            read_global_param(s, type, ref, 3);
+            if (type == AV1_WARP_MODEL_AFFINE) {
+                read_global_param(s, type, ref, 4);
+                read_global_param(s, type, ref, 5);
+            } else {
+                s->cur_frame.gm_params[ref][4] = -s->cur_frame.gm_params[ref][3];
+                s->cur_frame.gm_params[ref][5] = s->cur_frame.gm_params[ref][2];
+            }
+        }
+        if (type >= AV1_WARP_MODEL_TRANSLATION) {
+            read_global_param(s, type, ref, 0);
+            read_global_param(s, type, ref, 1);
+        }
+        if (type <= AV1_WARP_MODEL_AFFINE) { // NOTE(review): true for every type assigned above if AFFINE is the largest model value - confirm
+            s->cur_frame.gm_invalid[ref] = !get_shear_params_valid(s, ref);
+        }
+    }
+}
+
+static int get_relative_dist(const AV1RawSequenceHeader *seq,
+                             unsigned int a, unsigned int b)
+{ // Signed value of a - b, wrapped to order_hint_bits_minus_1 + 1 bits; bit order_hint_bits_minus_1 acts as the sign bit.
+    unsigned int diff = a - b;
+    unsigned int m = 1 << seq->order_hint_bits_minus_1;
+    return (diff & (m - 1)) - (diff & m);
+}
+
+static void skip_mode_params(AV1DecContext *s)
+{ // Chooses the two reference frames used for skip mode and stores them in cur_frame.skip_mode_frame_idx[].
+    const AV1RawFrameHeader *header = s->raw_frame_header;
+    const AV1RawSequenceHeader *seq = s->raw_seq;
+
+    int forward_idx,  backward_idx;
+    int forward_hint, backward_hint;
+    int second_forward_idx, second_forward_hint;
+    int ref_hint, dist, i;
+
+    if (!header->skip_mode_present)
+        return;
+    // Find the nearest reference before (forward) and after (backward) the current frame by order hint.
+    forward_idx  = -1;
+    backward_idx = -1;
+    for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+        ref_hint = s->ref[header->ref_frame_idx[i]].raw_frame_header->order_hint;
+        dist = get_relative_dist(seq, ref_hint, header->order_hint);
+        if (dist < 0) {
+            if (forward_idx < 0 ||
+                get_relative_dist(seq, ref_hint, forward_hint) > 0) {
+                forward_idx  = i;
+                forward_hint = ref_hint;
+            }
+        } else if (dist > 0) {
+            if (backward_idx < 0 ||
+                get_relative_dist(seq, ref_hint, backward_hint) < 0) {
+                backward_idx  = i;
+                backward_hint = ref_hint;
+            }
+        }
+    }
+    // With one reference on each side, use that pair, lower index first.
+    if (forward_idx < 0) {
+        return;
+    } else if (backward_idx >= 0) {
+        s->cur_frame.skip_mode_frame_idx[0] =
+            AV1_REF_FRAME_LAST + FFMIN(forward_idx, backward_idx);
+        s->cur_frame.skip_mode_frame_idx[1] =
+            AV1_REF_FRAME_LAST + FFMAX(forward_idx, backward_idx);
+        return;
+    }
+    // No backward reference: pair the forward one with the nearest reference that precedes it.
+    second_forward_idx = -1;
+    for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+        ref_hint = s->ref[header->ref_frame_idx[i]].raw_frame_header->order_hint;
+        if (get_relative_dist(seq, ref_hint, forward_hint) < 0) {
+            if (second_forward_idx < 0 ||
+                get_relative_dist(seq, ref_hint, second_forward_hint) > 0) {
+                second_forward_idx  = i;
+                second_forward_hint = ref_hint;
+            }
+        }
+    }
+
+    if (second_forward_idx < 0)
+        return;
+
+    s->cur_frame.skip_mode_frame_idx[0] =
+        AV1_REF_FRAME_LAST + FFMIN(forward_idx, second_forward_idx);
+    s->cur_frame.skip_mode_frame_idx[1] =
+        AV1_REF_FRAME_LAST + FFMAX(forward_idx, second_forward_idx);
+}
+
+/**
+ * Compute s->cur_frame.coded_lossless for the current frame header.
+ *
+ * The flag is set to 1 only when none of the delta_q_* values tested below
+ * is non-zero and the quantizer index of every segment, clipped to 8 bits,
+ * is zero. Otherwise it is set to 0.
+ */
+static void coded_lossless_param(AV1DecContext *s)
+{
+    const AV1RawFrameHeader *header = s->raw_frame_header;
+    const int has_delta_q = header->delta_q_y_dc || header->delta_q_u_ac ||
+                            header->delta_q_u_dc || header->delta_q_v_ac ||
+                            header->delta_q_v_dc;
+
+    /* Assume lossy until every condition has been checked. */
+    s->cur_frame.coded_lossless = 0;
+    if (has_delta_q)
+        return;
+
+    for (int seg = 0; seg < AV1_MAX_SEGMENTS; seg++) {
+        int qindex = header->base_q_idx;
+
+        /* A segment may offset the base quantizer index. */
+        if (header->feature_enabled[seg][AV1_SEG_LVL_ALT_Q])
+            qindex += header->feature_value[seg][AV1_SEG_LVL_ALT_Q];
+
+        if (av_clip_uintp2(qindex, 8))
+            return;
+    }
+
+    s->cur_frame.coded_lossless = 1;
+}
+
+/**
+ * Fill s->cur_frame.film_grain for the current frame.
+ *
+ * Nothing is written when the frame header has apply_grain unset. With
+ * update_grain set, the parameters coded in the frame header are copied.
+ * Otherwise the parameters stored with the reference frame selected by
+ * film_grain_params_ref_idx are copied and only the grain seed is taken
+ * from the frame header.
+ */
+static void load_grain_params(AV1DecContext *s)
+{
+    const AV1RawFilmGrainParams *coded = &s->raw_frame_header->film_grain;
+    AV1RawFilmGrainParams *dst = &s->cur_frame.film_grain;
+
+    if (!coded->apply_grain)
+        return;
+
+    if (!coded->update_grain) {
+        const AV1RawFilmGrainParams *inherited =
+            &s->ref[coded->film_grain_params_ref_idx].film_grain;
+
+        memcpy(dst, inherited, sizeof(*dst));
+        dst->grain_seed = coded->grain_seed;
+    } else {
+        memcpy(dst, coded, sizeof(*dst));
+    }
+}
+
+/**
+ * Make sure s->tile_group_info can hold one entry per tile of the current
+ * frame (tile_cols * tile_rows) and record that count in s->tile_num.
+ *
+ * The array is only reallocated when the frame needs more entries than the
+ * previously recorded count.
+ *
+ * @return 0 on success, otherwise the negative error code returned by
+ *         av_reallocp_array(), in which case s->tile_num is reset to 0
+ */
+static int init_tile_data(AV1DecContext *s)
+{
+    const AV1RawFrameHeader *hdr = s->raw_frame_header;
+    const int needed = hdr->tile_cols * hdr->tile_rows;
+
+    if (needed > s->tile_num) {
+        int err = av_reallocp_array(&s->tile_group_info, needed,
+                                    sizeof(TileGroupInfo));
+        if (err < 0) {
+            s->tile_num = 0;
+            return err;
+        }
+    }
+
+    s->tile_num = needed;
+    return 0;
+}
+
+/**
+ * Record size, offset, row and column of every tile in a tile group.
+ *
+ * Walks the tile data of @p tile_group from tile tg_start to tg_end and
+ * fills the matching s->tile_group_info[] entries. Every tile but the last
+ * is preceded by a little-endian field of tile_size_bytes_minus1 + 1 bytes
+ * holding the tile size minus one; the last tile takes all remaining bytes.
+ * Offsets are positions inside the tile group's tile data.
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA if a size field or the tile it
+ *         announces does not fit into the remaining data
+ */
+static int get_tiles_info(AVCodecContext *avctx, const AV1RawTileGroup *tile_group)
+{
+    AV1DecContext *s = avctx->priv_data;
+    GetByteContext gb;
+    TileGroupInfo *info;
+    uint16_t tile_num, tile_row, tile_col;
+    uint32_t size = 0, size_bytes = 0;
+    int last;
+
+    bytestream2_init(&gb, tile_group->tile_data.data,
+                     tile_group->tile_data.data_size);
+    s->tg_start = tile_group->tg_start;
+    s->tg_end   = tile_group->tg_end;
+
+    for (tile_num = tile_group->tg_start; tile_num <= tile_group->tg_end; tile_num++) {
+        last     = tile_num == tile_group->tg_end;
+        tile_row = tile_num / s->raw_frame_header->tile_cols;
+        tile_col = tile_num % s->raw_frame_header->tile_cols;
+
+        if (last) {
+            /* No size field: the final tile spans the rest of the data. */
+            size = bytestream2_get_bytes_left(&gb);
+        } else {
+            size_bytes = s->raw_frame_header->tile_size_bytes_minus1 + 1;
+            if (bytestream2_get_bytes_left(&gb) < size_bytes)
+                return AVERROR_INVALIDDATA;
+
+            size = 0;
+            for (int i = 0; i < size_bytes; i++)
+                size |= bytestream2_get_byteu(&gb) << 8 * i;
+
+            /* The field stores size - 1, hence "<=" before the increment. */
+            if (bytestream2_get_bytes_left(&gb) <= size)
+                return AVERROR_INVALIDDATA;
+            size++;
+        }
+
+        info              = &s->tile_group_info[tile_num];
+        info->tile_size   = size;
+        info->tile_offset = bytestream2_tell(&gb);
+        info->tile_row    = tile_row;
+        info->tile_column = tile_col;
+
+        if (last)
+            return 0;
+
+        bytestream2_skipu(&gb, size);
+    }
+
+    return 0;
+}
+
+/**
+ * Derive the software pixel format from the active sequence header and
+ * negotiate the output format with the caller.
+ *
+ * The bit depth is taken from seq_profile / high_bitdepth / twelve_bit, the
+ * chroma layout from mono_chrome and the subsampling flags. A candidate list
+ * made of the hardware formats compiled in for that software format,
+ * followed by the software format itself, is then passed to
+ * ff_thread_get_format().
+ *
+ * On success s->pix_fmt holds the software format and avctx->pix_fmt the
+ * negotiated one.
+ *
+ * @return 0 on success, -1 for an unknown profile or a combination that maps
+ *         to no pixel format, the negative result of ff_thread_get_format()
+ *         if negotiation fails, or AVERROR(ENOSYS) if no hwaccel ended up
+ *         being selected
+ */
+static int get_pixel_format(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    const AV1RawSequenceHeader *seq = s->raw_seq;
+    uint8_t bit_depth;
+    int ret;
+    enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
+/* Upper bound on the number of hardware formats appended below; D3D11VA
+ * contributes two entries. The "+ 2" in the array size leaves room for the
+ * software format and the AV_PIX_FMT_NONE terminator. */
+#define HWACCEL_MAX (CONFIG_AV1_DXVA2_HWACCEL + \
+                     CONFIG_AV1_D3D11VA_HWACCEL * 2 + \
+                     CONFIG_AV1_NVDEC_HWACCEL + \
+                     CONFIG_AV1_VAAPI_HWACCEL + \
+                     CONFIG_AV1_VDPAU_HWACCEL)
+    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
+
+    /* Only profile 2 can signal 12 bit; profiles 0-2 otherwise use 8 or 10. */
+    if (seq->seq_profile == 2 && seq->color_config.high_bitdepth)
+        bit_depth = seq->color_config.twelve_bit ? 12 : 10;
+    else if (seq->seq_profile <= 2)
+        bit_depth = seq->color_config.high_bitdepth ? 10 : 8;
+    else {
+        av_log(avctx, AV_LOG_ERROR,
+               "Unknown AV1 profile %d.\n", seq->seq_profile);
+        return -1;
+    }
+
+    if (!seq->color_config.mono_chrome) {
+        // 4:4:4 x:0 y:0, 4:2:2 x:1 y:0, 4:2:0 x:1 y:1
+        if (seq->color_config.subsampling_x == 0 &&
+            seq->color_config.subsampling_y == 0) {
+            if (bit_depth == 8)
+                pix_fmt = AV_PIX_FMT_YUV444P;
+            else if (bit_depth == 10)
+                pix_fmt = AV_PIX_FMT_YUV444P10;
+            else if (bit_depth == 12)
+                pix_fmt = AV_PIX_FMT_YUV444P12;
+            else
+                av_log(avctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+        } else if (seq->color_config.subsampling_x == 1 &&
+                   seq->color_config.subsampling_y == 0) {
+            if (bit_depth == 8)
+                pix_fmt = AV_PIX_FMT_YUV422P;
+            else if (bit_depth == 10)
+                pix_fmt = AV_PIX_FMT_YUV422P10;
+            else if (bit_depth == 12)
+                pix_fmt = AV_PIX_FMT_YUV422P12;
+            else
+                av_log(avctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+        } else if (seq->color_config.subsampling_x == 1 &&
+                   seq->color_config.subsampling_y == 1) {
+            if (bit_depth == 8)
+                pix_fmt = AV_PIX_FMT_YUV420P;
+            else if (bit_depth == 10)
+                pix_fmt = AV_PIX_FMT_YUV420P10;
+            else if (bit_depth == 12)
+                pix_fmt = AV_PIX_FMT_YUV420P12;
+            else
+                av_log(avctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+        }
+    } else {
+        if (bit_depth == 8)
+            pix_fmt = AV_PIX_FMT_GRAY8;
+        else if (bit_depth == 10)
+            pix_fmt = AV_PIX_FMT_GRAY10;
+        else if (bit_depth == 12)
+            pix_fmt = AV_PIX_FMT_GRAY12;
+        else
+            av_log(avctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "AV1 decode get format: %s.\n",
+           av_get_pix_fmt_name(pix_fmt));
+
+    /* Reached when the subsampling combination matched none of the cases. */
+    if (pix_fmt == AV_PIX_FMT_NONE)
+        return -1;
+
+    /* Offer the hardware formats compiled in for this software format.
+     * Formats without a case below get no hardware candidates. */
+    switch (pix_fmt) {
+    case AV_PIX_FMT_YUV420P:
+#if CONFIG_AV1_DXVA2_HWACCEL
+        *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
+#endif
+#if CONFIG_AV1_D3D11VA_HWACCEL
+        *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
+        *fmtp++ = AV_PIX_FMT_D3D11;
+#endif
+#if CONFIG_AV1_NVDEC_HWACCEL
+        *fmtp++ = AV_PIX_FMT_CUDA;
+#endif
+#if CONFIG_AV1_VAAPI_HWACCEL
+        *fmtp++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_AV1_VDPAU_HWACCEL
+        *fmtp++ = AV_PIX_FMT_VDPAU;
+#endif
+        break;
+    case AV_PIX_FMT_YUV420P10:
+#if CONFIG_AV1_DXVA2_HWACCEL
+        *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
+#endif
+#if CONFIG_AV1_D3D11VA_HWACCEL
+        *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
+        *fmtp++ = AV_PIX_FMT_D3D11;
+#endif
+#if CONFIG_AV1_NVDEC_HWACCEL
+        *fmtp++ = AV_PIX_FMT_CUDA;
+#endif
+#if CONFIG_AV1_VAAPI_HWACCEL
+        *fmtp++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_AV1_VDPAU_HWACCEL
+        *fmtp++ = AV_PIX_FMT_VDPAU;
+#endif
+        break;
+    case AV_PIX_FMT_GRAY8:
+#if CONFIG_AV1_NVDEC_HWACCEL
+        *fmtp++ = AV_PIX_FMT_CUDA;
+#endif
+        break;
+    case AV_PIX_FMT_GRAY10:
+#if CONFIG_AV1_NVDEC_HWACCEL
+        *fmtp++ = AV_PIX_FMT_CUDA;
+#endif
+        break;
+    }
+
+    /* The software format goes last, followed by the list terminator. */
+    *fmtp++ = pix_fmt;
+    *fmtp = AV_PIX_FMT_NONE;
+
+    ret = ff_thread_get_format(avctx, pix_fmts);
+    if (ret < 0)
+        return ret;
+
+    /**
+     * check if the HW accel is inited correctly. If not, return un-implemented.
+     * Since now the av1 decoder doesn't support native decode, if it will be
+     * implemented in the future, need remove this check.
+     */
+    if (!avctx->hwaccel) {
+        av_log(avctx, AV_LOG_ERROR, "Your platform doesn't support"
+               " hardware accelerated AV1 decoding.\n");
+        return AVERROR(ENOSYS);
+    }
+
+    s->pix_fmt = pix_fmt;
+    avctx->pix_fmt = ret;
+
+    return 0;
+}
+
+/**
+ * Release everything an AV1Frame refers to and reset its per-frame state.
+ *
+ * The AVFrame container f->f itself is kept; only its buffers are released
+ * through ff_thread_release_buffer(). The global-motion arrays are not
+ * touched here.
+ */
+static void av1_frame_unref(AVCodecContext *avctx, AV1Frame *f)
+{
+    /* Drop the picture and the buffer references held by this entry. */
+    ff_thread_release_buffer(avctx, f->f);
+    av_buffer_unref(&f->hwaccel_priv_buf);
+    av_buffer_unref(&f->header_ref);
+
+    /* Clear the raw pointers that accompanied those references. */
+    f->hwaccel_picture_private = NULL;
+    f->raw_frame_header        = NULL;
+
+    f->temporal_id    = 0;
+    f->spatial_id     = 0;
+    f->coded_lossless = 0;
+    memset(f->skip_mode_frame_idx, 0, sizeof(f->skip_mode_frame_idx));
+    memset(&f->film_grain, 0, sizeof(f->film_grain));
+}
+
+/**
+ * Make @p dst refer to the same frame as @p src.
+ *
+ * The frame header reference and raw header pointer are always taken over.
+ * If @p src carries no picture buffer (src->f->buf[0] is NULL) the function
+ * stops there and returns 0 without copying the remaining state. Otherwise
+ * the picture, the hwaccel private buffer (if any) and the per-frame
+ * metadata are referenced or copied as well.
+ *
+ * @param avctx codec context, used for cleanup on failure
+ * @param dst   destination entry
+ * @param src   source entry
+ * @return 0 on success; on failure @p dst is unreferenced with
+ *         av1_frame_unref() and the negative error code of the failing
+ *         step is returned (AVERROR(ENOMEM) if referencing the hwaccel
+ *         private buffer fails)
+ */
+static int av1_frame_ref(AVCodecContext *avctx, AV1Frame *dst, const AV1Frame *src)
+{
+    int ret;
+
+    ret = av_buffer_replace(&dst->header_ref, src->header_ref);
+    if (ret < 0)
+        return ret;
+
+    dst->raw_frame_header = src->raw_frame_header;
+
+    if (!src->f->buf[0])
+        return 0;
+
+    ret = av_frame_ref(dst->f, src->f);
+    if (ret < 0)
+        goto fail;
+
+    if (src->hwaccel_picture_private) {
+        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
+        if (!dst->hwaccel_priv_buf) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
+    }
+
+    dst->spatial_id = src->spatial_id;
+    dst->temporal_id = src->temporal_id;
+    /* Sizes come from the destination arrays so the copies stay correct
+     * if the array dimensions in AV1Frame ever change. */
+    memcpy(dst->gm_invalid, src->gm_invalid, sizeof(dst->gm_invalid));
+    memcpy(dst->gm_type, src->gm_type, sizeof(dst->gm_type));
+    memcpy(dst->gm_params, src->gm_params, sizeof(dst->gm_params));
+    memcpy(dst->skip_mode_frame_idx, src->skip_mode_frame_idx,
+           sizeof(dst->skip_mode_frame_idx));
+    memcpy(&dst->film_grain, &src->film_grain, sizeof(dst->film_grain));
+    dst->coded_lossless = src->coded_lossless;
+
+    return 0;
+
+fail:
+    /* Report the error of the step that failed, not a blanket ENOMEM. */
+    av1_frame_unref(avctx, dst);
+    return ret;
+}
+
+/**
+ * Codec close callback: release all frames, header references, the tile
+ * table and the coded bitstream reader held by the decoder context.
+ *
+ * @return always 0
+ */
+static av_cold int av1_decode_free(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    AV1Frame *const refs_end = s->ref + FF_ARRAY_ELEMS(s->ref);
+
+    /* Reference slots first, then the frame currently being decoded. */
+    for (AV1Frame *slot = s->ref; slot < refs_end; slot++) {
+        av1_frame_unref(avctx, slot);
+        av_frame_free(&slot->f);
+    }
+    av1_frame_unref(avctx, &s->cur_frame);
+    av_frame_free(&s->cur_frame.f);
+
+    av_buffer_unref(&s->seq_ref);
+    av_buffer_unref(&s->header_ref);
+    av_freep(&s->tile_group_info);
+
+    ff_cbs_fragment_free(&s->current_obu);
+    ff_cbs_close(&s->cbc);
+
+    return 0;
+}
+
+/**
+ * Export stream properties from a sequence header to the codec context.
+ *
+ * Sets profile, level, colour description, chroma sample location, the
+ * film-grain property flag, the maximum frame dimensions, a 1:1 sample
+ * aspect ratio and, when timing information is present, the frame rate.
+ *
+ * @param avctx codec context to update
+ * @param seq   sequence header to read from
+ * @return 0 on success, or the negative error code of ff_set_dimensions()
+ */
+static int set_context_with_sequence(AVCodecContext *avctx,
+                                     const AV1RawSequenceHeader *seq)
+{
+    int width = seq->max_frame_width_minus_1 + 1;
+    int height = seq->max_frame_height_minus_1 + 1;
+
+    avctx->profile = seq->seq_profile;
+    avctx->level = seq->seq_level_idx[0];
+
+    avctx->color_range =
+        seq->color_config.color_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
+    avctx->color_primaries = seq->color_config.color_primaries;
+    /* The colour space (YUV<->RGB matrix) is signalled by
+     * matrix_coefficients, not by the primaries. */
+    avctx->colorspace = seq->color_config.matrix_coefficients;
+    avctx->color_trc = seq->color_config.transfer_characteristics;
+
+    /* Other chroma_sample_position values leave the location unchanged. */
+    switch (seq->color_config.chroma_sample_position) {
+    case AV1_CSP_VERTICAL:
+        avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
+        break;
+    case AV1_CSP_COLOCATED:
+        avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;
+        break;
+    }
+
+    if (seq->film_grain_params_present)
+        avctx->properties |= FF_CODEC_PROPERTY_FILM_GRAIN;
+    else
+        avctx->properties &= ~FF_CODEC_PROPERTY_FILM_GRAIN;
+
+    if (avctx->width != width || avctx->height != height) {
+        int ret = ff_set_dimensions(avctx, width, height);
+        if (ret < 0)
+            return ret;
+    }
+    avctx->sample_aspect_ratio = (AVRational) { 1, 1 };
+
+    if (seq->timing_info.num_units_in_display_tick &&
+        seq->timing_info.time_scale) {
+        /* framerate = time_scale / num_units_in_display_tick, hence the
+         * swapped destination fields. */
+        av_reduce(&avctx->framerate.den, &avctx->framerate.num,
+                  seq->timing_info.num_units_in_display_tick,
+                  seq->timing_info.time_scale,
+                  INT_MAX);
+        if (seq->timing_info.equal_picture_interval)
+            avctx->ticks_per_frame = seq->timing_info.num_ticks_per_picture_minus_1 + 1;
+    }
+
+    return 0;
+}
+
+/**
+ * Apply the frame size and the render size of a frame header to the codec
+ * context.
+ *
+ * The dimensions are updated when they differ from the current ones. The
+ * sample aspect ratio is derived as (height * render_width) /
+ * (width * render_height) and set when it differs from the current one.
+ *
+ * @return 0 on success, or the negative error code of ff_set_dimensions()
+ *         or ff_set_sar()
+ */
+static int update_context_with_frame_header(AVCodecContext *avctx,
+                                            const AV1RawFrameHeader *header)
+{
+    const int coded_w  = header->frame_width_minus_1   + 1;
+    const int coded_h  = header->frame_height_minus_1  + 1;
+    const int render_w = header->render_width_minus_1  + 1;
+    const int render_h = header->render_height_minus_1 + 1;
+    AVRational sar;
+    int err;
+
+    if (avctx->width != coded_w || avctx->height != coded_h) {
+        err = ff_set_dimensions(avctx, coded_w, coded_h);
+        if (err < 0)
+            return err;
+    }
+
+    /* 64-bit products keep the multiplication from overflowing int. */
+    av_reduce(&sar.num, &sar.den,
+              (int64_t)coded_h * render_w,
+              (int64_t)coded_w * render_h,
+              INT_MAX);
+
+    if (!av_cmp_q(avctx->sample_aspect_ratio, sar))
+        return 0;
+
+    err = ff_set_sar(avctx, sar);
+    return err < 0 ? err : 0;
+}
+
+/**
+ * Codec init callback.
+ *
+ * Allocates the AVFrame containers for all reference slots and the current
+ * frame, creates the coded bitstream reader and forwards the
+ * "operating_point" option to it. If extradata is present it is parsed, and
+ * a sequence header found there is applied to the codec context.
+ *
+ * @return a negative error code if an allocation, the reader setup, the
+ *         extradata parsing or applying the sequence header fails;
+ *         otherwise the non-negative result of the last step executed
+ */
+static av_cold int av1_decode_init(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    AV1RawSequenceHeader *seq;
+    int err;
+
+    s->avctx   = avctx;
+    s->pix_fmt = AV_PIX_FMT_NONE;
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
+        s->ref[i].f = av_frame_alloc();
+        if (s->ref[i].f)
+            continue;
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to allocate reference frame buffer %d.\n", i);
+        return AVERROR(ENOMEM);
+    }
+
+    s->cur_frame.f = av_frame_alloc();
+    if (!s->cur_frame.f) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to allocate current frame buffer.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    err = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, avctx);
+    if (err < 0)
+        return err;
+
+    av_opt_set_int(s->cbc->priv_data, "operating_point", s->operating_point, 0);
+
+    if (!avctx->extradata || !avctx->extradata_size)
+        return err;
+
+    err = ff_cbs_read_extradata_from_codec(s->cbc,
+                                           &s->current_obu,
+                                           avctx);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_WARNING, "Failed to read extradata.\n");
+        return err;
+    }
+
+    seq = ((CodedBitstreamAV1Context *)(s->cbc->priv_data))->sequence_header;
+    if (seq) {
+        err = set_context_with_sequence(avctx, seq);
+        if (err < 0)
+            av_log(avctx, AV_LOG_WARNING, "Failed to set decoder context.\n");
+    } else {
+        /* Not an error: extradata without a sequence header is accepted. */
+        av_log(avctx, AV_LOG_WARNING, "No sequence header available.\n");
+    }
+
+    /* The parsed extradata fragment is no longer needed either way. */
+    ff_cbs_fragment_reset(&s->current_obu);
+
+    return err;
+}
+
+/**
+ * Obtain a picture buffer for @p f and initialise it from the current
+ * frame header.
+ *
+ * The codec context is first updated with the frame header's dimensions,
+ * then a buffer is requested, key_frame and pict_type are set from the
+ * frame type, and a zeroed hwaccel private buffer is attached when the
+ * active hwaccel asks for one.
+ *
+ * @return 0 on success, a negative error code otherwise; after a failure
+ *         that occurs once the buffer request was made, @p f has been
+ *         unreferenced with av1_frame_unref()
+ */
+static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f)
+{
+    AV1DecContext *s = avctx->priv_data;
+    AV1RawFrameHeader *header = s->raw_frame_header;
+    const AVHWAccel *hwaccel;
+    AVFrame *pic;
+    int err;
+
+    err = update_context_with_frame_header(avctx, header);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to update context with frame header\n");
+        return err;
+    }
+
+    err = ff_thread_get_buffer(avctx, f->f, AV_GET_BUFFER_FLAG_REF);
+    if (err < 0) {
+        av1_frame_unref(avctx, f);
+        return err;
+    }
+
+    pic = f->f;
+    pic->key_frame = header->frame_type == AV1_FRAME_KEY;
+
+    /* Any other frame type leaves pict_type as delivered with the buffer. */
+    if (header->frame_type == AV1_FRAME_KEY ||
+        header->frame_type == AV1_FRAME_INTRA_ONLY)
+        pic->pict_type = AV_PICTURE_TYPE_I;
+    else if (header->frame_type == AV1_FRAME_INTER)
+        pic->pict_type = AV_PICTURE_TYPE_P;
+    else if (header->frame_type == AV1_FRAME_SWITCH)
+        pic->pict_type = AV_PICTURE_TYPE_SP;
+
+    hwaccel = avctx->hwaccel;
+    if (!hwaccel || !hwaccel->frame_priv_data_size)
+        return 0;
+
+    f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+    if (!f->hwaccel_priv_buf) {
+        av1_frame_unref(avctx, f);
+        return AVERROR(ENOMEM);
+    }
+    f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
+
+    return 0;
+}
+
+/**
+ * Attach the current frame's film grain parameters to @p frame as side data.
+ *
+ * Nothing is attached when apply_grain is unset. Fields stored with an
+ * offset in the bitstream structure (the *_minus_8, *_minus_6 and *_plus_128
+ * members) are converted to their actual values.
+ *
+ * @return 0 on success or when there is nothing to export, AVERROR(ENOMEM)
+ *         if the side data cannot be created
+ */
+static int export_film_grain(AVCodecContext *avctx, AVFrame *frame)
+{
+    AV1DecContext *s = avctx->priv_data;
+    const AV1RawFilmGrainParams *src = &s->cur_frame.film_grain;
+    AVFilmGrainParams *params;
+    AVFilmGrainAOMParams *dst;
+    int n;
+
+    if (!src->apply_grain)
+        return 0;
+
+    params = av_film_grain_params_create_side_data(frame);
+    if (!params)
+        return AVERROR(ENOMEM);
+
+    params->type = AV_FILM_GRAIN_PARAMS_AV1;
+    params->seed = src->grain_seed;
+    dst          = &params->codec.aom;
+
+    /* Scalar settings. */
+    dst->chroma_scaling_from_luma = src->chroma_scaling_from_luma;
+    dst->scaling_shift            = src->grain_scaling_minus_8 + 8;
+    dst->ar_coeff_lag             = src->ar_coeff_lag;
+    dst->ar_coeff_shift           = src->ar_coeff_shift_minus_6 + 6;
+    dst->grain_scale_shift        = src->grain_scale_shift;
+    dst->overlap_flag             = src->overlap_flag;
+    dst->limit_output_range       = src->clip_to_restricted_range;
+
+    /* Scaling function points: luma, then Cb (index 0) and Cr (index 1). */
+    dst->num_y_points = src->num_y_points;
+    for (n = 0; n < src->num_y_points; n++) {
+        dst->y_points[n][0] = src->point_y_value[n];
+        dst->y_points[n][1] = src->point_y_scaling[n];
+    }
+
+    dst->num_uv_points[0] = src->num_cb_points;
+    for (n = 0; n < src->num_cb_points; n++) {
+        dst->uv_points[0][n][0] = src->point_cb_value[n];
+        dst->uv_points[0][n][1] = src->point_cb_scaling[n];
+    }
+
+    dst->num_uv_points[1] = src->num_cr_points;
+    for (n = 0; n < src->num_cr_points; n++) {
+        dst->uv_points[1][n][0] = src->point_cr_value[n];
+        dst->uv_points[1][n][1] = src->point_cr_scaling[n];
+    }
+
+    /* Auto-regression coefficients: 24 for luma, 25 per chroma plane. */
+    for (n = 0; n < 24; n++)
+        dst->ar_coeffs_y[n] = src->ar_coeffs_y_plus_128[n] - 128;
+
+    for (n = 0; n < 25; n++) {
+        dst->ar_coeffs_uv[0][n] = src->ar_coeffs_cb_plus_128[n] - 128;
+        dst->ar_coeffs_uv[1][n] = src->ar_coeffs_cr_plus_128[n] - 128;
+    }
+
+    /* Chroma multipliers and offsets, Cb first. */
+    dst->uv_mult[0]      = src->cb_mult;
+    dst->uv_mult_luma[0] = src->cb_luma_mult;
+    dst->uv_offset[0]    = src->cb_offset;
+    dst->uv_mult[1]      = src->cr_mult;
+    dst->uv_mult_luma[1] = src->cr_luma_mult;
+    dst->uv_offset[1]    = src->cr_offset;
+
+    return 0;
+}
+
+/**
+ * Hand the current frame to the caller.
+ *
+ * When an operating point is active and the current frame's spatial_id is
+ * below av_log2(operating_point_idc >> 8), the frame is not output and 0 is
+ * returned with *got_frame untouched. Otherwise @p frame becomes a new
+ * reference to the current picture, film grain side data is attached if the
+ * caller requested it, and timestamps and size are taken from @p pkt.
+ *
+ * @return 0 on success (whether or not a frame was output), a negative
+ *         error code if referencing the frame or exporting film grain fails
+ */
+static int set_output_frame(AVCodecContext *avctx, AVFrame *frame,
+                            const AVPacket *pkt, int *got_frame)
+{
+    AV1DecContext *s = avctx->priv_data;
+    int err;
+
+    // TODO: all layers
+    if (s->operating_point_idc) {
+        const int layer_limit = av_log2(s->operating_point_idc >> 8);
+
+        if (layer_limit > s->cur_frame.spatial_id)
+            return 0;
+    }
+
+    err = av_frame_ref(frame, s->cur_frame.f);
+    if (err < 0)
+        return err;
+
+    if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) {
+        err = export_film_grain(avctx, frame);
+        if (err < 0) {
+            /* Do not leave a half-prepared frame with the caller. */
+            av_frame_unref(frame);
+            return err;
+        }
+    }
+
+    frame->pts      = pkt->pts;
+    frame->pkt_dts  = pkt->dts;
+    frame->pkt_size = pkt->size;
+
+    *got_frame = 1;
+
+    return 0;
+}
+
+/**
+ * Store the current frame in every reference slot whose bit is set in the
+ * frame header's refresh_frame_flags.
+ *
+ * @return 0 on success, or the negative error code of av1_frame_ref() for
+ *         the first slot that could not be updated
+ */
+static int update_reference_list(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    const AV1RawFrameHeader *header = s->raw_frame_header;
+
+    for (int slot = 0; slot < AV1_NUM_REF_FRAMES; slot++) {
+        int err;
+
+        if (!(header->refresh_frame_flags & (1 << slot)))
+            continue;
+
+        av1_frame_unref(avctx, &s->ref[slot]);
+        err = av1_frame_ref(avctx, &s->ref[slot], &s->cur_frame);
+        if (err < 0) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Failed to update frame %d in reference list\n", slot);
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Prepare s->cur_frame for the frame described by the current frame header.
+ *
+ * The previous contents of cur_frame are released, the frame header is
+ * attached and the tile table is sized. If avctx->skip_frame asks for this
+ * frame type to be discarded, the function returns at that point without
+ * allocating a picture. Otherwise a picture is allocated and the derived
+ * per-frame parameters (global motion, skip mode, coded lossless, film
+ * grain) are computed.
+ *
+ * @return a negative error code on failure, otherwise 0 or the non-negative
+ *         result of av1_frame_alloc()
+ */
+static int get_current_frame(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    int frame_type, is_key, is_intra;
+    int err;
+
+    av1_frame_unref(avctx, &s->cur_frame);
+
+    s->cur_frame.header_ref = av_buffer_ref(s->header_ref);
+    if (!s->cur_frame.header_ref)
+        return AVERROR(ENOMEM);
+
+    s->cur_frame.raw_frame_header = s->raw_frame_header;
+
+    err = init_tile_data(s);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to init tile data.\n");
+        return err;
+    }
+
+    frame_type = s->raw_frame_header->frame_type;
+    is_key     = frame_type == AV1_FRAME_KEY;
+    is_intra   = is_key || frame_type == AV1_FRAME_INTRA_ONLY;
+
+    /* Honour the caller's discard level before spending a picture buffer. */
+    if (avctx->skip_frame >= AVDISCARD_ALL ||
+        (avctx->skip_frame >= AVDISCARD_NONKEY   && !is_key) ||
+        (avctx->skip_frame >= AVDISCARD_NONINTRA && !is_intra))
+        return 0;
+
+    err = av1_frame_alloc(avctx, &s->cur_frame);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to allocate space for current frame.\n");
+        return err;
+    }
+
+    global_motion_params(s);
+    skip_mode_params(s);
+    coded_lossless_param(s);
+    load_grain_params(s);
+
+    return err;
+}
+
+/**
+ * Decode callback: process one packet.
+ *
+ * The packet is split into OBUs with ff_cbs_read_packet() and the units are
+ * handled in order:
+ *  - a sequence header is stored, applied to the codec context and, the
+ *    first time, used to negotiate the pixel format;
+ *  - a frame header (or the header part of a frame OBU) either re-shows an
+ *    existing reference frame or starts a new current frame;
+ *  - tile groups are indexed and passed to the hwaccel;
+ *  - once the tile group containing the last tile of the frame has been
+ *    seen, the frame is finished, the reference list is updated and the
+ *    frame is output if show_frame is set.
+ *
+ * @param avctx     codec context
+ * @param frame     receives the output picture, if any
+ * @param got_frame set to 1 by set_output_frame() when a picture is output
+ * @param pkt       input packet
+ * @return a negative error code on failure, otherwise the non-negative
+ *         result of the last step executed
+ */
+static int av1_decode_frame(AVCodecContext *avctx, AVFrame *frame,
+                            int *got_frame, AVPacket *pkt)
+{
+    AV1DecContext *s = avctx->priv_data;
+    AV1RawTileGroup *raw_tile_group = NULL;
+    int ret;
+
+    ret = ff_cbs_read_packet(s->cbc, &s->current_obu, pkt);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to read packet.\n");
+        goto end;
+    }
+    av_log(avctx, AV_LOG_DEBUG, "Total obu for this frame:%d.\n",
+           s->current_obu.nb_units);
+
+    for (int i = 0; i < s->current_obu.nb_units; i++) {
+        CodedBitstreamUnit *unit = &s->current_obu.units[i];
+        AV1RawOBU *obu = unit->content;
+        const AV1RawOBUHeader *header;
+
+        /* Units without decomposed content are skipped. */
+        if (!obu)
+            continue;
+
+        header = &obu->header;
+        av_log(avctx, AV_LOG_DEBUG, "Obu idx:%d, obu type:%d.\n", i, unit->type);
+
+        switch (unit->type) {
+        case AV1_OBU_SEQUENCE_HEADER:
+            /* Keep the unit's content alive for as long as raw_seq is used. */
+            av_buffer_unref(&s->seq_ref);
+            s->seq_ref = av_buffer_ref(unit->content_ref);
+            if (!s->seq_ref) {
+                ret = AVERROR(ENOMEM);
+                goto end;
+            }
+
+            s->raw_seq = &obu->obu.sequence_header;
+
+            ret = set_context_with_sequence(avctx, s->raw_seq);
+            if (ret < 0) {
+                av_log(avctx, AV_LOG_ERROR, "Failed to set context.\n");
+                s->raw_seq = NULL;
+                goto end;
+            }
+
+            s->operating_point_idc = s->raw_seq->operating_point_idc[s->operating_point];
+
+            /* The pixel format is negotiated only once per decoder. */
+            if (s->pix_fmt == AV_PIX_FMT_NONE) {
+                ret = get_pixel_format(avctx);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR,
+                           "Failed to get pixel format.\n");
+                    s->raw_seq = NULL;
+                    goto end;
+                }
+            }
+
+            if (avctx->hwaccel && avctx->hwaccel->decode_params) {
+                ret = avctx->hwaccel->decode_params(avctx, unit->type, unit->data,
+                                                    unit->data_size);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "HW accel decode params fail.\n");
+                    s->raw_seq = NULL;
+                    goto end;
+                }
+            }
+            break;
+        case AV1_OBU_REDUNDANT_FRAME_HEADER:
+            /* Ignored when a frame header is already active; otherwise it
+             * is treated like a regular frame header. */
+            if (s->raw_frame_header)
+                break;
+        // fall-through
+        case AV1_OBU_FRAME:
+        case AV1_OBU_FRAME_HEADER:
+            if (!s->raw_seq) {
+                av_log(avctx, AV_LOG_ERROR, "Missing Sequence Header.\n");
+                ret = AVERROR_INVALIDDATA;
+                goto end;
+            }
+
+            av_buffer_unref(&s->header_ref);
+            s->header_ref = av_buffer_ref(unit->content_ref);
+            if (!s->header_ref) {
+                ret = AVERROR(ENOMEM);
+                goto end;
+            }
+
+            /* A frame OBU embeds its header; the other types are the header. */
+            if (unit->type == AV1_OBU_FRAME)
+                s->raw_frame_header = &obu->obu.frame.header;
+            else
+                s->raw_frame_header = &obu->obu.frame_header;
+
+            if (s->raw_frame_header->show_existing_frame) {
+                /* Re-show a stored reference: no decoding is involved, and
+                 * processing of this packet stops afterwards. */
+                av1_frame_unref(avctx, &s->cur_frame);
+
+                ret = av1_frame_ref(avctx, &s->cur_frame,
+                                    &s->ref[s->raw_frame_header->frame_to_show_map_idx]);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "Failed to get reference frame.\n");
+                    goto end;
+                }
+
+                ret = update_reference_list(avctx);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "Failed to update reference list.\n");
+                    goto end;
+                }
+
+                if (s->cur_frame.f->buf[0]) {
+                    ret = set_output_frame(avctx, frame, pkt, got_frame);
+                    if (ret < 0)
+                        av_log(avctx, AV_LOG_ERROR, "Set output frame error.\n");
+                }
+
+                s->raw_frame_header = NULL;
+
+                goto end;
+            }
+
+            ret = get_current_frame(avctx);
+            if (ret < 0) {
+                av_log(avctx, AV_LOG_ERROR, "Get current frame error\n");
+                goto end;
+            }
+
+            s->cur_frame.spatial_id  = header->spatial_id;
+            s->cur_frame.temporal_id = header->temporal_id;
+
+            /* buf[0] is unset when get_current_frame() skipped allocation. */
+            if (avctx->hwaccel && s->cur_frame.f->buf[0]) {
+                ret = avctx->hwaccel->start_frame(avctx, unit->data,
+                                                  unit->data_size);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "HW accel start frame fail.\n");
+                    goto end;
+                }
+            }
+            /* Only a frame OBU carries tile data along with the header. */
+            if (unit->type != AV1_OBU_FRAME)
+                break;
+        // fall-through
+        case AV1_OBU_TILE_GROUP:
+            if (!s->raw_frame_header) {
+                av_log(avctx, AV_LOG_ERROR, "Missing Frame Header.\n");
+                ret = AVERROR_INVALIDDATA;
+                goto end;
+            }
+
+            if (unit->type == AV1_OBU_FRAME)
+                raw_tile_group = &obu->obu.frame.tile_group;
+            else
+                raw_tile_group = &obu->obu.tile_group;
+
+            ret = get_tiles_info(avctx, raw_tile_group);
+            if (ret < 0)
+                goto end;
+
+            if (avctx->hwaccel && s->cur_frame.f->buf[0]) {
+                ret = avctx->hwaccel->decode_slice(avctx,
+                                                   raw_tile_group->tile_data.data,
+                                                   raw_tile_group->tile_data.data_size);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR,
+                           "HW accel decode slice fail.\n");
+                    goto end;
+                }
+            }
+            break;
+        case AV1_OBU_TILE_LIST:
+        case AV1_OBU_TEMPORAL_DELIMITER:
+        case AV1_OBU_PADDING:
+        case AV1_OBU_METADATA:
+            break;
+        default:
+            av_log(avctx, AV_LOG_DEBUG,
+                   "Unknown obu type: %d (%"SIZE_SPECIFIER" bits).\n",
+                   unit->type, unit->data_size);
+        }
+
+        /* The frame is complete once the tile group ending with the last
+         * tile (index tile_num - 1) has been processed. */
+        if (raw_tile_group && (s->tile_num == raw_tile_group->tg_end + 1)) {
+            if (avctx->hwaccel && s->cur_frame.f->buf[0]) {
+                ret = avctx->hwaccel->end_frame(avctx);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "HW accel end frame fail.\n");
+                    goto end;
+                }
+            }
+
+            ret = update_reference_list(avctx);
+            if (ret < 0) {
+                av_log(avctx, AV_LOG_ERROR, "Failed to update reference list.\n");
+                goto end;
+            }
+
+            if (s->raw_frame_header->show_frame && s->cur_frame.f->buf[0]) {
+                ret = set_output_frame(avctx, frame, pkt, got_frame);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "Set output frame error\n");
+                    goto end;
+                }
+            }
+            raw_tile_group = NULL;
+            s->raw_frame_header = NULL;
+        }
+    }
+
+end:
+    /* The fragment is reset on every exit path; on error the active frame
+     * header is dropped as well. */
+    ff_cbs_fragment_reset(&s->current_obu);
+    if (ret < 0)
+        s->raw_frame_header = NULL;
+    return ret;
+}
+
+/**
+ * Flush callback: drop all reference frames and the current frame, forget
+ * the active sequence and frame headers and flush the bitstream reader.
+ */
+static void av1_decode_flush(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    AV1Frame *const refs_end = s->ref + FF_ARRAY_ELEMS(s->ref);
+
+    for (AV1Frame *slot = s->ref; slot < refs_end; slot++)
+        av1_frame_unref(avctx, slot);
+    av1_frame_unref(avctx, &s->cur_frame);
+
+    s->operating_point_idc = 0;
+    s->raw_frame_header    = NULL;
+    s->raw_seq             = NULL;
+
+    ff_cbs_flush(s->cbc);
+}
+
+/* OFFSET() locates an option's storage inside AV1DecContext; VD is the flag
+ * combination used by the option table below. */
+#define OFFSET(x) offsetof(AV1DecContext, x)
+#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+/* Private options of the decoder. "operating_point" is stored in
+ * AV1DecContext.operating_point, defaults to 0 and is limited to
+ * [0, AV1_MAX_OPERATING_POINTS - 1]. */
+static const AVOption av1_options[] = {
+    { "operating_point",  "Select an operating point of the scalable bitstream",
+                          OFFSET(operating_point), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, AV1_MAX_OPERATING_POINTS - 1, VD },
+    { NULL }
+};
+
+/* AVClass of the decoder's private context; exposes av1_options. */
+static const AVClass av1_class = {
+    .class_name = "AV1 decoder",
+    .item_name  = av_default_item_name,
+    .option     = av1_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+/* Codec descriptor of the "av1" decoder. It wires up the init, close,
+ * decode and flush callbacks defined in this file, requests the
+ * "av1_frame_split" bitstream filter and lists the hardware configurations
+ * that were enabled at build time. */
+const FFCodec ff_av1_decoder = {
+    .p.name                = "av1",
+    CODEC_LONG_NAME("Alliance for Open Media AV1"),
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_AV1,
+    .priv_data_size        = sizeof(AV1DecContext),
+    .init                  = av1_decode_init,
+    .close                 = av1_decode_free,
+    FF_CODEC_DECODE_CB(av1_decode_frame),
+    .p.capabilities        = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING,
+    .caps_internal         = FF_CODEC_CAP_INIT_CLEANUP |
+                             FF_CODEC_CAP_SETS_PKT_DTS,
+    .flush                 = av1_decode_flush,
+    .p.profiles            = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
+    .p.priv_class          = &av1_class,
+    .bsfs                  = "av1_frame_split",
+    /* NULL-terminated list; each entry is compiled in only when the
+     * matching CONFIG_AV1_*_HWACCEL option is enabled. */
+    .hw_configs            = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_AV1_DXVA2_HWACCEL
+        HWACCEL_DXVA2(av1),
+#endif
+#if CONFIG_AV1_D3D11VA_HWACCEL
+        HWACCEL_D3D11VA(av1),
+#endif
+#if CONFIG_AV1_D3D11VA2_HWACCEL
+        HWACCEL_D3D11VA2(av1),
+#endif
+#if CONFIG_AV1_NVDEC_HWACCEL
+        HWACCEL_NVDEC(av1),
+#endif
+#if CONFIG_AV1_VAAPI_HWACCEL
+        HWACCEL_VAAPI(av1),
+#endif
+#if CONFIG_AV1_VDPAU_HWACCEL
+        HWACCEL_VDPAU(av1),
+#endif
+
+        NULL
+    },
+};
diff --git a/media/ffvpx/libavcodec/av1dec.h b/media/ffvpx/libavcodec/av1dec.h
new file mode 100644
index 0000000000..82c7084e99
--- /dev/null
+++ b/media/ffvpx/libavcodec/av1dec.h
@@ -0,0 +1,89 @@
+/*
+ * AV1 video decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AV1DEC_H
+#define AVCODEC_AV1DEC_H
+
+#include <stdint.h>
+
+#include "libavutil/buffer.h"
+#include "libavutil/frame.h"
+#include "libavutil/pixfmt.h"
+#include "avcodec.h"
+#include "cbs.h"
+#include "cbs_av1.h"
+/* Per-frame decoder state; AV1DecContext keeps one per reference slot (ref[]) and one for the current frame. */
+typedef struct AV1Frame {
+    AVFrame *f;
+    /* opaque per-picture hwaccel data; presumably kept alive by hwaccel_priv_buf (confirm in av1dec.c) */
+    AVBufferRef *hwaccel_priv_buf;
+    void *hwaccel_picture_private;
+    /* parsed frame header; header_ref is presumably the buffer reference backing it (confirm in av1dec.c) */
+    AVBufferRef *header_ref;
+    AV1RawFrameHeader *raw_frame_header;
+
+    int temporal_id;
+    int spatial_id;
+    /* gm_* arrays hold one entry per reference frame; "gm" presumably means global motion (TODO confirm) */
+    uint8_t gm_invalid[AV1_NUM_REF_FRAMES];
+    uint8_t gm_type[AV1_NUM_REF_FRAMES];
+    int32_t gm_params[AV1_NUM_REF_FRAMES][6];
+
+    uint8_t skip_mode_frame_idx[2];
+
+    AV1RawFilmGrainParams film_grain;
+
+    uint8_t coded_lossless;
+} AV1Frame;
+/* Element type of AV1DecContext.tile_group_info; presumably one entry per tile, locating it by offset and size (TODO confirm). */
+typedef struct TileGroupInfo {
+    uint32_t tile_offset;
+    uint32_t tile_size;
+    uint16_t tile_row;
+    uint16_t tile_column;
+} TileGroupInfo;
+/* Private context of the AV1 decoder (ff_av1_decoder.priv_data_size is sizeof(AV1DecContext)). */
+typedef struct AV1DecContext {
+    const AVClass *class; /* must stay first: avcodec_open2() stores the AVClass pointer at the start of priv_data */
+    AVCodecContext *avctx;
+
+    enum AVPixelFormat pix_fmt;
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment current_obu;
+
+    AVBufferRef *seq_ref;
+    AV1RawSequenceHeader *raw_seq;
+    AVBufferRef *header_ref;
+    AV1RawFrameHeader *raw_frame_header;
+    TileGroupInfo *tile_group_info;
+    uint16_t tile_num;
+    uint16_t tg_start;
+    uint16_t tg_end;
+
+    int operating_point_idc;
+    /* AV1_NUM_REF_FRAMES reference slots, followed by the state of the current frame */
+    AV1Frame ref[AV1_NUM_REF_FRAMES];
+    AV1Frame cur_frame;
+
+    // AVOptions
+    int operating_point; /* set through the "operating_point" option, range 0 .. AV1_MAX_OPERATING_POINTS - 1 */
+} AV1DecContext;
+
+#endif /* AVCODEC_AV1DEC_H */
diff --git a/media/ffvpx/libavcodec/avcodec.c b/media/ffvpx/libavcodec/avcodec.c
new file mode 100644
index 0000000000..fb1362290f
--- /dev/null
+++ b/media/ffvpx/libavcodec/avcodec.c
@@ -0,0 +1,716 @@
+/*
+ * AVCodecContext functions for libavcodec
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AVCodecContext functions for libavcodec
+ */
+
+#include "config.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/fifo.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/thread.h"
+#include "avcodec.h"
+#include "bsf.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "encode.h"
+#include "frame_thread_encoder.h"
+#include "internal.h"
+#include "thread.h"
+
+/* Runs func count times, in order, on the calling thread; job n gets arg + n * size. Results go to ret[] if set; returns 0. */
+int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2), void *arg, int *ret, int count, int size)
+{
+    char *const jobs = arg;
+    for (int n = 0; n < count; n++) {
+        const int status = func(c, jobs + n * size);
+        if (ret)
+            ret[n] = status;
+    }
+    emms_c();
+    return 0;
+}
+
+/* Runs func(c, arg, job, 0) for job = 0 .. count - 1, in order, on the calling thread.
+ * Each result is stored in ret[job] when ret is non-NULL; the return value is always 0. */
+int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2, int jobnr, int threadnr), void *arg, int *ret, int count)
+{
+    for (int job = 0; job < count; job++) {
+        const int status = func(c, arg, job, 0);
+        if (ret)
+            ret[job] = status;
+    }
+    emms_c();
+    return 0;
+}
+/* Taken by lock_avcodec() around the init of codecs flagged FF_CODEC_CAP_NOT_INIT_THREADSAFE. */
+static AVMutex codec_mutex = AV_MUTEX_INITIALIZER;
+/* Takes codec_mutex, but only for codecs that have an init callback and are flagged FF_CODEC_CAP_NOT_INIT_THREADSAFE. */
+static void lock_avcodec(const FFCodec *codec)
+{
+    if (codec->init && (codec->caps_internal & FF_CODEC_CAP_NOT_INIT_THREADSAFE))
+        ff_mutex_lock(&codec_mutex);
+}
+/* Releases codec_mutex under exactly the condition that makes lock_avcodec() take it. */
+static void unlock_avcodec(const FFCodec *codec)
+{
+    if (codec->init && (codec->caps_internal & FF_CODEC_CAP_NOT_INIT_THREADSAFE))
+        ff_mutex_unlock(&codec_mutex);
+}
+
+/*
+ * Bit rate reported for ctx. Audio codecs for which av_get_bits_per_sample()
+ * is non-zero get sample_rate * channels * bits_per_sample (0 if that would
+ * overflow); other known media types use ctx->bit_rate, unknown types give 0.
+ */
+static int64_t get_bit_rate(AVCodecContext *ctx)
+{
+    int bps;
+    int64_t samples_per_sec;
+
+    switch (ctx->codec_type) {
+    case AVMEDIA_TYPE_AUDIO:
+        bps = av_get_bits_per_sample(ctx->codec_id);
+        if (!bps)
+            return ctx->bit_rate;
+        samples_per_sec = ctx->sample_rate * (int64_t)ctx->ch_layout.nb_channels;
+        /* give up with 0 instead of overflowing int64_t in the product below */
+        if (samples_per_sec > INT64_MAX / bps)
+            return 0;
+        return samples_per_sec * bps;
+    case AVMEDIA_TYPE_VIDEO:
+    case AVMEDIA_TYPE_DATA:
+    case AVMEDIA_TYPE_SUBTITLE:
+    case AVMEDIA_TYPE_ATTACHMENT:
+        return ctx->bit_rate;
+    default:
+        return 0;
+    }
+}
+/* Opens avctx for codec (or for avctx->codec when codec is NULL). Returns 0 on success or if already open, else an AVERROR code. */
+int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options)
+{
+    int ret = 0;
+    AVCodecInternal *avci;
+    const FFCodec *codec2;
+
+    if (avcodec_is_open(avctx))
+        return 0;
+    /* settle which codec to use; an explicit codec must match the one avctx was allocated for */
+    if (!codec && !avctx->codec) {
+        av_log(avctx, AV_LOG_ERROR, "No codec provided to avcodec_open2()\n");
+        return AVERROR(EINVAL);
+    }
+    if (codec && avctx->codec && codec != avctx->codec) {
+        av_log(avctx, AV_LOG_ERROR, "This AVCodecContext was allocated for %s, "
+                                    "but %s passed to avcodec_open2()\n", avctx->codec->name, codec->name);
+        return AVERROR(EINVAL);
+    }
+    if (!codec)
+        codec = avctx->codec;
+    codec2 = ffcodec(codec);
+
+    if ((avctx->codec_type != AVMEDIA_TYPE_UNKNOWN && avctx->codec_type != codec->type) ||
+        (avctx->codec_id   != AV_CODEC_ID_NONE     && avctx->codec_id   != codec->id)) {
+        av_log(avctx, AV_LOG_ERROR, "Codec type or id mismatches\n");
+        return AVERROR(EINVAL);
+    }
+
+    avctx->codec_type = codec->type;
+    avctx->codec_id   = codec->id;
+    avctx->codec      = codec;
+
+    if (avctx->extradata_size < 0 || avctx->extradata_size >= FF_MAX_EXTRADATA_SIZE)
+        return AVERROR(EINVAL);
+
+    avci = av_mallocz(sizeof(*avci));
+    if (!avci) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+    avctx->internal = avci;
+    /* avctx->internal is set from here on, so later failures unwind through free_and_end */
+    avci->buffer_frame = av_frame_alloc();
+    avci->buffer_pkt = av_packet_alloc();
+    if (!avci->buffer_frame || !avci->buffer_pkt) {
+        ret = AVERROR(ENOMEM);
+        goto free_and_end;
+    }
+    /* allocate the codec's private context when the caller did not, then feed it the options */
+    if (codec2->priv_data_size > 0) {
+        if (!avctx->priv_data) {
+            avctx->priv_data = av_mallocz(codec2->priv_data_size);
+            if (!avctx->priv_data) {
+                ret = AVERROR(ENOMEM);
+                goto free_and_end;
+            }
+            if (codec->priv_class) {
+                *(const AVClass **)avctx->priv_data = codec->priv_class;
+                av_opt_set_defaults(avctx->priv_data);
+            }
+        }
+        if (codec->priv_class && (ret = av_opt_set_dict(avctx->priv_data, options)) < 0)
+            goto free_and_end;
+    } else {
+        avctx->priv_data = NULL;
+    }
+    if ((ret = av_opt_set_dict(avctx, options)) < 0)
+        goto free_and_end;
+
+    if (avctx->codec_whitelist && av_match_list(codec->name, avctx->codec_whitelist, ',') <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "Codec (%s) not on whitelist \'%s\'\n", codec->name, avctx->codec_whitelist);
+        ret = AVERROR(EINVAL);
+        goto free_and_end;
+    }
+
+    // only call ff_set_dimensions() for non H.264/VP6F/DXV codecs so as not to overwrite previously setup dimensions
+    if (!(avctx->coded_width && avctx->coded_height && avctx->width && avctx->height &&
+          (avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_VP6F || avctx->codec_id == AV_CODEC_ID_DXV))) {
+        if (avctx->coded_width && avctx->coded_height)
+            ret = ff_set_dimensions(avctx, avctx->coded_width, avctx->coded_height);
+        else if (avctx->width && avctx->height)
+            ret = ff_set_dimensions(avctx, avctx->width, avctx->height);
+        if (ret < 0)
+            goto free_and_end;
+    }
+
+    if ((avctx->coded_width || avctx->coded_height || avctx->width || avctx->height)
+        && (  av_image_check_size2(avctx->coded_width, avctx->coded_height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx) < 0
+           || av_image_check_size2(avctx->width,       avctx->height,       avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx) < 0)) {
+        av_log(avctx, AV_LOG_WARNING, "Ignoring invalid width/height values\n");
+        ff_set_dimensions(avctx, 0, 0);
+    }
+
+    if (avctx->width > 0 && avctx->height > 0) {
+        if (av_image_check_sar(avctx->width, avctx->height,
+                               avctx->sample_aspect_ratio) < 0) {
+            av_log(avctx, AV_LOG_WARNING, "ignoring invalid SAR: %u/%u\n",
+                   avctx->sample_aspect_ratio.num,
+                   avctx->sample_aspect_ratio.den);
+            avctx->sample_aspect_ratio = (AVRational){ 0, 1 };
+        }
+    }
+
+    if (avctx->sample_rate < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid sample rate: %d\n", avctx->sample_rate);
+        ret = AVERROR(EINVAL);
+        goto free_and_end;
+    }
+    if (avctx->block_align < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid block align: %d\n", avctx->block_align);
+        ret = AVERROR(EINVAL);
+        goto free_and_end;
+    }
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    /* compat wrapper for old-style callers */
+    if (avctx->channel_layout && !avctx->channels)
+        avctx->channels = av_popcount64(avctx->channel_layout);
+
+    if ((avctx->channels && avctx->ch_layout.nb_channels != avctx->channels) ||
+        (avctx->channel_layout && (avctx->ch_layout.order != AV_CHANNEL_ORDER_NATIVE ||
+                                   avctx->ch_layout.u.mask != avctx->channel_layout))) {
+        av_channel_layout_uninit(&avctx->ch_layout);
+        if (avctx->channel_layout) {
+            av_channel_layout_from_mask(&avctx->ch_layout, avctx->channel_layout);
+        } else {
+            avctx->ch_layout.order       = AV_CHANNEL_ORDER_UNSPEC;
+        }
+        avctx->ch_layout.nb_channels = avctx->channels;
+    }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    /* AV_CODEC_CAP_CHANNEL_CONF is a decoder-only flag; so the code below
+     * in particular checks that nb_channels is set for all audio encoders. */
+    if (avctx->codec_type == AVMEDIA_TYPE_AUDIO && !avctx->ch_layout.nb_channels
+        && !(codec->capabilities & AV_CODEC_CAP_CHANNEL_CONF)) {
+        av_log(avctx, AV_LOG_ERROR, "%s requires channel layout to be set\n",
+               av_codec_is_decoder(codec) ? "Decoder" : "Encoder");
+        ret = AVERROR(EINVAL);
+        goto free_and_end;
+    }
+    if (avctx->ch_layout.nb_channels && !av_channel_layout_check(&avctx->ch_layout)) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid channel layout\n");
+        ret = AVERROR(EINVAL);
+        goto free_and_end;
+    }
+    if (avctx->ch_layout.nb_channels > FF_SANE_NB_CHANNELS) {
+        av_log(avctx, AV_LOG_ERROR, "Too many channels: %d\n", avctx->ch_layout.nb_channels);
+        ret = AVERROR(EINVAL);
+        goto free_and_end;
+    }
+
+    avctx->frame_num = 0;
+#if FF_API_AVCTX_FRAME_NUMBER
+FF_DISABLE_DEPRECATION_WARNINGS
+    avctx->frame_number = avctx->frame_num;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    avctx->codec_descriptor = avcodec_descriptor_get(avctx->codec_id);
+
+    if ((avctx->codec->capabilities & AV_CODEC_CAP_EXPERIMENTAL) &&
+        avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+        const char *codec_string = av_codec_is_encoder(codec) ? "encoder" : "decoder";
+        const AVCodec *codec2; /* NOTE(review): shadows the function-scope FFCodec *codec2 */
+        av_log(avctx, AV_LOG_ERROR,
+               "The %s '%s' is experimental but experimental codecs are not enabled, "
+               "add '-strict %d' if you want to use it.\n",
+               codec_string, codec->name, FF_COMPLIANCE_EXPERIMENTAL);
+        codec2 = av_codec_is_encoder(codec) ? avcodec_find_encoder(codec->id) : avcodec_find_decoder(codec->id);
+        if (!(codec2->capabilities & AV_CODEC_CAP_EXPERIMENTAL))
+            av_log(avctx, AV_LOG_ERROR, "Alternatively use the non experimental %s '%s'.\n",
+                codec_string, codec2->name);
+        ret = AVERROR_EXPERIMENTAL;
+        goto free_and_end;
+    }
+
+    if (avctx->codec_type == AVMEDIA_TYPE_AUDIO &&
+        (!avctx->time_base.num || !avctx->time_base.den)) {
+        avctx->time_base.num = 1;
+        avctx->time_base.den = avctx->sample_rate;
+    }
+
+    if (av_codec_is_encoder(avctx->codec))
+        ret = ff_encode_preinit(avctx);
+    else
+        ret = ff_decode_preinit(avctx);
+    if (ret < 0)
+        goto free_and_end;
+
+    if (HAVE_THREADS && !avci->frame_thread_encoder) {
+        /* Frame-threaded decoders call FFCodec.init for their child contexts. */
+        lock_avcodec(codec2);
+        ret = ff_thread_init(avctx);
+        unlock_avcodec(codec2);
+        if (ret < 0) {
+            goto free_and_end;
+        }
+    }
+    if (!HAVE_THREADS && !(codec2->caps_internal & FF_CODEC_CAP_AUTO_THREADS))
+        avctx->thread_count = 1;
+    /* codec2->init is skipped here when frame threading is active, unless a frame-thread encoder is used */
+    if (!(avctx->active_thread_type & FF_THREAD_FRAME) ||
+        avci->frame_thread_encoder) {
+        if (codec2->init) {
+            lock_avcodec(codec2);
+            ret = codec2->init(avctx);
+            unlock_avcodec(codec2);
+            if (ret < 0) {
+                avci->needs_close = codec2->caps_internal & FF_CODEC_CAP_INIT_CLEANUP; /* decides whether avcodec_close() runs .close */
+                goto free_and_end;
+            }
+        }
+        avci->needs_close = 1;
+    }
+
+    ret=0;
+
+    if (av_codec_is_decoder(avctx->codec)) {
+        if (!avctx->bit_rate)
+            avctx->bit_rate = get_bit_rate(avctx);
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+        /* update the deprecated fields for old-style callers */
+        avctx->channels = avctx->ch_layout.nb_channels;
+        avctx->channel_layout = avctx->ch_layout.order == AV_CHANNEL_ORDER_NATIVE ?
+                                avctx->ch_layout.u.mask : 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+        /* validate channel layout from the decoder */
+        if ((avctx->ch_layout.nb_channels && !av_channel_layout_check(&avctx->ch_layout)) ||
+            avctx->ch_layout.nb_channels > FF_SANE_NB_CHANNELS) {
+            ret = AVERROR(EINVAL);
+            goto free_and_end;
+        }
+        if (avctx->bits_per_coded_sample < 0) {
+            ret = AVERROR(EINVAL);
+            goto free_and_end;
+        }
+    }
+    if (codec->priv_class)
+        av_assert0(*(const AVClass **)avctx->priv_data == codec->priv_class);
+
+end:
+
+    return ret;
+free_and_end:
+    avcodec_close(avctx);
+    goto end;
+}
+
+/* Drops everything buffered inside avctx. An encoder is flushed only if it
+ * advertises AV_CODEC_CAP_ENCODER_FLUSH; otherwise a warning is logged and
+ * the call changes nothing. */
+void avcodec_flush_buffers(AVCodecContext *avctx)
+{
+    AVCodecInternal *avci = avctx->internal;
+
+    if (!av_codec_is_encoder(avctx->codec)) {
+        av_packet_unref(avci->last_pkt_props);
+        av_packet_unref(avci->in_pkt);
+        avctx->pts_correction_last_dts = INT64_MIN;
+        avctx->pts_correction_last_pts = INT64_MIN;
+        av_bsf_flush(avci->bsf);
+    } else if (avctx->codec->capabilities & AV_CODEC_CAP_ENCODER_FLUSH) {
+        if (avci->in_frame)
+            av_frame_unref(avci->in_frame);
+        if (avci->recon_frame)
+            av_frame_unref(avci->recon_frame);
+    } else {
+        av_log(avctx, AV_LOG_WARNING, "Ignoring attempt to flush encoder "
+               "that doesn't support it\n");
+        return;
+    }
+
+    /* state common to encoders and decoders */
+    avci->draining           = 0;
+    avci->draining_done      = 0;
+    avci->nb_draining_errors = 0;
+    av_frame_unref(avci->buffer_frame);
+    av_packet_unref(avci->buffer_pkt);
+
+    if (HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_FRAME)) {
+        ff_thread_flush(avctx);
+    } else {
+        const FFCodec *codec = ffcodec(avctx->codec);
+        if (codec->flush)
+            codec->flush(avctx);
+    }
+}
+
+/*
+ * Releases everything owned by sub: for each rectangle its four data planes,
+ * its text and its ass string, then the rectangle itself and finally the
+ * rects array. *sub is zeroed afterwards.
+ */
+void avsubtitle_free(AVSubtitle *sub)
+{
+    for (int r = 0; r < sub->num_rects; r++) {
+        AVSubtitleRect *const rect = sub->rects[r];
+
+        for (int plane = 0; plane < 4; plane++)
+            av_freep(&rect->data[plane]);
+        av_freep(&rect->text);
+        av_freep(&rect->ass);
+
+        av_freep(&sub->rects[r]);
+    }
+
+    av_freep(&sub->rects);
+    memset(sub, 0, sizeof(*sub));
+}
+/* Tears down a codec context: frees the internal state set up by avcodec_open2() and context-owned buffers. Always returns 0. */
+av_cold int avcodec_close(AVCodecContext *avctx)
+{
+    int i;
+
+    if (!avctx)
+        return 0;
+    /* avctx->internal exists only after avcodec_open2() allocated it; everything in this branch hangs off it */
+    if (avcodec_is_open(avctx)) {
+        AVCodecInternal *avci = avctx->internal;
+
+        if (CONFIG_FRAME_THREAD_ENCODER &&
+            avci->frame_thread_encoder && avctx->thread_count > 1) {
+            ff_frame_thread_encoder_free(avctx);
+        }
+        if (HAVE_THREADS && avci->thread_ctx)
+            ff_thread_free(avctx);
+        if (avci->needs_close && ffcodec(avctx->codec)->close)
+            ffcodec(avctx->codec)->close(avctx);
+        avci->byte_buffer_size = 0;
+        av_freep(&avci->byte_buffer);
+        av_frame_free(&avci->buffer_frame);
+        av_packet_free(&avci->buffer_pkt);
+        av_packet_free(&avci->last_pkt_props);
+
+        av_packet_free(&avci->in_pkt);
+        av_frame_free(&avci->in_frame);
+        av_frame_free(&avci->recon_frame);
+
+        av_buffer_unref(&avci->pool);
+
+        if (avctx->hwaccel && avctx->hwaccel->uninit)
+            avctx->hwaccel->uninit(avctx);
+        av_freep(&avci->hwaccel_priv_data);
+
+        av_bsf_free(&avci->bsf);
+
+        av_channel_layout_uninit(&avci->initial_ch_layout);
+
+#if CONFIG_LCMS2
+        ff_icc_context_uninit(&avci->icc);
+#endif
+
+        av_freep(&avctx->internal);
+    }
+    /* the remaining fields live in avctx itself and are released whether or not it was open */
+    for (i = 0; i < avctx->nb_coded_side_data; i++)
+        av_freep(&avctx->coded_side_data[i].data);
+    av_freep(&avctx->coded_side_data);
+    avctx->nb_coded_side_data = 0;
+
+    av_buffer_unref(&avctx->hw_frames_ctx);
+    av_buffer_unref(&avctx->hw_device_ctx);
+
+    if (avctx->priv_data && avctx->codec && avctx->codec->priv_class)
+        av_opt_free(avctx->priv_data);
+    av_opt_free(avctx);
+    av_freep(&avctx->priv_data);
+    if (av_codec_is_encoder(avctx->codec)) {
+        av_freep(&avctx->extradata);
+        avctx->extradata_size = 0;
+    } else if (av_codec_is_decoder(avctx->codec))
+        av_freep(&avctx->subtitle_header);
+
+    avctx->codec = NULL;
+    avctx->active_thread_type = 0;
+
+    return 0;
+}
+/* Maps a NULL string to the literal "unknown"; any other pointer is returned unchanged. */
+static const char *unknown_if_null(const char *str)
+{
+    return str == NULL ? "unknown" : str;
+}
+
+/*
+ * Formats a one-line, human-readable summary of enc into the buf_size-byte
+ * buffer buf: media type, codec name, profile, then type-specific details
+ * and the bit rate. A non-zero encode adds encoder details (q range, pass
+ * number); otherwise decoder-side property flags are listed. Nothing is
+ * written when buf is NULL or buf_size <= 0.
+ */
+void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
+{
+    const char *codec_type;
+    const char *codec_name;
+    const char *profile = NULL;
+    AVBPrint bprint;
+    int64_t bitrate;
+    int new_line = 0;
+    AVRational display_aspect_ratio;
+    const char *separator = enc->dump_separator ? (const char *)enc->dump_separator : ", ";
+    const char *str;
+
+    if (!buf || buf_size <= 0)
+        return;
+    av_bprint_init_for_buffer(&bprint, buf, buf_size);
+    codec_type = av_get_media_type_string(enc->codec_type);
+    codec_name = avcodec_get_name(enc->codec_id);
+    profile = avcodec_profile_name(enc->codec_id, enc->profile);
+
+    av_bprintf(&bprint, "%s: %s", codec_type ? codec_type : "unknown", codec_name);
+    buf[0] ^= 'a' ^ 'A'; /* first letter in uppercase */
+
+    if (enc->codec && strcmp(enc->codec->name, codec_name))
+        av_bprintf(&bprint, " (%s)", enc->codec->name);
+
+    if (profile)
+        av_bprintf(&bprint, " (%s)", profile);
+    if (enc->codec_type == AVMEDIA_TYPE_VIDEO &&
+        av_log_get_level() >= AV_LOG_VERBOSE && enc->refs)
+        av_bprintf(&bprint, ", %d reference frame%s",
+                   enc->refs, enc->refs > 1 ? "s" : "");
+
+    if (enc->codec_tag)
+        av_bprintf(&bprint, " (%s / 0x%04X)",
+                   av_fourcc2str(enc->codec_tag), enc->codec_tag);
+
+    switch (enc->codec_type) {
+    case AVMEDIA_TYPE_VIDEO:
+        {
+            unsigned len;
+            av_bprintf(&bprint, "%s%s", separator,
+                       enc->pix_fmt == AV_PIX_FMT_NONE ? "none" :
+                       unknown_if_null(av_get_pix_fmt_name(enc->pix_fmt)));
+
+            av_bprint_chars(&bprint, '(', 1);
+            len = bprint.len;
+
+            /* The following check ensures that '(' has been written
+             * and therefore allows us to erase it if it turns out
+             * to be unnecessary. */
+            if (!av_bprint_is_complete(&bprint))
+                return;
+            /* a pix_fmt without a descriptor (unknown value) must not be dereferenced below */
+            if (enc->bits_per_raw_sample && enc->pix_fmt != AV_PIX_FMT_NONE &&
+                av_pix_fmt_desc_get(enc->pix_fmt) &&
+                enc->bits_per_raw_sample < av_pix_fmt_desc_get(enc->pix_fmt)->comp[0].depth)
+                av_bprintf(&bprint, "%d bpc, ", enc->bits_per_raw_sample);
+            if (enc->color_range != AVCOL_RANGE_UNSPECIFIED &&
+                (str = av_color_range_name(enc->color_range)))
+                av_bprintf(&bprint, "%s, ", str);
+
+            if (enc->colorspace != AVCOL_SPC_UNSPECIFIED ||
+                enc->color_primaries != AVCOL_PRI_UNSPECIFIED ||
+                enc->color_trc != AVCOL_TRC_UNSPECIFIED) {
+                const char *col = unknown_if_null(av_color_space_name(enc->colorspace));
+                const char *pri = unknown_if_null(av_color_primaries_name(enc->color_primaries));
+                const char *trc = unknown_if_null(av_color_transfer_name(enc->color_trc));
+                if (strcmp(col, pri) || strcmp(col, trc)) {
+                    new_line = 1;
+                    av_bprintf(&bprint, "%s/%s/%s, ", col, pri, trc);
+                } else
+                    av_bprintf(&bprint, "%s, ", col);
+            }
+
+            if (enc->field_order != AV_FIELD_UNKNOWN) {
+                const char *field_order = "progressive";
+                if (enc->field_order == AV_FIELD_TT)
+                    field_order = "top first";
+                else if (enc->field_order == AV_FIELD_BB)
+                    field_order = "bottom first";
+                else if (enc->field_order == AV_FIELD_TB)
+                    field_order = "top coded first (swapped)";
+                else if (enc->field_order == AV_FIELD_BT)
+                    field_order = "bottom coded first (swapped)";
+
+                av_bprintf(&bprint, "%s, ", field_order);
+            }
+
+            if (av_log_get_level() >= AV_LOG_VERBOSE &&
+                enc->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED &&
+                (str = av_chroma_location_name(enc->chroma_sample_location)))
+                av_bprintf(&bprint, "%s, ", str);
+
+            if (len == bprint.len) {
+                bprint.str[len - 1] = '\0';
+                bprint.len--;
+            } else {
+                if (bprint.len - 2 < bprint.size) {
+                    /* Erase the last ", " */
+                    bprint.len -= 2;
+                    bprint.str[bprint.len] = '\0';
+                }
+                av_bprint_chars(&bprint, ')', 1);
+            }
+        }
+
+        if (enc->width) {
+            av_bprintf(&bprint, "%s%dx%d", new_line ? separator : ", ",
+                       enc->width, enc->height);
+            if (av_log_get_level() >= AV_LOG_VERBOSE &&
+                (enc->width != enc->coded_width ||
+                 enc->height != enc->coded_height))
+                av_bprintf(&bprint, " (%dx%d)",
+                           enc->coded_width, enc->coded_height);
+            if (enc->sample_aspect_ratio.num) {
+                av_reduce(&display_aspect_ratio.num, &display_aspect_ratio.den,
+                          enc->width * (int64_t)enc->sample_aspect_ratio.num,
+                          enc->height * (int64_t)enc->sample_aspect_ratio.den, 1024 * 1024);
+                av_bprintf(&bprint, " [SAR %d:%d DAR %d:%d]",
+                         enc->sample_aspect_ratio.num, enc->sample_aspect_ratio.den,
+                         display_aspect_ratio.num, display_aspect_ratio.den);
+            }
+            if (av_log_get_level() >= AV_LOG_DEBUG) {
+                int g = av_gcd(enc->time_base.num, enc->time_base.den);
+                if (g) /* g is 0 for a 0/0 time base: skip instead of dividing by zero */
+                    av_bprintf(&bprint, ", %d/%d",
+                               enc->time_base.num / g, enc->time_base.den / g);
+            }
+        }
+        if (encode) {
+            av_bprintf(&bprint, ", q=%d-%d", enc->qmin, enc->qmax);
+        } else {
+            if (enc->properties & FF_CODEC_PROPERTY_CLOSED_CAPTIONS)
+                av_bprintf(&bprint, ", Closed Captions");
+            if (enc->properties & FF_CODEC_PROPERTY_FILM_GRAIN)
+                av_bprintf(&bprint, ", Film Grain");
+            if (enc->properties & FF_CODEC_PROPERTY_LOSSLESS)
+                av_bprintf(&bprint, ", lossless");
+        }
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        av_bprintf(&bprint, "%s", separator);
+        if (enc->sample_rate)
+            av_bprintf(&bprint, "%d Hz, ", enc->sample_rate);
+        {
+            char layout[512]; /* named so that it no longer shadows the buf parameter */
+            int ret = av_channel_layout_describe(&enc->ch_layout, layout, sizeof(layout));
+            if (ret >= 0)
+                av_bprintf(&bprint, "%s", layout);
+        }
+        if (enc->sample_fmt != AV_SAMPLE_FMT_NONE &&
+            (str = av_get_sample_fmt_name(enc->sample_fmt)))
+            av_bprintf(&bprint, ", %s", str);
+        if (   enc->bits_per_raw_sample > 0
+            && enc->bits_per_raw_sample != av_get_bytes_per_sample(enc->sample_fmt) * 8)
+            av_bprintf(&bprint, " (%d bit)", enc->bits_per_raw_sample);
+        if (av_log_get_level() >= AV_LOG_VERBOSE) {
+            if (enc->initial_padding)
+                av_bprintf(&bprint, ", delay %d", enc->initial_padding);
+            if (enc->trailing_padding)
+                av_bprintf(&bprint, ", padding %d", enc->trailing_padding);
+        }
+        break;
+    case AVMEDIA_TYPE_DATA:
+        if (av_log_get_level() >= AV_LOG_DEBUG) {
+            int g = av_gcd(enc->time_base.num, enc->time_base.den);
+            if (g)
+                av_bprintf(&bprint, ", %d/%d",
+                           enc->time_base.num / g, enc->time_base.den / g);
+        }
+        break;
+    case AVMEDIA_TYPE_SUBTITLE:
+        if (enc->width)
+            av_bprintf(&bprint, ", %dx%d", enc->width, enc->height);
+        break;
+    default:
+        return;
+    }
+    if (encode) {
+        if (enc->flags & AV_CODEC_FLAG_PASS1)
+            av_bprintf(&bprint, ", pass 1");
+        if (enc->flags & AV_CODEC_FLAG_PASS2)
+            av_bprintf(&bprint, ", pass 2");
+    }
+    bitrate = get_bit_rate(enc);
+    if (bitrate != 0) {
+        av_bprintf(&bprint, ", %"PRId64" kb/s", bitrate / 1000);
+    } else if (enc->rc_max_rate > 0) {
+        av_bprintf(&bprint, ", max. %"PRId64" kb/s", enc->rc_max_rate / 1000);
+    }
+}
+/* Returns 1 when s has internal state allocated (i.e. it has been opened), 0 otherwise. */
+int avcodec_is_open(AVCodecContext *s)
+{
+    return s->internal != NULL;
+}
+
+/* Unrefs frame, then fetches output through the decode path for decoders and the encode path otherwise. */
+int attribute_align_arg avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    av_frame_unref(frame);
+
+    return av_codec_is_decoder(avctx->codec) ? ff_decode_receive_frame(avctx, frame)
+                                             : ff_encode_receive_frame(avctx, frame);
+}
diff --git a/media/ffvpx/libavcodec/avcodec.h b/media/ffvpx/libavcodec/avcodec.h
new file mode 100644
index 0000000000..9a0fe97cad
--- /dev/null
+++ b/media/ffvpx/libavcodec/avcodec.h
@@ -0,0 +1,3193 @@
+/*
+ * copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVCODEC_H
+#define AVCODEC_AVCODEC_H
+
+/**
+ * @file
+ * @ingroup libavc
+ * Libavcodec external API header
+ */
+
+#include "libavutil/samplefmt.h"
+#include "libavutil/attributes.h"
+#include "libavutil/avutil.h"
+#include "libavutil/buffer.h"
+#include "libavutil/dict.h"
+#include "libavutil/frame.h"
+#include "libavutil/log.h"
+#include "libavutil/pixfmt.h"
+#include "libavutil/rational.h"
+
+#include "codec.h"
+#include "codec_desc.h"
+#include "codec_par.h"
+#include "codec_id.h"
+#include "defs.h"
+#include "packet.h"
+#include "version_major.h"
+#ifndef HAVE_AV_CONFIG_H
+/* When included as part of the ffmpeg build, only include the major version
+ * to avoid unnecessary rebuilds. When included externally, keep including
+ * the full version information. */
+#include "version.h"
+#endif
+
+/**
+ * @defgroup libavc libavcodec
+ * Encoding/Decoding Library
+ *
+ * @{
+ *
+ * @defgroup lavc_decoding Decoding
+ * @{
+ * @}
+ *
+ * @defgroup lavc_encoding Encoding
+ * @{
+ * @}
+ *
+ * @defgroup lavc_codec Codecs
+ * @{
+ * @defgroup lavc_codec_native Native Codecs
+ * @{
+ * @}
+ * @defgroup lavc_codec_wrappers External library wrappers
+ * @{
+ * @}
+ * @defgroup lavc_codec_hwaccel Hardware Accelerators bridge
+ * @{
+ * @}
+ * @}
+ * @defgroup lavc_internal Internal
+ * @{
+ * @}
+ * @}
+ */
+
+/**
+ * @ingroup libavc
+ * @defgroup lavc_encdec send/receive encoding and decoding API overview
+ * @{
+ *
+ * The avcodec_send_packet()/avcodec_receive_frame()/avcodec_send_frame()/
+ * avcodec_receive_packet() functions provide an encode/decode API, which
+ * decouples input and output.
+ *
+ * The API is very similar for encoding/decoding and audio/video, and works as
+ * follows:
+ * - Set up and open the AVCodecContext as usual.
+ * - Send valid input:
+ *   - For decoding, call avcodec_send_packet() to give the decoder raw
+ *     compressed data in an AVPacket.
+ *   - For encoding, call avcodec_send_frame() to give the encoder an AVFrame
+ *     containing uncompressed audio or video.
+ *
+ *   In both cases, it is recommended that AVPackets and AVFrames are
+ *   refcounted, or libavcodec might have to copy the input data. (libavformat
+ *   always returns refcounted AVPackets, and av_frame_get_buffer() allocates
+ *   refcounted AVFrames.)
+ * - Receive output in a loop. Periodically call one of the avcodec_receive_*()
+ *   functions and process their output:
+ *   - For decoding, call avcodec_receive_frame(). On success, it will return
+ *     an AVFrame containing uncompressed audio or video data.
+ *   - For encoding, call avcodec_receive_packet(). On success, it will return
+ *     an AVPacket with a compressed frame.
+ *
+ *   Repeat this call until it returns AVERROR(EAGAIN) or an error. The
+ *   AVERROR(EAGAIN) return value means that new input data is required to
+ *   return new output. In this case, continue with sending input. For each
+ *   input frame/packet, the codec will typically return 1 output frame/packet,
+ *   but it can also be 0 or more than 1.
+ *
+ * At the beginning of decoding or encoding, the codec might accept multiple
+ * input frames/packets without returning a frame, until its internal buffers
+ * are filled. This situation is handled transparently if you follow the steps
+ * outlined above.
+ *
+ * In theory, sending input can result in EAGAIN - this should happen only if
+ * not all output was received. You can use this to structure alternative decode
+ * or encode loops other than the one suggested above. For example, you could
+ * try sending new input on each iteration, and try to receive output if that
+ * returns EAGAIN.
+ *
+ * End of stream situations. These require "flushing" (aka draining) the codec,
+ * as the codec might buffer multiple frames or packets internally for
+ * performance or out of necessity (consider B-frames).
+ * This is handled as follows:
+ * - Instead of valid input, send NULL to the avcodec_send_packet() (decoding)
+ *   or avcodec_send_frame() (encoding) functions. This will enter draining
+ *   mode.
+ * - Call avcodec_receive_frame() (decoding) or avcodec_receive_packet()
+ *   (encoding) in a loop until AVERROR_EOF is returned. The functions will
+ *   not return AVERROR(EAGAIN), unless you forgot to enter draining mode.
+ * - Before decoding can be resumed again, the codec has to be reset with
+ *   avcodec_flush_buffers().
+ *
+ * Using the API as outlined above is highly recommended. But it is also
+ * possible to call functions outside of this rigid schema. For example, you can
+ * call avcodec_send_packet() repeatedly without calling
+ * avcodec_receive_frame(). In this case, avcodec_send_packet() will succeed
+ * until the codec's internal buffer has been filled up (which is typically of
+ * size 1 per output frame, after initial input), and then reject input with
+ * AVERROR(EAGAIN). Once it starts rejecting input, you have no choice but to
+ * read at least some output.
+ *
+ * Not all codecs will follow a rigid and predictable dataflow; the only
+ * guarantee is that an AVERROR(EAGAIN) return value on a send/receive call on
+ * one end implies that a receive/send call on the other end will succeed, or
+ * at least will not fail with AVERROR(EAGAIN). In general, no codec will
+ * permit unlimited buffering of input or output.
+ *
+ * A codec is not allowed to return AVERROR(EAGAIN) for both sending and receiving. This
+ * would be an invalid state, which could put the codec user into an endless
+ * loop. The API has no concept of time either: it cannot happen that trying to
+ * do avcodec_send_packet() results in AVERROR(EAGAIN), but a repeated call 1 second
+ * later accepts the packet (with no other receive/flush API calls involved).
+ * The API is a strict state machine, and the passage of time is not supposed
+ * to influence it. Some timing-dependent behavior might still be deemed
+ * acceptable in certain cases. But it must never result in both send/receive
+ * returning EAGAIN at the same time at any point. It must also absolutely be
+ * avoided that the current state is "unstable" and can "flip-flop" between
+ * the send/receive APIs allowing progress. For example, it's not allowed that
+ * the codec randomly decides that it actually wants to consume a packet now
+ * instead of returning a frame, after it just returned AVERROR(EAGAIN) on an
+ * avcodec_send_packet() call.
+ * @}
+ */
+
+/**
+ * @defgroup lavc_core Core functions/structures.
+ * @ingroup libavc
+ *
+ * Basic definitions, functions for querying libavcodec capabilities,
+ * allocating core structures, etc.
+ * @{
+ */
+
+/**
+ * @ingroup lavc_encoding
+ * minimum encoding buffer size
+ * Used to avoid some checks during header writing.
+ */
+#define AV_INPUT_BUFFER_MIN_SIZE 16384
+
+/**
+ * @ingroup lavc_encoding
+ */
+typedef struct RcOverride{
+    int start_frame; // NOTE(review): presumably the first frame the override covers; confirm.
+    int end_frame;   // NOTE(review): presumably the last frame the override covers; confirm.
+    int qscale; // If this is 0 then quality_factor will be used instead.
+    float quality_factor; // Used in place of qscale when qscale is 0.
+} RcOverride;
+
+/* encoding support
+   These flags can be passed in AVCodecContext.flags before initialization.
+   Note: Not everything is supported yet.
+*/
+
+/**
+ * Allow decoders to produce frames with data planes that are not aligned
+ * to CPU requirements (e.g. due to cropping).
+ */
+#define AV_CODEC_FLAG_UNALIGNED       (1 <<  0)
+/**
+ * Use fixed qscale.
+ */
+#define AV_CODEC_FLAG_QSCALE          (1 <<  1)
+/**
+ * 4 MV per MB allowed / advanced prediction for H.263.
+ */
+#define AV_CODEC_FLAG_4MV             (1 <<  2)
+/**
+ * Output even those frames that might be corrupted.
+ */
+#define AV_CODEC_FLAG_OUTPUT_CORRUPT  (1 <<  3)
+/**
+ * Use qpel MC.
+ */
+#define AV_CODEC_FLAG_QPEL            (1 <<  4)
+/**
+ * Don't output frames whose parameters differ from first
+ * decoded frame in stream.
+ */
+#define AV_CODEC_FLAG_DROPCHANGED     (1 <<  5)
+/**
+ * Request the encoder to output reconstructed frames, i.e.\ frames that would
+ * be produced by decoding the encoded bitstream. These frames may be retrieved
+ * by calling avcodec_receive_frame() immediately after a successful call to
+ * avcodec_receive_packet().
+ *
+ * Should only be used with encoders flagged with the
+ * @ref AV_CODEC_CAP_ENCODER_RECON_FRAME capability.
+ */
+#define AV_CODEC_FLAG_RECON_FRAME     (1 <<  6)
+/**
+ * @par decoding:
+ * Request the decoder to propagate each packet's AVPacket.opaque and
+ * AVPacket.opaque_ref to its corresponding output AVFrame.
+ *
+ * @par encoding:
+ * Request the encoder to propagate each frame's AVFrame.opaque and
+ * AVFrame.opaque_ref values to its corresponding output AVPacket.
+ *
+ * @par
+ * May only be set on encoders that have the
+ * @ref AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE capability flag.
+ *
+ * @note
+ * While in typical cases one input frame produces exactly one output packet
+ * (perhaps after a delay), in general the mapping of frames to packets is
+ * M-to-N, so
+ * - Any number of input frames may be associated with any given output packet.
+ *   This includes zero - e.g. some encoders may output packets that carry only
+ *   metadata about the whole stream.
+ * - A given input frame may be associated with any number of output packets.
+ *   Again this includes zero - e.g. some encoders may drop frames under certain
+ *   conditions.
+ * .
+ * This implies that when using this flag, the caller must NOT assume that
+ * - a given input frame's opaques will necessarily appear on some output packet;
+ * - every output packet will have some non-NULL opaque value.
+ * .
+ * When an output packet contains multiple frames, the opaque values will be
+ * taken from the first of those.
+ *
+ * @note
+ * The converse holds for decoders, with frames and packets switched.
+ */
+#define AV_CODEC_FLAG_COPY_OPAQUE     (1 <<  7)
+/**
+ * Signal to the encoder that the values of AVFrame.duration are valid and
+ * should be used (typically for transferring them to output packets).
+ *
+ * If this flag is not set, frame durations are ignored.
+ */
+#define AV_CODEC_FLAG_FRAME_DURATION  (1 <<  8)
+/**
+ * Use internal 2pass ratecontrol in first pass mode.
+ */
+#define AV_CODEC_FLAG_PASS1           (1 <<  9)
+/**
+ * Use internal 2pass ratecontrol in second pass mode.
+ */
+#define AV_CODEC_FLAG_PASS2           (1 << 10)
+/**
+ * loop filter.
+ */
+#define AV_CODEC_FLAG_LOOP_FILTER     (1 << 11)
+/**
+ * Only decode/encode grayscale.
+ */
+#define AV_CODEC_FLAG_GRAY            (1 << 13)
+/**
+ * error[?] variables will be set during encoding.
+ */
+#define AV_CODEC_FLAG_PSNR            (1 << 15)
+/**
+ * Use interlaced DCT.
+ */
+#define AV_CODEC_FLAG_INTERLACED_DCT  (1 << 18)
+/**
+ * Force low delay.
+ */
+#define AV_CODEC_FLAG_LOW_DELAY       (1 << 19)
+/**
+ * Place global headers in extradata instead of every keyframe.
+ */
+#define AV_CODEC_FLAG_GLOBAL_HEADER   (1 << 22)
+/**
+ * Use only bitexact stuff (except (I)DCT).
+ */
+#define AV_CODEC_FLAG_BITEXACT        (1 << 23)
+/* Fx : Flag for H.263+ extra options */
+/**
+ * H.263 advanced intra coding / MPEG-4 AC prediction
+ */
+#define AV_CODEC_FLAG_AC_PRED         (1 << 24)
+/**
+ * interlaced motion estimation
+ */
+#define AV_CODEC_FLAG_INTERLACED_ME   (1 << 29)
+#define AV_CODEC_FLAG_CLOSED_GOP      (1U << 31)
+
+/**
+ * Allow non spec compliant speedup tricks.
+ */
+#define AV_CODEC_FLAG2_FAST           (1 <<  0)
+/**
+ * Skip bitstream encoding.
+ */
+#define AV_CODEC_FLAG2_NO_OUTPUT      (1 <<  2)
+/**
+ * Place global headers at every keyframe instead of in extradata.
+ */
+#define AV_CODEC_FLAG2_LOCAL_HEADER   (1 <<  3)
+
+/**
+ * Input bitstream might be truncated at packet boundaries
+ * instead of only at frame boundaries.
+ */
+#define AV_CODEC_FLAG2_CHUNKS         (1 << 15)
+/**
+ * Discard cropping information from SPS.
+ */
+#define AV_CODEC_FLAG2_IGNORE_CROP    (1 << 16)
+
+/**
+ * Show all frames before the first keyframe
+ */
+#define AV_CODEC_FLAG2_SHOW_ALL       (1 << 22)
+/**
+ * Export motion vectors through frame side data
+ */
+#define AV_CODEC_FLAG2_EXPORT_MVS     (1 << 28)
+/**
+ * Do not skip samples and export skip information as frame side data
+ */
+#define AV_CODEC_FLAG2_SKIP_MANUAL    (1 << 29)
+/**
+ * Do not reset ASS ReadOrder field on flush (subtitles decoding)
+ */
+#define AV_CODEC_FLAG2_RO_FLUSH_NOOP  (1 << 30)
+/**
+ * Generate/parse ICC profiles on encode/decode, as appropriate for the type of
+ * file. No effect on codecs which cannot contain embedded ICC profiles, or
+ * when compiled without support for lcms2.
+ */
+#define AV_CODEC_FLAG2_ICC_PROFILES   (1U << 31)
+
+/* Exported side data.
+   These flags can be passed in AVCodecContext.export_side_data before initialization.
+*/
+/**
+ * Export motion vectors through frame side data
+ */
+#define AV_CODEC_EXPORT_DATA_MVS         (1 << 0)
+/**
+ * Export encoder Producer Reference Time through packet side data
+ */
+#define AV_CODEC_EXPORT_DATA_PRFT        (1 << 1)
+/**
+ * Decoding only.
+ * Export the AVVideoEncParams structure through frame side data.
+ */
+#define AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS (1 << 2)
+/**
+ * Decoding only.
+ * Do not apply film grain, export it instead.
+ */
+#define AV_CODEC_EXPORT_DATA_FILM_GRAIN (1 << 3)
+
+/**
+ * The decoder will keep a reference to the frame and may reuse it later.
+ */
+#define AV_GET_BUFFER_FLAG_REF (1 << 0)
+
+/**
+ * The encoder will keep a reference to the packet and may reuse it later.
+ */
+#define AV_GET_ENCODE_BUFFER_FLAG_REF (1 << 0)
+
+struct AVCodecInternal;
+
+/**
+ * main external API structure.
+ * New fields can be added to the end with minor version bumps.
+ * Removal, reordering and changes to existing fields require a major
+ * version bump.
+ * You can use AVOptions (av_opt* / av_set/get*()) to access these fields from user
+ * applications.
+ * The name string for AVOptions options matches the associated command line
+ * parameter name and can be found in libavcodec/options_table.h
+ * The AVOption/command line parameter names differ in some cases from the C
+ * structure field names for historic reasons or brevity.
+ * sizeof(AVCodecContext) must not be used outside libav*.
+ */
+typedef struct AVCodecContext {
+    /**
+     * information on struct for av_log
+     * - set by avcodec_alloc_context3
+     */
+    const AVClass *av_class;
+    int log_level_offset;
+
+    enum AVMediaType codec_type; /* see AVMEDIA_TYPE_xxx */
+    const struct AVCodec  *codec;
+    enum AVCodecID     codec_id; /* see AV_CODEC_ID_xxx */
+
+    /**
+     * fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
+     * This is used to work around some encoder bugs.
+     * A demuxer should set this to what is stored in the field used to identify the codec.
+     * If there are multiple such fields in a container then the demuxer should choose the one
+     * which maximizes the information about the used codec.
+     * If the codec tag field in a container is larger than 32 bits then the demuxer should
+     * remap the longer ID to 32 bits with a table or other structure. Alternatively a new
+     * extra_codec_tag + size could be added but for this a clear advantage must be demonstrated
+     * first.
+     * - encoding: Set by user, if not then the default based on codec_id will be used.
+     * - decoding: Set by user, will be converted to uppercase by libavcodec during init.
+     */
+    unsigned int codec_tag;
+
+    void *priv_data;
+
+    /**
+     * Private context used for internal data.
+     *
+     * Unlike priv_data, this is not codec-specific. It is used in general
+     * libavcodec functions.
+     */
+    struct AVCodecInternal *internal;
+
+    /**
+     * Private data of the user, can be used to carry app specific stuff.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    void *opaque;
+
+    /**
+     * the average bitrate
+     * - encoding: Set by user; unused for constant quantizer encoding.
+     * - decoding: Set by user, may be overwritten by libavcodec
+     *             if this info is available in the stream
+     */
+    int64_t bit_rate;
+
+    /**
+     * number of bits the bitstream is allowed to diverge from the reference.
+     *           the reference can be CBR (for CBR pass1) or VBR (for pass2)
+     * - encoding: Set by user; unused for constant quantizer encoding.
+     * - decoding: unused
+     */
+    int bit_rate_tolerance;
+
+    /**
+     * Global quality for codecs which cannot change it per frame.
+     * This should be proportional to MPEG-1/2/4 qscale.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int global_quality;
+
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int compression_level;
+#define FF_COMPRESSION_DEFAULT -1
+
+    /**
+     * AV_CODEC_FLAG_*.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int flags;
+
+    /**
+     * AV_CODEC_FLAG2_*
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int flags2;
+
+    /**
+     * some codecs need / can use extradata like Huffman tables.
+     * MJPEG: Huffman tables
+     * rv10: additional flags
+     * MPEG-4: global headers (they can be in the bitstream or here)
+     * The allocated memory should be AV_INPUT_BUFFER_PADDING_SIZE bytes larger
+     * than extradata_size to avoid problems if it is read with the bitstream reader.
+     * The bytewise contents of extradata must not depend on the architecture or CPU endianness.
+     * Must be allocated with the av_malloc() family of functions.
+     * - encoding: Set/allocated/freed by libavcodec.
+     * - decoding: Set/allocated/freed by user.
+     */
+    uint8_t *extradata;
+    int extradata_size;
+
+    /**
+     * This is the fundamental unit of time (in seconds) in terms
+     * of which frame timestamps are represented. For fixed-fps content,
+     * timebase should be 1/framerate and timestamp increments should be
+     * identically 1.
+     * This often, but not always is the inverse of the frame rate or field rate
+     * for video. 1/time_base is not the average frame rate if the frame rate is not
+     * constant.
+     *
+     * Like containers, elementary streams also can store timestamps, 1/time_base
+     * is the unit in which these timestamps are specified.
+     * As example of such codec time base see ISO/IEC 14496-2:2001(E)
+     * vop_time_increment_resolution and fixed_vop_rate
+     * (fixed_vop_rate == 0 implies that it is different from the framerate)
+     *
+     * - encoding: MUST be set by user.
+     * - decoding: unused.
+     */
+    AVRational time_base;
+
+    /**
+     * For some codecs, the time base is closer to the field rate than the frame rate.
+     * Most notably, H.264 and MPEG-2 specify time_base as half of frame duration
+     * if no telecine is used ...
+     *
+     * Set to time_base ticks per frame. Default 1, e.g., H.264/MPEG-2 set it to 2.
+     */
+    int ticks_per_frame;
+
+    /**
+     * Codec delay.
+     *
+     * Encoding: Number of frames delay there will be from the encoder input to
+     *           the decoder output. (we assume the decoder matches the spec)
+     * Decoding: Number of frames delay in addition to what a standard decoder
+     *           as specified in the spec would produce.
+     *
+     * Video:
+     *   Number of frames the decoded output will be delayed relative to the
+     *   encoded input.
+     *
+     * Audio:
+     *   For encoding, this field is unused (see initial_padding).
+     *
+     *   For decoding, this is the number of samples the decoder needs to
+     *   output before the decoder's output is valid. When seeking, you should
+     *   start decoding this many samples prior to your desired seek point.
+     *
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec.
+     */
+    int delay;
+
+
+    /* video only */
+    /**
+     * picture width / height.
+     *
+     * @note Those fields may not match the values of the last
+     * AVFrame output by avcodec_receive_frame() due frame
+     * reordering.
+     *
+     * - encoding: MUST be set by user.
+     * - decoding: May be set by the user before opening the decoder if known e.g.
+     *             from the container. Some decoders will require the dimensions
+     *             to be set by the caller. During decoding, the decoder may
+     *             overwrite those values as required while parsing the data.
+     */
+    int width, height;
+
+    /**
+     * Bitstream width / height, may be different from width/height e.g. when
+     * the decoded frame is cropped before being output or lowres is enabled.
+     *
+     * @note Those field may not match the value of the last
+     * AVFrame output by avcodec_receive_frame() due frame
+     * reordering.
+     *
+     * - encoding: unused
+     * - decoding: May be set by the user before opening the decoder if known
+     *             e.g. from the container. During decoding, the decoder may
+     *             overwrite those values as required while parsing the data.
+     */
+    int coded_width, coded_height;
+
+    /**
+     * the number of pictures in a group of pictures, or 0 for intra_only
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int gop_size;
+
+    /**
+     * Pixel format, see AV_PIX_FMT_xxx.
+     * May be set by the demuxer if known from headers.
+     * May be overridden by the decoder if it knows better.
+     *
+     * @note This field may not match the value of the last
+     * AVFrame output by avcodec_receive_frame() due frame
+     * reordering.
+     *
+     * - encoding: Set by user.
+     * - decoding: Set by user if known, overridden by libavcodec while
+     *             parsing the data.
+     */
+    enum AVPixelFormat pix_fmt;
+
+    /**
+     * If non NULL, 'draw_horiz_band' is called by the libavcodec
+     * decoder to draw a horizontal band. It improves cache usage. Not
+     * all codecs can do that. You must check the codec capabilities
+     * beforehand.
+     * When multithreading is used, it may be called from multiple threads
+     * at the same time; threads might draw different parts of the same AVFrame,
+     * or multiple AVFrames, and there is no guarantee that slices will be drawn
+     * in order.
+     * The function is also used by hardware acceleration APIs.
+     * It is called at least once during frame decoding to pass
+     * the data needed for hardware render.
+     * In that mode instead of pixel data, AVFrame points to
+     * a structure specific to the acceleration API. The application
+     * reads the structure and can change some fields to indicate progress
+     * or mark state.
+     * - encoding: unused
+     * - decoding: Set by user.
+     * @param height the height of the slice
+     * @param y the y position of the slice
+     * @param type 1->top field, 2->bottom field, 3->frame
+     * @param offset offset into the AVFrame.data from which the slice should be read
+     */
+    void (*draw_horiz_band)(struct AVCodecContext *s,
+                            const AVFrame *src, int offset[AV_NUM_DATA_POINTERS],
+                            int y, int type, int height);
+
+    /**
+     * Callback to negotiate the pixel format. Decoding only, may be set by the
+     * caller before avcodec_open2().
+     *
+     * Called by some decoders to select the pixel format that will be used for
+     * the output frames. This is mainly used to set up hardware acceleration,
+     * then the provided format list contains the corresponding hwaccel pixel
+     * formats alongside the "software" one. The software pixel format may also
+     * be retrieved from \ref sw_pix_fmt.
+     *
+     * This callback will be called when the coded frame properties (such as
+     * resolution, pixel format, etc.) change and more than one output format is
+     * supported for those new properties. If a hardware pixel format is chosen
+     * and initialization for it fails, the callback may be called again
+     * immediately.
+     *
+     * This callback may be called from different threads if the decoder is
+     * multi-threaded, but not from more than one thread simultaneously.
+     *
+     * @param fmt list of formats which may be used in the current
+     *            configuration, terminated by AV_PIX_FMT_NONE.
+     * @warning Behavior is undefined if the callback returns a value other
+     *          than one of the formats in fmt or AV_PIX_FMT_NONE.
+     * @return the chosen format or AV_PIX_FMT_NONE
+     */
+    enum AVPixelFormat (*get_format)(struct AVCodecContext *s, const enum AVPixelFormat * fmt);
+
+    /**
+     * maximum number of B-frames between non-B-frames
+     * Note: The output will be delayed by max_b_frames+1 relative to the input.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int max_b_frames;
+
+    /**
+     * qscale factor between IP and B-frames
+     * If > 0 then the last P-frame quantizer will be used (q= lastp_q*factor+offset).
+     * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset).
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float b_quant_factor;
+
+    /**
+     * qscale offset between IP and B-frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float b_quant_offset;
+
+    /**
+     * Size of the frame reordering buffer in the decoder.
+     * For MPEG-2 it is 1 IPB or 0 low delay IP.
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec.
+     */
+    int has_b_frames;
+
+    /**
+     * qscale factor between P- and I-frames
+     * If > 0 then the last P-frame quantizer will be used (q = lastp_q * factor + offset).
+     * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset).
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float i_quant_factor;
+
+    /**
+     * qscale offset between P and I-frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float i_quant_offset;
+
+    /**
+     * luminance masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float lumi_masking;
+
+    /**
+     * temporary complexity masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float temporal_cplx_masking;
+
+    /**
+     * spatial complexity masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float spatial_cplx_masking;
+
+    /**
+     * p block masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float p_masking;
+
+    /**
+     * darkness masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float dark_masking;
+
+    /**
+     * slice count
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by user (or 0).
+     */
+    int slice_count;
+
+    /**
+     * slice offsets in the frame in bytes
+     * - encoding: Set/allocated by libavcodec.
+     * - decoding: Set/allocated by user (or NULL).
+     */
+    int *slice_offset;
+
+    /**
+     * sample aspect ratio (0 if unknown)
+     * That is the width of a pixel divided by the height of the pixel.
+     * Numerator and denominator must be relatively prime and smaller than 256 for some video standards.
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    AVRational sample_aspect_ratio;
+
+    /**
+     * motion estimation comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int me_cmp;
+    /**
+     * subpixel motion estimation comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int me_sub_cmp;
+    /**
+     * macroblock comparison function (not supported yet)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int mb_cmp;
+    /**
+     * interlaced DCT comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int ildct_cmp;
+#define FF_CMP_SAD          0
+#define FF_CMP_SSE          1
+#define FF_CMP_SATD         2
+#define FF_CMP_DCT          3
+#define FF_CMP_PSNR         4
+#define FF_CMP_BIT          5
+#define FF_CMP_RD           6
+#define FF_CMP_ZERO         7
+#define FF_CMP_VSAD         8
+#define FF_CMP_VSSE         9
+#define FF_CMP_NSSE         10
+#define FF_CMP_W53          11
+#define FF_CMP_W97          12
+#define FF_CMP_DCTMAX       13
+#define FF_CMP_DCT264       14
+#define FF_CMP_MEDIAN_SAD   15
+#define FF_CMP_CHROMA       256
+
+    /**
+     * ME diamond size & shape
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int dia_size;
+
+    /**
+     * amount of previous MV predictors (2a+1 x 2a+1 square)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int last_predictor_count;
+
+    /**
+     * motion estimation prepass comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int me_pre_cmp;
+
+    /**
+     * ME prepass diamond size & shape
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int pre_dia_size;
+
+    /**
+     * subpel ME quality
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int me_subpel_quality;
+
+    /**
+     * maximum motion estimation search range in subpel units
+     * If 0 then no limit.
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int me_range;
+
+    /**
+     * slice flags
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    int slice_flags;
+#define SLICE_FLAG_CODED_ORDER    0x0001 ///< draw_horiz_band() is called in coded order instead of display
+#define SLICE_FLAG_ALLOW_FIELD    0x0002 ///< allow draw_horiz_band() with field slices (MPEG-2 field pics)
+#define SLICE_FLAG_ALLOW_PLANE    0x0004 ///< allow draw_horiz_band() with 1 component at a time (SVQ1)
+
+    /**
+     * macroblock decision mode
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int mb_decision;
+#define FF_MB_DECISION_SIMPLE 0        ///< uses mb_cmp
+#define FF_MB_DECISION_BITS   1        ///< chooses the one which needs the fewest bits
+#define FF_MB_DECISION_RD     2        ///< rate distortion
+
+    /**
+     * custom intra quantization matrix
+     * Must be allocated with the av_malloc() family of functions, and will be freed in
+     * avcodec_free_context().
+     * - encoding: Set/allocated by user, freed by libavcodec. Can be NULL.
+     * - decoding: Set/allocated/freed by libavcodec.
+     */
+    uint16_t *intra_matrix;
+
+    /**
+     * custom inter quantization matrix
+     * Must be allocated with the av_malloc() family of functions, and will be freed in
+     * avcodec_free_context().
+     * - encoding: Set/allocated by user, freed by libavcodec. Can be NULL.
+     * - decoding: Set/allocated/freed by libavcodec.
+     */
+    uint16_t *inter_matrix;
+
+    /**
+     * precision of the intra DC coefficient - 8
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec
+     */
+    int intra_dc_precision;
+
+    /**
+     * Number of macroblock rows at the top which are skipped.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    int skip_top;
+
+    /**
+     * Number of macroblock rows at the bottom which are skipped.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    int skip_bottom;
+
+    /**
+     * minimum MB Lagrange multiplier
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int mb_lmin;
+
+    /**
+     * maximum MB Lagrange multiplier
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int mb_lmax;
+
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int bidir_refine;
+
+    /**
+     * minimum GOP size
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int keyint_min;
+
+    /**
+     * number of reference frames
+     * - encoding: Set by user.
+     * - decoding: Set by lavc.
+     */
+    int refs;
+
+    /**
+     * Note: Value depends upon the compare function used for fullpel ME.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int mv0_threshold;
+
+    /**
+     * Chromaticity coordinates of the source primaries.
+     * - encoding: Set by user
+     * - decoding: Set by libavcodec
+     */
+    enum AVColorPrimaries color_primaries;
+
+    /**
+     * Color Transfer Characteristic.
+     * - encoding: Set by user
+     * - decoding: Set by libavcodec
+     */
+    enum AVColorTransferCharacteristic color_trc;
+
+    /**
+     * YUV colorspace type.
+     * - encoding: Set by user
+     * - decoding: Set by libavcodec
+     */
+    enum AVColorSpace colorspace;
+
+    /**
+     * MPEG vs JPEG YUV range.
+     * - encoding: Set by user
+     * - decoding: Set by libavcodec
+     */
+    enum AVColorRange color_range;
+
+    /**
+     * This defines the location of chroma samples.
+     * - encoding: Set by user
+     * - decoding: Set by libavcodec
+     */
+    enum AVChromaLocation chroma_sample_location;
+
+    /**
+     * Number of slices.
+     * Indicates number of picture subdivisions. Used for parallelized
+     * decoding.
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    int slices;
+
+    /** Field order
+     * - encoding: set by libavcodec
+     * - decoding: Set by user.
+     */
+    enum AVFieldOrder field_order;
+
+    /* audio only */
+    int sample_rate; ///< samples per second
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+    /**
+     * number of audio channels
+     * @deprecated use ch_layout.nb_channels
+     */
+    attribute_deprecated
+    int channels;
+#endif
+
+    /**
+     * audio sample format
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    enum AVSampleFormat sample_fmt;  ///< sample format
+
+    /* The following data should not be initialized. */
+    /**
+     * Number of samples per channel in an audio frame.
+     *
+     * - encoding: set by libavcodec in avcodec_open2(). Each submitted frame
+     *   except the last must contain exactly frame_size samples per channel.
+     *   May be 0 when the codec has AV_CODEC_CAP_VARIABLE_FRAME_SIZE set, then the
+     *   frame size is not restricted.
+     * - decoding: may be set by some decoders to indicate constant frame size
+     */
+    int frame_size;
+
+#if FF_API_AVCTX_FRAME_NUMBER
+    /**
+     * Frame counter, set by libavcodec.
+     *
+     * - decoding: total number of frames returned from the decoder so far.
+     * - encoding: total number of frames passed to the encoder so far.
+     *
+     *   @note the counter is not incremented if encoding/decoding resulted in
+     *   an error.
+     *   @deprecated use frame_num instead
+     */
+    attribute_deprecated
+    int frame_number;
+#endif
+
+    /**
+     * number of bytes per packet if constant and known or 0
+     * Used by some WAV based audio codecs.
+     */
+    int block_align;
+
+    /**
+     * Audio cutoff bandwidth (0 means "automatic")
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int cutoff;
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+    /**
+     * Audio channel layout.
+     * - encoding: set by user.
+     * - decoding: set by user, may be overwritten by libavcodec.
+     * @deprecated use ch_layout
+     */
+    attribute_deprecated
+    uint64_t channel_layout;
+
+    /**
+     * Request decoder to use this channel layout if it can (0 for default)
+     * - encoding: unused
+     * - decoding: Set by user.
+     * @deprecated use "downmix" codec private option
+     */
+    attribute_deprecated
+    uint64_t request_channel_layout;
+#endif
+
+    /**
+     * Type of service that the audio stream conveys.
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    enum AVAudioServiceType audio_service_type;
+
+    /**
+     * desired sample format
+     * - encoding: Not used.
+     * - decoding: Set by user.
+     * Decoder will decode to this format if it can.
+     */
+    enum AVSampleFormat request_sample_fmt;
+
+    /**
+     * This callback is called at the beginning of each frame to get data
+     * buffer(s) for it. There may be one contiguous buffer for all the data or
+     * there may be a buffer per each data plane or anything in between. What
+     * this means is, you may set however many entries in buf[] you feel necessary.
+     * Each buffer must be reference-counted using the AVBuffer API (see description
+     * of buf[] below).
+     *
+     * The following fields will be set in the frame before this callback is
+     * called:
+     * - format
+     * - width, height (video only)
+     * - sample_rate, channel_layout, nb_samples (audio only)
+     * Their values may differ from the corresponding values in
+     * AVCodecContext. This callback must use the frame values, not the codec
+     * context values, to calculate the required buffer size.
+     *
+     * This callback must fill the following fields in the frame:
+     * - data[]
+     * - linesize[]
+     * - extended_data:
+     *   * if the data is planar audio with more than 8 channels, then this
+     *     callback must allocate and fill extended_data to contain all pointers
+     *     to all data planes. data[] must hold as many pointers as it can.
+     *     extended_data must be allocated with av_malloc() and will be freed in
+     *     av_frame_unref().
+     *   * otherwise extended_data must point to data
+     * - buf[] must contain one or more pointers to AVBufferRef structures. Each of
+     *   the frame's data and extended_data pointers must be contained in these. That
+     *   is, one AVBufferRef for each allocated chunk of memory, not necessarily one
+     *   AVBufferRef per data[] entry. See: av_buffer_create(), av_buffer_alloc(),
+     *   and av_buffer_ref().
+     * - extended_buf and nb_extended_buf must be allocated with av_malloc() by
+     *   this callback and filled with the extra buffers if there are more
+     *   buffers than buf[] can hold. extended_buf will be freed in
+     *   av_frame_unref().
+     *
+     * If AV_CODEC_CAP_DR1 is not set then get_buffer2() must call
+     * avcodec_default_get_buffer2() instead of providing buffers allocated by
+     * some other means.
+     *
+     * Each data plane must be aligned to the maximum required by the target
+     * CPU.
+     *
+     * @see avcodec_default_get_buffer2()
+     *
+     * Video:
+     *
+     * If AV_GET_BUFFER_FLAG_REF is set in flags then the frame may be reused
+     * (read and/or written to if it is writable) later by libavcodec.
+     *
+     * avcodec_align_dimensions2() should be used to find the required width and
+     * height, as they normally need to be rounded up to the next multiple of 16.
+     *
+     * Some decoders do not support linesizes changing between frames.
+     *
+     * If frame multithreading is used, this callback may be called from a
+     * different thread, but not from more than one at once. Does not need to be
+     * reentrant.
+     *
+     * @see avcodec_align_dimensions2()
+     *
+     * Audio:
+     *
+     * Decoders request a buffer of a particular size by setting
+     * AVFrame.nb_samples prior to calling get_buffer2(). The decoder may,
+     * however, utilize only part of the buffer by setting AVFrame.nb_samples
+     * to a smaller value in the output frame.
+     *
+     * As a convenience, av_samples_get_buffer_size() and
+     * av_samples_fill_arrays() in libavutil may be used by custom get_buffer2()
+     * functions to find the required data size and to fill data pointers and
+     * linesize. In AVFrame.linesize, only linesize[0] may be set for audio
+     * since all planes must be the same size.
+     *
+     * @see av_samples_get_buffer_size(), av_samples_fill_arrays()
+     *
+     * - encoding: unused
+     * - decoding: Set by libavcodec, user can override.
+     */
+    int (*get_buffer2)(struct AVCodecContext *s, AVFrame *frame, int flags);
+
+    /* - encoding parameters */
+    float qcompress;  ///< amount of qscale change between easy & hard scenes (0.0-1.0)
+    float qblur;      ///< amount of qscale smoothing over time (0.0-1.0)
+
+    /**
+     * minimum quantizer
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int qmin;
+
+    /**
+     * maximum quantizer
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int qmax;
+
+    /**
+     * maximum quantizer difference between frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int max_qdiff;
+
+    /**
+     * decoder bitstream buffer size
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int rc_buffer_size;
+
+    /**
+     * ratecontrol override, see RcOverride
+     * - encoding: Allocated/set/freed by user.
+     * - decoding: unused
+     */
+    int rc_override_count;
+    RcOverride *rc_override;
+
+    /**
+     * maximum bitrate
+     * - encoding: Set by user.
+     * - decoding: Set by user, may be overwritten by libavcodec.
+     */
+    int64_t rc_max_rate;
+
+    /**
+     * minimum bitrate
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int64_t rc_min_rate;
+
+    /**
+     * Ratecontrol attempts to use, at maximum, <value> of what can be used without an underflow.
+     * - encoding: Set by user.
+     * - decoding: unused.
+     */
+    float rc_max_available_vbv_use;
+
+    /**
+     * Ratecontrol attempts to use, at least, <value> times the amount needed to prevent a vbv overflow.
+     * - encoding: Set by user.
+     * - decoding: unused.
+     */
+    float rc_min_vbv_overflow_use;
+
+    /**
+     * Number of bits which should be loaded into the rc buffer before decoding starts.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int rc_initial_buffer_occupancy;
+
+    /**
+     * trellis RD quantization
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int trellis;
+
+    /**
+     * pass1 encoding statistics output buffer
+     * - encoding: Set by libavcodec.
+     * - decoding: unused
+     */
+    char *stats_out;
+
+    /**
+     * pass2 encoding statistics input buffer
+     * Concatenated stuff from stats_out of pass1 should be placed here.
+     * - encoding: Allocated/set/freed by user.
+     * - decoding: unused
+     */
+    char *stats_in;
+
+    /**
+     * Work around bugs in encoders which sometimes cannot be detected automatically.
+     * - encoding: Set by user
+     * - decoding: Set by user
+     */
+    int workaround_bugs;
+#define FF_BUG_AUTODETECT       1  ///< autodetection
+#define FF_BUG_XVID_ILACE       4
+#define FF_BUG_UMP4             8
+#define FF_BUG_NO_PADDING       16
+#define FF_BUG_AMV              32
+#define FF_BUG_QPEL_CHROMA      64
+#define FF_BUG_STD_QPEL         128
+#define FF_BUG_QPEL_CHROMA2     256
+#define FF_BUG_DIRECT_BLOCKSIZE 512
+#define FF_BUG_EDGE             1024
+#define FF_BUG_HPEL_CHROMA      2048
+#define FF_BUG_DC_CLIP          4096
+#define FF_BUG_MS               8192 ///< Work around various bugs in Microsoft's broken decoders.
+#define FF_BUG_TRUNCATED       16384
+#define FF_BUG_IEDGE           32768
+
+    /**
+     * strictly follow the standard (MPEG-4, ...).
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     * Setting this to STRICT or higher means the encoder and decoder will
+     * generally do stupid things, whereas setting it to unofficial or lower
+     * will mean the encoder might produce output that is not supported by all
+     * spec-compliant decoders. Decoders don't differentiate between normal,
+     * unofficial and experimental (that is, they always try to decode things
+     * when they can) unless they are explicitly asked to behave stupidly
+     * (=strictly conform to the specs)
+     * This may only be set to one of the FF_COMPLIANCE_* values in defs.h.
+     */
+    int strict_std_compliance;
+
+    /**
+     * error concealment flags
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    int error_concealment;
+#define FF_EC_GUESS_MVS   1
+#define FF_EC_DEBLOCK     2
+#define FF_EC_FAVOR_INTER 256
+
+    /**
+     * debug
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int debug;
+#define FF_DEBUG_PICT_INFO   1
+#define FF_DEBUG_RC          2
+#define FF_DEBUG_BITSTREAM   4
+#define FF_DEBUG_MB_TYPE     8
+#define FF_DEBUG_QP          16
+#define FF_DEBUG_DCT_COEFF   0x00000040
+#define FF_DEBUG_SKIP        0x00000080
+#define FF_DEBUG_STARTCODE   0x00000100
+#define FF_DEBUG_ER          0x00000400
+#define FF_DEBUG_MMCO        0x00000800
+#define FF_DEBUG_BUGS        0x00001000
+#define FF_DEBUG_BUFFERS     0x00008000
+#define FF_DEBUG_THREADS     0x00010000
+#define FF_DEBUG_GREEN_MD    0x00800000
+#define FF_DEBUG_NOMC        0x01000000
+
+    /**
+     * Error recognition; may misdetect some more or less valid parts as errors.
+     * This is a bitfield of the AV_EF_* values defined in defs.h.
+     *
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int err_recognition;
+
+#if FF_API_REORDERED_OPAQUE
+    /**
+     * opaque 64-bit number (generally a PTS) that will be reordered and
+     * output in AVFrame.reordered_opaque
+     * - encoding: Set by libavcodec to the reordered_opaque of the input
+     *             frame corresponding to the last returned packet. Only
+     *             supported by encoders with the
+     *             AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE capability.
+     * - decoding: Set by user.
+     *
+     * @deprecated Use AV_CODEC_FLAG_COPY_OPAQUE instead
+     */
+    attribute_deprecated
+    int64_t reordered_opaque;
+#endif
+
+    /**
+     * Hardware accelerator in use
+     * - encoding: unused.
+     * - decoding: Set by libavcodec
+     */
+    const struct AVHWAccel *hwaccel;
+
+    /**
+     * Legacy hardware accelerator context.
+     *
+     * For some hardware acceleration methods, the caller may use this field to
+     * signal hwaccel-specific data to the codec. The struct pointed to by this
+     * pointer is hwaccel-dependent and defined in the respective header. Please
+     * refer to the FFmpeg HW accelerator documentation to know how to fill
+     * this.
+     *
+     * In most cases this field is optional - the necessary information may also
+     * be provided to libavcodec through @ref hw_frames_ctx or @ref
+     * hw_device_ctx (see avcodec_get_hw_config()). However, in some cases it
+     * may be the only method of signalling some (optional) information.
+     *
+     * The struct and its contents are owned by the caller.
+     *
+     * - encoding: May be set by the caller before avcodec_open2(). Must remain
+     *             valid until avcodec_free_context().
+     * - decoding: May be set by the caller in the get_format() callback.
+     *             Must remain valid until the next get_format() call,
+     *             or avcodec_free_context() (whichever comes first).
+     */
+    void *hwaccel_context;
+
+    /**
+     * error
+     * - encoding: Set by libavcodec if flags & AV_CODEC_FLAG_PSNR.
+     * - decoding: unused
+     */
+    uint64_t error[AV_NUM_DATA_POINTERS];
+
+    /**
+     * DCT algorithm, see FF_DCT_* below
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int dct_algo;
+#define FF_DCT_AUTO    0
+#define FF_DCT_FASTINT 1
+#define FF_DCT_INT     2
+#define FF_DCT_MMX     3
+#define FF_DCT_ALTIVEC 5
+#define FF_DCT_FAAN    6
+
+    /**
+     * IDCT algorithm, see FF_IDCT_* below.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int idct_algo;
+#define FF_IDCT_AUTO          0
+#define FF_IDCT_INT           1
+#define FF_IDCT_SIMPLE        2
+#define FF_IDCT_SIMPLEMMX     3
+#define FF_IDCT_ARM           7
+#define FF_IDCT_ALTIVEC       8
+#define FF_IDCT_SIMPLEARM     10
+#define FF_IDCT_XVID          14
+#define FF_IDCT_SIMPLEARMV5TE 16
+#define FF_IDCT_SIMPLEARMV6   17
+#define FF_IDCT_FAAN          20
+#define FF_IDCT_SIMPLENEON    22
+#if FF_API_IDCT_NONE
+// formerly used by xvmc
+#define FF_IDCT_NONE          24
+#endif
+#define FF_IDCT_SIMPLEAUTO    128
+
+    /**
+     * bits per sample/pixel from the demuxer (needed for huffyuv).
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by user.
+     */
+     int bits_per_coded_sample;
+
+    /**
+     * Bits per sample/pixel of internal libavcodec pixel/sample format.
+     * - encoding: set by user.
+     * - decoding: set by libavcodec.
+     */
+    int bits_per_raw_sample;
+
+    /**
+     * low resolution decoding, 1 -> 1/2 size, 2 -> 1/4 size
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+     int lowres;
+
+    /**
+     * thread count
+     * is used to decide how many independent tasks should be passed to execute()
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int thread_count;
+
+    /**
+     * Which multithreading methods to use.
+     * Use of FF_THREAD_FRAME will increase decoding delay by one frame per thread,
+     * so clients which cannot provide future frames should not use it.
+     *
+     * - encoding: Set by user, otherwise the default is used.
+     * - decoding: Set by user, otherwise the default is used.
+     */
+    int thread_type;
+#define FF_THREAD_FRAME   1 ///< Decode more than one frame at once
+#define FF_THREAD_SLICE   2 ///< Decode more than one part of a single frame at once
+
+    /**
+     * Which multithreading methods are in use by the codec.
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec.
+     */
+    int active_thread_type;
+
+    /**
+     * The codec may call this to execute several independent things.
+     * It will return only after finishing all tasks.
+     * The user may replace this with some multithreaded implementation,
+     * the default implementation will execute the parts serially.
+     * @param count the number of things to execute
+     * - encoding: Set by libavcodec, user can override.
+     * - decoding: Set by libavcodec, user can override.
+     */
+    int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void *arg2, int *ret, int count, int size);
+
+    /**
+     * The codec may call this to execute several independent things.
+     * It will return only after finishing all tasks.
+     * The user may replace this with some multithreaded implementation,
+     * the default implementation will execute the parts serially.
+     * @param c context passed also to func
+     * @param count the number of things to execute
+     * @param arg2 argument passed unchanged to func
+     * @param ret return values of executed functions, must have space for "count" values. May be NULL.
+     * @param func function that will be called count times, with jobnr from 0 to count-1.
+     *             threadnr will be in the range 0 to c->thread_count-1 < MAX_THREADS and so that no
+     *             two instances of func executing at the same time will have the same threadnr.
+     * @return always 0 currently, but code should handle a future improvement where, when any call to func
+     *         returns < 0, no further calls to func may be done and < 0 is returned.
+     * - encoding: Set by libavcodec, user can override.
+     * - decoding: Set by libavcodec, user can override.
+     */
+    int (*execute2)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg, int jobnr, int threadnr), void *arg2, int *ret, int count);
+
+    /**
+     * noise vs. sse weight for the nsse comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+     int nsse_weight;
+
+    /**
+     * profile
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+     int profile;
+#define FF_PROFILE_UNKNOWN -99
+#define FF_PROFILE_RESERVED -100
+
+#define FF_PROFILE_AAC_MAIN 0
+#define FF_PROFILE_AAC_LOW  1
+#define FF_PROFILE_AAC_SSR  2
+#define FF_PROFILE_AAC_LTP  3
+#define FF_PROFILE_AAC_HE   4
+#define FF_PROFILE_AAC_HE_V2 28
+#define FF_PROFILE_AAC_LD   22
+#define FF_PROFILE_AAC_ELD  38
+#define FF_PROFILE_MPEG2_AAC_LOW 128
+#define FF_PROFILE_MPEG2_AAC_HE  131
+
+#define FF_PROFILE_DNXHD         0
+#define FF_PROFILE_DNXHR_LB      1
+#define FF_PROFILE_DNXHR_SQ      2
+#define FF_PROFILE_DNXHR_HQ      3
+#define FF_PROFILE_DNXHR_HQX     4
+#define FF_PROFILE_DNXHR_444     5
+
+#define FF_PROFILE_DTS         20
+#define FF_PROFILE_DTS_ES      30
+#define FF_PROFILE_DTS_96_24   40
+#define FF_PROFILE_DTS_HD_HRA  50
+#define FF_PROFILE_DTS_HD_MA   60
+#define FF_PROFILE_DTS_EXPRESS 70
+
+#define FF_PROFILE_MPEG2_422    0
+#define FF_PROFILE_MPEG2_HIGH   1
+#define FF_PROFILE_MPEG2_SS     2
+#define FF_PROFILE_MPEG2_SNR_SCALABLE  3
+#define FF_PROFILE_MPEG2_MAIN   4
+#define FF_PROFILE_MPEG2_SIMPLE 5
+
+#define FF_PROFILE_H264_CONSTRAINED  (1<<9)  // 8+1; constraint_set1_flag
+#define FF_PROFILE_H264_INTRA        (1<<11) // 8+3; constraint_set3_flag
+
+#define FF_PROFILE_H264_BASELINE             66
+#define FF_PROFILE_H264_CONSTRAINED_BASELINE (66|FF_PROFILE_H264_CONSTRAINED)
+#define FF_PROFILE_H264_MAIN                 77
+#define FF_PROFILE_H264_EXTENDED             88
+#define FF_PROFILE_H264_HIGH                 100
+#define FF_PROFILE_H264_HIGH_10              110
+#define FF_PROFILE_H264_HIGH_10_INTRA        (110|FF_PROFILE_H264_INTRA)
+#define FF_PROFILE_H264_MULTIVIEW_HIGH       118
+#define FF_PROFILE_H264_HIGH_422             122
+#define FF_PROFILE_H264_HIGH_422_INTRA       (122|FF_PROFILE_H264_INTRA)
+#define FF_PROFILE_H264_STEREO_HIGH          128
+#define FF_PROFILE_H264_HIGH_444             144
+#define FF_PROFILE_H264_HIGH_444_PREDICTIVE  244
+#define FF_PROFILE_H264_HIGH_444_INTRA       (244|FF_PROFILE_H264_INTRA)
+#define FF_PROFILE_H264_CAVLC_444            44
+
+#define FF_PROFILE_VC1_SIMPLE   0
+#define FF_PROFILE_VC1_MAIN     1
+#define FF_PROFILE_VC1_COMPLEX  2
+#define FF_PROFILE_VC1_ADVANCED 3
+
+#define FF_PROFILE_MPEG4_SIMPLE                     0
+#define FF_PROFILE_MPEG4_SIMPLE_SCALABLE            1
+#define FF_PROFILE_MPEG4_CORE                       2
+#define FF_PROFILE_MPEG4_MAIN                       3
+#define FF_PROFILE_MPEG4_N_BIT                      4
+#define FF_PROFILE_MPEG4_SCALABLE_TEXTURE           5
+#define FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION      6
+#define FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE     7
+#define FF_PROFILE_MPEG4_HYBRID                     8
+#define FF_PROFILE_MPEG4_ADVANCED_REAL_TIME         9
+#define FF_PROFILE_MPEG4_CORE_SCALABLE             10
+#define FF_PROFILE_MPEG4_ADVANCED_CODING           11
+#define FF_PROFILE_MPEG4_ADVANCED_CORE             12
+#define FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE 13
+#define FF_PROFILE_MPEG4_SIMPLE_STUDIO             14
+#define FF_PROFILE_MPEG4_ADVANCED_SIMPLE           15
+
+#define FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_0   1
+#define FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_1   2
+#define FF_PROFILE_JPEG2000_CSTREAM_NO_RESTRICTION  32768
+#define FF_PROFILE_JPEG2000_DCINEMA_2K              3
+#define FF_PROFILE_JPEG2000_DCINEMA_4K              4
+
+#define FF_PROFILE_VP9_0                            0
+#define FF_PROFILE_VP9_1                            1
+#define FF_PROFILE_VP9_2                            2
+#define FF_PROFILE_VP9_3                            3
+
+#define FF_PROFILE_HEVC_MAIN                        1
+#define FF_PROFILE_HEVC_MAIN_10                     2
+#define FF_PROFILE_HEVC_MAIN_STILL_PICTURE          3
+#define FF_PROFILE_HEVC_REXT                        4
+#define FF_PROFILE_HEVC_SCC                         9
+
+#define FF_PROFILE_VVC_MAIN_10                      1
+#define FF_PROFILE_VVC_MAIN_10_444                 33
+
+#define FF_PROFILE_AV1_MAIN                         0
+#define FF_PROFILE_AV1_HIGH                         1
+#define FF_PROFILE_AV1_PROFESSIONAL                 2
+
+#define FF_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT            0xc0
+#define FF_PROFILE_MJPEG_HUFFMAN_EXTENDED_SEQUENTIAL_DCT 0xc1
+#define FF_PROFILE_MJPEG_HUFFMAN_PROGRESSIVE_DCT         0xc2
+#define FF_PROFILE_MJPEG_HUFFMAN_LOSSLESS                0xc3
+#define FF_PROFILE_MJPEG_JPEG_LS                         0xf7
+
+#define FF_PROFILE_SBC_MSBC                         1
+
+#define FF_PROFILE_PRORES_PROXY     0
+#define FF_PROFILE_PRORES_LT        1
+#define FF_PROFILE_PRORES_STANDARD  2
+#define FF_PROFILE_PRORES_HQ        3
+#define FF_PROFILE_PRORES_4444      4
+#define FF_PROFILE_PRORES_XQ        5
+
+#define FF_PROFILE_ARIB_PROFILE_A 0
+#define FF_PROFILE_ARIB_PROFILE_C 1
+
+#define FF_PROFILE_KLVA_SYNC 0
+#define FF_PROFILE_KLVA_ASYNC 1
+
+    /**
+     * level
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+     int level;
+#define FF_LEVEL_UNKNOWN -99
+
+    /**
+     * Skip loop filtering for selected frames.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    enum AVDiscard skip_loop_filter;
+
+    /**
+     * Skip IDCT/dequantization for selected frames.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    enum AVDiscard skip_idct;
+
+    /**
+     * Skip decoding for selected frames.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    enum AVDiscard skip_frame;
+
+    /**
+     * Header containing style information for text subtitles.
+     * For SUBTITLE_ASS subtitle type, it should contain the whole ASS
+     * [Script Info] and [V4+ Styles] section, plus the [Events] line and
+     * the Format line following. It shouldn't include any Dialogue line.
+     * - encoding: Set/allocated/freed by user (before avcodec_open2())
+     * - decoding: Set/allocated/freed by libavcodec (by avcodec_open2())
+     */
+    uint8_t *subtitle_header;
+    int subtitle_header_size;
+
+    /**
+     * Audio only. The number of "priming" samples (padding) inserted by the
+     * encoder at the beginning of the audio. I.e. this number of leading
+     * decoded samples must be discarded by the caller to get the original audio
+     * without leading padding.
+     *
+     * - decoding: unused
+     * - encoding: Set by libavcodec. The timestamps on the output packets are
+     *             adjusted by the encoder so that they always refer to the
+     *             first sample of the data actually contained in the packet,
+     *             including any added padding.  E.g. if the timebase is
+     *             1/samplerate and the timestamp of the first input sample is
+     *             0, the timestamp of the first output packet will be
+     *             -initial_padding.
+     */
+    int initial_padding;
+
+    /**
+     * - decoding: For codecs that store a framerate value in the compressed
+     *             bitstream, the decoder may export it here. { 0, 1 } when
+     *             unknown.
+     * - encoding: May be used to signal the framerate of CFR content to an
+     *             encoder.
+     */
+    AVRational framerate;
+
+    /**
+     * Nominal unaccelerated pixel format, see AV_PIX_FMT_xxx.
+     * - encoding: unused.
+     * - decoding: Set by libavcodec before calling get_format()
+     */
+    enum AVPixelFormat sw_pix_fmt;
+
+    /**
+     * Timebase in which pkt_dts/pts and AVPacket.dts/pts are.
+     * - encoding: unused.
+     * - decoding: set by user.
+     */
+    AVRational pkt_timebase;
+
+    /**
+     * AVCodecDescriptor
+     * - encoding: unused.
+     * - decoding: set by libavcodec.
+     */
+    const AVCodecDescriptor *codec_descriptor;
+
+    /**
+     * Current statistics for PTS correction.
+     * - decoding: maintained and used by libavcodec, not intended to be used by user apps
+     * - encoding: unused
+     */
+    int64_t pts_correction_num_faulty_pts; ///< Number of incorrect PTS values so far
+    int64_t pts_correction_num_faulty_dts; ///< Number of incorrect DTS values so far
+    int64_t pts_correction_last_pts;       ///< PTS of the last frame
+    int64_t pts_correction_last_dts;       ///< DTS of the last frame
+
+    /**
+     * Character encoding of the input subtitles file.
+     * - decoding: set by user
+     * - encoding: unused
+     */
+    char *sub_charenc;
+
+    /**
+     * Subtitles character encoding mode. Formats or codecs might be adjusting
+     * this setting (if they are doing the conversion themselves for instance).
+     * - decoding: set by libavcodec
+     * - encoding: unused
+     */
+    int sub_charenc_mode;
+#define FF_SUB_CHARENC_MODE_DO_NOTHING  -1  ///< do nothing (demuxer outputs a stream supposed to be already in UTF-8, or the codec is bitmap for instance)
+#define FF_SUB_CHARENC_MODE_AUTOMATIC    0  ///< libavcodec will select the mode itself
+#define FF_SUB_CHARENC_MODE_PRE_DECODER  1  ///< the AVPacket data needs to be recoded to UTF-8 before being fed to the decoder, requires iconv
+#define FF_SUB_CHARENC_MODE_IGNORE       2  ///< neither convert the subtitles, nor check them for valid UTF-8
+
+    /**
+     * Skip processing alpha if supported by codec.
+     * Note that if the format uses pre-multiplied alpha (common with VP6,
+     * and recommended due to better video quality/compression)
+     * the image will look as if alpha-blended onto a black background.
+     * However for formats that do not use pre-multiplied alpha
+     * there might be serious artefacts (though e.g. libswscale currently
+     * assumes pre-multiplied alpha anyway).
+     *
+     * - decoding: set by user
+     * - encoding: unused
+     */
+    int skip_alpha;
+
+    /**
+     * Number of samples to skip after a discontinuity
+     * - decoding: unused
+     * - encoding: set by libavcodec
+     */
+    int seek_preroll;
+
+    /**
+     * custom intra quantization matrix
+     * - encoding: Set by user, can be NULL.
+     * - decoding: unused.
+     */
+    uint16_t *chroma_intra_matrix;
+
+    /**
+     * dump format separator.
+     * can be ", " or "\n      " or anything else
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    uint8_t *dump_separator;
+
+    /**
+     * ',' separated list of allowed decoders.
+     * If NULL then all are allowed
+     * - encoding: unused
+     * - decoding: set by user
+     */
+    char *codec_whitelist;
+
+    /**
+     * Properties of the stream that gets decoded
+     * - encoding: unused
+     * - decoding: set by libavcodec
+     */
+    unsigned properties;
+#define FF_CODEC_PROPERTY_LOSSLESS        0x00000001
+#define FF_CODEC_PROPERTY_CLOSED_CAPTIONS 0x00000002
+#define FF_CODEC_PROPERTY_FILM_GRAIN      0x00000004
+
+    /**
+     * Additional data associated with the entire coded stream.
+     *
+     * - decoding: unused
+     * - encoding: may be set by libavcodec after avcodec_open2().
+     */
+    AVPacketSideData *coded_side_data;
+    int            nb_coded_side_data;
+
+    /**
+     * A reference to the AVHWFramesContext describing the input (for encoding)
+     * or output (decoding) frames. The reference is set by the caller and
+     * afterwards owned (and freed) by libavcodec - it should never be read by
+     * the caller after being set.
+     *
+     * - decoding: This field should be set by the caller from the get_format()
+     *             callback. The previous reference (if any) will always be
+     *             unreffed by libavcodec before the get_format() call.
+     *
+     *             If the default get_buffer2() is used with a hwaccel pixel
+     *             format, then this AVHWFramesContext will be used for
+     *             allocating the frame buffers.
+     *
+     * - encoding: For hardware encoders configured to use a hwaccel pixel
+     *             format, this field should be set by the caller to a reference
+     *             to the AVHWFramesContext describing input frames.
+     *             AVHWFramesContext.format must be equal to
+     *             AVCodecContext.pix_fmt.
+     *
+     *             This field should be set before avcodec_open2() is called.
+     */
+    AVBufferRef *hw_frames_ctx;
+
+    /**
+     * Audio only. The amount of padding (in samples) appended by the encoder to
+     * the end of the audio. I.e. this number of decoded samples must be
+     * discarded by the caller from the end of the stream to get the original
+     * audio without any trailing padding.
+     *
+     * - decoding: unused
+     * - encoding: unused
+     */
+    int trailing_padding;
+
+    /**
+     * The number of pixels per image to maximally accept.
+     *
+     * - decoding: set by user
+     * - encoding: set by user
+     */
+    int64_t max_pixels;
+
+    /**
+     * A reference to the AVHWDeviceContext describing the device which will
+     * be used by a hardware encoder/decoder.  The reference is set by the
+     * caller and afterwards owned (and freed) by libavcodec.
+     *
+     * This should be used if either the codec device does not require
+     * hardware frames or any that are used are to be allocated internally by
+     * libavcodec.  If the user wishes to supply any of the frames used as
+     * encoder input or decoder output then hw_frames_ctx should be used
+     * instead.  When hw_frames_ctx is set in get_format() for a decoder, this
+     * field will be ignored while decoding the associated stream segment, but
+     * may again be used on a following one after another get_format() call.
+     *
+     * For both encoders and decoders this field should be set before
+     * avcodec_open2() is called and must not be written to thereafter.
+     *
+     * Note that some decoders may require this field to be set initially in
+     * order to support hw_frames_ctx at all - in that case, all frames
+     * contexts used must be created on the same device.
+     */
+    AVBufferRef *hw_device_ctx;
+
+    /**
+     * Bit set of AV_HWACCEL_FLAG_* flags, which affect hardware accelerated
+     * decoding (if active).
+     * - encoding: unused
+     * - decoding: Set by user (either before avcodec_open2(), or in the
+     *             AVCodecContext.get_format callback)
+     */
+    int hwaccel_flags;
+
+    /**
+     * Video decoding only. Certain video codecs support cropping, meaning that
+     * only a sub-rectangle of the decoded frame is intended for display.  This
+     * option controls how cropping is handled by libavcodec.
+     *
+     * When set to 1 (the default), libavcodec will apply cropping internally.
+     * I.e. it will modify the output frame width/height fields and offset the
+     * data pointers (only by as much as possible while preserving alignment, or
+     * by the full amount if the AV_CODEC_FLAG_UNALIGNED flag is set) so that
+     * the frames output by the decoder refer only to the cropped area. The
+     * crop_* fields of the output frames will be zero.
+     *
+     * When set to 0, the width/height fields of the output frames will be set
+     * to the coded dimensions and the crop_* fields will describe the cropping
+     * rectangle. Applying the cropping is left to the caller.
+     *
+     * @warning When hardware acceleration with opaque output frames is used,
+     * libavcodec is unable to apply cropping from the top/left border.
+     *
+     * @note when this option is set to zero, the width/height fields of the
+     * AVCodecContext and output AVFrames have different meanings. The codec
+     * context fields store display dimensions (with the coded dimensions in
+     * coded_width/height), while the frame fields store the coded dimensions
+     * (with the display dimensions being determined by the crop_* fields).
+     */
+    int apply_cropping;
+
+    /**
+     * Video decoding only.  Sets the number of extra hardware frames which
+     * the decoder will allocate for use by the caller.  This must be set
+     * before avcodec_open2() is called.
+     *
+     * Some hardware decoders require all frames that they will use for
+     * output to be defined in advance before decoding starts.  For such
+     * decoders, the hardware frame pool must therefore be of a fixed size.
+     * The extra frames set here are on top of any number that the decoder
+     * needs internally in order to operate normally (for example, frames
+     * used as reference pictures).
+     */
+    int extra_hw_frames;
+
+    /**
+     * The percentage of damaged samples to discard a frame.
+     *
+     * - decoding: set by user
+     * - encoding: unused
+     */
+    int discard_damaged_percentage;
+
+    /**
+     * The number of samples per frame to maximally accept.
+     *
+     * - decoding: set by user
+     * - encoding: set by user
+     */
+    int64_t max_samples;
+
+    /**
+     * Bit set of AV_CODEC_EXPORT_DATA_* flags, which affects the kind of
+     * metadata exported in frame, packet, or coded stream side data by
+     * decoders and encoders.
+     *
+     * - decoding: set by user
+     * - encoding: set by user
+     */
+    int export_side_data;
+
+    /**
+     * This callback is called at the beginning of each packet to get a data
+     * buffer for it.
+     *
+     * The following field will be set in the packet before this callback is
+     * called:
+     * - size
+     * This callback must use the above value to calculate the required buffer size,
+     * which must be padded by at least AV_INPUT_BUFFER_PADDING_SIZE bytes.
+     *
+     * In some specific cases, the encoder may not use the entire buffer allocated by this
+     * callback. This will be reflected in the size value in the packet once returned by
+     * avcodec_receive_packet().
+     *
+     * This callback must fill the following fields in the packet:
+     * - data: alignment requirements for AVPacket apply, if any. Some architectures and
+     *   encoders may benefit from having aligned data.
+     * - buf: must contain a pointer to an AVBufferRef structure. The packet's
+     *   data pointer must be contained in it. See: av_buffer_create(), av_buffer_alloc(),
+     *   and av_buffer_ref().
+     *
+     * If AV_CODEC_CAP_DR1 is not set then get_encode_buffer() must call
+     * avcodec_default_get_encode_buffer() instead of providing a buffer allocated by
+     * some other means.
+     *
+     * The flags field may contain a combination of AV_GET_ENCODE_BUFFER_FLAG_ flags.
+     * They may be used for example to hint what use the buffer may get after being
+     * created.
+     * Implementations of this callback may ignore flags they don't understand.
+     * If AV_GET_ENCODE_BUFFER_FLAG_REF is set in flags then the packet may be reused
+     * (read and/or written to if it is writable) later by libavcodec.
+     *
+     * This callback must be thread-safe, as when frame threading is used, it may
+     * be called from multiple threads simultaneously.
+     *
+     * @see avcodec_default_get_encode_buffer()
+     *
+     * - encoding: Set by libavcodec, user can override.
+     * - decoding: unused
+     */
+    int (*get_encode_buffer)(struct AVCodecContext *s, AVPacket *pkt, int flags);
+
+    /**
+     * Audio channel layout.
+     * - encoding: must be set by the caller, to one of AVCodec.ch_layouts.
+     * - decoding: may be set by the caller if known e.g. from the container.
+     *             The decoder can then override during decoding as needed.
+     */
+    AVChannelLayout ch_layout;
+
+    /**
+     * Frame counter, set by libavcodec.
+     *
+     * - decoding: total number of frames returned from the decoder so far.
+     * - encoding: total number of frames passed to the encoder so far.
+     *
+     *   @note the counter is not incremented if encoding/decoding resulted in
+     *   an error.
+     */
+    int64_t frame_num;
+} AVCodecContext;
+
+/**
+ * @defgroup lavc_hwaccel AVHWAccel
+ *
+ * @note  Nothing in this structure should be accessed by the user.  At some
+ *        point in future it will not be externally visible at all.
+ *
+ * @{
+ */
+typedef struct AVHWAccel {
+    /**
+     * Name of the hardware accelerated codec.
+     * The name is globally unique among encoders and among decoders (but an
+     * encoder and a decoder can share the same name).
+     */
+    const char *name;
+
+    /**
+     * Type of codec implemented by the hardware accelerator.
+     *
+     * See AVMEDIA_TYPE_xxx.
+     */
+    enum AVMediaType type;
+
+    /**
+     * Codec implemented by the hardware accelerator.
+     *
+     * See AV_CODEC_ID_xxx.
+     */
+    enum AVCodecID id;
+
+    /**
+     * Supported pixel format.
+     *
+     * Only hardware accelerated formats are supported here.
+     */
+    enum AVPixelFormat pix_fmt;
+
+    /**
+     * Hardware accelerated codec capabilities.
+     * See AV_HWACCEL_CODEC_CAP_*.
+     */
+    int capabilities;
+
+    /*****************************************************************
+     * No fields below this line are part of the public API. They
+     * may not be used outside of libavcodec and can be changed and
+     * removed at will.
+     * New public fields should be added right above.
+     *****************************************************************
+     */
+
+    /**
+     * Allocate a custom buffer.
+     */
+    int (*alloc_frame)(AVCodecContext *avctx, AVFrame *frame);
+
+    /**
+     * Called at the beginning of each frame or field picture.
+     *
+     * Meaningful frame information (codec specific) is guaranteed to
+     * be parsed at this point. This function is mandatory.
+     *
+     * Note that buf can be NULL along with buf_size set to 0.
+     * Otherwise, this means the whole frame is available at this point.
+     *
+     * @param avctx the codec context
+     * @param buf the frame data buffer base
+     * @param buf_size the size of the frame in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
+
+    /**
+     * Callback for parameter data (SPS/PPS/VPS etc).
+     *
+     * Useful for hardware decoders which keep persistent state about the
+     * video parameters, and need to receive any changes to update that state.
+     *
+     * @param avctx the codec context
+     * @param type the nal unit type
+     * @param buf the nal unit data buffer
+     * @param buf_size the size of the nal unit in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*decode_params)(AVCodecContext *avctx, int type, const uint8_t *buf, uint32_t buf_size);
+
+    /**
+     * Callback for each slice.
+     *
+     * Meaningful slice information (codec specific) is guaranteed to
+     * be parsed at this point. This function is mandatory.
+     *
+     * @param avctx the codec context
+     * @param buf the slice data buffer base
+     * @param buf_size the size of the slice in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
+
+    /**
+     * Called at the end of each frame or field picture.
+     *
+     * The whole picture is parsed at this point and can now be sent
+     * to the hardware accelerator. This function is mandatory.
+     *
+     * @param avctx the codec context
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*end_frame)(AVCodecContext *avctx);
+
+    /**
+     * Size of per-frame hardware accelerator private data.
+     *
+     * Private data is allocated with av_mallocz() before
+     * AVCodecContext.get_buffer() and deallocated after
+     * AVCodecContext.release_buffer().
+     */
+    int frame_priv_data_size;
+
+    /**
+     * Initialize the hwaccel private data.
+     *
+     * This will be called from ff_get_format(), after hwaccel and
+     * hwaccel_context are set and the hwaccel private data in AVCodecInternal
+     * is allocated.
+     */
+    int (*init)(AVCodecContext *avctx);
+
+    /**
+     * Uninitialize the hwaccel private data.
+     *
+     * This will be called from get_format() or avcodec_close(), after hwaccel
+     * and hwaccel_context are already uninitialized.
+     */
+    int (*uninit)(AVCodecContext *avctx);
+
+    /**
+     * Size of the private data to allocate in
+     * AVCodecInternal.hwaccel_priv_data.
+     */
+    int priv_data_size;
+
+    /**
+     * Internal hwaccel capabilities.
+     */
+    int caps_internal;
+
+    /**
+     * Fill the given hw_frames context with current codec parameters. Called
+     * from get_format(). Refer to avcodec_get_hw_frames_parameters() for
+     * details.
+     *
+     * This CAN be called before AVHWAccel.init is called, and you must assume
+     * that avctx->hwaccel_priv_data is invalid.
+     */
+    int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
+} AVHWAccel;
+
+/**
+ * HWAccel is experimental and is thus avoided in favor of non experimental
+ * codecs
+ */
+#define AV_HWACCEL_CODEC_CAP_EXPERIMENTAL 0x0200
+
+/**
+ * Hardware acceleration should be used for decoding even if the codec level
+ * used is unknown or higher than the maximum supported level reported by the
+ * hardware driver.
+ *
+ * It's generally a good idea to pass this flag unless you have a specific
+ * reason not to, as hardware tends to under-report supported levels.
+ */
+#define AV_HWACCEL_FLAG_IGNORE_LEVEL (1 << 0)
+
+/**
+ * Hardware acceleration can output YUV pixel formats with a different chroma
+ * sampling than 4:2:0 and/or other than 8 bits per component.
+ */
+#define AV_HWACCEL_FLAG_ALLOW_HIGH_DEPTH (1 << 1)
+
+/**
+ * Hardware acceleration should still be attempted for decoding when the
+ * codec profile does not match the reported capabilities of the hardware.
+ *
+ * For example, this can be used to try to decode baseline profile H.264
+ * streams in hardware - it will often succeed, because many streams marked
+ * as baseline profile actually conform to constrained baseline profile.
+ *
+ * @warning If the stream is actually not supported then the behaviour is
+ *          undefined, and may include returning entirely incorrect output
+ *          while indicating success.
+ */
+#define AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH (1 << 2)
+
+/**
+ * Some hardware decoders (namely nvdec) can either output direct decoder
+ * surfaces, or make an on-device copy and return said copy.
+ * There is a hard limit on how many decoder surfaces there can be, and it
+ * cannot be accurately guessed ahead of time.
+ * For some processing chains, this can be okay, but others will run into the
+ * limit and in turn produce very confusing errors that require fine tuning of
+ * more or less obscure options by the user, or in extreme cases cannot be
+ * resolved at all without inserting an avfilter that forces a copy.
+ *
+ * Thus, the hwaccel will by default make a copy for safety and resilience.
+ * If a user really wants to minimize the number of copies, they can set this
+ * flag and ensure their processing chain does not exhaust the surface pool.
+ */
+#define AV_HWACCEL_FLAG_UNSAFE_OUTPUT (1 << 3)
+
+/**
+ * @}
+ */
+
+enum AVSubtitleType {
+    SUBTITLE_NONE,
+
+    SUBTITLE_BITMAP,                ///< A bitmap, AVSubtitleRect data/linesize will be set
+
+    /**
+     * Plain text, the text field must be set by the decoder and is
+     * authoritative. ass and bitmap (data) fields may contain approximations.
+     */
+    SUBTITLE_TEXT,
+
+    /**
+     * Formatted text, the ass field must be set by the decoder and is
+     * authoritative. bitmap (data) and text fields may contain approximations.
+     */
+    SUBTITLE_ASS,
+};
+
+#define AV_SUBTITLE_FLAG_FORCED 0x00000001
+
+typedef struct AVSubtitleRect {
+    int x;         ///< top left corner  of the bitmap, undefined when data is not set
+    int y;         ///< top left corner  of the bitmap, undefined when data is not set
+    int w;         ///< width            of the bitmap, undefined when data is not set
+    int h;         ///< height           of the bitmap, undefined when data is not set
+    int nb_colors; ///< number of colors in the bitmap, undefined when data is not set
+
+    /**
+     * data+linesize for the bitmap of this subtitle.
+     * Can be set for text/ass as well once they are rendered.
+     */
+    uint8_t *data[4];
+    int linesize[4];
+
+    enum AVSubtitleType type;
+
+    char *text;                     ///< 0 terminated plain UTF-8 text
+
+    /**
+     * 0 terminated ASS/SSA compatible event line.
+     * The presentation of this is unaffected by the other values in this
+     * struct.
+     */
+    char *ass;
+
+    int flags;
+} AVSubtitleRect;
+
+typedef struct AVSubtitle {
+    uint16_t format; ///< 0 = graphics
+    uint32_t start_display_time; ///< relative to packet pts, in ms
+    uint32_t end_display_time; ///< relative to packet pts, in ms
+    unsigned num_rects;
+    AVSubtitleRect **rects;
+    int64_t pts;    ///< Same as packet pts, in AV_TIME_BASE
+} AVSubtitle;
+
+/**
+ * Return the LIBAVCODEC_VERSION_INT constant.
+ */
+unsigned avcodec_version(void);
+
+/**
+ * Return the libavcodec build-time configuration.
+ */
+const char *avcodec_configuration(void);
+
+/**
+ * Return the libavcodec license.
+ */
+const char *avcodec_license(void);
+
+/**
+ * Allocate an AVCodecContext and set its fields to default values. The
+ * resulting struct should be freed with avcodec_free_context().
+ *
+ * @param codec if non-NULL, allocate private data and initialize defaults
+ *              for the given codec. It is illegal to then call avcodec_open2()
+ *              with a different codec.
+ *              If NULL, then the codec-specific defaults won't be initialized,
+ *              which may result in suboptimal default settings (this is
+ *              important mainly for encoders, e.g. libx264).
+ *
+ * @return An AVCodecContext filled with default values or NULL on failure.
+ */
+AVCodecContext *avcodec_alloc_context3(const AVCodec *codec);
+
+/**
+ * Free the codec context and everything associated with it and write NULL to
+ * the provided pointer.
+ */
+void avcodec_free_context(AVCodecContext **avctx);
+
+/**
+ * Get the AVClass for AVCodecContext. It can be used in combination with
+ * AV_OPT_SEARCH_FAKE_OBJ for examining options.
+ *
+ * @see av_opt_find().
+ */
+const AVClass *avcodec_get_class(void);
+
+/**
+ * Get the AVClass for AVSubtitleRect. It can be used in combination with
+ * AV_OPT_SEARCH_FAKE_OBJ for examining options.
+ *
+ * @see av_opt_find().
+ */
+const AVClass *avcodec_get_subtitle_rect_class(void);
+
+/**
+ * Fill the parameters struct based on the values from the supplied codec
+ * context. Any allocated fields in par are freed and replaced with duplicates
+ * of the corresponding fields in codec.
+ *
+ * @return >= 0 on success, a negative AVERROR code on failure
+ */
+int avcodec_parameters_from_context(AVCodecParameters *par,
+                                    const AVCodecContext *codec);
+
+/**
+ * Fill the codec context based on the values from the supplied codec
+ * parameters. Any allocated fields in codec that have a corresponding field in
+ * par are freed and replaced with duplicates of the corresponding field in par.
+ * Fields in codec that do not have a counterpart in par are not touched.
+ *
+ * @return >= 0 on success, a negative AVERROR code on failure.
+ */
+int avcodec_parameters_to_context(AVCodecContext *codec,
+                                  const AVCodecParameters *par);
+
+/**
+ * Initialize the AVCodecContext to use the given AVCodec. Prior to using this
+ * function the context has to be allocated with avcodec_alloc_context3().
+ *
+ * The functions avcodec_find_decoder_by_name(), avcodec_find_encoder_by_name(),
+ * avcodec_find_decoder() and avcodec_find_encoder() provide an easy way for
+ * retrieving a codec.
+ *
+ * @note Always call this function before using decoding routines (such as
+ * @ref avcodec_receive_frame()).
+ *
+ * @code
+ * av_dict_set(&opts, "b", "2.5M", 0);
+ * codec = avcodec_find_decoder(AV_CODEC_ID_H264);
+ * if (!codec)
+ *     exit(1);
+ *
+ * context = avcodec_alloc_context3(codec);
+ *
+ * if (avcodec_open2(context, codec, &opts) < 0)
+ *     exit(1);
+ * @endcode
+ *
+ * @param avctx The context to initialize.
+ * @param codec The codec to open this context for. If a non-NULL codec has been
+ *              previously passed to avcodec_alloc_context3()
+ *              for this context, then this parameter MUST be either NULL or
+ *              equal to the previously passed codec.
+ * @param options A dictionary filled with AVCodecContext and codec-private options.
+ *                On return this object will be filled with options that were not found.
+ *
+ * @return zero on success, a negative value on error
+ * @see avcodec_alloc_context3(), avcodec_find_decoder(), avcodec_find_encoder(),
+ *      av_dict_set(), av_opt_find().
+ */
+int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options);
+
+/**
+ * Close a given AVCodecContext and free all the data associated with it
+ * (but not the AVCodecContext itself).
+ *
+ * Calling this function on an AVCodecContext that hasn't been opened will free
+ * the codec-specific data allocated in avcodec_alloc_context3() with a non-NULL
+ * codec. Subsequent calls will do nothing.
+ *
+ * @note Do not use this function. Use avcodec_free_context() to destroy a
+ * codec context (either open or closed). Opening and closing a codec context
+ * multiple times is not supported anymore -- use multiple codec contexts
+ * instead.
+ */
+int avcodec_close(AVCodecContext *avctx);
+
+/**
+ * Free all allocated data in the given subtitle struct.
+ *
+ * @param sub AVSubtitle to free.
+ */
+void avsubtitle_free(AVSubtitle *sub);
+
+/**
+ * @}
+ */
+
+/**
+ * @addtogroup lavc_decoding
+ * @{
+ */
+
+/**
+ * The default callback for AVCodecContext.get_buffer2(). It is made public so
+ * it can be called by custom get_buffer2() implementations for decoders without
+ * AV_CODEC_CAP_DR1 set.
+ */
+int avcodec_default_get_buffer2(AVCodecContext *s, AVFrame *frame, int flags);
+
+/**
+ * The default callback for AVCodecContext.get_encode_buffer(). It is made public so
+ * it can be called by custom get_encode_buffer() implementations for encoders without
+ * AV_CODEC_CAP_DR1 set.
+ */
+int avcodec_default_get_encode_buffer(AVCodecContext *s, AVPacket *pkt, int flags);
+
+/**
+ * Modify width and height values so that they will result in a memory
+ * buffer that is acceptable for the codec if you do not use any horizontal
+ * padding.
+ *
+ * May only be used if a codec with AV_CODEC_CAP_DR1 has been opened.
+ */
+void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height);
+
+/**
+ * Modify width and height values so that they will result in a memory
+ * buffer that is acceptable for the codec if you also ensure that all
+ * line sizes are a multiple of the respective linesize_align[i].
+ *
+ * May only be used if a codec with AV_CODEC_CAP_DR1 has been opened.
+ */
+void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
+                               int linesize_align[AV_NUM_DATA_POINTERS]);
+
+#if FF_API_AVCODEC_CHROMA_POS /* tested by value, so a 0 definition removes the API */
+/**
+ * Converts AVChromaLocation to swscale x/y chroma position.
+ *
+ * The positions represent the chroma (0,0) position in a coordinate system
+ * with luma (0,0) representing the origin and luma(1,1) representing 256,256
+ *
+ * @param xpos  horizontal chroma sample position
+ * @param ypos  vertical   chroma sample position
+ * @deprecated Use av_chroma_location_enum_to_pos() instead.
+ */
+attribute_deprecated
+int avcodec_enum_to_chroma_pos(int *xpos, int *ypos, enum AVChromaLocation pos);
+
+/**
+ * Converts swscale x/y chroma position to AVChromaLocation.
+ *
+ * The positions represent the chroma (0,0) position in a coordinate system
+ * with luma (0,0) representing the origin and luma(1,1) representing 256,256
+ *
+ * @param xpos  horizontal chroma sample position
+ * @param ypos  vertical   chroma sample position
+ * @deprecated Use av_chroma_location_pos_to_enum() instead.
+ */
+attribute_deprecated
+enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos);
+#endif /* FF_API_AVCODEC_CHROMA_POS */
+
+/**
+ * Decode a subtitle message.
+ * Return a negative value on error, otherwise return the number of bytes used.
+ * If no subtitle could be decompressed, got_sub_ptr is zero.
+ * Otherwise, the subtitle is stored in *sub.
+ * Note that AV_CODEC_CAP_DR1 is not available for subtitle codecs. This is for
+ * simplicity, because the performance difference is expected to be negligible
+ * and reusing a get_buffer written for video codecs would probably perform badly
+ * due to a potentially very different allocation pattern.
+ *
+ * Some decoders (those marked with AV_CODEC_CAP_DELAY) have a delay between input
+ * and output. This means that for some packets they will not immediately
+ * produce decoded output and need to be flushed at the end of decoding to get
+ * all the decoded data. Flushing is done by calling this function with packets
+ * with avpkt->data set to NULL and avpkt->size set to 0 until it stops
+ * returning subtitles. It is safe to flush even those decoders that are not
+ * marked with AV_CODEC_CAP_DELAY, then no subtitles will be returned.
+ *
+ * @note The AVCodecContext MUST have been opened with @ref avcodec_open2()
+ * before packets may be fed to the decoder.
+ *
+ * @param avctx the codec context
+ * @param[out] sub The preallocated AVSubtitle in which the decoded subtitle will be stored,
+ *                 must be freed with avsubtitle_free if *got_sub_ptr is set.
+ * @param[in,out] got_sub_ptr Zero if no subtitle could be decompressed, otherwise, it is nonzero.
+ * @param[in] avpkt The input AVPacket containing the input buffer.
+ */
+int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
+                             int *got_sub_ptr, const AVPacket *avpkt);
+
+/**
+ * Supply raw packet data as input to a decoder.
+ *
+ * Internally, this call will copy relevant AVCodecContext fields, which can
+ * influence decoding per-packet, and apply them when the packet is actually
+ * decoded. (For example AVCodecContext.skip_frame, which might direct the
+ * decoder to drop the frame contained by the packet sent with this function.)
+ *
+ * @warning The input buffer, avpkt->data must be AV_INPUT_BUFFER_PADDING_SIZE
+ *          larger than the actual read bytes because some optimized bitstream
+ *          readers read 32 or 64 bits at once and could read over the end.
+ *
+ * @note The AVCodecContext MUST have been opened with @ref avcodec_open2()
+ *       before packets may be fed to the decoder.
+ *
+ * @param avctx codec context
+ * @param[in] avpkt The input AVPacket. Usually, this will be a single video
+ *                  frame, or several complete audio frames.
+ *                  Ownership of the packet remains with the caller, and the
+ *                  decoder will not write to the packet. The decoder may create
+ *                  a reference to the packet data (or copy it if the packet is
+ *                  not reference-counted).
+ *                  Unlike with older APIs, the packet is always fully consumed,
+ *                  and if it contains multiple frames (e.g. some audio codecs),
+ *                  will require you to call avcodec_receive_frame() multiple
+ *                  times afterwards before you can send a new packet.
+ *                  It can be NULL (or an AVPacket with data set to NULL and
+ *                  size set to 0); in this case, it is considered a flush
+ *                  packet, which signals the end of the stream. Sending the
+ *                  first flush packet will return success. Subsequent ones are
+ *                  unnecessary and will return AVERROR_EOF. If the decoder
+ *                  still has frames buffered, it will return them after sending
+ *                  a flush packet.
+ *
+ * @retval 0                 success
+ * @retval AVERROR(EAGAIN)   input is not accepted in the current state - user
+ *                           must read output with avcodec_receive_frame() (once
+ *                           all output is read, the packet should be resent,
+ *                           and the call will not fail with EAGAIN).
+ * @retval AVERROR_EOF       the decoder has been flushed, and no new packets can be
+ *                           sent to it (also returned if more than 1 flush
+ *                           packet is sent)
+ * @retval AVERROR(EINVAL)   codec not opened, it is an encoder, or requires flush
+ * @retval AVERROR(ENOMEM)   failed to add packet to internal queue, or similar
+ * @retval "another negative error code" legitimate decoding errors
+ */
+int avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt);
+
+/**
+ * Return decoded output data from a decoder or encoder (when the
+ * AV_CODEC_FLAG_RECON_FRAME flag is used).
+ *
+ * @param avctx codec context
+ * @param frame This will be set to a reference-counted video or audio
+ *              frame (depending on the decoder type) allocated by the
+ *              codec. Note that the function will always call
+ *              av_frame_unref(frame) before doing anything else.
+ *
+ * @retval 0                success, a frame was returned
+ * @retval AVERROR(EAGAIN)  output is not available in this state - user must
+ *                          try to send new input
+ * @retval AVERROR_EOF      the codec has been fully flushed, and there will be
+ *                          no more output frames
+ * @retval AVERROR(EINVAL)  codec not opened, or it is an encoder without the
+ *                          AV_CODEC_FLAG_RECON_FRAME flag enabled
+ * @retval AVERROR_INPUT_CHANGED current decoded frame has changed parameters with
+ *                          respect to first decoded frame. Applicable when flag
+ *                          AV_CODEC_FLAG_DROPCHANGED is set.
+ * @retval "other negative error code" legitimate decoding errors
+ */
+int avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame);
+
+/**
+ * Supply a raw video or audio frame to the encoder. Use avcodec_receive_packet()
+ * to retrieve buffered output packets.
+ *
+ * @param avctx     codec context
+ * @param[in] frame AVFrame containing the raw audio or video frame to be encoded.
+ *                  Ownership of the frame remains with the caller, and the
+ *                  encoder will not write to the frame. The encoder may create
+ *                  a reference to the frame data (or copy it if the frame is
+ *                  not reference-counted).
+ *                  It can be NULL, in which case it is considered a flush
+ *                  packet.  This signals the end of the stream. If the encoder
+ *                  still has packets buffered, it will return them after this
+ *                  call. Once flushing mode has been entered, additional flush
+ *                  packets are ignored, and sending frames will return
+ *                  AVERROR_EOF.
+ *
+ *                  For audio:
+ *                  If AV_CODEC_CAP_VARIABLE_FRAME_SIZE is set, then each frame
+ *                  can have any number of samples.
+ *                  If it is not set, frame->nb_samples must be equal to
+ *                  avctx->frame_size for all frames except the last.
+ *                  The final frame may be smaller than avctx->frame_size.
+ * @retval 0                 success
+ * @retval AVERROR(EAGAIN)   input is not accepted in the current state - user must
+ *                           read output with avcodec_receive_packet() (once all
+ *                           output is read, the frame should be resent, and the
+ *                           call will not fail with EAGAIN).
+ * @retval AVERROR_EOF       the encoder has been flushed, and no new frames can
+ *                           be sent to it
+ * @retval AVERROR(EINVAL)   codec not opened, it is a decoder, or requires flush
+ * @retval AVERROR(ENOMEM)   failed to add packet to internal queue, or similar
+ * @retval "another negative error code" legitimate encoding errors
+ */
+int avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame);
+
+/**
+ * Read encoded data from the encoder.
+ *
+ * @param avctx codec context
+ * @param avpkt This will be set to a reference-counted packet allocated by the
+ *              encoder. Note that the function will always call
+ *              av_packet_unref(avpkt) before doing anything else.
+ * @retval 0               success
+ * @retval AVERROR(EAGAIN) output is not available in the current state - user must
+ *                         try to send input
+ * @retval AVERROR_EOF     the encoder has been fully flushed, and there will be no
+ *                         more output packets
+ * @retval AVERROR(EINVAL) codec not opened, or it is a decoder
+ * @retval "another negative error code" legitimate encoding errors
+ */
+int avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt);
+
+/**
+ * Create and return an AVHWFramesContext with values adequate for hardware
+ * decoding. This is meant to get called from the get_format callback, and is
+ * a helper for preparing an AVHWFramesContext for AVCodecContext.hw_frames_ctx.
+ * This API is for decoding with certain hardware acceleration modes/APIs only.
+ *
+ * The returned AVHWFramesContext is not initialized. The caller must do this
+ * with av_hwframe_ctx_init().
+ *
+ * Calling this function is not a requirement, but makes it simpler to avoid
+ * codec or hardware API specific details when manually allocating frames.
+ *
+ * Alternatively to this, an API user can set AVCodecContext.hw_device_ctx,
+ * which sets up AVCodecContext.hw_frames_ctx fully automatically, and makes
+ * it unnecessary to call this function or to care about
+ * AVHWFramesContext initialization at all.
+ *
+ * There are a number of requirements for calling this function:
+ *
+ * - It must be called from get_format with the same avctx parameter that was
+ *   passed to get_format. Calling it outside of get_format is not allowed, and
+ *   can trigger undefined behavior.
+ * - The function is not always supported (see description of return values).
+ *   Even if this function returns successfully, hwaccel initialization could
+ *   fail later. (The degree to which implementations check whether the stream
+ *   is actually supported varies. Some do this check only after the user's
+ *   get_format callback returns.)
+ * - The hw_pix_fmt must be one of the choices suggested by get_format. If the
+ *   user decides to use an AVHWFramesContext prepared with this API function,
+ *   the user must return the same hw_pix_fmt from get_format.
+ * - The device_ref passed to this function must support the given hw_pix_fmt.
+ * - After calling this API function, it is the user's responsibility to
+ *   initialize the AVHWFramesContext (returned by the out_frames_ref parameter),
+ *   and to set AVCodecContext.hw_frames_ctx to it. If done, this must be done
+ *   before returning from get_format (this is implied by the normal
+ *   AVCodecContext.hw_frames_ctx API rules).
+ * - The AVHWFramesContext parameters may change every time get_format is
+ *   called. Also, AVCodecContext.hw_frames_ctx is reset before get_format. So
+ *   you are inherently required to go through this process again on every
+ *   get_format call.
+ * - It is perfectly possible to call this function without actually using
+ *   the resulting AVHWFramesContext. One use-case might be trying to reuse a
+ *   previously initialized AVHWFramesContext, and calling this API function
+ *   only to test whether the required frame parameters have changed.
+ * - Fields that use dynamically allocated values of any kind must not be set
+ *   by the user unless setting them is explicitly allowed by the documentation.
+ *   If the user sets AVHWFramesContext.free and AVHWFramesContext.user_opaque,
+ *   the new free callback must call the potentially set previous free callback.
+ *   This API call may set any dynamically allocated fields, including the free
+ *   callback.
+ *
+ * The function will set at least the following fields on AVHWFramesContext
+ * (potentially more, depending on hwaccel API):
+ *
+ * - All fields set by av_hwframe_ctx_alloc().
+ * - Set the format field to hw_pix_fmt.
+ * - Set the sw_format field to the most suited and most versatile format. (An
+ *   implication is that this will prefer generic formats over opaque formats
+ *   with arbitrary restrictions, if possible.)
+ * - Set the width/height fields to the coded frame size, rounded up to the
+ *   API-specific minimum alignment.
+ * - Only _if_ the hwaccel requires a pre-allocated pool: set the initial_pool_size
+ *   field to the number of maximum reference surfaces possible with the codec,
+ *   plus 1 surface for the user to work (meaning the user can safely reference
+ *   at most 1 decoded surface at a time), plus additional buffering introduced
+ *   by frame threading. If the hwaccel does not require pre-allocation, the
+ *   field is left to 0, and the decoder will allocate new surfaces on demand
+ *   during decoding.
+ * - Possibly AVHWFramesContext.hwctx fields, depending on the underlying
+ *   hardware API.
+ *
+ * Essentially, out_frames_ref returns the same as av_hwframe_ctx_alloc(), but
+ * with basic frame parameters set.
+ *
+ * The function is stateless, and does not change the AVCodecContext or the
+ * device_ref AVHWDeviceContext.
+ *
+ * @param avctx The context which is currently calling get_format, and which
+ *              implicitly contains all state needed for filling the returned
+ *              AVHWFramesContext properly.
+ * @param device_ref A reference to the AVHWDeviceContext describing the device
+ *                   which will be used by the hardware decoder.
+ * @param hw_pix_fmt The hwaccel format you are going to return from get_format.
+ * @param out_frames_ref On success, set to a reference to an _uninitialized_
+ *                       AVHWFramesContext, created from the given device_ref.
+ *                       Fields will be set to values required for decoding.
+ *                       Not changed if an error is returned.
+ * @return zero on success, a negative value on error. The following error codes
+ *         have special semantics:
+ *      AVERROR(ENOENT): the decoder does not support this functionality. Setup
+ *                       is always manual, or it is a decoder which does not
+ *                       support setting AVCodecContext.hw_frames_ctx at all,
+ *                       or it is a software format.
+ *      AVERROR(EINVAL): it is known that hardware decoding is not supported for
+ *                       this configuration, or the device_ref is not supported
+ *                       for the hwaccel referenced by hw_pix_fmt.
+ */
+int avcodec_get_hw_frames_parameters(AVCodecContext *avctx,
+                                     AVBufferRef *device_ref,
+                                     enum AVPixelFormat hw_pix_fmt,
+                                     AVBufferRef **out_frames_ref);
+
+
+
+/**
+ * @defgroup lavc_parsing Frame parsing
+ * @{
+ */
+
+enum AVPictureStructure { // how a coded picture is stored: as a frame, a top field or a bottom field
+    AV_PICTURE_STRUCTURE_UNKNOWN,      ///< structure is not known
+    AV_PICTURE_STRUCTURE_TOP_FIELD,    ///< coded as top field
+    AV_PICTURE_STRUCTURE_BOTTOM_FIELD, ///< coded as bottom field
+    AV_PICTURE_STRUCTURE_FRAME,        ///< coded as frame
+};
+
+typedef struct AVCodecParserContext { // state of one parser instance, as returned by av_parser_init()
+    void *priv_data; /* presumably parser-private state sized by AVCodecParser.priv_data_size -- TODO confirm */
+    const struct AVCodecParser *parser; /* presumably the parser implementation behind this context -- TODO confirm */
+    int64_t frame_offset; /* offset of the current frame */
+    int64_t cur_offset; /* current offset
+                           (incremented by each av_parser_parse2()) */
+    int64_t next_frame_offset; /* offset of the next frame */
+    /* video info */
+    int pict_type; /* XXX: Put it back in AVCodecContext. */
+    /**
+     * This field is used for proper frame duration computation in lavf.
+     * It signals how much longer the frame duration of the current frame
+     * is compared to normal frame duration.
+     *
+     * frame_duration = (1 + repeat_pict) * time_base
+     *
+     * It is used by codecs like H.264 to display telecined material.
+     */
+    int repeat_pict; /* XXX: Put it back in AVCodecContext. */
+    int64_t pts;     /* pts of the current frame */
+    int64_t dts;     /* dts of the current frame */
+
+    /* private data */
+    int64_t last_pts;
+    int64_t last_dts;
+    int fetch_timestamp;
+
+#define AV_PARSER_PTS_NB 4 /* length of the cur_frame_* arrays in this struct */
+    int cur_frame_start_index;
+    int64_t cur_frame_offset[AV_PARSER_PTS_NB];
+    int64_t cur_frame_pts[AV_PARSER_PTS_NB];
+    int64_t cur_frame_dts[AV_PARSER_PTS_NB];
+
+    int flags; /* presumably a bitmask of the PARSER_FLAG_* values below -- TODO confirm */
+#define PARSER_FLAG_COMPLETE_FRAMES           0x0001
+#define PARSER_FLAG_ONCE                      0x0002
+/// Set if the parser has a valid file offset
+#define PARSER_FLAG_FETCHED_OFFSET            0x0004
+#define PARSER_FLAG_USE_CODEC_TS              0x1000
+
+    int64_t offset;      ///< byte offset from starting packet start
+    int64_t cur_frame_end[AV_PARSER_PTS_NB];
+
+    /**
+     * Set by parser to 1 for key frames and 0 for non-key frames.
+     * It is initialized to -1, so if the parser doesn't set this flag,
+     * old-style fallback using AV_PICTURE_TYPE_I picture type as key frames
+     * will be used.
+     */
+    int key_frame;
+
+    // Timestamp generation support:
+    /**
+     * Synchronization point for start of timestamp generation.
+     *
+     * Set to >0 for sync point, 0 for no sync point and <0 for undefined
+     * (default).
+     *
+     * For example, this corresponds to presence of H.264 buffering period
+     * SEI message.
+     */
+    int dts_sync_point;
+
+    /**
+     * Offset of the current timestamp against last timestamp sync point in
+     * units of AVCodecContext.time_base.
+     *
+     * Set to INT_MIN when dts_sync_point is unused. Otherwise, it must
+     * contain a valid timestamp offset.
+     *
+     * Note that the timestamp of a sync point usually has a nonzero
+     * dts_ref_dts_delta, which refers to the previous sync point. The offset of
+     * the next frame after a timestamp sync point will usually be 1.
+     *
+     * For example, this corresponds to H.264 cpb_removal_delay.
+     */
+    int dts_ref_dts_delta;
+
+    /**
+     * Presentation delay of current frame in units of AVCodecContext.time_base.
+     *
+     * Set to INT_MIN when dts_sync_point is unused. Otherwise, it must
+     * contain valid non-negative timestamp delta (presentation time of a frame
+     * must not lie in the past).
+     *
+     * This delay represents the difference between decoding and presentation
+     * time of the frame.
+     *
+     * For example, this corresponds to H.264 dpb_output_delay.
+     */
+    int pts_dts_delta;
+
+    /**
+     * Position of the packet in file.
+     *
+     * Analogous to cur_frame_pts/dts
+     */
+    int64_t cur_frame_pos[AV_PARSER_PTS_NB];
+
+    /**
+     * Byte position of currently parsed frame in stream.
+     */
+    int64_t pos;
+
+    /**
+     * Previous frame byte position.
+     */
+    int64_t last_pos;
+
+    /**
+     * Duration of the current frame.
+     * For audio, this is in units of 1 / AVCodecContext.sample_rate.
+     * For all other types, this is in units of AVCodecContext.time_base.
+     */
+    int duration;
+
+    enum AVFieldOrder field_order;
+
+    /**
+     * Indicate whether a picture is coded as a frame, top field or bottom field.
+     *
+     * For example, H.264 field_pic_flag equal to 0 corresponds to
+     * AV_PICTURE_STRUCTURE_FRAME. An H.264 picture with field_pic_flag
+     * equal to 1 and bottom_field_flag equal to 0 corresponds to
+     * AV_PICTURE_STRUCTURE_TOP_FIELD.
+     */
+    enum AVPictureStructure picture_structure;
+
+    /**
+     * Picture number incremented in presentation or output order.
+     * This field may be reinitialized at the first picture of a new sequence.
+     *
+     * For example, this corresponds to H.264 PicOrderCnt.
+     */
+    int output_picture_number;
+
+    /**
+     * Dimensions of the decoded video intended for presentation.
+     */
+    int width;
+    int height;
+
+    /**
+     * Dimensions of the coded video.
+     */
+    int coded_width;
+    int coded_height;
+
+    /**
+     * The format of the coded data, corresponds to enum AVPixelFormat for video
+     * and to enum AVSampleFormat for audio.
+     *
+     * Note that a decoder can have considerable freedom in how exactly it
+     * decodes the data, so the format reported here might be different from the
+     * one returned by a decoder.
+     */
+    int format;
+} AVCodecParserContext;
+
+typedef struct AVCodecParser { // callbacks and codec IDs of one parser; referenced by AVCodecParserContext.parser
+    int codec_ids[7]; /* several codec IDs are permitted (the array holds up to 7) */
+    int priv_data_size; /* presumably the size of AVCodecParserContext.priv_data -- TODO confirm */
+    int (*parser_init)(AVCodecParserContext *s);
+    /* This callback never returns an error; a negative value means that
+     * the frame start was in a previous packet. */
+    int (*parser_parse)(AVCodecParserContext *s,
+                        AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size);
+    void (*parser_close)(AVCodecParserContext *s);
+    int (*split)(AVCodecContext *avctx, const uint8_t *buf, int buf_size);
+} AVCodecParser;
+
+/**
+ * Iterate over all registered codec parsers.
+ *
+ * @param opaque a pointer where libavcodec will store the iteration state. Must
+ *               point to NULL to start the iteration.
+ *
+ * @return the next registered codec parser or NULL when the iteration is
+ *         finished
+ */
+const AVCodecParser *av_parser_iterate(void **opaque);
+
+AVCodecParserContext *av_parser_init(int codec_id);
+
+/**
+ * Parse a packet.
+ *
+ * @param s             parser context.
+ * @param avctx         codec context.
+ * @param poutbuf       set to pointer to parsed buffer or NULL if not yet finished.
+ * @param poutbuf_size  set to size of parsed buffer or zero if not yet finished.
+ * @param buf           input buffer.
+ * @param buf_size      buffer size in bytes without the padding. I.e. the full buffer
+ *                      size is assumed to be buf_size + AV_INPUT_BUFFER_PADDING_SIZE.
+ *                      To signal EOF, this should be 0 (so that the last frame
+ *                      can be output).
+ * @param pts           input presentation timestamp.
+ * @param dts           input decoding timestamp.
+ * @param pos           input byte position in stream.
+ * @return the number of bytes of the input bitstream used.
+ *
+ * Example:
+ * @code
+ *   while(in_len){
+ *       len = av_parser_parse2(myparser, AVCodecContext, &data, &size,
+ *                                        in_data, in_len,
+ *                                        pts, dts, pos);
+ *       in_data += len;
+ *       in_len  -= len;
+ *
+ *       if(size)
+ *          decode_frame(data, size);
+ *   }
+ * @endcode
+ */
+int av_parser_parse2(AVCodecParserContext *s,
+                     AVCodecContext *avctx,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size,
+                     int64_t pts, int64_t dts,
+                     int64_t pos);
+
+void av_parser_close(AVCodecParserContext *s);
+
+/**
+ * @}
+ * @}
+ */
+
+/**
+ * @addtogroup lavc_encoding
+ * @{
+ */
+
+int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                            const AVSubtitle *sub);
+
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup lavc_misc Utility functions
+ * @ingroup libavc
+ *
+ * Miscellaneous utility functions related to both encoding and decoding
+ * (or neither).
+ * @{
+ */
+
+/**
+ * @defgroup lavc_misc_pixfmt Pixel formats
+ *
+ * Functions for working with pixel formats.
+ * @{
+ */
+
+/**
+ * Return a value representing the fourCC code associated to the
+ * pixel format pix_fmt, or 0 if no associated fourCC code can be
+ * found.
+ */
+unsigned int avcodec_pix_fmt_to_codec_tag(enum AVPixelFormat pix_fmt);
+
+/**
+ * Find the best pixel format to convert to given a certain source pixel
+ * format.  When converting from one pixel format to another, information loss
+ * may occur.  For example, when converting from RGB24 to GRAY, the color
+ * information will be lost. Similarly, other losses occur when converting from
+ * some formats to other formats. avcodec_find_best_pix_fmt_of_list() searches
+ * which of the given pixel formats should be used to suffer the least amount
+ * of loss. The pixel formats from which it chooses one are determined by the
+ * pix_fmt_list parameter.
+ *
+ *
+ * @param[in] pix_fmt_list AV_PIX_FMT_NONE terminated array of pixel formats to choose from
+ * @param[in] src_pix_fmt source pixel format
+ * @param[in] has_alpha Whether the source pixel format alpha channel is used.
+ * @param[out] loss_ptr Combination of flags informing you what kind of losses will occur.
+ * @return The best pixel format to convert to or -1 if none was found.
+ */
+enum AVPixelFormat avcodec_find_best_pix_fmt_of_list(const enum AVPixelFormat *pix_fmt_list,
+                                            enum AVPixelFormat src_pix_fmt,
+                                            int has_alpha, int *loss_ptr);
+
+enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum AVPixelFormat * fmt);
+
+/**
+ * @}
+ */
+
+void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode);
+
+int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size);
+int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2, int, int),void *arg, int *ret, int count);
+//FIXME func typedef
+
+/**
+ * Fill AVFrame audio data and linesize pointers.
+ *
+ * The buffer buf must be a preallocated buffer with a size big enough
+ * to contain the specified samples amount. The filled AVFrame data
+ * pointers will point to this buffer.
+ *
+ * AVFrame extended_data channel pointers are allocated if necessary for
+ * planar audio.
+ *
+ * @param frame       the AVFrame
+ *                    frame->nb_samples must be set prior to calling the
+ *                    function. This function fills in frame->data,
+ *                    frame->extended_data, frame->linesize[0].
+ * @param nb_channels channel count
+ * @param sample_fmt  sample format
+ * @param buf         buffer to use for frame data
+ * @param buf_size    size of buffer
+ * @param align       plane size sample alignment (0 = default)
+ * @return            >=0 on success, negative error code on failure
+ * @todo return the size in bytes required to store the samples in
+ * case of success, at the next libavutil bump
+ */
+int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels,
+                             enum AVSampleFormat sample_fmt, const uint8_t *buf,
+                             int buf_size, int align);
+
+/**
+ * Reset the internal codec state / flush internal buffers. Should be called
+ * e.g. when seeking or when switching to a different stream.
+ *
+ * @note for decoders, this function just releases any references the decoder
+ * might keep internally, but the caller's references remain valid.
+ *
+ * @note for encoders, this function will only do something if the encoder
+ * declares support for AV_CODEC_CAP_ENCODER_FLUSH. When called, the encoder
+ * will drain any remaining packets, and can then be re-used for a different
+ * stream (as opposed to sending a null frame which will leave the encoder
+ * in a permanent EOF state after draining). This can be desirable if the
+ * cost of tearing down and replacing the encoder instance is high.
+ */
+void avcodec_flush_buffers(AVCodecContext *avctx);
+
+/**
+ * Return audio frame duration.
+ *
+ * @param avctx        codec context
+ * @param frame_bytes  size of the frame, or 0 if unknown
+ * @return             frame duration, in samples, if known. 0 if not able to
+ *                     determine.
+ */
+int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes);
+
+/* memory */
+
+/**
+ * Same behaviour as av_fast_malloc, but the buffer has additional
+ * AV_INPUT_BUFFER_PADDING_SIZE bytes at the end which will always be 0.
+ *
+ * In addition the whole buffer will initially and after resizes
+ * be 0-initialized so that no uninitialized data will ever appear.
+ */
+void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size);
+
+/**
+ * Same behaviour as av_fast_padded_malloc, except that the buffer will always
+ * be 0-initialized after the call.
+ */
+void av_fast_padded_mallocz(void *ptr, unsigned int *size, size_t min_size);
+
+/**
+ * @return a positive value if s is open (i.e. avcodec_open2() was called on it
+ * with no corresponding avcodec_close()), 0 otherwise.
+ */
+int avcodec_is_open(AVCodecContext *s);
+
+/**
+ * @}
+ */
+
+#endif /* AVCODEC_AVCODEC_H */
diff --git a/media/ffvpx/libavcodec/avcodec.symbols b/media/ffvpx/libavcodec/avcodec.symbols
new file mode 100644
index 0000000000..b15862fa50
--- /dev/null
+++ b/media/ffvpx/libavcodec/avcodec.symbols
@@ -0,0 +1,81 @@
+av_codec_ffversion
+av_codec_is_decoder
+av_codec_is_encoder
+av_codec_iterate
+av_fast_padded_malloc
+av_fast_padded_mallocz
+av_get_audio_frame_duration
+av_get_bits_per_sample
+av_get_exact_bits_per_sample
+av_get_pcm_codec
+av_get_profile_name
+av_grow_packet
+av_init_packet
+av_new_packet
+av_packet_copy_props
+av_packet_free_side_data
+av_packet_from_data
+av_packet_get_side_data
+av_packet_move_ref
+av_packet_new_side_data
+av_packet_pack_dictionary
+av_packet_ref
+av_packet_rescale_ts
+av_packet_shrink_side_data
+av_packet_side_data_name
+av_packet_unpack_dictionary
+av_packet_unref
+av_parser_close
+av_parser_init
+av_parser_parse2
+#ifdef MOZ_LIBAV_FFT
+av_rdft_calc
+av_rdft_end
+av_rdft_init
+#endif
+av_shrink_packet
+av_vorbis_parse_frame
+av_vorbis_parse_frame_flags
+av_vorbis_parse_free
+av_vorbis_parse_init
+av_vorbis_parse_reset
+av_xiphlacing
+avcodec_align_dimensions
+avcodec_align_dimensions2
+avcodec_alloc_context3
+avcodec_chroma_pos_to_enum
+avcodec_close
+avcodec_configuration
+avcodec_decode_subtitle2
+avcodec_default_execute
+avcodec_default_execute2
+avcodec_default_get_buffer2
+avcodec_default_get_format
+avcodec_descriptor_get
+avcodec_descriptor_get_by_name
+avcodec_descriptor_next
+avcodec_enum_to_chroma_pos
+avcodec_fill_audio_frame
+avcodec_find_decoder
+avcodec_find_decoder_by_name
+avcodec_find_encoder
+avcodec_find_encoder_by_name
+avcodec_flush_buffers
+avcodec_free_context
+avcodec_get_class
+avcodec_get_hw_config
+avcodec_get_name
+avcodec_get_subtitle_rect_class
+avcodec_get_type
+avcodec_is_open
+avcodec_license
+avcodec_open2
+avcodec_string
+avcodec_version
+avsubtitle_free
+avcodec_send_packet
+avcodec_receive_frame
+ff_init_vlc_from_lengths
+ff_init_vlc_sparse
+ff_mpa_freq_tab
+ff_mpa_bitrate_tab
diff --git a/media/ffvpx/libavcodec/avdct.c b/media/ffvpx/libavcodec/avdct.c
new file mode 100644
index 0000000000..e8fa41f73b
--- /dev/null
+++ b/media/ffvpx/libavcodec/avdct.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2014 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "idctdsp.h"
+#include "fdctdsp.h"
+#include "pixblockdsp.h"
+#include "avdct.h"
+
+#define OFFSET(x) offsetof(AVDCT,x) // byte offset of field x inside AVDCT, used by the option table
+#define DEFAULT 0 //should be NAN, but NAN is not a constant expression in glibc as ANSI/ISO C requires
+//short aliases, because the full AV_OPT_FLAG_* names are too long to be readable
+#define V AV_OPT_FLAG_VIDEO_PARAM
+#define A AV_OPT_FLAG_AUDIO_PARAM
+#define E AV_OPT_FLAG_ENCODING_PARAM
+#define D AV_OPT_FLAG_DECODING_PARAM
+
+static const AVOption avdct_options[] = { // options of the AVDCT context; AV_OPT_TYPE_CONST rows name values for the option with the same trailing unit string
+{"dct", "DCT algorithm", OFFSET(dct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E, "dct"}, // DCT selection, stored in AVDCT.dct_algo
+{"auto", "autoselect a good one", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_AUTO }, INT_MIN, INT_MAX, V|E, "dct"},
+{"fastint", "fast integer (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FASTINT }, INT_MIN, INT_MAX, V|E, "dct"},
+{"int", "accurate integer", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_INT }, INT_MIN, INT_MAX, V|E, "dct"},
+{"mmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_MMX }, INT_MIN, INT_MAX, V|E, "dct"},
+{"altivec", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_ALTIVEC }, INT_MIN, INT_MAX, V|E, "dct"},
+{"faan", "floating point AAN DCT (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FAAN }, INT_MIN, INT_MAX, V|E, "dct"},
+
+{"idct", "select IDCT implementation", OFFSET(idct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E|D, "idct"}, // IDCT selection, stored in AVDCT.idct_algo
+{"auto", "autoselect a good one", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_AUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"int", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_INT }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simple", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLE }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplemmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"arm", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"altivec", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ALTIVEC }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearm", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearmv5te", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV5TE }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearmv6", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV6 }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simpleneon", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLENEON }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"xvid", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"xvidmmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"}, // alias: same FF_IDCT_XVID value as "xvid"
+{"faani", "floating point AAN IDCT (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"},
+{"simpleauto", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEAUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
+
+{"bits_per_sample", "", OFFSET(bits_per_sample), AV_OPT_TYPE_INT, {.i64 = 8 }, 0, 14, 0,}, // default 8, accepted range 0..14, no flags
+{NULL}, // end-of-table marker
+};
+
+static const AVClass avdct_class = { /* AVClass installed in every AVDCT by avcodec_dct_alloc() */
+    .version    = LIBAVUTIL_VERSION_INT,
+    .option     = avdct_options,
+    .class_name = "AVDCT",
+};
+
+const AVClass *avcodec_dct_get_class(void)
+{
+    return &avdct_class; /* the file-local AVClass that avcodec_dct_alloc() also installs in each context */
+}
+
+AVDCT *avcodec_dct_alloc(void)
+{
+    /* Allocate a new AVDCT; NULL is handed back unchanged if allocation fails. */
+    AVDCT *ctx = av_mallocz(sizeof(*ctx));
+
+    if (ctx) {
+        /* Install the class, then let the option table fill in the defaults. */
+        ctx->av_class = &avdct_class;
+        av_opt_set_defaults(ctx);
+    }
+    return ctx;
+}
+
+int avcodec_dct_init(AVDCT *dsp)
+{
+    /* Fill dsp with the DCT/IDCT/pixel helpers chosen by its options, using a
+     * scratch codec context; returns 0, or AVERROR(ENOMEM) if that allocation fails. */
+    AVCodecContext *scratch = avcodec_alloc_context3(NULL);
+    if (scratch == NULL)
+        return AVERROR(ENOMEM);
+
+    scratch->bits_per_raw_sample = dsp->bits_per_sample;
+    scratch->dct_algo            = dsp->dct_algo;
+    scratch->idct_algo           = dsp->idct_algo;
+
+#define COPY(from, field) memcpy(&dsp->field, &from.field, sizeof(dsp->field))
+#if CONFIG_IDCTDSP
+    {
+        IDCTDSPContext idct_ctx = {0};
+        ff_idctdsp_init(&idct_ctx, scratch);
+        COPY(idct_ctx, idct_permutation);
+        COPY(idct_ctx, idct);
+    }
+#endif
+
+#if CONFIG_FDCTDSP
+    {
+        FDCTDSPContext fdct_ctx;
+        ff_fdctdsp_init(&fdct_ctx, scratch);
+        COPY(fdct_ctx, fdct);
+    }
+#endif
+
+#if CONFIG_PIXBLOCKDSP
+    {
+        PixblockDSPContext pix_ctx;
+        ff_pixblockdsp_init(&pix_ctx, scratch);
+        COPY(pix_ctx, get_pixels_unaligned);
+        COPY(pix_ctx, get_pixels);
+    }
+#endif
+
+    /* Everything needed has been copied into dsp; drop the scratch context. */
+    avcodec_free_context(&scratch);
+    return 0;
+}
diff --git a/media/ffvpx/libavcodec/avdct.h b/media/ffvpx/libavcodec/avdct.h
new file mode 100644
index 0000000000..6411fab6f6
--- /dev/null
+++ b/media/ffvpx/libavcodec/avdct.h
@@ -0,0 +1,88 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVDCT_H
+#define AVCODEC_AVDCT_H
+
+#include "libavutil/opt.h"
+
+/**
+ * AVDCT context.
+ * @note function pointers can be NULL if the specific features have been
+ *       disabled at build time.
+ */
+typedef struct AVDCT {
+    const AVClass *av_class;
+    /** IDCT function; copied from the IDCT DSP context by avcodec_dct_init(). */
+    void (*idct)(int16_t *block /* align 16 */);
+
+    /**
+     * IDCT input permutation.
+     * Several optimized IDCTs need a permutated input (relative to the
+     * normal order of the reference IDCT).
+     * This permutation must be performed before the idct_put/add.
+     * Note: normally this can be merged with the zigzag/alternate scan.
+     * An example to avoid confusion:
+     * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...)
+     * - (x -> reference DCT -> reference IDCT -> x)
+     * - (x -> reference DCT -> simple_mmx_perm = idct_permutation
+     *    -> simple_idct_mmx -> x)
+     * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant
+     *    -> simple_idct_mmx -> ...)
+     */
+    uint8_t idct_permutation[64];
+
+    /** Forward DCT; copied from the FDCT DSP context by avcodec_dct_init(). */
+    void (*fdct)(int16_t *block /* align 16 */);
+
+    /**
+     * DCT algorithm.
+     * Must be set through AVOptions.
+     */
+    int dct_algo;
+
+    /**
+     * IDCT algorithm.
+     * Must be set through AVOptions.
+     */
+    int idct_algo;
+    /** Copied from the pixblock DSP context by avcodec_dct_init(). */
+    void (*get_pixels)(int16_t *block /* align 16 */,
+                       const uint8_t *pixels /* align 8 */,
+                       ptrdiff_t line_size);
+    /** Exposed as the "bits_per_sample" option (default 8, range 0..14). */
+    int bits_per_sample;
+    /** Same signature as get_pixels, without the alignment note on pixels. */
+    void (*get_pixels_unaligned)(int16_t *block /* align 16 */,
+                       const uint8_t *pixels,
+                       ptrdiff_t line_size);
+} AVDCT;
+
+/**
+ * Allocates an AVDCT context.
+ * This needs to be initialized with avcodec_dct_init() after optionally
+ * configuring it with AVOptions.
+ *
+ * To free it use av_free()
+ */
+AVDCT *avcodec_dct_alloc(void);
+int avcodec_dct_init(AVDCT *);
+
+const AVClass *avcodec_dct_get_class(void);
+
+#endif /* AVCODEC_AVDCT_H */
diff --git a/media/ffvpx/libavcodec/avfft.c b/media/ffvpx/libavcodec/avfft.c
new file mode 100644
index 0000000000..2200f37708
--- /dev/null
+++ b/media/ffvpx/libavcodec/avfft.c
@@ -0,0 +1,145 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/mem.h"
+#include "avfft.h"
+#include "fft.h"
+#include "rdft.h"
+#include "dct.h"
+
+/* FFT */
+
+FFTContext *av_fft_init(int nbits, int inverse)
+{
+    /* Returns a ready-to-use context, or NULL if allocation or setup fails. */
+    FFTContext *ctx = av_mallocz(sizeof(FFTContext));
+
+    if (ctx != NULL && ff_fft_init(ctx, nbits, inverse) != 0)
+        av_freep(&ctx);   /* frees the block and resets ctx to NULL */
+    return ctx;
+}
+
+void av_fft_permute(FFTContext *s, FFTComplex *z)
+{ /* Public entry point: forwards to the fft_permute hook stored in s. */
+    s->fft_permute(s, z);
+}
+
+void av_fft_calc(FFTContext *s, FFTComplex *z)
+{ /* Public entry point: forwards to the fft_calc hook stored in s. */
+    s->fft_calc(s, z);
+}
+
+av_cold void av_fft_end(FFTContext *s)
+{
+    if (!s)
+        return;     /* NULL is accepted and ignored */
+    ff_fft_end(s);  /* tear down the internals first ... */
+    av_free(s);     /* ... then release the context itself */
+}
+
+#if CONFIG_MDCT
+
+FFTContext *av_mdct_init(int nbits, int inverse, double scale)
+{
+    /* Zeroed allocation, matching av_fft_init(), so that setup and teardown
+     * never operate on indeterminate fields. Returns NULL on failure. */
+    FFTContext *s = av_mallocz(sizeof(*s));
+    if (s && ff_mdct_init(s, nbits, inverse, scale))
+        av_freep(&s);
+    return s;
+}
+
+void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{ /* Public entry point: forwards to the imdct_calc hook stored in s. */
+    s->imdct_calc(s, output, input);
+}
+
+void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
+{ /* Public entry point: forwards to the imdct_half hook stored in s. */
+    s->imdct_half(s, output, input);
+}
+
+void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{ /* Public entry point: forwards to the mdct_calc hook stored in s. */
+    s->mdct_calc(s, output, input);
+}
+
+av_cold void av_mdct_end(FFTContext *s)
+{
+    if (s == NULL)
+        return;     /* nothing to release */
+    ff_mdct_end(s); /* internals go first ... */
+    av_free(s);     /* ... followed by the context itself */
+}
+
+#endif /* CONFIG_MDCT */
+
+#if CONFIG_RDFT
+
+RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
+{
+    /* Zeroed allocation, as in av_fft_init(), so that setup and teardown
+     * never operate on indeterminate fields. Returns NULL on failure. */
+    RDFTContext *s = av_mallocz(sizeof(*s));
+    if (s && ff_rdft_init(s, nbits, trans))
+        av_freep(&s);
+    return s;
+}
+
+void av_rdft_calc(RDFTContext *s, FFTSample *data)
+{ /* Public entry point: forwards to the rdft_calc hook stored in s. */
+    s->rdft_calc(s, data);
+}
+
+av_cold void av_rdft_end(RDFTContext *s)
+{
+    if (!s)
+        return;     /* a NULL context is silently ignored */
+    ff_rdft_end(s); /* release what the context holds ... */
+    av_free(s);     /* ... and then the context itself */
+}
+
+#endif /* CONFIG_RDFT */
+
+#if CONFIG_DCT
+
+DCTContext *av_dct_init(int nbits, enum DCTTransformType inverse)
+{
+    /* `inverse` carries the transform type. The allocation is zeroed, as in
+     * av_fft_init(), so no field is indeterminate. Returns NULL on failure. */
+    DCTContext *s = av_mallocz(sizeof(*s));
+    if (s && ff_dct_init(s, nbits, inverse))
+        av_freep(&s);
+    return s;
+}
+
+void av_dct_calc(DCTContext *s, FFTSample *data)
+{ /* Public entry point: forwards to the dct_calc hook stored in s. */
+    s->dct_calc(s, data);
+}
+
+av_cold void av_dct_end(DCTContext *s)
+{
+    if (s == NULL)
+        return;     /* NULL is a valid no-op argument */
+    ff_dct_end(s);  /* internals first ... */
+    av_free(s);     /* ... then the context */
+}
+
+#endif /* CONFIG_DCT */
diff --git a/media/ffvpx/libavcodec/avfft.h b/media/ffvpx/libavcodec/avfft.h
new file mode 100644
index 0000000000..0c0f9b8d8d
--- /dev/null
+++ b/media/ffvpx/libavcodec/avfft.h
@@ -0,0 +1,118 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVFFT_H
+#define AVCODEC_AVFFT_H
+
+/**
+ * @file
+ * @ingroup lavc_fft
+ * FFT functions
+ */
+
+/**
+ * @defgroup lavc_fft FFT functions
+ * @ingroup lavc_misc
+ *
+ * @{
+ */
+
+typedef float FFTSample;
+
+typedef struct FFTComplex {
+    FFTSample re, im; /* presumably real and imaginary part, going by the names */
+} FFTComplex;
+
+typedef struct FFTContext FFTContext;
+
+/**
+ * Set up a complex FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+FFTContext *av_fft_init(int nbits, int inverse);
+
+/**
+ * Do the permutation needed BEFORE calling av_fft_calc().
+ */
+void av_fft_permute(FFTContext *s, FFTComplex *z);
+
+/**
+ * Do a complex FFT with the parameters defined in av_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+void av_fft_calc(FFTContext *s, FFTComplex *z);
+
+void av_fft_end(FFTContext *s);
+
+FFTContext *av_mdct_init(int nbits, int inverse, double scale);
+void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_end(FFTContext *s);
+
+/* Real Discrete Fourier Transform */
+
+enum RDFTransformType { /* transform selector passed to av_rdft_init() */
+    DFT_R2C,
+    IDFT_C2R,
+    IDFT_R2C,
+    DFT_C2R,
+};
+
+typedef struct RDFTContext RDFTContext;
+
+/**
+ * Set up a real FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans);
+void av_rdft_calc(RDFTContext *s, FFTSample *data);
+void av_rdft_end(RDFTContext *s);
+
+/* Discrete Cosine Transform */
+
+typedef struct DCTContext DCTContext;
+
+enum DCTTransformType {
+    DCT_II = 0,     /* input size: 1 << nbits */
+    DCT_III,        /* input size: 1 << nbits */
+    DCT_I,          /* input size: (1 << nbits) + 1 */
+    DST_I,          /* input size: 1 << nbits; the first element is ignored */
+};
+
+/**
+ * Set up DCT.
+ *
+ * @param nbits           size of the input array:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ * @param type            the type of transform
+ *
+ * @note the first element of the input of DST-I is ignored
+ */
+DCTContext *av_dct_init(int nbits, enum DCTTransformType type);
+void av_dct_calc(DCTContext *s, FFTSample *data);
+void av_dct_end (DCTContext *s);
+
+/**
+ * @}
+ */
+
+#endif /* AVCODEC_AVFFT_H */
diff --git a/media/ffvpx/libavcodec/avpacket.c b/media/ffvpx/libavcodec/avpacket.c
new file mode 100644
index 0000000000..5fef65e97a
--- /dev/null
+++ b/media/ffvpx/libavcodec/avpacket.c
@@ -0,0 +1,647 @@
+/*
+ * AVPacket functions for libavcodec
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/mem.h"
+#include "libavutil/rational.h"
+
+#include "defs.h"
+#include "packet.h"
+#include "packet_internal.h"
+
+#if FF_API_INIT_PACKET
+void av_init_packet(AVPacket *pkt)
+{ /* Reset the fields listed below to defaults; data and size are not touched. */
+    pkt->pts                  = AV_NOPTS_VALUE;
+    pkt->dts                  = AV_NOPTS_VALUE;
+    pkt->pos                  = -1;
+    pkt->duration             = 0;
+    pkt->flags                = 0;
+    pkt->stream_index         = 0;
+    pkt->buf                  = NULL;
+    pkt->side_data            = NULL;
+    pkt->side_data_elems      = 0;
+    pkt->opaque               = NULL;
+    pkt->opaque_ref           = NULL;
+    pkt->time_base            = av_make_q(0, 1);
+}
+#endif
+
+static void get_packet_defaults(AVPacket *pkt)
+{ /* Overwrite *pkt with the default state; nothing it referenced is freed. */
+    memset(pkt, 0, sizeof(*pkt));
+    /* Fields that get an explicit default after the zeroing: */
+    pkt->pts             = AV_NOPTS_VALUE;
+    pkt->dts             = AV_NOPTS_VALUE;
+    pkt->pos             = -1;
+    pkt->time_base       = av_make_q(0, 1);
+}
+
+AVPacket *av_packet_alloc(void)
+{
+    /* Returns a packet in the default state, or NULL when out of memory. */
+    AVPacket *packet = av_malloc(sizeof(*packet));
+
+    /* get_packet_defaults() clears and initialises every field. */
+    if (packet)
+        get_packet_defaults(packet);
+    return packet;
+}
+
+void av_packet_free(AVPacket **pkt)
+{
+    /* Both a NULL argument and a NULL *pkt are silently accepted. */
+    if (pkt && *pkt) {
+        av_packet_unref(*pkt);  /* release what the packet references */
+        av_freep(pkt);          /* free the struct through the caller's pointer */
+    }
+}
+
+static int packet_alloc(AVBufferRef **buf, int size)
+{
+    /* (Re)allocate *buf to hold size payload bytes plus zeroed padding.
+     * Returns 0, AVERROR(EINVAL) for an unusable size, or the realloc error. */
+    int err;
+
+    if (size < 0 || size >= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
+        return AVERROR(EINVAL);
+
+    err = av_buffer_realloc(buf, size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (err >= 0)
+        memset((*buf)->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+    return err < 0 ? err : 0;
+}
+
+int av_new_packet(AVPacket *pkt, int size)
+{
+    /* On failure *pkt is left exactly as it was passed in. */
+    AVBufferRef *payload = NULL;
+    const int err = packet_alloc(&payload, size);
+
+    if (err < 0)
+        return err;
+
+    get_packet_defaults(pkt);   /* previous contents are overwritten, not freed */
+    pkt->buf  = payload;
+    pkt->data = payload->data;
+    pkt->size = size;
+    return 0;
+}
+
+void av_shrink_packet(AVPacket *pkt, int size)
+{ /* Only ever shrinks: a size at or above the current one is a no-op. */
+    if (size < pkt->size) {
+        pkt->size = size;
+        memset(pkt->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+    }
+}
+
+int av_grow_packet(AVPacket *pkt, int grow_by)
+{ /* Enlarge the payload by grow_by bytes and re-zero the padding behind it. */
+    int new_size;
+    av_assert0((unsigned)pkt->size <= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE);
+    if ((unsigned)grow_by >
+        INT_MAX - (pkt->size + AV_INPUT_BUFFER_PADDING_SIZE))
+        return AVERROR(ENOMEM);
+    /* new_size counts payload plus padding, but not the offset into the buffer. */
+    new_size = pkt->size + grow_by + AV_INPUT_BUFFER_PADDING_SIZE;
+    if (pkt->buf) {
+        size_t data_offset;
+        uint8_t *old_data = pkt->data;
+        if (pkt->data == NULL) {
+            data_offset = 0;
+            pkt->data = pkt->buf->data;
+        } else {
+            data_offset = pkt->data - pkt->buf->data;
+            if (data_offset > INT_MAX - new_size)
+                return AVERROR(ENOMEM);
+        }
+        /* Reallocate when the buffer is too small or is not writable. */
+        if (new_size + data_offset > pkt->buf->size ||
+            !av_buffer_is_writable(pkt->buf)) {
+            int ret;
+
+            // allocate slightly more than requested to avoid excessive
+            // reallocations
+            if (new_size + data_offset < INT_MAX - new_size/16)
+                new_size += new_size/16;
+
+            ret = av_buffer_realloc(&pkt->buf, new_size + data_offset);
+            if (ret < 0) {
+                pkt->data = old_data; /* undo the NULL -> buffer start fixup above */
+                return ret;
+            }
+            pkt->data = pkt->buf->data + data_offset;
+        }
+    } else { /* no buffer reference yet: copy the payload into a new buffer */
+        pkt->buf = av_buffer_alloc(new_size);
+        if (!pkt->buf)
+            return AVERROR(ENOMEM);
+        if (pkt->size > 0)
+            memcpy(pkt->buf->data, pkt->data, pkt->size);
+        pkt->data = pkt->buf->data;
+    }
+    pkt->size += grow_by;
+    memset(pkt->data + pkt->size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+    return 0;
+}
+
+int av_packet_from_data(AVPacket *pkt, uint8_t *data, int size)
+{
+    /* Wrap data in a buffer of size + padding bytes that is released with
+     * av_buffer_default_free(). Negative sizes are rejected, as in packet_alloc(). */
+    if (size < 0 || size >= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
+        return AVERROR(EINVAL);
+
+    pkt->buf = av_buffer_create(data, size + AV_INPUT_BUFFER_PADDING_SIZE,
+                                av_buffer_default_free, NULL, 0);
+    if (!pkt->buf)
+        return AVERROR(ENOMEM);
+    pkt->data = data;
+    pkt->size = size;
+    return 0;
+}
+
+void av_packet_free_side_data(AVPacket *pkt)
+{
+    /* Free each payload, then the entry array, and reset the count. */
+    for (int idx = 0; idx < pkt->side_data_elems; idx++)
+        av_freep(&pkt->side_data[idx].data);
+    av_freep(&pkt->side_data);
+    pkt->side_data_elems = 0;
+}
+
+int av_packet_add_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+                            uint8_t *data, size_t size)
+{
+    /* On success pkt stores data; on error data is neither stored nor freed. */
+    const int count = pkt->side_data_elems;
+    AVPacketSideData *grown, *slot;
+    /* An entry of this type already exists: swap its payload in place. */
+    for (int idx = 0; idx < count; idx++) {
+        slot = &pkt->side_data[idx];
+        if (slot->type != type)
+            continue;
+        av_free(slot->data);
+        slot->data = data;
+        slot->size = size;
+        return 0;
+    }
+
+    if ((unsigned)count + 1 > AV_PKT_DATA_NB)
+        return AVERROR(ERANGE);
+
+    /* Grow via a temporary so the old array pointer survives a failure. */
+    grown = av_realloc(pkt->side_data, (count + 1) * sizeof(*grown));
+    if (!grown)
+        return AVERROR(ENOMEM);
+    pkt->side_data = grown;
+    slot = &grown[count];
+    slot->data = data;
+    slot->size = size;
+    slot->type = type;
+    pkt->side_data_elems++;
+    return 0;
+}
+
+
+uint8_t *av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+                                 size_t size)
+{
+    /* Allocate a zeroed payload of size bytes (plus padding) and attach it to
+     * pkt. Returns the payload, or NULL on overflow, OOM or attach failure. */
+    uint8_t *payload;
+
+    /* size + padding must not wrap around. */
+    if (size > SIZE_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
+        return NULL;
+
+    payload = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!payload)
+        return NULL;
+
+    /* If attaching fails the buffer is released here and NULL is returned. */
+    if (av_packet_add_side_data(pkt, type, payload, size) < 0)
+        av_freep(&payload);
+    return payload;
+}
+
+uint8_t *av_packet_get_side_data(const AVPacket *pkt, enum AVPacketSideDataType type,
+                                 size_t *size)
+{
+    /* Linear search for the first entry of the given type; size is optional. */
+    const AVPacketSideData *hit = NULL;
+
+    for (int idx = 0; idx < pkt->side_data_elems && !hit; idx++) {
+        if (pkt->side_data[idx].type == type)
+            hit = &pkt->side_data[idx];
+    }
+
+    /* A miss reports a size of 0 and returns NULL. */
+    if (size)
+        *size = hit ? hit->size : 0;
+    return hit ? hit->data : NULL;
+}
+
+const char *av_packet_side_data_name(enum AVPacketSideDataType type)
+{ /* Map a side data type to a human-readable name; NULL for unlisted values. */
+    switch(type) {
+    case AV_PKT_DATA_PALETTE:                    return "Palette";
+    case AV_PKT_DATA_NEW_EXTRADATA:              return "New Extradata";
+    case AV_PKT_DATA_PARAM_CHANGE:               return "Param Change";
+    case AV_PKT_DATA_H263_MB_INFO:               return "H263 MB Info";
+    case AV_PKT_DATA_REPLAYGAIN:                 return "Replay Gain";
+    case AV_PKT_DATA_DISPLAYMATRIX:              return "Display Matrix";
+    case AV_PKT_DATA_STEREO3D:                   return "Stereo 3D";
+    case AV_PKT_DATA_AUDIO_SERVICE_TYPE:         return "Audio Service Type";
+    case AV_PKT_DATA_QUALITY_STATS:              return "Quality stats";
+    case AV_PKT_DATA_FALLBACK_TRACK:             return "Fallback track";
+    case AV_PKT_DATA_CPB_PROPERTIES:             return "CPB properties";
+    case AV_PKT_DATA_SKIP_SAMPLES:               return "Skip Samples";
+    case AV_PKT_DATA_JP_DUALMONO:                return "JP Dual Mono";
+    case AV_PKT_DATA_STRINGS_METADATA:           return "Strings Metadata";
+    case AV_PKT_DATA_SUBTITLE_POSITION:          return "Subtitle Position";
+    case AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL:   return "Matroska BlockAdditional";
+    case AV_PKT_DATA_WEBVTT_IDENTIFIER:          return "WebVTT ID";
+    case AV_PKT_DATA_WEBVTT_SETTINGS:            return "WebVTT Settings";
+    case AV_PKT_DATA_METADATA_UPDATE:            return "Metadata Update";
+    case AV_PKT_DATA_MPEGTS_STREAM_ID:           return "MPEGTS Stream ID";
+    case AV_PKT_DATA_MASTERING_DISPLAY_METADATA: return "Mastering display metadata";
+    case AV_PKT_DATA_CONTENT_LIGHT_LEVEL:        return "Content light level metadata";
+    case AV_PKT_DATA_SPHERICAL:                  return "Spherical Mapping";
+    case AV_PKT_DATA_A53_CC:                     return "A53 Closed Captions";
+    case AV_PKT_DATA_ENCRYPTION_INIT_INFO:       return "Encryption initialization data";
+    case AV_PKT_DATA_ENCRYPTION_INFO:            return "Encryption info";
+    case AV_PKT_DATA_AFD:                        return "Active Format Description data";
+    case AV_PKT_DATA_PRFT:                       return "Producer Reference Time";
+    case AV_PKT_DATA_ICC_PROFILE:                return "ICC Profile";
+    case AV_PKT_DATA_DOVI_CONF:                  return "DOVI configuration record";
+    case AV_PKT_DATA_S12M_TIMECODE:              return "SMPTE ST 12-1:2014 timecode";
+    case AV_PKT_DATA_DYNAMIC_HDR10_PLUS:         return "HDR10+ Dynamic Metadata (SMPTE 2094-40)";
+    }
+    return NULL;
+}
+
+uint8_t *av_packet_pack_dictionary(AVDictionary *dict, size_t *size)
+{ /* Flatten dict into one buffer of "key\0value\0" pairs; length goes to *size. */
+    uint8_t *data = NULL;
+    *size = 0;
+
+    if (!dict)
+        return NULL;
+    /* Pass 0 only measures the total length; pass 1 copies the strings. */
+    for (int pass = 0; pass < 2; pass++) {
+        const AVDictionaryEntry *t = NULL;
+        size_t total_length = 0;
+
+        while ((t = av_dict_iterate(dict, t))) {
+            for (int i = 0; i < 2; i++) {
+                const char  *str = i ? t->value : t->key;
+                const size_t len = strlen(str) + 1; /* include the terminator */
+
+                if (pass)
+                    memcpy(data + total_length, str, len);
+                else if (len > SIZE_MAX - total_length)
+                    return NULL; /* total length would overflow size_t */
+                total_length += len;
+            }
+        }
+        if (pass)
+            break;
+        data = av_malloc(total_length);
+        if (!data)
+            return NULL; /* *size is still 0 at this point */
+        *size = total_length;
+    }
+
+    return data;
+}
+
+int av_packet_unpack_dictionary(const uint8_t *data, size_t size,
+                                AVDictionary **dict)
+{ /* Read consecutive NUL-terminated key/value strings from data into *dict. */
+    const uint8_t *end;
+    int ret;
+    /* Nothing to do without an output dictionary or without input. */
+    if (!dict || !data || !size)
+        return 0;
+    end = data + size;
+    if (size && end[-1]) /* the last byte must be a NUL terminator */
+        return AVERROR_INVALIDDATA;
+    while (data < end) {
+        const uint8_t *key = data;
+        const uint8_t *val = data + strlen(key) + 1;
+
+        if (val >= end || !*key) /* missing value or empty key */
+            return AVERROR_INVALIDDATA;
+
+        ret = av_dict_set(dict, key, val, 0);
+        if (ret < 0)
+            return ret;
+        data = val + strlen(val) + 1;
+    }
+
+    return 0;
+}
+
+int av_packet_shrink_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+                               size_t size)
+{
+    /* Only the recorded size changes; the payload itself is not reallocated. */
+    for (int idx = 0; idx < pkt->side_data_elems; idx++) {
+        AVPacketSideData *entry = &pkt->side_data[idx];
+        if (entry->type != type)
+            continue;
+        if (size > entry->size)
+            return AVERROR(ENOMEM);     /* growing is not supported */
+        entry->size = size;
+        return 0;
+    }
+    return AVERROR(ENOENT);             /* no entry of that type */
+}
+
+int av_packet_copy_props(AVPacket *dst, const AVPacket *src)
+{ /* Copy everything except buf, data and size from src to dst. */
+    int i, ret;
+    /* Plain fields are assigned; dst's old opaque_ref/side_data are not freed. */
+    dst->pts                  = src->pts;
+    dst->dts                  = src->dts;
+    dst->pos                  = src->pos;
+    dst->duration             = src->duration;
+    dst->flags                = src->flags;
+    dst->stream_index         = src->stream_index;
+    dst->opaque               = src->opaque;
+    dst->time_base            = src->time_base;
+    dst->opaque_ref           = NULL;
+    dst->side_data            = NULL;
+    dst->side_data_elems      = 0;
+    /* opaque_ref is taken over through av_buffer_replace(). */
+    ret = av_buffer_replace(&dst->opaque_ref, src->opaque_ref);
+    if (ret < 0)
+        return ret;
+    /* Side data is duplicated entry by entry into newly allocated payloads. */
+    for (i = 0; i < src->side_data_elems; i++) {
+        enum AVPacketSideDataType type = src->side_data[i].type;
+        size_t size = src->side_data[i].size;
+        uint8_t *src_data = src->side_data[i].data;
+        uint8_t *dst_data = av_packet_new_side_data(dst, type, size);
+
+        if (!dst_data) {
+            av_buffer_unref(&dst->opaque_ref); /* roll back what was set up so far */
+            av_packet_free_side_data(dst);
+            return AVERROR(ENOMEM);
+        }
+        memcpy(dst_data, src_data, size);
+    }
+
+    return 0;
+}
+
+void av_packet_unref(AVPacket *pkt)
+{ /* Release side data and both buffer references, then reset *pkt to defaults. */
+    av_packet_free_side_data(pkt);
+    av_buffer_unref(&pkt->opaque_ref);
+    av_buffer_unref(&pkt->buf);
+    get_packet_defaults(pkt);
+}
+
+int av_packet_ref(AVPacket *dst, const AVPacket *src)
+{ /* Set up dst from src: properties are copied, the payload is shared or copied. */
+    int ret;
+
+    dst->buf = NULL;
+
+    ret = av_packet_copy_props(dst, src);
+    if (ret < 0)
+        goto fail;
+
+    if (!src->buf) { /* src has no buffer reference: copy the payload */
+        ret = packet_alloc(&dst->buf, src->size);
+        if (ret < 0)
+            goto fail;
+        av_assert1(!src->size || src->data);
+        if (src->size)
+            memcpy(dst->buf->data, src->data, src->size);
+
+        dst->data = dst->buf->data;
+    } else { /* take a new reference on src's buffer and reuse its data pointer */
+        dst->buf = av_buffer_ref(src->buf);
+        if (!dst->buf) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        dst->data = src->data;
+    }
+
+    dst->size = src->size;
+
+    return 0;
+fail: /* drop whatever was attached to dst and reset it to defaults */
+    av_packet_unref(dst);
+    return ret;
+}
+
+AVPacket *av_packet_clone(const AVPacket *src)
+{
+    /* Returns a new packet set up by av_packet_ref() from src, or NULL. */
+    AVPacket *copy = av_packet_alloc();
+
+    if (copy != NULL) {
+        /* av_packet_free() also resets copy to NULL for the return below. */
+        if (av_packet_ref(copy, src) != 0)
+            av_packet_free(&copy);
+    }
+    return copy;
+}
+
+void av_packet_move_ref(AVPacket *dst, AVPacket *src)
+{ /* Struct-copy *src over *dst (old dst contents are not freed), then reset *src. */
+    *dst = *src;
+    get_packet_defaults(src);
+}
+
+int av_packet_make_refcounted(AVPacket *pkt)
+{
+    int err;
+
+    /* Already backed by a buffer reference: nothing to do. */
+    if (pkt->buf)
+        return 0;
+    /* Otherwise copy the payload into a freshly allocated, padded buffer. */
+    err = packet_alloc(&pkt->buf, pkt->size);
+    if (err < 0)
+        return err;
+
+    av_assert1(!pkt->size || pkt->data);
+    if (pkt->size)
+        memcpy(pkt->buf->data, pkt->data, pkt->size);
+    pkt->data = pkt->buf->data;
+    return 0;
+}
+
+int av_packet_make_writable(AVPacket *pkt)
+{
+    AVBufferRef *fresh = NULL;
+    int err;
+
+    /* A packet whose buffer is already writable is left untouched. */
+    if (pkt->buf && av_buffer_is_writable(pkt->buf))
+        return 0;
+    /* Copy the payload into a private buffer, then swap it in. */
+    err = packet_alloc(&fresh, pkt->size);
+    if (err < 0)
+        return err;
+    av_assert1(!pkt->size || pkt->data);
+    if (pkt->size)
+        memcpy(fresh->data, pkt->data, pkt->size);
+
+    av_buffer_unref(&pkt->buf);
+    pkt->buf  = fresh;
+    pkt->data = fresh->data;
+    return 0;
+}
+
+void av_packet_rescale_ts(AVPacket *pkt, AVRational src_tb, AVRational dst_tb)
+{ /* Convert pts, dts and duration from src_tb to dst_tb units. */
+    if (pkt->pts != AV_NOPTS_VALUE) /* unset timestamps stay unset */
+        pkt->pts = av_rescale_q(pkt->pts, src_tb, dst_tb);
+    if (pkt->dts != AV_NOPTS_VALUE)
+        pkt->dts = av_rescale_q(pkt->dts, src_tb, dst_tb);
+    if (pkt->duration > 0) /* zero or negative durations are left as they are */
+        pkt->duration = av_rescale_q(pkt->duration, src_tb, dst_tb);
+}
+
+int avpriv_packet_list_put(PacketList *packet_buffer,
+                           AVPacket      *pkt,
+                           int (*copy)(AVPacket *dst, const AVPacket *src),
+                           int flags)
+{
+    /* Append pkt to the tail of packet_buffer. With a copy callback the entry
+     * is filled by that callback from pkt; without one, pkt is made
+     * refcounted and its contents are moved into the entry.
+     * flags is not used here. Returns 0 or a negative error code. */
+    PacketListEntry *node = av_malloc(sizeof(*node));
+    int err;
+
+    if (!node)
+        return AVERROR(ENOMEM);
+
+    if (copy) {
+        get_packet_defaults(&node->pkt);
+        err = copy(&node->pkt, pkt);
+    } else {
+        err = av_packet_make_refcounted(pkt);
+    }
+    if (err < 0) {
+        av_free(node);
+        return err;
+    }
+    if (!copy)
+        av_packet_move_ref(&node->pkt, pkt);
+
+    /* Link the new entry in as the last element. */
+    node->next = NULL;
+    if (packet_buffer->head)
+        packet_buffer->tail->next = node;
+    else
+        packet_buffer->head = node;
+    packet_buffer->tail = node;
+
+    return 0;
+}
+
+int avpriv_packet_list_get(PacketList *pkt_buffer,
+                           AVPacket      *pkt)
+{ /* Pop the head entry into *pkt; AVERROR(EAGAIN) when the list is empty. */
+    PacketListEntry *first = pkt_buffer->head;
+    if (first == NULL)
+        return AVERROR(EAGAIN);
+    *pkt             = first->pkt;   /* struct copy; the entry itself is freed below */
+    pkt_buffer->head = first->next;
+    if (pkt_buffer->head == NULL)
+        pkt_buffer->tail = NULL;     /* the list became empty */
+    av_free(first);
+    return 0;
+}
+
+void avpriv_packet_list_free(PacketList *pkt_buf)
+{
+    /* Unreference and free every queued entry, leaving an empty list. */
+    PacketListEntry *following;
+
+    for (PacketListEntry *cur = pkt_buf->head; cur; cur = following) {
+        following = cur->next;      /* read the link before cur is freed */
+        av_packet_unref(&cur->pkt);
+        av_free(cur);
+    }
+    pkt_buf->head = pkt_buf->tail = NULL;
+}
+
+int ff_side_data_set_encoder_stats(AVPacket *pkt, int quality, int64_t *error, int error_count, int pict_type)
+{ /* Store encoder statistics in the packet's AV_PKT_DATA_QUALITY_STATS side data. */
+    uint8_t *side_data;
+    size_t side_data_size;
+    int i;
+    /* Reuse an existing entry when there is one, otherwise create it. */
+    side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_QUALITY_STATS, &side_data_size);
+    if (!side_data) {
+        side_data_size = 4+4+8*error_count;
+        side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_STATS,
+                                            side_data_size);
+    }
+    /* Needs 8 header bytes plus 8 bytes per error value. */
+    if (!side_data || side_data_size < 4+4+8*error_count)
+        return AVERROR(ENOMEM);
+    /* Header: quality at offset 0, pict_type at 4, error_count at 5. */
+    AV_WL32(side_data   , quality  );
+    side_data[4] = pict_type;
+    side_data[5] = error_count;
+    for (i = 0; i<error_count; i++)
+        AV_WL64(side_data+8 + 8*i , error[i]); /* error values start at offset 8 */
+
+    return 0;
+}
+
+int ff_side_data_set_prft(AVPacket *pkt, int64_t timestamp)
+{
+    /* Store timestamp in the packet's AV_PKT_DATA_PRFT side data, creating the
+     * entry when it does not exist yet. Returns 0 or AVERROR(ENOMEM). */
+    size_t len;
+    uint8_t *raw = av_packet_get_side_data(pkt, AV_PKT_DATA_PRFT, &len);
+    AVProducerReferenceTime *prft;
+
+    if (raw == NULL) {
+        len = sizeof(*prft);
+        raw = av_packet_new_side_data(pkt, AV_PKT_DATA_PRFT, len);
+    }
+    /* Covers both a failed allocation and an existing entry that is too small. */
+    if (raw == NULL || len < sizeof(*prft))
+        return AVERROR(ENOMEM);
+
+    prft            = (AVProducerReferenceTime *)raw;
+    prft->wallclock = timestamp;
+    prft->flags     = 0;
+    return 0;
+}
diff --git a/media/ffvpx/libavcodec/avpicture.c b/media/ffvpx/libavcodec/avpicture.c
new file mode 100644
index 0000000000..56435f4fc9
--- /dev/null
+++ b/media/ffvpx/libavcodec/avpicture.c
@@ -0,0 +1,82 @@
+/*
+ * AVPicture management routines
+ * Copyright (c) 2001, 2002, 2003 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AVPicture management routines
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "libavutil/common.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/colorspace.h"
+
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
+int avpicture_fill(AVPicture *picture, const uint8_t *ptr,
+                   enum AVPixelFormat pix_fmt, int width, int height)
+{ /* Legacy AVPicture wrapper around av_image_fill_arrays(), last argument 1. */
+    return av_image_fill_arrays(picture->data, picture->linesize,
+                                ptr, pix_fmt, width, height, 1);
+}
+
+int avpicture_layout(const AVPicture* src, enum AVPixelFormat pix_fmt, int width, int height,
+                     unsigned char *dest, int dest_size)
+{ /* Legacy AVPicture wrapper around av_image_copy_to_buffer(), last argument 1. */
+    return av_image_copy_to_buffer(dest, dest_size,
+                                   (const uint8_t * const*)src->data, src->linesize,
+                                   pix_fmt, width, height, 1);
+}
+
+int avpicture_get_size(enum AVPixelFormat pix_fmt, int width, int height)
+{ /* Legacy wrapper around av_image_get_buffer_size(), last argument 1. */
+    return av_image_get_buffer_size(pix_fmt, width, height, 1);
+}
+
+int avpicture_alloc(AVPicture *picture,
+                    enum AVPixelFormat pix_fmt, int width, int height)
+{
+    /* Allocation is delegated to av_image_alloc() with align = 1. */
+    const int err = av_image_alloc(picture->data, picture->linesize,
+                                   width, height, pix_fmt, 1);
+
+    if (err >= 0)
+        return 0;
+    memset(picture, 0, sizeof(*picture));   /* clear the whole struct on failure */
+    return err;
+}
+
+void avpicture_free(AVPicture *picture)
+{ /* Frees data[0] only and sets that pointer to NULL. */
+    av_freep(&picture->data[0]);
+}
+
+void av_picture_copy(AVPicture *dst, const AVPicture *src,
+                     enum AVPixelFormat pix_fmt, int width, int height)
+{ /* Legacy AVPicture wrapper: forwards planes and line sizes to av_image_copy(). */
+    av_image_copy(dst->data, dst->linesize, (const uint8_t **)src->data,
+                  src->linesize, pix_fmt, width, height);
+}
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_AVPICTURE */
diff --git a/media/ffvpx/libavcodec/bit_depth_template.c b/media/ffvpx/libavcodec/bit_depth_template.c
new file mode 100644
index 0000000000..d44d47ea45
--- /dev/null
+++ b/media/ffvpx/libavcodec/bit_depth_template.c
@@ -0,0 +1,108 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "mathops.h"
+#include "rnd_avg.h"
+#include "libavutil/intreadwrite.h"
+
+#ifndef BIT_DEPTH
+#define BIT_DEPTH 8 /* fallback used when BIT_DEPTH was not defined before this point */
+#endif
+
+#ifdef AVCODEC_BIT_DEPTH_TEMPLATE_C /* seen before: drop the previous definitions so they can be redefined below */
+#   undef pixel
+#   undef pixel2
+#   undef pixel4
+#   undef dctcoef
+#   undef idctin
+#   undef INIT_CLIP
+#   undef no_rnd_avg_pixel4
+#   undef rnd_avg_pixel4
+#   undef AV_RN2P
+#   undef AV_RN4P
+#   undef AV_RN4PA
+#   undef AV_WN2P
+#   undef AV_WN4P
+#   undef AV_WN4PA
+#   undef CLIP
+#   undef FUNC
+#   undef FUNCC
+#   undef av_clip_pixel
+#   undef PIXEL_SPLAT_X4
+#else
+#   define AVCODEC_BIT_DEPTH_TEMPLATE_C /* first pass through this file: only record that it was seen */
+#endif
+
+#if BIT_DEPTH > 8 /* more than 8 bits per sample: 16-bit pixels, 32-bit coefficients */
+#   define pixel  uint16_t
+#   define pixel2 uint32_t /* twice the width of pixel */
+#   define pixel4 uint64_t /* four times the width of pixel */
+#   define dctcoef int32_t
+
+#ifdef IN_IDCT_DEPTH /* idctin is int32_t only when IN_IDCT_DEPTH is defined as 32, else int16_t */
+#if IN_IDCT_DEPTH == 32
+#   define idctin int32_t
+#else
+#   define idctin int16_t
+#endif
+#else
+#   define idctin int16_t
+#endif
+
+#   define INIT_CLIP /* expands to nothing */
+#   define no_rnd_avg_pixel4 no_rnd_avg64
+#   define    rnd_avg_pixel4    rnd_avg64
+#   define AV_RN2P  AV_RN32
+#   define AV_RN4P  AV_RN64
+#   define AV_RN4PA AV_RN64A
+#   define AV_WN2P  AV_WN32
+#   define AV_WN4P  AV_WN64
+#   define AV_WN4PA AV_WN64A
+#   define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL) /* copies a 16-bit x into each of the four 16-bit lanes */
+
+#   define av_clip_pixel(a) av_clip_uintp2(a, BIT_DEPTH)
+#   define CLIP(a)          av_clip_uintp2(a, BIT_DEPTH)
+#else /* 8 bits per sample or fewer: 8-bit pixels, 16-bit coefficients */
+#   define pixel  uint8_t
+#   define pixel2 uint16_t /* twice the width of pixel */
+#   define pixel4 uint32_t /* four times the width of pixel */
+#   define dctcoef int16_t
+#   define idctin  int16_t
+
+#   define INIT_CLIP /* expands to nothing */
+#   define no_rnd_avg_pixel4 no_rnd_avg32
+#   define    rnd_avg_pixel4    rnd_avg32
+#   define AV_RN2P  AV_RN16
+#   define AV_RN4P  AV_RN32
+#   define AV_RN4PA AV_RN32A
+#   define AV_WN2P  AV_WN16
+#   define AV_WN4P  AV_WN32
+#   define AV_WN4PA AV_WN32A
+#   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U) /* copies an 8-bit x into each of the four bytes */
+
+#   define av_clip_pixel(a) av_clip_uint8(a)
+#   define CLIP(a) av_clip_uint8(a)
+#endif
+
+#define FUNC3(a, b, c)  a ## _ ## b ##  c /* pastes to a_bc */
+#define FUNC2(a, b, c)  FUNC3(a, b, c) /* extra level so b and c are macro-expanded before pasting */
+#define FUNC(a)  FUNC2(a, BIT_DEPTH,) /* a_<BIT_DEPTH> */
+#define FUNCC(a) FUNC2(a, BIT_DEPTH, _c) /* a_<BIT_DEPTH>_c */
+#define FUNC4(a, b, c)  a ## _int ## b ## _ ## c ## bit /* pastes to a_int<b>_<c>bit */
+#define FUNC5(a, b, c)  FUNC4(a, b, c) /* extra level so b and c are macro-expanded before pasting */
+#define FUNC6(a)  FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH) /* a_int<IN_IDCT_DEPTH>_<BIT_DEPTH>bit */
diff --git a/media/ffvpx/libavcodec/bitstream.c b/media/ffvpx/libavcodec/bitstream.c
new file mode 100644
index 0000000000..3606575055
--- /dev/null
+++ b/media/ffvpx/libavcodec/bitstream.c
@@ -0,0 +1,72 @@
+/*
+ * Common bit i/o utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2010 Loren Merritt
+ *
+ * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * bitstream api.
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "config.h"
+#include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+#include "put_bits.h"
+
+/* Write each char of string to pb as 8 bits; append a zero byte if terminate_string is non-zero. */
+void ff_put_string(PutBitContext *pb, const char *string, int terminate_string)
+{
+    for (; *string; string++)
+        put_bits(pb, 8, *string);
+
+    if (terminate_string)
+        put_bits(pb, 8, 0);
+}
+
+void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
+{ /* Append the first length bits found at src to pb; asserts that pb has room for them. */
+    int words = length >> 4; /* number of whole 16-bit units in length */
+    int bits  = length & 15; /* bits left over after the whole 16-bit units */
+    int i;
+
+    if (length == 0)
+        return;
+
+    av_assert0(length <= put_bits_left(pb));
+
+    if (CONFIG_SMALL || words < 16 || put_bits_count(pb) & 7) { /* generic path: one put_bits() per 16-bit unit */
+        for (i = 0; i < words; i++)
+            put_bits(pb, 16, AV_RB16(src + 2 * i));
+    } else { /* bit count is a multiple of 8 and there are at least 16 units: bulk copy */
+        for (i = 0; put_bits_count(pb) & 31; i++) /* single bytes until the bit count is a multiple of 32 */
+            put_bits(pb, 8, src[i]);
+        flush_put_bits(pb);
+        memcpy(put_bits_ptr(pb), src + i, 2 * words - i); /* the rest of the whole units, minus the i bytes already written */
+        skip_put_bytes(pb, 2 * words - i);
+    }
+
+    put_bits(pb, bits, AV_RB16(src + 2 * words) >> (16 - bits)); /* NOTE(review): AV_RB16 is evaluated at src + 2 * words even when bits == 0 - confirm callers provide that slack */
+}
diff --git a/media/ffvpx/libavcodec/bitstream_filters.c b/media/ffvpx/libavcodec/bitstream_filters.c
new file mode 100644
index 0000000000..e8216819ca
--- /dev/null
+++ b/media/ffvpx/libavcodec/bitstream_filters.c
@@ -0,0 +1,109 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "libavutil/log.h"
+
+#include "bsf.h"
+#include "bsf_internal.h"
+
+extern const FFBitStreamFilter ff_aac_adtstoasc_bsf;
+extern const FFBitStreamFilter ff_av1_frame_merge_bsf;
+extern const FFBitStreamFilter ff_av1_frame_split_bsf;
+extern const FFBitStreamFilter ff_av1_metadata_bsf;
+extern const FFBitStreamFilter ff_chomp_bsf;
+extern const FFBitStreamFilter ff_dump_extradata_bsf;
+extern const FFBitStreamFilter ff_dca_core_bsf;
+extern const FFBitStreamFilter ff_dts2pts_bsf;
+extern const FFBitStreamFilter ff_dv_error_marker_bsf;
+extern const FFBitStreamFilter ff_eac3_core_bsf;
+extern const FFBitStreamFilter ff_extract_extradata_bsf;
+extern const FFBitStreamFilter ff_filter_units_bsf;
+extern const FFBitStreamFilter ff_h264_metadata_bsf;
+extern const FFBitStreamFilter ff_h264_mp4toannexb_bsf;
+extern const FFBitStreamFilter ff_h264_redundant_pps_bsf;
+extern const FFBitStreamFilter ff_hapqa_extract_bsf;
+extern const FFBitStreamFilter ff_hevc_metadata_bsf;
+extern const FFBitStreamFilter ff_hevc_mp4toannexb_bsf;
+extern const FFBitStreamFilter ff_imx_dump_header_bsf;
+extern const FFBitStreamFilter ff_media100_to_mjpegb_bsf;
+extern const FFBitStreamFilter ff_mjpeg2jpeg_bsf;
+extern const FFBitStreamFilter ff_mjpega_dump_header_bsf;
+extern const FFBitStreamFilter ff_mp3_header_decompress_bsf;
+extern const FFBitStreamFilter ff_mpeg2_metadata_bsf;
+extern const FFBitStreamFilter ff_mpeg4_unpack_bframes_bsf;
+extern const FFBitStreamFilter ff_mov2textsub_bsf;
+extern const FFBitStreamFilter ff_noise_bsf;
+extern const FFBitStreamFilter ff_null_bsf;
+extern const FFBitStreamFilter ff_opus_metadata_bsf;
+extern const FFBitStreamFilter ff_pcm_rechunk_bsf;
+extern const FFBitStreamFilter ff_pgs_frame_merge_bsf;
+extern const FFBitStreamFilter ff_prores_metadata_bsf;
+extern const FFBitStreamFilter ff_remove_extradata_bsf;
+extern const FFBitStreamFilter ff_setts_bsf;
+extern const FFBitStreamFilter ff_text2movsub_bsf;
+extern const FFBitStreamFilter ff_trace_headers_bsf;
+extern const FFBitStreamFilter ff_truehd_core_bsf;
+extern const FFBitStreamFilter ff_vp9_metadata_bsf;
+extern const FFBitStreamFilter ff_vp9_raw_reorder_bsf;
+extern const FFBitStreamFilter ff_vp9_superframe_bsf;
+extern const FFBitStreamFilter ff_vp9_superframe_split_bsf;
+
+#include "libavcodec/bsf_list.c"
+
+/* Return the filter at the index stored in *opaque and advance that index;
+ * return NULL (leaving *opaque untouched) when bitstream_filters[index] is NULL. */
+const AVBitStreamFilter *av_bsf_iterate(void **opaque)
+{
+    const uintptr_t pos = (uintptr_t)*opaque;
+    const FFBitStreamFilter *const entry = bitstream_filters[pos];
+    if (!entry)
+        return NULL;
+    *opaque = (void*)(pos + 1);
+    return &entry->p;
+}
+
+/* Look up a bitstream filter whose name compares equal to name with strcmp().
+ * Returns NULL when name is NULL or no filter matches. */
+const AVBitStreamFilter *av_bsf_get_by_name(const char *name)
+{
+    const AVBitStreamFilter *cur;
+    void *iter = NULL;
+
+    if (!name)
+        return NULL;
+
+    for (cur = av_bsf_iterate(&iter); cur; cur = av_bsf_iterate(&iter))
+        if (strcmp(cur->name, name) == 0)
+            return cur;
+    return NULL;
+}
+
+/* Return the priv_class of the next filter that has one, advancing the
+ * av_bsf_iterate() state kept in *opaque; NULL once the filters run out. */
+const AVClass *ff_bsf_child_class_iterate(void **opaque)
+{
+    for (;;) {
+        const AVBitStreamFilter *cur = av_bsf_iterate(opaque);
+        /* keep going past filters that have no private options */
+        if (!cur || cur->priv_class)
+            return cur ? cur->priv_class : NULL;
+    }
+}
diff --git a/media/ffvpx/libavcodec/blockdsp.h b/media/ffvpx/libavcodec/blockdsp.h
new file mode 100644
index 0000000000..d853adada2
--- /dev/null
+++ b/media/ffvpx/libavcodec/blockdsp.h
@@ -0,0 +1,47 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_BLOCKDSP_H
+#define AVCODEC_BLOCKDSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Fill-block callback type (presumably stores value into h rows of block, line_size apart).
+ * NOTE(review): the notes below, headed "add and put pixel (decoding)", name
+ * op_pixels_func rather than op_fill_func - confirm they apply here:
+ * block sizes 8x4, 8x8, 16x8, 16x16; h in { width / 2, width }, never > 16 and never < 4. */
+typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */,
+                             uint8_t value, ptrdiff_t line_size, int h);
+
+typedef struct BlockDSPContext { /* function-pointer table filled in by ff_blockdsp_init() and the per-arch init functions (presumably) */
+    void (*clear_block)(int16_t *block /* align 32 */); /* presumably zeroes a single coefficient block - TODO confirm */
+    void (*clear_blocks)(int16_t *blocks /* align 32 */); /* presumably zeroes several consecutive blocks - TODO confirm count */
+
+    op_fill_func fill_block_tab[2]; /* two fill variants; presumably one per block width (16 and 8) - TODO confirm order */
+} BlockDSPContext;
+
+void ff_blockdsp_init(BlockDSPContext *c);
+
+void ff_blockdsp_init_alpha(BlockDSPContext *c);
+void ff_blockdsp_init_arm(BlockDSPContext *c);
+void ff_blockdsp_init_ppc(BlockDSPContext *c);
+void ff_blockdsp_init_x86(BlockDSPContext *c);
+void ff_blockdsp_init_mips(BlockDSPContext *c);
+
+#endif /* AVCODEC_BLOCKDSP_H */
diff --git a/media/ffvpx/libavcodec/bsf.c b/media/ffvpx/libavcodec/bsf.c
new file mode 100644
index 0000000000..42cc1b5ab0
--- /dev/null
+++ b/media/ffvpx/libavcodec/bsf.c
@@ -0,0 +1,562 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "config_components.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/log.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
+
+#include "bsf.h"
+#include "bsf_internal.h"
+#include "codec_desc.h"
+#include "codec_par.h"
+
+#define IS_EMPTY(pkt) (!(pkt)->data && !(pkt)->side_data_elems) /* true when the packet has neither data nor side data */
+
+static av_always_inline const FFBitStreamFilter *ff_bsf(const AVBitStreamFilter *bsf)
+{ /* View a public AVBitStreamFilter as the internal FFBitStreamFilter that contains it. */
+    return (const FFBitStreamFilter*)bsf; /* plain cast; presumably valid because p is FFBitStreamFilter's first member - confirm in bsf_internal.h */
+}
+
+typedef struct FFBSFContext { /* internal state wrapped around the public AVBSFContext */
+    AVBSFContext pub; /* first member, so ffbsfcontext() can cast an AVBSFContext pointer back to this struct */
+    AVPacket *buffer_pkt; /* input packet stored by av_bsf_send_packet() until ff_bsf_get_packet*() takes it */
+    int eof; /* set by av_bsf_send_packet() on a NULL/empty packet, cleared by av_bsf_flush() */
+} FFBSFContext;
+
+static av_always_inline FFBSFContext *ffbsfcontext(AVBSFContext *ctx)
+{ /* Recover the internal FFBSFContext from its public part. */
+    return (FFBSFContext *)ctx; /* pub is the first member of FFBSFContext, so both share one address */
+}
+
+/* Tear down *pctx: if private data exists, run the filter's close callback
+ * and free it; then free the buffered packet, both parameter sets and the
+ * context itself via av_freep(pctx). A NULL pctx or NULL *pctx is a no-op. */
+void av_bsf_free(AVBSFContext **pctx)
+{
+    AVBSFContext *bsf = pctx ? *pctx : NULL;
+
+    if (!bsf)
+        return;
+
+    if (bsf->priv_data) {
+        const FFBitStreamFilter *const impl = ff_bsf(bsf->filter);
+        if (impl->close)
+            impl->close(bsf);
+        if (bsf->filter->priv_class)
+            av_opt_free(bsf->priv_data);
+        av_freep(&bsf->priv_data);
+    }
+    av_packet_free(&ffbsfcontext(bsf)->buffer_pkt);
+    avcodec_parameters_free(&bsf->par_in);
+    avcodec_parameters_free(&bsf->par_out);
+
+    av_freep(pctx);
+}
+
+/* AVClass child_next callback: the only child is priv_data, reported once and only if the filter has a priv_class. */
+static void *bsf_child_next(void *obj, void *prev)
+{
+    const AVBSFContext *bsf = obj;
+    const int has_child = !prev && bsf->filter->priv_class;
+    return has_child ? bsf->priv_data : NULL;
+}
+
+static const char *bsf_to_name(void *bsf)
+{ /* AVClass item_name callback: bsf is an AVBSFContext, named after its filter. */
+    return ((AVBSFContext *)bsf)->filter->name;
+}
+
+static const AVClass bsf_class = { /* AVClass that av_bsf_alloc() installs in every AVBSFContext */
+    .class_name       = "AVBSFContext",
+    .item_name        = bsf_to_name,
+    .version          = LIBAVUTIL_VERSION_INT,
+    .child_next       = bsf_child_next, /* exposes priv_data as the single child object */
+    .child_class_iterate = ff_bsf_child_class_iterate, /* walks the priv_class of all filters that have one */
+    .category         = AV_CLASS_CATEGORY_BITSTREAM_FILTER,
+};
+
+const AVClass *av_bsf_get_class(void)
+{ /* Expose the file-local AVClass used for AVBSFContext. */
+    return &bsf_class;
+}
+
+/* Allocate a context for filter and store it in *pctx.
+ * The context gets two freshly allocated AVCodecParameters, zero-allocated
+ * private data of the filter's priv_data_size (with option defaults applied
+ * when the filter has a priv_class) and a freshly allocated buffer packet.
+ * Returns 0, or AVERROR(ENOMEM) after av_bsf_free() released what had been
+ * allocated; *pctx is only written on success. */
+int av_bsf_alloc(const AVBitStreamFilter *filter, AVBSFContext **pctx)
+{
+    FFBSFContext *const bsfi = av_mallocz(sizeof(*bsfi));
+    AVBSFContext *ctx;
+
+    if (!bsfi)
+        return AVERROR(ENOMEM);
+
+    ctx           = &bsfi->pub;
+    ctx->av_class = &bsf_class;
+    ctx->filter   = filter;
+    ctx->par_in   = avcodec_parameters_alloc();
+    ctx->par_out  = avcodec_parameters_alloc();
+    if (!ctx->par_in || !ctx->par_out)
+        goto nomem;
+
+    /* allocate priv data and init private options */
+    if (ff_bsf(filter)->priv_data_size) {
+        ctx->priv_data = av_mallocz(ff_bsf(filter)->priv_data_size);
+        if (!ctx->priv_data)
+            goto nomem;
+        if (filter->priv_class) {
+            /* the AVClass pointer is stored at the very start of priv_data */
+            *(const AVClass **)ctx->priv_data = filter->priv_class;
+            av_opt_set_defaults(ctx->priv_data);
+        }
+    }
+
+    bsfi->buffer_pkt = av_packet_alloc();
+    if (!bsfi->buffer_pkt)
+        goto nomem;
+
+    *pctx = ctx;
+    return 0;
+nomem:
+    av_bsf_free(&ctx);
+    return AVERROR(ENOMEM);
+}
+
+/* Prepare ctx for filtering.
+ * Checks par_in->codec_id against the filter's codec_ids list (when it has
+ * one), copies par_in / time_base_in to the output side, then runs the
+ * filter's init callback. Returns 0 or a negative AVERROR. */
+int av_bsf_init(AVBSFContext *ctx)
+{
+    const enum AVCodecID *const ids = ctx->filter->codec_ids;
+    int ret, i;
+
+    /* check that the codec is supported */
+    if (ids) {
+        for (i = 0; ids[i] != AV_CODEC_ID_NONE; i++)
+            if (ctx->par_in->codec_id == ids[i])
+                break;
+        if (ids[i] == AV_CODEC_ID_NONE) {
+            const AVCodecDescriptor *desc = avcodec_descriptor_get(ctx->par_in->codec_id);
+            av_log(ctx, AV_LOG_ERROR, "Codec '%s' (%d) is not supported by the "
+                   "bitstream filter '%s'. Supported codecs are: ",
+                   desc ? desc->name : "unknown", ctx->par_in->codec_id, ctx->filter->name);
+            for (i = 0; ids[i] != AV_CODEC_ID_NONE; i++)
+                av_log(ctx, AV_LOG_ERROR, "%s (%d) ", avcodec_get_name(ids[i]), ids[i]);
+            av_log(ctx, AV_LOG_ERROR, "\n");
+            return AVERROR(EINVAL);
+        }
+    }
+
+    /* initialize output parameters to be the same as input
+     * init below might overwrite that */
+    ret = avcodec_parameters_copy(ctx->par_out, ctx->par_in);
+    if (ret < 0)
+        return ret;
+
+    ctx->time_base_out = ctx->time_base_in;
+
+    /* a filter without an init callback needs nothing more */
+    if (!ff_bsf(ctx->filter)->init)
+        return 0;
+    ret = ff_bsf(ctx->filter)->init(ctx);
+    return ret < 0 ? ret : 0;
+}
+
+/* Clear the EOF flag, drop the buffered input packet and call the filter's
+ * flush callback if it has one. */
+void av_bsf_flush(AVBSFContext *ctx)
+{
+    FFBSFContext *const state = ffbsfcontext(ctx);
+
+    state->eof = 0;
+    av_packet_unref(state->buffer_pkt);
+    if (ff_bsf(ctx->filter)->flush)
+        ff_bsf(ctx->filter)->flush(ctx);
+}
+
+/* Store pkt as the filter's pending input. A NULL or empty packet marks EOF.
+ * Returns AVERROR(EINVAL) for data sent after EOF, AVERROR(EAGAIN) while an
+ * earlier packet is still pending, or the av_packet_make_refcounted() error. */
+int av_bsf_send_packet(AVBSFContext *ctx, AVPacket *pkt)
+{
+    FFBSFContext *const state = ffbsfcontext(ctx);
+    int err;
+
+    if (!pkt || IS_EMPTY(pkt)) {
+        if (pkt)
+            av_packet_unref(pkt);
+        state->eof = 1;
+        return 0;
+    }
+    if (state->eof) {
+        av_log(ctx, AV_LOG_ERROR, "A non-NULL packet sent after an EOF.\n");
+        return AVERROR(EINVAL);
+    }
+    if (!IS_EMPTY(state->buffer_pkt))
+        return AVERROR(EAGAIN);
+    err = av_packet_make_refcounted(pkt);
+    if (err >= 0) {
+        av_packet_move_ref(state->buffer_pkt, pkt);
+        err = 0;
+    }
+    return err;
+}
+
+int av_bsf_receive_packet(AVBSFContext *ctx, AVPacket *pkt)
+{ /* Delegates entirely to the filter's filter callback and returns its result. */
+    return ff_bsf(ctx->filter)->filter(ctx, pkt);
+}
+
+/* Hand the pending input packet itself to the caller through *pkt and
+ * install a newly allocated packet in its place. Returns AVERROR_EOF after
+ * EOF, AVERROR(EAGAIN) if nothing is pending, AVERROR(ENOMEM) if the
+ * replacement cannot be allocated (the pending packet then stays queued). */
+int ff_bsf_get_packet(AVBSFContext *ctx, AVPacket **pkt)
+{
+    FFBSFContext *const state = ffbsfcontext(ctx);
+    AVPacket *fresh;
+
+    if (state->eof)
+        return AVERROR_EOF;
+    if (IS_EMPTY(state->buffer_pkt))
+        return AVERROR(EAGAIN);
+    fresh = av_packet_alloc();
+    if (!fresh)
+        return AVERROR(ENOMEM);
+    *pkt              = state->buffer_pkt;
+    state->buffer_pkt = fresh;
+    return 0;
+}
+
+/* Move the pending input packet into the caller-provided pkt.
+ * Returns AVERROR_EOF after EOF and AVERROR(EAGAIN) if nothing is pending. */
+int ff_bsf_get_packet_ref(AVBSFContext *ctx, AVPacket *pkt)
+{
+    FFBSFContext *const state = ffbsfcontext(ctx);
+    int err = 0;
+    if (state->eof)
+        err = AVERROR_EOF;
+    else if (IS_EMPTY(state->buffer_pkt))
+        err = AVERROR(EAGAIN);
+    else
+        av_packet_move_ref(pkt, state->buffer_pkt);
+    return err;
+}
+
+typedef struct BSFListContext { /* private data of the "bsf_list" filter */
+    const AVClass *class; /* first member; av_bsf_alloc() writes the filter's priv_class here */
+
+    AVBSFContext **bsfs; /* the chained filters, in processing order; freed in bsf_list_close() */
+    int nb_bsfs; /* number of entries in bsfs */
+
+    unsigned idx;           // index of currently processed BSF
+
+    char * item_name; /* name built and cached by bsf_list_item_name(); freed in bsf_list_close() */
+} BSFListContext;
+
+
+/* init callback of the list filter: initialise every member in order, giving
+ * each one the previous member's par_out / time_base_out as input (the first
+ * gets the list's own input), then publish the last member's output side as
+ * the list's. Stops at, and returns, the first negative result. */
+static int bsf_list_init(AVBSFContext *bsf)
+{
+    BSFListContext *lst = bsf->priv_data;
+    const AVCodecParameters *par = bsf->par_in;
+    AVRational timebase = bsf->time_base_in;
+
+    for (int n = 0; n < lst->nb_bsfs; n++) {
+        AVBSFContext *member = lst->bsfs[n];
+        int err = avcodec_parameters_copy(member->par_in, par);
+        if (err < 0)
+            return err;
+
+        member->time_base_in = timebase;
+        err = av_bsf_init(member);
+        if (err < 0)
+            return err;
+
+        par      = member->par_out;
+        timebase = member->time_base_out;
+    }
+
+    bsf->time_base_out = timebase;
+    return avcodec_parameters_copy(bsf->par_out, par);
+}
+
+static int bsf_list_filter(AVBSFContext *bsf, AVPacket *out)
+{ /* filter callback of the list filter: moves packets through the members until the last one yields output */
+    BSFListContext *lst = bsf->priv_data;
+    int ret, eof = 0;
+
+    if (!lst->nb_bsfs) /* empty list: hand the pending input packet straight to the caller */
+        return ff_bsf_get_packet_ref(bsf, out);
+
+    while (1) {
+        /* get a packet from the previous filter up the chain */
+        if (lst->idx)
+            ret = av_bsf_receive_packet(lst->bsfs[lst->idx-1], out);
+        else
+            ret = ff_bsf_get_packet_ref(bsf, out);
+        if (ret == AVERROR(EAGAIN)) {
+            if (!lst->idx) /* the list's own input is empty too: report EAGAIN to the caller */
+                return ret;
+            lst->idx--; /* nothing available here: step one position back towards the input */
+            continue;
+        } else if (ret == AVERROR_EOF) {
+            eof = 1; /* passed on below as a NULL packet */
+        } else if (ret < 0)
+            return ret;
+
+        /* send it to the next filter down the chain */
+        if (lst->idx < lst->nb_bsfs) {
+            ret = av_bsf_send_packet(lst->bsfs[lst->idx], eof ? NULL : out);
+            av_assert1(ret != AVERROR(EAGAIN));
+            if (ret < 0) {
+                av_packet_unref(out);
+                return ret;
+            }
+            lst->idx++;
+            eof = 0;
+        } else if (eof) { /* idx == nb_bsfs: the last member reported EOF, ret is still AVERROR_EOF */
+            return ret;
+        } else { /* idx == nb_bsfs: out holds a packet produced by the last member */
+            return 0;
+        }
+    }
+}
+
+/* flush callback of the list filter: flush every member, then reset idx to 0. */
+static void bsf_list_flush(AVBSFContext *bsf)
+{
+    BSFListContext *const list = bsf->priv_data;
+    for (int n = 0; n < list->nb_bsfs; n++)
+        av_bsf_flush(list->bsfs[n]);
+    list->idx = 0;
+}
+
+/* close callback of the list filter: free every member, the member array and the cached name. */
+static void bsf_list_close(AVBSFContext *bsf)
+{
+    BSFListContext *const list = bsf->priv_data;
+
+    for (int n = 0; n < list->nb_bsfs; ++n)
+        av_bsf_free(&list->bsfs[n]);
+    av_freep(&list->bsfs);
+    av_freep(&list->item_name);
+}
+
+/* item_name callback of the list filter: "null" for an empty list, otherwise
+ * "bsf_list(a,b,...)" assembled from the members' filter names on first use
+ * and kept in item_name for later calls. */
+static const char *bsf_list_item_name(void *ctx)
+{
+    static const char *null_filter_name = "null";
+    BSFListContext *list = ((AVBSFContext *)ctx)->priv_data;
+    AVBPrint buf;
+
+    if (!list->nb_bsfs)
+        return null_filter_name;
+    if (list->item_name)
+        return list->item_name;
+
+    /* first call: build the name once */
+    av_bprint_init(&buf, 16, 128);
+    av_bprintf(&buf, "bsf_list(");
+    for (int n = 0; n < list->nb_bsfs; n++)
+        av_bprintf(&buf, n ? ",%s" : "%s", list->bsfs[n]->filter->name);
+    av_bprintf(&buf, ")");
+    av_bprint_finalize(&buf, &list->item_name);
+
+    return list->item_name;
+}
+
+static const AVClass bsf_list_class = { /* AVClass of the list filter's private data (BSFListContext) */
+        .class_name = "bsf_list",
+        .item_name  = bsf_list_item_name, /* "null" or "bsf_list(...)" depending on the member count */
+        .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const FFBitStreamFilter list_bsf = { /* file-local filter that runs a chain of other filters */
+        .p.name         = "bsf_list",
+        .p.priv_class   = &bsf_list_class,
+        .priv_data_size = sizeof(BSFListContext),
+        .init           = bsf_list_init,
+        .filter         = bsf_list_filter,
+        .flush          = bsf_list_flush,
+        .close          = bsf_list_close,
+};
+
+struct AVBSFList { /* list under construction; turned into a context by av_bsf_list_finalize() */
+    AVBSFContext **bsfs; /* contexts appended so far */
+    int nb_bsfs; /* number of entries in bsfs */
+};
+
+AVBSFList *av_bsf_list_alloc(void)
+{ /* Returns a zero-allocated AVBSFList, i.e. whatever av_mallocz() returns (NULL on failure). */
+    return av_mallocz(sizeof(AVBSFList));
+}
+
+/* Free *lst together with every filter context it still holds, then clear
+ * *lst via av_freep(). A NULL lst or a NULL *lst is a no-op, like av_bsf_free(). */
+void av_bsf_list_free(AVBSFList **lst)
+{
+    AVBSFList *list = lst ? *lst : NULL; /* tolerate a NULL lst instead of dereferencing it */
+    if (!list)
+        return;
+    for (int i = 0; i < list->nb_bsfs; ++i)
+        av_bsf_free(&list->bsfs[i]);
+    av_free(list->bsfs);
+    av_freep(lst);
+}
+
+int av_bsf_list_append(AVBSFList *lst, AVBSFContext *bsf)
+{ /* Appends bsf to lst->bsfs and returns the result of av_dynarray_add_nofree(). */
+    return av_dynarray_add_nofree(&lst->bsfs, &lst->nb_bsfs, bsf);
+}
+
+/* Create a context for the filter called bsf_name, configure it and append
+ * it to lst.
+ * options, if non-NULL, is applied to the private data with
+ * av_opt_set_from_string() ("=" / ":" separators, the filter's first option
+ * name passed as the shorthand key); options_dict, if non-NULL, is applied
+ * with av_opt_set_dict2(). Returns AVERROR_BSF_NOT_FOUND for an unknown
+ * name, otherwise the result of the last step that ran. */
+static int bsf_list_append_internal(AVBSFList *lst, const char *bsf_name, const char *options, AVDictionary ** options_dict)
+{
+    const AVBitStreamFilter *const filter = av_bsf_get_by_name(bsf_name);
+    AVBSFContext *bsf;
+    int err;
+
+    if (!filter)
+        return AVERROR_BSF_NOT_FOUND;
+
+    err = av_bsf_alloc(filter, &bsf);
+    if (err < 0)
+        return err;
+
+    /* the option string is only used for filters that have a priv_class */
+    if (options && filter->priv_class) {
+        const AVOption *first = av_opt_next(bsf->priv_data, NULL);
+        const char *shorthand[2] = { first ? first->name : NULL, NULL };
+
+        err = av_opt_set_from_string(bsf->priv_data, options, shorthand, "=", ":");
+    }
+
+    /* each later step runs only if everything before it succeeded */
+    if (err >= 0 && options_dict)
+        err = av_opt_set_dict2(bsf, options_dict, AV_OPT_SEARCH_CHILDREN);
+    if (err >= 0)
+        err = av_bsf_list_append(lst, bsf);
+
+    /* on failure the context is freed here */
+    if (err < 0)
+        av_bsf_free(&bsf);
+
+    return err;
+}
+
+int av_bsf_list_append2(AVBSFList *lst, const char *bsf_name, AVDictionary ** options)
+{ /* Append the filter named bsf_name, configured from the options dictionary only (no option string). */
+    return bsf_list_append_internal(lst, bsf_name, NULL, options);
+}
+
+/* Turn *lst into a single filter context returned in *bsf and free the list
+ * object. A list of exactly one filter yields that filter itself; any other
+ * length yields a "bsf_list" context that takes over the array of members.
+ * If allocating that context fails, *lst is left untouched for the caller. */
+int av_bsf_list_finalize(AVBSFList **lst, AVBSFContext **bsf)
+{
+    AVBSFList *const list = *lst;
+    int err = 0;
+
+    if (list->nb_bsfs == 1) {
+        *bsf = list->bsfs[0];
+        av_freep(&list->bsfs);
+        list->nb_bsfs = 0;
+    } else {
+        BSFListContext *wrapper;
+        err = av_bsf_alloc(&list_bsf.p, bsf);
+        if (err < 0)
+            return err;
+        wrapper          = (*bsf)->priv_data;
+        wrapper->bsfs    = list->bsfs;
+        wrapper->nb_bsfs = list->nb_bsfs;
+    }
+    av_freep(lst);
+    return err;
+}
+
+/* Tokenise str on '=' with av_strtok(): the first token is the filter name,
+ * the remainder its option string. AVERROR(EINVAL) if there is no name token. */
+static int bsf_parse_single(char *str, AVBSFList *bsf_lst)
+{
+    char *opts;
+    char *const name = av_strtok(str, "=", &opts);
+
+    return name ? bsf_list_append_internal(bsf_lst, name, opts, NULL)
+                : AVERROR(EINVAL);
+}
+
+/* Build a filter context in *bsf_lst from "name[=opts],name[=opts],..."; a NULL
+ * str yields the null filter. On failure the partly built list is freed. */
+int av_bsf_list_parse_str(const char *str, AVBSFContext **bsf_lst)
+{
+    AVBSFList *lst;
+    int ret;
+
+    if (!str)
+        return av_bsf_get_null_filter(bsf_lst);
+    lst = av_bsf_list_alloc();
+    if (!lst)
+        return AVERROR(ENOMEM);
+    do {
+        char *bsf_str = av_get_token(&str, ",");
+        /* a NULL token (allocation failure) must not reach av_strtok() */
+        ret = bsf_str ? bsf_parse_single(bsf_str, lst) : AVERROR(ENOMEM);
+        av_free(bsf_str);
+        if (ret < 0)
+            goto end;
+    } while (*str && *++str);
+    ret = av_bsf_list_finalize(&lst, bsf_lst);
+end:
+    if (ret < 0)
+        av_bsf_list_free(&lst);
+    return ret;
+}
+
+int av_bsf_get_null_filter(AVBSFContext **bsf)
+{ /* Allocate a pass-through filter context in *bsf; returns av_bsf_alloc()'s result. */
+#if CONFIG_NULL_BSF
+    extern const FFBitStreamFilter ff_null_bsf;
+    return av_bsf_alloc(&ff_null_bsf.p, bsf);
+#else /* null BSF not built: an empty bsf_list forwards packets unchanged (see bsf_list_filter) */
+    return av_bsf_alloc(&list_bsf.p, bsf);
+#endif
+}
diff --git a/media/ffvpx/libavcodec/bsf.h b/media/ffvpx/libavcodec/bsf.h
new file mode 100644
index 0000000000..a09c69f242
--- /dev/null
+++ b/media/ffvpx/libavcodec/bsf.h
@@ -0,0 +1,332 @@
+/*
+ * Bitstream filters public API
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_BSF_H
+#define AVCODEC_BSF_H
+
+#include "libavutil/dict.h"
+#include "libavutil/log.h"
+#include "libavutil/rational.h"
+
+#include "codec_id.h"
+#include "codec_par.h"
+#include "packet.h"
+
+/**
+ * @defgroup lavc_bsf Bitstream filters
+ * @ingroup libavc
+ *
+ * Bitstream filters transform encoded media data without decoding it. This
+ * allows e.g. manipulating various header values. Bitstream filters operate on
+ * @ref AVPacket "AVPackets".
+ *
+ * The bitstream filtering API is centered around two structures:
+ * AVBitStreamFilter and AVBSFContext. The former represents a bitstream filter
+ * in abstract, the latter a specific filtering process. Obtain an
+ * AVBitStreamFilter using av_bsf_get_by_name() or av_bsf_iterate(), then pass
+ * it to av_bsf_alloc() to create an AVBSFContext. Fill in the user-settable
+ * AVBSFContext fields, as described in its documentation, then call
+ * av_bsf_init() to prepare the filter context for use.
+ *
+ * Submit packets for filtering using av_bsf_send_packet(), obtain filtered
+ * results with av_bsf_receive_packet(). When no more input packets will be
+ * sent, submit a NULL AVPacket to signal the end of the stream to the filter.
+ * av_bsf_receive_packet() will then return trailing packets, if any are
+ * produced by the filter.
+ *
+ * Finally, free the filter context with av_bsf_free().
+ * @{
+ */
+
+/**
+ * The bitstream filter state.
+ *
+ * This struct must be allocated with av_bsf_alloc() and freed with
+ * av_bsf_free().
+ *
+ * The fields in the struct will only be changed (by the caller or by the
+ * filter) as described in their documentation, and are to be considered
+ * immutable otherwise.
+ */
+typedef struct AVBSFContext {
+    /**
+     * A class for logging and AVOptions
+     */
+    const AVClass *av_class;
+
+    /**
+     * The bitstream filter this context is an instance of.
+     */
+    const struct AVBitStreamFilter *filter;
+
+    /**
+     * Opaque filter-specific private data. If filter->priv_class is non-NULL,
+     * this is an AVOptions-enabled struct.
+     */
+    void *priv_data;
+
+    /**
+     * Parameters of the input stream. This field is allocated in
+     * av_bsf_alloc(), it needs to be filled by the caller before
+     * av_bsf_init().
+     */
+    AVCodecParameters *par_in;
+
+    /**
+     * Parameters of the output stream. This field is allocated in
+     * av_bsf_alloc(), it is set by the filter in av_bsf_init().
+     */
+    AVCodecParameters *par_out;
+
+    /**
+     * The timebase used for the timestamps of the input packets. Set by the
+     * caller before av_bsf_init().
+     */
+    AVRational time_base_in;
+
+    /**
+     * The timebase used for the timestamps of the output packets. Set by the
+     * filter in av_bsf_init().
+     */
+    AVRational time_base_out;
+} AVBSFContext;
+
+typedef struct AVBitStreamFilter { /* public description of one bitstream filter */
+    const char *name; /**< name of the filter, see av_bsf_get_by_name() */
+
+    /**
+     * A list of codec ids supported by the filter, terminated by
+     * AV_CODEC_ID_NONE.
+     * May be NULL, in that case the bitstream filter works with any codec id.
+     */
+    const enum AVCodecID *codec_ids;
+
+    /**
+     * A class for the private data, used to declare bitstream filter private
+     * AVOptions. This field is NULL for bitstream filters that do not declare
+     * any options.
+     *
+     * If this field is non-NULL, the first member of the filter private data
+     * must be a pointer to AVClass, which will be set by libavcodec generic
+     * code to this class.
+     */
+    const AVClass *priv_class;
+} AVBitStreamFilter;
+
+/**
+ * @return a bitstream filter with the specified name or NULL if no such
+ *         bitstream filter exists.
+ */
+const AVBitStreamFilter *av_bsf_get_by_name(const char *name);
+
+/**
+ * Iterate over all registered bitstream filters.
+ *
+ * @param opaque a pointer where libavcodec will store the iteration state. Must
+ *               point to NULL to start the iteration.
+ *
+ * @return the next registered bitstream filter or NULL when the iteration is
+ *         finished
+ */
+const AVBitStreamFilter *av_bsf_iterate(void **opaque);
+
+/**
+ * Allocate a context for a given bitstream filter. The caller must fill in the
+ * context parameters as described in the documentation and then call
+ * av_bsf_init() before sending any data to the filter.
+ *
+ * @param filter the filter for which to allocate an instance.
+ * @param[out] ctx a pointer into which the pointer to the newly-allocated context
+ *                 will be written. It must be freed with av_bsf_free() after the
+ *                 filtering is done.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int av_bsf_alloc(const AVBitStreamFilter *filter, AVBSFContext **ctx);
+
+/**
+ * Prepare the filter for use, after all the parameters and options have been
+ * set.
+ *
+ * @param ctx an AVBSFContext previously allocated with av_bsf_alloc()
+ */
+int av_bsf_init(AVBSFContext *ctx);
+
+/**
+ * Submit a packet for filtering.
+ *
+ * After sending each packet, the filter must be completely drained by calling
+ * av_bsf_receive_packet() repeatedly until it returns AVERROR(EAGAIN) or
+ * AVERROR_EOF.
+ *
+ * @param ctx an initialized AVBSFContext
+ * @param pkt the packet to filter. The bitstream filter will take ownership of
+ * the packet and reset the contents of pkt. pkt is not touched if an error occurs.
+ * If pkt is empty (i.e. NULL, or pkt->data is NULL and pkt->side_data_elems zero),
+ * it signals the end of the stream (i.e. no more non-empty packets will be sent;
+ * sending more empty packets does nothing) and will cause the filter to output
+ * any packets it may have buffered internally.
+ *
+ * @return
+ *  - 0 on success.
+ *  - AVERROR(EAGAIN) if packets need to be retrieved from the filter (using
+ *    av_bsf_receive_packet()) before new input can be consumed.
+ *  - Another negative AVERROR value if an error occurs.
+ */
+int av_bsf_send_packet(AVBSFContext *ctx, AVPacket *pkt);
+
+/**
+ * Retrieve a filtered packet.
+ *
+ * @param ctx an initialized AVBSFContext
+ * @param[out] pkt this struct will be filled with the contents of the filtered
+ *                 packet. It is owned by the caller and must be freed using
+ *                 av_packet_unref() when it is no longer needed.
+ *                 This parameter should be "clean" (i.e. freshly allocated
+ *                 with av_packet_alloc() or unreffed with av_packet_unref())
+ *                 when this function is called. If this function returns
+ *                 successfully, the contents of pkt will be completely
+ *                 overwritten by the returned data. On failure, pkt is not
+ *                 touched.
+ *
+ * @return
+ *  - 0 on success.
+ *  - AVERROR(EAGAIN) if more packets need to be sent to the filter (using
+ *    av_bsf_send_packet()) to get more output.
+ *  - AVERROR_EOF if there will be no further output from the filter.
+ *  - Another negative AVERROR value if an error occurs.
+ *
+ * @note one input packet may result in several output packets, so after sending
+ * a packet with av_bsf_send_packet(), this function needs to be called
+ * repeatedly until it stops returning 0. It is also possible for a filter to
+ * output fewer packets than were sent to it, so this function may return
+ * AVERROR(EAGAIN) immediately after a successful av_bsf_send_packet() call.
+ */
+int av_bsf_receive_packet(AVBSFContext *ctx, AVPacket *pkt);
+
+/**
+ * Reset the internal bitstream filter state. Should be called e.g. when seeking.
+ */
+void av_bsf_flush(AVBSFContext *ctx);
+
+/**
+ * Free a bitstream filter context and everything associated with it; write NULL
+ * into the supplied pointer.
+ */
+void av_bsf_free(AVBSFContext **ctx);
+
+/**
+ * Get the AVClass for AVBSFContext. It can be used in combination with
+ * AV_OPT_SEARCH_FAKE_OBJ for examining options.
+ *
+ * @see av_opt_find().
+ */
+const AVClass *av_bsf_get_class(void);
+
+/**
+ * Structure for chain/list of bitstream filters.
+ * Empty list can be allocated by av_bsf_list_alloc().
+ */
+typedef struct AVBSFList AVBSFList;
+
+/**
+ * Allocate empty list of bitstream filters.
+ * The list must be later freed by av_bsf_list_free()
+ * or finalized by av_bsf_list_finalize().
+ *
+ * @return Pointer to @ref AVBSFList on success, NULL in case of failure
+ */
+AVBSFList *av_bsf_list_alloc(void);
+
+/**
+ * Free list of bitstream filters.
+ *
+ * @param lst Pointer to pointer returned by av_bsf_list_alloc()
+ */
+void av_bsf_list_free(AVBSFList **lst);
+
+/**
+ * Append bitstream filter to the list of bitstream filters.
+ *
+ * @param lst List to append to
+ * @param bsf Filter context to be appended
+ *
+ * @return >=0 on success, negative AVERROR in case of failure
+ */
+int av_bsf_list_append(AVBSFList *lst, AVBSFContext *bsf);
+
+/**
+ * Construct new bitstream filter context given its name and options
+ * and append it to the list of bitstream filters.
+ *
+ * @param lst      List to append to
+ * @param bsf_name Name of the bitstream filter
+ * @param options  Options for the bitstream filter, can be set to NULL
+ *
+ * @return >=0 on success, negative AVERROR in case of failure
+ */
+int av_bsf_list_append2(AVBSFList *lst, const char * bsf_name, AVDictionary **options);
+/**
+ * Finalize list of bitstream filters.
+ *
+ * This function will transform @ref AVBSFList to single @ref AVBSFContext,
+ * so the whole chain of bitstream filters can be treated as single filter
+ * freshly allocated by av_bsf_alloc().
+ * If the call is successful, @ref AVBSFList structure is freed and lst
+ * will be set to NULL. In case of failure, caller is responsible for
+ * freeing the structure by av_bsf_list_free()
+ *
+ * @param      lst Filter list structure to be transformed
+ * @param[out] bsf Pointer to be set to newly created @ref AVBSFContext structure
+ *                 representing the chain of bitstream filters
+ *
+ * @return >=0 on success, negative AVERROR in case of failure
+ */
+int av_bsf_list_finalize(AVBSFList **lst, AVBSFContext **bsf);
+
+/**
+ * Parse string describing list of bitstream filters and create single
+ * @ref AVBSFContext describing the whole chain of bitstream filters.
+ * Resulting @ref AVBSFContext can be treated as any other @ref AVBSFContext freshly
+ * allocated by av_bsf_alloc().
+ *
+ * @param      str String describing chain of bitstream filters in format
+ *                 `bsf1[=opt1=val1:opt2=val2][,bsf2]`
+ * @param[out] bsf Pointer to be set to newly created @ref AVBSFContext structure
+ *                 representing the chain of bitstream filters
+ *
+ * @return >=0 on success, negative AVERROR in case of failure
+ */
+int av_bsf_list_parse_str(const char *str, AVBSFContext **bsf);
+
+/**
+ * Get null/pass-through bitstream filter.
+ *
+ * @param[out] bsf Pointer to be set to new instance of pass-through bitstream filter
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int av_bsf_get_null_filter(AVBSFContext **bsf);
+
+/**
+ * @}
+ */
+
+#endif // AVCODEC_BSF_H
diff --git a/media/ffvpx/libavcodec/bsf_internal.h b/media/ffvpx/libavcodec/bsf_internal.h
new file mode 100644
index 0000000000..922b03c01b
--- /dev/null
+++ b/media/ffvpx/libavcodec/bsf_internal.h
@@ -0,0 +1,60 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_BSF_INTERNAL_H
+#define AVCODEC_BSF_INTERNAL_H
+
+#include "libavutil/log.h"
+
+#include "bsf.h"
+#include "packet.h"
+
+typedef struct FFBitStreamFilter {
+    /**
+     * The public AVBitStreamFilter. See bsf.h for it.
+     */
+    AVBitStreamFilter p;
+
+    int priv_data_size;
+    int (*init)(AVBSFContext *ctx);
+    int (*filter)(AVBSFContext *ctx, AVPacket *pkt);
+    void (*close)(AVBSFContext *ctx);
+    void (*flush)(AVBSFContext *ctx);
+} FFBitStreamFilter;
+
+/**
+ * Called by the bitstream filters to get the next packet for filtering.
+ * The filter is responsible for either freeing the packet or passing it to the
+ * caller.
+ */
+int ff_bsf_get_packet(AVBSFContext *ctx, AVPacket **pkt);
+
+/**
+ * Called by bitstream filters to get packet for filtering.
+ * The reference to packet is moved to provided packet structure.
+ *
+ * @param ctx pointer to AVBSFContext of filter
+ * @param pkt pointer to packet to move reference to
+ *
+ * @return 0 on success, negative AVERROR in case of failure
+ */
+int ff_bsf_get_packet_ref(AVBSFContext *ctx, AVPacket *pkt);
+
+const AVClass *ff_bsf_child_class_iterate(void **opaque);
+
+#endif /* AVCODEC_BSF_INTERNAL_H */
diff --git a/media/ffvpx/libavcodec/bsf_list.c b/media/ffvpx/libavcodec/bsf_list.c
new file mode 100644
index 0000000000..4050b41fde
--- /dev/null
+++ b/media/ffvpx/libavcodec/bsf_list.c
@@ -0,0 +1,11 @@
+#include "config_components.h"
+
+static const FFBitStreamFilter * const bitstream_filters[] = { /* NULL-terminated */
+#if CONFIG_VP9_SUPERFRAME_SPLIT_BSF
+    &ff_vp9_superframe_split_bsf,
+#endif
+#if CONFIG_AV1_VAAPI_HWACCEL /* frame-split BSF is listed only with this hwaccel */
+    &ff_av1_frame_split_bsf,
+#endif
+    &ff_null_bsf, /* listed unconditionally */
+    NULL };
diff --git a/media/ffvpx/libavcodec/bytestream.h b/media/ffvpx/libavcodec/bytestream.h
new file mode 100644
index 0000000000..d0033f14f3
--- /dev/null
+++ b/media/ffvpx/libavcodec/bytestream.h
@@ -0,0 +1,380 @@
+/*
+ * Bytestream functions
+ * copyright (c) 2006 Baptiste Coudurier <baptiste.coudurier@free.fr>
+ * Copyright (c) 2012 Aneesh Dogra (lionaneesh) <lionaneesh@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_BYTESTREAM_H
+#define AVCODEC_BYTESTREAM_H
+
+#include <stdint.h>
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+
+typedef struct GetByteContext { /* read cursor: buffer moves within [buffer_start, buffer_end] */
+    const uint8_t *buffer, *buffer_end, *buffer_start;
+} GetByteContext;
+
+typedef struct PutByteContext { /* write cursor: buffer moves within [buffer_start, buffer_end] */
+    uint8_t *buffer, *buffer_end, *buffer_start;
+    int eof; /* set to 1 when a checked put/skip/seek did not fit; cleared by bytestream2_seek_p() */
+} PutByteContext;
+
+#define DEF(type, name, bytes, read, write) /* *u: no bounds check */         \
+static av_always_inline type bytestream_get_ ## name(const uint8_t **b)        \
+{                                                                              \
+    (*b) += bytes;                                                             \
+    return read(*b - bytes);                                                   \
+}                                                                              \
+static av_always_inline void bytestream_put_ ## name(uint8_t **b,              \
+                                                     const type value)         \
+{                                                                              \
+    write(*b, value);                                                          \
+    (*b) += bytes;                                                             \
+}                                                                              \
+static av_always_inline void bytestream2_put_ ## name ## u(PutByteContext *p,  \
+                                                           const type value)   \
+{                                                                              \
+    bytestream_put_ ## name(&p->buffer, value);                                \
+}                                                                              \
+static av_always_inline void bytestream2_put_ ## name(PutByteContext *p,       \
+                                                      const type value)        \
+{                                                                              \
+    if (!p->eof && (p->buffer_end - p->buffer >= bytes)) {                     \
+        write(p->buffer, value);                                               \
+        p->buffer += bytes;                                                    \
+    } else                                                                     \
+        p->eof = 1;                                                            \
+}                                                                              \
+static av_always_inline type bytestream2_get_ ## name ## u(GetByteContext *g)  \
+{                                                                              \
+    return bytestream_get_ ## name(&g->buffer);                                \
+}                                                                              \
+static av_always_inline type bytestream2_get_ ## name(GetByteContext *g)       \
+{                                                                              \
+    if (g->buffer_end - g->buffer < bytes) {                                   \
+        g->buffer = g->buffer_end;                                             \
+        return 0;                                                              \
+    }                                                                          \
+    return bytestream2_get_ ## name ## u(g);                                   \
+}                                                                              \
+static av_always_inline type bytestream2_peek_ ## name ## u(GetByteContext *g) \
+{                                                                              \
+    return read(g->buffer);                                                    \
+}                                                                              \
+static av_always_inline type bytestream2_peek_ ## name(GetByteContext *g)      \
+{                                                                              \
+    if (g->buffer_end - g->buffer < bytes)                                     \
+        return 0;                                                              \
+    return bytestream2_peek_ ## name ## u(g);                                  \
+}
+/* Each line emits bytestream_{get,put}_X and bytestream2_{get,put,peek}_X[u]. */
+DEF(uint64_t,     le64, 8, AV_RL64, AV_WL64)
+DEF(unsigned int, le32, 4, AV_RL32, AV_WL32)
+DEF(unsigned int, le24, 3, AV_RL24, AV_WL24)
+DEF(unsigned int, le16, 2, AV_RL16, AV_WL16)
+DEF(uint64_t,     be64, 8, AV_RB64, AV_WB64)
+DEF(unsigned int, be32, 4, AV_RB32, AV_WB32)
+DEF(unsigned int, be24, 3, AV_RB24, AV_WB24)
+DEF(unsigned int, be16, 2, AV_RB16, AV_WB16)
+DEF(unsigned int, byte, 1, AV_RB8 , AV_WB8)
+
+#if AV_HAVE_BIGENDIAN /* *_ne* names alias the be accessors here, the le ones below */
+#   define bytestream2_get_ne16  bytestream2_get_be16
+#   define bytestream2_get_ne24  bytestream2_get_be24
+#   define bytestream2_get_ne32  bytestream2_get_be32
+#   define bytestream2_get_ne64  bytestream2_get_be64
+#   define bytestream2_get_ne16u bytestream2_get_be16u
+#   define bytestream2_get_ne24u bytestream2_get_be24u
+#   define bytestream2_get_ne32u bytestream2_get_be32u
+#   define bytestream2_get_ne64u bytestream2_get_be64u
+#   define bytestream2_put_ne16  bytestream2_put_be16
+#   define bytestream2_put_ne24  bytestream2_put_be24
+#   define bytestream2_put_ne32  bytestream2_put_be32
+#   define bytestream2_put_ne64  bytestream2_put_be64
+#   define bytestream2_peek_ne16 bytestream2_peek_be16
+#   define bytestream2_peek_ne24 bytestream2_peek_be24
+#   define bytestream2_peek_ne32 bytestream2_peek_be32
+#   define bytestream2_peek_ne64 bytestream2_peek_be64
+#else
+#   define bytestream2_get_ne16  bytestream2_get_le16
+#   define bytestream2_get_ne24  bytestream2_get_le24
+#   define bytestream2_get_ne32  bytestream2_get_le32
+#   define bytestream2_get_ne64  bytestream2_get_le64
+#   define bytestream2_get_ne16u bytestream2_get_le16u
+#   define bytestream2_get_ne24u bytestream2_get_le24u
+#   define bytestream2_get_ne32u bytestream2_get_le32u
+#   define bytestream2_get_ne64u bytestream2_get_le64u
+#   define bytestream2_put_ne16  bytestream2_put_le16
+#   define bytestream2_put_ne24  bytestream2_put_le24
+#   define bytestream2_put_ne32  bytestream2_put_le32
+#   define bytestream2_put_ne64  bytestream2_put_le64
+#   define bytestream2_peek_ne16 bytestream2_peek_le16
+#   define bytestream2_peek_ne24 bytestream2_peek_le24
+#   define bytestream2_peek_ne32 bytestream2_peek_le32
+#   define bytestream2_peek_ne64 bytestream2_peek_le64
+#endif
+
+/** Start reading at buf; the readable range is buf[0 .. buf_size). */
+static av_always_inline void
+bytestream2_init(GetByteContext *g, const uint8_t *buf, int buf_size)
+{
+    av_assert0(buf_size >= 0);
+    g->buffer_start = buf;
+    g->buffer_end   = buf + buf_size;
+    g->buffer       = buf;
+}
+
+/** Start writing at buf over buf[0 .. buf_size), with the eof flag cleared. */
+static av_always_inline void
+bytestream2_init_writer(PutByteContext *p, uint8_t *buf, int buf_size)
+{
+    av_assert0(buf_size >= 0);
+    p->eof          = 0;
+    p->buffer_start = buf;
+    p->buffer_end   = buf + buf_size;
+    p->buffer       = buf;
+}
+
+static av_always_inline int bytestream2_get_bytes_left(GetByteContext *g)
+{
+    return g->buffer_end - g->buffer; /* bytes between read position and buffer end */
+}
+
+static av_always_inline int bytestream2_get_bytes_left_p(PutByteContext *p)
+{
+    return p->buffer_end - p->buffer; /* bytes between write position and buffer end */
+}
+
+/** Advance the read position by size bytes, but never past buffer_end. */
+static av_always_inline void bytestream2_skip(GetByteContext *g, unsigned int size)
+{
+    g->buffer += FFMIN(g->buffer_end - g->buffer, size);
+}
+
+/** Advance the read position by size bytes without any bounds check. */
+static av_always_inline void bytestream2_skipu(GetByteContext *g, unsigned int size)
+{
+    g->buffer = g->buffer + size;
+}
+
+/** Advance the write position by up to size bytes; a short skip sets eof. */
+static av_always_inline void bytestream2_skip_p(PutByteContext *p, unsigned int size)
+{
+    if (!p->eof) {
+        const int step = FFMIN(p->buffer_end - p->buffer, size);
+
+        if (step != size)
+            p->eof = 1;
+        p->buffer += step;
+    }
+}
+
+static av_always_inline int bytestream2_tell(GetByteContext *g)
+{
+    return (int)(g->buffer - g->buffer_start); /* read position relative to buffer start */
+}
+
+static av_always_inline int bytestream2_tell_p(PutByteContext *p)
+{
+    return (int)(p->buffer - p->buffer_start); /* write position relative to buffer start */
+}
+
+static av_always_inline int bytestream2_size(GetByteContext *g)
+{
+    return (int)(g->buffer_end - g->buffer_start); /* total size of the read buffer */
+}
+
+static av_always_inline int bytestream2_size_p(PutByteContext *p)
+{
+    return (int)(p->buffer_end - p->buffer_start); /* total size of the write buffer */
+}
+
+/**
+ * Move the read position. The offset is limited with av_clip() to the
+ * range spanned by the buffer; an unsupported whence returns
+ * AVERROR(EINVAL). Otherwise the result of bytestream2_tell() is returned.
+ */
+static av_always_inline int
+bytestream2_seek(GetByteContext *g, int offset, int whence)
+{
+    if (whence == SEEK_CUR) {
+        offset     = av_clip(offset, -(g->buffer - g->buffer_start),
+                             g->buffer_end - g->buffer);
+        g->buffer += offset;
+    } else if (whence == SEEK_END) {
+        offset    = av_clip(offset, -(g->buffer_end - g->buffer_start), 0);
+        g->buffer = g->buffer_end + offset;
+    } else if (whence == SEEK_SET) {
+        offset    = av_clip(offset, 0, g->buffer_end - g->buffer_start);
+        g->buffer = g->buffer_start + offset;
+    } else {
+        return AVERROR(EINVAL);
+    }
+    return bytestream2_tell(g);
+}
+
+/**
+ * Move the write position. eof is cleared first and set again when the
+ * request points past the buffer end; the offset is then limited with
+ * av_clip(). An unsupported whence returns AVERROR(EINVAL).
+ */
+static av_always_inline int
+bytestream2_seek_p(PutByteContext *p, int offset, int whence)
+{
+    p->eof = 0;
+    if (whence == SEEK_CUR) {
+        if (p->buffer_end - p->buffer < offset)
+            p->eof = 1;
+        offset     = av_clip(offset, -(p->buffer - p->buffer_start),
+                             p->buffer_end - p->buffer);
+        p->buffer += offset;
+    } else if (whence == SEEK_END) {
+        if (offset > 0)
+            p->eof = 1;
+        offset    = av_clip(offset, -(p->buffer_end - p->buffer_start), 0);
+        p->buffer = p->buffer_end + offset;
+    } else if (whence == SEEK_SET) {
+        if (p->buffer_end - p->buffer_start < offset)
+            p->eof = 1;
+        offset    = av_clip(offset, 0, p->buffer_end - p->buffer_start);
+        p->buffer = p->buffer_start + offset;
+    } else {
+        return AVERROR(EINVAL);
+    }
+    return bytestream2_tell_p(p);
+}
+
+/** Copy up to size bytes to dst, limited to what is left; returns the count. */
+static av_always_inline unsigned int
+bytestream2_get_buffer(GetByteContext *g, uint8_t *dst, unsigned int size)
+{
+    const int avail = FFMIN(g->buffer_end - g->buffer, size);
+    memcpy(dst, g->buffer, avail);
+    g->buffer += avail;
+    return avail;
+}
+
+/** Unchecked variant: copy exactly size bytes to dst and advance. */
+static av_always_inline unsigned int
+bytestream2_get_bufferu(GetByteContext *g, uint8_t *dst, unsigned int size)
+{
+    memcpy(dst, g->buffer, size);
+    g->buffer = g->buffer + size;
+    return size;
+}
+
+/** Write up to size bytes from src; a short write sets eof. Returns the count. */
+static av_always_inline unsigned int
+bytestream2_put_buffer(PutByteContext *p, const uint8_t *src, unsigned int size)
+{
+    int room;
+    if (p->eof)
+        return 0;
+    room = FFMIN(p->buffer_end - p->buffer, size);
+    if (room != size)
+        p->eof = 1;
+    memcpy(p->buffer, src, room);
+    p->buffer += room;
+    return room;
+}
+
+/** Unchecked variant: write exactly size bytes from src and advance. */
+static av_always_inline unsigned int
+bytestream2_put_bufferu(PutByteContext *p, const uint8_t *src, unsigned int size)
+{
+    memcpy(p->buffer, src, size);
+    p->buffer = p->buffer + size;
+    return size;
+}
+
+/** Fill up to size bytes with c; a short fill sets eof. No-op once eof is set. */
+static av_always_inline void
+bytestream2_set_buffer(PutByteContext *p, const uint8_t c, unsigned int size)
+{
+    if (!p->eof) {
+        const int room = FFMIN(p->buffer_end - p->buffer, size);
+
+        if (room != size)
+            p->eof = 1;
+        memset(p->buffer, c, room);
+        p->buffer += room;
+    }
+}
+
+/** Unchecked variant: fill exactly size bytes with c and advance. */
+static av_always_inline void
+bytestream2_set_bufferu(PutByteContext *p, const uint8_t c, unsigned int size)
+{
+    memset(p->buffer, c, size);
+    p->buffer = p->buffer + size;
+}
+
+static av_always_inline unsigned int bytestream2_get_eof(PutByteContext *p)
+{
+    return p->eof; /* current value of the writer's eof flag */
+}
+
+/** Unchecked copy of exactly size bytes from reader g to writer p. */
+static av_always_inline unsigned int
+bytestream2_copy_bufferu(PutByteContext *p, GetByteContext *g, unsigned int size)
+{
+    memcpy(p->buffer, g->buffer, size);
+    g->buffer += size;
+    p->buffer += size;
+    return size;
+}
+
+/** Checked copy from reader g to writer p, limited to what g holds and p can
+ *  take; p->eof is set when p is the limiting side. Returns bytes copied. */
+static av_always_inline unsigned int
+bytestream2_copy_buffer(PutByteContext *p, GetByteContext *g, unsigned int size)
+{
+    int fit;
+
+    if (p->eof)
+        return 0;
+    size = FFMIN(g->buffer_end - g->buffer, size);
+    fit  = FFMIN(p->buffer_end - p->buffer, size);
+    if (fit != size)
+        p->eof = 1;
+    return bytestream2_copy_bufferu(p, g, fit);
+}
+
+/** Copy size bytes from *b to dst and advance *b; no bounds are known here. */
+static av_always_inline unsigned int
+bytestream_get_buffer(const uint8_t **b, uint8_t *dst, unsigned int size)
+{
+    memcpy(dst, *b, size);
+    *b = *b + size;
+    return size;
+}
+
+/** Copy size bytes from src to *b and advance *b; no bounds are known here. */
+static av_always_inline void
+bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size)
+{
+    memcpy(*b, src, size);
+    *b = *b + size;
+}
+
+#endif /* AVCODEC_BYTESTREAM_H */
diff --git a/media/ffvpx/libavcodec/cbs.c b/media/ffvpx/libavcodec/cbs.c
new file mode 100644
index 0000000000..504197e06d
--- /dev/null
+++ b/media/ffvpx/libavcodec/cbs.c
@@ -0,0 +1,1028 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "config.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/buffer.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "cbs.h"
+#include "cbs_internal.h"
+
+
+static const CodedBitstreamType *const cbs_type_table[] = { /* CBS types enabled at build time; searched by ff_cbs_init() */
+#if CONFIG_CBS_AV1
+    &ff_cbs_type_av1,
+#endif
+#if CONFIG_CBS_H264
+    &ff_cbs_type_h264,
+#endif
+#if CONFIG_CBS_H265
+    &ff_cbs_type_h265,
+#endif
+#if CONFIG_CBS_JPEG
+    &ff_cbs_type_jpeg,
+#endif
+#if CONFIG_CBS_MPEG2
+    &ff_cbs_type_mpeg2,
+#endif
+#if CONFIG_CBS_VP9
+    &ff_cbs_type_vp9,
+#endif
+};
+
+const enum AVCodecID ff_cbs_all_codec_ids[] = { /* same build-time guards as cbs_type_table; ends with AV_CODEC_ID_NONE */
+#if CONFIG_CBS_AV1
+    AV_CODEC_ID_AV1,
+#endif
+#if CONFIG_CBS_H264
+    AV_CODEC_ID_H264,
+#endif
+#if CONFIG_CBS_H265
+    AV_CODEC_ID_H265,
+#endif
+#if CONFIG_CBS_JPEG
+    AV_CODEC_ID_MJPEG,
+#endif
+#if CONFIG_CBS_MPEG2
+    AV_CODEC_ID_MPEG2VIDEO,
+#endif
+#if CONFIG_CBS_VP9
+    AV_CODEC_ID_VP9,
+#endif
+    AV_CODEC_ID_NONE
+};
+
+/**
+ * Create a CBS context for codec_id and store it in *ctx_ptr.
+ * Returns 0 on success, AVERROR(EINVAL) if no compiled-in type matches
+ * codec_id, or AVERROR(ENOMEM); *ctx_ptr is only written on success.
+ */
+av_cold int ff_cbs_init(CodedBitstreamContext **ctx_ptr,
+                        enum AVCodecID codec_id, void *log_ctx)
+{
+    const CodedBitstreamType *type = NULL;
+    CodedBitstreamContext *ctx;
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(cbs_type_table) && !type; i++)
+        if (cbs_type_table[i]->codec_id == codec_id)
+            type = cbs_type_table[i];
+    if (!type)
+        return AVERROR(EINVAL);
+
+    ctx = av_mallocz(sizeof(*ctx));
+    if (!ctx)
+        return AVERROR(ENOMEM);
+    ctx->codec   = type;
+    ctx->log_ctx = log_ctx;
+
+    if (type->priv_data_size) {
+        ctx->priv_data = av_mallocz(type->priv_data_size);
+        if (!ctx->priv_data) {
+            av_freep(&ctx);
+            return AVERROR(ENOMEM);
+        }
+        if (type->priv_class) {
+            /* The private struct starts with its AVClass pointer. */
+            *(const AVClass **)ctx->priv_data = type->priv_class;
+            av_opt_set_defaults(ctx->priv_data);
+        }
+    }
+
+    /* Remaining defaults: decompose every unit type, tracing off. */
+    ctx->decompose_unit_types = NULL;
+    ctx->trace_enable         = 0;
+    ctx->trace_level          = AV_LOG_TRACE;
+
+    *ctx_ptr = ctx;
+    return 0;
+}
+
+av_cold void ff_cbs_flush(CodedBitstreamContext *ctx)
+{
+    if (ctx->codec->flush) /* optional callback: codec types without one are skipped */
+        ctx->codec->flush(ctx);
+}
+
+/* Destroy the context in *ctx_ptr; does nothing when it is already NULL. */
+av_cold void ff_cbs_close(CodedBitstreamContext **ctx_ptr)
+{
+    CodedBitstreamContext *ctx = *ctx_ptr;
+    if (!ctx)
+        return;
+
+    /* Let the codec release its own state while priv_data is still valid. */
+    if (ctx->codec->close)
+        ctx->codec->close(ctx);
+    av_freep(&ctx->write_buffer);
+
+    /* Options are only present for types that declare a priv_class. */
+    if (ctx->priv_data && ctx->codec->priv_class)
+        av_opt_free(ctx->priv_data);
+    av_freep(&ctx->priv_data);
+    av_freep(ctx_ptr);
+}
+
+/* Drop a unit's content and data references and clear the related fields. */
+static void cbs_unit_uninit(CodedBitstreamUnit *unit)
+{
+    av_buffer_unref(&unit->content_ref);
+    av_buffer_unref(&unit->data_ref);
+    unit->content          = NULL;
+    unit->data             = NULL;
+    unit->data_size        = 0;
+    unit->data_bit_padding = 0;
+}
+
+/* Empty the fragment: release every unit and the fragment data, but keep
+ * the units array itself allocated so later insertions can reuse it. */
+void ff_cbs_fragment_reset(CodedBitstreamFragment *frag)
+{
+    for (int i = 0; i < frag->nb_units; i++)
+        cbs_unit_uninit(frag->units + i);
+    frag->nb_units = 0;
+
+    av_buffer_unref(&frag->data_ref);
+    frag->data             = NULL;
+    frag->data_size        = 0;
+    frag->data_bit_padding = 0;
+}
+
+/* Reset the fragment, then also release its units array. */
+av_cold void ff_cbs_fragment_free(CodedBitstreamFragment *frag)
+{
+    ff_cbs_fragment_reset(frag);
+    frag->nb_units_allocated = 0;
+    av_freep(&frag->units);
+}
+
+/* Run the codec's read_unit() on each unit of frag that is selected for
+ * decomposition.  ENOSYS and EAGAIN results are logged and tolerated; any
+ * other negative result is logged and returned immediately. */
+static int cbs_read_fragment_content(CodedBitstreamContext *ctx,
+                                     CodedBitstreamFragment *frag)
+{
+    for (int i = 0; i < frag->nb_units; i++) {
+        CodedBitstreamUnit *unit = &frag->units[i];
+        int err;
+
+        if (ctx->decompose_unit_types) {
+            int j = 0;
+            while (j < ctx->nb_decompose_unit_types &&
+                   ctx->decompose_unit_types[j] != unit->type)
+                j++;
+            if (j >= ctx->nb_decompose_unit_types)
+                continue; /* type is not in the filter list */
+        }
+
+        av_buffer_unref(&unit->content_ref);
+        unit->content = NULL;
+        av_assert0(unit->data && unit->data_ref);
+
+        err = ctx->codec->read_unit(ctx, unit);
+        if (err == AVERROR(ENOSYS)) {
+            av_log(ctx->log_ctx, AV_LOG_VERBOSE,
+                   "Decomposition unimplemented for unit %d "
+                   "(type %"PRIu32").\n", i, unit->type);
+        } else if (err == AVERROR(EAGAIN)) {
+            av_log(ctx->log_ctx, AV_LOG_VERBOSE,
+                   "Skipping decomposition of unit %d "
+                   "(type %"PRIu32").\n", i, unit->type);
+            av_buffer_unref(&unit->content_ref);
+            unit->content = NULL;
+        } else if (err < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to read unit %d "
+                   "(type %"PRIu32").\n", i, unit->type);
+            return err;
+        }
+    }
+    return 0;
+}
+
+/* Give frag a private, zero-padded copy of data[0..size).  frag must not
+ * already hold data.  Returns 0 or AVERROR(ENOMEM). */
+static int cbs_fill_fragment_data(CodedBitstreamFragment *frag,
+                                  const uint8_t *data, size_t size)
+{
+    av_assert0(!frag->data && !frag->data_ref);
+
+    frag->data_ref = av_buffer_alloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!frag->data_ref)
+        return AVERROR(ENOMEM);
+    frag->data      = frag->data_ref->data;
+    frag->data_size = size;
+    /* data may be NULL when size is 0 (e.g. absent extradata or side data);
+     * memcpy() with a NULL source is undefined even for zero bytes. */
+    if (size)
+        memcpy(frag->data, data, size);
+    memset(frag->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+    return 0;
+}
+
+/* Common read path: attach the input to frag (by reference when buf is
+ * given, otherwise as a padded copy), split it into units and decompose
+ * them.  Returns the first negative error hit, else the decompose result. */
+static int cbs_read_data(CodedBitstreamContext *ctx,
+                         CodedBitstreamFragment *frag,
+                         AVBufferRef *buf,
+                         const uint8_t *data, size_t size,
+                         int header)
+{
+    int err;
+
+    if (!buf) {
+        err = cbs_fill_fragment_data(frag, data, size);
+        if (err < 0)
+            return err;
+    } else {
+        frag->data_ref = av_buffer_ref(buf);
+        if (!frag->data_ref)
+            return AVERROR(ENOMEM);
+        frag->data      = (uint8_t *)data;
+        frag->data_size = size;
+    }
+
+    err = ctx->codec->split_fragment(ctx, frag, header);
+    if (err >= 0)
+        err = cbs_read_fragment_content(ctx, frag);
+    return err;
+}
+
+/* Read par->extradata into frag as header data (copied, not referenced). */
+int ff_cbs_read_extradata(CodedBitstreamContext *ctx,
+                          CodedBitstreamFragment *frag,
+                          const AVCodecParameters *par)
+{
+    return cbs_read_data(ctx, frag, NULL,
+                         par->extradata, par->extradata_size, 1);
+}
+
+/* Read avctx->extradata into frag as header data (copied, not referenced). */
+int ff_cbs_read_extradata_from_codec(CodedBitstreamContext *ctx,
+                                     CodedBitstreamFragment *frag,
+                                     const AVCodecContext *avctx)
+{
+    return cbs_read_data(ctx, frag, NULL,
+                         avctx->extradata, avctx->extradata_size, 1);
+}
+
+/* Read a packet into frag; pkt->buf is referenced when the packet has one. */
+int ff_cbs_read_packet(CodedBitstreamContext *ctx,
+                       CodedBitstreamFragment *frag,
+                       const AVPacket *pkt)
+{
+    return cbs_read_data(ctx, frag, pkt->buf, pkt->data, pkt->size, 0);
+}
+
+/* Read the packet's AV_PKT_DATA_NEW_EXTRADATA side data into frag as
+ * header data; the bytes are copied into the fragment. */
+int ff_cbs_read_packet_side_data(CodedBitstreamContext *ctx,
+                                 CodedBitstreamFragment *frag,
+                                 const AVPacket *pkt)
+{
+    size_t len;
+    const uint8_t *extra =
+        av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &len);
+
+    return cbs_read_data(ctx, frag, NULL, extra, len, 1);
+}
+
+/* Read a raw byte buffer into frag as non-header data (the data is copied). */
+int ff_cbs_read(CodedBitstreamContext *ctx,
+                CodedBitstreamFragment *frag,
+                const uint8_t *data, size_t size)
+{
+    return cbs_read_data(ctx, frag, NULL, data, size, 0);
+}
+
+/**
+ * Attach a freshly allocated data buffer of size bytes to the unit, which
+ * must not hold data yet.  AV_INPUT_BUFFER_PADDING_SIZE zeroed bytes follow
+ * the payload; the payload itself is left for the caller to fill in.
+ * Returns 0 on success or AVERROR(ENOMEM).
+ */
+static int cbs_alloc_unit_data(CodedBitstreamUnit *unit, size_t size)
+{
+    AVBufferRef *ref;
+
+    av_assert0(!unit->data && !unit->data_ref);
+
+    ref = av_buffer_alloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!ref)
+        return AVERROR(ENOMEM);
+    memset(ref->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+    unit->data_ref  = ref;
+    unit->data      = ref->data;
+    unit->data_size = size;
+    return 0;
+}
+
+static int cbs_write_unit_data(CodedBitstreamContext *ctx,
+                               CodedBitstreamUnit *unit)
+{
+    PutBitContext pbc;
+    int ret;
+    // Serialise the unit into ctx->write_buffer (grown on demand), then copy it into new unit data.
+    if (!ctx->write_buffer) {
+        // First use: start with a 1MB scratch buffer kept in the context.
+        ctx->write_buffer_size = 1024 * 1024;
+
+    reallocate_and_try_again: // also reached by goto after an ENOSPC write, with a larger size
+        ret = av_reallocp(&ctx->write_buffer, ctx->write_buffer_size);
+        if (ret < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Unable to allocate a "
+                   "sufficiently large write buffer (last attempt "
+                   "%"SIZE_SPECIFIER" bytes).\n", ctx->write_buffer_size);
+            return ret;
+        }
+    }
+
+    init_put_bits(&pbc, ctx->write_buffer, ctx->write_buffer_size);
+
+    ret = ctx->codec->write_unit(ctx, unit, &pbc);
+    if (ret < 0) {
+        if (ret == AVERROR(ENOSPC)) {
+            // Buffer too small: double it (capped at INT_MAX / 8) and retry; give up once at the cap.
+            if (ctx->write_buffer_size == INT_MAX / 8)
+                return AVERROR(ENOMEM);
+            ctx->write_buffer_size = FFMIN(2 * ctx->write_buffer_size, INT_MAX / 8);
+            goto reallocate_and_try_again;
+        }
+        // Write failed for some other reason.
+        return ret;
+    }
+
+    // Overflow but we didn't notice.
+    av_assert0(put_bits_count(&pbc) <= 8 * ctx->write_buffer_size);
+
+    if (put_bits_count(&pbc) % 8)
+        unit->data_bit_padding = 8 - put_bits_count(&pbc) % 8;
+    else
+        unit->data_bit_padding = 0;
+
+    flush_put_bits(&pbc);
+
+    ret = cbs_alloc_unit_data(unit, put_bytes_output(&pbc));
+    if (ret < 0)
+        return ret;
+
+    memcpy(unit->data, ctx->write_buffer, unit->data_size);
+
+    return 0;
+}
+
+/* Regenerate the data of every unit that has decomposed content, then let
+ * the codec assemble the units into new fragment data.  Units without
+ * content keep their existing data.  Returns 0 or a negative error code. */
+int ff_cbs_write_fragment_data(CodedBitstreamContext *ctx,
+                               CodedBitstreamFragment *frag)
+{
+    int err;
+
+    for (int i = 0; i < frag->nb_units; i++) {
+        CodedBitstreamUnit *unit = frag->units + i;
+
+        if (unit->content) {
+            av_buffer_unref(&unit->data_ref);
+            unit->data = NULL;
+
+            err = cbs_write_unit_data(ctx, unit);
+            if (err < 0) {
+                av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to write unit %d "
+                       "(type %"PRIu32").\n", i, unit->type);
+                return err;
+            }
+            av_assert0(unit->data && unit->data_ref);
+        }
+    }
+
+    av_buffer_unref(&frag->data_ref);
+    frag->data = NULL;
+    err = ctx->codec->assemble_fragment(ctx, frag);
+    if (err < 0) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to assemble fragment.\n");
+        return err;
+    }
+    av_assert0(frag->data && frag->data_ref);
+    return 0;
+}
+
+/* Write frag and install the result as par->extradata (padded copy).  Any
+ * previous extradata is freed once the write has succeeded; an empty
+ * fragment leaves par without extradata.  Returns 0 or a negative error. */
+int ff_cbs_write_extradata(CodedBitstreamContext *ctx,
+                           AVCodecParameters *par,
+                           CodedBitstreamFragment *frag)
+{
+    int err = ff_cbs_write_fragment_data(ctx, frag);
+
+    if (err < 0)
+        return err;
+
+    av_freep(&par->extradata);
+    par->extradata_size = 0;
+    if (!frag->data_size)
+        return 0;
+
+    par->extradata = av_malloc(frag->data_size +
+                               AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!par->extradata)
+        return AVERROR(ENOMEM);
+    par->extradata_size = frag->data_size;
+
+    memcpy(par->extradata, frag->data, frag->data_size);
+    memset(par->extradata + frag->data_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+    return 0;
+}
+
+/* Write frag and make pkt share the resulting buffer: pkt gets its own
+ * reference to frag->data_ref and its old buffer reference is dropped.
+ * pkt is left untouched on failure.  Returns 0 or a negative error code. */
+int ff_cbs_write_packet(CodedBitstreamContext *ctx,
+                        AVPacket *pkt,
+                        CodedBitstreamFragment *frag)
+{
+    AVBufferRef *new_ref;
+    int err = ff_cbs_write_fragment_data(ctx, frag);
+
+    if (err < 0)
+        return err;
+
+    new_ref = av_buffer_ref(frag->data_ref);
+    if (!new_ref)
+        return AVERROR(ENOMEM);
+
+    av_buffer_unref(&pkt->buf);
+    pkt->buf  = new_ref;
+    pkt->size = frag->data_size;
+    pkt->data = frag->data;
+    return 0;
+}
+
+
+/* Emit a trace line consisting of name alone, at ctx->trace_level; does
+ * nothing unless tracing is enabled on the context. */
+void ff_cbs_trace_header(CodedBitstreamContext *ctx,
+                         const char *name)
+{
+    if (ctx->trace_enable)
+        av_log(ctx->log_ctx, ctx->trace_level, "%s\n", name);
+}
+
+/**
+ * Log one syntax element when tracing is enabled.  Each "[...]" group in
+ * str is rewritten with the matching value from subscripts (whose element
+ * 0 is the number of subscripts); bits is the element's bit string.
+ */
+void ff_cbs_trace_syntax_element(CodedBitstreamContext *ctx, int position,
+                                 const char *str, const int *subscripts,
+                                 const char *bits, int64_t value)
+{
+    char name[256];
+    size_t name_len, bits_len;
+    int pad, subs, i, j, k, n = 0;
+
+    if (!ctx->trace_enable)
+        return;
+    av_assert0(value >= INT_MIN && value <= UINT32_MAX);
+    subs = subscripts ? subscripts[0] : 0;
+    for (i = j = 0; str[i];) {
+        if (str[i] == '[') {
+            if (n < subs) {
+                ++n;
+                k = snprintf(name + j, sizeof(name) - j, "[%d", subscripts[n]);
+                av_assert0(k > 0 && j + k < sizeof(name));
+                j += k;
+                for (++i; str[i] && str[i] != ']'; i++);
+                av_assert0(str[i] == ']');
+            } else {
+                // No subscript left: copy verbatim, bounds-checked like every other write.
+                while (str[i] && str[i] != ']') {
+                    av_assert0(j + 1 < sizeof(name));
+                    name[j++] = str[i++];
+                }
+                av_assert0(str[i] == ']');
+            }
+        } else {
+            av_assert0(j + 1 < sizeof(name));
+            name[j++] = str[i++];
+        }
+    }
+    av_assert0(j + 1 < sizeof(name));
+    name[j] = 0;
+    av_assert0(n == subs);
+
+    name_len = strlen(name);
+    bits_len = strlen(bits);
+    pad = name_len + bits_len > 60 ? bits_len + 2 : 61 - name_len;
+    av_log(ctx->log_ctx, ctx->trace_level, "%-10d  %s%*s = %"PRId64"\n",
+           position, name, pad, bits, value);
+}
+
+/* Read a width-bit (1..32) unsigned value from gbc, trace it if enabled
+ * and store it in *write_to.  Returns AVERROR_INVALIDDATA if fewer than
+ * width bits remain or the value is outside [range_min, range_max]. */
+int ff_cbs_read_unsigned(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                         int width, const char *name,
+                         const int *subscripts, uint32_t *write_to,
+                         uint32_t range_min, uint32_t range_max)
+{
+    uint32_t value;
+    int position;
+
+    av_assert0(width > 0 && width <= 32);
+    if (get_bits_left(gbc) < width) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid value at "
+               "%s: bitstream ended.\n", name);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+    value = get_bits_long(gbc, width);
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        // Render the value MSB first as a NUL-terminated '0'/'1' string.
+        for (int b = 0; b < width; b++)
+            bits[b] = value >> (width - b - 1) & 1 ? '1' : '0';
+        bits[width] = 0;
+        ff_cbs_trace_syntax_element(ctx, position, name, subscripts,
+                                    bits, value);
+    }
+
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+/* Write value as a width-bit (1..32) unsigned field to pbc, tracing it if
+ * enabled.  Returns AVERROR_INVALIDDATA if value is outside
+ * [range_min, range_max] and AVERROR(ENOSPC) if pbc lacks width bits. */
+int ff_cbs_write_unsigned(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                          int width, const char *name,
+                          const int *subscripts, uint32_t value,
+                          uint32_t range_min, uint32_t range_max)
+{
+    av_assert0(width > 0 && width <= 32);
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (put_bits_left(pbc) < width)
+        return AVERROR(ENOSPC);
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        for (int b = 0; b < width; b++)
+            bits[b] = value >> (width - b - 1) & 1 ? '1' : '0';
+        bits[width] = 0;
+        ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
+                                    name, subscripts, bits, value);
+    }
+
+    if (width == 32)
+        put_bits32(pbc, value);
+    else
+        put_bits(pbc, width, value);
+
+    return 0;
+}
+
+/* Read a width-bit (1..32) signed value from gbc, trace it if enabled and
+ * store it in *write_to.  Returns AVERROR_INVALIDDATA if fewer than width
+ * bits remain or the value is outside [range_min, range_max]. */
+int ff_cbs_read_signed(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                       int width, const char *name,
+                       const int *subscripts, int32_t *write_to,
+                       int32_t range_min, int32_t range_max)
+{
+    int32_t value;
+    int position;
+
+    av_assert0(width > 0 && width <= 32);
+    if (get_bits_left(gbc) < width) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid value at "
+               "%s: bitstream ended.\n", name);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+    value = get_sbits_long(gbc, width);
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        // Render the low width bits MSB first as a '0'/'1' string.
+        for (int b = 0; b < width; b++)
+            bits[b] = value & (1U << (width - b - 1)) ? '1' : '0';
+        bits[width] = 0;
+        ff_cbs_trace_syntax_element(ctx, position, name, subscripts,
+                                    bits, value);
+    }
+
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRId32", but must be in [%"PRId32",%"PRId32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+/* Write value as a width-bit (1..32) signed field to pbc, tracing it if
+ * enabled.  Returns AVERROR_INVALIDDATA if value is outside
+ * [range_min, range_max] and AVERROR(ENOSPC) if pbc lacks width bits. */
+int ff_cbs_write_signed(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                        int width, const char *name,
+                        const int *subscripts, int32_t value,
+                        int32_t range_min, int32_t range_max)
+{
+    av_assert0(width > 0 && width <= 32);
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRId32", but must be in [%"PRId32",%"PRId32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (put_bits_left(pbc) < width)
+        return AVERROR(ENOSPC);
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        for (int b = 0; b < width; b++)
+            bits[b] = value & (1U << (width - b - 1)) ? '1' : '0';
+        bits[width] = 0;
+        ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
+                                    name, subscripts, bits, value);
+    }
+
+    if (width == 32)
+        put_bits32(pbc, value);
+    else
+        put_sbits(pbc, width, value);
+
+    return 0;
+}
+
+
+/* Open a zeroed slot at index position in frag->units, shifting later
+ * units up by one.  The array is reused while it has spare capacity and
+ * otherwise replaced by a larger one.  Returns 0 or AVERROR(ENOMEM). */
+static int cbs_insert_unit(CodedBitstreamFragment *frag,
+                           int position)
+{
+    CodedBitstreamUnit *units = frag->units;
+    const int tail = frag->nb_units - position; /* units after the slot */
+
+    if (frag->nb_units < frag->nb_units_allocated) {
+        if (tail > 0)
+            memmove(units + position + 1, units + position,
+                    tail * sizeof(*units));
+    } else {
+        units = av_malloc_array(frag->nb_units*2 + 1, sizeof(*units));
+        if (!units)
+            return AVERROR(ENOMEM);
+        frag->nb_units_allocated = 2*frag->nb_units_allocated + 1;
+
+        if (position > 0)
+            memcpy(units, frag->units, position * sizeof(*units));
+        if (tail > 0)
+            memcpy(units + position + 1, frag->units + position,
+                   tail * sizeof(*units));
+    }
+
+    memset(units + position, 0, sizeof(*units));
+
+    if (units != frag->units) {
+        av_free(frag->units);
+        frag->units = units;
+    }
+
+    ++frag->nb_units;
+
+    return 0;
+}
+
+/* Insert a unit carrying decomposed content at position (-1 appends).  If
+ * content_buf is given the unit takes a new reference to it, otherwise the
+ * content is stored without a reference.  Returns 0 or a negative error. */
+int ff_cbs_insert_unit_content(CodedBitstreamFragment *frag,
+                               int position,
+                               CodedBitstreamUnitType type,
+                               void *content,
+                               AVBufferRef *content_buf)
+{
+    CodedBitstreamUnit *unit;
+    AVBufferRef *content_ref = NULL;
+    int err;
+
+    if (position == -1)
+        position = frag->nb_units;
+    av_assert0(position >= 0 && position <= frag->nb_units);
+
+    if (content_buf) {
+        content_ref = av_buffer_ref(content_buf);
+        if (!content_ref)
+            return AVERROR(ENOMEM);
+    }
+
+    err = cbs_insert_unit(frag, position);
+    if (err < 0) {
+        av_buffer_unref(&content_ref);
+        return err;
+    }
+
+    unit = frag->units + position;
+    unit->content_ref = content_ref;
+    unit->content     = content;
+    unit->type        = type;
+    return 0;
+}
+
+/* Insert a unit holding raw data at position.  With data_buf the unit takes
+ * a new reference to it; without, data is wrapped in a new buffer (and is
+ * freed if that buffer cannot be created).  Returns 0 or a negative error. */
+static int cbs_insert_unit_data(CodedBitstreamFragment *frag,
+                                CodedBitstreamUnitType type,
+                                uint8_t *data, size_t data_size,
+                                AVBufferRef *data_buf,
+                                int position)
+{
+    CodedBitstreamUnit *unit;
+    AVBufferRef *data_ref;
+    int err;
+
+    av_assert0(position >= 0 && position <= frag->nb_units);
+
+    data_ref = data_buf ? av_buffer_ref(data_buf)
+                        : av_buffer_create(data, data_size, NULL, NULL, 0);
+    if (!data_ref) {
+        if (!data_buf)
+            av_free(data);
+        return AVERROR(ENOMEM);
+    }
+
+    err = cbs_insert_unit(frag, position);
+    if (err < 0) {
+        av_buffer_unref(&data_ref);
+        return err;
+    }
+
+    unit = frag->units + position;
+    unit->data_ref  = data_ref;
+    unit->data_size = data_size;
+    unit->data      = data;
+    unit->type      = type;
+    return 0;
+}
+
+/* Append a unit holding raw data at the end of frag (index nb_units). */
+int ff_cbs_append_unit_data(CodedBitstreamFragment *frag,
+                            CodedBitstreamUnitType type,
+                            uint8_t *data, size_t data_size,
+                            AVBufferRef *data_buf)
+{
+    const int end = frag->nb_units;
+    return cbs_insert_unit_data(frag, type, data, data_size, data_buf, end);
+}
+
+/* Remove the unit at position from frag, releasing its buffers and moving
+ * any following units down by one.  position must be a valid unit index. */
+void ff_cbs_delete_unit(CodedBitstreamFragment *frag, int position)
+{
+    CodedBitstreamUnit *slot;
+
+    av_assert0(0 <= position && position < frag->nb_units
+                             && "Unit to be deleted not in fragment.");
+
+    slot = frag->units + position;
+    cbs_unit_uninit(slot);
+    if (--frag->nb_units > 0)
+        memmove(slot, slot + 1,
+                (frag->nb_units - position) * sizeof(*frag->units));
+}
+
+/* Free callback: drop the buffer ref that follows each listed pointer, then free data. */
+static void cbs_default_free_unit_content(void *opaque, uint8_t *data)
+{
+    const CodedBitstreamUnitTypeDescriptor *desc = opaque;
+    for (int i = 0; i < desc->type.ref.nb_offsets; i++) {
+        void **field = (void**)(data + desc->type.ref.offsets[i]);
+        av_buffer_unref((AVBufferRef**)(field + 1));
+    }
+    av_free(data);
+}
+
+/* Look up the descriptor matching unit->type in the codec's unit_types
+ * table, which ends at the first entry with nb_unit_types == 0.  An entry
+ * matches either through its range or through its explicit type list.
+ * Returns NULL when the codec has no table or no entry matches. */
+static const CodedBitstreamUnitTypeDescriptor
+    *cbs_find_unit_type_desc(CodedBitstreamContext *ctx,
+                             CodedBitstreamUnit *unit)
+{
+    const CodedBitstreamUnitTypeDescriptor *desc = ctx->codec->unit_types;
+
+    if (!desc)
+        return NULL;
+
+    for (; desc->nb_unit_types != 0; desc++) {
+        if (desc->nb_unit_types == CBS_UNIT_TYPE_RANGE) {
+            if (unit->type >= desc->unit_type.range.start &&
+                unit->type <= desc->unit_type.range.end)
+                return desc;
+            continue;
+        }
+        for (int j = 0; j < desc->nb_unit_types; j++)
+            if (desc->unit_type.list[j] == unit->type)
+                return desc;
+    }
+
+    return NULL;
+}
+
+/* Allocate zeroed content for the unit according to its type descriptor
+ * and wrap it in a new buffer reference.  The unit must not have content
+ * yet.  Returns 0, AVERROR(ENOSYS) for unknown types or AVERROR(ENOMEM). */
+int ff_cbs_alloc_unit_content(CodedBitstreamContext *ctx,
+                              CodedBitstreamUnit *unit)
+{
+    const CodedBitstreamUnitTypeDescriptor *desc;
+    void (*free_cb)(void *opaque, uint8_t *data);
+
+    av_assert0(!unit->content && !unit->content_ref);
+    desc = cbs_find_unit_type_desc(ctx, unit);
+    if (!desc)
+        return AVERROR(ENOSYS);
+
+    unit->content = av_mallocz(desc->content_size);
+    if (!unit->content)
+        return AVERROR(ENOMEM);
+
+    free_cb = desc->content_type == CBS_CONTENT_TYPE_COMPLEX
+                  ? desc->type.complex.content_free : cbs_default_free_unit_content;
+    unit->content_ref = av_buffer_create(unit->content, desc->content_size,
+                                         free_cb, (void*)desc, 0);
+    if (!unit->content_ref) {
+        av_freep(&unit->content);
+        return AVERROR(ENOMEM);
+    }
+    return 0;
+}
+
+/* Clone content whose only indirections are refcounted buffers: the struct
+ * is copied byte for byte and a new reference is taken for each AVBufferRef
+ * stored right after a listed pointer.  *clone_ref is NULL on failure. */
+static int cbs_clone_internal_refs_unit_content(AVBufferRef **clone_ref,
+                                                const CodedBitstreamUnit *unit,
+                                                const CodedBitstreamUnitTypeDescriptor *desc)
+{
+    const uint8_t *src;
+    uint8_t *copy;
+    int err, i;
+
+    av_assert0(unit->content);
+    src = unit->content;
+    copy = av_memdup(src, desc->content_size);
+    if (!copy)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < desc->type.ref.nb_offsets; i++) {
+        const uint8_t *const *src_ptr = (const uint8_t* const*)(src + desc->type.ref.offsets[i]);
+        const AVBufferRef *src_buf = *(AVBufferRef**)(src_ptr + 1);
+        uint8_t **copy_ptr = (uint8_t**)(copy + desc->type.ref.offsets[i]);
+        AVBufferRef **copy_buf = (AVBufferRef**)(copy_ptr + 1);
+        if (!*src_ptr) {
+            av_assert0(!src_buf);
+            continue;
+        }
+        if (!src_buf) {
+            // We can't handle a non-refcounted pointer here - we don't
+            // have enough information to handle whatever structure lies
+            // at the other end of it.
+            err = AVERROR(EINVAL);
+            goto fail;
+        }
+        *copy_buf = av_buffer_ref(src_buf);
+        if (!*copy_buf) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+    }
+    *clone_ref = av_buffer_create(copy, desc->content_size,
+                                  cbs_default_free_unit_content,
+                                  (void*)desc, 0);
+    if (!*clone_ref) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    return 0;
+
+fail:
+    // Undo the references taken so far.  They live in the slot after each
+    // data pointer (copy_buf above), not at the listed offset itself.
+    for (--i; i >= 0; i--)
+        av_buffer_unref((AVBufferRef**)(copy + desc->type.ref.offsets[i]) + 1);
+    av_freep(&copy);
+    *clone_ref = NULL;
+    return err;
+}
+
+/*
+ * Replace the unit's content with a clone held by a fresh buffer reference.
+ *
+ * unit->content and unit->content_ref are only written on success; a
+ * previous content_ref is overwritten here, never unreferenced.
+ * Returns AVERROR(ENOSYS) for unknown unit types and AVERROR_PATCHWELCOME
+ * for complex types that provide no clone callback.
+ */
+static int cbs_clone_unit_content(CodedBitstreamContext *ctx,
+                                  CodedBitstreamUnit *unit)
+{
+    const CodedBitstreamUnitTypeDescriptor *desc =
+        cbs_find_unit_type_desc(ctx, unit);
+    AVBufferRef *ref;
+    int err;
+
+    if (!desc)
+        return AVERROR(ENOSYS);
+
+    if (desc->content_type == CBS_CONTENT_TYPE_INTERNAL_REFS) {
+        err = cbs_clone_internal_refs_unit_content(&ref, unit, desc);
+    } else if (desc->content_type == CBS_CONTENT_TYPE_COMPLEX) {
+        // Complex content can only be cloned by a codec-supplied callback.
+        if (!desc->type.complex.content_clone)
+            return AVERROR_PATCHWELCOME;
+        err = desc->type.complex.content_clone(&ref, unit);
+    } else {
+        av_assert0(0 && "Invalid content type.");
+    }
+
+    if (err < 0)
+        return err;
+
+    unit->content_ref = ref;
+    unit->content     = ref->data;
+
+    return 0;
+}
+
+/* Ensure the unit's content is held by content_ref: units that already have
+ * a reference are left alone, others get their content cloned into one. */
+int ff_cbs_make_unit_refcounted(CodedBitstreamContext *ctx,
+                                CodedBitstreamUnit *unit)
+{
+    av_assert0(unit->content);
+    return unit->content_ref ? 0 : cbs_clone_unit_content(ctx, unit);
+}
+
+/* Make the unit's content safe to modify: when content_ref is missing or
+ * not writable, the content is cloned and the old reference released. */
+int ff_cbs_make_unit_writable(CodedBitstreamContext *ctx,
+                              CodedBitstreamUnit *unit)
+{
+    AVBufferRef *old_ref = unit->content_ref;
+    int err;
+
+    av_assert0(unit->content);
+    if (old_ref && av_buffer_is_writable(old_ref))
+        return 0;
+    err = cbs_clone_unit_content(ctx, unit);
+    if (err >= 0)
+        av_buffer_unref(&old_ref);
+    return err < 0 ? err : 0;
+}
diff --git a/media/ffvpx/libavcodec/cbs.h b/media/ffvpx/libavcodec/cbs.h
new file mode 100644
index 0000000000..ee21623dac
--- /dev/null
+++ b/media/ffvpx/libavcodec/cbs.h
@@ -0,0 +1,436 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_H
+#define AVCODEC_CBS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/buffer.h"
+
+#include "codec_id.h"
+#include "codec_par.h"
+#include "packet.h"
+
+
+/*
+ * This defines a framework for converting between a coded bitstream
+ * and structures defining all individual syntax elements found in
+ * such a stream.
+ *
+ * Conversion in both directions is possible.  Given a coded bitstream
+ * (any meaningful fragment), it can be parsed and decomposed into
+ * syntax elements stored in a set of codec-specific structures.
+ * Similarly, given a set of those same codec-specific structures the
+ * syntax elements can be serialised and combined to create a coded
+ * bitstream.
+ */
+
+struct AVCodecContext;
+struct CodedBitstreamType;
+
+/**
+ * The codec-specific type of a bitstream unit.
+ *
+ * AV1: obu_type
+ * H.264 / AVC: nal_unit_type
+ * H.265 / HEVC: nal_unit_type
+ * JPEG: marker value (without 0xff prefix)
+ * MPEG-2: start code value (without prefix)
+ * VP9: unused, set to zero (every unit is a frame)
+ */
+typedef uint32_t CodedBitstreamUnitType;
+
+/**
+ * Coded bitstream unit structure.
+ *
+ * A bitstream unit is the smallest element of a bitstream which
+ * is meaningful on its own.  For example, an H.264 NAL unit.
+ *
+ * See the codec-specific header for the meaning of this for any
+ * particular codec.
+ */
+typedef struct CodedBitstreamUnit {
+    /**
+     * Codec-specific type of this unit.
+     */
+    CodedBitstreamUnitType type;
+
+    /**
+     * Pointer to the directly-parsable bitstream form of this unit.
+     *
+     * May be NULL if the unit currently only exists in decomposed form.
+     */
+    uint8_t *data;
+    /**
+     * The number of bytes in the bitstream (including any padding bits
+     * in the final byte).
+     */
+    size_t   data_size;
+    /**
+     * The number of bits which should be ignored in the final byte.
+     *
+     * This supports non-byte-aligned bitstreams.
+     */
+    size_t   data_bit_padding;
+    /**
+     * A reference to the buffer containing data.
+     *
+     * Must be set if data is not NULL.
+     */
+    AVBufferRef *data_ref;
+
+    /**
+     * Pointer to the decomposed form of this unit.
+     *
+     * The type of this structure depends on both the codec and the
+     * type of this unit.  May be NULL if the unit only exists in
+     * bitstream form.
+     */
+    void *content;
+    /**
+     * If content is reference counted, a reference to the buffer containing
+     * content.  NULL if content is not reference counted.
+     */
+    AVBufferRef *content_ref;
+} CodedBitstreamUnit;
+
+/**
+ * Coded bitstream fragment structure, combining one or more units.
+ *
+ * This is any sequence of units.  It need not form some greater whole,
+ * though in many cases it will.  For example, an H.264 access unit,
+ * which is composed of a sequence of H.264 NAL units.
+ */
+typedef struct CodedBitstreamFragment {
+    /**
+     * Pointer to the bitstream form of this fragment.
+     *
+     * May be NULL if the fragment only exists as component units.
+     */
+    uint8_t *data;
+    /**
+     * The number of bytes in the bitstream (including any padding bits
+     * in the final byte).
+     */
+    size_t   data_size;
+    /**
+     * The number of bits which should be ignored in the final byte.
+     */
+    size_t data_bit_padding;
+    /**
+     * A reference to the buffer containing data.
+     *
+     * Must be set if data is not NULL.
+     */
+    AVBufferRef *data_ref;
+
+    /**
+     * Number of units in this fragment.
+     *
+     * This may be zero if the fragment only exists in bitstream form
+     * and has not been decomposed.
+     */
+    int              nb_units;
+
+    /**
+     * Number of allocated units.
+     *
+     * Must always be >= nb_units; designed for internal use by cbs.
+     */
+     int             nb_units_allocated;
+
+    /**
+     * Pointer to an array of units of length nb_units_allocated.
+     * Only the first nb_units are valid.
+     *
+     * Must be NULL if nb_units_allocated is zero.
+     */
+    CodedBitstreamUnit *units;
+} CodedBitstreamFragment;
+
+/**
+ * Context structure for coded bitstream operations.
+ */
+typedef struct CodedBitstreamContext {
+    /**
+     * Logging context to be passed to all av_log() calls associated
+     * with this context.
+     */
+    void *log_ctx;
+
+    /**
+     * Internal codec-specific hooks.
+     */
+    const struct CodedBitstreamType *codec;
+
+    /**
+     * Internal codec-specific data.
+     *
+     * This contains any information needed when reading/writing
+     * bitstreams which will not necessarily be present in a fragment.
+     * For example, for H.264 it contains all currently visible
+     * parameter sets - they are required to determine the bitstream
+     * syntax but need not be present in every access unit.
+     */
+    void *priv_data;
+
+    /**
+     * Array of unit types which should be decomposed when reading.
+     *
+     * Types not in this list will be available in bitstream form only.
+     * If NULL, all supported types will be decomposed.
+     */
+    const CodedBitstreamUnitType *decompose_unit_types;
+    /**
+     * Length of the decompose_unit_types array.
+     */
+    int nb_decompose_unit_types;
+
+    /**
+     * Enable trace output during read/write operations.
+     */
+    int trace_enable;
+    /**
+     * Log level to use for trace output.
+     *
+     * From AV_LOG_*; defaults to AV_LOG_TRACE.
+     */
+    int trace_level;
+
+    /**
+     * Write buffer. Used as intermediate buffer when writing units.
+     * For internal use of cbs only.
+     */
+    uint8_t *write_buffer;
+    /**
+     * NOTE(review): presumably the allocated size of write_buffer in
+     * bytes; confirm against its use in cbs.c.
+     */
+    size_t   write_buffer_size;
+} CodedBitstreamContext;
+
+
+/**
+ * Table of all supported codec IDs.
+ *
+ * Terminated by AV_CODEC_ID_NONE.
+ */
+extern const enum AVCodecID ff_cbs_all_codec_ids[];
+
+
+/**
+ * Create and initialise a new context for the given codec.
+ */
+int ff_cbs_init(CodedBitstreamContext **ctx,
+                enum AVCodecID codec_id, void *log_ctx);
+
+/**
+ * Reset all internal state in a context.
+ */
+void ff_cbs_flush(CodedBitstreamContext *ctx);
+
+/**
+ * Close a context and free all internal state.
+ */
+void ff_cbs_close(CodedBitstreamContext **ctx);
+
+
+/**
+ * Read the extradata bitstream found in codec parameters into a
+ * fragment, then split into units and decompose.
+ *
+ * This also updates the internal state, so will need to be called for
+ * codecs with extradata to read parameter sets necessary for further
+ * parsing even if the fragment itself is not desired.
+ *
+ * The fragment must have been zeroed or reset via ff_cbs_fragment_reset
+ * before use.
+ */
+int ff_cbs_read_extradata(CodedBitstreamContext *ctx,
+                          CodedBitstreamFragment *frag,
+                          const AVCodecParameters *par);
+
+/**
+ * Read the extradata bitstream found in a codec context into a
+ * fragment, then split into units and decompose.
+ *
+ * This acts identical to ff_cbs_read_extradata() for the case where
+ * you already have a codec context.
+ */
+int ff_cbs_read_extradata_from_codec(CodedBitstreamContext *ctx,
+                                     CodedBitstreamFragment *frag,
+                                     const struct AVCodecContext *avctx);
+
+/**
+ * NOTE(review): this declaration carries no documentation.  Judging by
+ * its name and signature it presumably behaves like ff_cbs_read_packet()
+ * but takes its bitstream from side data attached to the packet rather
+ * than from the packet payload; confirm against the implementation in
+ * cbs.c before relying on this.
+ */
+int ff_cbs_read_packet_side_data(CodedBitstreamContext *ctx,
+                                 CodedBitstreamFragment *frag,
+                                 const AVPacket *pkt);
+
+/**
+ * Read the data bitstream from a packet into a fragment, then
+ * split into units and decompose.
+ *
+ * This also updates the internal state of the coded bitstream context
+ * with any persistent data from the fragment which may be required to
+ * read following fragments (e.g. parameter sets).
+ *
+ * The fragment must have been zeroed or reset via ff_cbs_fragment_reset
+ * before use.
+ */
+int ff_cbs_read_packet(CodedBitstreamContext *ctx,
+                       CodedBitstreamFragment *frag,
+                       const AVPacket *pkt);
+
+/**
+ * Read a bitstream from a memory region into a fragment, then
+ * split into units and decompose.
+ *
+ * This also updates the internal state of the coded bitstream context
+ * with any persistent data from the fragment which may be required to
+ * read following fragments (e.g. parameter sets).
+ *
+ * The fragment must have been zeroed or reset via ff_cbs_fragment_reset
+ * before use.
+ */
+int ff_cbs_read(CodedBitstreamContext *ctx,
+                CodedBitstreamFragment *frag,
+                const uint8_t *data, size_t size);
+
+
+/**
+ * Write the content of the fragment to its own internal buffer.
+ *
+ * Writes the content of all units and then assembles them into a new
+ * data buffer.  When modifying the content of decomposed units, this
+ * can be used to regenerate the bitstream form of units or the whole
+ * fragment so that it can be extracted for other use.
+ *
+ * This also updates the internal state of the coded bitstream context
+ * with any persistent data from the fragment which may be required to
+ * write following fragments (e.g. parameter sets).
+ */
+int ff_cbs_write_fragment_data(CodedBitstreamContext *ctx,
+                               CodedBitstreamFragment *frag);
+
+/**
+ * Write the bitstream of a fragment to the extradata in codec parameters.
+ *
+ * Modifies context and fragment as ff_cbs_write_fragment_data does and
+ * replaces any existing extradata in the structure.
+ */
+int ff_cbs_write_extradata(CodedBitstreamContext *ctx,
+                           AVCodecParameters *par,
+                           CodedBitstreamFragment *frag);
+
+/**
+ * Write the bitstream of a fragment to a packet.
+ *
+ * Modifies context and fragment as ff_cbs_write_fragment_data does.
+ *
+ * On success, the packet's buf is unreferenced and its buf, data and
+ * size fields are set to the corresponding values from the newly updated
+ * fragment; other fields are not touched.  On failure, the packet is not
+ * touched at all.
+ */
+int ff_cbs_write_packet(CodedBitstreamContext *ctx,
+                        AVPacket *pkt,
+                        CodedBitstreamFragment *frag);
+
+
+/**
+ * Free the units contained in a fragment as well as the fragment's
+ * own data buffer, but not the units array itself.
+ */
+void ff_cbs_fragment_reset(CodedBitstreamFragment *frag);
+
+/**
+ * Free the units array of a fragment in addition to what
+ * ff_cbs_fragment_reset does.
+ */
+void ff_cbs_fragment_free(CodedBitstreamFragment *frag);
+
+/**
+ * Allocate a new internal content buffer matching the type of the unit.
+ *
+ * The content will be zeroed.
+ */
+int ff_cbs_alloc_unit_content(CodedBitstreamContext *ctx,
+                              CodedBitstreamUnit *unit);
+
+/**
+ * Insert a new unit into a fragment with the given content.
+ *
+ * The content structure continues to be owned by the caller if
+ * content_buf is not supplied.
+ */
+int ff_cbs_insert_unit_content(CodedBitstreamFragment *frag,
+                               int position,
+                               CodedBitstreamUnitType type,
+                               void *content,
+                               AVBufferRef *content_buf);
+
+/**
+ * Add a new unit to a fragment with the given data bitstream.
+ *
+ * If data_buf is not supplied then data must have been allocated with
+ * av_malloc() and will on success become owned by the unit after this
+ * call or freed on error.
+ */
+int ff_cbs_append_unit_data(CodedBitstreamFragment *frag,
+                            CodedBitstreamUnitType type,
+                            uint8_t *data, size_t data_size,
+                            AVBufferRef *data_buf);
+
+/**
+ * Delete a unit from a fragment and free all memory it uses.
+ *
+ * Requires position to be >= 0 and < frag->nb_units.
+ */
+void ff_cbs_delete_unit(CodedBitstreamFragment *frag,
+                        int position);
+
+
+/**
+ * Make the content of a unit refcounted.
+ *
+ * If the unit is not refcounted, this will do a deep copy of the unit
+ * content to new refcounted buffers.
+ *
+ * It is not valid to call this function on a unit which does not have
+ * decomposed content.
+ */
+int ff_cbs_make_unit_refcounted(CodedBitstreamContext *ctx,
+                                CodedBitstreamUnit *unit);
+
+/**
+ * Make the content of a unit writable so that internal fields can be
+ * modified.
+ *
+ * If it is known that there are no other references to the content of
+ * the unit, does nothing and returns success.  Otherwise (including the
+ * case where the unit content is not refcounted), it does a full clone
+ * of the content (including any internal buffers) to make a new copy,
+ * and replaces the existing references inside the unit with that.
+ *
+ * It is not valid to call this function on a unit which does not have
+ * decomposed content.
+ */
+int ff_cbs_make_unit_writable(CodedBitstreamContext *ctx,
+                              CodedBitstreamUnit *unit);
+
+
+#endif /* AVCODEC_CBS_H */
diff --git a/media/ffvpx/libavcodec/cbs_av1.c b/media/ffvpx/libavcodec/cbs_av1.c
new file mode 100644
index 0000000000..45e1288a51
--- /dev/null
+++ b/media/ffvpx/libavcodec/cbs_av1.c
@@ -0,0 +1,1366 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixfmt.h"
+
+#include "avcodec.h"
+#include "cbs.h"
+#include "cbs_internal.h"
+#include "cbs_av1.h"
+
+
+/*
+ * Read a uvlc-coded value.
+ *
+ * Zero bits are counted until a one bit is seen.  With fewer than 32
+ * leading zeroes, that many further bits are read and the value is
+ * bits_value + 2^zeroes - 1; with 32 or more the value is
+ * MAX_UINT_BITS(32) and no further bits are read.
+ *
+ * The value is stored in *write_to only if it lies in
+ * [range_min, range_max].  Returns 0 on success, or
+ * AVERROR_INVALIDDATA if the bitstream ends early or the value is out
+ * of range.
+ */
+static int cbs_av1_read_uvlc(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                             const char *name, uint32_t *write_to,
+                             uint32_t range_min, uint32_t range_max)
+{
+    uint32_t zeroes, bits_value, value;
+    int position;
+
+    // position is only assigned, and only read, when tracing is enabled.
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    // Count leading zero bits; the loop is bounded only by the amount
+    // of data left in the bitstream.
+    zeroes = 0;
+    while (1) {
+        if (get_bits_left(gbc) < 1) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
+                   "%s: bitstream ended.\n", name);
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (get_bits1(gbc))
+            break;
+        ++zeroes;
+    }
+
+    if (zeroes >= 32) {
+        value = MAX_UINT_BITS(32);
+    } else {
+        if (get_bits_left(gbc) < zeroes) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
+                   "%s: bitstream ended.\n", name);
+            return AVERROR_INVALIDDATA;
+        }
+
+        bits_value = get_bits_long(gbc, zeroes);
+        value = bits_value + (UINT32_C(1) << zeroes) - 1;
+    }
+
+    if (ctx->trace_enable) {
+        char bits[65];
+        int i, j, k;
+
+        // More than 32 leading zeroes do not fit in one trace line: emit
+        // the excess in chunks of at most 32 '0' characters (with value 0),
+        // advancing position and reducing zeroes until exactly 32 remain.
+        if (zeroes >= 32) {
+            while (zeroes > 32) {
+                k = FFMIN(zeroes - 32, 32);
+                for (i = 0; i < k; i++)
+                    bits[i] = '0';
+                bits[i] = 0;
+                ff_cbs_trace_syntax_element(ctx, position, name,
+                                            NULL, bits, 0);
+                zeroes -= k;
+                position += k;
+            }
+        }
+
+        // Final trace line: the (remaining) zeroes, the terminating one
+        // bit and, when suffix bits were read, those bits MSB first.
+        for (i = 0; i < zeroes; i++)
+            bits[i] = '0';
+        bits[i++] = '1';
+
+        if (zeroes < 32) {
+            for (j = 0; j < zeroes; j++)
+                bits[i++] = (bits_value >> (zeroes - j - 1) & 1) ? '1' : '0';
+        }
+
+        bits[i] = 0;
+        ff_cbs_trace_syntax_element(ctx, position, name,
+                                    NULL, bits, value);
+    }
+
+    // The range check happens after tracing, so an out-of-range value
+    // still shows up in the trace output.
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+/*
+ * Write a uvlc-coded value.
+ *
+ * The code consists of `zeroes` zero bits, a single one bit and then
+ * `zeroes` suffix bits holding value + 1 - 2^zeroes, where
+ * zeroes = av_log2(value + 1); that is 2 * zeroes + 1 bits in total.
+ *
+ * Returns 0 on success, AVERROR_INVALIDDATA if value lies outside
+ * [range_min, range_max], or AVERROR(ENOSPC) if the code does not fit
+ * in the space left in pbc.
+ *
+ * NOTE(review): for value == UINT32_MAX the expression value + 1 wraps
+ * to zero, so that value is not coded correctly here; callers presumably
+ * pass a smaller range_max -- confirm.
+ */
+static int cbs_av1_write_uvlc(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                              const char *name, uint32_t value,
+                              uint32_t range_min, uint32_t range_max)
+{
+    uint32_t v;
+    int position, zeroes;
+
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    zeroes = av_log2(value + 1);
+    v = value - (1U << zeroes) + 1;
+
+    // Like the other writers in this file, fail cleanly instead of
+    // writing past the end of the output buffer.
+    if (put_bits_left(pbc) < 2 * zeroes + 1)
+        return AVERROR(ENOSPC);
+
+    // position is only assigned, and only read, when tracing is enabled.
+    if (ctx->trace_enable)
+        position = put_bits_count(pbc);
+
+    put_bits(pbc, zeroes, 0);
+    put_bits(pbc, 1, 1);
+    put_bits(pbc, zeroes, v);
+
+    if (ctx->trace_enable) {
+        char bits[65];
+        int i, j;
+
+        // Prefix zeroes, the one bit, then the suffix bits MSB first.
+        i = 0;
+        for (j = 0; j < zeroes; j++)
+            bits[i++] = '0';
+        bits[i++] = '1';
+        for (j = 0; j < zeroes; j++)
+            bits[i++] = (v >> (zeroes - j - 1) & 1) ? '1' : '0';
+        bits[i++] = 0;
+        ff_cbs_trace_syntax_element(ctx, position, name, NULL,
+                                    bits, value);
+    }
+
+    return 0;
+}
+
+/*
+ * Read a leb128-coded value.
+ *
+ * At most eight bytes are consumed; each contributes its low seven bits
+ * (least significant group first) and a clear top bit ends the value.
+ * Results above UINT32_MAX are rejected with AVERROR_INVALIDDATA.
+ * On success the value is stored in *write_to and 0 is returned.
+ */
+static int cbs_av1_read_leb128(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                               const char *name, uint64_t *write_to)
+{
+    uint64_t result = 0;
+    int start, ret;
+
+    // start is only assigned, and only read, when tracing is enabled.
+    if (ctx->trace_enable)
+        start = get_bits_count(gbc);
+
+    for (int idx = 0; idx < 8; idx++) {
+        int subscript[2] = { 1, idx };
+        uint32_t cur;
+
+        ret = ff_cbs_read_unsigned(ctx, gbc, 8, "leb128_byte[i]", subscript,
+                                   &cur, 0x00, 0xff);
+        if (ret < 0)
+            return ret;
+
+        result |= (uint64_t)(cur & 0x7f) << (7 * idx);
+
+        // A clear continuation flag marks the final byte.
+        if ((cur & 0x80) == 0)
+            break;
+    }
+
+    if (result > UINT32_MAX)
+        return AVERROR_INVALIDDATA;
+
+    if (ctx->trace_enable)
+        ff_cbs_trace_syntax_element(ctx, start, name, NULL, "", result);
+
+    *write_to = result;
+    return 0;
+}
+
+/*
+ * Write a leb128-coded value.
+ *
+ * (av_log2(value) + 7) / 7 bytes are emitted, each holding seven bits
+ * of the value (least significant group first); every byte except the
+ * last has its top bit set.  Returns 0, or the error from
+ * ff_cbs_write_unsigned().
+ */
+static int cbs_av1_write_leb128(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                                const char *name, uint64_t value)
+{
+    int start, ret, nbytes;
+
+    nbytes = (av_log2(value) + 7) / 7;
+
+    // start is only assigned, and only read, when tracing is enabled.
+    if (ctx->trace_enable)
+        start = put_bits_count(pbc);
+
+    for (int idx = 0; idx < nbytes; idx++) {
+        int subscript[2] = { 1, idx };
+        uint8_t out = value >> (7 * idx) & 0x7f;
+
+        // Every byte but the last carries the continuation flag.
+        if (idx != nbytes - 1)
+            out |= 0x80;
+
+        ret = ff_cbs_write_unsigned(ctx, pbc, 8, "leb128_byte[i]", subscript,
+                                    out, 0x00, 0xff);
+        if (ret < 0)
+            return ret;
+    }
+
+    if (ctx->trace_enable)
+        ff_cbs_trace_syntax_element(ctx, start, name, NULL, "", value);
+
+    return 0;
+}
+
+/*
+ * Read a non-symmetric unsigned value, ns(n), which takes one of n
+ * distinct values.
+ *
+ * With w = av_log2(n) + 1 and m = 2^w - n, a (w - 1)-bit prefix v is
+ * read.  If v < m the value is v; otherwise one extra bit is read and
+ * the value is 2 * v - m + extra_bit.
+ *
+ * n must be nonzero (asserted).  Returns 0 and stores the value in
+ * *write_to, or AVERROR_INVALIDDATA if fewer than w bits remain.
+ */
+static int cbs_av1_read_ns(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                           uint32_t n, const char *name,
+                           const int *subscripts, uint32_t *write_to)
+{
+    uint32_t m, v, extra_bit, value;
+    int position, w;
+
+    av_assert0(n > 0);
+
+    // position is only assigned, and only read, when tracing is enabled.
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    w = av_log2(n) + 1;
+    // Unsigned shift: avoids overflowing a signed int for large w.
+    m = (1U << w) - n;
+
+    if (get_bits_left(gbc) < w) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid non-symmetric value at "
+               "%s: bitstream ended.\n", name);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (w - 1 > 0)
+        v = get_bits(gbc, w - 1);
+    else
+        v = 0;
+
+    if (v < m) {
+        value = v;
+    } else {
+        extra_bit = get_bits1(gbc);
+        value = (v << 1) - m + extra_bit;
+    }
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+
+        // Show the prefix in bitstream order (MSB first), as the other
+        // trace strings in this file do.
+        for (i = 0; i < w - 1; i++)
+            bits[i] = (v >> (w - 2 - i) & 1) ? '1' : '0';
+        if (v >= m)
+            bits[i++] = extra_bit ? '1' : '0';
+        bits[i] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, position,
+                                    name, subscripts, bits, value);
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+/*
+ * Write a non-symmetric unsigned value, ns(n), which takes one of n
+ * distinct values (0 .. n - 1).
+ *
+ * With w = av_log2(n) + 1 and m = 2^w - n, values below m are written
+ * as a (w - 1)-bit prefix; larger values are written as the prefix
+ * m + ((value - m) >> 1) followed by one extra bit, (value - m) & 1.
+ *
+ * Returns 0 on success, AVERROR_INVALIDDATA if value >= n, or
+ * AVERROR(ENOSPC) if fewer than w bits are left in pbc.
+ */
+static int cbs_av1_write_ns(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                            uint32_t n, const char *name,
+                            const int *subscripts, uint32_t value)
+{
+    uint32_t w, m, v, extra_bit;
+    int position;
+
+    // value == n is not representable: its prefix would be 2^(w - 1),
+    // which does not fit in the w - 1 bits written below.
+    if (value >= n) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be less than %"PRIu32".\n",
+               name, value, n);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // position is only assigned, and only read, when tracing is enabled.
+    if (ctx->trace_enable)
+        position = put_bits_count(pbc);
+
+    w = av_log2(n) + 1;
+    // Unsigned shift: avoids overflowing a signed int for large w.
+    m = (1U << w) - n;
+
+    if (put_bits_left(pbc) < w)
+        return AVERROR(ENOSPC);
+
+    if (value < m) {
+        v = value;
+        put_bits(pbc, w - 1, v);
+    } else {
+        v = m + ((value - m) >> 1);
+        extra_bit = (value - m) & 1;
+        put_bits(pbc, w - 1, v);
+        put_bits(pbc, 1, extra_bit);
+    }
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+
+        // Show the prefix in bitstream order (MSB first), as the other
+        // trace strings in this file do.
+        for (i = 0; i < w - 1; i++)
+            bits[i] = (v >> (w - 2 - i) & 1) ? '1' : '0';
+        if (value >= m)
+            bits[i++] = extra_bit ? '1' : '0';
+        bits[i] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, position,
+                                    name, subscripts, bits, value);
+    }
+
+    return 0;
+}
+
+/*
+ * Read a unary-style increment: starting from range_min, every one bit
+ * read adds one to the value; reading stops at the first zero bit or
+ * as soon as the value reaches range_max (no terminating bit is read
+ * in that case).
+ *
+ * The range may span at most sizeof(bits) - 2 steps (asserted).
+ * Returns 0 and stores the value in *write_to, or AVERROR_INVALIDDATA
+ * if the bitstream ends first.
+ */
+static int cbs_av1_read_increment(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                                  uint32_t range_min, uint32_t range_max,
+                                  const char *name, uint32_t *write_to)
+{
+    uint32_t cur = range_min;
+    int start, nbits = 0;
+    char bits[33];
+
+    av_assert0(range_min <= range_max && range_max - range_min < sizeof(bits) - 1);
+
+    // start is only assigned, and only read, when tracing is enabled.
+    if (ctx->trace_enable)
+        start = get_bits_count(gbc);
+
+    while (cur < range_max) {
+        int bit;
+
+        if (get_bits_left(gbc) < 1) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid increment value at "
+                   "%s: bitstream ended.\n", name);
+            return AVERROR_INVALIDDATA;
+        }
+
+        bit = get_bits1(gbc);
+        bits[nbits++] = bit ? '1' : '0';
+        if (!bit)
+            break;
+        ++cur;
+    }
+
+    if (ctx->trace_enable) {
+        bits[nbits] = 0;
+        ff_cbs_trace_syntax_element(ctx, start,
+                                    name, NULL, bits, cur);
+    }
+
+    *write_to = cur;
+    return 0;
+}
+
+/*
+ * Write a unary-style increment: (value - range_min) one bits followed
+ * by a terminating zero bit, except that the zero bit is omitted when
+ * value == range_max.
+ *
+ * The range may span at most 31 steps (asserted).  Returns 0 on
+ * success, AVERROR_INVALIDDATA if value lies outside
+ * [range_min, range_max], or AVERROR(ENOSPC) if the code does not fit
+ * in pbc.
+ */
+static int cbs_av1_write_increment(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                                   uint32_t range_min, uint32_t range_max,
+                                   const char *name, uint32_t value)
+{
+    int len;
+
+    av_assert0(range_min <= range_max && range_max - range_min < 32);
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Total number of bits in the code, including the terminating zero
+    // when there is one.
+    if (value == range_max)
+        len = range_max - range_min;
+    else
+        len = value - range_min + 1;
+    if (put_bits_left(pbc) < len)
+        return AVERROR(ENOSPC);
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (i = 0; i < len; i++) {
+            if (range_min + i == value)
+                bits[i] = '0';
+            else
+                bits[i] = '1';
+        }
+        bits[i] = 0;
+        ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
+                                    name, NULL, bits, value);
+    }
+
+    // len can be as large as 31, so build the all-ones pattern with an
+    // unsigned shift; a signed 1 << 31 would overflow.  Subtracting one
+    // more clears the lowest bit, giving the terminating zero.
+    if (len > 0)
+        put_bits(pbc, len, (UINT32_C(1) << len) - 1 - (value != range_max));
+
+    return 0;
+}
+
+/*
+ * Read a sub-exponentially coded value.
+ *
+ * An increment in [0, max_len] (max_len = av_log2(range_max - 1) - 3)
+ * selects the bin.  Bin 0 has offset 0 and 3 bits; bin len > 0 has
+ * offset 2^(2 + len) and 2 + len bits.  Every bin below max_len is read
+ * as a plain unsigned field, the last one as ns(range_max - offset).
+ * The offset is then added to what was read.
+ *
+ * range_max must be nonzero (asserted).  Returns a negative error code
+ * from the helpers on failure; otherwise stores the value in *write_to.
+ */
+static int cbs_av1_read_subexp(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                               uint32_t range_max, const char *name,
+                               const int *subscripts, uint32_t *write_to)
+{
+    uint32_t result, top_bin, bin, base, nbits;
+    int start, ret;
+
+    // start is only assigned, and only read, when tracing is enabled.
+    if (ctx->trace_enable)
+        start = get_bits_count(gbc);
+
+    av_assert0(range_max > 0);
+    top_bin = av_log2(range_max - 1) - 3;
+
+    ret = cbs_av1_read_increment(ctx, gbc, 0, top_bin,
+                                 "subexp_more_bits", &bin);
+    if (ret < 0)
+        return ret;
+
+    nbits = bin ? 2 + bin : 3;
+    base  = bin ? 1 << nbits : 0;
+
+    if (bin < top_bin)
+        ret = ff_cbs_read_unsigned(ctx, gbc, nbits,
+                                   "subexp_bits", NULL, &result,
+                                   0, MAX_UINT_BITS(nbits));
+    else
+        ret = cbs_av1_read_ns(ctx, gbc, range_max - base,
+                              "subexp_final_bits", NULL, &result);
+    if (ret < 0)
+        return ret;
+
+    result += base;
+
+    if (ctx->trace_enable)
+        ff_cbs_trace_syntax_element(ctx, start,
+                                    name, subscripts, "", result);
+
+    *write_to = result;
+    return ret;
+}
+
+/*
+ * Write a sub-exponentially coded value.
+ *
+ * The bin is derived from the value: values below 8 use bin 0 (offset
+ * 0, 3 bits); larger values use bin av_log2(value) - 2, except that a
+ * bin one above max_len (max_len = av_log2(range_max - 1) - 3) is
+ * merged into bin max_len.  The bin index is written as an increment,
+ * followed by value - offset as a plain unsigned field, or as
+ * ns(range_max - offset) for the last bin.
+ *
+ * Returns AVERROR_INVALIDDATA if value > range_max, otherwise the
+ * result of the helper calls.
+ */
+static int cbs_av1_write_subexp(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                                uint32_t range_max, const char *name,
+                                const int *subscripts, uint32_t value)
+{
+    uint32_t max_len, len, base, nbits;
+    int start, ret;
+
+    if (value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [0,%"PRIu32"].\n",
+               name, value, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // start is only assigned, and only read, when tracing is enabled.
+    if (ctx->trace_enable)
+        start = put_bits_count(pbc);
+
+    av_assert0(range_max > 0);
+    max_len = av_log2(range_max - 1) - 3;
+
+    // Defaults describe bin 0; anything from 8 upwards picks its own bin.
+    nbits = 3;
+    base  = 0;
+    len   = 0;
+    if (value >= 8) {
+        nbits = av_log2(value);
+        len   = nbits - 2;
+        if (len > max_len) {
+            // The top bin is combined with the one below it.
+            av_assert0(len == max_len + 1);
+            --nbits;
+            len = max_len;
+        }
+        base = 1 << nbits;
+    }
+
+    ret = cbs_av1_write_increment(ctx, pbc, 0, max_len,
+                                  "subexp_more_bits", len);
+    if (ret < 0)
+        return ret;
+
+    if (len < max_len)
+        ret = ff_cbs_write_unsigned(ctx, pbc, nbits,
+                                    "subexp_bits", NULL,
+                                    value - base,
+                                    0, MAX_UINT_BITS(nbits));
+    else
+        ret = cbs_av1_write_ns(ctx, pbc, range_max - base,
+                               "subexp_final_bits", NULL,
+                               value - base);
+    if (ret < 0)
+        return ret;
+
+    if (ctx->trace_enable)
+        ff_cbs_trace_syntax_element(ctx, start,
+                                    name, subscripts, "", value);
+
+    return ret;
+}
+
+
+/* Return the smallest k >= 0 for which (blksize << k) >= target. */
+static int cbs_av1_tile_log2(int blksize, int target)
+{
+    int shift = 0;
+
+    while ((blksize << shift) < target)
+        shift++;
+
+    return shift;
+}
+
+/*
+ * Signed distance between two order hints.
+ *
+ * Returns 0 when the sequence header has order hints disabled.
+ * Otherwise a - b is reduced to order_hint_bits_minus_1 + 1 bits and
+ * the top bit of that field is treated as a sign bit.
+ */
+static int cbs_av1_get_relative_dist(const AV1RawSequenceHeader *seq,
+                                     unsigned int a, unsigned int b)
+{
+    unsigned int delta, sign_bit, magnitude;
+
+    if (seq->enable_order_hint) {
+        delta     = a - b;
+        sign_bit  = 1 << seq->order_hint_bits_minus_1;
+        magnitude = delta & (sign_bit - 1);
+        delta     = magnitude - (delta & sign_bit);
+        return delta;
+    }
+
+    return 0;
+}
+
+/*
+ * Scan the whole bytes remaining after the current read position and
+ * return the index (counted from that position) of the last nonzero
+ * one, or 0 if none is nonzero.  The scan runs on a copy of the reader,
+ * so gbc itself is not advanced.
+ */
+static size_t cbs_av1_get_payload_bytes_left(GetBitContext *gbc)
+{
+    GetBitContext scan = *gbc;
+    size_t last_nonzero = 0;
+    int index = 0;
+
+    while (get_bits_left(&scan) >= 8) {
+        if (get_bits(&scan, 8) != 0)
+            last_nonzero = index;
+        index++;
+    }
+
+    return last_nonzero;
+}
+
+
+/*
+ * Helper macros shared by the read and write expansions of the syntax
+ * template.  They rely on names supplied by the code they expand in:
+ * ctx (the context), rw (the bit reader/writer), current (the structure
+ * being processed) and err (an int used by CHECK).
+ */
+
+/* Emit a trace header line with the given name. */
+#define HEADER(name) do { \
+        ff_cbs_trace_header(ctx, name); \
+    } while (0)
+
+/* Evaluate call into err and return from the enclosing function if it
+ * is negative. */
+#define CHECK(call) do { \
+        err = (call); \
+        if (err < 0) \
+            return err; \
+    } while (0)
+
+/* Build function names of the form cbs_<codec>_<rw>_<name>; FUNC() uses
+ * the av1 codec and whatever READWRITE is currently defined as. */
+#define FUNC_NAME(rw, codec, name) cbs_ ## codec ## _ ## rw ## _ ## name
+#define FUNC_AV1(rw, name) FUNC_NAME(rw, av1, name)
+#define FUNC(name) FUNC_AV1(READWRITE, name)
+
+/* Subscript list for the helpers: a compound literal holding the count
+ * followed by the subscripts themselves, or NULL if there are none. */
+#define SUBSCRIPTS(subs, ...) (subs > 0 ? ((int[subs + 1]){ subs, __VA_ARGS__ }) : NULL)
+
+/* Fields of current without subscripts: fb = unsigned using the full
+ * range of width bits, fc = unsigned with an explicit range, flag =
+ * one-bit fb, su = signed. */
+#define fb(width, name) \
+        xf(width, name, current->name, 0, MAX_UINT_BITS(width), 0, )
+#define fc(width, name, range_min, range_max) \
+        xf(width, name, current->name, range_min, range_max, 0, )
+#define flag(name) fb(1, name)
+#define su(width, name) \
+        xsu(width, name, current->name, 0, )
+
+/* Subscripted counterparts of fb, fc, flag and su. */
+#define fbs(width, name, subs, ...) \
+        xf(width, name, current->name, 0, MAX_UINT_BITS(width), subs, __VA_ARGS__)
+#define fcs(width, name, range_min, range_max, subs, ...) \
+        xf(width, name, current->name, range_min, range_max, subs, __VA_ARGS__)
+#define flags(name, subs, ...) \
+        xf(1, name, current->name, 0, 1, subs, __VA_ARGS__)
+#define sus(width, name, subs, ...) \
+        xsu(width, name, current->name, subs, __VA_ARGS__)
+
+/* Field which must equal value: processed through xf with value as
+ * both range limits, using a local variable rather than a member of
+ * current. */
+#define fixed(width, name, value) do { \
+        av_unused uint32_t fixed_value = value; \
+        xf(width, name, fixed_value, value, value, 0, ); \
+    } while (0)
+
+
+/*
+ * Read-side definitions of the element macros, in effect while the
+ * syntax template is included below.  Each reads into a local variable
+ * through CHECK() and assigns the destination only once the read has
+ * succeeded.
+ */
+#define READ
+#define READWRITE read
+#define RWContext GetBitContext
+
+/* Unsigned field of width bits, range-checked by ff_cbs_read_unsigned(). */
+#define xf(width, name, var, range_min, range_max, subs, ...) do { \
+        uint32_t value; \
+        CHECK(ff_cbs_read_unsigned(ctx, rw, width, #name, \
+                                   SUBSCRIPTS(subs, __VA_ARGS__), \
+                                   &value, range_min, range_max)); \
+        var = value; \
+    } while (0)
+
+/* Signed field of width bits; the limits passed are MIN_INT_BITS(width)
+ * and MAX_INT_BITS(width). */
+#define xsu(width, name, var, subs, ...) do { \
+        int32_t value; \
+        CHECK(ff_cbs_read_signed(ctx, rw, width, #name, \
+                                 SUBSCRIPTS(subs, __VA_ARGS__), &value, \
+                                 MIN_INT_BITS(width), \
+                                 MAX_INT_BITS(width))); \
+        var = value; \
+    } while (0)
+
+/* uvlc-coded member of current, with an explicit range. */
+#define uvlc(name, range_min, range_max) do { \
+        uint32_t value; \
+        CHECK(cbs_av1_read_uvlc(ctx, rw, #name, \
+                                &value, range_min, range_max)); \
+        current->name = value; \
+    } while (0)
+
+/* Non-symmetric member of current; max_value is passed as the n
+ * argument of cbs_av1_read_ns(). */
+#define ns(max_value, name, subs, ...) do { \
+        uint32_t value; \
+        CHECK(cbs_av1_read_ns(ctx, rw, max_value, #name, \
+                              SUBSCRIPTS(subs, __VA_ARGS__), &value)); \
+        current->name = value; \
+    } while (0)
+
+/* Increment-coded member of current in [min, max]. */
+#define increment(name, min, max) do { \
+        uint32_t value; \
+        CHECK(cbs_av1_read_increment(ctx, rw, min, max, #name, &value)); \
+        current->name = value; \
+    } while (0)
+
+/* Sub-exponentially coded member of current; max is passed as the
+ * range_max argument of cbs_av1_read_subexp(). */
+#define subexp(name, max, subs, ...) do { \
+        uint32_t value; \
+        CHECK(cbs_av1_read_subexp(ctx, rw, max, #name, \
+                                  SUBSCRIPTS(subs, __VA_ARGS__), &value)); \
+        current->name = value; \
+    } while (0)
+
+/* A one-bit delta_coded flag, followed by a 7-bit signed delta when the
+ * flag is set; the member of current is set to zero otherwise. */
+#define delta_q(name) do { \
+        uint8_t delta_coded; \
+        int8_t delta_q; \
+        xf(1, name.delta_coded, delta_coded, 0, 1, 0, ); \
+        if (delta_coded) \
+            xsu(1 + 6, name.delta_q, delta_q, 0, ); \
+        else \
+            delta_q = 0; \
+        current->name = delta_q; \
+    } while (0)
+
+/* leb128-coded member of current. */
+#define leb128(name) do { \
+        uint64_t value; \
+        CHECK(cbs_av1_read_leb128(ctx, rw, #name, &value)); \
+        current->name = value; \
+    } while (0)
+
+/* Value which is not present in the bitstream: simply assigned. */
+#define infer(name, value) do { \
+        current->name = value; \
+    } while (0)
+
+/* Number of bits consumed so far, modulo 8 (zero when byte-aligned). */
+#define byte_alignment(rw) (get_bits_count(rw) % 8)
+
+#include "cbs_av1_syntax_template.c"
+
+#undef READ
+#undef READWRITE
+#undef RWContext
+#undef xf
+#undef xsu
+#undef uvlc
+#undef ns
+#undef increment
+#undef subexp
+#undef delta_q
+#undef leb128
+#undef infer
+#undef byte_alignment
+
+
+#define WRITE /* marks the write pass; tested by code outside this chunk (presumably the syntax template) */
+#define READWRITE write
+#define RWContext PutBitContext
+
+#define xf(width, name, var, range_min, range_max, subs, ...) do { /* writes var as a width-bit unsigned field */ \
+        CHECK(ff_cbs_write_unsigned(ctx, rw, width, #name, \
+                                    SUBSCRIPTS(subs, __VA_ARGS__), \
+                                    var, range_min, range_max)); \
+    } while (0)
+
+#define xsu(width, name, var, subs, ...) do { /* writes var as a width-bit signed field */ \
+        CHECK(ff_cbs_write_signed(ctx, rw, width, #name, \
+                                  SUBSCRIPTS(subs, __VA_ARGS__), var, \
+                                  MIN_INT_BITS(width), \
+                                  MAX_INT_BITS(width))); \
+    } while (0)
+
+#define uvlc(name, range_min, range_max) do { /* writes current->name via cbs_av1_write_uvlc */ \
+        CHECK(cbs_av1_write_uvlc(ctx, rw, #name, current->name, \
+                                 range_min, range_max)); \
+    } while (0)
+
+#define ns(max_value, name, subs, ...) do { /* writes current->name via cbs_av1_write_ns */ \
+        CHECK(cbs_av1_write_ns(ctx, rw, max_value, #name, \
+                               SUBSCRIPTS(subs, __VA_ARGS__), \
+                               current->name)); \
+    } while (0)
+
+#define increment(name, min, max) do { /* writes current->name via cbs_av1_write_increment */ \
+        CHECK(cbs_av1_write_increment(ctx, rw, min, max, #name, \
+                                      current->name)); \
+    } while (0)
+
+#define subexp(name, max, subs, ...) do { /* writes current->name via cbs_av1_write_subexp */ \
+        CHECK(cbs_av1_write_subexp(ctx, rw, max, #name, \
+                                   SUBSCRIPTS(subs, __VA_ARGS__), \
+                                   current->name)); \
+    } while (0)
+
+#define delta_q(name) do { /* the "coded" flag is derived: set exactly when the delta is nonzero */ \
+        xf(1, name.delta_coded, current->name != 0, 0, 1, 0, ); \
+        if (current->name) \
+            xsu(1 + 6, name.delta_q, current->name, 0, ); \
+    } while (0)
+
+#define leb128(name) do { /* writes current->name via cbs_av1_write_leb128 */ \
+        CHECK(cbs_av1_write_leb128(ctx, rw, #name, current->name)); \
+    } while (0)
+
+#define infer(name, value) do { /* nothing is written: the stored value must already equal the inferred one */ \
+        if (current->name != (value)) { \
+            av_log(ctx->log_ctx, AV_LOG_ERROR, \
+                   "%s does not match inferred value: " \
+                   "%"PRId64", but should be %"PRId64".\n", \
+                   #name, (int64_t)current->name, (int64_t)(value)); \
+            return AVERROR_INVALIDDATA; /* returns from the function that used the macro */ \
+        } \
+    } while (0)
+
+#define byte_alignment(rw) (put_bits_count(rw) % 8) /* bits written past the last byte boundary; 0 when aligned */
+
+#include "cbs_av1_syntax_template.c" /* instantiated a second time, now with the write definitions */
+
+#undef WRITE /* pass-specific macros are removed again after the template */
+#undef READWRITE
+#undef RWContext
+#undef xf
+#undef xsu
+#undef uvlc
+#undef ns
+#undef increment
+#undef subexp
+#undef delta_q
+#undef leb128
+#undef infer
+#undef byte_alignment
+
+
+/*
+ * Split a fragment into units, one per OBU; each unit spans the OBU header,
+ * the obu_size field when present, and the payload.  When header is nonzero
+ * and the first byte has its top bit set, the data is handled as an
+ * AV1CodecConfigurationRecord and the four bytes before the OBUs are skipped.
+ * Returns 0 on success or a negative error code.  Tracing is switched off
+ * while splitting and the previous setting is restored on every exit path.
+ */
+static int cbs_av1_split_fragment(CodedBitstreamContext *ctx,
+                                  CodedBitstreamFragment *frag,
+                                  int header)
+{
+    const int saved_trace = ctx->trace_enable;
+    uint8_t *cursor = frag->data;
+    size_t size = frag->data_size;
+    GetBitContext reader;
+    uint64_t unit_len;
+    int pos, ret;
+
+    // Splitting only locates unit boundaries, so keep it out of the trace.
+    ctx->trace_enable = 0;
+
+    if (size > INT_MAX / 8) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid fragment: "
+               "too large (%"SIZE_SPECIFIER" bytes).\n", size);
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    if (header && size && (cursor[0] & 0x80)) {
+        // The first bit is set, so the extradata is not made up purely of
+        // OBUs; expect an MP4/Matroska AV1CodecConfigurationRecord instead.
+        const int record_version = cursor[0] & 0x7f;
+
+        if (record_version != 1) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR,
+                   "Unknown version %d of AV1CodecConfigurationRecord "
+                   "found!\n",
+                   record_version);
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+        if (size < 4) {
+            av_log(ctx->log_ctx, AV_LOG_WARNING,
+                   "Undersized AV1CodecConfigurationRecord v%d found!\n",
+                   record_version);
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+        // A record of exactly four bytes has no OBUs behind it.
+        if (size == 4)
+            goto success;
+
+        // In a version 1 record the OBUs start after the first four bytes.
+        cursor += 4;
+        size   -= 4;
+    }
+
+    for (; size > 0; cursor += unit_len, size -= unit_len) {
+        AV1RawOBUHeader obu_hdr;
+        uint64_t payload_size;
+
+        init_get_bits(&reader, cursor, 8 * size);
+
+        ret = cbs_av1_read_obu_header(ctx, &reader, &obu_hdr);
+        if (ret < 0)
+            goto fail;
+
+        if (!obu_hdr.obu_has_size_field) {
+            // No size field: the payload is whatever remains after the
+            // header byte and the optional extension byte.
+            payload_size = size - 1 - obu_hdr.obu_extension_flag;
+        } else {
+            if (get_bits_left(&reader) < 8) {
+                av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid OBU: fragment "
+                       "too short (%"SIZE_SPECIFIER" bytes).\n", size);
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            ret = cbs_av1_read_leb128(ctx, &reader, "obu_size", &payload_size);
+            if (ret < 0)
+                goto fail;
+        }
+        // Header and size field must end on a byte boundary inside the data.
+        pos = get_bits_count(&reader);
+        av_assert0(pos % 8 == 0 && pos / 8 <= size);
+
+        unit_len = pos / 8 + payload_size;
+        if (unit_len > size) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid OBU length: "
+                   "%"PRIu64", but only %"SIZE_SPECIFIER" bytes remaining in fragment.\n",
+                   unit_len, size);
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        ret = ff_cbs_append_unit_data(frag, obu_hdr.obu_type,
+                                      cursor, unit_len, frag->data_ref);
+        if (ret < 0)
+            goto fail;
+    }
+
+success:
+    ret = 0;
+fail:
+    ctx->trace_enable = saved_trace;
+    return ret;
+}
+
+/* Make td describe the bytes of unit that gbc has not consumed yet and take a
+ * new reference on unit's buffer for it.  Returns 0 or a negative error. */
+static int cbs_av1_ref_tile_data(CodedBitstreamContext *ctx,
+                                 CodedBitstreamUnit *unit,
+                                 GetBitContext *gbc,
+                                 AV1RawTileData *td)
+{
+    const int pos = get_bits_count(gbc);
+
+    if (pos >= 8 * unit->data_size) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Bitstream ended before "
+               "any data in tile group (%d bits read).\n", pos);
+        return AVERROR_INVALIDDATA;
+    }
+    // The tile data is addressed in whole bytes, so the reader must be aligned.
+    av_assert0(pos % 8 == 0);
+
+    td->data_ref = av_buffer_ref(unit->data_ref);
+    if (td->data_ref == NULL)
+        return AVERROR(ENOMEM);
+
+    td->data_size = unit->data_size - pos / 8;
+    td->data      = unit->data      + pos / 8;
+    return 0;
+}
+
+static int cbs_av1_read_unit(CodedBitstreamContext *ctx,
+                             CodedBitstreamUnit *unit)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    AV1RawOBU *obu;
+    GetBitContext gbc;
+    int err, start_pos, end_pos;
+    // Parse one OBU from unit->data into newly allocated unit->content; returns 0 or a negative error code.
+    err = ff_cbs_alloc_unit_content(ctx, unit);
+    if (err < 0)
+        return err;
+    obu = unit->content;
+
+    err = init_get_bits(&gbc, unit->data, 8 * unit->data_size);
+    if (err < 0)
+        return err;
+
+    err = cbs_av1_read_obu_header(ctx, &gbc, &obu->header);
+    if (err < 0)
+        return err;
+    av_assert0(obu->header.obu_type == unit->type); // the type stored on the unit must agree with the parsed header
+
+    if (obu->header.obu_has_size_field) {
+        uint64_t obu_size;
+        err = cbs_av1_read_leb128(ctx, &gbc, "obu_size", &obu_size);
+        if (err < 0)
+            return err;
+        obu->obu_size = obu_size;
+    } else {
+        if (unit->data_size < 1 + obu->header.obu_extension_flag) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid OBU length: "
+                   "unit too short (%"SIZE_SPECIFIER").\n", unit->data_size);
+            return AVERROR_INVALIDDATA;
+        }
+        obu->obu_size = unit->data_size - 1 - obu->header.obu_extension_flag; // no size field: payload is the rest of the unit
+    }
+
+    start_pos = get_bits_count(&gbc); // bit position just after the header and size field
+
+    if (obu->header.obu_extension_flag) {
+        if (obu->header.obu_type != AV1_OBU_SEQUENCE_HEADER &&
+            obu->header.obu_type != AV1_OBU_TEMPORAL_DELIMITER &&
+            priv->operating_point_idc) {
+            int in_temporal_layer = // NOTE(review): priv->temporal_id/spatial_id are written outside this chunk
+                (priv->operating_point_idc >>  priv->temporal_id    ) & 1;
+            int in_spatial_layer  = // spatial layer bits start at bit 8 of operating_point_idc
+                (priv->operating_point_idc >> (priv->spatial_id + 8)) & 1;
+            if (!in_temporal_layer || !in_spatial_layer) {
+                return AVERROR(EAGAIN); // drop_obu(): the OBU's layer is not in the selected operating point
+            }
+        }
+    }
+
+    switch (obu->header.obu_type) {
+    case AV1_OBU_SEQUENCE_HEADER:
+        {
+            err = cbs_av1_read_sequence_header_obu(ctx, &gbc,
+                                                   &obu->obu.sequence_header);
+            if (err < 0)
+                return err;
+
+            if (priv->operating_point >= 0) { // a negative operating_point leaves operating_point_idc untouched
+                AV1RawSequenceHeader *sequence_header = &obu->obu.sequence_header;
+
+                if (priv->operating_point > sequence_header->operating_points_cnt_minus_1) {
+                    av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid Operating Point %d requested. "
+                                                       "Must not be higher than %u.\n",
+                           priv->operating_point, sequence_header->operating_points_cnt_minus_1);
+                    return AVERROR(EINVAL);
+                }
+                priv->operating_point_idc = sequence_header->operating_point_idc[priv->operating_point];
+            }
+
+            av_buffer_unref(&priv->sequence_header_ref); // drop the previously stored sequence header, if any
+            priv->sequence_header = NULL;
+
+            priv->sequence_header_ref = av_buffer_ref(unit->content_ref); // keeps the content that sequence_header points into alive
+            if (!priv->sequence_header_ref)
+                return AVERROR(ENOMEM);
+            priv->sequence_header = &obu->obu.sequence_header;
+        }
+        break;
+    case AV1_OBU_TEMPORAL_DELIMITER:
+        {
+            err = cbs_av1_read_temporal_delimiter_obu(ctx, &gbc);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_FRAME_HEADER:
+    case AV1_OBU_REDUNDANT_FRAME_HEADER:
+        {
+            err = cbs_av1_read_frame_header_obu(ctx, &gbc,
+                                                &obu->obu.frame_header,
+                                                obu->header.obu_type ==
+                                                AV1_OBU_REDUNDANT_FRAME_HEADER, // flag argument: nonzero for the redundant variant
+                                                unit->data_ref);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_TILE_GROUP:
+        {
+            err = cbs_av1_read_tile_group_obu(ctx, &gbc,
+                                              &obu->obu.tile_group);
+            if (err < 0)
+                return err;
+
+            err = cbs_av1_ref_tile_data(ctx, unit, &gbc, // what follows the parsed syntax is referenced, not copied
+                                        &obu->obu.tile_group.tile_data);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_FRAME:
+        {
+            err = cbs_av1_read_frame_obu(ctx, &gbc, &obu->obu.frame,
+                                         unit->data_ref);
+            if (err < 0)
+                return err;
+
+            err = cbs_av1_ref_tile_data(ctx, unit, &gbc, // same handling as a tile group
+                                        &obu->obu.frame.tile_group.tile_data);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_TILE_LIST:
+        {
+            err = cbs_av1_read_tile_list_obu(ctx, &gbc,
+                                             &obu->obu.tile_list);
+            if (err < 0)
+                return err;
+
+            err = cbs_av1_ref_tile_data(ctx, unit, &gbc, // same handling as a tile group
+                                        &obu->obu.tile_list.tile_data);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_METADATA:
+        {
+            err = cbs_av1_read_metadata_obu(ctx, &gbc, &obu->obu.metadata);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_PADDING:
+        {
+            err = cbs_av1_read_padding_obu(ctx, &gbc, &obu->obu.padding);
+            if (err < 0)
+                return err;
+        }
+        break;
+    default:
+        return AVERROR(ENOSYS); // any other OBU type is not handled by this reader
+    }
+
+    end_pos = get_bits_count(&gbc); // bit position after the parsed syntax elements
+    av_assert0(end_pos <= unit->data_size * 8);
+
+    if (obu->obu_size > 0 && // OBUs that carry tile data have no trailing bits to parse here
+        obu->header.obu_type != AV1_OBU_TILE_GROUP &&
+        obu->header.obu_type != AV1_OBU_TILE_LIST &&
+        obu->header.obu_type != AV1_OBU_FRAME) {
+        int nb_bits = obu->obu_size * 8 + start_pos - end_pos; // payload bits left over after the parsed syntax
+
+        if (nb_bits <= 0)
+            return AVERROR_INVALIDDATA;
+
+        err = cbs_av1_read_trailing_bits(ctx, &gbc, nb_bits);
+        if (err < 0)
+            return err;
+    }
+
+    return 0;
+}
+
+static int cbs_av1_write_obu(CodedBitstreamContext *ctx,
+                             CodedBitstreamUnit *unit,
+                             PutBitContext *pbc)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    AV1RawOBU *obu = unit->content;
+    PutBitContext pbc_tmp;
+    AV1RawTileData *td;
+    size_t header_size;
+    int err, start_pos, end_pos, data_pos;
+    CodedBitstreamAV1Context av1ctx;
+    // Serialise unit->content into pbc as header, obu_size field and payload; returns 0 or a negative error code.
+    // OBUs in the normal bitstream format must contain a size field
+    // in every OBU (in annex B it is optional, but we don't support
+    // writing that).
+    obu->header.obu_has_size_field = 1;
+    av1ctx = *priv; // snapshot of the private context, restored on the ENOSPC path below
+
+    if (priv->sequence_header_ref) { // the snapshot holds its own references to both stored headers
+        av1ctx.sequence_header_ref = av_buffer_ref(priv->sequence_header_ref);
+        if (!av1ctx.sequence_header_ref)
+            return AVERROR(ENOMEM);
+    }
+
+    if (priv->frame_header_ref) {
+        av1ctx.frame_header_ref = av_buffer_ref(priv->frame_header_ref);
+        if (!av1ctx.frame_header_ref) {
+            err = AVERROR(ENOMEM);
+            goto error;
+        }
+    }
+
+    err = cbs_av1_write_obu_header(ctx, pbc, &obu->header);
+    if (err < 0)
+        goto error;
+
+    if (obu->header.obu_has_size_field) { // always true here: the flag was forced to 1 above
+        pbc_tmp = *pbc; // remembers where the size field has to be written
+        // Add space for the size field to fill later.
+        put_bits32(pbc, 0);
+        put_bits32(pbc, 0);
+    }
+
+    td = NULL;
+    start_pos = put_bits_count(pbc); // bit position where the payload is first written, after the 8 placeholder bytes
+
+    switch (obu->header.obu_type) {
+    case AV1_OBU_SEQUENCE_HEADER:
+        {
+            err = cbs_av1_write_sequence_header_obu(ctx, pbc,
+                                                    &obu->obu.sequence_header);
+            if (err < 0)
+                goto error;
+
+            av_buffer_unref(&priv->sequence_header_ref); // replace the stored sequence header with this unit's content
+            priv->sequence_header = NULL;
+
+            err = ff_cbs_make_unit_refcounted(ctx, unit);
+            if (err < 0)
+                goto error;
+
+            priv->sequence_header_ref = av_buffer_ref(unit->content_ref);
+            if (!priv->sequence_header_ref) {
+                err = AVERROR(ENOMEM);
+                goto error;
+            }
+
+            priv->sequence_header = &obu->obu.sequence_header;
+        }
+        break;
+    case AV1_OBU_TEMPORAL_DELIMITER:
+        {
+            err = cbs_av1_write_temporal_delimiter_obu(ctx, pbc);
+            if (err < 0)
+                goto error;
+        }
+        break;
+    case AV1_OBU_FRAME_HEADER:
+    case AV1_OBU_REDUNDANT_FRAME_HEADER:
+        {
+            err = cbs_av1_write_frame_header_obu(ctx, pbc,
+                                                 &obu->obu.frame_header,
+                                                 obu->header.obu_type ==
+                                                 AV1_OBU_REDUNDANT_FRAME_HEADER, // flag argument: nonzero for the redundant variant
+                                                 NULL);
+            if (err < 0)
+                goto error;
+        }
+        break;
+    case AV1_OBU_TILE_GROUP:
+        {
+            err = cbs_av1_write_tile_group_obu(ctx, pbc,
+                                               &obu->obu.tile_group);
+            if (err < 0)
+                goto error;
+
+            td = &obu->obu.tile_group.tile_data; // raw tile bytes are appended after the size is known
+        }
+        break;
+    case AV1_OBU_FRAME:
+        {
+            err = cbs_av1_write_frame_obu(ctx, pbc, &obu->obu.frame, NULL);
+            if (err < 0)
+                goto error;
+
+            td = &obu->obu.frame.tile_group.tile_data;
+        }
+        break;
+    case AV1_OBU_TILE_LIST:
+        {
+            err = cbs_av1_write_tile_list_obu(ctx, pbc, &obu->obu.tile_list);
+            if (err < 0)
+                goto error;
+
+            td = &obu->obu.tile_list.tile_data;
+        }
+        break;
+    case AV1_OBU_METADATA:
+        {
+            err = cbs_av1_write_metadata_obu(ctx, pbc, &obu->obu.metadata);
+            if (err < 0)
+                goto error;
+        }
+        break;
+    case AV1_OBU_PADDING:
+        {
+            err = cbs_av1_write_padding_obu(ctx, pbc, &obu->obu.padding);
+            if (err < 0)
+                goto error;
+        }
+        break;
+    default:
+        err = AVERROR(ENOSYS); // any other OBU type is not handled by this writer
+        goto error;
+    }
+
+    end_pos = put_bits_count(pbc);
+    header_size = (end_pos - start_pos + 7) / 8; // bytes produced by the syntax writer, rounded up
+    if (td) {
+        obu->obu_size = header_size + td->data_size; // size covers the written syntax plus the raw tile data
+    } else if (header_size > 0) {
+        // Add trailing bits and recalculate.
+        err = cbs_av1_write_trailing_bits(ctx, pbc, 8 - end_pos % 8);
+        if (err < 0)
+            goto error;
+        end_pos = put_bits_count(pbc);
+        obu->obu_size = header_size = (end_pos - start_pos + 7) / 8;
+    } else {
+        // Empty OBU.
+        obu->obu_size = 0;
+    }
+
+    end_pos = put_bits_count(pbc);
+    // Must now be byte-aligned.
+    av_assert0(end_pos % 8 == 0);
+    flush_put_bits(pbc);
+    start_pos /= 8; // from here on both positions are byte offsets
+    end_pos   /= 8;
+
+    *pbc = pbc_tmp; // rewind to the placeholder and write the real size
+    err = cbs_av1_write_leb128(ctx, pbc, "obu_size", obu->obu_size);
+    if (err < 0)
+        goto error;
+
+    data_pos = put_bits_count(pbc) / 8; // byte offset where the payload must finally start
+    flush_put_bits(pbc);
+    av_assert0(data_pos <= start_pos); // the size field must fit in the 8 reserved bytes
+
+    if (8 * obu->obu_size > put_bits_left(pbc)) {
+        av_buffer_unref(&priv->sequence_header_ref); // undo context changes made by the writers above
+        av_buffer_unref(&priv->frame_header_ref);
+        *priv = av1ctx; // the snapshot's references move back into priv, so they are not unreferenced here
+
+        return AVERROR(ENOSPC); // NOTE(review): presumably the caller retries with a larger buffer
+    }
+
+    if (obu->obu_size > 0) {
+        memmove(pbc->buf + data_pos, // close the gap left by the unused part of the placeholder
+                pbc->buf + start_pos, header_size);
+        skip_put_bytes(pbc, header_size);
+
+        if (td) {
+            memcpy(pbc->buf + data_pos + header_size, // raw tile data goes directly after the written syntax
+                   td->data, td->data_size);
+            skip_put_bytes(pbc, td->data_size);
+        }
+    }
+
+    // OBU data must be byte-aligned.
+    av_assert0(put_bits_count(pbc) % 8 == 0);
+    err = 0;
+
+error:
+    av_buffer_unref(&av1ctx.sequence_header_ref); // success and failure alike: release the snapshot's references
+    av_buffer_unref(&av1ctx.frame_header_ref);
+
+    return err;
+}
+
+/* Concatenate the data of every unit into one newly allocated, zero-padded
+ * buffer that becomes the fragment's data.  Returns 0 or AVERROR(ENOMEM). */
+static int cbs_av1_assemble_fragment(CodedBitstreamContext *ctx,
+                                     CodedBitstreamFragment *frag)
+{
+    size_t size = 0, pos = 0;
+    int idx;
+
+    // First pass: add up the unit sizes so a single allocation suffices.
+    for (idx = 0; idx < frag->nb_units; idx++)
+        size += frag->units[idx].data_size;
+
+    frag->data_ref = av_buffer_alloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (frag->data_ref == NULL)
+        return AVERROR(ENOMEM);
+    frag->data = frag->data_ref->data;
+    memset(frag->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+    // Second pass: copy each unit directly behind the previous one.
+    for (idx = 0; idx < frag->nb_units; idx++) {
+        const CodedBitstreamUnit *cur = &frag->units[idx];
+        memcpy(frag->data + pos, cur->data, cur->data_size);
+        pos += cur->data_size;
+    }
+    av_assert0(pos == size);
+    frag->data_size = size;
+    return 0;
+}
+
+/* Reset parsing state; note that sequence_header_ref is not released here. */
+static void cbs_av1_flush(CodedBitstreamContext *ctx)
+{
+    CodedBitstreamAV1Context *av1 = ctx->priv_data;
+
+    av_buffer_unref(&av1->frame_header_ref);
+    av1->frame_header    = NULL;
+    av1->sequence_header = NULL;
+    av1->seen_frame_header   = 0;
+    av1->tile_num            = 0;
+    av1->operating_point_idc = 0;
+    memset(av1->ref, 0, sizeof(av1->ref));
+}
+
+/* Drop the sequence and frame header references held by the private context. */
+static void cbs_av1_close(CodedBitstreamContext *ctx)
+{
+    CodedBitstreamAV1Context *av1 = ctx->priv_data;
+    av_buffer_unref(&av1->sequence_header_ref);
+    av_buffer_unref(&av1->frame_header_ref);
+}
+
+/* Free routine listed for AV1_OBU_METADATA in cbs_av1_unit_types: releases the
+ * ITU-T T.35 payload reference when that type is stored, then content. */
+static void cbs_av1_free_metadata(void *unit, uint8_t *content)
+{
+    AV1RawOBU *obu = (AV1RawOBU*)content;
+    AV1RawMetadata *meta = &obu->obu.metadata;
+
+    av_assert0(obu->header.obu_type == AV1_OBU_METADATA);
+
+    // T.35 is the only metadata type handled here as owning a buffer reference.
+    if (meta->metadata_type == AV1_METADATA_TYPE_ITUT_T35)
+        av_buffer_unref(&meta->metadata.itut_t35.payload_ref);
+
+    av_free(content);
+}
+
+static const CodedBitstreamUnitTypeDescriptor cbs_av1_unit_types[] = { // one descriptor per OBU type; all use AV1RawOBU as content
+    CBS_UNIT_TYPE_POD(AV1_OBU_SEQUENCE_HEADER,        AV1RawOBU),
+    CBS_UNIT_TYPE_POD(AV1_OBU_TEMPORAL_DELIMITER,     AV1RawOBU),
+    CBS_UNIT_TYPE_POD(AV1_OBU_FRAME_HEADER,           AV1RawOBU),
+    CBS_UNIT_TYPE_POD(AV1_OBU_REDUNDANT_FRAME_HEADER, AV1RawOBU),
+    // NOTE(review): the macros are defined outside this chunk; INTERNAL_REF presumably names the field backed by a buffer reference.
+    CBS_UNIT_TYPE_INTERNAL_REF(AV1_OBU_TILE_GROUP, AV1RawOBU,
+                               obu.tile_group.tile_data.data),
+    CBS_UNIT_TYPE_INTERNAL_REF(AV1_OBU_FRAME,      AV1RawOBU,
+                               obu.frame.tile_group.tile_data.data),
+    CBS_UNIT_TYPE_INTERNAL_REF(AV1_OBU_TILE_LIST,  AV1RawOBU,
+                               obu.tile_list.tile_data.data),
+    CBS_UNIT_TYPE_INTERNAL_REF(AV1_OBU_PADDING,    AV1RawOBU,
+                               obu.padding.payload),
+    // Metadata content is released through cbs_av1_free_metadata.
+    CBS_UNIT_TYPE_COMPLEX(AV1_OBU_METADATA, AV1RawOBU,
+                          &cbs_av1_free_metadata),
+
+    CBS_UNIT_TYPE_END_OF_LIST
+};
+
+#define OFFSET(x) offsetof(CodedBitstreamAV1Context, x) /* byte offset of an option's field in the private context */
+static const AVOption cbs_av1_options[] = { /* the default of -1 skips the operating point lookup in cbs_av1_read_unit */
+    { "operating_point",  "Set operating point to select layers to parse from a scalable bitstream",
+                          OFFSET(operating_point), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, AV1_MAX_OPERATING_POINTS - 1, 0 },
+    { NULL }
+};
+
+static const AVClass cbs_av1_class = { // AVClass for the private context; referenced by ff_cbs_type_av1.priv_class
+    .class_name = "cbs_av1",
+    .item_name  = av_default_item_name,
+    .option     = cbs_av1_options, // exposes the "operating_point" option
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const CodedBitstreamType ff_cbs_type_av1 = { // the only non-static object in this part of the file
+    .codec_id          = AV_CODEC_ID_AV1,
+
+    .priv_class        = &cbs_av1_class,
+    .priv_data_size    = sizeof(CodedBitstreamAV1Context), // the type behind ctx->priv_data in the callbacks below
+
+    .unit_types        = cbs_av1_unit_types,
+
+    .split_fragment    = &cbs_av1_split_fragment, // fragment data -> one unit per OBU
+    .read_unit         = &cbs_av1_read_unit, // unit data -> parsed AV1RawOBU content
+    .write_unit        = &cbs_av1_write_obu, // AV1RawOBU content -> bits in a PutBitContext
+    .assemble_fragment = &cbs_av1_assemble_fragment, // unit data -> one contiguous fragment buffer
+
+    .flush             = &cbs_av1_flush,
+    .close             = &cbs_av1_close,
+};
diff --git a/media/ffvpx/libavcodec/cbs_av1.h b/media/ffvpx/libavcodec/cbs_av1.h
new file mode 100644
index 0000000000..1fc80dcfa0
--- /dev/null
+++ b/media/ffvpx/libavcodec/cbs_av1.h
@@ -0,0 +1,464 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_AV1_H
+#define AVCODEC_CBS_AV1_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "av1.h"
+#include "cbs.h"
+
+
+typedef struct AV1RawOBUHeader { // parsed OBU header; stored as the header member of AV1RawOBU
+    uint8_t obu_forbidden_bit;
+    uint8_t obu_type; // selects the payload parser and writer in cbs_av1.c
+    uint8_t obu_extension_flag; // counted as one extra header byte when cbs_av1.c derives a missing obu_size
+    uint8_t obu_has_size_field; // when set, a leb128 obu_size follows the header; forced to 1 on write
+    uint8_t obu_reserved_1bit;
+
+    uint8_t temporal_id; // NOTE(review): presumably only meaningful when obu_extension_flag is set
+    uint8_t spatial_id;
+    uint8_t extension_header_reserved_3bits;
+} AV1RawOBUHeader;
+
+typedef struct AV1RawColorConfig { // embedded in AV1RawSequenceHeader as color_config
+    uint8_t high_bitdepth;
+    uint8_t twelve_bit;
+    uint8_t mono_chrome;
+
+    uint8_t color_description_present_flag; // NOTE(review): presumably gates the three fields below; confirm in the syntax template
+    uint8_t color_primaries;
+    uint8_t transfer_characteristics;
+    uint8_t matrix_coefficients;
+
+    uint8_t color_range;
+    uint8_t subsampling_x;
+    uint8_t subsampling_y;
+    uint8_t chroma_sample_position;
+    uint8_t separate_uv_delta_q;
+} AV1RawColorConfig;
+
+typedef struct AV1RawTimingInfo { // embedded in AV1RawSequenceHeader as timing_info
+    uint32_t num_units_in_display_tick;
+    uint32_t time_scale;
+
+    uint8_t equal_picture_interval;
+    uint32_t num_ticks_per_picture_minus_1; // stored as written in the bitstream ("minus 1" form)
+} AV1RawTimingInfo;
+
+typedef struct AV1RawDecoderModelInfo { // embedded in AV1RawSequenceHeader as decoder_model_info
+    uint8_t  buffer_delay_length_minus_1; // the *_length_minus_1 fields are stored in "minus 1" form
+    uint32_t num_units_in_decoding_tick;
+    uint8_t  buffer_removal_time_length_minus_1;
+    uint8_t  frame_presentation_time_length_minus_1;
+} AV1RawDecoderModelInfo;
+
+typedef struct AV1RawSequenceHeader { // content of a sequence header OBU; cbs_av1.c keeps a reference to the latest one
+    uint8_t seq_profile;
+    uint8_t still_picture;
+    uint8_t reduced_still_picture_header;
+
+    uint8_t timing_info_present_flag;
+    uint8_t decoder_model_info_present_flag;
+    uint8_t initial_display_delay_present_flag;
+    uint8_t operating_points_cnt_minus_1; // upper bound for the "operating_point" option, checked in cbs_av1_read_unit
+
+    AV1RawTimingInfo       timing_info;
+    AV1RawDecoderModelInfo decoder_model_info;
+
+    uint16_t operating_point_idc[AV1_MAX_OPERATING_POINTS]; // entry [operating_point] is copied into the private context on read
+    uint8_t  seq_level_idx[AV1_MAX_OPERATING_POINTS]; // the arrays in this group have one slot per possible operating point
+    uint8_t  seq_tier[AV1_MAX_OPERATING_POINTS];
+    uint8_t  decoder_model_present_for_this_op[AV1_MAX_OPERATING_POINTS];
+    uint32_t decoder_buffer_delay[AV1_MAX_OPERATING_POINTS];
+    uint32_t encoder_buffer_delay[AV1_MAX_OPERATING_POINTS];
+    uint8_t  low_delay_mode_flag[AV1_MAX_OPERATING_POINTS];
+    uint8_t  initial_display_delay_present_for_this_op[AV1_MAX_OPERATING_POINTS];
+    uint8_t  initial_display_delay_minus_1[AV1_MAX_OPERATING_POINTS];
+
+    uint8_t  frame_width_bits_minus_1;
+    uint8_t  frame_height_bits_minus_1;
+    uint16_t max_frame_width_minus_1;
+    uint16_t max_frame_height_minus_1;
+
+    uint8_t frame_id_numbers_present_flag;
+    uint8_t delta_frame_id_length_minus_2;
+    uint8_t additional_frame_id_length_minus_1;
+
+    uint8_t use_128x128_superblock;
+    uint8_t enable_filter_intra;
+    uint8_t enable_intra_edge_filter;
+    uint8_t enable_interintra_compound;
+    uint8_t enable_masked_compound;
+    uint8_t enable_warped_motion;
+    uint8_t enable_dual_filter;
+
+    uint8_t enable_order_hint;
+    uint8_t enable_jnt_comp;
+    uint8_t enable_ref_frame_mvs;
+
+    uint8_t seq_choose_screen_content_tools;
+    uint8_t seq_force_screen_content_tools;
+    uint8_t seq_choose_integer_mv;
+    uint8_t seq_force_integer_mv;
+
+    uint8_t order_hint_bits_minus_1;
+
+    uint8_t enable_superres;
+    uint8_t enable_cdef;
+    uint8_t enable_restoration;
+
+    AV1RawColorConfig color_config;
+
+    uint8_t film_grain_params_present;
+} AV1RawSequenceHeader;
+
+typedef struct AV1RawFilmGrainParams { // embedded in AV1RawFrameHeader as film_grain
+    uint8_t  apply_grain;
+    uint16_t grain_seed;
+    uint8_t  update_grain;
+    uint8_t  film_grain_params_ref_idx;
+    uint8_t  num_y_points; // NOTE(review): presumably the number of used entries in the two point_y_* arrays
+    uint8_t  point_y_value[14];
+    uint8_t  point_y_scaling[14];
+    uint8_t  chroma_scaling_from_luma;
+    uint8_t  num_cb_points; // NOTE(review): presumably the number of used entries in the two point_cb_* arrays
+    uint8_t  point_cb_value[10];
+    uint8_t  point_cb_scaling[10];
+    uint8_t  num_cr_points; // NOTE(review): presumably the number of used entries in the two point_cr_* arrays
+    uint8_t  point_cr_value[10];
+    uint8_t  point_cr_scaling[10];
+    uint8_t  grain_scaling_minus_8;
+    uint8_t  ar_coeff_lag;
+    uint8_t  ar_coeffs_y_plus_128[24]; // luma array has 24 entries, the two chroma arrays 25
+    uint8_t  ar_coeffs_cb_plus_128[25];
+    uint8_t  ar_coeffs_cr_plus_128[25];
+    uint8_t  ar_coeff_shift_minus_6;
+    uint8_t  grain_scale_shift;
+    uint8_t  cb_mult;
+    uint8_t  cb_luma_mult;
+    uint16_t cb_offset;
+    uint8_t  cr_mult;
+    uint8_t  cr_luma_mult;
+    uint16_t cr_offset;
+    uint8_t  overlap_flag;
+    uint8_t  clip_to_restricted_range;
+} AV1RawFilmGrainParams;
+
+typedef struct AV1RawFrameHeader { // content of a frame header OBU (normal or redundant); fields are named after the syntax elements
+    uint8_t  show_existing_frame;
+    uint8_t  frame_to_show_map_idx;
+    uint32_t frame_presentation_time;
+    uint32_t display_frame_id;
+
+    uint8_t frame_type;
+    uint8_t show_frame;
+    uint8_t showable_frame;
+
+    uint8_t error_resilient_mode;
+    uint8_t disable_cdf_update;
+    uint8_t allow_screen_content_tools;
+    uint8_t force_integer_mv;
+
+    uint32_t current_frame_id;
+    uint8_t  frame_size_override_flag;
+    uint8_t  order_hint;
+
+    uint8_t  buffer_removal_time_present_flag;
+    uint32_t buffer_removal_time[AV1_MAX_OPERATING_POINTS]; // one slot per possible operating point
+
+    uint8_t  primary_ref_frame;
+    uint16_t frame_width_minus_1;
+    uint16_t frame_height_minus_1;
+    uint8_t  use_superres;
+    uint8_t  coded_denom;
+    uint8_t  render_and_frame_size_different;
+    uint16_t render_width_minus_1;
+    uint16_t render_height_minus_1;
+
+    uint8_t found_ref[AV1_REFS_PER_FRAME];
+
+    uint8_t refresh_frame_flags;
+    uint8_t allow_intrabc;
+    uint8_t ref_order_hint[AV1_NUM_REF_FRAMES];
+    uint8_t frame_refs_short_signaling;
+    uint8_t last_frame_idx;
+    uint8_t golden_frame_idx;
+    int8_t  ref_frame_idx[AV1_REFS_PER_FRAME]; // signed, unlike the neighbouring index fields
+    uint32_t delta_frame_id_minus1[AV1_REFS_PER_FRAME];
+
+    uint8_t allow_high_precision_mv;
+    uint8_t is_filter_switchable;
+    uint8_t interpolation_filter;
+    uint8_t is_motion_mode_switchable;
+    uint8_t use_ref_frame_mvs;
+
+    uint8_t disable_frame_end_update_cdf;
+
+    uint8_t uniform_tile_spacing_flag;
+    uint8_t tile_cols_log2;
+    uint8_t tile_rows_log2;
+    uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS];
+    uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS];
+    uint16_t context_update_tile_id;
+    uint8_t tile_size_bytes_minus1;
+
+    // These two are derived rather than coded; they are cached here because
+    // recalculating them every time they are needed would be unhelpful.
+    uint16_t tile_cols;
+    uint16_t tile_rows;
+
+    uint8_t base_q_idx;
+    int8_t  delta_q_y_dc; // NOTE(review): the int8_t delta_q_* fields are presumably coded with the delta_q() macro of cbs_av1.c
+    uint8_t diff_uv_delta;
+    int8_t  delta_q_u_dc;
+    int8_t  delta_q_u_ac;
+    int8_t  delta_q_v_dc;
+    int8_t  delta_q_v_ac;
+    uint8_t using_qmatrix;
+    uint8_t qm_y;
+    uint8_t qm_u;
+    uint8_t qm_v;
+
+    uint8_t segmentation_enabled;
+    uint8_t segmentation_update_map;
+    uint8_t segmentation_temporal_update;
+    uint8_t segmentation_update_data;
+    uint8_t feature_enabled[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX]; // indexed [segment][feature], matching feature_value
+    int16_t feature_value[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX];
+
+    uint8_t delta_q_present;
+    uint8_t delta_q_res;
+    uint8_t delta_lf_present;
+    uint8_t delta_lf_res;
+    uint8_t delta_lf_multi;
+
+    uint8_t loop_filter_level[4];
+    uint8_t loop_filter_sharpness;
+    uint8_t loop_filter_delta_enabled;
+    uint8_t loop_filter_delta_update;
+    uint8_t update_ref_delta[AV1_TOTAL_REFS_PER_FRAME]; // same length as loop_filter_ref_deltas
+    int8_t  loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME];
+    uint8_t update_mode_delta[2]; // same length as loop_filter_mode_deltas
+    int8_t  loop_filter_mode_deltas[2];
+
+    uint8_t cdef_damping_minus_3;
+    uint8_t cdef_bits;
+    uint8_t cdef_y_pri_strength[8];
+    uint8_t cdef_y_sec_strength[8];
+    uint8_t cdef_uv_pri_strength[8];
+    uint8_t cdef_uv_sec_strength[8];
+
+    uint8_t lr_type[3];
+    uint8_t lr_unit_shift;
+    uint8_t lr_uv_shift;
+
+    uint8_t tx_mode;
+    uint8_t reference_select;
+    uint8_t skip_mode_present;
+
+    uint8_t allow_warped_motion;
+    uint8_t reduced_tx_set;
+
+    uint8_t is_global[AV1_TOTAL_REFS_PER_FRAME];
+    uint8_t is_rot_zoom[AV1_TOTAL_REFS_PER_FRAME];
+    uint8_t is_translation[AV1_TOTAL_REFS_PER_FRAME];
+    //AV1RawSubexp gm_params[AV1_TOTAL_REFS_PER_FRAME][6];
+    uint32_t gm_params[AV1_TOTAL_REFS_PER_FRAME][6]; // six values per reference frame
+
+    AV1RawFilmGrainParams film_grain;
+} AV1RawFrameHeader;
+
+typedef struct AV1RawTileData { // pointer/size pair describing a tile payload
+    uint8_t     *data;
+    AVBufferRef *data_ref; // presumably the buffer reference that keeps data alive - confirm in the CBS implementation
+    size_t       data_size;
+} AV1RawTileData;
+
+typedef struct AV1RawTileGroup { // tile group fields followed by the tile payload
+    uint8_t  tile_start_and_end_present_flag;
+    uint16_t tg_start; // presumably only coded when tile_start_and_end_present_flag is set - confirm in the syntax template
+    uint16_t tg_end;
+
+    AV1RawTileData tile_data;
+} AV1RawTileGroup;
+
+typedef struct AV1RawFrame { // a frame header and one tile group stored together, both by value
+    AV1RawFrameHeader header;
+    AV1RawTileGroup   tile_group;
+} AV1RawFrame;
+
+typedef struct AV1RawTileList { // tile list fields followed by the tile payload
+    uint8_t output_frame_width_in_tiles_minus_1;
+    uint8_t output_frame_height_in_tiles_minus_1;
+    uint16_t tile_count_minus_1;
+
+    AV1RawTileData tile_data;
+} AV1RawTileList;
+
+typedef struct AV1RawMetadataHDRCLL { // hdr_cll member of the AV1RawMetadata union
+    uint16_t max_cll;
+    uint16_t max_fall;
+} AV1RawMetadataHDRCLL;
+
+typedef struct AV1RawMetadataHDRMDCV { // hdr_mdcv member of the AV1RawMetadata union
+    uint16_t primary_chromaticity_x[3]; // three entries, presumably one per colour primary - confirm against the spec
+    uint16_t primary_chromaticity_y[3];
+    uint16_t white_point_chromaticity_x;
+    uint16_t white_point_chromaticity_y;
+    uint32_t luminance_max;
+    uint32_t luminance_min;
+} AV1RawMetadataHDRMDCV;
+
+typedef struct AV1RawMetadataScalability { // scalability member of the AV1RawMetadata union
+    uint8_t scalability_mode_idc;
+    uint8_t spatial_layers_cnt_minus_1;
+    uint8_t spatial_layer_dimensions_present_flag;
+    uint8_t spatial_layer_description_present_flag;
+    uint8_t temporal_group_description_present_flag;
+    uint8_t scalability_structure_reserved_3bits;
+    uint16_t spatial_layer_max_width[4];  // the per-spatial-layer arrays all have 4 entries
+    uint16_t spatial_layer_max_height[4];
+    uint8_t spatial_layer_ref_id[4];
+    uint8_t temporal_group_size; // uint8_t, so at most 255, matching the 255-entry arrays below
+    uint8_t temporal_group_temporal_id[255];
+    uint8_t temporal_group_temporal_switching_up_point_flag[255];
+    uint8_t temporal_group_spatial_switching_up_point_flag[255];
+    uint8_t temporal_group_ref_cnt[255];
+    uint8_t temporal_group_ref_pic_diff[255][7]; // up to 7 entries per temporal group slot
+} AV1RawMetadataScalability;
+
+typedef struct AV1RawMetadataITUTT35 { // itut_t35 member of the AV1RawMetadata union
+    uint8_t itu_t_t35_country_code;
+    uint8_t itu_t_t35_country_code_extension_byte;
+
+    uint8_t     *payload;
+    AVBufferRef *payload_ref; // presumably the buffer reference that keeps payload alive - confirm in the CBS implementation
+    size_t       payload_size;
+} AV1RawMetadataITUTT35;
+
+typedef struct AV1RawMetadataTimecode { // timecode member of the AV1RawMetadata union
+    uint8_t  counting_type;
+    uint8_t  full_timestamp_flag;
+    uint8_t  discontinuity_flag;
+    uint8_t  cnt_dropped_flag;
+    uint16_t n_frames;
+    uint8_t  seconds_value;
+    uint8_t  minutes_value;
+    uint8_t  hours_value;
+    uint8_t  seconds_flag;
+    uint8_t  minutes_flag;
+    uint8_t  hours_flag;
+    uint8_t  time_offset_length;
+    uint32_t time_offset_value; // presumably time_offset_length bits wide in the bitstream - confirm in the syntax template
+} AV1RawMetadataTimecode;
+
+typedef struct AV1RawMetadata { // metadata type value plus a union of the known payload layouts
+    uint64_t metadata_type; // presumably selects which union member below is valid - confirm in the syntax template
+    union {
+        AV1RawMetadataHDRCLL      hdr_cll;
+        AV1RawMetadataHDRMDCV     hdr_mdcv;
+        AV1RawMetadataScalability scalability;
+        AV1RawMetadataITUTT35     itut_t35;
+        AV1RawMetadataTimecode    timecode;
+    } metadata;
+} AV1RawMetadata;
+
+typedef struct AV1RawPadding { // pointer/size pair describing a padding payload
+    uint8_t     *payload;
+    AVBufferRef *payload_ref; // presumably the buffer reference that keeps payload alive - confirm in the CBS implementation
+    size_t       payload_size;
+} AV1RawPadding;
+
+
+typedef struct AV1RawOBU { // one OBU: header, size, and a union over the supported payload types
+    AV1RawOBUHeader header;
+
+    size_t obu_size;
+
+    union { // presumably header.obu_type decides which member is valid - confirm in the CBS implementation
+        AV1RawSequenceHeader sequence_header;
+        AV1RawFrameHeader    frame_header;
+        AV1RawFrame          frame;
+        AV1RawTileGroup      tile_group;
+        AV1RawTileList       tile_list;
+        AV1RawMetadata       metadata;
+        AV1RawPadding        padding;
+    } obu;
+} AV1RawOBU;
+
+typedef struct AV1ReferenceFrameState { // state kept per reference slot (CodedBitstreamAV1Context.ref[])
+    int valid;          // RefValid
+    int frame_id;       // RefFrameId
+    int upscaled_width; // RefUpscaledWidth
+    int frame_width;    // RefFrameWidth
+    int frame_height;   // RefFrameHeight
+    int render_width;   // RefRenderWidth
+    int render_height;  // RefRenderHeight
+    int frame_type;     // RefFrameType
+    int subsampling_x;  // RefSubsamplingX
+    int subsampling_y;  // RefSubsamplingY
+    int bit_depth;      // RefBitDepth
+    int order_hint;     // RefOrderHint
+
+    int8_t  loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME]; // same types and dimensions as the like-named AV1RawFrameHeader fields
+    int8_t  loop_filter_mode_deltas[2];
+    uint8_t feature_enabled[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX]; // read back by segmentation_params() via primary_ref_frame
+    int16_t feature_value[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX];
+} AV1ReferenceFrameState;
+
+typedef struct CodedBitstreamAV1Context { // private AV1 state, reached through ctx->priv_data in the syntax functions
+    const AVClass *class;
+
+    AV1RawSequenceHeader *sequence_header; // read by the frame-level syntax functions (e.g. frame_size(), tile_info())
+    AVBufferRef          *sequence_header_ref;
+
+    int     seen_frame_header; // reset to 0 by temporal_delimiter_obu()
+    AVBufferRef *frame_header_ref;
+    uint8_t     *frame_header;
+    size_t       frame_header_size;
+
+    int temporal_id; // copied from the OBU header by obu_header()
+    int spatial_id;  // copied from the OBU header by obu_header()
+    int operating_point_idc;
+
+    int bit_depth;      // set to 8, 10 or 12 by color_config()
+    int order_hint;
+    int frame_width;    // set by frame_size() / frame_size_with_refs(), then rescaled by superres_params()
+    int frame_height;
+    int upscaled_width; // frame width before the superres_params() scaling
+    int render_width;   // set by render_size() / frame_size_with_refs()
+    int render_height;
+
+    int num_planes;     // set to 1 or 3 by color_config()
+    int coded_lossless;
+    int all_lossless;
+    int tile_cols;      // copied from the frame header by tile_info()
+    int tile_rows;      // copied from the frame header by tile_info()
+    int tile_num;
+
+    AV1ReferenceFrameState ref[AV1_NUM_REF_FRAMES]; // one saved state per reference slot
+
+    // AVOptions
+    int operating_point;
+} CodedBitstreamAV1Context;
+
+
+#endif /* AVCODEC_CBS_AV1_H */
diff --git a/media/ffvpx/libavcodec/cbs_av1_syntax_template.c b/media/ffvpx/libavcodec/cbs_av1_syntax_template.c
new file mode 100644
index 0000000000..e95925a493
--- /dev/null
+++ b/media/ffvpx/libavcodec/cbs_av1_syntax_template.c
@@ -0,0 +1,2050 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static int FUNC(obu_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                            AV1RawOBUHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int err; // not referenced directly here; presumably used by the syntax macros (defined by the including file)
+    // Walks the OBU header fields in bitstream order, then mirrors the layer ids into priv. Returns 0 on reaching the end.
+    HEADER("OBU header");
+
+    fc(1, obu_forbidden_bit, 0, 0); // allowed range 0..0
+
+    fc(4, obu_type, 0, AV1_OBU_PADDING);
+    flag(obu_extension_flag);
+    flag(obu_has_size_field);
+
+    fc(1, obu_reserved_1bit, 0, 0); // allowed range 0..0
+
+    if (current->obu_extension_flag) {
+        fb(3, temporal_id);
+        fb(2, spatial_id);
+        fc(3, extension_header_reserved_3bits, 0, 0);
+    } else {
+        infer(temporal_id, 0); // no extension byte: both layer ids are taken as 0
+        infer(spatial_id, 0);
+    }
+
+    priv->temporal_id = current->temporal_id;
+    priv->spatial_id  = current->spatial_id;
+
+    return 0;
+}
+
+static int FUNC(trailing_bits)(CodedBitstreamContext *ctx, RWContext *rw, int nb_bits)
+{
+    int err;
+    // Handles nb_bits trailing bits: one bit fixed to 1 followed by nb_bits - 1 bits fixed to 0. Returns 0 at the end.
+    av_assert0(nb_bits > 0); // callers must pass at least one bit
+
+    fixed(1, trailing_one_bit, 1);
+    --nb_bits;
+
+    while (nb_bits > 0) {
+        fixed(1, trailing_zero_bit, 0);
+        --nb_bits;
+    }
+
+    return 0;
+}
+
+static int FUNC(byte_alignment)(CodedBitstreamContext *ctx, RWContext *rw)
+{
+    int err;
+    // Handles zero_bit elements fixed to 0 for as long as byte_alignment(rw) (defined elsewhere) is non-zero.
+    while (byte_alignment(rw) != 0)
+        fixed(1, zero_bit, 0);
+
+    return 0;
+}
+
+static int FUNC(color_config)(CodedBitstreamContext *ctx, RWContext *rw,
+                              AV1RawColorConfig *current, int seq_profile)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int err;
+    // Walks the colour configuration fields; as side effects sets priv->bit_depth and priv->num_planes. Returns 0 at the end.
+    flag(high_bitdepth);
+
+    if (seq_profile == FF_PROFILE_AV1_PROFESSIONAL &&
+        current->high_bitdepth) {
+        flag(twelve_bit); // only coded for the professional profile with high_bitdepth set
+        priv->bit_depth = current->twelve_bit ? 12 : 10;
+    } else {
+        priv->bit_depth = current->high_bitdepth ? 10 : 8;
+    }
+
+    if (seq_profile == FF_PROFILE_AV1_HIGH)
+        infer(mono_chrome, 0); // not coded for the high profile
+    else
+        flag(mono_chrome);
+    priv->num_planes = current->mono_chrome ? 1 : 3;
+
+    flag(color_description_present_flag);
+    if (current->color_description_present_flag) {
+        fb(8, color_primaries);
+        fb(8, transfer_characteristics);
+        fb(8, matrix_coefficients);
+    } else {
+        infer(color_primaries,          AVCOL_PRI_UNSPECIFIED);
+        infer(transfer_characteristics, AVCOL_TRC_UNSPECIFIED);
+        infer(matrix_coefficients,      AVCOL_SPC_UNSPECIFIED);
+    }
+
+    if (current->mono_chrome) {
+        flag(color_range);
+        // Monochrome: only color_range is coded, the remaining fields are inferred.
+        infer(subsampling_x, 1);
+        infer(subsampling_y, 1);
+        infer(chroma_sample_position, AV1_CSP_UNKNOWN);
+        infer(separate_uv_delta_q, 0);
+
+    } else if (current->color_primaries          == AVCOL_PRI_BT709 &&
+               current->transfer_characteristics == AVCOL_TRC_IEC61966_2_1 &&
+               current->matrix_coefficients      == AVCOL_SPC_RGB) {
+        infer(color_range,   1); // this exact primaries/transfer/matrix triple fixes range and subsampling
+        infer(subsampling_x, 0);
+        infer(subsampling_y, 0);
+        flag(separate_uv_delta_q);
+
+    } else {
+        flag(color_range);
+
+        if (seq_profile == FF_PROFILE_AV1_MAIN) {
+            infer(subsampling_x, 1);
+            infer(subsampling_y, 1);
+        } else if (seq_profile == FF_PROFILE_AV1_HIGH) {
+            infer(subsampling_x, 0);
+            infer(subsampling_y, 0);
+        } else {
+            if (priv->bit_depth == 12) { // subsampling is only explicitly coded at 12 bits
+                fb(1, subsampling_x);
+                if (current->subsampling_x)
+                    fb(1, subsampling_y);
+                else
+                    infer(subsampling_y, 0);
+            } else {
+                infer(subsampling_x, 1);
+                infer(subsampling_y, 0);
+            }
+        }
+        if (current->subsampling_x && current->subsampling_y) {
+            fc(2, chroma_sample_position, AV1_CSP_UNKNOWN,
+                                          AV1_CSP_COLOCATED);
+        }
+
+        flag(separate_uv_delta_q);
+    }
+
+    return 0;
+}
+
+static int FUNC(timing_info)(CodedBitstreamContext *ctx, RWContext *rw,
+                             AV1RawTimingInfo *current)
+{
+    int err;
+    // Walks the timing info fields. Both 32-bit values are given the range 1..MAX_UINT_BITS(32), which excludes zero.
+    fc(32, num_units_in_display_tick, 1, MAX_UINT_BITS(32));
+    fc(32, time_scale,                1, MAX_UINT_BITS(32));
+
+    flag(equal_picture_interval);
+    if (current->equal_picture_interval)
+        uvlc(num_ticks_per_picture_minus_1, 0, MAX_UINT_BITS(32) - 1); // only coded when equal_picture_interval is set
+
+    return 0;
+}
+
+static int FUNC(decoder_model_info)(CodedBitstreamContext *ctx, RWContext *rw,
+                                    AV1RawDecoderModelInfo *current)
+{
+    int err;
+    // Walks the four decoder model fields unconditionally, in this order. Returns 0 at the end.
+    fb(5, buffer_delay_length_minus_1); // sequence_header_obu() uses this + 1 as the width of the buffer delay fields
+    fb(32, num_units_in_decoding_tick);
+    fb(5,  buffer_removal_time_length_minus_1);
+    fb(5,  frame_presentation_time_length_minus_1);
+
+    return 0;
+}
+
+static int FUNC(sequence_header_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     AV1RawSequenceHeader *current)
+{
+    int i, err;
+    // Walks the sequence header fields in bitstream order; sub-structures go through CHECK(). Returns 0 at the end.
+    HEADER("Sequence Header");
+
+    fc(3, seq_profile, FF_PROFILE_AV1_MAIN,
+                       FF_PROFILE_AV1_PROFESSIONAL);
+    flag(still_picture);
+    flag(reduced_still_picture_header);
+
+    if (current->reduced_still_picture_header) {
+        infer(timing_info_present_flag,           0); // reduced header: a single operating point, most fields inferred
+        infer(decoder_model_info_present_flag,    0);
+        infer(initial_display_delay_present_flag, 0);
+        infer(operating_points_cnt_minus_1,       0);
+        infer(operating_point_idc[0],             0);
+
+        fb(5, seq_level_idx[0]);
+
+        infer(seq_tier[0], 0);
+        infer(decoder_model_present_for_this_op[0],         0);
+        infer(initial_display_delay_present_for_this_op[0], 0);
+
+    } else {
+        flag(timing_info_present_flag);
+        if (current->timing_info_present_flag) {
+            CHECK(FUNC(timing_info)(ctx, rw, &current->timing_info));
+
+            flag(decoder_model_info_present_flag); // only coded when timing info is present
+            if (current->decoder_model_info_present_flag) {
+                CHECK(FUNC(decoder_model_info)
+                          (ctx, rw, &current->decoder_model_info));
+            }
+        } else {
+            infer(decoder_model_info_present_flag, 0);
+        }
+
+        flag(initial_display_delay_present_flag);
+
+        fb(5, operating_points_cnt_minus_1);
+        for (i = 0; i <= current->operating_points_cnt_minus_1; i++) {
+            fbs(12, operating_point_idc[i], 1, i);
+            fbs(5,  seq_level_idx[i], 1, i);
+
+            if (current->seq_level_idx[i] > 7) // seq_tier is only coded for level indices above 7
+                flags(seq_tier[i], 1, i);
+            else
+                infer(seq_tier[i], 0);
+
+            if (current->decoder_model_info_present_flag) {
+                flags(decoder_model_present_for_this_op[i], 1, i);
+                if (current->decoder_model_present_for_this_op[i]) {
+                    int n = current->decoder_model_info.buffer_delay_length_minus_1 + 1; // bit width of both delay fields
+                    fbs(n, decoder_buffer_delay[i], 1, i);
+                    fbs(n, encoder_buffer_delay[i], 1, i);
+                    flags(low_delay_mode_flag[i], 1, i);
+                }
+            } else {
+                infer(decoder_model_present_for_this_op[i], 0);
+            }
+
+            if (current->initial_display_delay_present_flag) { // NOTE(review): no infer() for the per-op flag when this is 0
+                flags(initial_display_delay_present_for_this_op[i], 1, i);
+                if (current->initial_display_delay_present_for_this_op[i])
+                    fbs(4, initial_display_delay_minus_1[i], 1, i);
+            }
+        }
+    }
+
+    fb(4, frame_width_bits_minus_1);
+    fb(4, frame_height_bits_minus_1);
+    // The two widths just handled give the bit widths of the maximum frame dimensions.
+    fb(current->frame_width_bits_minus_1  + 1, max_frame_width_minus_1);
+    fb(current->frame_height_bits_minus_1 + 1, max_frame_height_minus_1);
+
+    if (current->reduced_still_picture_header)
+        infer(frame_id_numbers_present_flag, 0);
+    else
+        flag(frame_id_numbers_present_flag);
+    if (current->frame_id_numbers_present_flag) {
+        fb(4, delta_frame_id_length_minus_2);
+        fb(3, additional_frame_id_length_minus_1);
+    }
+
+    flag(use_128x128_superblock);
+    flag(enable_filter_intra);
+    flag(enable_intra_edge_filter);
+
+    if (current->reduced_still_picture_header) {
+        infer(enable_interintra_compound, 0); // reduced header: these tool flags are all inferred as 0
+        infer(enable_masked_compound,     0);
+        infer(enable_warped_motion,       0);
+        infer(enable_dual_filter,         0);
+        infer(enable_order_hint,          0);
+        infer(enable_jnt_comp,            0);
+        infer(enable_ref_frame_mvs,       0);
+
+        infer(seq_force_screen_content_tools,
+              AV1_SELECT_SCREEN_CONTENT_TOOLS);
+        infer(seq_force_integer_mv,
+              AV1_SELECT_INTEGER_MV);
+    } else {
+        flag(enable_interintra_compound);
+        flag(enable_masked_compound);
+        flag(enable_warped_motion);
+        flag(enable_dual_filter);
+
+        flag(enable_order_hint);
+        if (current->enable_order_hint) {
+            flag(enable_jnt_comp);
+            flag(enable_ref_frame_mvs);
+        } else {
+            infer(enable_jnt_comp,      0);
+            infer(enable_ref_frame_mvs, 0);
+        }
+
+        flag(seq_choose_screen_content_tools);
+        if (current->seq_choose_screen_content_tools)
+            infer(seq_force_screen_content_tools,
+                  AV1_SELECT_SCREEN_CONTENT_TOOLS);
+        else
+            fb(1, seq_force_screen_content_tools);
+        if (current->seq_force_screen_content_tools > 0) { // integer-MV choice is only coded when this value is non-zero
+            flag(seq_choose_integer_mv);
+            if (current->seq_choose_integer_mv)
+                infer(seq_force_integer_mv,
+                      AV1_SELECT_INTEGER_MV);
+            else
+                fb(1, seq_force_integer_mv);
+        } else {
+            infer(seq_force_integer_mv, AV1_SELECT_INTEGER_MV);
+        }
+
+        if (current->enable_order_hint)
+            fb(3, order_hint_bits_minus_1); // read later by set_frame_refs() through priv->sequence_header
+    }
+
+    flag(enable_superres);
+    flag(enable_cdef);
+    flag(enable_restoration);
+
+    CHECK(FUNC(color_config)(ctx, rw, &current->color_config,
+                             current->seq_profile));
+
+    flag(film_grain_params_present);
+
+    return 0;
+}
+
+static int FUNC(temporal_delimiter_obu)(CodedBitstreamContext *ctx, RWContext *rw)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    // No syntax macros besides HEADER() are used here; the visible effect is resetting priv->seen_frame_header.
+    HEADER("Temporal Delimiter");
+
+    priv->seen_frame_header = 0;
+
+    return 0;
+}
+
+static int FUNC(set_frame_refs)(CodedBitstreamContext *ctx, RWContext *rw,
+                                AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    static const uint8_t ref_frame_list[AV1_NUM_REF_FRAMES - 2] = { // fill order used by the "remaining references" loop below
+        AV1_REF_FRAME_LAST2, AV1_REF_FRAME_LAST3, AV1_REF_FRAME_BWDREF,
+        AV1_REF_FRAME_ALTREF2, AV1_REF_FRAME_ALTREF
+    };
+    int8_t ref_frame_idx[AV1_REFS_PER_FRAME], used_frame[AV1_NUM_REF_FRAMES];
+    int16_t shifted_order_hints[AV1_NUM_REF_FRAMES];
+    int cur_frame_hint, latest_order_hint, earliest_order_hint, ref;
+    int i, j;
+    // Derives every ref_frame_idx[] entry from last_frame_idx, golden_frame_idx and the stored order hints, then infer()s them.
+    for (i = 0; i < AV1_REFS_PER_FRAME; i++)
+        ref_frame_idx[i] = -1; // -1 marks "not assigned yet"
+    ref_frame_idx[AV1_REF_FRAME_LAST - AV1_REF_FRAME_LAST] = current->last_frame_idx;
+    ref_frame_idx[AV1_REF_FRAME_GOLDEN - AV1_REF_FRAME_LAST] = current->golden_frame_idx;
+
+    for (i = 0; i < AV1_NUM_REF_FRAMES; i++)
+        used_frame[i] = 0;
+    used_frame[current->last_frame_idx] = 1;
+    used_frame[current->golden_frame_idx] = 1;
+    // Offset each slot's relative distance by cur_frame_hint, so cur_frame_hint itself marks the current frame.
+    cur_frame_hint = 1 << (seq->order_hint_bits_minus_1);
+    for (i = 0; i < AV1_NUM_REF_FRAMES; i++)
+        shifted_order_hints[i] = cur_frame_hint +
+                                 cbs_av1_get_relative_dist(seq, priv->ref[i].order_hint,
+                                                           priv->order_hint);
+
+    latest_order_hint = shifted_order_hints[current->last_frame_idx];
+    earliest_order_hint = shifted_order_hints[current->golden_frame_idx];
+    // ALTREF: unused slot with hint >= cur_frame_hint and the largest hint (ties go to the higher slot index).
+    ref = -1;
+    for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+        int hint = shifted_order_hints[i];
+        if (!used_frame[i] && hint >= cur_frame_hint &&
+            (ref < 0 || hint >= latest_order_hint)) {
+            ref = i;
+            latest_order_hint = hint;
+        }
+    }
+    if (ref >= 0) {
+        ref_frame_idx[AV1_REF_FRAME_ALTREF - AV1_REF_FRAME_LAST] = ref;
+        used_frame[ref] = 1;
+    }
+    // BWDREF: unused slot with hint >= cur_frame_hint and the smallest hint (ties go to the lower slot index).
+    ref = -1;
+    for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+        int hint = shifted_order_hints[i];
+        if (!used_frame[i] && hint >= cur_frame_hint &&
+            (ref < 0 || hint < earliest_order_hint)) {
+            ref = i;
+            earliest_order_hint = hint;
+        }
+    }
+    if (ref >= 0) {
+        ref_frame_idx[AV1_REF_FRAME_BWDREF - AV1_REF_FRAME_LAST] = ref;
+        used_frame[ref] = 1;
+    }
+    // ALTREF2: the same search repeated over the slots that are still unused.
+    ref = -1;
+    for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+        int hint = shifted_order_hints[i];
+        if (!used_frame[i] && hint >= cur_frame_hint &&
+            (ref < 0 || hint < earliest_order_hint)) {
+            ref = i;
+            earliest_order_hint = hint;
+        }
+    }
+    if (ref >= 0) {
+        ref_frame_idx[AV1_REF_FRAME_ALTREF2 - AV1_REF_FRAME_LAST] = ref;
+        used_frame[ref] = 1;
+    }
+    // Remaining references, in ref_frame_list order: unused slot with hint < cur_frame_hint and the largest hint.
+    for (i = 0; i < AV1_REFS_PER_FRAME - 2; i++) {
+        int ref_frame = ref_frame_list[i];
+        if (ref_frame_idx[ref_frame - AV1_REF_FRAME_LAST] < 0 ) {
+            ref = -1;
+            for (j = 0; j < AV1_NUM_REF_FRAMES; j++) {
+                int hint = shifted_order_hints[j];
+                if (!used_frame[j] && hint < cur_frame_hint &&
+                    (ref < 0 || hint >= latest_order_hint)) {
+                    ref = j;
+                    latest_order_hint = hint;
+                }
+            }
+            if (ref >= 0) {
+                ref_frame_idx[ref_frame - AV1_REF_FRAME_LAST] = ref;
+                used_frame[ref] = 1;
+            }
+        }
+    }
+    // Fallback: the slot with the smallest hint overall (used or not) fills any entry that is still -1.
+    ref = -1;
+    for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+        int hint = shifted_order_hints[i];
+        if (ref < 0 || hint < earliest_order_hint) {
+            ref = i;
+            earliest_order_hint = hint;
+        }
+    }
+    for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+        if (ref_frame_idx[i] < 0)
+            ref_frame_idx[i] = ref;
+        infer(ref_frame_idx[i], ref_frame_idx[i]); // first argument names the current-> field, second is the local array value
+    }
+
+    return 0;
+}
+
+static int FUNC(superres_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                 AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int denom, err;
+    // Handles use_superres / coded_denom, then updates priv->upscaled_width and priv->frame_width. Returns 0 at the end.
+    if (seq->enable_superres)
+        flag(use_superres);
+    else
+        infer(use_superres, 0);
+
+    if (current->use_superres) {
+        fb(3, coded_denom);
+        denom = current->coded_denom + AV1_SUPERRES_DENOM_MIN;
+    } else {
+        denom = AV1_SUPERRES_NUM; // makes the scaling below a no-op
+    }
+    // The frame_width set by the caller becomes upscaled_width; frame_width is then width * NUM / denom, rounded to nearest.
+    priv->upscaled_width = priv->frame_width;
+    priv->frame_width = (priv->upscaled_width * AV1_SUPERRES_NUM +
+                         denom / 2) / denom;
+
+    return 0;
+}
+
+static int FUNC(frame_size)(CodedBitstreamContext *ctx, RWContext *rw,
+                            AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int err;
+    // Handles the frame dimensions, stores them in priv, then runs superres_params(). Returns 0 at the end.
+    if (current->frame_size_override_flag) {
+        fb(seq->frame_width_bits_minus_1 + 1,  frame_width_minus_1); // field widths come from the sequence header
+        fb(seq->frame_height_bits_minus_1 + 1, frame_height_minus_1);
+    } else {
+        infer(frame_width_minus_1,  seq->max_frame_width_minus_1); // no override: use the sequence-level maximum size
+        infer(frame_height_minus_1, seq->max_frame_height_minus_1);
+    }
+
+    priv->frame_width  = current->frame_width_minus_1  + 1;
+    priv->frame_height = current->frame_height_minus_1 + 1;
+
+    CHECK(FUNC(superres_params)(ctx, rw, current)); // may rescale priv->frame_width
+
+    return 0;
+}
+
+static int FUNC(render_size)(CodedBitstreamContext *ctx, RWContext *rw,
+                             AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int err;
+    // Handles the render dimensions and stores them (plus one) in priv->render_width / render_height. Returns 0 at the end.
+    flag(render_and_frame_size_different);
+
+    if (current->render_and_frame_size_different) {
+        fb(16, render_width_minus_1);
+        fb(16, render_height_minus_1);
+    } else {
+        infer(render_width_minus_1,  current->frame_width_minus_1); // render size defaults to the frame header's frame size
+        infer(render_height_minus_1, current->frame_height_minus_1);
+    }
+
+    priv->render_width  = current->render_width_minus_1  + 1;
+    priv->render_height = current->render_height_minus_1 + 1;
+
+    return 0;
+}
+
+static int FUNC(frame_size_with_refs)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int i, err;
+    // Takes the frame and render size from the first reference whose found_ref flag is set, else from explicit size fields.
+    for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+        flags(found_ref[i], 1, i);
+        if (current->found_ref[i]) {
+            AV1ReferenceFrameState *ref =
+                &priv->ref[current->ref_frame_idx[i]];
+
+            if (!ref->valid) { // the referenced slot holds no usable state: fail with AVERROR_INVALIDDATA
+                av_log(ctx->log_ctx, AV_LOG_ERROR,
+                       "Missing reference frame needed for frame size "
+                       "(ref = %d, ref_frame_idx = %d).\n",
+                       i, current->ref_frame_idx[i]);
+                return AVERROR_INVALIDDATA;
+            }
+
+            infer(frame_width_minus_1,   ref->upscaled_width - 1); // the width comes from the reference's upscaled width
+            infer(frame_height_minus_1,  ref->frame_height - 1);
+            infer(render_width_minus_1,  ref->render_width - 1);
+            infer(render_height_minus_1, ref->render_height - 1);
+
+            priv->upscaled_width = ref->upscaled_width;
+            priv->frame_width    = priv->upscaled_width;
+            priv->frame_height   = ref->frame_height;
+            priv->render_width   = ref->render_width;
+            priv->render_height  = ref->render_height;
+            break; // stop at the first match; no further found_ref flags are handled
+        }
+    }
+
+    if (i >= AV1_REFS_PER_FRAME) { // loop ended without a match: sizes are handled explicitly
+        CHECK(FUNC(frame_size)(ctx, rw, current));
+        CHECK(FUNC(render_size)(ctx, rw, current));
+    } else {
+        CHECK(FUNC(superres_params)(ctx, rw, current)); // frame_size() was not run, so superres is handled here
+    }
+
+    return 0;
+}
+
+static int FUNC(interpolation_filter)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      AV1RawFrameHeader *current)
+{
+    int err;
+    // Handles is_filter_switchable; the filter is inferred as SWITCHABLE when the flag is set, else coded in 2 bits.
+    flag(is_filter_switchable);
+    if (current->is_filter_switchable)
+        infer(interpolation_filter,
+              AV1_INTERPOLATION_FILTER_SWITCHABLE);
+    else
+        fb(2, interpolation_filter);
+
+    return 0;
+}
+
+static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw,
+                           AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int mi_cols, mi_rows, sb_cols, sb_rows, sb_shift, sb_size;
+    int max_tile_width_sb, max_tile_height_sb, max_tile_area_sb;
+    int min_log2_tile_cols, max_log2_tile_cols, max_log2_tile_rows;
+    int min_log2_tiles, min_log2_tile_rows;
+    int i, err;
+    // Handles the tile layout (uniform or explicit), derives tile_cols / tile_rows and copies them into priv.
+    mi_cols = 2 * ((priv->frame_width  + 7) >> 3); // dimension rounded up to a multiple of 8, counted in 4-pixel units
+    mi_rows = 2 * ((priv->frame_height + 7) >> 3);
+
+    sb_cols = seq->use_128x128_superblock ? ((mi_cols + 31) >> 5)
+                                          : ((mi_cols + 15) >> 4);
+    sb_rows = seq->use_128x128_superblock ? ((mi_rows + 31) >> 5)
+                                          : ((mi_rows + 15) >> 4);
+
+    sb_shift = seq->use_128x128_superblock ? 5 : 4;
+    sb_size  = sb_shift + 2; // used as a shift count below: 7 for 128x128 superblocks, 6 otherwise
+
+    max_tile_width_sb = AV1_MAX_TILE_WIDTH >> sb_size;
+    max_tile_area_sb  = AV1_MAX_TILE_AREA  >> (2 * sb_size);
+
+    min_log2_tile_cols = cbs_av1_tile_log2(max_tile_width_sb, sb_cols);
+    max_log2_tile_cols = cbs_av1_tile_log2(1, FFMIN(sb_cols, AV1_MAX_TILE_COLS));
+    max_log2_tile_rows = cbs_av1_tile_log2(1, FFMIN(sb_rows, AV1_MAX_TILE_ROWS));
+    min_log2_tiles = FFMAX(min_log2_tile_cols,
+                           cbs_av1_tile_log2(max_tile_area_sb, sb_rows * sb_cols));
+
+    flag(uniform_tile_spacing_flag);
+
+    if (current->uniform_tile_spacing_flag) {
+        int tile_width_sb, tile_height_sb;
+
+        increment(tile_cols_log2, min_log2_tile_cols, max_log2_tile_cols);
+        // tile_width_sb = ceil(sb_cols / 2^tile_cols_log2); tile_cols = ceil(sb_cols / tile_width_sb).
+        tile_width_sb = (sb_cols + (1 << current->tile_cols_log2) - 1) >>
+            current->tile_cols_log2;
+        current->tile_cols = (sb_cols + tile_width_sb - 1) / tile_width_sb;
+
+        min_log2_tile_rows = FFMAX(min_log2_tiles - current->tile_cols_log2, 0);
+
+        increment(tile_rows_log2, min_log2_tile_rows, max_log2_tile_rows);
+
+        tile_height_sb = (sb_rows + (1 << current->tile_rows_log2) - 1) >>
+            current->tile_rows_log2;
+        current->tile_rows = (sb_rows + tile_height_sb - 1) / tile_height_sb;
+
+        for (i = 0; i < current->tile_cols - 1; i++)
+            infer(width_in_sbs_minus_1[i], tile_width_sb - 1);
+        infer(width_in_sbs_minus_1[i], // i == tile_cols - 1 here: the last column takes the remaining width
+              sb_cols - (current->tile_cols - 1) * tile_width_sb - 1);
+        for (i = 0; i < current->tile_rows - 1; i++)
+            infer(height_in_sbs_minus_1[i], tile_height_sb - 1);
+        infer(height_in_sbs_minus_1[i], // i == tile_rows - 1 here: the last row takes the remaining height
+              sb_rows - (current->tile_rows - 1) * tile_height_sb - 1);
+
+    } else {
+        int widest_tile_sb, start_sb, size_sb, max_width, max_height;
+
+        widest_tile_sb = 0;
+        // Explicit column widths: one ns() element per column until sb_cols is covered or AV1_MAX_TILE_COLS is reached.
+        start_sb = 0;
+        for (i = 0; start_sb < sb_cols && i < AV1_MAX_TILE_COLS; i++) {
+            max_width = FFMIN(sb_cols - start_sb, max_tile_width_sb);
+            ns(max_width, width_in_sbs_minus_1[i], 1, i);
+            size_sb = current->width_in_sbs_minus_1[i] + 1;
+            widest_tile_sb = FFMAX(size_sb, widest_tile_sb);
+            start_sb += size_sb;
+        }
+        current->tile_cols_log2 = cbs_av1_tile_log2(1, i);
+        current->tile_cols = i;
+
+        if (min_log2_tiles > 0)
+            max_tile_area_sb = (sb_rows * sb_cols) >> (min_log2_tiles + 1);
+        else
+            max_tile_area_sb = sb_rows * sb_cols;
+        max_tile_height_sb = FFMAX(max_tile_area_sb / widest_tile_sb, 1); // widest_tile_sb stays 0 only if the loop above never ran
+
+        start_sb = 0;
+        for (i = 0; start_sb < sb_rows && i < AV1_MAX_TILE_ROWS; i++) {
+            max_height = FFMIN(sb_rows - start_sb, max_tile_height_sb);
+            ns(max_height, height_in_sbs_minus_1[i], 1, i);
+            size_sb = current->height_in_sbs_minus_1[i] + 1;
+            start_sb += size_sb;
+        }
+        current->tile_rows_log2 = cbs_av1_tile_log2(1, i);
+        current->tile_rows = i;
+    }
+
+    if (current->tile_cols_log2 > 0 ||
+        current->tile_rows_log2 > 0) {
+        fb(current->tile_cols_log2 + current->tile_rows_log2,
+           context_update_tile_id);
+        fb(2, tile_size_bytes_minus1);
+    } else {
+        infer(context_update_tile_id, 0); // both log2 values are 0: the id is inferred and tile_size_bytes_minus1 is not handled
+    }
+
+    priv->tile_cols = current->tile_cols;
+    priv->tile_rows = current->tile_rows;
+
+    return 0;
+}
+
+static int FUNC(quantization_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int err;
+    // Walks the quantizer fields; chroma deltas are only coded when priv->num_planes > 1. Returns 0 at the end.
+    fb(8, base_q_idx);
+
+    delta_q(delta_q_y_dc);
+
+    if (priv->num_planes > 1) {
+        if (seq->color_config.separate_uv_delta_q)
+            flag(diff_uv_delta);
+        else
+            infer(diff_uv_delta, 0);
+
+        delta_q(delta_q_u_dc);
+        delta_q(delta_q_u_ac);
+
+        if (current->diff_uv_delta) {
+            delta_q(delta_q_v_dc);
+            delta_q(delta_q_v_ac);
+        } else {
+            infer(delta_q_v_dc, current->delta_q_u_dc); // V deltas mirror the U deltas when not coded separately
+            infer(delta_q_v_ac, current->delta_q_u_ac);
+        }
+    } else {
+        infer(delta_q_u_dc, 0);
+        infer(delta_q_u_ac, 0);
+        infer(delta_q_v_dc, 0);
+        infer(delta_q_v_ac, 0);
+    }
+
+    flag(using_qmatrix);
+    if (current->using_qmatrix) {
+        fb(4, qm_y);
+        fb(4, qm_u);
+        if (seq->color_config.separate_uv_delta_q)
+            fb(4, qm_v);
+        else
+            infer(qm_v, current->qm_u); // qm_v mirrors qm_u unless separate_uv_delta_q is set
+    }
+
+    return 0;
+}
+
+static int FUNC(segmentation_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    static const uint8_t bits[AV1_SEG_LVL_MAX] = { 8, 6, 6, 6, 6, 3, 0, 0 }; // value width per feature
+    static const uint8_t sign[AV1_SEG_LVL_MAX] = { 1, 1, 1, 1, 1, 0, 0, 0 }; // nonzero: value coded with sus()
+    static const uint8_t default_feature_enabled[AV1_SEG_LVL_MAX] = { 0 }; // all-zero fallback tables,
+    static const int16_t default_feature_value[AV1_SEG_LVL_MAX] = { 0 };   // used without a primary ref
+    int i, j, err;
+    // Segmentation syntax: enable/update flags, then feature data per segment.
+    flag(segmentation_enabled);
+
+    if (current->segmentation_enabled) {
+        if (current->primary_ref_frame == AV1_PRIMARY_REF_NONE) { // no primary ref: flags are inferred
+            infer(segmentation_update_map,      1);
+            infer(segmentation_temporal_update, 0);
+            infer(segmentation_update_data,     1);
+        } else {
+            flag(segmentation_update_map);
+            if (current->segmentation_update_map)
+                flag(segmentation_temporal_update);
+            else
+                infer(segmentation_temporal_update, 0);
+            flag(segmentation_update_data);
+        }
+
+        for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
+            const uint8_t *ref_feature_enabled;
+            const int16_t *ref_feature_value;
+            // Pick where features come from when segmentation_update_data is 0.
+            if (current->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
+                ref_feature_enabled = default_feature_enabled;
+                ref_feature_value = default_feature_value;
+            } else { // state saved in priv->ref[] for the slot named by the primary ref
+                ref_feature_enabled =
+                    priv->ref[current->ref_frame_idx[current->primary_ref_frame]].feature_enabled[i];
+                ref_feature_value =
+                    priv->ref[current->ref_frame_idx[current->primary_ref_frame]].feature_value[i];
+            }
+
+            for (j = 0; j < AV1_SEG_LVL_MAX; j++) {
+                if (current->segmentation_update_data) {
+                    flags(feature_enabled[i][j], 2, i, j);
+                    // A value is coded only for an enabled feature with nonzero width.
+                    if (current->feature_enabled[i][j] && bits[j] > 0) {
+                        if (sign[j])
+                            sus(1 + bits[j], feature_value[i][j], 2, i, j); // one extra bit when signed
+                        else
+                            fbs(bits[j], feature_value[i][j], 2, i, j);
+                    } else {
+                        infer(feature_value[i][j], 0);
+                    }
+                } else { // nothing coded: both fields are inferred from the chosen source
+                    infer(feature_enabled[i][j], ref_feature_enabled[j]);
+                    infer(feature_value[i][j], ref_feature_value[j]);
+                }
+            }
+        }
+    } else { // segmentation disabled: every feature is inferred off with value 0
+        for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
+            for (j = 0; j < AV1_SEG_LVL_MAX; j++) {
+                infer(feature_enabled[i][j], 0);
+                infer(feature_value[i][j],   0);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(delta_q_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                AV1RawFrameHeader *current)
+{
+    int err;
+    // delta_q_present is coded only for a nonzero base_q_idx, else inferred as 0.
+    if (current->base_q_idx > 0)
+        flag(delta_q_present);
+    else
+        infer(delta_q_present, 0);
+
+    if (current->delta_q_present)
+        fb(2, delta_q_res); // no inference is made for delta_q_res in the other case
+
+    return 0;
+}
+
+// Syntax for the delta loop-filter controls: delta_lf_present, delta_lf_res
+// and delta_lf_multi.  delta_lf_present is coded only when delta_q_present
+// is set and allow_intrabc is clear; delta_lf_res and delta_lf_multi are
+// coded only when delta_lf_present ends up nonzero.  Every element that is
+// not coded is inferred as 0.
+static int FUNC(delta_lf_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                 AV1RawFrameHeader *current)
+{
+    int err;
+
+    if (current->delta_q_present && !current->allow_intrabc)
+        flag(delta_lf_present);
+    else
+        infer(delta_lf_present, 0);
+
+    if (!current->delta_lf_present) {
+        infer(delta_lf_res,   0);
+        infer(delta_lf_multi, 0);
+    } else {
+        fb(2, delta_lf_res);
+        flag(delta_lf_multi);
+    }
+
+    return 0;
+}
+
+static int FUNC(loop_filter_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                    AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    static const int8_t default_loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME] =
+        { 1, 0, 0, 0, -1, 0, -1, -1 };
+    static const int8_t default_loop_filter_mode_deltas[2] = { 0, 0 };
+    int i, err;
+    // Loop filter syntax: levels, sharpness, then the ref/mode delta tables.
+    if (priv->coded_lossless || current->allow_intrabc) { // nothing coded: infer and leave
+        infer(loop_filter_level[0], 0);
+        infer(loop_filter_level[1], 0);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_INTRA],    1);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_LAST],     0);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_LAST2],    0);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_LAST3],    0);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_BWDREF],   0);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_GOLDEN],  -1);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF],  -1);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF2], -1);
+        for (i = 0; i < 2; i++)
+            infer(loop_filter_mode_deltas[i], 0);
+        return 0;
+    }
+
+    fb(6, loop_filter_level[0]);
+    fb(6, loop_filter_level[1]);
+
+    if (priv->num_planes > 1) { // levels [2] and [3] need more than one plane ...
+        if (current->loop_filter_level[0] ||
+            current->loop_filter_level[1]) { // ... and a nonzero level [0] or [1]
+            fb(6, loop_filter_level[2]);
+            fb(6, loop_filter_level[3]);
+        }
+    }
+
+    fb(3, loop_filter_sharpness);
+
+    flag(loop_filter_delta_enabled);
+    if (current->loop_filter_delta_enabled) {
+        const int8_t *ref_loop_filter_ref_deltas, *ref_loop_filter_mode_deltas;
+        // Baseline deltas: built-in defaults, or those saved with the primary ref.
+        if (current->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
+            ref_loop_filter_ref_deltas = default_loop_filter_ref_deltas;
+            ref_loop_filter_mode_deltas = default_loop_filter_mode_deltas;
+        } else {
+            ref_loop_filter_ref_deltas =
+                priv->ref[current->ref_frame_idx[current->primary_ref_frame]].loop_filter_ref_deltas;
+            ref_loop_filter_mode_deltas =
+                priv->ref[current->ref_frame_idx[current->primary_ref_frame]].loop_filter_mode_deltas;
+        }
+
+        flag(loop_filter_delta_update);
+        for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) {
+            if (current->loop_filter_delta_update)
+                flags(update_ref_delta[i], 1, i);
+            else
+                infer(update_ref_delta[i], 0);
+            if (current->update_ref_delta[i])
+                sus(1 + 6, loop_filter_ref_deltas[i], 1, i); // explicitly coded delta
+            else
+                infer(loop_filter_ref_deltas[i], ref_loop_filter_ref_deltas[i]); // keep baseline
+        }
+        for (i = 0; i < 2; i++) { // same scheme for the two mode deltas
+            if (current->loop_filter_delta_update)
+                flags(update_mode_delta[i], 1, i);
+            else
+                infer(update_mode_delta[i], 0);
+            if (current->update_mode_delta[i])
+                sus(1 + 6, loop_filter_mode_deltas[i], 1, i);
+            else
+                infer(loop_filter_mode_deltas[i], ref_loop_filter_mode_deltas[i]);
+        }
+    } else { // deltas disabled: both tables are inferred from the built-in defaults
+        for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++)
+            infer(loop_filter_ref_deltas[i], default_loop_filter_ref_deltas[i]);
+        for (i = 0; i < 2; i++)
+            infer(loop_filter_mode_deltas[i], default_loop_filter_mode_deltas[i]);
+    }
+
+    return 0;
+}
+
+static int FUNC(cdef_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                             AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int i, err;
+    // CDEF syntax: damping, cdef_bits, then (1 << cdef_bits) strength sets.
+    if (priv->coded_lossless || current->allow_intrabc ||
+        !seq->enable_cdef) { // nothing coded: infer zeros (strengths at index 0 only)
+        infer(cdef_damping_minus_3, 0);
+        infer(cdef_bits, 0);
+        infer(cdef_y_pri_strength[0],  0);
+        infer(cdef_y_sec_strength[0],  0);
+        infer(cdef_uv_pri_strength[0], 0);
+        infer(cdef_uv_sec_strength[0], 0);
+
+        return 0;
+    }
+
+    fb(2, cdef_damping_minus_3);
+    fb(2, cdef_bits);
+
+    for (i = 0; i < (1 << current->cdef_bits); i++) {
+        fbs(4, cdef_y_pri_strength[i], 1, i);
+        fbs(2, cdef_y_sec_strength[i], 1, i);
+
+        if (priv->num_planes > 1) { // uv strengths are coded only with more than one plane
+            fbs(4, cdef_uv_pri_strength[i], 1, i);
+            fbs(2, cdef_uv_sec_strength[i], 1, i);
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(lr_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                           AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int uses_lr,  uses_chroma_lr;
+    int i, err;
+    // Loop restoration syntax: one lr_type per plane, then the unit-size shifts.
+    if (priv->all_lossless || current->allow_intrabc ||
+        !seq->enable_restoration) {
+        return 0; // nothing is coded and nothing is inferred on this path
+    }
+
+    uses_lr = uses_chroma_lr = 0;
+    for (i = 0; i < priv->num_planes; i++) {
+        fbs(2, lr_type[i], 1, i);
+
+        if (current->lr_type[i] != AV1_RESTORE_NONE) {
+            uses_lr = 1; // at least one plane uses restoration
+            if (i > 0)
+                uses_chroma_lr = 1; // ... and it is a plane other than plane 0
+        }
+    }
+
+    if (uses_lr) { // the shifts are only present when some plane uses restoration
+        if (seq->use_128x128_superblock)
+            increment(lr_unit_shift, 1, 2); // NOTE(review): args look like (name, min, max) - confirm
+        else
+            increment(lr_unit_shift, 0, 2);
+
+        if(seq->color_config.subsampling_x &&
+           seq->color_config.subsampling_y && uses_chroma_lr) { // subsampled both ways
+            fb(1, lr_uv_shift);
+        } else {
+            infer(lr_uv_shift, 0);
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(read_tx_mode)(CodedBitstreamContext *ctx, RWContext *rw,
+                              AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int err;
+    // tx_mode is coded only when the frame is not coded lossless, else inferred as 0.
+    if (priv->coded_lossless)
+        infer(tx_mode, 0);
+    else
+        increment(tx_mode, 1, 2); // NOTE(review): args look like (name, min, max) - confirm
+
+    return 0;
+}
+
+static int FUNC(frame_reference_mode)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      AV1RawFrameHeader *current)
+{
+    int err;
+    // reference_select is coded for every frame type except key and intra-only.
+    if (current->frame_type == AV1_FRAME_INTRA_ONLY ||
+        current->frame_type == AV1_FRAME_KEY)
+        infer(reference_select, 0);
+    else
+        flag(reference_select);
+
+    return 0;
+}
+
+// Works out whether skip mode may be used for this frame and then codes
+// skip_mode_present (inferred as 0 when it may not).
+//
+// Skip mode is ruled out for key and intra-only frames, when
+// reference_select is clear, or when the sequence has no order hints.
+// Otherwise the order hints of the AV1_REFS_PER_FRAME references are
+// compared with the current frame's hint using cbs_av1_get_relative_dist():
+//  - "forward" is the reference with negative distance whose hint compares
+//    greatest among them, "backward" the one with positive distance whose
+//    hint compares least;
+//  - with both found, skip mode is allowed;
+//  - with only a forward reference, skip mode is allowed if some other
+//    reference compares below the forward one (a second forward reference).
+static int FUNC(skip_mode_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                  AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int skip_mode_allowed = 0;
+    int err;
+
+    if (current->frame_type != AV1_FRAME_KEY &&
+        current->frame_type != AV1_FRAME_INTRA_ONLY &&
+        current->reference_select && seq->enable_order_hint) {
+        int fwd_idx = -1, bwd_idx = -1;
+        int fwd_hint, bwd_hint;
+        int hint, rel, n;
+
+        // Pass 1: pick the forward and backward candidates.
+        for (n = 0; n < AV1_REFS_PER_FRAME; n++) {
+            hint = priv->ref[current->ref_frame_idx[n]].order_hint;
+            rel  = cbs_av1_get_relative_dist(seq, hint, priv->order_hint);
+
+            if (rel < 0 &&
+                (fwd_idx < 0 ||
+                 cbs_av1_get_relative_dist(seq, hint, fwd_hint) > 0)) {
+                fwd_idx  = n;
+                fwd_hint = hint;
+            } else if (rel > 0 &&
+                       (bwd_idx < 0 ||
+                        cbs_av1_get_relative_dist(seq, hint, bwd_hint) < 0)) {
+                bwd_idx  = n;
+                bwd_hint = hint;
+            }
+        }
+
+        if (fwd_idx >= 0 && bwd_idx >= 0) {
+            // Frames for skip mode are fwd_idx and bwd_idx.
+            skip_mode_allowed = 1;
+        } else if (fwd_idx >= 0) {
+            int second_idx = -1;
+            int second_hint;
+
+            // Pass 2: look for a second forward reference behind fwd_hint.
+            for (n = 0; n < AV1_REFS_PER_FRAME; n++) {
+                hint = priv->ref[current->ref_frame_idx[n]].order_hint;
+                if (cbs_av1_get_relative_dist(seq, hint, fwd_hint) >= 0)
+                    continue;
+
+                if (second_idx < 0 ||
+                    cbs_av1_get_relative_dist(seq, hint, second_hint) > 0) {
+                    second_idx  = n;
+                    second_hint = hint;
+                }
+            }
+
+            // Frames for skip mode are fwd_idx and second_idx, if found.
+            if (second_idx >= 0)
+                skip_mode_allowed = 1;
+        }
+    }
+
+    if (skip_mode_allowed)
+        flag(skip_mode_present);
+    else
+        infer(skip_mode_present, 0);
+
+    return 0;
+}
+
+// Codes gm_params[ref][idx] through subexp().  The symbol count handed to
+// subexp() is 2 * (1 << abs_bits) + 1; abs_bits depends on whether idx is
+// below 2 and on type, and for AV1_WARP_MODEL_TRANSLATION it drops by one
+// when allow_high_precision_mv is clear.
+static int FUNC(global_motion_param)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     AV1RawFrameHeader *current,
+                                     int type, int ref, int idx)
+{
+    uint32_t abs_bits  = AV1_GM_ABS_ALPHA_BITS;
+    uint32_t prec_bits = AV1_GM_ALPHA_PREC_BITS;
+    uint32_t num_syms;
+    int err;
+
+    if (idx < 2 && type != AV1_WARP_MODEL_TRANSLATION) {
+        abs_bits  = AV1_GM_ABS_TRANS_BITS;
+        prec_bits = AV1_GM_TRANS_PREC_BITS;
+    } else if (idx < 2) {
+        abs_bits  = AV1_GM_ABS_TRANS_ONLY_BITS  - !current->allow_high_precision_mv;
+        prec_bits = AV1_GM_TRANS_ONLY_PREC_BITS - !current->allow_high_precision_mv;
+    }
+
+    num_syms = 2 * (1 << abs_bits) + 1;
+    subexp(gm_params[ref][idx], num_syms, 2, ref, idx);
+
+    (void)prec_bits; // unused: the actual gm_params value is not reconstructed here
+
+    return 0;
+}
+
+static int FUNC(global_motion_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      AV1RawFrameHeader *current)
+{
+    int ref, type;
+    int err;
+    // Global motion syntax: per reference, a model type and then its parameters.
+    if (current->frame_type == AV1_FRAME_KEY ||
+        current->frame_type == AV1_FRAME_INTRA_ONLY)
+        return 0; // nothing is coded for key and intra-only frames
+
+    for (ref = AV1_REF_FRAME_LAST; ref <= AV1_REF_FRAME_ALTREF; ref++) {
+        flags(is_global[ref], 1, ref);
+        if (current->is_global[ref]) { // up to three flags select the warp model
+            flags(is_rot_zoom[ref], 1, ref);
+            if (current->is_rot_zoom[ref]) {
+                type = AV1_WARP_MODEL_ROTZOOM;
+            } else {
+                flags(is_translation[ref], 1, ref);
+                type = current->is_translation[ref] ? AV1_WARP_MODEL_TRANSLATION
+                                                    : AV1_WARP_MODEL_AFFINE;
+            }
+        } else {
+            type = AV1_WARP_MODEL_IDENTITY; // no parameters follow for this model
+        }
+
+        if (type >= AV1_WARP_MODEL_ROTZOOM) { // params 2 and 3, plus 4 and 5 for AFFINE
+            CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 2));
+            CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 3));
+            if (type == AV1_WARP_MODEL_AFFINE) {
+                CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 4));
+                CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 5));
+            } else {
+                // gm_params[ref][4] = -gm_params[ref][3]
+                // gm_params[ref][5] =  gm_params[ref][2]
+            }
+        }
+        if (type >= AV1_WARP_MODEL_TRANSLATION) { // params 0 and 1 are coded last
+            CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 0));
+            CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 1));
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(film_grain_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   AV1RawFilmGrainParams *current,
+                                   AV1RawFrameHeader *frame_header)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int num_pos_luma, num_pos_chroma;
+    int i, err;
+    // Film grain syntax; here `current` is the grain struct, not the frame header.
+    if (!seq->film_grain_params_present ||
+        (!frame_header->show_frame && !frame_header->showable_frame))
+        return 0; // no grain syntax: sequence has none, or frame is never shown
+
+    flag(apply_grain);
+
+    if (!current->apply_grain)
+        return 0;
+
+    fb(16, grain_seed);
+
+    if (frame_header->frame_type == AV1_FRAME_INTER) // only inter frames code update_grain
+        flag(update_grain);
+    else
+        infer(update_grain, 1);
+
+    if (!current->update_grain) { // only film_grain_params_ref_idx is coded, then done
+        fb(3, film_grain_params_ref_idx);
+        return 0;
+    }
+
+    fc(4, num_y_points, 0, 14);
+    for (i = 0; i < current->num_y_points; i++) {
+        fcs(8, point_y_value[i],
+            i ? current->point_y_value[i - 1] + 1 : 0, // lower bound: above the previous point
+            MAX_UINT_BITS(8) - (current->num_y_points - i - 1), // upper bound: minus points to come
+            1, i);
+        fbs(8, point_y_scaling[i], 1, i);
+    }
+
+    if (seq->color_config.mono_chrome)
+        infer(chroma_scaling_from_luma, 0);
+    else
+        flag(chroma_scaling_from_luma);
+    // Cases in which no cb/cr points are coded (both counts are inferred as 0).
+    if (seq->color_config.mono_chrome ||
+        current->chroma_scaling_from_luma ||
+        (seq->color_config.subsampling_x == 1 &&
+         seq->color_config.subsampling_y == 1 &&
+         current->num_y_points == 0)) {
+        infer(num_cb_points, 0);
+        infer(num_cr_points, 0);
+    } else { // cb then cr: same point coding as for luma, with at most 10 points each
+        fc(4, num_cb_points, 0, 10);
+        for (i = 0; i < current->num_cb_points; i++) {
+            fcs(8, point_cb_value[i],
+                i ? current->point_cb_value[i - 1] + 1 : 0,
+                MAX_UINT_BITS(8) - (current->num_cb_points - i - 1),
+                1, i);
+            fbs(8, point_cb_scaling[i], 1, i);
+        }
+        fc(4, num_cr_points, 0, 10);
+        for (i = 0; i < current->num_cr_points; i++) {
+            fcs(8, point_cr_value[i],
+                i ? current->point_cr_value[i - 1] + 1 : 0,
+                MAX_UINT_BITS(8) - (current->num_cr_points - i - 1),
+                1, i);
+            fbs(8, point_cr_scaling[i], 1, i);
+        }
+    }
+
+    fb(2, grain_scaling_minus_8);
+    fb(2, ar_coeff_lag);
+    num_pos_luma = 2 * current->ar_coeff_lag * (current->ar_coeff_lag + 1); // luma coefficient count
+    if (current->num_y_points) {
+        num_pos_chroma = num_pos_luma + 1; // chroma has one more coefficient when luma points exist
+        for (i = 0; i < num_pos_luma; i++)
+            fbs(8, ar_coeffs_y_plus_128[i], 1, i);
+    } else {
+        num_pos_chroma = num_pos_luma;
+    }
+    if (current->chroma_scaling_from_luma || current->num_cb_points) {
+        for (i = 0; i < num_pos_chroma; i++)
+            fbs(8, ar_coeffs_cb_plus_128[i], 1, i);
+    }
+    if (current->chroma_scaling_from_luma || current->num_cr_points) {
+        for (i = 0; i < num_pos_chroma; i++)
+            fbs(8, ar_coeffs_cr_plus_128[i], 1, i);
+    }
+    fb(2, ar_coeff_shift_minus_6);
+    fb(2, grain_scale_shift);
+    if (current->num_cb_points) { // cb multipliers/offset are coded only with cb points
+        fb(8, cb_mult);
+        fb(8, cb_luma_mult);
+        fb(9, cb_offset);
+    }
+    if (current->num_cr_points) { // likewise for cr
+        fb(8, cr_mult);
+        fb(8, cr_luma_mult);
+        fb(9, cr_offset);
+    }
+
+    flag(overlap_flag);
+    flag(clip_to_restricted_range);
+
+    return 0;
+}
+
+static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq;
+    int id_len, diff_len, all_frames, frame_is_intra, order_hint_bits;
+    int i, err;
+    // Frame header syntax; also keeps the per-slot state in priv->ref[] up to date.
+    if (!priv->sequence_header) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "No sequence header available: "
+               "unable to decode frame header.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    seq = priv->sequence_header;
+
+    id_len = seq->additional_frame_id_length_minus_1 +
+             seq->delta_frame_id_length_minus_2 + 3;
+    all_frames = (1 << AV1_NUM_REF_FRAMES) - 1; // refresh mask with one bit set per reference slot
+
+    if (seq->reduced_still_picture_header) { // nothing coded: fixed inferred values
+        infer(show_existing_frame, 0);
+        infer(frame_type,     AV1_FRAME_KEY);
+        infer(show_frame,     1);
+        infer(showable_frame, 0);
+        frame_is_intra = 1;
+
+    } else {
+        flag(show_existing_frame);
+
+        if (current->show_existing_frame) { // short header: ends with the jump to update_refs
+            AV1ReferenceFrameState *ref;
+
+            fb(3, frame_to_show_map_idx);
+            ref = &priv->ref[current->frame_to_show_map_idx];
+
+            if (!ref->valid) {
+                av_log(ctx->log_ctx, AV_LOG_ERROR, "Missing reference frame needed for "
+                       "show_existing_frame (frame_to_show_map_idx = %d).\n",
+                       current->frame_to_show_map_idx);
+                return AVERROR_INVALIDDATA;
+            }
+
+            if (seq->decoder_model_info_present_flag &&
+                !seq->timing_info.equal_picture_interval) {
+                fb(seq->decoder_model_info.frame_presentation_time_length_minus_1 + 1,
+                   frame_presentation_time);
+            }
+
+            if (seq->frame_id_numbers_present_flag)
+                fb(id_len, display_frame_id);
+
+            infer(frame_type, ref->frame_type);
+            if (current->frame_type == AV1_FRAME_KEY) { // stored key frame: refresh every slot
+                infer(refresh_frame_flags, all_frames);
+
+                // Section 7.21
+                infer(current_frame_id, ref->frame_id);
+                priv->upscaled_width  = ref->upscaled_width;
+                priv->frame_width     = ref->frame_width;
+                priv->frame_height    = ref->frame_height;
+                priv->render_width    = ref->render_width;
+                priv->render_height   = ref->render_height;
+                priv->bit_depth       = ref->bit_depth;
+                priv->order_hint      = ref->order_hint;
+            } else
+                infer(refresh_frame_flags, 0); // other frame types leave the slots untouched
+
+            infer(frame_width_minus_1,   ref->upscaled_width - 1);
+            infer(frame_height_minus_1,  ref->frame_height - 1);
+            infer(render_width_minus_1,  ref->render_width - 1);
+            infer(render_height_minus_1, ref->render_height - 1);
+
+            // Section 7.20
+            goto update_refs;
+        }
+
+        fb(2, frame_type);
+        frame_is_intra = (current->frame_type == AV1_FRAME_INTRA_ONLY ||
+                          current->frame_type == AV1_FRAME_KEY);
+
+        flag(show_frame);
+        if (current->show_frame &&
+            seq->decoder_model_info_present_flag &&
+            !seq->timing_info.equal_picture_interval) {
+            fb(seq->decoder_model_info.frame_presentation_time_length_minus_1 + 1,
+               frame_presentation_time);
+        }
+        if (current->show_frame)
+            infer(showable_frame, current->frame_type != AV1_FRAME_KEY);
+        else
+            flag(showable_frame);
+
+        if (current->frame_type == AV1_FRAME_SWITCH ||
+            (current->frame_type == AV1_FRAME_KEY && current->show_frame))
+            infer(error_resilient_mode, 1);
+        else
+            flag(error_resilient_mode);
+    }
+
+    if (current->frame_type == AV1_FRAME_KEY && current->show_frame) { // shown key frame:
+        for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {                     // drop all stored refs
+            priv->ref[i].valid = 0;
+            priv->ref[i].order_hint = 0;
+        }
+    }
+
+    flag(disable_cdf_update);
+
+    if (seq->seq_force_screen_content_tools ==
+        AV1_SELECT_SCREEN_CONTENT_TOOLS) {
+        flag(allow_screen_content_tools);
+    } else {
+        infer(allow_screen_content_tools,
+              seq->seq_force_screen_content_tools);
+    }
+    if (current->allow_screen_content_tools) {
+        if (seq->seq_force_integer_mv == AV1_SELECT_INTEGER_MV)
+            flag(force_integer_mv);
+        else
+            infer(force_integer_mv, seq->seq_force_integer_mv);
+    } else {
+        infer(force_integer_mv, 0);
+    }
+
+    if (seq->frame_id_numbers_present_flag) {
+        fb(id_len, current_frame_id);
+
+        diff_len = seq->delta_frame_id_length_minus_2 + 2;
+        for (i = 0; i < AV1_NUM_REF_FRAMES; i++) { // invalidate slots whose id is out of range
+            if (current->current_frame_id > (1 << diff_len)) {
+                if (priv->ref[i].frame_id > current->current_frame_id ||
+                    priv->ref[i].frame_id < (current->current_frame_id -
+                                             (1 << diff_len)))
+                    priv->ref[i].valid = 0;
+            } else { // the allowed range wraps around modulo (1 << id_len)
+                if (priv->ref[i].frame_id > current->current_frame_id &&
+                    priv->ref[i].frame_id < ((1 << id_len) +
+                                             current->current_frame_id -
+                                             (1 << diff_len)))
+                    priv->ref[i].valid = 0;
+            }
+        }
+    } else {
+        infer(current_frame_id, 0);
+    }
+
+    if (current->frame_type == AV1_FRAME_SWITCH)
+        infer(frame_size_override_flag, 1);
+    else if(seq->reduced_still_picture_header)
+        infer(frame_size_override_flag, 0);
+    else
+        flag(frame_size_override_flag);
+
+    order_hint_bits =
+        seq->enable_order_hint ? seq->order_hint_bits_minus_1 + 1 : 0;
+    if (order_hint_bits > 0)
+        fb(order_hint_bits, order_hint);
+    else
+        infer(order_hint, 0);
+    priv->order_hint = current->order_hint; // later read by skip_mode_params and update_refs
+
+    if (frame_is_intra || current->error_resilient_mode)
+        infer(primary_ref_frame, AV1_PRIMARY_REF_NONE);
+    else
+        fb(3, primary_ref_frame);
+
+    if (seq->decoder_model_info_present_flag) {
+        flag(buffer_removal_time_present_flag);
+        if (current->buffer_removal_time_present_flag) {
+            for (i = 0; i <= seq->operating_points_cnt_minus_1; i++) {
+                if (seq->decoder_model_present_for_this_op[i]) {
+                    int op_pt_idc = seq->operating_point_idc[i];
+                    int in_temporal_layer = (op_pt_idc >>  priv->temporal_id    ) & 1; // low bits
+                    int in_spatial_layer  = (op_pt_idc >> (priv->spatial_id + 8)) & 1; // bits 8 and up
+                    if (seq->operating_point_idc[i] == 0 ||
+                        (in_temporal_layer && in_spatial_layer)) {
+                        fbs(seq->decoder_model_info.buffer_removal_time_length_minus_1 + 1,
+                            buffer_removal_time[i], 1, i);
+                    }
+                }
+            }
+        }
+    }
+
+    if (current->frame_type == AV1_FRAME_SWITCH ||
+        (current->frame_type == AV1_FRAME_KEY && current->show_frame))
+        infer(refresh_frame_flags, all_frames);
+    else
+        fb(8, refresh_frame_flags);
+
+    if (!frame_is_intra || current->refresh_frame_flags != all_frames) {
+        if (seq->enable_order_hint) {
+            for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+                if (current->error_resilient_mode)
+                    fbs(order_hint_bits, ref_order_hint[i], 1, i);
+                else
+                    infer(ref_order_hint[i], priv->ref[i].order_hint);
+                if (current->ref_order_hint[i] != priv->ref[i].order_hint)
+                    priv->ref[i].valid = 0; // stored slot disagrees with the signalled hint
+            }
+        }
+    }
+
+    if (current->frame_type == AV1_FRAME_KEY ||
+        current->frame_type == AV1_FRAME_INTRA_ONLY) { // intra frames: size and allow_intrabc
+        CHECK(FUNC(frame_size)(ctx, rw, current));
+        CHECK(FUNC(render_size)(ctx, rw, current));
+
+        if (current->allow_screen_content_tools &&
+            priv->upscaled_width == priv->frame_width)
+            flag(allow_intrabc);
+        else
+            infer(allow_intrabc, 0);
+
+    } else { // inter frames: reference selection, size and motion-vector options
+        if (!seq->enable_order_hint) {
+            infer(frame_refs_short_signaling, 0);
+        } else {
+            flag(frame_refs_short_signaling);
+            if (current->frame_refs_short_signaling) {
+                fb(3, last_frame_idx);
+                fb(3, golden_frame_idx);
+                CHECK(FUNC(set_frame_refs)(ctx, rw, current));
+            }
+        }
+
+        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+            if (!current->frame_refs_short_signaling)
+                fbs(3, ref_frame_idx[i], 1, i);
+            if (seq->frame_id_numbers_present_flag) {
+                fbs(seq->delta_frame_id_length_minus_2 + 2,
+                    delta_frame_id_minus1[i], 1, i);
+            }
+        }
+
+        if (current->frame_size_override_flag &&
+            !current->error_resilient_mode) {
+            CHECK(FUNC(frame_size_with_refs)(ctx, rw, current));
+        } else {
+            CHECK(FUNC(frame_size)(ctx, rw, current));
+            CHECK(FUNC(render_size)(ctx, rw, current));
+        }
+
+        if (current->force_integer_mv)
+            infer(allow_high_precision_mv, 0);
+        else
+            flag(allow_high_precision_mv);
+
+        CHECK(FUNC(interpolation_filter)(ctx, rw, current));
+
+        flag(is_motion_mode_switchable);
+
+        if (current->error_resilient_mode ||
+            !seq->enable_ref_frame_mvs)
+            infer(use_ref_frame_mvs, 0);
+        else
+            flag(use_ref_frame_mvs);
+
+        infer(allow_intrabc, 0);
+    }
+
+    if (!frame_is_intra) {
+        // Derive reference frame sign biases.
+    }
+
+    if (seq->reduced_still_picture_header || current->disable_cdf_update)
+        infer(disable_frame_end_update_cdf, 1);
+    else
+        flag(disable_frame_end_update_cdf);
+
+    if (current->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
+        // Init non-coeff CDFs.
+        // Setup past independence.
+    } else {
+        // Load CDF tables from previous frame.
+        // Load params from previous frame.
+    }
+
+    if (current->use_ref_frame_mvs) {
+        // Perform motion field estimation process.
+    }
+
+    CHECK(FUNC(tile_info)(ctx, rw, current));
+
+    CHECK(FUNC(quantization_params)(ctx, rw, current));
+
+    CHECK(FUNC(segmentation_params)(ctx, rw, current));
+
+    CHECK(FUNC(delta_q_params)(ctx, rw, current));
+
+    CHECK(FUNC(delta_lf_params)(ctx, rw, current));
+
+    // Init coeff CDFs / load previous segments.
+    // coded_lossless stays 1 only if every segment's qindex and all delta_q_* are 0.
+    priv->coded_lossless = 1;
+    for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
+        int qindex;
+        if (current->feature_enabled[i][AV1_SEG_LVL_ALT_Q]) {
+            qindex = (current->base_q_idx +
+                      current->feature_value[i][AV1_SEG_LVL_ALT_Q]);
+        } else {
+            qindex = current->base_q_idx;
+        }
+        qindex = av_clip_uintp2(qindex, 8);
+
+        if (qindex                || current->delta_q_y_dc ||
+            current->delta_q_u_ac || current->delta_q_u_dc ||
+            current->delta_q_v_ac || current->delta_q_v_dc) {
+            priv->coded_lossless = 0;
+        }
+    }
+    priv->all_lossless = priv->coded_lossless &&
+        priv->frame_width == priv->upscaled_width;
+
+    CHECK(FUNC(loop_filter_params)(ctx, rw, current)); // reads priv->coded_lossless set above
+
+    CHECK(FUNC(cdef_params)(ctx, rw, current));
+
+    CHECK(FUNC(lr_params)(ctx, rw, current));
+
+    CHECK(FUNC(read_tx_mode)(ctx, rw, current));
+
+    CHECK(FUNC(frame_reference_mode)(ctx, rw, current));
+
+    CHECK(FUNC(skip_mode_params)(ctx, rw, current));
+
+    if (frame_is_intra || current->error_resilient_mode ||
+        !seq->enable_warped_motion)
+        infer(allow_warped_motion, 0);
+    else
+        flag(allow_warped_motion);
+
+    flag(reduced_tx_set);
+
+    CHECK(FUNC(global_motion_params)(ctx, rw, current));
+
+    CHECK(FUNC(film_grain_params)(ctx, rw, &current->film_grain, current));
+
+    av_log(ctx->log_ctx, AV_LOG_DEBUG, "Frame %d:  size %dx%d  "
+           "upscaled %d  render %dx%d  subsample %dx%d  "
+           "bitdepth %d  tiles %dx%d.\n", priv->order_hint,
+           priv->frame_width, priv->frame_height, priv->upscaled_width,
+           priv->render_width, priv->render_height,
+           seq->color_config.subsampling_x + 1,
+           seq->color_config.subsampling_y + 1, priv->bit_depth,
+           priv->tile_rows, priv->tile_cols);
+
+update_refs: // store this frame's state in every slot selected by refresh_frame_flags
+    for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+        if (current->refresh_frame_flags & (1 << i)) {
+            priv->ref[i] = (AV1ReferenceFrameState) {
+                .valid          = 1,
+                .frame_id       = current->current_frame_id,
+                .upscaled_width = priv->upscaled_width,
+                .frame_width    = priv->frame_width,
+                .frame_height   = priv->frame_height,
+                .render_width   = priv->render_width,
+                .render_height  = priv->render_height,
+                .frame_type     = current->frame_type,
+                .subsampling_x  = seq->color_config.subsampling_x,
+                .subsampling_y  = seq->color_config.subsampling_y,
+                .bit_depth      = priv->bit_depth,
+                .order_hint     = priv->order_hint,
+            };
+            memcpy(priv->ref[i].loop_filter_ref_deltas, current->loop_filter_ref_deltas,
+                   sizeof(current->loop_filter_ref_deltas));
+            memcpy(priv->ref[i].loop_filter_mode_deltas, current->loop_filter_mode_deltas,
+                   sizeof(current->loop_filter_mode_deltas));
+            memcpy(priv->ref[i].feature_enabled, current->feature_enabled,
+                   sizeof(current->feature_enabled));
+            memcpy(priv->ref[i].feature_value, current->feature_value,
+                   sizeof(current->feature_value));
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(frame_header_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                                  AV1RawFrameHeader *current, int redundant,
+                                  AVBufferRef *rw_buffer_ref)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int start_pos, fh_bits, fh_bytes, err;
+    uint8_t *fh_start;
+    // Read or write a frame header OBU; returns 0 or a negative AVERROR code.
+    if (priv->seen_frame_header) { // A header was already stored for this frame.
+        if (!redundant) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid repeated "
+                   "frame header OBU.\n");
+            return AVERROR_INVALIDDATA;
+        } else {
+            GetBitContext fh;
+            size_t i, b;
+            uint32_t val;
+
+            HEADER("Redundant Frame Header");
+
+            av_assert0(priv->frame_header_ref && priv->frame_header);
+            // Replay the stored header in chunks of up to 8 bits (last may be short).
+            init_get_bits(&fh, priv->frame_header,
+                          priv->frame_header_size);
+            for (i = 0; i < priv->frame_header_size; i += 8) {
+                b = FFMIN(priv->frame_header_size - i, 8);
+                val = get_bits(&fh, b);
+                xf(b, frame_header_copy[i],
+                   val, val, val, 1, i / 8);
+            }
+        }
+    } else { // First header for this frame, even if it is flagged redundant.
+        if (redundant)
+            HEADER("Redundant Frame Header (used as Frame Header)");
+        else
+            HEADER("Frame Header");
+        // Note where the header starts so its bits can be located afterwards.
+#ifdef READ
+        start_pos = get_bits_count(rw);
+#else
+        start_pos = put_bits_count(rw);
+#endif
+
+        CHECK(FUNC(uncompressed_header)(ctx, rw, current));
+
+        priv->tile_num = 0;
+
+        if (current->show_existing_frame) {
+            priv->seen_frame_header = 0;
+        } else {
+            // seen_frame_header is set only after the header bits are stored, so
+            // a failed allocation below cannot leave it set without a header.
+            av_buffer_unref(&priv->frame_header_ref);
+
+#ifdef READ
+            fh_bits  = get_bits_count(rw) - start_pos;
+            fh_start = (uint8_t*)rw->buffer + start_pos / 8;
+#else
+            // Need to flush the bitwriter so that we can copy its output,
+            // but use a copy so we don't affect the caller's structure.
+            {
+                PutBitContext tmp = *rw;
+                flush_put_bits(&tmp);
+            }
+
+            fh_bits  = put_bits_count(rw) - start_pos;
+            fh_start = rw->buf + start_pos / 8;
+#endif
+            fh_bytes = (fh_bits + 7) / 8;
+
+            priv->frame_header_size = fh_bits;
+            if (rw_buffer_ref) { // Share the caller's buffer instead of copying.
+                priv->frame_header_ref = av_buffer_ref(rw_buffer_ref);
+                if (!priv->frame_header_ref)
+                    return AVERROR(ENOMEM);
+                priv->frame_header = fh_start;
+            } else { // No buffer to share: keep a private copy of the bytes.
+                priv->frame_header_ref =
+                    av_buffer_alloc(fh_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
+                if (!priv->frame_header_ref)
+                    return AVERROR(ENOMEM);
+                priv->frame_header = priv->frame_header_ref->data;
+                memcpy(priv->frame_header, fh_start, fh_bytes);
+            }
+            priv->seen_frame_header = 1;
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(tile_group_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                                AV1RawTileGroup *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int num_tiles, tile_bits;
+    int err;
+    // Read or write a tile group OBU header, using the tile layout held in priv.
+    HEADER("Tile Group");
+
+    num_tiles = priv->tile_cols * priv->tile_rows;
+    if (num_tiles > 1)
+        flag(tile_start_and_end_present_flag);
+    else
+        infer(tile_start_and_end_present_flag, 0);
+    // Without explicit bounds the group spans every tile of the frame.
+    if (num_tiles == 1 || !current->tile_start_and_end_present_flag) {
+        infer(tg_start, 0);
+        infer(tg_end, num_tiles - 1);
+    } else {
+        tile_bits = cbs_av1_tile_log2(1, priv->tile_cols) +
+                    cbs_av1_tile_log2(1, priv->tile_rows);
+        fc(tile_bits, tg_start, priv->tile_num, num_tiles - 1);
+        fc(tile_bits, tg_end, current->tg_start, num_tiles - 1);
+    }
+    // Remember the first tile not yet covered by a tile group.
+    priv->tile_num = current->tg_end + 1;
+
+    CHECK(FUNC(byte_alignment)(ctx, rw));
+
+    // Reset header for next frame: this group ends at the frame's last tile.
+    if (current->tg_end == num_tiles - 1)
+        priv->seen_frame_header = 0;
+
+    // Tile data follows.
+
+    return 0;
+}
+
+static int FUNC(frame_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                           AV1RawFrame *current,
+                           AVBufferRef *rw_buffer_ref)
+{
+    int err;
+    // A frame OBU: non-redundant frame header, byte alignment, one tile group.
+    CHECK(FUNC(frame_header_obu)(ctx, rw, &current->header,
+                                 0, rw_buffer_ref));
+
+    CHECK(FUNC(byte_alignment)(ctx, rw));
+
+    CHECK(FUNC(tile_group_obu)(ctx, rw, &current->tile_group));
+
+    return 0;
+}
+
+static int FUNC(tile_list_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                               AV1RawTileList *current)
+{
+    int err;
+    // Read or write the three leading fields of a tile list OBU.
+    fb(8, output_frame_width_in_tiles_minus_1);
+    fb(8, output_frame_height_in_tiles_minus_1);
+
+    fb(16, tile_count_minus_1);
+
+    // Tile data follows.
+
+    return 0;
+}
+
+static int FUNC(metadata_hdr_cll)(CodedBitstreamContext *ctx, RWContext *rw,
+                                  AV1RawMetadataHDRCLL *current)
+{
+    int err;
+    // HDR CLL metadata payload: max_cll followed by max_fall.
+    fb(16, max_cll);
+    fb(16, max_fall);
+
+    return 0;
+}
+
+static int FUNC(metadata_hdr_mdcv)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   AV1RawMetadataHDRMDCV *current)
+{
+    int err, i;
+    // HDR MDCV metadata payload: three primaries, white point, luminance range.
+    for (i = 0; i < 3; i++) {
+        fbs(16, primary_chromaticity_x[i], 1, i);
+        fbs(16, primary_chromaticity_y[i], 1, i);
+    }
+
+    fb(16, white_point_chromaticity_x);
+    fb(16, white_point_chromaticity_y);
+
+    fb(32, luminance_max);
+    fb(32, luminance_min);
+
+    return 0;
+}
+
+static int FUNC(scalability_structure)(CodedBitstreamContext *ctx, RWContext *rw,
+                                       AV1RawMetadataScalability *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq;
+    int err, i, j;
+    // Scalability structure syntax; the sequence header supplies the size limits.
+    if (!priv->sequence_header) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "No sequence header available: "
+               "unable to parse scalability metadata.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    seq = priv->sequence_header;
+    // Three presence flags select which of the optional sections below are coded.
+    fb(2, spatial_layers_cnt_minus_1);
+    flag(spatial_layer_dimensions_present_flag);
+    flag(spatial_layer_description_present_flag);
+    flag(temporal_group_description_present_flag);
+    fc(3, scalability_structure_reserved_3bits, 0, 0);
+    if (current->spatial_layer_dimensions_present_flag) {
+        for (i = 0; i <= current->spatial_layers_cnt_minus_1; i++) {
+            fcs(16, spatial_layer_max_width[i],
+                0, seq->max_frame_width_minus_1 + 1, 1, i);
+            fcs(16, spatial_layer_max_height[i],
+                0, seq->max_frame_height_minus_1 + 1, 1, i);
+        }
+    }
+    if (current->spatial_layer_description_present_flag) {
+        for (i = 0; i <= current->spatial_layers_cnt_minus_1; i++)
+            fbs(8, spatial_layer_ref_id[i], 1, i);
+    }
+    if (current->temporal_group_description_present_flag) {
+        fb(8, temporal_group_size);
+        for (i = 0; i < current->temporal_group_size; i++) {
+            fbs(3, temporal_group_temporal_id[i], 1, i);
+            flags(temporal_group_temporal_switching_up_point_flag[i], 1, i);
+            flags(temporal_group_spatial_switching_up_point_flag[i], 1, i);
+            fbs(3, temporal_group_ref_cnt[i], 1, i);
+            for (j = 0; j < current->temporal_group_ref_cnt[i]; j++) { // One diff per reference counted above.
+                fbs(8, temporal_group_ref_pic_diff[i][j], 2, i, j);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(metadata_scalability)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      AV1RawMetadataScalability *current)
+{
+    int err;
+    // Scalability metadata: a mode byte, plus a structure for AV1_SCALABILITY_SS.
+    fb(8, scalability_mode_idc);
+
+    if (current->scalability_mode_idc == AV1_SCALABILITY_SS)
+        CHECK(FUNC(scalability_structure)(ctx, rw, current));
+
+    return 0;
+}
+
+static int FUNC(metadata_itut_t35)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   AV1RawMetadataITUTT35 *current)
+{
+    int err;
+    size_t i;
+    // ITU-T T.35 metadata: country code, optional extension byte, payload bytes.
+    fb(8, itu_t_t35_country_code);
+    if (current->itu_t_t35_country_code == 0xff)
+        fb(8, itu_t_t35_country_code_extension_byte);
+
+#ifdef READ
+    // The payload runs up to the start of the trailing bits, but there might
+    // be arbitrarily many trailing zeroes so we need to read through twice.
+    current->payload_size = cbs_av1_get_payload_bytes_left(rw);
+    // Only the read path allocates; the loop below uses payload in both modes.
+    current->payload_ref = av_buffer_alloc(current->payload_size);
+    if (!current->payload_ref)
+        return AVERROR(ENOMEM);
+    current->payload = current->payload_ref->data;
+#endif
+
+    for (i = 0; i < current->payload_size; i++)
+        xf(8, itu_t_t35_payload_bytes[i], current->payload[i],
+           0x00, 0xff, 1, i);
+
+    return 0;
+}
+
+static int FUNC(metadata_timecode)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   AV1RawMetadataTimecode *current)
+{
+    int err;
+    // Timecode metadata payload: counting info, clock fields, then a time offset.
+    fb(5, counting_type);
+    flag(full_timestamp_flag);
+    flag(discontinuity_flag);
+    flag(cnt_dropped_flag);
+    fb(9, n_frames);
+    // Clock fields are all coded, or nested behind seconds/minutes/hours flags.
+    if (current->full_timestamp_flag) {
+        fc(6, seconds_value, 0, 59);
+        fc(6, minutes_value, 0, 59);
+        fc(5, hours_value,   0, 23);
+    } else {
+        flag(seconds_flag);
+        if (current->seconds_flag) {
+            fc(6, seconds_value, 0, 59);
+            flag(minutes_flag);
+            if (current->minutes_flag) {
+                fc(6, minutes_value, 0, 59);
+                flag(hours_flag);
+                if (current->hours_flag)
+                    fc(5, hours_value, 0, 23);
+            }
+        }
+    }
+
+    fb(5, time_offset_length);
+    if (current->time_offset_length > 0)
+        fb(current->time_offset_length, time_offset_value);
+    else
+        infer(time_offset_length, 0); // NOTE(review): already 0 here; time_offset_value may have been intended - confirm.
+
+    return 0;
+}
+
+static int FUNC(metadata_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                              AV1RawMetadata *current)
+{
+    int err;
+    // Metadata OBU: a leb128-coded type, then the payload syntax for that type.
+    leb128(metadata_type);
+
+    switch (current->metadata_type) {
+    case AV1_METADATA_TYPE_HDR_CLL:
+        CHECK(FUNC(metadata_hdr_cll)(ctx, rw, &current->metadata.hdr_cll));
+        break;
+    case AV1_METADATA_TYPE_HDR_MDCV:
+        CHECK(FUNC(metadata_hdr_mdcv)(ctx, rw, &current->metadata.hdr_mdcv));
+        break;
+    case AV1_METADATA_TYPE_SCALABILITY:
+        CHECK(FUNC(metadata_scalability)(ctx, rw, &current->metadata.scalability));
+        break;
+    case AV1_METADATA_TYPE_ITUT_T35:
+        CHECK(FUNC(metadata_itut_t35)(ctx, rw, &current->metadata.itut_t35));
+        break;
+    case AV1_METADATA_TYPE_TIMECODE:
+        CHECK(FUNC(metadata_timecode)(ctx, rw, &current->metadata.timecode));
+        break;
+    default:
+        // Unknown metadata type: reported as AVERROR_PATCHWELCOME.
+        return AVERROR_PATCHWELCOME;
+    }
+
+    return 0;
+}
+
+static int FUNC(padding_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                             AV1RawPadding *current)
+{
+    int err;
+    size_t i; // Same index type as the matching loop in metadata_itut_t35.
+    // Read or write a padding OBU: its payload is a run of arbitrary bytes.
+    HEADER("Padding");
+#ifdef READ
+    // The payload runs up to the start of the trailing bits, but there might
+    // be arbitrarily many trailing zeroes so we need to read through twice.
+    current->payload_size = cbs_av1_get_payload_bytes_left(rw);
+    // Only the read path allocates; the loop below uses payload in both modes.
+    current->payload_ref = av_buffer_alloc(current->payload_size);
+    if (!current->payload_ref)
+        return AVERROR(ENOMEM);
+    current->payload = current->payload_ref->data;
+#endif
+
+    for (i = 0; i < current->payload_size; i++)
+        xf(8, obu_padding_byte[i], current->payload[i], 0x00, 0xff, 1, i);
+
+    return 0;
+}
diff --git a/media/ffvpx/libavcodec/cbs_internal.h b/media/ffvpx/libavcodec/cbs_internal.h
new file mode 100644
index 0000000000..e585c77934
--- /dev/null
+++ b/media/ffvpx/libavcodec/cbs_internal.h
@@ -0,0 +1,253 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_INTERNAL_H
+#define AVCODEC_CBS_INTERNAL_H
+
+#include <stdint.h>
+
+#include "libavutil/buffer.h"
+#include "libavutil/log.h"
+
+#include "cbs.h"
+#include "codec_id.h"
+#include "get_bits.h"
+#include "put_bits.h"
+
+
+enum CBSContentType { // How the decomposed content of a unit is managed.
+    // Unit content may contain some references to other structures, but all
+    // managed via buffer reference counting.  The descriptor defines the
+    // structure offsets of every buffer reference.
+    CBS_CONTENT_TYPE_INTERNAL_REFS, // selects the "ref" member of the descriptor's type union
+    // Unit content is something more complex.  The descriptor defines
+    // special functions to manage the content.
+    CBS_CONTENT_TYPE_COMPLEX, // selects the "complex" member of the descriptor's type union
+};
+
+enum { // Limits and sentinel used by CodedBitstreamUnitTypeDescriptor.
+      // Maximum number of unit types described by the same non-range
+      // unit type descriptor.
+      CBS_MAX_LIST_UNIT_TYPES = 3, // capacity of the descriptor's unit_type.list array
+      // Maximum number of reference buffer offsets in any one unit.
+      CBS_MAX_REF_OFFSETS = 2, // capacity of the descriptor's type.ref.offsets array
+      // Special value used in a unit type descriptor to indicate that it
+      // applies to a large range of types rather than a set of discrete
+      // values.
+      CBS_UNIT_TYPE_RANGE = -1, // stored in the descriptor's nb_unit_types field
+};
+
+typedef const struct CodedBitstreamUnitTypeDescriptor { // note: the typedef name is itself const-qualified
+    // Number of entries in the unit_types array, or the special value
+    // CBS_UNIT_TYPE_RANGE to indicate that the range fields should be
+    // used instead.
+    int nb_unit_types;
+
+    union {
+        // Array of unit types that this entry describes.
+        CodedBitstreamUnitType list[CBS_MAX_LIST_UNIT_TYPES];
+        // Start and end of unit type range, used if nb_unit_types is
+        // CBS_UNIT_TYPE_RANGE.
+        struct {
+            CodedBitstreamUnitType start;
+            CodedBitstreamUnitType end;
+        } range;
+    } unit_type;
+
+    // The type of content described.
+    enum CBSContentType content_type;
+    // The size of the structure which should be allocated to contain
+    // the decomposed content of this type of unit.
+    size_t content_size;
+
+    union {
+        // This union's state is determined by content_type:
+        // ref for CBS_CONTENT_TYPE_INTERNAL_REFS,
+        // complex for CBS_CONTENT_TYPE_COMPLEX.
+        struct {
+            // Number of entries in the ref_offsets array.
+            // May be zero, then the structure is POD-like.
+            int nb_offsets;
+            // The structure must contain two adjacent elements:
+            //   type        *field;
+            //   AVBufferRef *field_ref;
+            // where field points to something in the buffer referred to by
+            // field_ref.  This offset is then set to offsetof(struct, field).
+            size_t offsets[CBS_MAX_REF_OFFSETS];
+        } ref;
+
+        struct { // Callbacks used when content_type is CBS_CONTENT_TYPE_COMPLEX.
+            void (*content_free)(void *opaque, uint8_t *data); // presumably an AVBuffer-style free callback - confirm
+            int  (*content_clone)(AVBufferRef **ref, CodedBitstreamUnit *unit); // NOTE(review): not set by CBS_UNIT_TYPES_COMPLEX
+        } complex;
+    } type;
+} CodedBitstreamUnitTypeDescriptor;
+
+typedef struct CodedBitstreamType { // Per-codec table: identity, private data layout and callbacks.
+    enum AVCodecID codec_id; // Codec that this entry applies to.
+
+    // A class for the private data, used to declare private AVOptions.
+    // This field is NULL for types that do not declare any options.
+    // If this field is non-NULL, the first member of the filter private data
+    // must be a pointer to AVClass.
+    const AVClass *priv_class;
+
+    size_t priv_data_size; // presumably the size of the codec's private context (ctx->priv_data) - confirm
+
+    // List of unit type descriptors for this codec.
+    // Terminated by a descriptor with nb_unit_types equal to zero
+    // (CBS_UNIT_TYPE_END_OF_LIST).
+    const CodedBitstreamUnitTypeDescriptor *unit_types;
+    // Split frag->data into coded bitstream units, creating the
+    // frag->units array.  Fill data but not content on each unit.
+    // The header argument should be set if the fragment came from
+    // a header block, which may require different parsing for some
+    // codecs (e.g. the AVCC header in H.264).
+    int (*split_fragment)(CodedBitstreamContext *ctx,
+                          CodedBitstreamFragment *frag,
+                          int header);
+
+    // Read the unit->data bitstream and decompose it, creating
+    // unit->content.
+    int (*read_unit)(CodedBitstreamContext *ctx,
+                     CodedBitstreamUnit *unit);
+
+    // Write the data bitstream from unit->content into pbc.
+    // Return value AVERROR(ENOSPC) indicates that pbc was too small.
+    int (*write_unit)(CodedBitstreamContext *ctx,
+                      CodedBitstreamUnit *unit,
+                      PutBitContext *pbc);
+
+    // Read the data from all of frag->units and assemble it into
+    // a bitstream for the whole fragment.
+    int (*assemble_fragment)(CodedBitstreamContext *ctx,
+                             CodedBitstreamFragment *frag);
+
+    // Reset the codec internal state.
+    void (*flush)(CodedBitstreamContext *ctx);
+
+    // Free the codec internal state.
+    void (*close)(CodedBitstreamContext *ctx);
+} CodedBitstreamType;
+
+
+// Helper functions for trace output.
+// ff_cbs_trace_header() takes only a name (presumably of the structure traced).
+void ff_cbs_trace_header(CodedBitstreamContext *ctx,
+                         const char *name);
+// Traces one syntax element: position, name, subscripts, bit string and value.
+void ff_cbs_trace_syntax_element(CodedBitstreamContext *ctx, int position,
+                                 const char *name, const int *subscripts,
+                                 const char *bitstring, int64_t value);
+
+
+// Helper functions for read/write of common bitstream elements, including
+// generation of trace output.
+// Readers store the value through write_to; writers take the value to emit.
+int ff_cbs_read_unsigned(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                         int width, const char *name,
+                         const int *subscripts, uint32_t *write_to,
+                         uint32_t range_min, uint32_t range_max);
+// NOTE(review): range_min/range_max are presumably inclusive bounds - confirm.
+int ff_cbs_write_unsigned(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                          int width, const char *name,
+                          const int *subscripts, uint32_t value,
+                          uint32_t range_min, uint32_t range_max);
+// Signed variants: same parameters, with int32_t value and range bounds.
+int ff_cbs_read_signed(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                       int width, const char *name,
+                       const int *subscripts, int32_t *write_to,
+                       int32_t range_min, int32_t range_max);
+
+int ff_cbs_write_signed(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                        int width, const char *name,
+                        const int *subscripts, int32_t value,
+                        int32_t range_min, int32_t range_max);
+
+// The largest unsigned value representable in N bits, suitable for use as
+// range_max in the above functions.  length must be below 64 (64-bit shift).
+#define MAX_UINT_BITS(length) ((UINT64_C(1) << (length)) - 1)
+// All three macros yield 64-bit values; the range parameters above are 32-bit.
+// The largest signed value representable in N bits, suitable for use as
+// range_max in the above functions.  length must be in 1..63.
+#define MAX_INT_BITS(length) ((INT64_C(1) << ((length) - 1)) - 1)
+
+// The smallest signed value representable in N bits, suitable for use as
+// range_min in the above functions.  length must be in 1..63.
+#define MIN_INT_BITS(length) (-(INT64_C(1) << ((length) - 1)))
+
+#define TYPE_LIST(...) { __VA_ARGS__ } // Wraps its arguments in a brace initializer.
+#define CBS_UNIT_TYPE_POD(type_, structure) { /* one unit type, no buffer references */ \
+        .nb_unit_types  = 1, \
+        .unit_type.list = { type_ }, \
+        .content_type   = CBS_CONTENT_TYPE_INTERNAL_REFS, \
+        .content_size   = sizeof(structure), \
+        .type.ref       = { .nb_offsets = 0 }, \
+    }
+#define CBS_UNIT_RANGE_POD(range_start, range_end, structure) { /* a range of unit types, no buffer references */ \
+        .nb_unit_types         = CBS_UNIT_TYPE_RANGE, \
+        .unit_type.range.start = range_start, \
+        .unit_type.range.end   = range_end, \
+        .content_type          = CBS_CONTENT_TYPE_INTERNAL_REFS, \
+        .content_size          = sizeof(structure), \
+        .type.ref              = { .nb_offsets = 0 }, \
+    }
+// One buffer reference at offsetof(structure, ref_field); types is a parenthesized list.
+#define CBS_UNIT_TYPES_INTERNAL_REF(types, structure, ref_field) { \
+        .nb_unit_types  = FF_ARRAY_ELEMS((CodedBitstreamUnitType[])TYPE_LIST types), \
+        .unit_type.list = TYPE_LIST types, \
+        .content_type   = CBS_CONTENT_TYPE_INTERNAL_REFS, \
+        .content_size   = sizeof(structure), \
+        .type.ref       = { .nb_offsets = 1, \
+                            .offsets    = { offsetof(structure, ref_field) } }, \
+    }
+#define CBS_UNIT_TYPE_INTERNAL_REF(type, structure, ref_field) /* single-type form of the macro above */ \
+    CBS_UNIT_TYPES_INTERNAL_REF((type), structure, ref_field)
+
+#define CBS_UNIT_RANGE_INTERNAL_REF(range_start, range_end, structure, ref_field) { /* range form, one buffer reference */ \
+        .nb_unit_types         = CBS_UNIT_TYPE_RANGE, \
+        .unit_type.range.start = range_start, \
+        .unit_type.range.end   = range_end, \
+        .content_type          = CBS_CONTENT_TYPE_INTERNAL_REFS, \
+        .content_size          = sizeof(structure), \
+        .type.ref = { .nb_offsets = 1, \
+                      .offsets    = { offsetof(structure, ref_field) } }, \
+    }
+// Complex content: only content_free is initialized; types is a parenthesized list.
+#define CBS_UNIT_TYPES_COMPLEX(types, structure, free_func) { \
+        .nb_unit_types  = FF_ARRAY_ELEMS((CodedBitstreamUnitType[])TYPE_LIST types), \
+        .unit_type.list = TYPE_LIST types, \
+        .content_type   = CBS_CONTENT_TYPE_COMPLEX, \
+        .content_size   = sizeof(structure), \
+        .type.complex   = { .content_free = free_func }, \
+    }
+#define CBS_UNIT_TYPE_COMPLEX(type, structure, free_func) /* single-type form of the macro above */ \
+    CBS_UNIT_TYPES_COMPLEX((type), structure, free_func)
+// Terminator entry: nb_unit_types of zero ends a descriptor list.
+#define CBS_UNIT_TYPE_END_OF_LIST { .nb_unit_types = 0 }
+
+
+extern const CodedBitstreamType ff_cbs_type_av1; // Per-codec tables; declared here, defined elsewhere.
+extern const CodedBitstreamType ff_cbs_type_h264;
+extern const CodedBitstreamType ff_cbs_type_h265;
+extern const CodedBitstreamType ff_cbs_type_jpeg;
+extern const CodedBitstreamType ff_cbs_type_mpeg2;
+extern const CodedBitstreamType ff_cbs_type_vp9;
+
+
+#endif /* AVCODEC_CBS_INTERNAL_H */
diff --git a/media/ffvpx/libavcodec/codec.h b/media/ffvpx/libavcodec/codec.h
new file mode 100644
index 0000000000..3b1995bcfe
--- /dev/null
+++ b/media/ffvpx/libavcodec/codec.h
@@ -0,0 +1,375 @@
+/*
+ * AVCodec public API
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CODEC_H
+#define AVCODEC_CODEC_H
+
+#include <stdint.h>
+
+#include "libavutil/avutil.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/log.h"
+#include "libavutil/pixfmt.h"
+#include "libavutil/rational.h"
+#include "libavutil/samplefmt.h"
+
+#include "libavcodec/codec_id.h"
+#include "libavcodec/version_major.h"
+
+/**
+ * @addtogroup lavc_core
+ * @{
+ */
+
+/**
+ * Decoder can use draw_horiz_band callback.
+ */
+#define AV_CODEC_CAP_DRAW_HORIZ_BAND     (1 <<  0)
+/**
+ * Codec uses get_buffer() or get_encode_buffer() for allocating buffers and
+ * supports custom allocators.
+ * If not set, it might not use get_buffer() or get_encode_buffer() at all, or
+ * use operations that assume the buffer was allocated by
+ * avcodec_default_get_buffer2 or avcodec_default_get_encode_buffer.
+ */
+#define AV_CODEC_CAP_DR1                 (1 <<  1)
+/**
+ * Encoder or decoder requires flushing with NULL input at the end in order to
+ * give the complete and correct output.
+ *
+ * NOTE: If this flag is not set, the codec is guaranteed to never be fed with
+ *       NULL data. The user can still send NULL data to the public encode
+ *       or decode function, but libavcodec will not pass it along to the codec
+ *       unless this flag is set.
+ *
+ * Decoders:
+ * The decoder has a non-zero delay and needs to be fed with avpkt->data=NULL,
+ * avpkt->size=0 at the end to get the delayed data until the decoder no longer
+ * returns frames.
+ *
+ * Encoders:
+ * The encoder needs to be fed with NULL data at the end of encoding until the
+ * encoder no longer returns data.
+ *
+ * NOTE: For encoders implementing the AVCodec.encode2() function, setting this
+ *       flag also means that the encoder must set the pts and duration for
+ *       each output packet. If this flag is not set, the pts and duration will
+ *       be determined by libavcodec from the input frame.
+ */
+#define AV_CODEC_CAP_DELAY               (1 <<  5)
+/**
+ * Codec can be fed a final frame with a smaller size.
+ * This can be used to prevent truncation of the last audio samples.
+ */
+#define AV_CODEC_CAP_SMALL_LAST_FRAME    (1 <<  6)
+
+/**
+ * Codec can output multiple frames per AVPacket.
+ * Normally demuxers return one frame at a time; demuxers which do not do so
+ * are connected to a parser to split what they return into proper frames.
+ * This flag is reserved to the very rare category of codecs which have a
+ * bitstream that cannot be split into frames without time-consuming
+ * operations like full decoding. Demuxers carrying such bitstreams thus
+ * may return multiple frames in a packet. This has many disadvantages like
+ * prohibiting stream copy in many cases thus it should only be considered
+ * as a last resort.
+ */
+#define AV_CODEC_CAP_SUBFRAMES           (1 <<  8)
+/**
+ * Codec is experimental and is thus avoided in favor of non-experimental
+ * encoders
+ */
+#define AV_CODEC_CAP_EXPERIMENTAL        (1 <<  9)
+/**
+ * Codec should fill in channel configuration and samplerate instead of container
+ */
+#define AV_CODEC_CAP_CHANNEL_CONF        (1 << 10)
+/**
+ * Codec supports frame-level multithreading.
+ */
+#define AV_CODEC_CAP_FRAME_THREADS       (1 << 12)
+/**
+ * Codec supports slice-based (or partition-based) multithreading.
+ */
+#define AV_CODEC_CAP_SLICE_THREADS       (1 << 13)
+/**
+ * Codec supports changed parameters at any point.
+ */
+#define AV_CODEC_CAP_PARAM_CHANGE        (1 << 14)
+/**
+ * Codec supports multithreading through a method other than slice- or
+ * frame-level multithreading. Typically this marks wrappers around
+ * multithreading-capable external libraries.
+ */
+#define AV_CODEC_CAP_OTHER_THREADS       (1 << 15)
+/**
+ * Audio encoder supports receiving a different number of samples in each call.
+ */
+#define AV_CODEC_CAP_VARIABLE_FRAME_SIZE (1 << 16)
+/**
+ * Decoder is not a preferred choice for probing.
+ * This indicates that the decoder is not a good choice for probing.
+ * It could for example be an expensive to spin up hardware decoder,
+ * or it could simply not provide a lot of useful information about
+ * the stream.
+ * A decoder marked with this flag should only be used as last resort
+ * choice for probing.
+ */
+#define AV_CODEC_CAP_AVOID_PROBING       (1 << 17)
+
+/**
+ * Codec is backed by a hardware implementation. Typically used to
+ * identify a non-hwaccel hardware decoder. For information about hwaccels, use
+ * avcodec_get_hw_config() instead.
+ */
+#define AV_CODEC_CAP_HARDWARE            (1 << 18)
+
+/**
+ * Codec is potentially backed by a hardware implementation, but not
+ * necessarily. This is used instead of AV_CODEC_CAP_HARDWARE, if the
+ * implementation provides some sort of internal fallback.
+ */
+#define AV_CODEC_CAP_HYBRID              (1 << 19)
+
+/**
+ * This encoder can reorder user opaque values from input AVFrames and return
+ * them with corresponding output packets.
+ * @see AV_CODEC_FLAG_COPY_OPAQUE
+ */
+#define AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE (1 << 20)
+
+/**
+ * This encoder can be flushed using avcodec_flush_buffers(). If this flag is
+ * not set, the encoder must be closed and reopened to ensure that no frames
+ * remain pending.
+ */
+#define AV_CODEC_CAP_ENCODER_FLUSH   (1 << 21)
+
+/**
+ * The encoder is able to output reconstructed frame data, i.e. raw frames that
+ * would be produced by decoding the encoded bitstream.
+ *
+ * Reconstructed frame output is enabled by the AV_CODEC_FLAG_RECON_FRAME flag.
+ */
+#define AV_CODEC_CAP_ENCODER_RECON_FRAME (1 << 22)
+
+/**
+ * AVProfile: pairs a numeric profile value with a short name for it.
+ */
+typedef struct AVProfile {
+    int profile;      ///< numeric profile value
+    const char *name; ///< short name for the profile
+} AVProfile;
+
+/**
+ * AVCodec: public description of one codec implementation.
+ */
+typedef struct AVCodec {
+    /**
+     * Name of the codec implementation.
+     * The name is globally unique among encoders and among decoders (but an
+     * encoder and a decoder can share the same name).
+     * This is the primary way to find a codec from the user perspective.
+     */
+    const char *name;
+    /**
+     * Descriptive name for the codec, meant to be more human readable than name.
+     * You should use the NULL_IF_CONFIG_SMALL() macro to define it.
+     */
+    const char *long_name;
+    enum AVMediaType type; ///< media type of the codec
+    enum AVCodecID id;     ///< codec ID of the codec
+    /**
+     * Codec capabilities.
+     * see AV_CODEC_CAP_*
+     */
+    int capabilities;
+    uint8_t max_lowres;                     ///< maximum value for lowres supported by the decoder
+    const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0}
+    const enum AVPixelFormat *pix_fmts;     ///< array of supported pixel formats, or NULL if unknown, array is terminated by -1
+    const int *supported_samplerates;       ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
+    const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
+#if FF_API_OLD_CHANNEL_LAYOUT
+    /**
+     * @deprecated use ch_layouts instead
+     */
+    attribute_deprecated
+    const uint64_t *channel_layouts;         ///< array of supported channel layouts, or NULL if unknown. array is terminated by 0
+#endif
+    const AVClass *priv_class;              ///< AVClass for the private context
+    const AVProfile *profiles;              ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN}
+
+    /**
+     * Group name of the codec implementation.
+     * This is a short symbolic name of the wrapper backing this codec. A
+     * wrapper uses some kind of external implementation for the codec, such
+     * as an external library, or a codec implementation provided by the OS or
+     * the hardware.
+     * If this field is NULL, this is a builtin, libavcodec native codec.
+     * If non-NULL, this will be the suffix in AVCodec.name in most cases
+     * (usually AVCodec.name will be of the form "<codec_name>_<wrapper_name>").
+     */
+    const char *wrapper_name;
+
+    /**
+     * Array of supported channel layouts, terminated with a zeroed layout.
+     */
+    const AVChannelLayout *ch_layouts;
+} AVCodec;
+
+/**
+ * Iterate over all registered codecs.
+ *
+ * @param opaque a pointer where libavcodec will store the iteration state. Must
+ *               point to NULL to start the iteration.
+ *
+ * @return the next registered codec or NULL when the iteration is
+ *         finished
+ */
+const AVCodec *av_codec_iterate(void **opaque);
+
+/**
+ * Find a registered decoder with a matching codec ID.
+ *
+ * @param id AVCodecID of the requested decoder
+ * @return A decoder if one was found, NULL otherwise.
+ */
+const AVCodec *avcodec_find_decoder(enum AVCodecID id);
+
+/**
+ * Find a registered decoder with the specified name.
+ *
+ * @param name name of the requested decoder
+ * @return A decoder if one was found, NULL otherwise.
+ */
+const AVCodec *avcodec_find_decoder_by_name(const char *name);
+
+/**
+ * Find a registered encoder with a matching codec ID.
+ *
+ * @param id AVCodecID of the requested encoder
+ * @return An encoder if one was found, NULL otherwise.
+ */
+const AVCodec *avcodec_find_encoder(enum AVCodecID id);
+
+/**
+ * Find a registered encoder with the specified name.
+ *
+ * @param name name of the requested encoder
+ * @return An encoder if one was found, NULL otherwise.
+ */
+const AVCodec *avcodec_find_encoder_by_name(const char *name);
+/**
+ * @return a non-zero number if codec is an encoder, zero otherwise
+ */
+int av_codec_is_encoder(const AVCodec *codec);
+
+/**
+ * @return a non-zero number if codec is a decoder, zero otherwise
+ */
+int av_codec_is_decoder(const AVCodec *codec);
+
+/**
+ * Return a name for the specified profile, if available.
+ *
+ * @param codec the codec that is searched for the given profile
+ * @param profile the profile value for which a name is requested
+ * @return A name for the profile if found, NULL otherwise.
+ */
+const char *av_get_profile_name(const AVCodec *codec, int profile);
+
+enum { /* AV_CODEC_HW_CONFIG_METHOD_* flags; combined as a bit set in AVCodecHWConfig.methods */
+    /**
+     * The codec supports this format via the hw_device_ctx interface.
+     *
+     * When selecting this format, AVCodecContext.hw_device_ctx should
+     * have been set to a device of the specified type before calling
+     * avcodec_open2().
+     */
+    AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX = 0x01,
+    /**
+     * The codec supports this format via the hw_frames_ctx interface.
+     *
+     * When selecting this format for a decoder,
+     * AVCodecContext.hw_frames_ctx should be set to a suitable frames
+     * context inside the get_format() callback.  The frames context
+     * must have been created on a device of the specified type.
+     *
+     * When selecting this format for an encoder,
+     * AVCodecContext.hw_frames_ctx should be set to the context which
+     * will be used for the input frames before calling avcodec_open2().
+     */
+    AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX = 0x02,
+    /**
+     * The codec supports this format by some internal method.
+     *
+     * This format can be selected without any additional configuration -
+     * no device or frames context is required.
+     */
+    AV_CODEC_HW_CONFIG_METHOD_INTERNAL      = 0x04,
+    /**
+     * The codec supports this format by some ad-hoc method.
+     *
+     * Additional settings and/or function calls are required.  See the
+     * codec-specific documentation for details.  (Methods requiring
+     * this sort of configuration are deprecated and others should be
+     * used in preference.)
+     */
+    AV_CODEC_HW_CONFIG_METHOD_AD_HOC        = 0x08,
+};
+
+typedef struct AVCodecHWConfig { /* one hardware configuration descriptor, as returned by avcodec_get_hw_config() */
+    /**
+     * For decoders, a hardware pixel format which that decoder may be
+     * able to decode to if suitable hardware is available.
+     *
+     * For encoders, a pixel format which the encoder may be able to
+     * accept.  If set to AV_PIX_FMT_NONE, this applies to all pixel
+     * formats supported by the codec.
+     */
+    enum AVPixelFormat pix_fmt;
+    /**
+     * Bit set of AV_CODEC_HW_CONFIG_METHOD_* flags, describing the possible
+     * setup methods which can be used with this configuration.
+     */
+    int methods;
+    /**
+     * The device type associated with the configuration.
+     *
+     * Must be set for AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX and
+     * AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX, otherwise unused.
+     */
+    enum AVHWDeviceType device_type;
+} AVCodecHWConfig;
+
+/**
+ * Retrieve supported hardware configurations for a codec.
+ *
+ * Values of index from zero to some maximum return the indexed configuration
+ * descriptor; all other values return NULL.  If the codec does not support
+ * any hardware configurations then it will always return NULL.
+ */
+const AVCodecHWConfig *avcodec_get_hw_config(const AVCodec *codec, int index);
+
+/**
+ * @}
+ */
+
+#endif /* AVCODEC_CODEC_H */
diff --git a/media/ffvpx/libavcodec/codec_desc.c b/media/ffvpx/libavcodec/codec_desc.c
new file mode 100644
index 0000000000..199f62df15
--- /dev/null
+++ b/media/ffvpx/libavcodec/codec_desc.c
@@ -0,0 +1,3693 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * This table was generated from the long and short names of AVCodecs
+ * please see the respective codec sources for authorship
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "libavutil/internal.h"
+#include "libavutil/macros.h"
+
+#include "codec_id.h"
+#include "codec_desc.h"
+#include "profiles.h"
+
+#define MT(...) (const char *const[]){ __VA_ARGS__, NULL } /* NULL-terminated string array; used below for .mime_types */
+
+static const AVCodecDescriptor codec_descriptors[] = {
+    /* video codecs */
+    {
+        .id        = AV_CODEC_ID_MPEG1VIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mpeg1video",
+        .long_name = NULL_IF_CONFIG_SMALL("MPEG-1 video"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+    },
+    {
+        .id        = AV_CODEC_ID_MPEG2VIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mpeg2video",
+        .long_name = NULL_IF_CONFIG_SMALL("MPEG-2 video"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_mpeg2_video_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_H261,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "h261",
+        .long_name = NULL_IF_CONFIG_SMALL("H.261"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_H263,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "h263",
+        .long_name = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996, H.263+ / H.263-1998 / H.263 version 2"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+    },
+    {
+        .id        = AV_CODEC_ID_RV10,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rv10",
+        .long_name = NULL_IF_CONFIG_SMALL("RealVideo 1.0"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_RV20,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rv20",
+        .long_name = NULL_IF_CONFIG_SMALL("RealVideo 2.0"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+    },
+    {
+        .id        = AV_CODEC_ID_MJPEG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mjpeg",
+        .long_name = NULL_IF_CONFIG_SMALL("Motion JPEG"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+        .mime_types= MT("image/jpeg"),
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_MJPEGB,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mjpegb",
+        .long_name = NULL_IF_CONFIG_SMALL("Apple MJPEG-B"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_LJPEG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ljpeg",
+        .long_name = NULL_IF_CONFIG_SMALL("Lossless JPEG"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_SP5X,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sp5x",
+        .long_name = NULL_IF_CONFIG_SMALL("Sunplus JPEG (SP5X)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_JPEGLS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "jpegls",
+        .long_name = NULL_IF_CONFIG_SMALL("JPEG-LS"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MPEG4,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mpeg4",
+        .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_mpeg4_video_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_RAWVIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rawvideo",
+        .long_name = NULL_IF_CONFIG_SMALL("raw video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MSMPEG4V1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "msmpeg4v1",
+        .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 1"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MSMPEG4V2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "msmpeg4v2",
+        .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MSMPEG4V3,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "msmpeg4v3",
+        .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WMV1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wmv1",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WMV2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wmv2",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 8"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_H263P,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "h263p",
+        .long_name = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+    },
+    {
+        .id        = AV_CODEC_ID_H263I,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "h263i",
+        .long_name = NULL_IF_CONFIG_SMALL("Intel H.263"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+    },
+    {
+        .id        = AV_CODEC_ID_FLV1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "flv1",
+        .long_name = NULL_IF_CONFIG_SMALL("FLV / Sorenson Spark / Sorenson H.263 (Flash Video)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SVQ1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "svq1",
+        .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1 / Sorenson Video 1 / SVQ1"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SVQ3,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "svq3",
+        .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+    },
+    {
+        .id        = AV_CODEC_ID_DVVIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dvvideo",
+        .long_name = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HUFFYUV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "huffyuv",
+        .long_name = NULL_IF_CONFIG_SMALL("HuffYUV"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_CYUV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cyuv",
+        .long_name = NULL_IF_CONFIG_SMALL("Creative YUV (CYUV)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_H264,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "h264",
+        .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_h264_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_INDEO3,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "indeo3",
+        .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo 3"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VP3,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp3",
+        .long_name = NULL_IF_CONFIG_SMALL("On2 VP3"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_THEORA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "theora",
+        .long_name = NULL_IF_CONFIG_SMALL("Theora"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ASV1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "asv1",
+        .long_name = NULL_IF_CONFIG_SMALL("ASUS V1"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ASV2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "asv2",
+        .long_name = NULL_IF_CONFIG_SMALL("ASUS V2"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_FFV1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ffv1",
+        .long_name = NULL_IF_CONFIG_SMALL("FFmpeg video codec #1"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_4XM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "4xm",
+        .long_name = NULL_IF_CONFIG_SMALL("4X Movie"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VCR1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vcr1",
+        .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CLJR,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cljr",
+        .long_name = NULL_IF_CONFIG_SMALL("Cirrus Logic AccuPak"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MDEC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mdec",
+        .long_name = NULL_IF_CONFIG_SMALL("Sony PlayStation MDEC (Motion DECoder)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ROQ,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "roq",
+        .long_name = NULL_IF_CONFIG_SMALL("id RoQ video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_INTERPLAY_VIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "interplayvideo",
+        .long_name = NULL_IF_CONFIG_SMALL("Interplay MVE video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_XAN_WC3,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xan_wc3",
+        .long_name = NULL_IF_CONFIG_SMALL("Wing Commander III / Xan"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_XAN_WC4,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xan_wc4",
+        .long_name = NULL_IF_CONFIG_SMALL("Wing Commander IV / Xxan"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_RPZA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rpza",
+        .long_name = NULL_IF_CONFIG_SMALL("QuickTime video (RPZA)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CINEPAK,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cinepak",
+        .long_name = NULL_IF_CONFIG_SMALL("Cinepak"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WS_VQA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ws_vqa",
+        .long_name = NULL_IF_CONFIG_SMALL("Westwood Studios VQA (Vector Quantized Animation) video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MSRLE,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "msrle",
+        .long_name = NULL_IF_CONFIG_SMALL("Microsoft RLE"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MSVIDEO1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "msvideo1",
+        .long_name = NULL_IF_CONFIG_SMALL("Microsoft Video 1"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_IDCIN,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "idcin",
+        .long_name = NULL_IF_CONFIG_SMALL("id Quake II CIN video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_8BPS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "8bps",
+        .long_name = NULL_IF_CONFIG_SMALL("QuickTime 8BPS video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_SMC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "smc",
+        .long_name = NULL_IF_CONFIG_SMALL("QuickTime Graphics (SMC)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_FLIC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "flic",
+        .long_name = NULL_IF_CONFIG_SMALL("Autodesk Animator Flic video"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_TRUEMOTION1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "truemotion1",
+        .long_name = NULL_IF_CONFIG_SMALL("Duck TrueMotion 1.0"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VMDVIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vmdvideo",
+        .long_name = NULL_IF_CONFIG_SMALL("Sierra VMD video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MSZH,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mszh",
+        .long_name = NULL_IF_CONFIG_SMALL("LCL (LossLess Codec Library) MSZH"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ZLIB,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "zlib",
+        .long_name = NULL_IF_CONFIG_SMALL("LCL (LossLess Codec Library) ZLIB"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_QTRLE,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "qtrle",
+        .long_name = NULL_IF_CONFIG_SMALL("QuickTime Animation (RLE) video"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_TSCC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "tscc",
+        .long_name = NULL_IF_CONFIG_SMALL("TechSmith Screen Capture Codec"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ULTI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ulti",
+        .long_name = NULL_IF_CONFIG_SMALL("IBM UltiMotion"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_QDRAW,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "qdraw",
+        .long_name = NULL_IF_CONFIG_SMALL("Apple QuickDraw"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_VIXL,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vixl",
+        .long_name = NULL_IF_CONFIG_SMALL("Miro VideoXL"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_QPEG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "qpeg",
+        .long_name = NULL_IF_CONFIG_SMALL("Q-team QPEG"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PNG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "png",
+        .long_name = NULL_IF_CONFIG_SMALL("PNG (Portable Network Graphics) image"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/png"),
+    },
+    {
+        .id        = AV_CODEC_ID_PPM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ppm",
+        .long_name = NULL_IF_CONFIG_SMALL("PPM (Portable PixelMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PBM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pbm",
+        .long_name = NULL_IF_CONFIG_SMALL("PBM (Portable BitMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PGM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pgm",
+        .long_name = NULL_IF_CONFIG_SMALL("PGM (Portable GrayMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PGMYUV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pgmyuv",
+        .long_name = NULL_IF_CONFIG_SMALL("PGMYUV (Portable GrayMap YUV) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PAM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pam",
+        .long_name = NULL_IF_CONFIG_SMALL("PAM (Portable AnyMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-portable-pixmap"),
+    },
+    {
+        .id        = AV_CODEC_ID_FFVHUFF,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ffvhuff",
+        .long_name = NULL_IF_CONFIG_SMALL("Huffyuv FFmpeg variant"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_RV30,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rv30",
+        .long_name = NULL_IF_CONFIG_SMALL("RealVideo 3.0"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+    },
+    {
+        .id        = AV_CODEC_ID_RV40,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rv40",
+        .long_name = NULL_IF_CONFIG_SMALL("RealVideo 4.0"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+    },
+    {
+        .id        = AV_CODEC_ID_VC1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vc1",
+        .long_name = NULL_IF_CONFIG_SMALL("SMPTE VC-1"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_vc1_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_WMV3,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wmv3",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_vc1_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_LOCO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "loco",
+        .long_name = NULL_IF_CONFIG_SMALL("LOCO"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_WNV1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wnv1",
+        .long_name = NULL_IF_CONFIG_SMALL("Winnov WNV1"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_AASC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "aasc",
+        .long_name = NULL_IF_CONFIG_SMALL("Autodesk RLE"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_INDEO2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "indeo2",
+        .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo 2"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_FRAPS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "fraps",
+        .long_name = NULL_IF_CONFIG_SMALL("Fraps"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_TRUEMOTION2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "truemotion2",
+        .long_name = NULL_IF_CONFIG_SMALL("Duck TrueMotion 2.0"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_BMP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "bmp",
+        .long_name = NULL_IF_CONFIG_SMALL("BMP (Windows and OS/2 bitmap)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-ms-bmp"),
+    },
+    {
+        .id        = AV_CODEC_ID_CSCD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cscd",
+        .long_name = NULL_IF_CONFIG_SMALL("CamStudio"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MMVIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mmvideo",
+        .long_name = NULL_IF_CONFIG_SMALL("American Laser Games MM Video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ZMBV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "zmbv",
+        .long_name = NULL_IF_CONFIG_SMALL("Zip Motion Blocks Video"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_AVS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "avs",
+        .long_name = NULL_IF_CONFIG_SMALL("AVS (Audio Video Standard) video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SMACKVIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "smackvideo",
+        .long_name = NULL_IF_CONFIG_SMALL("Smacker video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_NUV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "nuv",
+        .long_name = NULL_IF_CONFIG_SMALL("NuppelVideo/RTJPEG"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_KMVC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "kmvc",
+        .long_name = NULL_IF_CONFIG_SMALL("Karl Morton's video codec"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_FLASHSV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "flashsv",
+        .long_name = NULL_IF_CONFIG_SMALL("Flash Screen Video v1"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_CAVS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cavs",
+        .long_name = NULL_IF_CONFIG_SMALL("Chinese AVS (Audio Video Standard) (AVS1-P2, JiZhun profile)"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+    },
+    {
+        .id        = AV_CODEC_ID_JPEG2000,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "jpeg2000",
+        .long_name = NULL_IF_CONFIG_SMALL("JPEG 2000"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/jp2"),
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_jpeg2000_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_VMNC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vmnc",
+        .long_name = NULL_IF_CONFIG_SMALL("VMware Screen Codec / VMware Video"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_VP5,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp5",
+        .long_name = NULL_IF_CONFIG_SMALL("On2 VP5"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VP6,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp6",
+        .long_name = NULL_IF_CONFIG_SMALL("On2 VP6"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VP6F,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp6f",
+        .long_name = NULL_IF_CONFIG_SMALL("On2 VP6 (Flash version)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TARGA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "targa",
+        .long_name = NULL_IF_CONFIG_SMALL("Truevision Targa image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-targa", "image/x-tga"),
+    },
+    {
+        .id        = AV_CODEC_ID_DSICINVIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dsicinvideo",
+        .long_name = NULL_IF_CONFIG_SMALL("Delphine Software International CIN video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TIERTEXSEQVIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "tiertexseqvideo",
+        .long_name = NULL_IF_CONFIG_SMALL("Tiertex Limited SEQ video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TIFF,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "tiff",
+        .long_name = NULL_IF_CONFIG_SMALL("TIFF image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/tiff"),
+    },
+    {
+        .id        = AV_CODEC_ID_GIF,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "gif",
+        .long_name = NULL_IF_CONFIG_SMALL("CompuServe GIF (Graphics Interchange Format)"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/gif"),
+    },
+    {
+        .id        = AV_CODEC_ID_DXA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dxa",
+        .long_name = NULL_IF_CONFIG_SMALL("Feeble Files/ScummVM DXA"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_DNXHD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dnxhd",
+        .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_dnxhd_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_THP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "thp",
+        .long_name = NULL_IF_CONFIG_SMALL("Nintendo Gamecube THP video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SGI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sgi",
+        .long_name = NULL_IF_CONFIG_SMALL("SGI image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_C93,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "c93",
+        .long_name = NULL_IF_CONFIG_SMALL("Interplay C93"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_BETHSOFTVID,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "bethsoftvid",
+        .long_name = NULL_IF_CONFIG_SMALL("Bethesda VID video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PTX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ptx",
+        .long_name = NULL_IF_CONFIG_SMALL("V.Flash PTX image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TXD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "txd",
+        .long_name = NULL_IF_CONFIG_SMALL("Renderware TXD (TeXture Dictionary) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VP6A,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp6a",
+        .long_name = NULL_IF_CONFIG_SMALL("On2 VP6 (Flash version, with alpha channel)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_AMV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "amv",
+        .long_name = NULL_IF_CONFIG_SMALL("AMV Video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VB,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vb",
+        .long_name = NULL_IF_CONFIG_SMALL("Beam Software VB"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PCX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pcx",
+        .long_name = NULL_IF_CONFIG_SMALL("PC Paintbrush PCX image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-pcx"),
+    },
+    {
+        .id        = AV_CODEC_ID_SUNRAST,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sunrast",
+        .long_name = NULL_IF_CONFIG_SMALL("Sun Rasterfile image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_INDEO4,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "indeo4",
+        .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo Video Interactive 4"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_INDEO5,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "indeo5",
+        .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo Video Interactive 5"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MIMIC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mimic",
+        .long_name = NULL_IF_CONFIG_SMALL("Mimic"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_RL2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rl2",
+        .long_name = NULL_IF_CONFIG_SMALL("RL2 video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ESCAPE124,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "escape124",
+        .long_name = NULL_IF_CONFIG_SMALL("Escape 124"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DIRAC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dirac",
+        .long_name = NULL_IF_CONFIG_SMALL("Dirac"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS | AV_CODEC_PROP_REORDER,
+    },
+    {
+        .id        = AV_CODEC_ID_BFI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "bfi",
+        .long_name = NULL_IF_CONFIG_SMALL("Brute Force & Ignorance"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CMV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cmv",
+        .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts CMV video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MOTIONPIXELS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "motionpixels",
+        .long_name = NULL_IF_CONFIG_SMALL("Motion Pixels video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TGV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "tgv",
+        .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts TGV video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TGQ,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "tgq",
+        .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts TGQ video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TQI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "tqi",
+        .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts TQI video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_AURA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "aura",
+        .long_name = NULL_IF_CONFIG_SMALL("Auravision AURA"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_AURA2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "aura2",
+        .long_name = NULL_IF_CONFIG_SMALL("Auravision Aura 2"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_V210X,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "v210x",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_TMV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "tmv",
+        .long_name = NULL_IF_CONFIG_SMALL("8088flex TMV"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_V210,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "v210",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_DPX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dpx",
+        .long_name = NULL_IF_CONFIG_SMALL("DPX (Digital Picture Exchange) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MAD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mad",
+        .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts Madcow Video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_FRWU,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "frwu",
+        .long_name = NULL_IF_CONFIG_SMALL("Forward Uncompressed"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_FLASHSV2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "flashsv2",
+        .long_name = NULL_IF_CONFIG_SMALL("Flash Screen Video v2"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CDGRAPHICS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cdgraphics",
+        .long_name = NULL_IF_CONFIG_SMALL("CD Graphics video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_R210,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "r210",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed RGB 10-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ANM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "anm",
+        .long_name = NULL_IF_CONFIG_SMALL("Deluxe Paint Animation"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_BINKVIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "binkvideo",
+        .long_name = NULL_IF_CONFIG_SMALL("Bink video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_IFF_ILBM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "iff_ilbm",
+        .long_name = NULL_IF_CONFIG_SMALL("IFF ACBM/ANIM/DEEP/ILBM/PBM/RGB8/RGBN"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_KGV1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "kgv1",
+        .long_name = NULL_IF_CONFIG_SMALL("Kega Game Video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_YOP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "yop",
+        .long_name = NULL_IF_CONFIG_SMALL("Psygnosis YOP Video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VP8,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp8",
+        .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PICTOR,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pictor",
+        .long_name = NULL_IF_CONFIG_SMALL("Pictor/PC Paint"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ANSI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ansi",
+        .long_name = NULL_IF_CONFIG_SMALL("ASCII/ANSI art"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_A64_MULTI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "a64_multi",
+        .long_name = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_A64_MULTI5,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "a64_multi5",
+        .long_name = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64, extended with 5th color (colram)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_R10K,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "r10k",
+        .long_name = NULL_IF_CONFIG_SMALL("AJA Kona 10-bit RGB Codec"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MXPEG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mxpeg",
+        .long_name = NULL_IF_CONFIG_SMALL("Mobotix MxPEG video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_LAGARITH,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "lagarith",
+        .long_name = NULL_IF_CONFIG_SMALL("Lagarith lossless"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PRORES,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "prores",
+        .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_JV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "jv",
+        .long_name = NULL_IF_CONFIG_SMALL("Bitmap Brothers JV video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DFA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dfa",
+        .long_name = NULL_IF_CONFIG_SMALL("Chronomaster DFA"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WMV3IMAGE,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wmv3image",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 Image"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VC1IMAGE,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vc1image",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 Image v2"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_UTVIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "utvideo",
+        .long_name = NULL_IF_CONFIG_SMALL("Ut Video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_BMV_VIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "bmv_video",
+        .long_name = NULL_IF_CONFIG_SMALL("Discworld II BMV video"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_VBLE,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vble",
+        .long_name = NULL_IF_CONFIG_SMALL("VBLE Lossless Codec"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_DXTORY,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dxtory",
+        .long_name = NULL_IF_CONFIG_SMALL("Dxtory"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_V410,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "v410",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:4:4 10-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_XWD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xwd",
+        .long_name = NULL_IF_CONFIG_SMALL("XWD (X Window Dump) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-xwindowdump"),
+    },
+    {
+        .id        = AV_CODEC_ID_CDXL,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cdxl",
+        .long_name = NULL_IF_CONFIG_SMALL("Commodore CDXL video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_XBM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xbm",
+        .long_name = NULL_IF_CONFIG_SMALL("XBM (X BitMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-xbitmap"),
+    },
+    {
+        .id        = AV_CODEC_ID_ZEROCODEC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "zerocodec",
+        .long_name = NULL_IF_CONFIG_SMALL("ZeroCodec Lossless Video"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MSS1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mss1",
+        .long_name = NULL_IF_CONFIG_SMALL("MS Screen 1"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MSA1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "msa1",
+        .long_name = NULL_IF_CONFIG_SMALL("MS ATC Screen"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TSCC2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "tscc2",
+        .long_name = NULL_IF_CONFIG_SMALL("TechSmith Screen Codec 2"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MTS2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mts2",
+        .long_name = NULL_IF_CONFIG_SMALL("MS Expression Encoder Screen"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CLLC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cllc",
+        .long_name = NULL_IF_CONFIG_SMALL("Canopus Lossless Codec"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MSS2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mss2",
+        .long_name = NULL_IF_CONFIG_SMALL("MS Windows Media Video V9 Screen"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VP9,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp9",
+        .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
+        .props     = AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_AIC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "aic",
+        .long_name = NULL_IF_CONFIG_SMALL("Apple Intermediate Codec"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ESCAPE130,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "escape130",
+        .long_name = NULL_IF_CONFIG_SMALL("Escape 130"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_G2M,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "g2m",
+        .long_name = NULL_IF_CONFIG_SMALL("Go2Meeting"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WEBP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "webp",
+        .long_name = NULL_IF_CONFIG_SMALL("WebP"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/webp"),
+    },
+    {
+        .id        = AV_CODEC_ID_HNM4_VIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hnm4video",
+        .long_name = NULL_IF_CONFIG_SMALL("HNM 4 video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HEVC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hevc",
+        .long_name = NULL_IF_CONFIG_SMALL("H.265 / HEVC (High Efficiency Video Coding)"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_FIC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "fic",
+        .long_name = NULL_IF_CONFIG_SMALL("Mirillis FIC"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ALIAS_PIX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "alias_pix",
+        .long_name = NULL_IF_CONFIG_SMALL("Alias/Wavefront PIX image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_BRENDER_PIX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "brender_pix",
+        .long_name = NULL_IF_CONFIG_SMALL("BRender PIX image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PAF_VIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "paf_video",
+        .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_EXR,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "exr",
+        .long_name = NULL_IF_CONFIG_SMALL("OpenEXR image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_VP7,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp7",
+        .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SANM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sanm",
+        .long_name = NULL_IF_CONFIG_SMALL("LucasArts SANM/SMUSH video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SGIRLE,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sgirle",
+        .long_name = NULL_IF_CONFIG_SMALL("SGI RLE 8-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MVC1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mvc1",
+        .long_name = NULL_IF_CONFIG_SMALL("Silicon Graphics Motion Video Compressor 1"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MVC2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mvc2",
+        .long_name = NULL_IF_CONFIG_SMALL("Silicon Graphics Motion Video Compressor 2"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HQX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hqx",
+        .long_name = NULL_IF_CONFIG_SMALL("Canopus HQX"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TDSC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "tdsc",
+        .long_name = NULL_IF_CONFIG_SMALL("TDSC"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HQ_HQA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hq_hqa",
+        .long_name = NULL_IF_CONFIG_SMALL("Canopus HQ/HQA"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HAP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hap",
+        .long_name = NULL_IF_CONFIG_SMALL("Vidvox Hap"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DDS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dds",
+        .long_name = NULL_IF_CONFIG_SMALL("DirectDraw Surface image decoder"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_DXV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dxv",
+        .long_name = NULL_IF_CONFIG_SMALL("Resolume DXV"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SCREENPRESSO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "screenpresso",
+        .long_name = NULL_IF_CONFIG_SMALL("Screenpresso"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_RSCC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rscc",
+        .long_name = NULL_IF_CONFIG_SMALL("innoHeim/Rsupport Screen Capture Codec"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_AVS2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "avs2",
+        .long_name = NULL_IF_CONFIG_SMALL("AVS2-P2/IEEE1857.4"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PGX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pgx",
+        .long_name = NULL_IF_CONFIG_SMALL("PGX (JPEG2000 Test Format)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_AVS3,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "avs3",
+        .long_name = NULL_IF_CONFIG_SMALL("AVS3-P2/IEEE1857.10"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MSP2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "msp2",
+        .long_name = NULL_IF_CONFIG_SMALL("Microsoft Paint (MSP) version 2"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_VVC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vvc",
+        .long_name = NULL_IF_CONFIG_SMALL("H.266 / VVC (Versatile Video Coding)"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_vvc_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_Y41P,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "y41p",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed YUV 4:1:1 12-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_AVRP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "avrp",
+        .long_name = NULL_IF_CONFIG_SMALL("Avid 1:1 10-bit RGB Packer"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_012V,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "012v",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_AVUI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "avui",
+        .long_name = NULL_IF_CONFIG_SMALL("Avid Meridien Uncompressed"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+#if FF_API_AYUV_CODECID
+    {
+        .id        = AV_CODEC_ID_AYUV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ayuv",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed MS 4:4:4:4"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+#endif
+    {
+        .id        = AV_CODEC_ID_TARGA_Y216,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "targa_y216",
+        .long_name = NULL_IF_CONFIG_SMALL("Pinnacle TARGA CineWave YUV16"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_V308,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "v308",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:4:4"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_V408,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "v408",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed QT 4:4:4:4"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_YUV4,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "yuv4",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:2:0"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_AVRN,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "avrn",
+        .long_name = NULL_IF_CONFIG_SMALL("Avid AVI Codec"),
+    },
+    {
+        .id        = AV_CODEC_ID_CPIA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cpia",
+        .long_name = NULL_IF_CONFIG_SMALL("CPiA video format"),
+    },
+    {
+        .id        = AV_CODEC_ID_XFACE,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xface",
+        .long_name = NULL_IF_CONFIG_SMALL("X-face image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SNOW,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "snow",
+        .long_name = NULL_IF_CONFIG_SMALL("Snow"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_SMVJPEG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "smvjpeg",
+        .long_name = NULL_IF_CONFIG_SMALL("Sigmatel Motion Video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_APNG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "apng",
+        .long_name = NULL_IF_CONFIG_SMALL("APNG (Animated Portable Network Graphics) image"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/png"),
+    },
+    {
+        .id        = AV_CODEC_ID_DAALA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "daala",
+        .long_name = NULL_IF_CONFIG_SMALL("Daala"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_CFHD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cfhd",
+        .long_name = NULL_IF_CONFIG_SMALL("GoPro CineForm HD"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TRUEMOTION2RT,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "truemotion2rt",
+        .long_name = NULL_IF_CONFIG_SMALL("Duck TrueMotion 2.0 Real Time"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_M101,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "m101",
+        .long_name = NULL_IF_CONFIG_SMALL("Matrox Uncompressed SD"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MAGICYUV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "magicyuv",
+        .long_name = NULL_IF_CONFIG_SMALL("MagicYUV video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_SHEERVIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sheervideo",
+        .long_name = NULL_IF_CONFIG_SMALL("BitJazz SheerVideo"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_YLC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ylc",
+        .long_name = NULL_IF_CONFIG_SMALL("YUY2 Lossless Codec"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PSD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "psd",
+        .long_name = NULL_IF_CONFIG_SMALL("Photoshop PSD file"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PIXLET,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pixlet",
+        .long_name = NULL_IF_CONFIG_SMALL("Apple Pixlet"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SPEEDHQ,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "speedhq",
+        .long_name = NULL_IF_CONFIG_SMALL("NewTek SpeedHQ"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_FMVC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "fmvc",
+        .long_name = NULL_IF_CONFIG_SMALL("FM Screen Capture Codec"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_SCPR,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "scpr",
+        .long_name = NULL_IF_CONFIG_SMALL("ScreenPressor"),
+        .props     = AV_CODEC_PROP_LOSSLESS | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CLEARVIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "clearvideo",
+        .long_name = NULL_IF_CONFIG_SMALL("Iterated Systems ClearVideo"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_XPM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xpm",
+        .long_name = NULL_IF_CONFIG_SMALL("XPM (X PixMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-xpixmap"),
+    },
+    {
+        .id        = AV_CODEC_ID_AV1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "av1",
+        .long_name = NULL_IF_CONFIG_SMALL("Alliance for Open Media AV1"),
+        .props     = AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_BITPACKED,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "bitpacked",
+        .long_name = NULL_IF_CONFIG_SMALL("Bitpacked"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MSCC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mscc",
+        .long_name = NULL_IF_CONFIG_SMALL("Mandsoft Screen Capture Codec"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_SRGC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "srgc",
+        .long_name = NULL_IF_CONFIG_SMALL("Screen Recorder Gold Codec"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_SVG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "svg",
+        .long_name = NULL_IF_CONFIG_SMALL("Scalable Vector Graphics"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/svg+xml"),
+    },
+    {
+        .id        = AV_CODEC_ID_GDV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "gdv",
+        .long_name = NULL_IF_CONFIG_SMALL("Gremlin Digital Video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_FITS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "fits",
+        .long_name = NULL_IF_CONFIG_SMALL("FITS (Flexible Image Transport System)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_IMM4,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "imm4",
+        .long_name = NULL_IF_CONFIG_SMALL("Infinity IMM4"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PROSUMER,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "prosumer",
+        .long_name = NULL_IF_CONFIG_SMALL("Brooktree ProSumer Video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MWSC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mwsc",
+        .long_name = NULL_IF_CONFIG_SMALL("MatchWare Screen Capture Codec"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_WCMV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wcmv",
+        .long_name = NULL_IF_CONFIG_SMALL("WinCAM Motion Video"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_RASC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rasc",
+        .long_name = NULL_IF_CONFIG_SMALL("RemotelyAnywhere Screen Capture"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HYMT,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hymt",
+        .long_name = NULL_IF_CONFIG_SMALL("HuffYUV MT"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ARBC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "arbc",
+        .long_name = NULL_IF_CONFIG_SMALL("Gryphon's Anim Compressor"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_AGM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "agm",
+        .long_name = NULL_IF_CONFIG_SMALL("Amuse Graphics Movie"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_LSCR,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "lscr",
+        .long_name = NULL_IF_CONFIG_SMALL("LEAD Screen Capture"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VP4,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp4",
+        .long_name = NULL_IF_CONFIG_SMALL("On2 VP4"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_IMM5,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "imm5",
+        .long_name = NULL_IF_CONFIG_SMALL("Infinity IMM5"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MVDV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mvdv",
+        .long_name = NULL_IF_CONFIG_SMALL("MidiVid VQ"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MVHA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mvha",
+        .long_name = NULL_IF_CONFIG_SMALL("MidiVid Archive Codec"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CDTOONS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cdtoons",
+        .long_name = NULL_IF_CONFIG_SMALL("CDToons video"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MV30,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mv30",
+        .long_name = NULL_IF_CONFIG_SMALL("MidiVid 3.0"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_NOTCHLC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "notchlc",
+        .long_name = NULL_IF_CONFIG_SMALL("NotchLC"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PFM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pfm",
+        .long_name = NULL_IF_CONFIG_SMALL("PFM (Portable FloatMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MOBICLIP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mobiclip",
+        .long_name = NULL_IF_CONFIG_SMALL("MobiClip Video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PHOTOCD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "photocd",
+        .long_name = NULL_IF_CONFIG_SMALL("Kodak Photo CD"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_IPU,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ipu",
+        .long_name = NULL_IF_CONFIG_SMALL("IPU Video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ARGO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "argo",
+        .long_name = NULL_IF_CONFIG_SMALL("Argonaut Games Video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CRI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cri",
+        .long_name = NULL_IF_CONFIG_SMALL("Cintel RAW"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_SIMBIOSIS_IMX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "simbiosis_imx",
+        .long_name = NULL_IF_CONFIG_SMALL("Simbiosis Interactive IMX Video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SGA_VIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sga",
+        .long_name = NULL_IF_CONFIG_SMALL("Digital Pictures SGA Video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_GEM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "gem",
+        .long_name = NULL_IF_CONFIG_SMALL("GEM Raster image"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VBN,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vbn",
+        .long_name = NULL_IF_CONFIG_SMALL("Vizrt Binary Image"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_JPEGXL,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "jpegxl",
+        .long_name = NULL_IF_CONFIG_SMALL("JPEG XL"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/jxl"),
+    },
+    {
+        .id        = AV_CODEC_ID_QOI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "qoi",
+        .long_name = NULL_IF_CONFIG_SMALL("QOI (Quite OK Image)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PHM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "phm",
+        .long_name = NULL_IF_CONFIG_SMALL("PHM (Portable HalfFloatMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_RADIANCE_HDR,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hdr",
+        .long_name = NULL_IF_CONFIG_SMALL("HDR (Radiance RGBE format) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WBMP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wbmp",
+        .long_name = NULL_IF_CONFIG_SMALL("WBMP (Wireless Application Protocol Bitmap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MEDIA100,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "media100",
+        .long_name = NULL_IF_CONFIG_SMALL("Media 100i"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VQC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vqc",
+        .long_name = NULL_IF_CONFIG_SMALL("ViewQuest VQC"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+
+    /* various PCM "codecs" */
+    {
+        .id        = AV_CODEC_ID_PCM_S16LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s16le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S16BE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s16be",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit big-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_U16LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_u16le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 16-bit little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_U16BE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_u16be",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 16-bit big-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S8,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s8",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 8-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_U8,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_u8",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 8-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_MULAW,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_mulaw",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM mu-law / G.711 mu-law"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_ALAW,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_alaw",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM A-law / G.711 A-law"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S32LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s32le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S32BE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s32be",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit big-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_U32LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_u32le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 32-bit little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_U32BE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_u32be",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 32-bit big-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S24LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s24le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S24BE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s24be",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit big-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_U24LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_u24le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 24-bit little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_U24BE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_u24be",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 24-bit big-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S24DAUD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s24daud",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM D-Cinema audio signed 24-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S16LE_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s16le_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit little-endian planar"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_DVD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_dvd",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 20|24-bit big-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_F32BE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_f32be",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM 32-bit floating point big-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_F32LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_f32le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM 32-bit floating point little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_F64BE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_f64be",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM 64-bit floating point big-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_F64LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_f64le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM 64-bit floating point little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_BLURAY,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_bluray",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16|20|24-bit big-endian for Blu-ray media"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_LXF,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_lxf",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 20-bit little-endian planar"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_S302M,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "s302m",
+        .long_name = NULL_IF_CONFIG_SMALL("SMPTE 302M"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S8_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s8_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 8-bit planar"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S24LE_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s24le_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit little-endian planar"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S32LE_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s32le_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit little-endian planar"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S16BE_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s16be_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit big-endian planar"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S64LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s64le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 64-bit little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S64BE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s64be",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 64-bit big-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_F16LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_f16le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM 16.8 floating point little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_F24LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_f24le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM 24.0 floating point little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_VIDC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_vidc",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM Archimedes VIDC"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_SGA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_sga",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM SGA"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+
+    /* various ADPCM codecs */
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_QT,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_qt",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA QuickTime"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_WAV,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_wav",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA WAV"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_DK3,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_dk3",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Duck DK3"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_DK4,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_dk4",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Duck DK4"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_WS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_ws",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Westwood"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_SMJPEG,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_smjpeg",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Loki SDL MJPEG"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_MS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ms",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Microsoft"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_4XM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_4xm",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM 4X Movie"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_XA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_xa",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM CDROM XA"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_ADX,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_adx",
+        .long_name = NULL_IF_CONFIG_SMALL("SEGA CRI ADX ADPCM"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_EA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ea",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_G726,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_g726",
+        .long_name = NULL_IF_CONFIG_SMALL("G.726 ADPCM"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_CT,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ct",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Creative Technology"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_SWF,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_swf",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Shockwave Flash"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_YAMAHA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_yamaha",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Yamaha"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_SBPRO_4,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_sbpro_4",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Sound Blaster Pro 4-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_SBPRO_3,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_sbpro_3",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Sound Blaster Pro 2.6-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_SBPRO_2,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_sbpro_2",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Sound Blaster Pro 2-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_THP,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_thp",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo THP"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_AMV,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_amv",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA AMV"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_EA_R1,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ea_r1",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts R1"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_EA_R3,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ea_r3",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts R3"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_EA_R2,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ea_r2",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts R2"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_EA_SEAD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_ea_sead",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Electronic Arts SEAD"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_EA_EACS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_ea_eacs",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Electronic Arts EACS"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_EA_XAS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ea_xas",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts XAS"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_EA_MAXIS_XA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ea_maxis_xa",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts Maxis CDROM XA"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_ISS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_iss",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Funcom ISS"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_G722,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_g722",
+        .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_APC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_apc",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA CRYO APC"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_VIMA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_vima",
+        .long_name = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_AFC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_afc",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo Gamecube AFC"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_OKI,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_oki",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Dialogic OKI"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_DTK,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_dtk",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo Gamecube DTK"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_RAD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_rad",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Radical"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_G726LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_g726le",
+        .long_name = NULL_IF_CONFIG_SMALL("G.726 ADPCM little-endian"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_THP_LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_thp_le",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo THP (Little-Endian)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_PSX,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_psx",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Playstation"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_AICA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_aica",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Yamaha AICA"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_DAT4,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_dat4",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Eurocom DAT4"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_MTAF,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_mtaf",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM MTAF"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_AGM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_agm",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM AmuseGraphics Movie AGM"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_ARGO,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_argo",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Argonaut Games"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_SSI,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_ssi",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Simon & Schuster Interactive"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_ZORK,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_zork",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Zork"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_APM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_apm",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Ubisoft APM"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_ALP,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_alp",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA High Voltage Software ALP"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_MTF,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_mtf",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Capcom's MT Framework"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_CUNNING,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_cunning",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Cunning Developments"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_MOFLEX,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_moflex",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA MobiClip MOFLEX"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_ACORN,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_acorn",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Acorn Replay"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_XMD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_xmd",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Konami XMD"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+
+    /* AMR */
+    {
+        .id        = AV_CODEC_ID_AMR_NB,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "amr_nb",
+        .long_name = NULL_IF_CONFIG_SMALL("AMR-NB (Adaptive Multi-Rate NarrowBand)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_AMR_WB,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "amr_wb",
+        .long_name = NULL_IF_CONFIG_SMALL("AMR-WB (Adaptive Multi-Rate WideBand)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+
+    /* RealAudio codecs */
+    {
+        .id        = AV_CODEC_ID_RA_144,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "ra_144",
+        .long_name = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_RA_288,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "ra_288",
+        .long_name = NULL_IF_CONFIG_SMALL("RealAudio 2.0 (28.8K)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+
+    /* various DPCM codecs */
+    {
+        .id        = AV_CODEC_ID_ROQ_DPCM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "roq_dpcm",
+        .long_name = NULL_IF_CONFIG_SMALL("DPCM id RoQ"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_INTERPLAY_DPCM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "interplay_dpcm",
+        .long_name = NULL_IF_CONFIG_SMALL("DPCM Interplay"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_XAN_DPCM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "xan_dpcm",
+        .long_name = NULL_IF_CONFIG_SMALL("DPCM Xan"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SOL_DPCM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sol_dpcm",
+        .long_name = NULL_IF_CONFIG_SMALL("DPCM Sol"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SDX2_DPCM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sdx2_dpcm",
+        .long_name = NULL_IF_CONFIG_SMALL("DPCM Squareroot-Delta-Exact"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_GREMLIN_DPCM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "gremlin_dpcm",
+        .long_name = NULL_IF_CONFIG_SMALL("DPCM Gremlin"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DERF_DPCM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "derf_dpcm",
+        .long_name = NULL_IF_CONFIG_SMALL("DPCM Xilam DERF"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WADY_DPCM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "wady_dpcm",
+        .long_name = NULL_IF_CONFIG_SMALL("DPCM Marble WADY"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CBD2_DPCM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "cbd2_dpcm",
+        .long_name = NULL_IF_CONFIG_SMALL("DPCM Cuberoot-Delta-Exact"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+
+    /* audio codecs */
+    {
+        .id        = AV_CODEC_ID_MP2,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "mp2",
+        .long_name = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MP3,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "mp3",
+        .long_name = NULL_IF_CONFIG_SMALL("MP3 (MPEG audio layer 3)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_AAC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "aac",
+        .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_AC3,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "ac3",
+        .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DTS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dts",
+        .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_dca_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_VORBIS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "vorbis",
+        .long_name = NULL_IF_CONFIG_SMALL("Vorbis"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DVAUDIO,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dvaudio",
+        .long_name = NULL_IF_CONFIG_SMALL("DV audio"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WMAV1,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "wmav1",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WMAV2,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "wmav2",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MACE3,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "mace3",
+        .long_name = NULL_IF_CONFIG_SMALL("MACE (Macintosh Audio Compression/Expansion) 3:1"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MACE6,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "mace6",
+        .long_name = NULL_IF_CONFIG_SMALL("MACE (Macintosh Audio Compression/Expansion) 6:1"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VMDAUDIO,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "vmdaudio",
+        .long_name = NULL_IF_CONFIG_SMALL("Sierra VMD audio"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_FLAC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "flac",
+        .long_name = NULL_IF_CONFIG_SMALL("FLAC (Free Lossless Audio Codec)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MP3ADU,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "mp3adu",
+        .long_name = NULL_IF_CONFIG_SMALL("ADU (Application Data Unit) MP3 (MPEG audio layer 3)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MP3ON4,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "mp3on4",
+        .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SHORTEN,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "shorten",
+        .long_name = NULL_IF_CONFIG_SMALL("Shorten"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ALAC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "alac",
+        .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_WESTWOOD_SND1,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "westwood_snd1",
+        .long_name = NULL_IF_CONFIG_SMALL("Westwood Audio (SND1)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_GSM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "gsm",
+        .long_name = NULL_IF_CONFIG_SMALL("GSM"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_QDM2,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "qdm2",
+        .long_name = NULL_IF_CONFIG_SMALL("QDesign Music Codec 2"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_COOK,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "cook",
+        .long_name = NULL_IF_CONFIG_SMALL("Cook / Cooker / Gecko (RealAudio G2)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TRUESPEECH,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "truespeech",
+        .long_name = NULL_IF_CONFIG_SMALL("DSP Group TrueSpeech"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TTA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "tta",
+        .long_name = NULL_IF_CONFIG_SMALL("TTA (True Audio)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_SMACKAUDIO,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "smackaudio",
+        .long_name = NULL_IF_CONFIG_SMALL("Smacker audio"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_QCELP,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "qcelp",
+        .long_name = NULL_IF_CONFIG_SMALL("QCELP / PureVoice"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WAVPACK,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "wavpack",
+        .long_name = NULL_IF_CONFIG_SMALL("WavPack"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY |
+                     AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_DSICINAUDIO,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dsicinaudio",
+        .long_name = NULL_IF_CONFIG_SMALL("Delphine Software International CIN audio"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_IMC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "imc",
+        .long_name = NULL_IF_CONFIG_SMALL("IMC (Intel Music Coder)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MUSEPACK7,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "musepack7",
+        .long_name = NULL_IF_CONFIG_SMALL("Musepack SV7"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MLP,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "mlp",
+        .long_name = NULL_IF_CONFIG_SMALL("MLP (Meridian Lossless Packing)"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_GSM_MS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "gsm_ms",
+        .long_name = NULL_IF_CONFIG_SMALL("GSM Microsoft variant"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ATRAC3,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "atrac3",
+        .long_name = NULL_IF_CONFIG_SMALL("ATRAC3 (Adaptive TRansform Acoustic Coding 3)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_APE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "ape",
+        .long_name = NULL_IF_CONFIG_SMALL("Monkey's Audio"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_NELLYMOSER,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "nellymoser",
+        .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MUSEPACK8,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "musepack8",
+        .long_name = NULL_IF_CONFIG_SMALL("Musepack SV8"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SPEEX,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "speex",
+        .long_name = NULL_IF_CONFIG_SMALL("Speex"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WMAVOICE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "wmavoice",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WMAPRO,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "wmapro",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WMALOSSLESS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "wmalossless",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Lossless"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ATRAC3P,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "atrac3p",
+        .long_name = NULL_IF_CONFIG_SMALL("ATRAC3+ (Adaptive TRansform Acoustic Coding 3+)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_EAC3,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "eac3",
+        .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52B (AC-3, E-AC-3)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SIPR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sipr",
+        .long_name = NULL_IF_CONFIG_SMALL("RealAudio SIPR / ACELP.NET"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MP1,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "mp1",
+        .long_name = NULL_IF_CONFIG_SMALL("MP1 (MPEG audio layer 1)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TWINVQ,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "twinvq",
+        .long_name = NULL_IF_CONFIG_SMALL("VQF TwinVQ"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TRUEHD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "truehd",
+        .long_name = NULL_IF_CONFIG_SMALL("TrueHD"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MP4ALS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "mp4als",
+        .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 Audio Lossless Coding (ALS)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ATRAC1,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "atrac1",
+        .long_name = NULL_IF_CONFIG_SMALL("ATRAC1 (Adaptive TRansform Acoustic Coding)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_BINKAUDIO_RDFT,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "binkaudio_rdft",
+        .long_name = NULL_IF_CONFIG_SMALL("Bink Audio (RDFT)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_BINKAUDIO_DCT,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "binkaudio_dct",
+        .long_name = NULL_IF_CONFIG_SMALL("Bink Audio (DCT)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_AAC_LATM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "aac_latm",
+        .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_QDMC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "qdmc",
+        .long_name = NULL_IF_CONFIG_SMALL("QDesign Music"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CELT,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "celt",
+        .long_name = NULL_IF_CONFIG_SMALL("Constrained Energy Lapped Transform (CELT)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_G723_1,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "g723_1",
+        .long_name = NULL_IF_CONFIG_SMALL("G.723.1"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_G729,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "g729",
+        .long_name = NULL_IF_CONFIG_SMALL("G.729"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_8SVX_EXP,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "8svx_exp",
+        .long_name = NULL_IF_CONFIG_SMALL("8SVX exponential"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_8SVX_FIB,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "8svx_fib",
+        .long_name = NULL_IF_CONFIG_SMALL("8SVX fibonacci"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_BMV_AUDIO,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "bmv_audio",
+        .long_name = NULL_IF_CONFIG_SMALL("Discworld II BMV audio"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_RALF,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "ralf",
+        .long_name = NULL_IF_CONFIG_SMALL("RealAudio Lossless"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_IAC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "iac",
+        .long_name = NULL_IF_CONFIG_SMALL("IAC (Indeo Audio Coder)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ILBC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "ilbc",
+        .long_name = NULL_IF_CONFIG_SMALL("iLBC (Internet Low Bitrate Codec)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_OPUS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "opus",
+        .long_name = NULL_IF_CONFIG_SMALL("Opus (Opus Interactive Audio Codec)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_COMFORT_NOISE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "comfortnoise",
+        .long_name = NULL_IF_CONFIG_SMALL("RFC 3389 Comfort Noise"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TAK,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "tak",
+        .long_name = NULL_IF_CONFIG_SMALL("TAK (Tom's lossless Audio Kompressor)"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_METASOUND,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "metasound",
+        .long_name = NULL_IF_CONFIG_SMALL("Voxware MetaSound"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PAF_AUDIO,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "paf_audio",
+        .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Audio"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ON2AVC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "avc",
+        .long_name = NULL_IF_CONFIG_SMALL("On2 Audio for Video Codec"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DSS_SP,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dss_sp",
+        .long_name = NULL_IF_CONFIG_SMALL("Digital Speech Standard - Standard Play mode (DSS SP)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CODEC2,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "codec2",
+        .long_name = NULL_IF_CONFIG_SMALL("codec2 (very low bitrate speech codec)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_FFWAVESYNTH,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "wavesynth",
+        .long_name = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_SONIC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sonic",
+        .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_SONIC_LS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sonicls",
+        .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_EVRC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "evrc",
+        .long_name = NULL_IF_CONFIG_SMALL("EVRC (Enhanced Variable Rate Codec)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SMV,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "smv",
+        .long_name = NULL_IF_CONFIG_SMALL("SMV (Selectable Mode Vocoder)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DSD_LSBF,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dsd_lsbf",
+        .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), least significant bit first"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DSD_MSBF,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dsd_msbf",
+        .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), most significant bit first"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DSD_LSBF_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dsd_lsbf_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), least significant bit first, planar"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DSD_MSBF_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dsd_msbf_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), most significant bit first, planar"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_4GV,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "4gv",
+        .long_name = NULL_IF_CONFIG_SMALL("4GV (Fourth Generation Vocoder)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_INTERPLAY_ACM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "interplayacm",
+        .long_name = NULL_IF_CONFIG_SMALL("Interplay ACM"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_XMA1,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "xma1",
+        .long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 1"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_XMA2,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "xma2",
+        .long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 2"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DST,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dst",
+        .long_name = NULL_IF_CONFIG_SMALL("DST (Direct Stream Transfer)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ATRAC3AL,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "atrac3al",
+        .long_name = NULL_IF_CONFIG_SMALL("ATRAC3 AL (Adaptive TRansform Acoustic Coding 3 Advanced Lossless)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ATRAC3PAL,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "atrac3pal",
+        .long_name = NULL_IF_CONFIG_SMALL("ATRAC3+ AL (Adaptive TRansform Acoustic Coding 3+ Advanced Lossless)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_DOLBY_E,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dolby_e",
+        .long_name = NULL_IF_CONFIG_SMALL("Dolby E"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_APTX,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "aptx",
+        .long_name = NULL_IF_CONFIG_SMALL("aptX (Audio Processing Technology for Bluetooth)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_APTX_HD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "aptx_hd",
+        .long_name = NULL_IF_CONFIG_SMALL("aptX HD (Audio Processing Technology for Bluetooth)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SBC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sbc",
+        .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ATRAC9,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "atrac9",
+        .long_name = NULL_IF_CONFIG_SMALL("ATRAC9 (Adaptive TRansform Acoustic Coding 9)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HCOM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "hcom",
+        .long_name = NULL_IF_CONFIG_SMALL("HCOM Audio"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ACELP_KELVIN,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "acelp.kelvin",
+        .long_name = NULL_IF_CONFIG_SMALL("Sipro ACELP.KELVIN"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MPEGH_3D_AUDIO,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "mpegh_3d_audio",
+        .long_name = NULL_IF_CONFIG_SMALL("MPEG-H 3D Audio"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SIREN,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "siren",
+        .long_name = NULL_IF_CONFIG_SMALL("Siren"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HCA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "hca",
+        .long_name = NULL_IF_CONFIG_SMALL("CRI HCA"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_FASTAUDIO,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "fastaudio",
+        .long_name = NULL_IF_CONFIG_SMALL("MobiClip FastAudio"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MSNSIREN,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "msnsiren",
+        .long_name = NULL_IF_CONFIG_SMALL("MSN Siren"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DFPWM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dfpwm",
+        .long_name = NULL_IF_CONFIG_SMALL("DFPWM (Dynamic Filter Pulse Width Modulation)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_BONK,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "bonk",
+        .long_name = NULL_IF_CONFIG_SMALL("Bonk audio"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MISC4,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "misc4",
+        .long_name = NULL_IF_CONFIG_SMALL("Micronas SC-4 Audio"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_APAC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "apac",
+        .long_name = NULL_IF_CONFIG_SMALL("Marian's A-pac audio"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_FTR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "ftr",
+        .long_name = NULL_IF_CONFIG_SMALL("FTR Voice"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WAVARC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "wavarc",
+        .long_name = NULL_IF_CONFIG_SMALL("Waveform Archiver"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_RKA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "rka",
+        .long_name = NULL_IF_CONFIG_SMALL("RKA (RK Audio)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+    },
+
+    /* subtitle codecs */
+    {
+        .id        = AV_CODEC_ID_DVD_SUBTITLE,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "dvd_subtitle",
+        .long_name = NULL_IF_CONFIG_SMALL("DVD subtitles"),
+        .props     = AV_CODEC_PROP_BITMAP_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_DVB_SUBTITLE,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "dvb_subtitle",
+        .long_name = NULL_IF_CONFIG_SMALL("DVB subtitles"),
+        .props     = AV_CODEC_PROP_BITMAP_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_TEXT,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "text",
+        .long_name = NULL_IF_CONFIG_SMALL("raw UTF-8 text"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_XSUB,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "xsub",
+        .long_name = NULL_IF_CONFIG_SMALL("XSUB"),
+        .props     = AV_CODEC_PROP_BITMAP_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_SSA,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "ssa",
+        .long_name = NULL_IF_CONFIG_SMALL("SSA (SubStation Alpha) subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_MOV_TEXT,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "mov_text",
+        .long_name = NULL_IF_CONFIG_SMALL("MOV text"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_HDMV_PGS_SUBTITLE,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "hdmv_pgs_subtitle",
+        .long_name = NULL_IF_CONFIG_SMALL("HDMV Presentation Graphic Stream subtitles"),
+        .props     = AV_CODEC_PROP_BITMAP_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_DVB_TELETEXT,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "dvb_teletext",
+        .long_name = NULL_IF_CONFIG_SMALL("DVB teletext"),
+    },
+    {
+        .id        = AV_CODEC_ID_SRT,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "srt",
+        .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle with embedded timing"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_MICRODVD,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "microdvd",
+        .long_name = NULL_IF_CONFIG_SMALL("MicroDVD subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_EIA_608,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "eia_608",
+        .long_name = NULL_IF_CONFIG_SMALL("EIA-608 closed captions"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_JACOSUB,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "jacosub",
+        .long_name = NULL_IF_CONFIG_SMALL("JACOsub subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_SAMI,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "sami",
+        .long_name = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_REALTEXT,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "realtext",
+        .long_name = NULL_IF_CONFIG_SMALL("RealText subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_STL,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "stl",
+        .long_name = NULL_IF_CONFIG_SMALL("Spruce subtitle format"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_SUBVIEWER1,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "subviewer1",
+        .long_name = NULL_IF_CONFIG_SMALL("SubViewer v1 subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_SUBVIEWER,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "subviewer",
+        .long_name = NULL_IF_CONFIG_SMALL("SubViewer subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_SUBRIP,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "subrip",
+        .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_WEBVTT,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "webvtt",
+        .long_name = NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_MPL2,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "mpl2",
+        .long_name = NULL_IF_CONFIG_SMALL("MPL2 subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_VPLAYER,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "vplayer",
+        .long_name = NULL_IF_CONFIG_SMALL("VPlayer subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_PJS,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "pjs",
+        .long_name = NULL_IF_CONFIG_SMALL("PJS (Phoenix Japanimation Society) subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_ASS,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "ass",
+        .long_name = NULL_IF_CONFIG_SMALL("ASS (Advanced SSA) subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "hdmv_text_subtitle",
+        .long_name = NULL_IF_CONFIG_SMALL("HDMV Text subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_TTML,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "ttml",
+        .long_name = NULL_IF_CONFIG_SMALL("Timed Text Markup Language"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_ARIB_CAPTION,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "arib_caption",
+        .long_name = NULL_IF_CONFIG_SMALL("ARIB STD-B24 caption"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_arib_caption_profiles),
+    },
+
+    /* other kind of codecs and pseudo-codecs */
+    {
+        .id        = AV_CODEC_ID_TTF,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "ttf",
+        .long_name = NULL_IF_CONFIG_SMALL("TrueType font"),
+        .mime_types= MT("application/x-truetype-font", "application/x-font"),
+    },
+    {
+        .id        = AV_CODEC_ID_SCTE_35,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "scte_35",
+        .long_name = NULL_IF_CONFIG_SMALL("SCTE 35 Message Queue"),
+    },
+    {
+        .id        = AV_CODEC_ID_EPG,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "epg",
+        .long_name = NULL_IF_CONFIG_SMALL("Electronic Program Guide"),
+    },
+    {
+        .id        = AV_CODEC_ID_BINTEXT,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "bintext",
+        .long_name = NULL_IF_CONFIG_SMALL("Binary text"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_XBIN,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xbin",
+        .long_name = NULL_IF_CONFIG_SMALL("eXtended BINary text"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_IDF,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "idf",
+        .long_name = NULL_IF_CONFIG_SMALL("iCEDraw text"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_OTF,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "otf",
+        .long_name = NULL_IF_CONFIG_SMALL("OpenType font"),
+        .mime_types= MT("application/vnd.ms-opentype"),
+    },
+    {
+        .id        = AV_CODEC_ID_SMPTE_KLV,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "klv",
+        .long_name = NULL_IF_CONFIG_SMALL("SMPTE 336M Key-Length-Value (KLV) metadata"),
+    },
+    {
+        .id        = AV_CODEC_ID_DVD_NAV,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "dvd_nav_packet",
+        .long_name = NULL_IF_CONFIG_SMALL("DVD Nav packet"),
+    },
+    {
+        .id        = AV_CODEC_ID_TIMED_ID3,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "timed_id3",
+        .long_name = NULL_IF_CONFIG_SMALL("timed ID3 metadata"),
+    },
+    {
+        .id        = AV_CODEC_ID_BIN_DATA,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "bin_data",
+        .long_name = NULL_IF_CONFIG_SMALL("binary data"),
+        .mime_types= MT("application/octet-stream"),
+    },
+    {
+        .id        = AV_CODEC_ID_MPEG2TS,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "mpegts",
+        .long_name = NULL_IF_CONFIG_SMALL("raw MPEG-TS stream"),
+        .mime_types= MT("application/MP2T"),
+    },
+    {
+        .id        = AV_CODEC_ID_WRAPPED_AVFRAME,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wrapped_avframe",
+        .long_name = NULL_IF_CONFIG_SMALL("AVFrame to AVPacket passthrough"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_VNULL,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vnull",
+        .long_name = NULL_IF_CONFIG_SMALL("Null video codec"),
+    },
+    {
+        .id        = AV_CODEC_ID_ANULL,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "anull",
+        .long_name = NULL_IF_CONFIG_SMALL("Null audio codec"),
+    },
+};
+
+static int descriptor_compare(const void *key, const void *member)
+{
+    const enum AVCodecID id = *(const enum AVCodecID *) key; /* ID searched for */
+    const AVCodecDescriptor *desc = member;                  /* candidate entry */
+    /* bsearch() comparator: only the sign matters, so compare, don't subtract. */
+    return (id > desc->id) - (id < desc->id);
+}
+
+const AVCodecDescriptor *avcodec_descriptor_get(enum AVCodecID id)
+{
+    return bsearch(&id, codec_descriptors, FF_ARRAY_ELEMS(codec_descriptors),
+                   sizeof(*codec_descriptors), descriptor_compare); /* NULL: no such id */
+}
+
+const AVCodecDescriptor *avcodec_descriptor_next(const AVCodecDescriptor *prev)
+{
+    /* NULL starts the walk at the first entry; the last entry ends it. */
+    if (!prev)
+        return codec_descriptors;
+    return prev - codec_descriptors < FF_ARRAY_ELEMS(codec_descriptors) - 1
+           ? prev + 1 : NULL;
+}
+
+const AVCodecDescriptor *avcodec_descriptor_get_by_name(const char *name)
+{
+    const AVCodecDescriptor *desc = NULL;
+    /* Linear scan, exact match; NULL name finds nothing (strcmp(NULL) is UB). */
+    while (name && (desc = avcodec_descriptor_next(desc)))
+        if (!strcmp(desc->name, name))
+            return desc;
+    return NULL;
+}
+
+enum AVMediaType avcodec_get_type(enum AVCodecID codec_id)
+{
+    const AVCodecDescriptor *const entry = avcodec_descriptor_get(codec_id);
+    return !entry ? AVMEDIA_TYPE_UNKNOWN : entry->type; /* UNKNOWN: no descriptor */
+}
diff --git a/media/ffvpx/libavcodec/codec_desc.h b/media/ffvpx/libavcodec/codec_desc.h
new file mode 100644
index 0000000000..126b52df47
--- /dev/null
+++ b/media/ffvpx/libavcodec/codec_desc.h
@@ -0,0 +1,128 @@
+/*
+ * Codec descriptors public API
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CODEC_DESC_H
+#define AVCODEC_CODEC_DESC_H
+
+#include "libavutil/avutil.h"
+
+#include "codec_id.h"
+
+/**
+ * @addtogroup lavc_core
+ * @{
+ */
+
+/**
+ * This struct describes the properties of a single codec identified by an
+ * AVCodecID.
+ * @see avcodec_descriptor_get()
+ */
+typedef struct AVCodecDescriptor {
+    enum AVCodecID     id;    ///< ID of the codec this descriptor describes
+    enum AVMediaType type;    ///< media type of the codec, e.g. AVMEDIA_TYPE_AUDIO
+    /**
+     * Name of the codec described by this descriptor. It is non-empty and
+     * unique for each codec descriptor. It should contain alphanumeric
+     * characters and '_' only.
+     */
+    const char      *name;
+    /**
+     * A more descriptive, human-readable name for this codec. May be NULL.
+     */
+    const char *long_name;
+    /**
+     * Codec properties, a bitwise OR of AV_CODEC_PROP_* flags.
+     */
+    int             props;
+    /**
+     * MIME type(s) associated with the codec.
+     * May be NULL; if not, a NULL-terminated array of MIME types.
+     * The first item is always non-NULL and is the preferred MIME type.
+     */
+    const char *const *mime_types;
+    /**
+     * If non-NULL, an array of profiles recognized for this codec.
+     * Terminated with FF_PROFILE_UNKNOWN.
+     */
+    const struct AVProfile *profiles;
+} AVCodecDescriptor;
+
+/**
+ * Codec uses only intra compression.
+ * Video and audio codecs only.
+ */
+#define AV_CODEC_PROP_INTRA_ONLY    (1 << 0)
+/**
+ * Codec supports lossy compression. Audio and video codecs only.
+ * @note A codec may support both lossy and lossless compression modes, in
+ *       which case AV_CODEC_PROP_LOSSLESS is set as well.
+ */
+#define AV_CODEC_PROP_LOSSY         (1 << 1)
+/**
+ * Codec supports lossless compression. Audio and video codecs only.
+ */
+#define AV_CODEC_PROP_LOSSLESS      (1 << 2)
+/**
+ * Codec supports frame reordering. That is, the coded order (the order in which
+ * the encoded packets are output by the encoders / stored / input to the
+ * decoders) may be different from the presentation order of the corresponding
+ * frames.
+ *
+ * For codecs that do not have this property set, PTS and DTS should always be
+ * equal.
+ */
+#define AV_CODEC_PROP_REORDER       (1 << 3)
+/**
+ * Subtitle codec is bitmap based.
+ * Decoded AVSubtitle data can be read from the AVSubtitleRect->pict field.
+ */
+#define AV_CODEC_PROP_BITMAP_SUB    (1 << 16)
+/**
+ * Subtitle codec is text based.
+ * Decoded AVSubtitle data can be read from the AVSubtitleRect->ass field.
+ */
+#define AV_CODEC_PROP_TEXT_SUB      (1 << 17)
+
+/**
+ * @return descriptor for the given codec ID, or NULL if no descriptor exists.
+ */
+const AVCodecDescriptor *avcodec_descriptor_get(enum AVCodecID id);
+
+/**
+ * Iterate over all codec descriptors known to libavcodec.
+ *
+ * @param prev previous descriptor, or NULL to get the first descriptor.
+ *
+ * @return the next descriptor, or NULL if prev was the last descriptor.
+ */
+const AVCodecDescriptor *avcodec_descriptor_next(const AVCodecDescriptor *prev);
+
+/**
+ * @return codec descriptor whose name matches the given name exactly
+ *         (case-sensitive), or NULL if no such descriptor exists.
+ */
+const AVCodecDescriptor *avcodec_descriptor_get_by_name(const char *name);
+
+/**
+ * @}
+ */
+
+#endif // AVCODEC_CODEC_DESC_H
diff --git a/media/ffvpx/libavcodec/codec_id.h b/media/ffvpx/libavcodec/codec_id.h
new file mode 100644
index 0000000000..89a4a0cb89
--- /dev/null
+++ b/media/ffvpx/libavcodec/codec_id.h
@@ -0,0 +1,661 @@
+/*
+ * Codec IDs
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CODEC_ID_H
+#define AVCODEC_CODEC_ID_H
+
+#include "libavutil/avutil.h"
+#include "libavutil/samplefmt.h"
+
+#include "version_major.h"
+
+/**
+ * @addtogroup lavc_core
+ * @{
+ */
+
+/**
+ * Identify the syntax and semantics of the bitstream.
+ * The principle is roughly:
+ * Two decoders with the same ID can decode the same streams.
+ * Two encoders with the same ID can encode compatible streams.
+ * There may be slight deviations from the principle due to implementation
+ * details.
+ *
+ * If you add a codec ID to this list, add it so that
+ * 1. no value of an existing codec ID changes (that would break ABI),
+ * 2. it is as close as possible to similar codecs
+ *
+ * After adding new codec IDs, do not forget to add an entry to the codec
+ * descriptor list and bump libavcodec minor version.
+ */
+enum AVCodecID {
+    AV_CODEC_ID_NONE,
+
+    /* video codecs */
+    AV_CODEC_ID_MPEG1VIDEO,
+    AV_CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
+    AV_CODEC_ID_H261,
+    AV_CODEC_ID_H263,
+    AV_CODEC_ID_RV10,
+    AV_CODEC_ID_RV20,
+    AV_CODEC_ID_MJPEG,
+    AV_CODEC_ID_MJPEGB,
+    AV_CODEC_ID_LJPEG,
+    AV_CODEC_ID_SP5X,
+    AV_CODEC_ID_JPEGLS,
+    AV_CODEC_ID_MPEG4,
+    AV_CODEC_ID_RAWVIDEO,
+    AV_CODEC_ID_MSMPEG4V1,
+    AV_CODEC_ID_MSMPEG4V2,
+    AV_CODEC_ID_MSMPEG4V3,
+    AV_CODEC_ID_WMV1,
+    AV_CODEC_ID_WMV2,
+    AV_CODEC_ID_H263P,
+    AV_CODEC_ID_H263I,
+    AV_CODEC_ID_FLV1,
+    AV_CODEC_ID_SVQ1,
+    AV_CODEC_ID_SVQ3,
+    AV_CODEC_ID_DVVIDEO,
+    AV_CODEC_ID_HUFFYUV,
+    AV_CODEC_ID_CYUV,
+    AV_CODEC_ID_H264,
+    AV_CODEC_ID_INDEO3,
+    AV_CODEC_ID_VP3,
+    AV_CODEC_ID_THEORA,
+    AV_CODEC_ID_ASV1,
+    AV_CODEC_ID_ASV2,
+    AV_CODEC_ID_FFV1,
+    AV_CODEC_ID_4XM,
+    AV_CODEC_ID_VCR1,
+    AV_CODEC_ID_CLJR,
+    AV_CODEC_ID_MDEC,
+    AV_CODEC_ID_ROQ,
+    AV_CODEC_ID_INTERPLAY_VIDEO,
+    AV_CODEC_ID_XAN_WC3,
+    AV_CODEC_ID_XAN_WC4,
+    AV_CODEC_ID_RPZA,
+    AV_CODEC_ID_CINEPAK,
+    AV_CODEC_ID_WS_VQA,
+    AV_CODEC_ID_MSRLE,
+    AV_CODEC_ID_MSVIDEO1,
+    AV_CODEC_ID_IDCIN,
+    AV_CODEC_ID_8BPS,
+    AV_CODEC_ID_SMC,
+    AV_CODEC_ID_FLIC,
+    AV_CODEC_ID_TRUEMOTION1,
+    AV_CODEC_ID_VMDVIDEO,
+    AV_CODEC_ID_MSZH,
+    AV_CODEC_ID_ZLIB,
+    AV_CODEC_ID_QTRLE,
+    AV_CODEC_ID_TSCC,
+    AV_CODEC_ID_ULTI,
+    AV_CODEC_ID_QDRAW,
+    AV_CODEC_ID_VIXL,
+    AV_CODEC_ID_QPEG,
+    AV_CODEC_ID_PNG,
+    AV_CODEC_ID_PPM,
+    AV_CODEC_ID_PBM,
+    AV_CODEC_ID_PGM,
+    AV_CODEC_ID_PGMYUV,
+    AV_CODEC_ID_PAM,
+    AV_CODEC_ID_FFVHUFF,
+    AV_CODEC_ID_RV30,
+    AV_CODEC_ID_RV40,
+    AV_CODEC_ID_VC1,
+    AV_CODEC_ID_WMV3,
+    AV_CODEC_ID_LOCO,
+    AV_CODEC_ID_WNV1,
+    AV_CODEC_ID_AASC,
+    AV_CODEC_ID_INDEO2,
+    AV_CODEC_ID_FRAPS,
+    AV_CODEC_ID_TRUEMOTION2,
+    AV_CODEC_ID_BMP,
+    AV_CODEC_ID_CSCD,
+    AV_CODEC_ID_MMVIDEO,
+    AV_CODEC_ID_ZMBV,
+    AV_CODEC_ID_AVS,
+    AV_CODEC_ID_SMACKVIDEO,
+    AV_CODEC_ID_NUV,
+    AV_CODEC_ID_KMVC,
+    AV_CODEC_ID_FLASHSV,
+    AV_CODEC_ID_CAVS,
+    AV_CODEC_ID_JPEG2000,
+    AV_CODEC_ID_VMNC,
+    AV_CODEC_ID_VP5,
+    AV_CODEC_ID_VP6,
+    AV_CODEC_ID_VP6F,
+    AV_CODEC_ID_TARGA,
+    AV_CODEC_ID_DSICINVIDEO,
+    AV_CODEC_ID_TIERTEXSEQVIDEO,
+    AV_CODEC_ID_TIFF,
+    AV_CODEC_ID_GIF,
+    AV_CODEC_ID_DXA,
+    AV_CODEC_ID_DNXHD,
+    AV_CODEC_ID_THP,
+    AV_CODEC_ID_SGI,
+    AV_CODEC_ID_C93,
+    AV_CODEC_ID_BETHSOFTVID,
+    AV_CODEC_ID_PTX,
+    AV_CODEC_ID_TXD,
+    AV_CODEC_ID_VP6A,
+    AV_CODEC_ID_AMV,
+    AV_CODEC_ID_VB,
+    AV_CODEC_ID_PCX,
+    AV_CODEC_ID_SUNRAST,
+    AV_CODEC_ID_INDEO4,
+    AV_CODEC_ID_INDEO5,
+    AV_CODEC_ID_MIMIC,
+    AV_CODEC_ID_RL2,
+    AV_CODEC_ID_ESCAPE124,
+    AV_CODEC_ID_DIRAC,
+    AV_CODEC_ID_BFI,
+    AV_CODEC_ID_CMV,
+    AV_CODEC_ID_MOTIONPIXELS,
+    AV_CODEC_ID_TGV,
+    AV_CODEC_ID_TGQ,
+    AV_CODEC_ID_TQI,
+    AV_CODEC_ID_AURA,
+    AV_CODEC_ID_AURA2,
+    AV_CODEC_ID_V210X,
+    AV_CODEC_ID_TMV,
+    AV_CODEC_ID_V210,
+    AV_CODEC_ID_DPX,
+    AV_CODEC_ID_MAD,
+    AV_CODEC_ID_FRWU,
+    AV_CODEC_ID_FLASHSV2,
+    AV_CODEC_ID_CDGRAPHICS,
+    AV_CODEC_ID_R210,
+    AV_CODEC_ID_ANM,
+    AV_CODEC_ID_BINKVIDEO,
+    AV_CODEC_ID_IFF_ILBM,
+#define AV_CODEC_ID_IFF_BYTERUN1 AV_CODEC_ID_IFF_ILBM
+    AV_CODEC_ID_KGV1,
+    AV_CODEC_ID_YOP,
+    AV_CODEC_ID_VP8,
+    AV_CODEC_ID_PICTOR,
+    AV_CODEC_ID_ANSI,
+    AV_CODEC_ID_A64_MULTI,
+    AV_CODEC_ID_A64_MULTI5,
+    AV_CODEC_ID_R10K,
+    AV_CODEC_ID_MXPEG,
+    AV_CODEC_ID_LAGARITH,
+    AV_CODEC_ID_PRORES,
+    AV_CODEC_ID_JV,
+    AV_CODEC_ID_DFA,
+    AV_CODEC_ID_WMV3IMAGE,
+    AV_CODEC_ID_VC1IMAGE,
+    AV_CODEC_ID_UTVIDEO,
+    AV_CODEC_ID_BMV_VIDEO,
+    AV_CODEC_ID_VBLE,
+    AV_CODEC_ID_DXTORY,
+    AV_CODEC_ID_V410,
+    AV_CODEC_ID_XWD,
+    AV_CODEC_ID_CDXL,
+    AV_CODEC_ID_XBM,
+    AV_CODEC_ID_ZEROCODEC,
+    AV_CODEC_ID_MSS1,
+    AV_CODEC_ID_MSA1,
+    AV_CODEC_ID_TSCC2,
+    AV_CODEC_ID_MTS2,
+    AV_CODEC_ID_CLLC,
+    AV_CODEC_ID_MSS2,
+    AV_CODEC_ID_VP9,
+    AV_CODEC_ID_AIC,
+    AV_CODEC_ID_ESCAPE130,
+    AV_CODEC_ID_G2M,
+    AV_CODEC_ID_WEBP,
+    AV_CODEC_ID_HNM4_VIDEO,
+    AV_CODEC_ID_HEVC,
+#define AV_CODEC_ID_H265 AV_CODEC_ID_HEVC
+    AV_CODEC_ID_FIC,
+    AV_CODEC_ID_ALIAS_PIX,
+    AV_CODEC_ID_BRENDER_PIX,
+    AV_CODEC_ID_PAF_VIDEO,
+    AV_CODEC_ID_EXR,
+    AV_CODEC_ID_VP7,
+    AV_CODEC_ID_SANM,
+    AV_CODEC_ID_SGIRLE,
+    AV_CODEC_ID_MVC1,
+    AV_CODEC_ID_MVC2,
+    AV_CODEC_ID_HQX,
+    AV_CODEC_ID_TDSC,
+    AV_CODEC_ID_HQ_HQA,
+    AV_CODEC_ID_HAP,
+    AV_CODEC_ID_DDS,
+    AV_CODEC_ID_DXV,
+    AV_CODEC_ID_SCREENPRESSO,
+    AV_CODEC_ID_RSCC,
+    AV_CODEC_ID_AVS2,
+    AV_CODEC_ID_PGX,
+    AV_CODEC_ID_AVS3,
+    AV_CODEC_ID_MSP2,
+    AV_CODEC_ID_VVC,
+#define AV_CODEC_ID_H266 AV_CODEC_ID_VVC
+    AV_CODEC_ID_Y41P,
+    AV_CODEC_ID_AVRP,
+    AV_CODEC_ID_012V,
+    AV_CODEC_ID_AVUI,
+#if FF_API_AYUV_CODECID
+    AV_CODEC_ID_AYUV,
+#endif
+    AV_CODEC_ID_TARGA_Y216,
+    AV_CODEC_ID_V308,
+    AV_CODEC_ID_V408,
+    AV_CODEC_ID_YUV4,
+    AV_CODEC_ID_AVRN,
+    AV_CODEC_ID_CPIA,
+    AV_CODEC_ID_XFACE,
+    AV_CODEC_ID_SNOW,
+    AV_CODEC_ID_SMVJPEG,
+    AV_CODEC_ID_APNG,
+    AV_CODEC_ID_DAALA,
+    AV_CODEC_ID_CFHD,
+    AV_CODEC_ID_TRUEMOTION2RT,
+    AV_CODEC_ID_M101,
+    AV_CODEC_ID_MAGICYUV,
+    AV_CODEC_ID_SHEERVIDEO,
+    AV_CODEC_ID_YLC,
+    AV_CODEC_ID_PSD,
+    AV_CODEC_ID_PIXLET,
+    AV_CODEC_ID_SPEEDHQ,
+    AV_CODEC_ID_FMVC,
+    AV_CODEC_ID_SCPR,
+    AV_CODEC_ID_CLEARVIDEO,
+    AV_CODEC_ID_XPM,
+    AV_CODEC_ID_AV1,
+    AV_CODEC_ID_BITPACKED,
+    AV_CODEC_ID_MSCC,
+    AV_CODEC_ID_SRGC,
+    AV_CODEC_ID_SVG,
+    AV_CODEC_ID_GDV,
+    AV_CODEC_ID_FITS,
+    AV_CODEC_ID_IMM4,
+    AV_CODEC_ID_PROSUMER,
+    AV_CODEC_ID_MWSC,
+    AV_CODEC_ID_WCMV,
+    AV_CODEC_ID_RASC,
+    AV_CODEC_ID_HYMT,
+    AV_CODEC_ID_ARBC,
+    AV_CODEC_ID_AGM,
+    AV_CODEC_ID_LSCR,
+    AV_CODEC_ID_VP4,
+    AV_CODEC_ID_IMM5,
+    AV_CODEC_ID_MVDV,
+    AV_CODEC_ID_MVHA,
+    AV_CODEC_ID_CDTOONS,
+    AV_CODEC_ID_MV30,
+    AV_CODEC_ID_NOTCHLC,
+    AV_CODEC_ID_PFM,
+    AV_CODEC_ID_MOBICLIP,
+    AV_CODEC_ID_PHOTOCD,
+    AV_CODEC_ID_IPU,
+    AV_CODEC_ID_ARGO,
+    AV_CODEC_ID_CRI,
+    AV_CODEC_ID_SIMBIOSIS_IMX,
+    AV_CODEC_ID_SGA_VIDEO,
+    AV_CODEC_ID_GEM,
+    AV_CODEC_ID_VBN,
+    AV_CODEC_ID_JPEGXL,
+    AV_CODEC_ID_QOI,
+    AV_CODEC_ID_PHM,
+    AV_CODEC_ID_RADIANCE_HDR,
+    AV_CODEC_ID_WBMP,
+    AV_CODEC_ID_MEDIA100,
+    AV_CODEC_ID_VQC,
+
+    /* various PCM "codecs" */
+    AV_CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
+    AV_CODEC_ID_PCM_S16LE = 0x10000,
+    AV_CODEC_ID_PCM_S16BE,
+    AV_CODEC_ID_PCM_U16LE,
+    AV_CODEC_ID_PCM_U16BE,
+    AV_CODEC_ID_PCM_S8,
+    AV_CODEC_ID_PCM_U8,
+    AV_CODEC_ID_PCM_MULAW,
+    AV_CODEC_ID_PCM_ALAW,
+    AV_CODEC_ID_PCM_S32LE,
+    AV_CODEC_ID_PCM_S32BE,
+    AV_CODEC_ID_PCM_U32LE,
+    AV_CODEC_ID_PCM_U32BE,
+    AV_CODEC_ID_PCM_S24LE,
+    AV_CODEC_ID_PCM_S24BE,
+    AV_CODEC_ID_PCM_U24LE,
+    AV_CODEC_ID_PCM_U24BE,
+    AV_CODEC_ID_PCM_S24DAUD,
+    AV_CODEC_ID_PCM_ZORK,
+    AV_CODEC_ID_PCM_S16LE_PLANAR,
+    AV_CODEC_ID_PCM_DVD,
+    AV_CODEC_ID_PCM_F32BE,
+    AV_CODEC_ID_PCM_F32LE,
+    AV_CODEC_ID_PCM_F64BE,
+    AV_CODEC_ID_PCM_F64LE,
+    AV_CODEC_ID_PCM_BLURAY,
+    AV_CODEC_ID_PCM_LXF,
+    AV_CODEC_ID_S302M,
+    AV_CODEC_ID_PCM_S8_PLANAR,
+    AV_CODEC_ID_PCM_S24LE_PLANAR,
+    AV_CODEC_ID_PCM_S32LE_PLANAR,
+    AV_CODEC_ID_PCM_S16BE_PLANAR,
+    AV_CODEC_ID_PCM_S64LE,
+    AV_CODEC_ID_PCM_S64BE,
+    AV_CODEC_ID_PCM_F16LE,
+    AV_CODEC_ID_PCM_F24LE,
+    AV_CODEC_ID_PCM_VIDC,
+    AV_CODEC_ID_PCM_SGA,
+
+    /* various ADPCM codecs */
+    AV_CODEC_ID_ADPCM_IMA_QT = 0x11000,
+    AV_CODEC_ID_ADPCM_IMA_WAV,
+    AV_CODEC_ID_ADPCM_IMA_DK3,
+    AV_CODEC_ID_ADPCM_IMA_DK4,
+    AV_CODEC_ID_ADPCM_IMA_WS,
+    AV_CODEC_ID_ADPCM_IMA_SMJPEG,
+    AV_CODEC_ID_ADPCM_MS,
+    AV_CODEC_ID_ADPCM_4XM,
+    AV_CODEC_ID_ADPCM_XA,
+    AV_CODEC_ID_ADPCM_ADX,
+    AV_CODEC_ID_ADPCM_EA,
+    AV_CODEC_ID_ADPCM_G726,
+    AV_CODEC_ID_ADPCM_CT,
+    AV_CODEC_ID_ADPCM_SWF,
+    AV_CODEC_ID_ADPCM_YAMAHA,
+    AV_CODEC_ID_ADPCM_SBPRO_4,
+    AV_CODEC_ID_ADPCM_SBPRO_3,
+    AV_CODEC_ID_ADPCM_SBPRO_2,
+    AV_CODEC_ID_ADPCM_THP,
+    AV_CODEC_ID_ADPCM_IMA_AMV,
+    AV_CODEC_ID_ADPCM_EA_R1,
+    AV_CODEC_ID_ADPCM_EA_R3,
+    AV_CODEC_ID_ADPCM_EA_R2,
+    AV_CODEC_ID_ADPCM_IMA_EA_SEAD,
+    AV_CODEC_ID_ADPCM_IMA_EA_EACS,
+    AV_CODEC_ID_ADPCM_EA_XAS,
+    AV_CODEC_ID_ADPCM_EA_MAXIS_XA,
+    AV_CODEC_ID_ADPCM_IMA_ISS,
+    AV_CODEC_ID_ADPCM_G722,
+    AV_CODEC_ID_ADPCM_IMA_APC,
+    AV_CODEC_ID_ADPCM_VIMA,
+    AV_CODEC_ID_ADPCM_AFC,
+    AV_CODEC_ID_ADPCM_IMA_OKI,
+    AV_CODEC_ID_ADPCM_DTK,
+    AV_CODEC_ID_ADPCM_IMA_RAD,
+    AV_CODEC_ID_ADPCM_G726LE,
+    AV_CODEC_ID_ADPCM_THP_LE,
+    AV_CODEC_ID_ADPCM_PSX,
+    AV_CODEC_ID_ADPCM_AICA,
+    AV_CODEC_ID_ADPCM_IMA_DAT4,
+    AV_CODEC_ID_ADPCM_MTAF,
+    AV_CODEC_ID_ADPCM_AGM,
+    AV_CODEC_ID_ADPCM_ARGO,
+    AV_CODEC_ID_ADPCM_IMA_SSI,
+    AV_CODEC_ID_ADPCM_ZORK,
+    AV_CODEC_ID_ADPCM_IMA_APM,
+    AV_CODEC_ID_ADPCM_IMA_ALP,
+    AV_CODEC_ID_ADPCM_IMA_MTF,
+    AV_CODEC_ID_ADPCM_IMA_CUNNING,
+    AV_CODEC_ID_ADPCM_IMA_MOFLEX,
+    AV_CODEC_ID_ADPCM_IMA_ACORN,
+    AV_CODEC_ID_ADPCM_XMD,
+
+    /* AMR */
+    AV_CODEC_ID_AMR_NB = 0x12000,
+    AV_CODEC_ID_AMR_WB,
+
+    /* RealAudio codecs */
+    AV_CODEC_ID_RA_144 = 0x13000,
+    AV_CODEC_ID_RA_288,
+
+    /* various DPCM codecs */
+    AV_CODEC_ID_ROQ_DPCM = 0x14000,
+    AV_CODEC_ID_INTERPLAY_DPCM,
+    AV_CODEC_ID_XAN_DPCM,
+    AV_CODEC_ID_SOL_DPCM,
+    AV_CODEC_ID_SDX2_DPCM,
+    AV_CODEC_ID_GREMLIN_DPCM,
+    AV_CODEC_ID_DERF_DPCM,
+    AV_CODEC_ID_WADY_DPCM,
+    AV_CODEC_ID_CBD2_DPCM,
+
+    /* audio codecs */
+    AV_CODEC_ID_MP2 = 0x15000,
+    AV_CODEC_ID_MP3, ///< preferred ID for decoding MPEG audio layer 1, 2 or 3
+    AV_CODEC_ID_AAC,
+    AV_CODEC_ID_AC3,
+    AV_CODEC_ID_DTS,
+    AV_CODEC_ID_VORBIS,
+    AV_CODEC_ID_DVAUDIO,
+    AV_CODEC_ID_WMAV1,
+    AV_CODEC_ID_WMAV2,
+    AV_CODEC_ID_MACE3,
+    AV_CODEC_ID_MACE6,
+    AV_CODEC_ID_VMDAUDIO,
+    AV_CODEC_ID_FLAC,
+    AV_CODEC_ID_MP3ADU,
+    AV_CODEC_ID_MP3ON4,
+    AV_CODEC_ID_SHORTEN,
+    AV_CODEC_ID_ALAC,
+    AV_CODEC_ID_WESTWOOD_SND1,
+    AV_CODEC_ID_GSM, ///< as in Berlin toast format
+    AV_CODEC_ID_QDM2,
+    AV_CODEC_ID_COOK,
+    AV_CODEC_ID_TRUESPEECH,
+    AV_CODEC_ID_TTA,
+    AV_CODEC_ID_SMACKAUDIO,
+    AV_CODEC_ID_QCELP,
+    AV_CODEC_ID_WAVPACK,
+    AV_CODEC_ID_DSICINAUDIO,
+    AV_CODEC_ID_IMC,
+    AV_CODEC_ID_MUSEPACK7,
+    AV_CODEC_ID_MLP,
+    AV_CODEC_ID_GSM_MS, /* as found in WAV */
+    AV_CODEC_ID_ATRAC3,
+    AV_CODEC_ID_APE,
+    AV_CODEC_ID_NELLYMOSER,
+    AV_CODEC_ID_MUSEPACK8,
+    AV_CODEC_ID_SPEEX,
+    AV_CODEC_ID_WMAVOICE,
+    AV_CODEC_ID_WMAPRO,
+    AV_CODEC_ID_WMALOSSLESS,
+    AV_CODEC_ID_ATRAC3P,
+    AV_CODEC_ID_EAC3,
+    AV_CODEC_ID_SIPR,
+    AV_CODEC_ID_MP1,
+    AV_CODEC_ID_TWINVQ,
+    AV_CODEC_ID_TRUEHD,
+    AV_CODEC_ID_MP4ALS,
+    AV_CODEC_ID_ATRAC1,
+    AV_CODEC_ID_BINKAUDIO_RDFT,
+    AV_CODEC_ID_BINKAUDIO_DCT,
+    AV_CODEC_ID_AAC_LATM,
+    AV_CODEC_ID_QDMC,
+    AV_CODEC_ID_CELT,
+    AV_CODEC_ID_G723_1,
+    AV_CODEC_ID_G729,
+    AV_CODEC_ID_8SVX_EXP,
+    AV_CODEC_ID_8SVX_FIB,
+    AV_CODEC_ID_BMV_AUDIO,
+    AV_CODEC_ID_RALF,
+    AV_CODEC_ID_IAC,
+    AV_CODEC_ID_ILBC,
+    AV_CODEC_ID_OPUS,
+    AV_CODEC_ID_COMFORT_NOISE,
+    AV_CODEC_ID_TAK,
+    AV_CODEC_ID_METASOUND,
+    AV_CODEC_ID_PAF_AUDIO,
+    AV_CODEC_ID_ON2AVC,
+    AV_CODEC_ID_DSS_SP,
+    AV_CODEC_ID_CODEC2,
+    AV_CODEC_ID_FFWAVESYNTH,
+    AV_CODEC_ID_SONIC,
+    AV_CODEC_ID_SONIC_LS,
+    AV_CODEC_ID_EVRC,
+    AV_CODEC_ID_SMV,
+    AV_CODEC_ID_DSD_LSBF,
+    AV_CODEC_ID_DSD_MSBF,
+    AV_CODEC_ID_DSD_LSBF_PLANAR,
+    AV_CODEC_ID_DSD_MSBF_PLANAR,
+    AV_CODEC_ID_4GV,
+    AV_CODEC_ID_INTERPLAY_ACM,
+    AV_CODEC_ID_XMA1,
+    AV_CODEC_ID_XMA2,
+    AV_CODEC_ID_DST,
+    AV_CODEC_ID_ATRAC3AL,
+    AV_CODEC_ID_ATRAC3PAL,
+    AV_CODEC_ID_DOLBY_E,
+    AV_CODEC_ID_APTX,
+    AV_CODEC_ID_APTX_HD,
+    AV_CODEC_ID_SBC,
+    AV_CODEC_ID_ATRAC9,
+    AV_CODEC_ID_HCOM,
+    AV_CODEC_ID_ACELP_KELVIN,
+    AV_CODEC_ID_MPEGH_3D_AUDIO,
+    AV_CODEC_ID_SIREN,
+    AV_CODEC_ID_HCA,
+    AV_CODEC_ID_FASTAUDIO,
+    AV_CODEC_ID_MSNSIREN,
+    AV_CODEC_ID_DFPWM,
+    AV_CODEC_ID_BONK,
+    AV_CODEC_ID_MISC4,
+    AV_CODEC_ID_APAC,
+    AV_CODEC_ID_FTR,
+    AV_CODEC_ID_WAVARC,
+    AV_CODEC_ID_RKA,
+
+    /* subtitle codecs */
+    AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
+    AV_CODEC_ID_DVD_SUBTITLE = 0x17000,
+    AV_CODEC_ID_DVB_SUBTITLE,
+    AV_CODEC_ID_TEXT,  ///< raw UTF-8 text
+    AV_CODEC_ID_XSUB,
+    AV_CODEC_ID_SSA,
+    AV_CODEC_ID_MOV_TEXT,
+    AV_CODEC_ID_HDMV_PGS_SUBTITLE,
+    AV_CODEC_ID_DVB_TELETEXT,
+    AV_CODEC_ID_SRT,
+    AV_CODEC_ID_MICRODVD,
+    AV_CODEC_ID_EIA_608,
+    AV_CODEC_ID_JACOSUB,
+    AV_CODEC_ID_SAMI,
+    AV_CODEC_ID_REALTEXT,
+    AV_CODEC_ID_STL,
+    AV_CODEC_ID_SUBVIEWER1,
+    AV_CODEC_ID_SUBVIEWER,
+    AV_CODEC_ID_SUBRIP,
+    AV_CODEC_ID_WEBVTT,
+    AV_CODEC_ID_MPL2,
+    AV_CODEC_ID_VPLAYER,
+    AV_CODEC_ID_PJS,
+    AV_CODEC_ID_ASS,
+    AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
+    AV_CODEC_ID_TTML,
+    AV_CODEC_ID_ARIB_CAPTION,
+
+    /* other specific kind of codecs (generally used for attachments) */
+    AV_CODEC_ID_FIRST_UNKNOWN = 0x18000,           ///< A dummy ID pointing at the start of various fake codecs.
+    AV_CODEC_ID_TTF = 0x18000,
+
+    AV_CODEC_ID_SCTE_35, ///< Contain timestamp estimated through PCR of program stream.
+    AV_CODEC_ID_EPG,
+    AV_CODEC_ID_BINTEXT,
+    AV_CODEC_ID_XBIN,
+    AV_CODEC_ID_IDF,
+    AV_CODEC_ID_OTF,
+    AV_CODEC_ID_SMPTE_KLV,
+    AV_CODEC_ID_DVD_NAV,
+    AV_CODEC_ID_TIMED_ID3,
+    AV_CODEC_ID_BIN_DATA,
+
+
+    AV_CODEC_ID_PROBE = 0x19000, ///< codec_id is not known (like AV_CODEC_ID_NONE) but lavf should attempt to identify it
+
+    AV_CODEC_ID_MPEG2TS = 0x20000, /**< _FAKE_ codec to indicate a raw MPEG-2 TS
+                                * stream (only used by libavformat) */
+    AV_CODEC_ID_MPEG4SYSTEMS = 0x20001, /**< _FAKE_ codec to indicate a MPEG-4 Systems
+                                * stream (only used by libavformat) */
+    AV_CODEC_ID_FFMETADATA = 0x21000,   ///< Dummy codec for streams containing only metadata information.
+    AV_CODEC_ID_WRAPPED_AVFRAME = 0x21001, ///< Passthrough codec, AVFrames wrapped in AVPacket
+    /**
+     * Dummy null video codec, useful mainly for development and debugging.
+     * Null encoder/decoder discard all input and never return any output.
+     */
+    AV_CODEC_ID_VNULL,
+    /**
+     * Dummy null audio codec, useful mainly for development and debugging.
+     * Null encoder/decoder discard all input and never return any output.
+     */
+    AV_CODEC_ID_ANULL,
+};
+
+/**
+ * Get the type of the given codec.
+ */
+enum AVMediaType avcodec_get_type(enum AVCodecID codec_id);
+
+/**
+ * Get the name of a codec.
+ * @return  a static string identifying the codec; never NULL
+ */
+const char *avcodec_get_name(enum AVCodecID id);
+
+/**
+ * Return codec bits per sample.
+ *
+ * @param[in] codec_id the codec
+ * @return Number of bits per sample or zero if unknown for the given codec.
+ */
+int av_get_bits_per_sample(enum AVCodecID codec_id);
+
+/**
+ * Return codec bits per sample.
+ * Only return non-zero if the bits per sample is exactly correct, not an
+ * approximation.
+ *
+ * @param[in] codec_id the codec
+ * @return Number of bits per sample or zero if unknown for the given codec.
+ */
+int av_get_exact_bits_per_sample(enum AVCodecID codec_id);
+
+/**
+ * Return a name for the specified profile, if available.
+ *
+ * @param codec_id the ID of the codec to which the requested profile belongs
+ * @param profile the profile value for which a name is requested
+ * @return A name for the profile if found, NULL otherwise.
+ *
+ * @note unlike av_get_profile_name(), which searches a list of profiles
+ *       supported by a specific decoder or encoder implementation, this
+ *       function searches the list of profiles from the AVCodecDescriptor
+ */
+const char *avcodec_profile_name(enum AVCodecID codec_id, int profile);
+
+/**
+ * Return the PCM codec associated with a sample format.
+ * @param be  endianness, 0 for little, 1 for big,
+ *            -1 (or anything else) for native
+ * @return  AV_CODEC_ID_PCM_* or AV_CODEC_ID_NONE
+ */
+enum AVCodecID av_get_pcm_codec(enum AVSampleFormat fmt, int be);
+
+/**
+ * @}
+ */
+
+#endif // AVCODEC_CODEC_ID_H
diff --git a/media/ffvpx/libavcodec/codec_internal.h b/media/ffvpx/libavcodec/codec_internal.h
new file mode 100644
index 0000000000..130a7dc3cd
--- /dev/null
+++ b/media/ffvpx/libavcodec/codec_internal.h
@@ -0,0 +1,330 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CODEC_INTERNAL_H
+#define AVCODEC_CODEC_INTERNAL_H
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "codec.h"
+#include "config.h"
+
+/**
+ * The codec is not known to be init-threadsafe (i.e. it might be unsafe
+ * to initialize this codec and another codec concurrently, typically because
+ * the codec calls external APIs that are not known to be thread-safe).
+ * Therefore calling the codec's init function needs to be guarded with a lock.
+ */
+#define FF_CODEC_CAP_NOT_INIT_THREADSAFE    (1 << 0)
+/**
+ * The codec allows calling the close function for deallocation even if
+ * the init function returned a failure. Without this capability flag, a
+ * codec does such cleanup internally when returning failures from the
+ * init function and does not expect the close function to be called at
+ * all.
+ */
+#define FF_CODEC_CAP_INIT_CLEANUP           (1 << 1)
+/**
+ * Decoders marked with FF_CODEC_CAP_SETS_PKT_DTS want to set
+ * AVFrame.pkt_dts manually. If the flag is set, decode.c won't overwrite
+ * this field. If it's unset, decode.c tries to guess the pkt_dts field
+ * from the input AVPacket.
+ */
+#define FF_CODEC_CAP_SETS_PKT_DTS           (1 << 2)
+/**
+ * The decoder extracts and fills its parameters even if the frame is
+ * skipped due to the skip_frame setting.
+ */
+#define FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM  (1 << 3)
+/**
+ * The decoder sets the cropping fields in the output frames manually.
+ * If this cap is set, the generic code will initialize output frame
+ * dimensions to coded rather than display values.
+ */
+#define FF_CODEC_CAP_EXPORTS_CROPPING       (1 << 4)
+/**
+ * Codec initializes slice-based threading with a main function
+ */
+#define FF_CODEC_CAP_SLICE_THREAD_HAS_MF    (1 << 5)
+/**
+ * The codec supports frame threading and has inter-frame dependencies, so it
+ * uses ff_thread_report/await_progress().
+ */
+#define FF_CODEC_CAP_ALLOCATE_PROGRESS      (1 << 6)
+/**
+ * Codec handles avctx->thread_count == 0 (auto) internally.
+ */
+#define FF_CODEC_CAP_AUTO_THREADS           (1 << 7)
+/**
+ * Codec handles output frame properties internally instead of letting the
+ * internal logic derive them from AVCodecInternal.last_pkt_props.
+ */
+#define FF_CODEC_CAP_SETS_FRAME_PROPS       (1 << 8)
+/**
+ * Codec supports embedded ICC profiles (AV_FRAME_DATA_ICC_PROFILE).
+ */
+#define FF_CODEC_CAP_ICC_PROFILES           (1 << 9)
+/**
+ * The encoder has AV_CODEC_CAP_DELAY set, but does not actually have delay - it
+ * only wants to be flushed at the end to update some context variables (e.g.
+ * 2pass stats) or produce a trailing packet. Besides that it immediately
+ * produces exactly one output packet per each input frame, just as no-delay
+ * encoders do.
+ */
+#define FF_CODEC_CAP_EOF_FLUSH              (1 << 10)
+
+/**
+ * FFCodec.codec_tags termination value
+ */
+#define FF_CODEC_TAGS_END -1
+
+typedef struct FFCodecDefault { /* element type of FFCodec.defaults */
+    const char *key;   ///< NOTE(review): presumably the name of the option being defaulted -- confirm
+    const char *value; ///< NOTE(review): presumably the default value in string form -- confirm
+} FFCodecDefault;
+
+struct AVCodecContext;
+struct AVSubtitle;
+struct AVPacket;
+
+enum FFCodecType { /* stored in the 3-bit FFCodec.cb_type bitfield, so values must stay below 8 */
+    /* The codec is a decoder using the decode callback;
+     * audio and video codecs only. */
+    FF_CODEC_CB_TYPE_DECODE,
+    /* The codec is a decoder using the decode_sub callback;
+     * subtitle codecs only. */
+    FF_CODEC_CB_TYPE_DECODE_SUB,
+    /* The codec is a decoder using the receive_frame callback;
+     * audio and video codecs only. */
+    FF_CODEC_CB_TYPE_RECEIVE_FRAME,
+    /* The codec is an encoder using the encode callback;
+     * audio and video codecs only. */
+    FF_CODEC_CB_TYPE_ENCODE,
+    /* The codec is an encoder using the encode_sub callback;
+     * subtitle codecs only. */
+    FF_CODEC_CB_TYPE_ENCODE_SUB,
+    /* The codec is an encoder using the receive_packet callback;
+     * audio and video codecs only. */
+    FF_CODEC_CB_TYPE_RECEIVE_PACKET,
+};
+
+typedef struct FFCodec {
+    /**
+     * The public AVCodec. See codec.h for it.
+     */
+    AVCodec p;
+
+    /**
+     * Internal codec capabilities FF_CODEC_CAP_*.
+     */
+    unsigned caps_internal:29;
+
+    /**
+     * This field determines the type of the codec (decoder/encoder)
+     * and also the exact callback cb implemented by the codec.
+     * cb_type uses enum FFCodecType values.
+     */
+    unsigned cb_type:3;
+
+    int priv_data_size; ///< NOTE(review): presumably the size in bytes of the codec's private context -- confirm
+    /**
+     * @name Frame-level threading support functions
+     * @{
+     */
+    /**
+     * Copy necessary context variables from a previous thread context to the current one.
+     * If not defined, the next thread will start automatically; otherwise, the codec
+     * must call ff_thread_finish_setup().
+     *
+     * dst and src will (rarely) point to the same context, in which case memcpy should be skipped.
+     */
+    int (*update_thread_context)(struct AVCodecContext *dst, const struct AVCodecContext *src);
+
+    /**
+     * Copy variables back to the user-facing context
+     */
+    int (*update_thread_context_for_user)(struct AVCodecContext *dst, const struct AVCodecContext *src);
+    /** @} */
+
+    /**
+     * Private codec-specific defaults.
+     */
+    const FFCodecDefault *defaults;
+
+    /**
+     * Initialize codec static data, called from av_codec_iterate().
+     *
+     * This is not intended for time consuming operations as it is
+     * run for every codec regardless of that codec being used.
+     */
+    void (*init_static_data)(struct FFCodec *codec);
+
+    int (*init)(struct AVCodecContext *); ///< Codec init callback; it may fail (see FF_CODEC_CAP_INIT_CLEANUP for what follows).
+
+    union {
+        /**
+         * Decode to an AVFrame.
+         * cb is in this state if cb_type is FF_CODEC_CB_TYPE_DECODE.
+         *
+         * @param      avctx          codec context
+         * @param[out] frame          AVFrame for output
+         * @param[out] got_frame_ptr  decoder sets to 0 or 1 to indicate that
+         *                            a non-empty frame was returned in frame.
+         * @param[in]  avpkt          AVPacket containing the data to be decoded
+         * @return amount of bytes read from the packet on success,
+         *         negative error code on failure
+         */
+        int (*decode)(struct AVCodecContext *avctx, struct AVFrame *frame,
+                      int *got_frame_ptr, struct AVPacket *avpkt);
+        /**
+         * Decode subtitle data to an AVSubtitle.
+         * cb is in this state if cb_type is FF_CODEC_CB_TYPE_DECODE_SUB.
+         *
+         * Apart from that this is like the decode callback.
+         */
+        int (*decode_sub)(struct AVCodecContext *avctx, struct AVSubtitle *sub,
+                          int *got_frame_ptr, const struct AVPacket *avpkt);
+        /**
+         * Decode API with decoupled packet/frame dataflow.
+         * cb is in this state if cb_type is FF_CODEC_CB_TYPE_RECEIVE_FRAME.
+         *
+         * This function is called to get one output frame. It should call
+         * ff_decode_get_packet() to obtain input data.
+         */
+        int (*receive_frame)(struct AVCodecContext *avctx, struct AVFrame *frame);
+        /**
+         * Encode data to an AVPacket.
+         * cb is in this state if cb_type is FF_CODEC_CB_TYPE_ENCODE
+         *
+         * @param      avctx          codec context
+         * @param[out] avpkt          output AVPacket
+         * @param[in]  frame          AVFrame containing the input to be encoded
+         * @param[out] got_packet_ptr encoder sets to 0 or 1 to indicate that a
+         *                            non-empty packet was returned in avpkt.
+         * @return 0 on success, negative error code on failure
+         */
+        int (*encode)(struct AVCodecContext *avctx, struct AVPacket *avpkt,
+                      const struct AVFrame *frame, int *got_packet_ptr);
+        /**
+         * Encode subtitles to a raw buffer.
+         * cb is in this state if cb_type is FF_CODEC_CB_TYPE_ENCODE_SUB.
+         */
+        int (*encode_sub)(struct AVCodecContext *avctx, uint8_t *buf,
+                          int buf_size, const struct AVSubtitle *sub);
+        /**
+         * Encode API with decoupled frame/packet dataflow.
+         * cb is in this state if cb_type is FF_CODEC_CB_TYPE_RECEIVE_PACKET.
+         *
+         * This function is called to get one output packet.
+         * It should call ff_encode_get_frame() to obtain input data.
+         */
+        int (*receive_packet)(struct AVCodecContext *avctx, struct AVPacket *avpkt);
+    } cb;
+
+    int (*close)(struct AVCodecContext *); ///< Deallocation callback; after a failed init it is only called if FF_CODEC_CAP_INIT_CLEANUP is set.
+
+    /**
+     * Flush buffers.
+     * Will be called when seeking
+     */
+    void (*flush)(struct AVCodecContext *);
+
+    /**
+     * Decoding only, a comma-separated list of bitstream filters to apply to
+     * packets before decoding.
+     */
+    const char *bsfs;
+
+    /**
+     * Array of pointers to hardware configurations supported by the codec,
+     * or NULL if no hardware supported.  The array is terminated by a NULL
+     * pointer.
+     *
+     * The user can only access this field via avcodec_get_hw_config().
+     */
+    const struct AVCodecHWConfigInternal *const *hw_configs;
+
+    /**
+     * List of supported codec_tags, terminated by FF_CODEC_TAGS_END.
+     */
+    const uint32_t *codec_tags;
+} FFCodec;
+
+#if CONFIG_SMALL /* str is discarded and the long name is left NULL */
+#define CODEC_LONG_NAME(str) .p.long_name = NULL
+#else
+#define CODEC_LONG_NAME(str) .p.long_name = str
+#endif /* CONFIG_SMALL */
+
+#if HAVE_THREADS
+#define UPDATE_THREAD_CONTEXT(func) \
+        .update_thread_context          = (func)
+#define UPDATE_THREAD_CONTEXT_FOR_USER(func) \
+        .update_thread_context_for_user = (func)
+#else /* !HAVE_THREADS: func is not referenced and both callbacks are set to NULL */
+#define UPDATE_THREAD_CONTEXT(func) \
+        .update_thread_context          = NULL
+#define UPDATE_THREAD_CONTEXT_FOR_USER(func) \
+        .update_thread_context_for_user = NULL
+#endif /* HAVE_THREADS */
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+#define CODEC_OLD_CHANNEL_LAYOUTS(...) CODEC_OLD_CHANNEL_LAYOUTS_ARRAY(((const uint64_t[]) { __VA_ARGS__, 0 }))
+#if defined(__clang__) /* only clang gets the deprecation-warning guards around the initializer */
+#define CODEC_OLD_CHANNEL_LAYOUTS_ARRAY(array) \
+        FF_DISABLE_DEPRECATION_WARNINGS \
+        .p.channel_layouts = (array), \
+        FF_ENABLE_DEPRECATION_WARNINGS
+#else
+#define CODEC_OLD_CHANNEL_LAYOUTS_ARRAY(array) .p.channel_layouts = (array), /* expands with its own trailing comma */
+#endif /* defined(__clang__) */
+#else /* !FF_API_OLD_CHANNEL_LAYOUT */
+/* This is only provided to allow to test disabling FF_API_OLD_CHANNEL_LAYOUT
+ * without removing all the FF_API_OLD_CHANNEL_LAYOUT codeblocks.
+ * It is of course still expected to be removed when FF_API_OLD_CHANNEL_LAYOUT
+ * will be finally removed (along with all usages of these macros). */
+#define CODEC_OLD_CHANNEL_LAYOUTS(...)
+#define CODEC_OLD_CHANNEL_LAYOUTS_ARRAY(array)
+#endif /* FF_API_OLD_CHANNEL_LAYOUT */
+
+#define FF_CODEC_DECODE_CB(func)                          \
+    .cb_type           = FF_CODEC_CB_TYPE_DECODE,         \
+    .cb.decode         = (func) /* each *_CB macro sets cb_type and the matching FFCodec.cb member as a pair */
+#define FF_CODEC_DECODE_SUB_CB(func)                      \
+    .cb_type           = FF_CODEC_CB_TYPE_DECODE_SUB,     \
+    .cb.decode_sub     = (func)
+#define FF_CODEC_RECEIVE_FRAME_CB(func)                   \
+    .cb_type           = FF_CODEC_CB_TYPE_RECEIVE_FRAME,  \
+    .cb.receive_frame  = (func)
+#define FF_CODEC_ENCODE_CB(func)                          \
+    .cb_type           = FF_CODEC_CB_TYPE_ENCODE,         \
+    .cb.encode         = (func)
+#define FF_CODEC_ENCODE_SUB_CB(func)                      \
+    .cb_type           = FF_CODEC_CB_TYPE_ENCODE_SUB,     \
+    .cb.encode_sub     = (func)
+#define FF_CODEC_RECEIVE_PACKET_CB(func)                  \
+    .cb_type           = FF_CODEC_CB_TYPE_RECEIVE_PACKET, \
+    .cb.receive_packet = (func)
+
+static av_always_inline const FFCodec *ffcodec(const AVCodec *codec)
+{   /* AVCodec p is the first member of FFCodec, so a wrapped codec shares its address; no check is done here. */
+    return (const FFCodec*)codec;
+}
+
+#endif /* AVCODEC_CODEC_INTERNAL_H */
diff --git a/media/ffvpx/libavcodec/codec_list.c b/media/ffvpx/libavcodec/codec_list.c
new file mode 100644
index 0000000000..db49c8fffb
--- /dev/null
+++ b/media/ffvpx/libavcodec/codec_list.c
@@ -0,0 +1,20 @@
+static const FFCodec * const codec_list[] = { /* each decoder is listed only when its CONFIG_* switch is enabled */
+#if CONFIG_VP8_DECODER
+    &ff_vp8_decoder,
+#endif
+#if CONFIG_VP9_DECODER
+    &ff_vp9_decoder,
+#endif
+#if CONFIG_FLAC_DECODER
+    &ff_flac_decoder,
+#endif
+#if CONFIG_MP3_DECODER
+    &ff_mp3_decoder,
+#endif
+#if CONFIG_LIBDAV1D
+    &ff_libdav1d_decoder,
+#endif
+#if CONFIG_AV1_DECODER
+    &ff_av1_decoder,
+#endif
+    NULL }; /* trailing NULL entry; presumably the end-of-list sentinel for whoever iterates this array */
diff --git a/media/ffvpx/libavcodec/codec_par.c b/media/ffvpx/libavcodec/codec_par.c
new file mode 100644
index 0000000000..abda649aa8
--- /dev/null
+++ b/media/ffvpx/libavcodec/codec_par.c
@@ -0,0 +1,263 @@
+/*
+ * AVCodecParameters functions for libavcodec
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AVCodecParameters functions for libavcodec.
+ */
+
+#include <string.h>
+#include "libavutil/mem.h"
+#include "avcodec.h"
+#include "codec_par.h"
+
+static void codec_parameters_reset(AVCodecParameters *par)
+{
+    /* Free what the struct owns, wipe it, then seed the explicit defaults. */
+    av_channel_layout_uninit(&par->ch_layout);
+    av_freep(&par->extradata);
+    memset(par, 0, sizeof(*par));
+
+    par->codec_id            = AV_CODEC_ID_NONE;
+    par->codec_type          = AVMEDIA_TYPE_UNKNOWN;
+    par->format              = -1;
+    par->profile             = FF_PROFILE_UNKNOWN;
+    par->level               = FF_LEVEL_UNKNOWN;
+    par->field_order         = AV_FIELD_UNKNOWN;
+    par->sample_aspect_ratio = (AVRational){ 0, 1 };
+    par->ch_layout.order     = AV_CHANNEL_ORDER_UNSPEC;
+    par->chroma_location     = AVCHROMA_LOC_UNSPECIFIED;
+    par->color_space         = AVCOL_SPC_UNSPECIFIED;
+    par->color_trc           = AVCOL_TRC_UNSPECIFIED;
+    par->color_primaries     = AVCOL_PRI_UNSPECIFIED;
+    par->color_range         = AVCOL_RANGE_UNSPECIFIED;
+}
+
+AVCodecParameters *avcodec_parameters_alloc(void)
+{
+    AVCodecParameters *params = av_mallocz(sizeof(*params));
+
+    /* On allocation failure skip the setup and hand the NULL back as is. */
+    if (params)
+        codec_parameters_reset(params);
+    return params;
+}
+
+void avcodec_parameters_free(AVCodecParameters **ppar)
+{
+    /* Tolerate a NULL handle as well as a handle to NULL, like free(). */
+    if (!ppar || !*ppar)
+        return;
+
+    /* Releases the extradata and channel layout owned by the struct. */
+    codec_parameters_reset(*ppar);
+    av_freep(ppar); /* frees the struct itself and, per av_freep(), clears *ppar */
+}
+
+int avcodec_parameters_copy(AVCodecParameters *dst, const AVCodecParameters *src)
+{
+    int err;
+
+    /* Release what dst owned, then take a shallow copy of every field. */
+    codec_parameters_reset(dst);
+    memcpy(dst, src, sizeof(*dst));
+
+    /* The shallow copy aliases src's owned data: detach it before cloning. */
+    dst->extradata_size = 0;
+    dst->extradata      = NULL;
+    dst->ch_layout      = (AVChannelLayout){0};
+    if (src->extradata) {
+        dst->extradata = av_mallocz(src->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!dst->extradata)
+            return AVERROR(ENOMEM);
+        memcpy(dst->extradata, src->extradata, src->extradata_size);
+        dst->extradata_size = src->extradata_size;
+    }
+
+    /* Any non-negative result from the layout copy is reported as plain 0. */
+    err = av_channel_layout_copy(&dst->ch_layout, &src->ch_layout);
+    return err < 0 ? err : 0;
+}
+
+int avcodec_parameters_from_context(AVCodecParameters *par,
+                                    const AVCodecContext *codec)
+{
+    int ret;
+
+    codec_parameters_reset(par); /* start clean: frees par's old extradata/ch_layout, restores defaults */
+    /* Rebuild par from codec, beginning with the fields common to every media type. */
+    par->codec_type = codec->codec_type;
+    par->codec_id   = codec->codec_id;
+    par->codec_tag  = codec->codec_tag;
+
+    par->bit_rate              = codec->bit_rate;
+    par->bits_per_coded_sample = codec->bits_per_coded_sample;
+    par->bits_per_raw_sample   = codec->bits_per_raw_sample;
+    par->profile               = codec->profile;
+    par->level                 = codec->level;
+    /* Per-media-type fields; types without a case below copy nothing more here. */
+    switch (par->codec_type) {
+    case AVMEDIA_TYPE_VIDEO:
+        par->format              = codec->pix_fmt;
+        par->width               = codec->width;
+        par->height              = codec->height;
+        par->field_order         = codec->field_order;
+        par->color_range         = codec->color_range;
+        par->color_primaries     = codec->color_primaries;
+        par->color_trc           = codec->color_trc;
+        par->color_space         = codec->colorspace;
+        par->chroma_location     = codec->chroma_sample_location;
+        par->sample_aspect_ratio = codec->sample_aspect_ratio;
+        par->video_delay         = codec->has_b_frames;
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        par->format           = codec->sample_fmt;
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+        // if the old/new fields are set inconsistently, prefer the old ones
+        if ((codec->channels && codec->channels != codec->ch_layout.nb_channels) ||
+            (codec->channel_layout && (codec->ch_layout.order != AV_CHANNEL_ORDER_NATIVE ||
+                                       codec->ch_layout.u.mask != codec->channel_layout))) {
+            if (codec->channel_layout)
+                av_channel_layout_from_mask(&par->ch_layout, codec->channel_layout);
+            else {
+                par->ch_layout.order       = AV_CHANNEL_ORDER_UNSPEC;
+                par->ch_layout.nb_channels = codec->channels;
+            }
+FF_ENABLE_DEPRECATION_WARNINGS
+        } else {
+#endif /* FF_API_OLD_CHANNEL_LAYOUT */
+        ret = av_channel_layout_copy(&par->ch_layout, &codec->ch_layout); /* old fields unset or consistent, or compat API compiled out */
+        if (ret < 0)
+            return ret;
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+        }
+        par->channel_layout  = par->ch_layout.order == AV_CHANNEL_ORDER_NATIVE ?
+                               par->ch_layout.u.mask : 0;
+        par->channels        = par->ch_layout.nb_channels; /* old-style fields are derived from the layout chosen above */
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_OLD_CHANNEL_LAYOUT */
+        par->sample_rate      = codec->sample_rate;
+        par->block_align      = codec->block_align;
+        par->frame_size       = codec->frame_size;
+        par->initial_padding  = codec->initial_padding;
+        par->trailing_padding = codec->trailing_padding;
+        par->seek_preroll     = codec->seek_preroll;
+        break;
+    case AVMEDIA_TYPE_SUBTITLE:
+        par->width  = codec->width;
+        par->height = codec->height;
+        break;
+    }
+    /* Deep-copy extradata into a buffer with AV_INPUT_BUFFER_PADDING_SIZE spare bytes after it. */
+    if (codec->extradata) {
+        par->extradata = av_mallocz(codec->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!par->extradata)
+            return AVERROR(ENOMEM); /* par keeps every field copied so far */
+        memcpy(par->extradata, codec->extradata, codec->extradata_size);
+        par->extradata_size = codec->extradata_size;
+    }
+
+    return 0;
+}
+
+int avcodec_parameters_to_context(AVCodecContext *codec,
+                                  const AVCodecParameters *par)
+{
+    int ret;
+    /* Copy par into codec, beginning with the fields common to every media type. */
+    codec->codec_type = par->codec_type;
+    codec->codec_id   = par->codec_id;
+    codec->codec_tag  = par->codec_tag;
+
+    codec->bit_rate              = par->bit_rate;
+    codec->bits_per_coded_sample = par->bits_per_coded_sample;
+    codec->bits_per_raw_sample   = par->bits_per_raw_sample;
+    codec->profile               = par->profile;
+    codec->level                 = par->level;
+    /* Per-media-type fields; types without a case below copy nothing more here. */
+    switch (par->codec_type) {
+    case AVMEDIA_TYPE_VIDEO:
+        codec->pix_fmt                = par->format;
+        codec->width                  = par->width;
+        codec->height                 = par->height;
+        codec->field_order            = par->field_order;
+        codec->color_range            = par->color_range;
+        codec->color_primaries        = par->color_primaries;
+        codec->color_trc              = par->color_trc;
+        codec->colorspace             = par->color_space;
+        codec->chroma_sample_location = par->chroma_location;
+        codec->sample_aspect_ratio    = par->sample_aspect_ratio;
+        codec->has_b_frames           = par->video_delay;
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        codec->sample_fmt       = par->format;
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+        // if the old/new fields are set inconsistently, prefer the old ones
+        if ((par->channels && par->channels != par->ch_layout.nb_channels) ||
+            (par->channel_layout && (par->ch_layout.order != AV_CHANNEL_ORDER_NATIVE ||
+                                     par->ch_layout.u.mask != par->channel_layout))) {
+            if (par->channel_layout)
+                av_channel_layout_from_mask(&codec->ch_layout, par->channel_layout);
+            else {
+                codec->ch_layout.order       = AV_CHANNEL_ORDER_UNSPEC;
+                codec->ch_layout.nb_channels = par->channels;
+            }
+FF_ENABLE_DEPRECATION_WARNINGS
+        } else {
+#endif /* FF_API_OLD_CHANNEL_LAYOUT */
+        ret = av_channel_layout_copy(&codec->ch_layout, &par->ch_layout);
+        if (ret < 0)
+            return ret;
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+        }
+        codec->channel_layout = codec->ch_layout.order == AV_CHANNEL_ORDER_NATIVE ?
+                                codec->ch_layout.u.mask : 0;
+        codec->channels       = codec->ch_layout.nb_channels;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_OLD_CHANNEL_LAYOUT */
+        codec->sample_rate      = par->sample_rate;
+        codec->block_align      = par->block_align;
+        codec->frame_size       = par->frame_size;
+        codec->delay            = codec->initial_padding = par->initial_padding;
+        codec->trailing_padding = par->trailing_padding;
+        codec->seek_preroll     = par->seek_preroll;
+        break;
+    case AVMEDIA_TYPE_SUBTITLE:
+        codec->width  = par->width;
+        codec->height = par->height;
+        break;
+    }
+
+    av_freep(&codec->extradata); /* always dropped, so no stale buffer survives when par has none */
+    codec->extradata_size = 0;   /* keeps the size consistent if the allocation below fails */
+    if (par->extradata) {
+        codec->extradata = av_mallocz(par->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!codec->extradata)
+            return AVERROR(ENOMEM);
+        memcpy(codec->extradata, par->extradata, par->extradata_size);
+        codec->extradata_size = par->extradata_size;
+    }
+
+    return 0;
+}
diff --git a/media/ffvpx/libavcodec/codec_par.h b/media/ffvpx/libavcodec/codec_par.h
new file mode 100644
index 0000000000..f51d27c590
--- /dev/null
+++ b/media/ffvpx/libavcodec/codec_par.h
@@ -0,0 +1,247 @@
+/*
+ * Codec parameters public API
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CODEC_PAR_H
+#define AVCODEC_CODEC_PAR_H
+
+#include <stdint.h>
+
+#include "libavutil/avutil.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/rational.h"
+#include "libavutil/pixfmt.h"
+
+#include "codec_id.h"
+
+/**
+ * @addtogroup lavc_core
+ * @{
+ */
+
+enum AVFieldOrder {
+    AV_FIELD_UNKNOWN,
+    AV_FIELD_PROGRESSIVE,
+    AV_FIELD_TT,          ///< Top coded first, top displayed first
+    AV_FIELD_BB,          ///< Bottom coded first, bottom displayed first
+    AV_FIELD_TB,          ///< Top coded first, bottom displayed first
+    AV_FIELD_BT,          ///< Bottom coded first, top displayed first
+};
+
+/**
+ * This struct describes the properties of an encoded stream.
+ *
+ * sizeof(AVCodecParameters) is not a part of the public ABI, this struct must
+ * be allocated with avcodec_parameters_alloc() and freed with
+ * avcodec_parameters_free().
+ */
+typedef struct AVCodecParameters {
+    /**
+     * General type of the encoded data.
+     */
+    enum AVMediaType codec_type;
+    /**
+     * Specific type of the encoded data (the codec used).
+     */
+    enum AVCodecID   codec_id;
+    /**
+     * Additional information about the codec (corresponds to the AVI FOURCC).
+     */
+    uint32_t         codec_tag;
+
+    /**
+     * Extra binary data needed for initializing the decoder, codec-dependent.
+     *
+     * Must be allocated with av_malloc() and will be freed by
+     * avcodec_parameters_free(). The allocated size of extradata must be at
+     * least extradata_size + AV_INPUT_BUFFER_PADDING_SIZE, with the padding
+     * bytes zeroed.
+     */
+    uint8_t *extradata;
+    /**
+     * Size of the extradata content in bytes.
+     */
+    int      extradata_size;
+
+    /**
+     * - video: the pixel format, the value corresponds to enum AVPixelFormat.
+     * - audio: the sample format, the value corresponds to enum AVSampleFormat.
+     */
+    int format;
+
+    /**
+     * The average bitrate of the encoded data (in bits per second).
+     */
+    int64_t bit_rate;
+
+    /**
+     * The number of bits per sample in the codewords.
+     *
+     * This is basically the bitrate per sample. It is mandatory for a bunch of
+     * formats to actually decode them. It's the number of bits for one sample in
+     * the actual coded bitstream.
+     *
+     * This could be for example 4 for ADPCM.
+     * For PCM formats this matches bits_per_raw_sample.
+     * Can be 0.
+     */
+    int bits_per_coded_sample;
+
+    /**
+     * This is the number of valid bits in each output sample. If the
+     * sample format has more bits, the least significant bits are additional
+     * padding bits, which are always 0. Use right shifts to reduce the sample
+     * to its actual size. For example, audio formats with 24 bit samples will
+     * have bits_per_raw_sample set to 24, and format set to AV_SAMPLE_FMT_S32.
+     * To get the original sample use "(int32_t)sample >> 8".
+     *
+     * For ADPCM this might be 12 or 16 or similar.
+     * Can be 0.
+     */
+    int bits_per_raw_sample;
+
+    /**
+     * Codec-specific bitstream restrictions that the stream conforms to.
+     */
+    int profile;
+    int level;
+
+    /**
+     * Video only. The dimensions of the video frame in pixels.
+     */
+    int width;
+    int height;
+
+    /**
+     * Video only. The aspect ratio (width / height) which a single pixel
+     * should have when displayed.
+     *
+     * When the aspect ratio is unknown / undefined, the numerator should be
+     * set to 0 (the denominator may have any value).
+     */
+    AVRational sample_aspect_ratio;
+
+    /**
+     * Video only. The order of the fields in interlaced video.
+     */
+    enum AVFieldOrder                  field_order;
+
+    /**
+     * Video only. Additional colorspace characteristics.
+     */
+    enum AVColorRange                  color_range;
+    enum AVColorPrimaries              color_primaries;
+    enum AVColorTransferCharacteristic color_trc;
+    enum AVColorSpace                  color_space;
+    enum AVChromaLocation              chroma_location;
+
+    /**
+     * Video only. Number of delayed frames.
+     */
+    int video_delay;
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+    /**
+     * Audio only. The channel layout bitmask. May be 0 if the channel layout is
+     * unknown or unspecified, otherwise the number of bits set must be equal to
+     * the channels field.
+     * @deprecated use ch_layout
+     */
+    attribute_deprecated
+    uint64_t channel_layout;
+    /**
+     * Audio only. The number of audio channels.
+     * @deprecated use ch_layout.nb_channels
+     */
+    attribute_deprecated
+    int      channels;
+#endif
+    /**
+     * Audio only. The number of audio samples per second.
+     */
+    int      sample_rate;
+    /**
+     * Audio only. The number of bytes per coded audio frame, required by some
+     * formats.
+     *
+     * Corresponds to nBlockAlign in WAVEFORMATEX.
+     */
+    int      block_align;
+    /**
+     * Audio only. Audio frame size, if known. Required by some formats to be static.
+     */
+    int      frame_size;
+
+    /**
+     * Audio only. The amount of padding (in samples) inserted by the encoder at
+     * the beginning of the audio. I.e. this number of leading decoded samples
+     * must be discarded by the caller to get the original audio without leading
+     * padding.
+     */
+    int initial_padding;
+    /**
+     * Audio only. The amount of padding (in samples) appended by the encoder to
+     * the end of the audio. I.e. this number of decoded samples must be
+     * discarded by the caller from the end of the stream to get the original
+     * audio without any trailing padding.
+     */
+    int trailing_padding;
+    /**
+     * Audio only. Number of samples to skip after a discontinuity.
+     */
+    int seek_preroll;
+
+    /**
+     * Audio only. The channel layout and number of channels.
+     */
+    AVChannelLayout ch_layout;
+} AVCodecParameters;
+
+/**
+ * Allocate a new AVCodecParameters and set its fields to default values
+ * (unknown/invalid/0). The returned struct must be freed with
+ * avcodec_parameters_free().
+ */
+AVCodecParameters *avcodec_parameters_alloc(void);
+
+/**
+ * Free an AVCodecParameters instance and everything associated with it and
+ * write NULL to the supplied pointer.
+ */
+void avcodec_parameters_free(AVCodecParameters **par);
+
+/**
+ * Copy the contents of src to dst. Any allocated fields in dst are freed and
+ * replaced with newly allocated duplicates of the corresponding fields in src.
+ *
+ * @return >= 0 on success, a negative AVERROR code on failure.
+ */
+int avcodec_parameters_copy(AVCodecParameters *dst, const AVCodecParameters *src);
+
+/**
+ * This function is the same as av_get_audio_frame_duration(), except it works
+ * with AVCodecParameters instead of an AVCodecContext.
+ */
+int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes);
+
+/**
+ * @}
+ */
+
+#endif // AVCODEC_CODEC_PAR_H
diff --git a/media/ffvpx/libavcodec/dct.c b/media/ffvpx/libavcodec/dct.c
new file mode 100644
index 0000000000..eeb4d154e0
--- /dev/null
+++ b/media/ffvpx/libavcodec/dct.c
@@ -0,0 +1,228 @@
+/*
+ * (I)DCT Transforms
+ * Copyright (c) 2009 Peter Ross <pross@xvid.org>
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ * Copyright (c) 2010 Vitor Sessak
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/**
+ * @file
+ * (Inverse) Discrete Cosine Transforms. These are also known as the
+ * type II and type III DCTs respectively.
+ */
+
+#include <math.h>
+#include <string.h>
+
+#include "libavutil/error.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/mem.h"
+#include "dct.h"
+#include "dct32.h"
+
+/* sin(M_PI * x / (2 * n)) */
+#define SIN(s, n, x) (s->costab[(n) - (x)])
+
+/* cos(M_PI * x / (2 * n)) */
+#define COS(s, n, x) (s->costab[x])
+
+static void dst_calc_I_c(DCTContext *ctx, FFTSample *data)
+{
+    int n = 1 << ctx->nbits; /* transform length */
+    int i;
+    /* DST-I computed in place on data[0..n-1] using the context's RDFT. */
+    data[0] = 0; /* the first input element is ignored (see the note in dct.h) */
+    for (i = 1; i < n / 2; i++) { /* combine each sample with its mirror data[n - i] */
+        float tmp1   = data[i    ];
+        float tmp2   = data[n - i];
+        float s      = SIN(ctx, n, 2 * i); /* sin(M_PI * i / n) */
+
+        s           *= tmp1 + tmp2;
+        tmp1         = (tmp1 - tmp2) * 0.5f;
+        data[i]      = s + tmp1;
+        data[n - i]  = s - tmp1;
+    }
+
+    data[n / 2] *= 2; /* the midpoint is not visited by the loop above */
+    ctx->rdft.rdft_calc(&ctx->rdft, data);
+
+    data[0] *= 0.5f;
+    /* Rewrite the RDFT output in place, two entries per iteration. */
+    for (i = 1; i < n - 2; i += 2) {
+        data[i + 1] +=  data[i - 1];
+        data[i]      = -data[i + 2];
+    }
+
+    data[n - 1] = 0;
+}
+
+static void dct_calc_I_c(DCTContext *ctx, FFTSample *data)
+{
+    int n = 1 << ctx->nbits;
+    int i;
+    float next = -0.5f * (data[0] - data[n]); /* accumulated below, ends up in data[1] */
+    /* DCT-I computed in place on data[0..n], i.e. n + 1 samples (see dct.h). */
+    for (i = 0; i < n / 2; i++) {
+        float tmp1 = data[i];
+        float tmp2 = data[n - i];
+        float s    = SIN(ctx, n, 2 * i); /* sin(M_PI * i / n) */
+        float c    = COS(ctx, n, 2 * i); /* cos(M_PI * i / n) */
+
+        c *= tmp1 - tmp2;
+        s *= tmp1 - tmp2;
+
+        next += c;
+
+        tmp1        = (tmp1 + tmp2) * 0.5f;
+        data[i]     = tmp1 - s;
+        data[n - i] = tmp1 + s;
+    }
+
+    ctx->rdft.rdft_calc(&ctx->rdft, data);
+    data[n] = data[1]; /* keep the RDFT's data[1] before it is replaced */
+    data[1] = next;
+
+    for (i = 3; i <= n; i += 2) /* running difference: uses the already updated data[i - 2] */
+        data[i] = data[i - 2] - data[i];
+}
+
+static void dct_calc_III_c(DCTContext *ctx, FFTSample *data)
+{
+    int n = 1 << ctx->nbits;
+    int i;
+    /* DCT-III in place on data[0..n-1]: prepare the input, run the RDFT, recombine mirrored pairs. */
+    float next  = data[n - 1]; /* kept because the loop below overwrites data[n - 1] */
+    float inv_n = 1.0f / n;    /* scaling applied to the RDFT output */
+
+    for (i = n - 2; i >= 2; i -= 2) { /* downward, so data[i - 1] is still unmodified when read */
+        float val1 = data[i];
+        float val2 = data[i - 1] - data[i + 1];
+        float c    = COS(ctx, n, i);
+        float s    = SIN(ctx, n, i);
+
+        data[i]     = c * val1 + s * val2;
+        data[i + 1] = s * val1 - c * val2;
+    }
+
+    data[1] = 2 * next;
+
+    ctx->rdft.rdft_calc(&ctx->rdft, data);
+
+    for (i = 0; i < n / 2; i++) {
+        float tmp1 = data[i]         * inv_n;
+        float tmp2 = data[n - i - 1] * inv_n;
+        float csc  = ctx->csc2[i] * (tmp1 - tmp2); /* csc2[] is filled in ff_dct_init() */
+
+        tmp1            += tmp2;
+        data[i]          = tmp1 + csc;
+        data[n - i - 1]  = tmp1 - csc;
+    }
+}
+
+static void dct_calc_II_c(DCTContext *ctx, FFTSample *data)
+{
+    int n = 1 << ctx->nbits;
+    int i;
+    float next;
+    /* DCT-II in place on data[0..n-1]: fold mirrored pairs, run the RDFT, then post-process. */
+    for (i = 0; i < n / 2; i++) {
+        float tmp1 = data[i];
+        float tmp2 = data[n - i - 1];
+        float s    = SIN(ctx, n, 2 * i + 1); /* sin(M_PI * (2 * i + 1) / (2 * n)) */
+
+        s    *= tmp1 - tmp2;
+        tmp1  = (tmp1 + tmp2) * 0.5f;
+
+        data[i]     = tmp1 + s;
+        data[n-i-1] = tmp1 - s;
+    }
+
+    ctx->rdft.rdft_calc(&ctx->rdft, data);
+
+    next     = data[1] * 0.5;
+    data[1] *= -1;
+
+    for (i = n - 2; i >= 0; i -= 2) { /* pairs are handled from the highest index down */
+        float inr = data[i    ]; /* NOTE(review): presumably real/imaginary parts of one RDFT bin */
+        float ini = data[i + 1];
+        float c   = COS(ctx, n, i);
+        float s   = SIN(ctx, n, i);
+
+        data[i]     = c * inr + s * ini;
+        data[i + 1] = next; /* odd outputs are the running sum kept in next */
+
+        next += s * inr - c * ini;
+    }
+}
+
+static void dct32_func(DCTContext *ctx, FFTSample *data)
+{
+    ctx->dct32(data, data); /* in place: the same buffer is passed as output and input */
+}
+
+av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
+{
+    int n = 1 << nbits;
+    int i;
+    int ret;
+    /* Set up *s for the transform selected by 'inverse' (the parameter is named 'type' in dct.h). */
+    memset(s, 0, sizeof(*s)); /* start from an all-zero context */
+
+    s->nbits   = nbits;
+    s->inverse = inverse;
+
+    if (inverse == DCT_II && nbits == 5) { /* 32-point DCT-II: dedicated routine, no tables or RDFT */
+        s->dct_calc = dct32_func;
+    } else {
+        ff_init_ff_cos_tabs(nbits + 2);
+
+        s->costab = ff_cos_tabs[nbits + 2]; /* read through the SIN()/COS() macros */
+        s->csc2   = av_malloc_array(n / 2, sizeof(FFTSample));
+        if (!s->csc2)
+            return AVERROR(ENOMEM);
+
+        if ((ret = ff_rdft_init(&s->rdft, nbits, inverse == DCT_III)) < 0) {
+            av_freep(&s->csc2); /* release the table allocated above before failing */
+            return ret;
+        }
+
+        for (i = 0; i < n / 2; i++) /* table consumed by dct_calc_III_c() */
+            s->csc2[i] = 0.5 / sin((M_PI / (2 * n) * (2 * i + 1)));
+
+        switch (inverse) { /* no default: any other value leaves dct_calc zeroed by the memset */
+        case DCT_I  : s->dct_calc = dct_calc_I_c;   break;
+        case DCT_II : s->dct_calc = dct_calc_II_c;  break;
+        case DCT_III: s->dct_calc = dct_calc_III_c; break;
+        case DST_I  : s->dct_calc = dst_calc_I_c;   break;
+        }
+    }
+
+    s->dct32 = ff_dct32_float; /* set for every transform type, not only the dct32_func path */
+#if ARCH_X86
+    ff_dct_init_x86(s); /* NOTE(review): presumably installs x86-specific routines; defined elsewhere */
+#endif
+    /* Failure paths above return AVERROR(ENOMEM) or the ff_rdft_init() error code. */
+    return 0;
+}
+
+av_cold void ff_dct_end(DCTContext *s)
+{
+    ff_rdft_end(&s->rdft); /* NOTE(review): on the dct32 path rdft was only zeroed, never initialized - confirm this is tolerated */
+    av_freep(&s->csc2);    /* table allocated in ff_dct_init() */
+}
diff --git a/media/ffvpx/libavcodec/dct.h b/media/ffvpx/libavcodec/dct.h
new file mode 100644
index 0000000000..0a03e256d1
--- /dev/null
+++ b/media/ffvpx/libavcodec/dct.h
@@ -0,0 +1,69 @@
+/*
+ * (I)DCT Transforms
+ * Copyright (c) 2009 Peter Ross <pross@xvid.org>
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ * Copyright (c) 2010 Vitor Sessak
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#if !defined(AVCODEC_DCT_H) && (!defined(FFT_FLOAT) || FFT_FLOAT)
+#define AVCODEC_DCT_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rdft.h"
+
+struct DCTContext {
+    int nbits;           ///< nbits argument of ff_dct_init(); the transforms use n = 1 << nbits
+    int inverse;         ///< transform type passed to ff_dct_init() (enum DCTTransformType value)
+    RDFTContext rdft;    ///< RDFT used by the generic transforms; not initialized on the dct32 path
+    const float *costab; ///< ff_cos_tabs[nbits + 2], read through the SIN()/COS() macros in dct.c
+    FFTSample *csc2;     ///< n / 2 entries allocated and filled in ff_dct_init(), freed in ff_dct_end()
+    void (*dct_calc)(struct DCTContext *s, FFTSample *data); ///< selected transform, works on data in place
+    void (*dct32)(FFTSample *out, const FFTSample *in);      ///< 32-point routine, ff_dct32_float by default
+};
+
+/**
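+ * Set up DCT. Returns 0 on success, a negative AVERROR code on failure.
+ * @param nbits           log2 of the transform size; the input array holds:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ * @param type            transform to set up: DCT_I, DCT_II, DCT_III or DST_I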
+ * @note the first element of the input of DST-I is ignored
+ */
+int  ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType type);
+void ff_dct_end (DCTContext *s);
+
+void ff_dct_init_x86(DCTContext *s);
+
+void ff_fdct_ifast(int16_t *data);
+void ff_fdct_ifast248(int16_t *data);
+void ff_jpeg_fdct_islow_8(int16_t *data);
+void ff_jpeg_fdct_islow_10(int16_t *data);
+void ff_fdct248_islow_8(int16_t *data);
+void ff_fdct248_islow_10(int16_t *data);
+
+void ff_j_rev_dct(int16_t *data);
+void ff_j_rev_dct4(int16_t *data);
+void ff_j_rev_dct2(int16_t *data);
+void ff_j_rev_dct1(int16_t *data);
+void ff_jref_idct_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_jref_idct_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+
+#endif /* AVCODEC_DCT_H */
diff --git a/media/ffvpx/libavcodec/dct32.h b/media/ffvpx/libavcodec/dct32.h
new file mode 100644
index 0000000000..61bf223a8d
--- /dev/null
+++ b/media/ffvpx/libavcodec/dct32.h
@@ -0,0 +1,25 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCT32_H
+#define AVCODEC_DCT32_H
+
+void ff_dct32_float(float *dst, const float *src);
+void ff_dct32_fixed(int *dst, const int *src);
+
+#endif /* AVCODEC_DCT32_H */
diff --git a/media/ffvpx/libavcodec/dct32_fixed.c b/media/ffvpx/libavcodec/dct32_fixed.c
new file mode 100644
index 0000000000..9025d5efdd
--- /dev/null
+++ b/media/ffvpx/libavcodec/dct32_fixed.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DCT32_FLOAT 0
+#include "dct32_template.c"
diff --git a/media/ffvpx/libavcodec/dct32_float.c b/media/ffvpx/libavcodec/dct32_float.c
new file mode 100644
index 0000000000..597c9bb639
--- /dev/null
+++ b/media/ffvpx/libavcodec/dct32_float.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DCT32_FLOAT 1
+#include "dct32_template.c"
diff --git a/media/ffvpx/libavcodec/dct32_template.c b/media/ffvpx/libavcodec/dct32_template.c
new file mode 100644
index 0000000000..51cebc053f
--- /dev/null
+++ b/media/ffvpx/libavcodec/dct32_template.c
@@ -0,0 +1,288 @@
+/*
+ * Template for the Discrete Cosine Transform for 32 samples
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dct32.h"
+#include "mathops.h"
+#include "libavutil/internal.h"
+
+#ifdef CHECKED
+#define SUINT   int
+#define SUINT32 int32_t
+#else
+#define SUINT   unsigned
+#define SUINT32 uint32_t
+#endif
+
+#if DCT32_FLOAT
+#   define dct32 ff_dct32_float
+#   define FIXHR(x)       ((float)(x))
+#   define MULH3(x, y, s) ((s)*(y)*(x))
+#   define INTFLOAT float
+#   define SUINTFLOAT float
+#else
+#   define dct32 ff_dct32_fixed
+#   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
+#   define MULH3(x, y, s) MULH((s)*(x), y)
+#   define INTFLOAT int
+#   define SUINTFLOAT SUINT
+#endif
+
+
+/* COSj_k = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))), pre-divided by the 2^s that BF()/BF0() multiply back in */
+
+/* cos(i*pi/64) */
+
+#define COS0_0  FIXHR(0.50060299823519630134/2)
+#define COS0_1  FIXHR(0.50547095989754365998/2)
+#define COS0_2  FIXHR(0.51544730992262454697/2)
+#define COS0_3  FIXHR(0.53104259108978417447/2)
+#define COS0_4  FIXHR(0.55310389603444452782/2)
+#define COS0_5  FIXHR(0.58293496820613387367/2)
+#define COS0_6  FIXHR(0.62250412303566481615/2)
+#define COS0_7  FIXHR(0.67480834145500574602/2)
+#define COS0_8  FIXHR(0.74453627100229844977/2)
+#define COS0_9  FIXHR(0.83934964541552703873/2)
+#define COS0_10 FIXHR(0.97256823786196069369/2)
+#define COS0_11 FIXHR(1.16943993343288495515/4)
+#define COS0_12 FIXHR(1.48416461631416627724/4)
+#define COS0_13 FIXHR(2.05778100995341155085/8)
+#define COS0_14 FIXHR(3.40760841846871878570/8)
+#define COS0_15 FIXHR(10.19000812354805681150/32)
+
+#define COS1_0 FIXHR(0.50241928618815570551/2)
+#define COS1_1 FIXHR(0.52249861493968888062/2)
+#define COS1_2 FIXHR(0.56694403481635770368/2)
+#define COS1_3 FIXHR(0.64682178335999012954/2)
+#define COS1_4 FIXHR(0.78815462345125022473/2)
+#define COS1_5 FIXHR(1.06067768599034747134/4)
+#define COS1_6 FIXHR(1.72244709823833392782/4)
+#define COS1_7 FIXHR(5.10114861868916385802/16)
+
+#define COS2_0 FIXHR(0.50979557910415916894/2)
+#define COS2_1 FIXHR(0.60134488693504528054/2)
+#define COS2_2 FIXHR(0.89997622313641570463/2)
+#define COS2_3 FIXHR(2.56291544774150617881/8)
+
+#define COS3_0 FIXHR(0.54119610014619698439/2)
+#define COS3_1 FIXHR(1.30656296487637652785/4)
+
+#define COS4_0 FIXHR(M_SQRT1_2/2)
+
+/* butterfly operator: val##a = a + b, val##b = MULH3(a - b, c, 1 << s); needs tmp0/tmp1 in scope */
+#define BF(a, b, c, s)\
+{\
+    tmp0 = val##a + val##b;\
+    tmp1 = val##a - val##b;\
+    val##a = tmp0;\
+    val##b = MULH3(tmp1, c, 1<<(s));\
+}
+
+#define BF0(a, b, c, s)\
+{\
+    tmp0 = tab[a] + tab[b];\
+    tmp1 = tab[a] - tab[b];\
+    val##a = tmp0;\
+    val##b = MULH3(tmp1, c, 1<<(s));\
+}
+
+#define BF1(a, b, c, d)\
+{\
+    BF(a, b, COS4_0, 1);\
+    BF(c, d,-COS4_0, 1);\
+    val##c += val##d;\
+}
+
+#define BF2(a, b, c, d)\
+{\
+    BF(a, b, COS4_0, 1);\
+    BF(c, d,-COS4_0, 1);\
+    val##c += val##d;\
+    val##a += val##c;\
+    val##c += val##b;\
+    val##b += val##d;\
+}
+
+#define ADD(a, b) val##a += val##b
+
+/* DCT32 without 1/sqrt(2) coef zero scaling. */
+void dct32(INTFLOAT *out, const INTFLOAT *tab_arg)
+{
+    const SUINTFLOAT *tab = tab_arg; /* input viewed as SUINTFLOAT: float, or SUINT in the fixed-point build */
+    SUINTFLOAT tmp0, tmp1; /* scratch variables used by the BF()/BF0() macros */
+
+    SUINTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 ,
+             val8 , val9 , val10, val11, val12, val13, val14, val15,
+             val16, val17, val18, val19, val20, val21, val22, val23,
+             val24, val25, val26, val27, val28, val29, val30, val31;
+
+    /* pass 1 */
+    BF0( 0, 31, COS0_0 , 1);
+    BF0(15, 16, COS0_15, 5);
+    /* pass 2 */
+    BF( 0, 15, COS1_0 , 1);
+    BF(16, 31,-COS1_0 , 1);
+    /* pass 1 */
+    BF0( 7, 24, COS0_7 , 1);
+    BF0( 8, 23, COS0_8 , 1);
+    /* pass 2 */
+    BF( 7,  8, COS1_7 , 4);
+    BF(23, 24,-COS1_7 , 4);
+    /* pass 3 */
+    BF( 0,  7, COS2_0 , 1);
+    BF( 8, 15,-COS2_0 , 1);
+    BF(16, 23, COS2_0 , 1);
+    BF(24, 31,-COS2_0 , 1);
+    /* pass 1 */
+    BF0( 3, 28, COS0_3 , 1);
+    BF0(12, 19, COS0_12, 2);
+    /* pass 2 */
+    BF( 3, 12, COS1_3 , 1);
+    BF(19, 28,-COS1_3 , 1);
+    /* pass 1 */
+    BF0( 4, 27, COS0_4 , 1);
+    BF0(11, 20, COS0_11, 2);
+    /* pass 2 */
+    BF( 4, 11, COS1_4 , 1);
+    BF(20, 27,-COS1_4 , 1);
+    /* pass 3 */
+    BF( 3,  4, COS2_3 , 3);
+    BF(11, 12,-COS2_3 , 3);
+    BF(19, 20, COS2_3 , 3);
+    BF(27, 28,-COS2_3 , 3);
+    /* pass 4 */
+    BF( 0,  3, COS3_0 , 1);
+    BF( 4,  7,-COS3_0 , 1);
+    BF( 8, 11, COS3_0 , 1);
+    BF(12, 15,-COS3_0 , 1);
+    BF(16, 19, COS3_0 , 1);
+    BF(20, 23,-COS3_0 , 1);
+    BF(24, 27, COS3_0 , 1);
+    BF(28, 31,-COS3_0 , 1);
+
+
+
+    /* pass 1 */
+    BF0( 1, 30, COS0_1 , 1);
+    BF0(14, 17, COS0_14, 3);
+    /* pass 2 */
+    BF( 1, 14, COS1_1 , 1);
+    BF(17, 30,-COS1_1 , 1);
+    /* pass 1 */
+    BF0( 6, 25, COS0_6 , 1);
+    BF0( 9, 22, COS0_9 , 1);
+    /* pass 2 */
+    BF( 6,  9, COS1_6 , 2);
+    BF(22, 25,-COS1_6 , 2);
+    /* pass 3 */
+    BF( 1,  6, COS2_1 , 1);
+    BF( 9, 14,-COS2_1 , 1);
+    BF(17, 22, COS2_1 , 1);
+    BF(25, 30,-COS2_1 , 1);
+
+    /* pass 1 */
+    BF0( 2, 29, COS0_2 , 1);
+    BF0(13, 18, COS0_13, 3);
+    /* pass 2 */
+    BF( 2, 13, COS1_2 , 1);
+    BF(18, 29,-COS1_2 , 1);
+    /* pass 1 */
+    BF0( 5, 26, COS0_5 , 1);
+    BF0(10, 21, COS0_10, 1);
+    /* pass 2 */
+    BF( 5, 10, COS1_5 , 2);
+    BF(21, 26,-COS1_5 , 2);
+    /* pass 3 */
+    BF( 2,  5, COS2_2 , 1);
+    BF(10, 13,-COS2_2 , 1);
+    BF(18, 21, COS2_2 , 1);
+    BF(26, 29,-COS2_2 , 1);
+    /* pass 4 */
+    BF( 1,  2, COS3_1 , 2);
+    BF( 5,  6,-COS3_1 , 2);
+    BF( 9, 10, COS3_1 , 2);
+    BF(13, 14,-COS3_1 , 2);
+    BF(17, 18, COS3_1 , 2);
+    BF(21, 22,-COS3_1 , 2);
+    BF(25, 26, COS3_1 , 2);
+    BF(29, 30,-COS3_1 , 2);
+
+    /* pass 5 */
+    BF1( 0,  1,  2,  3);
+    BF2( 4,  5,  6,  7);
+    BF1( 8,  9, 10, 11);
+    BF2(12, 13, 14, 15);
+    BF1(16, 17, 18, 19);
+    BF2(20, 21, 22, 23);
+    BF1(24, 25, 26, 27);
+    BF2(28, 29, 30, 31);
+
+    /* pass 6 */
+
+    ADD( 8, 12);
+    ADD(12, 10);
+    ADD(10, 14);
+    ADD(14,  9);
+    ADD( 9, 13);
+    ADD(13, 11);
+    ADD(11, 15);
+    /* All 32 inputs were loaded by the BF0() calls above, so out may be the same buffer as tab_arg. */
+    out[ 0] = val0;
+    out[16] = val1;
+    out[ 8] = val2;
+    out[24] = val3;
+    out[ 4] = val4;
+    out[20] = val5;
+    out[12] = val6;
+    out[28] = val7;
+    out[ 2] = val8;
+    out[18] = val9;
+    out[10] = val10;
+    out[26] = val11;
+    out[ 6] = val12;
+    out[22] = val13;
+    out[14] = val14;
+    out[30] = val15;
+
+    ADD(24, 28);
+    ADD(28, 26);
+    ADD(26, 30);
+    ADD(30, 25);
+    ADD(25, 29);
+    ADD(29, 27);
+    ADD(27, 31);
+
+    out[ 1] = val16 + val24;
+    out[17] = val17 + val25;
+    out[ 9] = val18 + val26;
+    out[25] = val19 + val27;
+    out[ 5] = val20 + val28;
+    out[21] = val21 + val29;
+    out[13] = val22 + val30;
+    out[29] = val23 + val31;
+    out[ 3] = val24 + val20;
+    out[19] = val25 + val21;
+    out[11] = val26 + val22;
+    out[27] = val27 + val23;
+    out[ 7] = val28 + val18;
+    out[23] = val29 + val19;
+    out[15] = val30 + val17;
+    out[31] = val31;
+}
diff --git a/media/ffvpx/libavcodec/decode.c b/media/ffvpx/libavcodec/decode.c
new file mode 100644
index 0000000000..be2be81089
--- /dev/null
+++ b/media/ffvpx/libavcodec/decode.c
@@ -0,0 +1,1687 @@
+/*
+ * generic decoding-related code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "config.h"
+
+#if CONFIG_ICONV
+# include <iconv.h>
+#endif
+
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/common.h"
+#include "libavutil/fifo.h"
+#include "libavutil/frame.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/intmath.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "bsf.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "hwconfig.h"
+#include "internal.h"
+#include "thread.h"
+
+/* Apply AV_PKT_DATA_PARAM_CHANGE side data from avpkt (if present) to avctx.
+ * Errors are logged, but only returned when AV_EF_EXPLODE is set; otherwise
+ * (and when there is nothing to apply) 0 is returned. */
+static int apply_param_change(AVCodecContext *avctx, const AVPacket *avpkt)
+{
+    int ret;
+    size_t size;
+    const uint8_t *data;
+    uint32_t flags;
+    int64_t val;
+
+    data = av_packet_get_side_data(avpkt, AV_PKT_DATA_PARAM_CHANGE, &size);
+    if (!data)
+        return 0;
+
+    if (!(avctx->codec->capabilities & AV_CODEC_CAP_PARAM_CHANGE)) {
+        av_log(avctx, AV_LOG_ERROR, "This decoder does not support parameter "
+               "changes, but PARAM_CHANGE side data was sent to it.\n");
+        ret = AVERROR(EINVAL);
+        goto fail2;
+    }
+
+    if (size < 4)
+        goto fail;
+
+    flags = bytestream_get_le32(&data); /* AV_SIDE_DATA_PARAM_CHANGE_* bits */
+    size -= 4;
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT) {
+        if (size < 4)
+            goto fail;
+        val = bytestream_get_le32(&data);
+        if (val <= 0 || val > INT_MAX) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid channel count\n");
+            ret = AVERROR_INVALIDDATA;
+            goto fail2;
+        }
+        avctx->channels = val;
+        size -= 4;
+    }
+    if (flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT) {
+        if (size < 8)
+            goto fail;
+        avctx->channel_layout = bytestream_get_le64(&data);
+        size -= 8;
+    }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    if (flags & AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE) {
+        if (size < 4)
+            goto fail;
+        val = bytestream_get_le32(&data);
+        if (val <= 0 || val > INT_MAX) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid sample rate\n");
+            ret = AVERROR_INVALIDDATA;
+            goto fail2;
+        }
+        avctx->sample_rate = val;
+        size -= 4;
+    }
+    if (flags & AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS) {
+        if (size < 8)
+            goto fail;
+        avctx->width  = bytestream_get_le32(&data);
+        avctx->height = bytestream_get_le32(&data);
+        size -= 8;
+        ret = ff_set_dimensions(avctx, avctx->width, avctx->height);
+        if (ret < 0)
+            goto fail2;
+    }
+
+    return 0;
+fail:
+    av_log(avctx, AV_LOG_ERROR, "PARAM_CHANGE side data too small.\n");
+    ret = AVERROR_INVALIDDATA;
+fail2:
+    /* every jump to fail2 (and the fall-through from fail) sets ret < 0 */
+    av_log(avctx, AV_LOG_ERROR, "Error applying parameter changes.\n");
+    return (avctx->err_recognition & AV_EF_EXPLODE) ? ret : 0;
+}
+
+/* Drop whatever avci->last_pkt_props holds and, when pkt is non-NULL, copy
+ * pkt's properties into it. Returns av_packet_copy_props()'s result, else 0. */
+static int extract_packet_props(AVCodecInternal *avci, const AVPacket *pkt)
+{
+    AVPacket *const props = avci->last_pkt_props;
+    int err = 0;
+
+    av_packet_unref(props);
+    if (pkt && !(err = av_packet_copy_props(props, pkt)))
+        props->opaque = (void *)(intptr_t)pkt->size; // Needed for ff_decode_frame_props().
+    return err;
+}
+
+/* Set up avci->bsf from the codec's bitstream filter list, unless it already
+ * exists. Returns 0 on success; on failure the chain is freed and a negative
+ * error code is returned. */
+static int decode_bsfs_init(AVCodecContext *avctx)
+{
+    AVCodecInternal *const internal = avctx->internal;
+    const FFCodec *const codec = ffcodec(avctx->codec);
+    int err;
+
+    if (internal->bsf)
+        return 0;
+
+    err = av_bsf_list_parse_str(codec->bsfs, &internal->bsf);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Error parsing decoder bitstream filters '%s': %s\n", codec->bsfs, av_err2str(err));
+        /* only out-of-memory is passed on; anything else is reported as a bug */
+        if (err != AVERROR(ENOMEM))
+            err = AVERROR_BUG;
+    } else {
+        /* We do not currently have an API for passing the input timebase into decoders,
+         * but no filters used here should actually need it.
+         * So we make up some plausible-looking number (the MPEG 90kHz timebase) */
+        internal->bsf->time_base_in = (AVRational){ 1, 90000 };
+        err = avcodec_parameters_from_context(internal->bsf->par_in, avctx);
+        if (err >= 0)
+            err = av_bsf_init(internal->bsf);
+    }
+
+    if (err >= 0)
+        return 0;
+
+    av_bsf_free(&internal->bsf);
+    return err;
+}
+
+/* Pull the next packet out of the bitstream filter chain into pkt, remember
+ * its properties (unless the codec sets frame properties itself) and apply
+ * parameter-change side data. pkt is unreferenced if those steps fail. */
+int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    AVCodecInternal *const internal = avctx->internal;
+    int err;
+
+    if (internal->draining)
+        return AVERROR_EOF;
+
+    err = av_bsf_receive_packet(internal->bsf, pkt);
+    if (err < 0) {
+        /* EOF from the filter chain sets the draining flag for later calls */
+        if (err == AVERROR_EOF)
+            internal->draining = 1;
+        return err;
+    }
+
+    if (!(ffcodec(avctx->codec)->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS))
+        err = extract_packet_props(internal, pkt);
+    if (err >= 0)
+        err = apply_param_change(avctx, pkt);
+    if (err >= 0)
+        return 0;
+
+    av_packet_unref(pkt);
+    return err;
+}
+
+/**
+ * Attempt to guess proper monotonic timestamps for decoded video frames
+ * which might have incorrect times. Input timestamps may wrap around, in
+ * which case the output will as well. When both inputs are set, the one
+ * that has gone backwards (or stalled) less often so far wins, pts on a tie.
+ *
+ * @param reordered_pts the decoded AVPacket's pts, as passed through AVFrame.pts
+ * @param dts the dts field of the decoded AVPacket
+ * @return one of the input values, may be AV_NOPTS_VALUE
+ */
+static int64_t guess_correct_pts(AVCodecContext *ctx,
+                                 int64_t reordered_pts, int64_t dts)
+{
+    const int have_pts = reordered_pts != AV_NOPTS_VALUE;
+    const int have_dts = dts           != AV_NOPTS_VALUE;
+
+    if (have_dts) {
+        if (dts <= ctx->pts_correction_last_dts)
+            ctx->pts_correction_num_faulty_dts++;
+        ctx->pts_correction_last_dts = dts;
+    } else if (have_pts) {
+        ctx->pts_correction_last_dts = reordered_pts;
+    }
+    if (have_pts) {
+        if (reordered_pts <= ctx->pts_correction_last_pts)
+            ctx->pts_correction_num_faulty_pts++;
+        ctx->pts_correction_last_pts = reordered_pts;
+    } else if (have_dts) {
+        ctx->pts_correction_last_pts = dts;
+    }
+    if (!have_pts || !have_dts)
+        return have_pts ? reordered_pts : dts;
+    return ctx->pts_correction_num_faulty_pts <= ctx->pts_correction_num_faulty_dts ?
+           reordered_pts : dts;
+}
+
+/*
+ * Core of the receive_frame wrapper for decoders implementing the simple API.
+ * Decoders may consume partial packets without returning output, so this must
+ * be called in a loop (see decode_simple_receive_frame()). Returns 0 or a
+ * negative error code; *discarded_samples grows by the audio samples dropped.
+ */
+static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame, int64_t *discarded_samples)
+{
+    AVCodecInternal   *avci = avctx->internal;
+    AVPacket     *const pkt = avci->in_pkt;
+    const FFCodec *const codec = ffcodec(avctx->codec);
+    int got_frame, actual_got_frame;
+    int ret;
+
+    if (!pkt->data && !avci->draining) {
+        av_packet_unref(pkt);
+        ret = ff_decode_get_packet(avctx, pkt);
+        if (ret < 0 && ret != AVERROR_EOF)
+            return ret;
+    }
+
+    // Some codecs (at least wma lossless) will crash when feeding drain packets
+    // after EOF was signaled.
+    if (avci->draining_done)
+        return AVERROR_EOF;
+
+    if (!pkt->data &&
+        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
+          avctx->active_thread_type & FF_THREAD_FRAME))
+        return AVERROR_EOF;
+
+    got_frame = 0;
+
+    if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
+        ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
+    } else {
+        ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
+
+        if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
+            frame->pkt_dts = pkt->dts;
+        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+            if(!avctx->has_b_frames)
+                frame->pkt_pos = pkt->pos;
+            //FIXME these should be under if(!avctx->has_b_frames)
+            /* get_buffer is supposed to set frame parameters */
+            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
+                if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
+                if (!frame->width)                    frame->width               = avctx->width;
+                if (!frame->height)                   frame->height              = avctx->height;
+                if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
+            }
+        }
+    }
+    emms_c();
+    actual_got_frame = got_frame; /* saved before the discard logic below may clear got_frame */
+
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if (frame->flags & AV_FRAME_FLAG_DISCARD)
+            got_frame = 0;
+    } else if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+        uint8_t *side;
+        size_t side_size;
+        uint32_t discard_padding = 0;
+        uint8_t skip_reason = 0;
+        uint8_t discard_reason = 0;
+
+        if (ret >= 0 && got_frame) {
+            if (frame->format == AV_SAMPLE_FMT_NONE)
+                frame->format = avctx->sample_fmt;
+            if (!frame->ch_layout.nb_channels) {
+                int ret2 = av_channel_layout_copy(&frame->ch_layout, &avctx->ch_layout);
+                if (ret2 < 0) {
+                    ret = ret2;
+                    got_frame = 0;
+                }
+            }
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+            if (!frame->channel_layout)
+                frame->channel_layout = avctx->ch_layout.order == AV_CHANNEL_ORDER_NATIVE ?
+                                        avctx->ch_layout.u.mask : 0;
+            if (!frame->channels)
+                frame->channels = avctx->ch_layout.nb_channels;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+            if (!frame->sample_rate)
+                frame->sample_rate = avctx->sample_rate;
+        }
+        /* pick up skip/discard instructions from the SKIP_SAMPLES packet side data */
+        side= av_packet_get_side_data(avci->last_pkt_props, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
+        if(side && side_size>=10) {
+            avci->skip_samples = AV_RL32(side);
+            avci->skip_samples = FFMAX(0, avci->skip_samples); /* never negative */
+            discard_padding = AV_RL32(side + 4);
+            av_log(avctx, AV_LOG_DEBUG, "skip %d / discard %d samples due to side data\n",
+                   avci->skip_samples, (int)discard_padding);
+            skip_reason = AV_RL8(side + 8);
+            discard_reason = AV_RL8(side + 9);
+        }
+        /* a frame flagged DISCARD is dropped whole and counted against skip_samples */
+        if ((frame->flags & AV_FRAME_FLAG_DISCARD) && got_frame &&
+            !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
+            avci->skip_samples = FFMAX(0, avci->skip_samples - frame->nb_samples);
+            got_frame = 0;
+            *discarded_samples += frame->nb_samples;
+        }
+        /* drop skip_samples samples from the start of the frame */
+        if (avci->skip_samples > 0 && got_frame &&
+            !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
+            if(frame->nb_samples <= avci->skip_samples){
+                got_frame = 0;
+                *discarded_samples += frame->nb_samples;
+                avci->skip_samples -= frame->nb_samples;
+                av_log(avctx, AV_LOG_DEBUG, "skip whole frame, skip left: %d\n",
+                       avci->skip_samples);
+            } else {
+                av_samples_copy(frame->extended_data, frame->extended_data, 0, avci->skip_samples,
+                                frame->nb_samples - avci->skip_samples, avctx->ch_layout.nb_channels, frame->format);
+                if(avctx->pkt_timebase.num && avctx->sample_rate) {
+                    int64_t diff_ts = av_rescale_q(avci->skip_samples,
+                                                   (AVRational){1, avctx->sample_rate},
+                                                   avctx->pkt_timebase);
+                    if(frame->pts!=AV_NOPTS_VALUE)
+                        frame->pts += diff_ts;
+                    if(frame->pkt_dts!=AV_NOPTS_VALUE)
+                        frame->pkt_dts += diff_ts;
+                    if (frame->duration >= diff_ts)
+                        frame->duration -= diff_ts;
+                } else {
+                    av_log(avctx, AV_LOG_WARNING, "Could not update timestamps for skipped samples.\n");
+                }
+                av_log(avctx, AV_LOG_DEBUG, "skip %d/%d samples\n",
+                       avci->skip_samples, frame->nb_samples);
+                *discarded_samples += avci->skip_samples;
+                frame->nb_samples -= avci->skip_samples;
+                avci->skip_samples = 0;
+            }
+        }
+        /* drop discard_padding samples from the end of the frame */
+        if (discard_padding > 0 && discard_padding <= frame->nb_samples && got_frame &&
+            !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
+            if (discard_padding == frame->nb_samples) {
+                *discarded_samples += frame->nb_samples;
+                got_frame = 0;
+            } else {
+                if(avctx->pkt_timebase.num && avctx->sample_rate) {
+                    int64_t diff_ts = av_rescale_q(frame->nb_samples - discard_padding,
+                                                   (AVRational){1, avctx->sample_rate},
+                                                   avctx->pkt_timebase);
+                    frame->duration = diff_ts;
+                } else {
+                    av_log(avctx, AV_LOG_WARNING, "Could not update timestamps for discarded samples.\n");
+                }
+                av_log(avctx, AV_LOG_DEBUG, "discard %d/%d samples\n",
+                       (int)discard_padding, frame->nb_samples);
+                frame->nb_samples -= discard_padding;
+            }
+        }
+        /* with SKIP_MANUAL the values are exported as frame side data instead */
+        if ((avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL) && got_frame) {
+            AVFrameSideData *fside = av_frame_new_side_data(frame, AV_FRAME_DATA_SKIP_SAMPLES, 10);
+            if (fside) {
+                AV_WL32(fside->data, avci->skip_samples);
+                AV_WL32(fside->data + 4, discard_padding);
+                AV_WL8(fside->data + 8, skip_reason);
+                AV_WL8(fside->data + 9, discard_reason);
+                avci->skip_samples = 0;
+            }
+        }
+    }
+
+    if (avctx->codec->type == AVMEDIA_TYPE_AUDIO &&
+        !avci->showed_multi_packet_warning &&
+        ret >= 0 && ret != pkt->size && !(avctx->codec->capabilities & AV_CODEC_CAP_SUBFRAMES)) {
+        av_log(avctx, AV_LOG_WARNING, "Multiple frames in a packet.\n");
+        avci->showed_multi_packet_warning = 1;
+    }
+
+    if (!got_frame)
+        av_frame_unref(frame);
+    /* video decoders are treated as having consumed the whole packet */
+    if (ret >= 0 && avctx->codec->type == AVMEDIA_TYPE_VIDEO)
+        ret = pkt->size;
+
+    /* do not stop draining when actual_got_frame != 0 or ret < 0 */
+    /* got_frame == 0 but actual_got_frame != 0 when frame is discarded */
+    if (avci->draining && !actual_got_frame) {
+        if (ret < 0) {
+            /* prevent infinite loop if a decoder wrongly always return error on draining */
+            /* reasonable nb_errors_max = maximum b frames + thread count */
+            int nb_errors_max = 20 + (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME ?
+                                avctx->thread_count : 1);
+
+            if (avci->nb_draining_errors++ >= nb_errors_max) {
+                av_log(avctx, AV_LOG_ERROR, "Too many errors when draining, this is a bug. "
+                       "Stop draining and force EOF.\n");
+                avci->draining_done = 1;
+                ret = AVERROR_BUG;
+            }
+        } else {
+            avci->draining_done = 1;
+        }
+    }
+    /* drop a finished or failed packet, otherwise keep the unconsumed tail */
+    if (ret >= pkt->size || ret < 0) {
+        av_packet_unref(pkt);
+    } else {
+        int consumed = ret;
+
+        pkt->data                += consumed;
+        pkt->size                -= consumed;
+        pkt->pts                  = AV_NOPTS_VALUE;
+        pkt->dts                  = AV_NOPTS_VALUE;
+        if (!(codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) {
+            // See extract_packet_props() comment.
+            avci->last_pkt_props->opaque = (void *)((intptr_t)avci->last_pkt_props->opaque - consumed);
+            avci->last_pkt_props->pts = AV_NOPTS_VALUE;
+            avci->last_pkt_props->dts = AV_NOPTS_VALUE;
+        }
+    }
+
+    if (got_frame)
+        av_assert0(frame->buf[0]);
+
+    return ret < 0 ? ret : 0;
+}
+
+#if CONFIG_LCMS2
+/* If AV_CODEC_FLAG2_ICC_PROFILES is set and the frame carries a non-empty ICC
+ * profile, derive frame->color_primaries and frame->color_trc from it. */
+static int detect_colorspace(AVCodecContext *avctx, AVFrame *frame)
+{
+    AVCodecInternal *const internal = avctx->internal;
+    enum AVColorTransferCharacteristic transfer;
+    enum AVColorPrimaries primaries;
+    AVColorPrimariesDesc desc;
+    AVFrameSideData *icc_sd;
+    cmsHPROFILE prof;
+    int err;
+
+    if (!(avctx->flags2 & AV_CODEC_FLAG2_ICC_PROFILES))
+        return 0;
+    icc_sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ICC_PROFILE);
+    if (!(icc_sd && icc_sd->size))
+        return 0;
+
+    if (!internal->icc.avctx) { /* ICC context has no avctx yet: set it up */
+        err = ff_icc_context_init(&internal->icc, avctx);
+        if (err < 0)
+            return err;
+    }
+    prof = cmsOpenProfileFromMemTHR(internal->icc.ctx, icc_sd->data, icc_sd->size);
+    if (!prof)
+        return AVERROR_INVALIDDATA;
+
+    err = ff_icc_profile_read_primaries(&internal->icc, prof, &desc);
+    if (!err)
+        err = ff_icc_profile_detect_transfer(&internal->icc, prof, &transfer);
+    cmsCloseProfile(prof);
+    if (err < 0)
+        return err;
+
+    primaries = av_csp_primaries_id_from_desc(&desc);
+    if (primaries != AVCOL_PRI_UNSPECIFIED)
+        frame->color_primaries = primaries;
+    if (transfer != AVCOL_TRC_UNSPECIFIED)
+        frame->color_trc = transfer;
+    return 0;
+}
+#else /* !CONFIG_LCMS2 */
+static int detect_colorspace(av_unused AVCodecContext *c, av_unused AVFrame *f)
+{
+    return 0;
+}
+#endif
+
+/* Run decode_simple_internal() until frame holds data or an error occurs.
+ * Gives up with AVERROR(EAGAIN) once more than avctx->max_samples samples
+ * have been discarded during this call. */
+static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    int64_t dropped = 0;
+    int err = 0;
+
+    while (err >= 0 && !frame->buf[0]) {
+        if (dropped > avctx->max_samples)
+            return AVERROR(EAGAIN);
+        err = decode_simple_internal(avctx, frame, &dropped);
+    }
+    return err < 0 ? err : 0;
+}
+
+/* Get one frame from the decoder -- through its receive_frame callback or the
+ * simple-API wrapper -- and finish it: ICC-based colorspace detection,
+ * best-effort timestamp and the optional per-frame post_process hook.
+ * On the regular return path the frame's private_ref is released. */
+static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+{
+    AVCodecInternal *const internal = avctx->internal;
+    const FFCodec *const codec = ffcodec(avctx->codec);
+    int ret, csp_ret;
+
+    av_assert0(!frame->buf[0]);
+
+    ret = codec->cb_type == FF_CODEC_CB_TYPE_RECEIVE_FRAME ?
+          codec->cb.receive_frame(avctx, frame) :
+          decode_simple_receive_frame(avctx, frame);
+    if (ret == AVERROR_EOF)
+        internal->draining_done = 1;
+
+    /* a detection failure replaces ret; otherwise ret is left untouched */
+    csp_ret = detect_colorspace(avctx, frame);
+    if (csp_ret < 0) {
+        av_frame_unref(frame);
+        return csp_ret;
+    }
+
+    if (ret == 0) {
+        frame->best_effort_timestamp = guess_correct_pts(avctx, frame->pts,
+                                                         frame->pkt_dts);
+#if FF_API_PKT_DURATION
+FF_DISABLE_DEPRECATION_WARNINGS
+        frame->pkt_duration = frame->duration;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+        /* the only case where decode data is not set should be decoders
+         * that do not call ff_get_buffer() */
+        av_assert0((frame->private_ref && frame->private_ref->size == sizeof(FrameDecodeData)) ||
+                   !(avctx->codec->capabilities & AV_CODEC_CAP_DR1));
+
+        if (frame->private_ref) {
+            FrameDecodeData *decode_data = (FrameDecodeData*)frame->private_ref->data;
+
+            if (decode_data->post_process) {
+                ret = decode_data->post_process(avctx, frame);
+                if (ret < 0) {
+                    av_frame_unref(frame);
+                    return ret;
+                }
+            }
+        }
+    }
+
+    /* free the per-frame decode data */
+    av_buffer_unref(&frame->private_ref);
+
+    return ret;
+}
+
+/* Queue avpkt (NULL or an empty packet is allowed) into the bitstream filter
+ * chain and, if no frame is buffered yet, try to decode one into buffer_frame. */
+int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
+{
+    AVCodecInternal *const internal = avctx->internal;
+    int err;
+
+    if (!avcodec_is_open(avctx) || !av_codec_is_decoder(avctx->codec))
+        return AVERROR(EINVAL);
+    if (internal->draining)
+        return AVERROR_EOF;
+    if (avpkt && avpkt->data && !avpkt->size)
+        return AVERROR(EINVAL);
+
+    av_packet_unref(internal->buffer_pkt);
+    if (avpkt && (avpkt->data || avpkt->side_data_elems)) {
+        err = av_packet_ref(internal->buffer_pkt, avpkt);
+        if (err < 0)
+            return err;
+    }
+
+    err = av_bsf_send_packet(internal->bsf, internal->buffer_pkt);
+    if (err < 0) {
+        av_packet_unref(internal->buffer_pkt);
+        return err;
+    }
+
+    /* EAGAIN and EOF from this early decode attempt are not reported here */
+    if (!internal->buffer_frame->buf[0]) {
+        err = decode_receive_frame_internal(avctx, internal->buffer_frame);
+        if (err < 0 && err != AVERROR(EAGAIN) && err != AVERROR_EOF)
+            return err;
+    }
+    return 0;
+}
+
+/* Sanity-check the decoder-provided crop rectangle (resetting it with a
+ * warning when invalid), then crop the frame if avctx->apply_cropping is set. */
+static int apply_cropping(AVCodecContext *avctx, AVFrame *frame)
+{
+    /* make sure we are noisy about decoders returning invalid cropping data */
+    const int invalid = frame->crop_left >= INT_MAX - frame->crop_right        ||
+                        frame->crop_top  >= INT_MAX - frame->crop_bottom       ||
+                        (frame->crop_left + frame->crop_right) >= frame->width ||
+                        (frame->crop_top + frame->crop_bottom) >= frame->height;
+
+    if (invalid) {
+        av_log(avctx, AV_LOG_WARNING,
+               "Invalid cropping information set by a decoder: "
+               "%"SIZE_SPECIFIER"/%"SIZE_SPECIFIER"/%"SIZE_SPECIFIER"/%"SIZE_SPECIFIER" "
+               "(frame size %dx%d). This is a bug, please report it\n",
+               frame->crop_left, frame->crop_right, frame->crop_top, frame->crop_bottom,
+               frame->width, frame->height);
+        frame->crop_left = frame->crop_right = frame->crop_top = frame->crop_bottom = 0;
+        return 0;
+    }
+    if (!avctx->apply_cropping)
+        return 0;
+
+    return av_frame_apply_cropping(frame, (avctx->flags & AV_CODEC_FLAG_UNALIGNED) ?
+                                          AV_FRAME_CROP_UNALIGNED : 0);
+}
+
+// make sure frames returned to the caller are valid: a buffer and a
+// non-negative format, plus sane dimensions (video) or layout and rate (audio)
+static int frame_validate(AVCodecContext *avctx, AVFrame *frame)
+{
+    int valid = frame->buf[0] && frame->format >= 0;
+
+    if (valid) {
+        switch (avctx->codec_type) {
+        case AVMEDIA_TYPE_VIDEO:
+            valid = frame->width > 0 && frame->height > 0;
+            break;
+        case AVMEDIA_TYPE_AUDIO:
+            valid = av_channel_layout_check(&frame->ch_layout) &&
+                    frame->sample_rate > 0;
+            break;
+        default: av_assert0(0);
+        }
+    }
+
+    if (valid)
+        return 0;
+
+    av_log(avctx, AV_LOG_ERROR, "An invalid frame was output by a decoder. "
+           "This is a bug, please report it.\n");
+    return AVERROR_BUG;
+}
+
+int ff_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    AVCodecInternal *avci = avctx->internal;
+    int ret, changed;
+    /* Output path: take the buffered frame or decode one, then validate/crop/filter it. */
+    if (!avcodec_is_open(avctx) || !av_codec_is_decoder(avctx->codec))
+        return AVERROR(EINVAL);
+    /* a frame already sitting in avci->buffer_frame is handed out first */
+    if (avci->buffer_frame->buf[0]) {
+        av_frame_move_ref(frame, avci->buffer_frame);
+    } else {
+        ret = decode_receive_frame_internal(avctx, frame);
+        if (ret < 0)
+            return ret;
+    }
+
+    ret = frame_validate(avctx, frame);
+    if (ret < 0)
+        goto fail;
+
+    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+        ret = apply_cropping(avctx, frame);
+        if (ret < 0)
+            goto fail;
+    }
+
+    avctx->frame_num++; /* incremented before the DROPCHANGED check, so dropped frames count too */
+#if FF_API_AVCTX_FRAME_NUMBER
+FF_DISABLE_DEPRECATION_WARNINGS
+    avctx->frame_number = avctx->frame_num;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    /* with DROPCHANGED, frames whose parameters differ from the first one are rejected */
+    if (avctx->flags & AV_CODEC_FLAG_DROPCHANGED) {
+        /* remember the parameters of the first frame */
+        if (avctx->frame_num == 1) {
+            avci->initial_format = frame->format;
+            switch(avctx->codec_type) {
+            case AVMEDIA_TYPE_VIDEO:
+                avci->initial_width  = frame->width;
+                avci->initial_height = frame->height;
+                break;
+            case AVMEDIA_TYPE_AUDIO:
+                avci->initial_sample_rate = frame->sample_rate ? frame->sample_rate :
+                                                                 avctx->sample_rate;
+                ret = av_channel_layout_copy(&avci->initial_ch_layout, &frame->ch_layout);
+                if (ret < 0)
+                    goto fail;
+                break;
+            }
+        }
+        /* compare every later frame against the remembered parameters */
+        if (avctx->frame_num > 1) {
+            changed = avci->initial_format != frame->format;
+
+            switch(avctx->codec_type) {
+            case AVMEDIA_TYPE_VIDEO:
+                changed |= avci->initial_width  != frame->width ||
+                           avci->initial_height != frame->height;
+                break;
+            case AVMEDIA_TYPE_AUDIO:
+                changed |= avci->initial_sample_rate    != frame->sample_rate ||
+                           avci->initial_sample_rate    != avctx->sample_rate ||
+                           av_channel_layout_compare(&avci->initial_ch_layout, &frame->ch_layout);
+                break;
+            }
+
+            if (changed) {
+                avci->changed_frames_dropped++;
+                av_log(avctx, AV_LOG_INFO, "dropped changed frame #%"PRId64" pts %"PRId64
+                                            " drop count: %d \n",
+                                            avctx->frame_num, frame->pts,
+                                            avci->changed_frames_dropped);
+                ret = AVERROR_INPUT_CHANGED;
+                goto fail;
+            }
+        }
+    }
+    return 0;
+fail:
+    av_frame_unref(frame); /* on any failure the caller gets no frame data back */
+    return ret;
+}
+
+static void get_subtitle_defaults(AVSubtitle *sub)
+{
+    memset(sub, 0, sizeof(*sub)); /* start from an all-zero AVSubtitle ... */
+    sub->pts = AV_NOPTS_VALUE;    /* ... whose pts is AV_NOPTS_VALUE rather than 0 */
+}
+
+#define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */
+/* Recode inpkt to UTF-8 when sub_charenc_mode asks for pre-decoder recoding.
+ * On success *outpkt points at inpkt (nothing to do) or at buf_pkt, which then
+ * holds the recoded data; on failure buf_pkt is left unreferenced. */
+static int recode_subtitle(AVCodecContext *avctx, const AVPacket **outpkt,
+                           const AVPacket *inpkt, AVPacket *buf_pkt)
+{
+#if CONFIG_ICONV
+    iconv_t cd = (iconv_t)-1;
+    int ret = 0;
+    char *inb, *outb;
+    size_t inl, outl;
+#endif
+
+    if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_PRE_DECODER || inpkt->size == 0) {
+        *outpkt = inpkt;
+        return 0;
+    }
+
+#if CONFIG_ICONV
+    inb = inpkt->data;
+    inl = inpkt->size;
+    if (inl >= INT_MAX / UTF8_MAX_BYTES - AV_INPUT_BUFFER_PADDING_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "Subtitles packet is too big for recoding\n");
+        return AVERROR(ERANGE);
+    }
+
+    cd = iconv_open("UTF-8", avctx->sub_charenc);
+    av_assert0(cd != (iconv_t)-1);
+    ret = av_new_packet(buf_pkt, inl * UTF8_MAX_BYTES); /* worst-case output size */
+    if (ret < 0)
+        goto end;
+    ret = av_packet_copy_props(buf_pkt, inpkt);
+    if (ret < 0)
+        goto end;
+    outb = buf_pkt->data;
+    outl = buf_pkt->size;
+    /* convert, flush with a NULL-input call, and require all input to be consumed */
+    if (iconv(cd, &inb, &inl, &outb, &outl) == (size_t)-1 ||
+        iconv(cd, NULL, NULL, &outb, &outl) == (size_t)-1 ||
+        outl >= buf_pkt->size || inl != 0) {
+        ret = FFMIN(AVERROR(errno), -1);
+        av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" "
+               "from %s to UTF-8\n", inpkt->data, avctx->sub_charenc);
+        goto end;
+    }
+    buf_pkt->size -= outl;                          /* keep only the bytes written */
+    memset(buf_pkt->data + buf_pkt->size, 0, outl); /* and zero the unused tail */
+    *outpkt = buf_pkt;
+    ret = 0;
+end:
+    if (ret < 0)
+        av_packet_unref(buf_pkt);
+    if (cd != (iconv_t)-1)
+        iconv_close(cd);
+    return ret;
+#else
+    av_log(avctx, AV_LOG_ERROR, "requesting subtitles recoding without iconv\n");
+    return AVERROR(EINVAL);
+#endif
+}
+
+/* Return 1 if the NUL-terminated str decodes as UTF-8 with no overlong forms,
+ * no code point >= 0x110000, no 0xFFFE and no surrogates; return 0 otherwise. */
+static int utf8_check(const uint8_t *str)
+{
+    while (*str) {
+        const uint8_t *next = str;
+        uint32_t cp, lowest;
+        int len;
+        GET_UTF8(cp, *(next++), return 0;);
+        len    = next - str;
+        lowest = len == 1 ? 0 : len == 2 ? 0x80 : 1 << (5 * len - 4);
+        if (cp < lowest || cp >= 0x110000 || cp == 0xFFFE /* BOM */ ||
+            (cp >= 0xD800 && cp <= 0xDFFF) /* surrogates */)
+            return 0;
+        str = next;
+    }
+    return 1;
+}
+
+int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
+                             int *got_sub_ptr, const AVPacket *avpkt)
+{
+    int ret = 0;
+    /* Decode one subtitle packet into *sub; *got_sub_ptr reports whether a subtitle was produced. */
+    if (!avpkt->data && avpkt->size) {
+        av_log(avctx, AV_LOG_ERROR, "invalid packet: NULL data, size != 0\n");
+        return AVERROR(EINVAL);
+    }
+    if (!avctx->codec)
+        return AVERROR(EINVAL);
+    if (avctx->codec->type != AVMEDIA_TYPE_SUBTITLE) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid media type for subtitles\n");
+        return AVERROR(EINVAL);
+    }
+
+    *got_sub_ptr = 0;
+    get_subtitle_defaults(sub);
+    /* empty packets only reach the decoder when it has AV_CODEC_CAP_DELAY */
+    if ((avctx->codec->capabilities & AV_CODEC_CAP_DELAY) || avpkt->size) {
+        AVCodecInternal *avci = avctx->internal;
+        const AVPacket *pkt;
+
+        ret = recode_subtitle(avctx, &pkt, avpkt, avci->buffer_pkt);
+        if (ret < 0)
+            return ret;
+        /* sub->pts is expressed in AV_TIME_BASE_Q units */
+        if (avctx->pkt_timebase.num && avpkt->pts != AV_NOPTS_VALUE)
+            sub->pts = av_rescale_q(avpkt->pts,
+                                    avctx->pkt_timebase, AV_TIME_BASE_Q);
+        ret = ffcodec(avctx->codec)->cb.decode_sub(avctx, sub, got_sub_ptr, pkt);
+        if (pkt == avci->buffer_pkt) // did we recode?
+            av_packet_unref(avci->buffer_pkt);
+        if (ret < 0) {
+            *got_sub_ptr = 0;
+            avsubtitle_free(sub);
+            return ret;
+        }
+        av_assert1(!sub->num_rects || *got_sub_ptr);
+        /* derive a missing end time from the packet duration, in milliseconds */
+        if (sub->num_rects && !sub->end_display_time && avpkt->duration &&
+            avctx->pkt_timebase.num) {
+            AVRational ms = { 1, 1000 };
+            sub->end_display_time = av_rescale_q(avpkt->duration,
+                                                 avctx->pkt_timebase, ms);
+        }
+        /* format: 0 for bitmap subtitles, 1 for text subtitles */
+        if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
+            sub->format = 0;
+        else if (avctx->codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB)
+            sub->format = 1;
+        /* reject ASS text that fails utf8_check() unless the mode is IGNORE */
+        for (unsigned i = 0; i < sub->num_rects; i++) {
+            if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_IGNORE &&
+                sub->rects[i]->ass && !utf8_check(sub->rects[i]->ass)) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "Invalid UTF-8 in decoded subtitles text; "
+                       "maybe missing -sub_charenc option\n");
+                avsubtitle_free(sub);
+                *got_sub_ptr = 0;
+                return AVERROR_INVALIDDATA;
+            }
+        }
+
+        if (*got_sub_ptr)
+            avctx->frame_num++;
+#if FF_API_AVCTX_FRAME_NUMBER
+FF_DISABLE_DEPRECATION_WARNINGS
+        avctx->frame_number = avctx->frame_num;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    }
+
+    return ret;
+}
+
+/**
+ * Default pixel format selection.
+ *
+ * Candidates are tried in this order:
+ *  1. If a hardware device context is set and the codec lists hardware
+ *     configurations: the first configuration that supports setup through
+ *     a device context, matches the device type and whose pixel format
+ *     appears in fmt.
+ *  2. The last entry of fmt, if it is not a hardware-accelerated format.
+ *  3. The first entry of fmt that has no hardware configuration at all, or
+ *     whose configuration only needs internal setup.
+ *
+ * @param avctx codec context the format is selected for
+ * @param fmt   list of candidate formats, terminated by AV_PIX_FMT_NONE
+ * @return the selected format, or AV_PIX_FMT_NONE if the list is empty or
+ *         nothing in it is usable
+ */
+enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *avctx,
+                                              const enum AVPixelFormat *fmt)
+{
+    const AVPixFmtDescriptor *desc;
+    const AVCodecHWConfig *config;
+    int i, n;
+
+    // If a device was supplied when the codec was opened, assume that the
+    // user wants to use it.
+    if (avctx->hw_device_ctx && ffcodec(avctx->codec)->hw_configs) {
+        AVHWDeviceContext *device_ctx =
+            (AVHWDeviceContext*)avctx->hw_device_ctx->data;
+        for (i = 0;; i++) {
+            // Test the array entry itself for the NULL terminator: forming
+            // &entry->public through a NULL entry is undefined behaviour.
+            const AVCodecHWConfigInternal *hw_config =
+                ffcodec(avctx->codec)->hw_configs[i];
+            if (!hw_config)
+                break;
+            config = &hw_config->public;
+            if (!(config->methods &
+                  AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX))
+                continue;
+            if (device_ctx->type != config->device_type)
+                continue;
+            for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) {
+                if (config->pix_fmt == fmt[n])
+                    return fmt[n];
+            }
+        }
+    }
+    // No device or other setup, so we have to choose from things which
+    // don't need any other external information.
+
+    // If the last element of the list is a software format, choose it
+    // (this should be best software format if any exist).
+    for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++);
+    if (!n) {
+        // Empty list: there is no last element to inspect.
+        return AV_PIX_FMT_NONE;
+    }
+    desc = av_pix_fmt_desc_get(fmt[n - 1]);
+    // An entry without a descriptor cannot be classified as software.
+    if (desc && !(desc->flags & AV_PIX_FMT_FLAG_HWACCEL))
+        return fmt[n - 1];
+
+    // Finally, traverse the list in order and choose the first entry
+    // with no external dependencies (if there is no hardware configuration
+    // information available then this just picks the first entry).
+    for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) {
+        for (i = 0;; i++) {
+            config = avcodec_get_hw_config(avctx->codec, i);
+            if (!config)
+                break;
+            if (config->pix_fmt == fmt[n])
+                break;
+        }
+        if (!config) {
+            // No specific config available, so the decoder must be able
+            // to handle this format without any additional setup.
+            return fmt[n];
+        }
+        if (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL) {
+            // Usable with only internal setup.
+            return fmt[n];
+        }
+    }
+
+    // Nothing is usable, give up.
+    return AV_PIX_FMT_NONE;
+}
+
+/**
+ * Ensure avctx->hw_frames_ctx exists, creating and initialising it from
+ * avctx->hw_device_ctx when it does not.
+ *
+ * @param avctx    codec context; must have an active hwaccel
+ * @param dev_type device type the supplied device context has to match
+ * @return 0 on success (including when a frames context was already set),
+ *         AVERROR(ENOSYS) without a hwaccel, AVERROR(EINVAL) when the device
+ *         context is missing or of the wrong type, or the negative error
+ *         returned while building / initialising the frames context
+ */
+int ff_decode_get_hw_frames_ctx(AVCodecContext *avctx,
+                                enum AVHWDeviceType dev_type)
+{
+    AVHWDeviceContext *dev;
+    AVHWFramesContext *pool;
+    int err;
+
+    if (!avctx->hwaccel)
+        return AVERROR(ENOSYS);
+
+    /* Nothing to do when a frames context is already attached. */
+    if (avctx->hw_frames_ctx)
+        return 0;
+
+    if (!avctx->hw_device_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "A hardware frames or device context is "
+                "required for hardware accelerated decoding.\n");
+        return AVERROR(EINVAL);
+    }
+
+    dev = (AVHWDeviceContext *)avctx->hw_device_ctx->data;
+    if (dev->type != dev_type) {
+        av_log(avctx, AV_LOG_ERROR, "Device type %s expected for hardware "
+               "decoding, but got %s.\n", av_hwdevice_get_type_name(dev_type),
+               av_hwdevice_get_type_name(dev->type));
+        return AVERROR(EINVAL);
+    }
+
+    err = avcodec_get_hw_frames_parameters(avctx,
+                                           avctx->hw_device_ctx,
+                                           avctx->hwaccel->pix_fmt,
+                                           &avctx->hw_frames_ctx);
+    if (err < 0)
+        return err;
+
+    pool = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+
+    /* Four base work surfaces are promised, while the parameter query above
+     * only accounts for one, so top up the difference for fixed-size pools. */
+    if (pool->initial_pool_size)
+        pool->initial_pool_size += 3;
+
+    err = av_hwframe_ctx_init(avctx->hw_frames_ctx);
+    if (err < 0)
+        av_buffer_unref(&avctx->hw_frames_ctx);
+
+    return err < 0 ? err : 0;
+}
+
+/**
+ * Allocate a hardware frames context on device_ref and let the hwaccel that
+ * matches hw_pix_fmt fill in its parameters.
+ *
+ * On success the pool size reported by the hwaccel (if non-zero) is enlarged
+ * by avctx->extra_hw_frames (when positive) and by avctx->thread_count (when
+ * frame threading is active), and the new reference is stored in
+ * *out_frames_ref. The frames context is not initialised here.
+ *
+ * @param avctx          codec context whose codec is searched for a matching
+ *                       hardware configuration
+ * @param device_ref     device the frames context is allocated on
+ * @param hw_pix_fmt     hardware pixel format to look up
+ * @param out_frames_ref receives the new frames context reference on success;
+ *                       left untouched on failure
+ * @return >= 0 on success; AVERROR(ENOENT) if the codec has no hardware
+ *         configurations, none matches hw_pix_fmt, or the match has no
+ *         frame_params callback; AVERROR(ENOMEM) on allocation failure; or
+ *         the negative error returned by frame_params
+ */
+int avcodec_get_hw_frames_parameters(AVCodecContext *avctx,
+                                     AVBufferRef *device_ref,
+                                     enum AVPixelFormat hw_pix_fmt,
+                                     AVBufferRef **out_frames_ref)
+{
+    AVBufferRef *frames_ref = NULL;
+    const AVCodecHWConfigInternal *hw_config;
+    const AVHWAccel *hwa;
+    int i, ret;
+
+    // A codec without any hardware configuration has no list to walk;
+    // indexing it would dereference NULL.
+    if (!ffcodec(avctx->codec)->hw_configs)
+        return AVERROR(ENOENT);
+
+    for (i = 0;; i++) {
+        hw_config = ffcodec(avctx->codec)->hw_configs[i];
+        if (!hw_config)
+            return AVERROR(ENOENT);
+        if (hw_config->public.pix_fmt == hw_pix_fmt)
+            break;
+    }
+
+    hwa = hw_config->hwaccel;
+    if (!hwa || !hwa->frame_params)
+        return AVERROR(ENOENT);
+
+    frames_ref = av_hwframe_ctx_alloc(device_ref);
+    if (!frames_ref)
+        return AVERROR(ENOMEM);
+
+    ret = hwa->frame_params(avctx, frames_ref);
+    if (ret >= 0) {
+        AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frames_ref->data;
+
+        // A zero pool size is left alone; only fixed-size pools are grown.
+        if (frames_ctx->initial_pool_size) {
+            // If the user has requested that extra output surfaces be
+            // available then add them here.
+            if (avctx->extra_hw_frames > 0)
+                frames_ctx->initial_pool_size += avctx->extra_hw_frames;
+
+            // If frame threading is enabled then an extra surface per thread
+            // is also required.
+            if (avctx->active_thread_type & FF_THREAD_FRAME)
+                frames_ctx->initial_pool_size += avctx->thread_count;
+        }
+
+        *out_frames_ref = frames_ref;
+    } else {
+        av_buffer_unref(&frames_ref);
+    }
+    return ret;
+}
+
+/**
+ * Activate the hwaccel referenced by hw_config on avctx.
+ *
+ * Allocates the hwaccel's zeroed private data (if it declares a size),
+ * publishes the hwaccel in avctx->hwaccel and runs its init callback.
+ * When init fails, the private data is freed and avctx->hwaccel is reset.
+ *
+ * @return 0 on success, AVERROR_PATCHWELCOME for an experimental hwaccel
+ *         rejected by the compliance level, AVERROR(ENOMEM) on allocation
+ *         failure, or the negative error from the init callback
+ */
+static int hwaccel_init(AVCodecContext *avctx,
+                        const AVCodecHWConfigInternal *hw_config)
+{
+    const AVHWAccel *accel = hw_config->hwaccel;
+    int ret;
+
+    if ((accel->capabilities & AV_HWACCEL_CODEC_CAP_EXPERIMENTAL) &&
+        avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+        av_log(avctx, AV_LOG_WARNING, "Ignoring experimental hwaccel: %s\n",
+               accel->name);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    if (accel->priv_data_size) {
+        avctx->internal->hwaccel_priv_data =
+            av_mallocz(accel->priv_data_size);
+        if (!avctx->internal->hwaccel_priv_data)
+            return AVERROR(ENOMEM);
+    }
+
+    /* The hwaccel is published before init() is invoked. */
+    avctx->hwaccel = accel;
+    if (!accel->init)
+        return 0;
+
+    ret = accel->init(avctx);
+    if (ret >= 0)
+        return 0;
+
+    av_log(avctx, AV_LOG_ERROR, "Failed setup for format %s: "
+           "hwaccel initialisation returned error.\n",
+           av_get_pix_fmt_name(hw_config->public.pix_fmt));
+    av_freep(&avctx->internal->hwaccel_priv_data);
+    avctx->hwaccel = NULL;
+    return ret;
+}
+
+/**
+ * Tear down whatever hwaccel is active on avctx: run its uninit callback
+ * (if any), free the hwaccel private data, clear avctx->hwaccel and drop
+ * the reference held in avctx->hw_frames_ctx.
+ */
+static void hwaccel_uninit(AVCodecContext *avctx)
+{
+    const AVHWAccel *active = avctx->hwaccel;
+
+    if (active) {
+        if (active->uninit)
+            active->uninit(avctx);
+    }
+
+    av_freep(&avctx->internal->hwaccel_priv_data);
+    avctx->hwaccel = NULL;
+    av_buffer_unref(&avctx->hw_frames_ctx);
+}
+
+/**
+ * Negotiate the output pixel format with the user's get_format() callback.
+ *
+ * A private copy of the candidate list is offered to the callback. If the
+ * returned format has a hardware configuration that cannot be set up (the
+ * frames/device context does not match, no usable method is available, or
+ * hwaccel initialisation fails), that format is removed from the copy and
+ * the callback is asked again with the remaining candidates.
+ *
+ * Side effects: avctx->sw_pix_fmt is set to the last list entry when that
+ * entry is a software format; any previously active hwaccel is uninitialised
+ * before each callback invocation.
+ *
+ * @param avctx codec context
+ * @param fmt   candidate formats, terminated by AV_PIX_FMT_NONE; must hold
+ *              at least one entry
+ * @return the negotiated format, or AV_PIX_FMT_NONE when the callback chose
+ *         nothing, returned something invalid, every candidate was rejected,
+ *         or the list copy could not be allocated
+ */
+int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
+{
+    const AVPixFmtDescriptor *desc;
+    enum AVPixelFormat *choices;
+    enum AVPixelFormat ret, user_choice;
+    const AVCodecHWConfigInternal *hw_config;
+    const AVCodecHWConfig *config;
+    int i, n, err;
+
+    // Find end of list.
+    for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++);
+    // Must contain at least one entry.
+    av_assert0(n >= 1);
+    // If a software format is available, it must be the last entry.
+    desc = av_pix_fmt_desc_get(fmt[n - 1]);
+    if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {
+        // No software format is available.
+    } else {
+        avctx->sw_pix_fmt = fmt[n - 1];
+    }
+
+    // n entries plus the AV_PIX_FMT_NONE terminator at index n.
+    choices = av_memdup(fmt, (n + 1) * sizeof(*choices));
+    if (!choices)
+        return AV_PIX_FMT_NONE;
+
+    for (;;) {
+        // Remove the previous hwaccel, if there was one.
+        hwaccel_uninit(avctx);
+
+        user_choice = avctx->get_format(avctx, choices);
+        if (user_choice == AV_PIX_FMT_NONE) {
+            // Explicitly chose nothing, give up.
+            ret = AV_PIX_FMT_NONE;
+            break;
+        }
+
+        desc = av_pix_fmt_desc_get(user_choice);
+        if (!desc) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid format returned by "
+                   "get_format() callback.\n");
+            ret = AV_PIX_FMT_NONE;
+            break;
+        }
+        av_log(avctx, AV_LOG_DEBUG, "Format %s chosen by get_format().\n",
+               desc->name);
+
+        // The callback may only pick something that was offered.
+        for (i = 0; i < n; i++) {
+            if (choices[i] == user_choice)
+                break;
+        }
+        if (i == n) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid return from get_format(): "
+                   "%s not in possible list.\n", desc->name);
+            ret = AV_PIX_FMT_NONE;
+            break;
+        }
+
+        if (ffcodec(avctx->codec)->hw_configs) {
+            for (i = 0;; i++) {
+                hw_config = ffcodec(avctx->codec)->hw_configs[i];
+                if (!hw_config)
+                    break;
+                if (hw_config->public.pix_fmt == user_choice)
+                    break;
+            }
+        } else {
+            hw_config = NULL;
+        }
+
+        if (!hw_config) {
+            // No config available, so no extra setup required.
+            ret = user_choice;
+            break;
+        }
+        config = &hw_config->public;
+
+        if (config->methods &
+            AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX &&
+            avctx->hw_frames_ctx) {
+            const AVHWFramesContext *frames_ctx =
+                (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+            if (frames_ctx->format != user_choice) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid setup for format %s: "
+                       "does not match the format of the provided frames "
+                       "context.\n", desc->name);
+                goto try_again;
+            }
+        } else if (config->methods &
+                   AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX &&
+                   avctx->hw_device_ctx) {
+            const AVHWDeviceContext *device_ctx =
+                (AVHWDeviceContext*)avctx->hw_device_ctx->data;
+            if (device_ctx->type != config->device_type) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid setup for format %s: "
+                       "does not match the type of the provided device "
+                       "context.\n", desc->name);
+                goto try_again;
+            }
+        } else if (config->methods &
+                   AV_CODEC_HW_CONFIG_METHOD_INTERNAL) {
+            // Internal-only setup, no additional configuration.
+        } else if (config->methods &
+                   AV_CODEC_HW_CONFIG_METHOD_AD_HOC) {
+            // Some ad-hoc configuration we can't see and can't check.
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Invalid setup for format %s: "
+                   "missing configuration.\n", desc->name);
+            goto try_again;
+        }
+        if (hw_config->hwaccel) {
+            av_log(avctx, AV_LOG_DEBUG, "Format %s requires hwaccel "
+                   "initialisation.\n", desc->name);
+            err = hwaccel_init(avctx, hw_config);
+            if (err < 0)
+                goto try_again;
+        }
+        ret = user_choice;
+        break;
+
+    try_again:
+        av_log(avctx, AV_LOG_DEBUG, "Format %s not usable, retrying "
+               "get_format() without it.\n", desc->name);
+        for (i = 0; i < n; i++) {
+            if (choices[i] == user_choice)
+                break;
+        }
+        // Close the gap and move the AV_PIX_FMT_NONE terminator (index n)
+        // down as well, so that the callback is neither offered the rejected
+        // format again nor a stale duplicate of the last entry.
+        for (; i < n; i++)
+            choices[i] = choices[i + 1];
+        --n;
+        if (!n) {
+            // Every candidate has been rejected; do not hand the callback
+            // an empty list. Perform the cleanup the next iteration would
+            // have done.
+            hwaccel_uninit(avctx);
+            ret = AV_PIX_FMT_NONE;
+            break;
+        }
+    }
+
+    av_freep(&choices);
+    return ret;
+}
+
+/**
+ * Unpack the AV_PKT_DATA_STRINGS_METADATA side data of avpkt into
+ * frame->metadata.
+ *
+ * @return the result of av_packet_unpack_dictionary()
+ */
+static int add_metadata_from_side_data(const AVPacket *avpkt, AVFrame *frame)
+{
+    size_t md_size;
+    const uint8_t *md = av_packet_get_side_data(avpkt,
+                                                AV_PKT_DATA_STRINGS_METADATA,
+                                                &md_size);
+
+    return av_packet_unpack_dictionary(md, md_size, &frame->metadata);
+}
+
+/**
+ * Transfer packet-level properties to a frame.
+ *
+ * Copies pts, position, duration and size, duplicates the side data types
+ * listed in the table below as frame side data, imports string metadata,
+ * mirrors the packet's discard flag and, when AV_CODEC_FLAG_COPY_OPAQUE is
+ * set on avctx, propagates opaque / opaque_ref.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if frame side data cannot be
+ *         allocated, or the negative error from av_buffer_replace()
+ */
+int ff_decode_frame_props_from_pkt(const AVCodecContext *avctx,
+                                   AVFrame *frame, const AVPacket *pkt)
+{
+    /* Packet side data type -> frame side data type it is copied to. */
+    static const struct {
+        enum AVPacketSideDataType pkt_type;
+        enum AVFrameSideDataType  frame_type;
+    } sd_map[] = {
+        { AV_PKT_DATA_REPLAYGAIN,                 AV_FRAME_DATA_REPLAYGAIN },
+        { AV_PKT_DATA_DISPLAYMATRIX,              AV_FRAME_DATA_DISPLAYMATRIX },
+        { AV_PKT_DATA_SPHERICAL,                  AV_FRAME_DATA_SPHERICAL },
+        { AV_PKT_DATA_STEREO3D,                   AV_FRAME_DATA_STEREO3D },
+        { AV_PKT_DATA_AUDIO_SERVICE_TYPE,         AV_FRAME_DATA_AUDIO_SERVICE_TYPE },
+        { AV_PKT_DATA_MASTERING_DISPLAY_METADATA, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA },
+        { AV_PKT_DATA_CONTENT_LIGHT_LEVEL,        AV_FRAME_DATA_CONTENT_LIGHT_LEVEL },
+        { AV_PKT_DATA_A53_CC,                     AV_FRAME_DATA_A53_CC },
+        { AV_PKT_DATA_ICC_PROFILE,                AV_FRAME_DATA_ICC_PROFILE },
+        { AV_PKT_DATA_S12M_TIMECODE,              AV_FRAME_DATA_S12M_TIMECODE },
+        { AV_PKT_DATA_DYNAMIC_HDR10_PLUS,         AV_FRAME_DATA_DYNAMIC_HDR_PLUS },
+    };
+    int err;
+
+    frame->pts      = pkt->pts;
+    frame->pkt_pos  = pkt->pos;
+    frame->duration = pkt->duration;
+    frame->pkt_size = pkt->size;
+
+    for (int idx = 0; idx < FF_ARRAY_ELEMS(sd_map); idx++) {
+        size_t sd_size;
+        AVFrameSideData *dst;
+        uint8_t *src = av_packet_get_side_data(pkt, sd_map[idx].pkt_type,
+                                               &sd_size);
+
+        /* This side data type is not present on the packet. */
+        if (!src)
+            continue;
+
+        dst = av_frame_new_side_data(frame, sd_map[idx].frame_type, sd_size);
+        if (!dst)
+            return AVERROR(ENOMEM);
+
+        memcpy(dst->data, src, sd_size);
+    }
+
+    /* The outcome of the metadata import is not checked. */
+    add_metadata_from_side_data(pkt, frame);
+
+    if (pkt->flags & AV_PKT_FLAG_DISCARD)
+        frame->flags |= AV_FRAME_FLAG_DISCARD;
+    else
+        frame->flags &= ~AV_FRAME_FLAG_DISCARD;
+
+    if (!(avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE))
+        return 0;
+
+    err = av_buffer_replace(&frame->opaque_ref, pkt->opaque_ref);
+    if (err < 0)
+        return err;
+    frame->opaque = pkt->opaque;
+
+    return 0;
+}
+
+/**
+ * Fill in frame properties from avctx->internal->last_pkt_props and from
+ * the codec context.
+ *
+ * Colour properties, sample aspect ratio, sample rate, sample format and
+ * channel layout are only taken from avctx where the frame does not already
+ * carry a value; the video pixel format is always overwritten.
+ *
+ * @return 0 on success, or the negative error returned by
+ *         ff_decode_frame_props_from_pkt() / av_channel_layout_copy()
+ */
+int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
+{
+    const AVPacket *pkt = avctx->internal->last_pkt_props;
+
+    // Codecs flagged FF_CODEC_CAP_SETS_FRAME_PROPS are skipped here.
+    if (!(ffcodec(avctx->codec)->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) {
+        int ret = ff_decode_frame_props_from_pkt(avctx, frame, pkt);
+        if (ret < 0)
+            return ret;
+        // Overrides the pkt_size just set from pkt->size: the opaque pointer
+        // of last_pkt_props is reinterpreted as an integer size here.
+        // NOTE(review): presumably the real packet size is stashed in
+        // ->opaque where last_pkt_props is filled -- confirm against that code.
+        frame->pkt_size     = (int)(intptr_t)pkt->opaque;
+    }
+#if FF_API_REORDERED_OPAQUE
+FF_DISABLE_DEPRECATION_WARNINGS
+    frame->reordered_opaque = avctx->reordered_opaque;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    // Colour description: the context only supplies values the frame left
+    // unspecified.
+    if (frame->color_primaries == AVCOL_PRI_UNSPECIFIED)
+        frame->color_primaries = avctx->color_primaries;
+    if (frame->color_trc == AVCOL_TRC_UNSPECIFIED)
+        frame->color_trc = avctx->color_trc;
+    if (frame->colorspace == AVCOL_SPC_UNSPECIFIED)
+        frame->colorspace = avctx->colorspace;
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED)
+        frame->color_range = avctx->color_range;
+    if (frame->chroma_location == AVCHROMA_LOC_UNSPECIFIED)
+        frame->chroma_location = avctx->chroma_sample_location;
+
+    switch (avctx->codec->type) {
+    case AVMEDIA_TYPE_VIDEO:
+        frame->format              = avctx->pix_fmt;
+        if (!frame->sample_aspect_ratio.num)
+            frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
+
+        // An aspect ratio rejected by av_image_check_sar() is reset to 0/1
+        // with a warning; the check is skipped while dimensions are unset.
+        // NOTE(review): num/den are printed with %u -- confirm the field types.
+        if (frame->width && frame->height &&
+            av_image_check_sar(frame->width, frame->height,
+                               frame->sample_aspect_ratio) < 0) {
+            av_log(avctx, AV_LOG_WARNING, "ignoring invalid SAR: %u/%u\n",
+                   frame->sample_aspect_ratio.num,
+                   frame->sample_aspect_ratio.den);
+            frame->sample_aspect_ratio = (AVRational){ 0, 1 };
+        }
+
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        if (!frame->sample_rate)
+            frame->sample_rate    = avctx->sample_rate;
+        if (frame->format < 0)
+            frame->format         = avctx->sample_fmt;
+        if (!frame->ch_layout.nb_channels) {
+            int ret = av_channel_layout_copy(&frame->ch_layout, &avctx->ch_layout);
+            if (ret < 0)
+                return ret;
+        }
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+        // Keep the deprecated channel fields in sync with ch_layout; the
+        // mask is only exported for native-order layouts.
+        frame->channels = frame->ch_layout.nb_channels;
+        frame->channel_layout = frame->ch_layout.order == AV_CHANNEL_ORDER_NATIVE ?
+                                frame->ch_layout.u.mask : 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        break;
+    }
+    return 0;
+}
+
+/**
+ * Sanity-check the plane pointers of a freshly allocated video frame.
+ *
+ * Every plane the pixel format requires must be non-NULL (asserted). When
+ * the format has at least one plane, the remaining data[] slots are forced
+ * to NULL, logging an error for each slot that was not already NULL.
+ * Non-video contexts are left untouched.
+ */
+static void validate_avframe_allocation(AVCodecContext *avctx, AVFrame *frame)
+{
+    const AVPixFmtDescriptor *pixdesc;
+    int planes, p;
+
+    if (avctx->codec_type != AVMEDIA_TYPE_VIDEO)
+        return;
+
+    planes  = av_pix_fmt_count_planes(frame->format);
+    pixdesc = av_pix_fmt_desc_get(frame->format);
+
+    /* A single-plane format flagged as paletted is checked as two planes. */
+    if (planes == 1 && pixdesc && (pixdesc->flags & AV_PIX_FMT_FLAG_PAL))
+        planes = 2;
+
+    for (p = 0; p < planes; p++)
+        av_assert0(frame->data[p]);
+
+    /* For formats without data like hwaccel allow unused pointers to be
+     * non-NULL. */
+    if (planes <= 0)
+        return;
+
+    for (p = planes; p < FF_ARRAY_ELEMS(frame->data); p++) {
+        if (frame->data[p])
+            av_log(avctx, AV_LOG_ERROR, "Buffer returned by get_buffer2() did not zero unused plane pointers\n");
+        frame->data[p] = NULL;
+    }
+}
+
+/**
+ * Buffer free callback for a FrameDecodeData: releases the post-processing
+ * opaque and the hwaccel private data through their respective free
+ * callbacks (when those are set), then frees the structure itself.
+ *
+ * @param opaque unused
+ * @param data   the FrameDecodeData to destroy
+ */
+static void decode_data_free(void *opaque, uint8_t *data)
+{
+    FrameDecodeData *decode_data = (FrameDecodeData *)data;
+
+    if (decode_data->post_process_opaque_free) {
+        decode_data->post_process_opaque_free(decode_data->post_process_opaque);
+    }
+
+    if (decode_data->hwaccel_priv_free) {
+        decode_data->hwaccel_priv_free(decode_data->hwaccel_priv);
+    }
+
+    av_freep(&decode_data);
+}
+
+/**
+ * Allocate a zeroed FrameDecodeData, wrap it in a buffer that is released
+ * through decode_data_free() and store it in frame->private_ref.
+ *
+ * Any reference already held in frame->private_ref is dropped first.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure
+ */
+int ff_attach_decode_data(AVFrame *frame)
+{
+    FrameDecodeData *payload;
+    AVBufferRef *wrapper;
+
+    av_assert1(!frame->private_ref);
+    av_buffer_unref(&frame->private_ref);
+
+    payload = av_mallocz(sizeof(*payload));
+    if (!payload)
+        return AVERROR(ENOMEM);
+
+    wrapper = av_buffer_create((uint8_t *)payload, sizeof(*payload),
+                               decode_data_free, NULL,
+                               AV_BUFFER_FLAG_READONLY);
+    if (wrapper) {
+        frame->private_ref = wrapper;
+        return 0;
+    }
+
+    /* The wrapper never took ownership, so release the payload here. */
+    av_freep(&payload);
+    return AVERROR(ENOMEM);
+}
+
+/**
+ * Obtain a buffer for a decoded frame.
+ *
+ * Validates the context parameters, fills in the frame properties via
+ * ff_decode_frame_props() and then allocates the data either through the
+ * active hwaccel's alloc_frame callback or through avctx->get_buffer2().
+ * Buffers coming from get_buffer2() are additionally validated and get a
+ * FrameDecodeData attached; the alloc_frame path skips both steps.
+ *
+ * @param avctx decoder context (asserted to belong to a decoder)
+ * @param frame frame to allocate; unreferenced on failure
+ * @param flags passed through to get_buffer2()
+ * @return >= 0 on success, a negative error code on failure
+ */
+int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
+{
+    const AVHWAccel *hwaccel = avctx->hwaccel;
+    // Stays 1 when the caller supplied frame dimensions; cleared when they
+    // are derived from the context below.
+    int override_dimensions = 1;
+    int ret;
+
+    av_assert0(av_codec_is_decoder(avctx->codec));
+
+    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+        // Reject widths that would overflow when aligned, sizes refused by
+        // av_image_check_size2() and an unset pixel format.
+        if ((unsigned)avctx->width > INT_MAX - STRIDE_ALIGN ||
+            (ret = av_image_check_size2(FFALIGN(avctx->width, STRIDE_ALIGN), avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx)) < 0 || avctx->pix_fmt<0) {
+            av_log(avctx, AV_LOG_ERROR, "video_get_buffer: image parameters invalid\n");
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        // No usable dimensions on the frame: request the larger of the
+        // display size and the lowres-scaled coded size.
+        if (frame->width <= 0 || frame->height <= 0) {
+            frame->width  = FFMAX(avctx->width,  AV_CEIL_RSHIFT(avctx->coded_width,  avctx->lowres));
+            frame->height = FFMAX(avctx->height, AV_CEIL_RSHIFT(avctx->coded_height, avctx->lowres));
+            override_dimensions = 0;
+        }
+
+        // The frame must not already own data.
+        if (frame->data[0] || frame->data[1] || frame->data[2] || frame->data[3]) {
+            av_log(avctx, AV_LOG_ERROR, "pic->data[*]!=NULL in get_buffer_internal\n");
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+    } else if (avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+        /* compat layer for old-style get_buffer() implementations */
+        avctx->channels = avctx->ch_layout.nb_channels;
+        avctx->channel_layout = (avctx->ch_layout.order == AV_CHANNEL_ORDER_NATIVE) ?
+                                avctx->ch_layout.u.mask : 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+        // 64-bit product so that samples * channels cannot overflow int.
+        if (frame->nb_samples * (int64_t)avctx->ch_layout.nb_channels > avctx->max_samples) {
+            av_log(avctx, AV_LOG_ERROR, "samples per frame %d, exceeds max_samples %"PRId64"\n", frame->nb_samples, avctx->max_samples);
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+    }
+    ret = ff_decode_frame_props(avctx, frame);
+    if (ret < 0)
+        goto fail;
+
+    if (hwaccel) {
+        if (hwaccel->alloc_frame) {
+            // The hwaccel allocates the frame itself; jump past
+            // get_buffer2(), validation and decode-data attachment. A
+            // failure is still caught by the ret < 0 check under 'fail'.
+            ret = hwaccel->alloc_frame(avctx, frame);
+            goto end;
+        }
+    } else
+        avctx->sw_pix_fmt = avctx->pix_fmt;
+
+    ret = avctx->get_buffer2(avctx, frame, flags);
+    if (ret < 0)
+        goto fail;
+
+    validate_avframe_allocation(avctx, frame);
+
+    ret = ff_attach_decode_data(frame);
+    if (ret < 0)
+        goto fail;
+
+end:
+    // If the dimensions were derived above (not supplied by the caller) and
+    // the codec does not export cropping, report the context's dimensions.
+    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO && !override_dimensions &&
+        !(ffcodec(avctx->codec)->caps_internal & FF_CODEC_CAP_EXPORTS_CROPPING)) {
+        frame->width  = avctx->width;
+        frame->height = avctx->height;
+    }
+
+fail:
+    // Also reached on success by fall-through; only acts when ret < 0.
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        av_frame_unref(frame);
+    }
+
+    return ret;
+}
+
+/**
+ * Make sure frame holds a buffer matching the current video parameters,
+ * reusing the existing one where possible.
+ *
+ * - A frame whose size or format no longer matches avctx is dropped (with a
+ *   warning) and reallocated.
+ * - A frame without data is allocated through ff_get_buffer().
+ * - A frame that is writable, or for which FF_REGET_BUFFER_FLAG_READONLY was
+ *   passed, is kept and only has its properties refreshed.
+ * - Otherwise a new buffer is obtained and the old contents are copied in.
+ *
+ * @return 0 (or the result of the delegated call) on success, a negative
+ *         error code on failure
+ */
+static int reget_buffer_internal(AVCodecContext *avctx, AVFrame *frame, int flags)
+{
+    AVFrame *previous;
+    int err;
+
+    av_assert0(avctx->codec_type == AVMEDIA_TYPE_VIDEO);
+
+    if (frame->data[0]) {
+        const int unchanged = frame->width  == avctx->width  &&
+                              frame->height == avctx->height &&
+                              frame->format == avctx->pix_fmt;
+        if (!unchanged) {
+            av_log(avctx, AV_LOG_WARNING, "Picture changed from size:%dx%d fmt:%s to size:%dx%d fmt:%s in reget buffer()\n",
+                   frame->width, frame->height, av_get_pix_fmt_name(frame->format), avctx->width, avctx->height, av_get_pix_fmt_name(avctx->pix_fmt));
+            av_frame_unref(frame);
+        }
+    }
+
+    if (!frame->data[0])
+        return ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF);
+
+    if (av_frame_is_writable(frame) || (flags & FF_REGET_BUFFER_FLAG_READONLY))
+        return ff_decode_frame_props(avctx, frame);
+
+    /* Park the shared buffer in a temporary frame, get a fresh one and copy
+     * the old picture into it. */
+    previous = av_frame_alloc();
+    if (!previous)
+        return AVERROR(ENOMEM);
+
+    av_frame_move_ref(previous, frame);
+
+    err = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF);
+    if (err >= 0) {
+        av_frame_copy(frame, previous);
+        err = 0;
+    }
+
+    av_frame_free(&previous);
+    return err;
+}
+
+/**
+ * Wrapper around reget_buffer_internal() that logs an error message when
+ * the call fails; the result is returned unchanged.
+ */
+int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
+{
+    const int status = reget_buffer_internal(avctx, frame, flags);
+
+    if (status >= 0)
+        return status;
+
+    av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    return status;
+}
+
+/**
+ * Generic decoder setup and option validation.
+ *
+ * Clamps lowres to what the codec supports, validates the subtitle
+ * character-encoding options, resets the pts-correction state, maps the
+ * EXPORT_MVS flag onto export_side_data, allocates the internal packets and
+ * initialises the bitstream filters.
+ *
+ * @return 0 on success; AVERROR(EINVAL) if sub_charenc is set on a
+ *         non-subtitle codec; an iconv-related error or AVERROR(ENOSYS) if
+ *         the requested character conversion is unavailable;
+ *         AVERROR(ENOMEM) on allocation failure; or the negative error from
+ *         decode_bsfs_init()
+ */
+int ff_decode_preinit(AVCodecContext *avctx)
+{
+    AVCodecInternal *avci = avctx->internal;
+    int ret = 0;
+
+    /* if the decoder init function was already called previously,
+     * free the already allocated subtitle_header before overwriting it */
+    av_freep(&avctx->subtitle_header);
+
+    /* an out-of-range lowres (too large or negative) is replaced by the
+     * codec's maximum, with a warning */
+    if (avctx->codec->max_lowres < avctx->lowres || avctx->lowres < 0) {
+        av_log(avctx, AV_LOG_WARNING, "The maximum value for lowres supported by the decoder is %d\n",
+               avctx->codec->max_lowres);
+        avctx->lowres = avctx->codec->max_lowres;
+    }
+    if (avctx->sub_charenc) {
+        if (avctx->codec_type != AVMEDIA_TYPE_SUBTITLE) {
+            av_log(avctx, AV_LOG_ERROR, "Character encoding is only "
+                   "supported with subtitles codecs\n");
+            return AVERROR(EINVAL);
+        } else if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) {
+            av_log(avctx, AV_LOG_WARNING, "Codec '%s' is bitmap-based, "
+                   "subtitles character encoding will be ignored\n",
+                   avctx->codec_descriptor->name);
+            avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_DO_NOTHING;
+        } else {
+            /* input character encoding is set for a text based subtitle
+             * codec at this point */
+            if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_AUTOMATIC)
+                avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_PRE_DECODER;
+
+            if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_PRE_DECODER) {
+#if CONFIG_ICONV
+                /* open and immediately close a converter, only to verify
+                 * that the requested encoding is known to iconv */
+                iconv_t cd = iconv_open("UTF-8", avctx->sub_charenc);
+                if (cd == (iconv_t)-1) {
+                    /* capture errno before av_log() can overwrite it */
+                    ret = AVERROR(errno);
+                    av_log(avctx, AV_LOG_ERROR, "Unable to open iconv context "
+                           "with input character encoding \"%s\"\n", avctx->sub_charenc);
+                    return ret;
+                }
+                iconv_close(cd);
+#else
+                av_log(avctx, AV_LOG_ERROR, "Character encoding subtitles "
+                       "conversion needs a libavcodec built with iconv support "
+                       "for this codec\n");
+                return AVERROR(ENOSYS);
+#endif
+            }
+        }
+    }
+
+    /* reset the timestamp-correction bookkeeping */
+    avctx->pts_correction_num_faulty_pts =
+    avctx->pts_correction_num_faulty_dts = 0;
+    avctx->pts_correction_last_pts =
+    avctx->pts_correction_last_dts = INT64_MIN;
+
+    if (   !CONFIG_GRAY && avctx->flags & AV_CODEC_FLAG_GRAY
+        && avctx->codec_descriptor->type == AVMEDIA_TYPE_VIDEO)
+        av_log(avctx, AV_LOG_WARNING,
+               "gray decoding requested but not enabled at configuration time\n");
+    /* the flags2 motion-vector export bit is mirrored into export_side_data */
+    if (avctx->flags2 & AV_CODEC_FLAG2_EXPORT_MVS) {
+        avctx->export_side_data |= AV_CODEC_EXPORT_DATA_MVS;
+    }
+
+    /* NOTE(review): if only one of the two allocations fails, the other is
+     * not freed here -- presumably the caller's close path releases it;
+     * confirm. */
+    avci->in_pkt         = av_packet_alloc();
+    avci->last_pkt_props = av_packet_alloc();
+    if (!avci->in_pkt || !avci->last_pkt_props)
+        return AVERROR(ENOMEM);
+
+    ret = decode_bsfs_init(avctx);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+/**
+ * Copy the palette carried in the side data of src to dst.
+ *
+ * @param dst    destination, receives AVPALETTE_SIZE bytes on success
+ * @param src    packet to look for AV_PKT_DATA_PALETTE side data in
+ * @param logctx context used for the error message on a size mismatch
+ * @return 1 if a palette of exactly AVPALETTE_SIZE bytes was copied,
+ *         0 if there is no palette or its size is wrong
+ */
+int ff_copy_palette(void *dst, const AVPacket *src, void *logctx)
+{
+    size_t pal_size;
+    const void *palette = av_packet_get_side_data(src, AV_PKT_DATA_PALETTE,
+                                                  &pal_size);
+
+    if (!palette)
+        return 0;
+
+    if (pal_size != AVPALETTE_SIZE) {
+        av_log(logctx, AV_LOG_ERROR,
+               "Palette size %"SIZE_SPECIFIER" is wrong\n", pal_size);
+        return 0;
+    }
+
+    memcpy(dst, palette, AVPALETTE_SIZE);
+    return 1;
+}
diff --git a/media/ffvpx/libavcodec/decode.h b/media/ffvpx/libavcodec/decode.h
new file mode 100644
index 0000000000..8430ffbd66
--- /dev/null
+++ b/media/ffvpx/libavcodec/decode.h
@@ -0,0 +1,153 @@
+/*
+ * generic decoding-related code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DECODE_H
+#define AVCODEC_DECODE_H
+
+#include "libavutil/buffer.h"
+#include "libavutil/frame.h"
+#include "libavutil/hwcontext.h"
+
+#include "avcodec.h"
+
+/**
+ * This struct stores per-frame lavc-internal data and is attached to it via
+ * private_ref.
+ */
+typedef struct FrameDecodeData {
+    /**
+     * The callback to perform some delayed processing on the frame right
+     * before it is returned to the caller.
+     *
+     * @note This code is called at some unspecified point after the frame is
+     * returned from the decoder's decode/receive_frame call. Therefore it cannot rely
+     * on AVCodecContext being in any specific state, so it does not get to
+     * access AVCodecContext directly at all. All the state it needs must be
+     * stored in the post_process_opaque object.
+     */
+    int (*post_process)(void *logctx, AVFrame *frame);
+    void *post_process_opaque;                      ///< state for post_process; passed to post_process_opaque_free
+    void (*post_process_opaque_free)(void *opaque); ///< if set, called on post_process_opaque by decode_data_free() in decode.c
+
+    /**
+     * Per-frame private data for hwaccels.
+     */
+    void *hwaccel_priv;
+    void (*hwaccel_priv_free)(void *priv);          ///< if set, called on hwaccel_priv by decode_data_free() in decode.c
+} FrameDecodeData;
+
+/**
+ * avcodec_receive_frame() implementation for decoders.
+ */
+int ff_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame);
+
+/**
+ * Called by decoders to get the next packet for decoding.
+ *
+ * @param pkt An empty packet to be filled with data.
+ * @return 0 if a new reference has been successfully written to pkt
+ *         AVERROR(EAGAIN) if no data is currently available
+ *         AVERROR_EOF if the end of stream has been reached, so no more data
+ *                     will be available
+ */
+int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt);
+
+/**
+ * Set various frame properties from the provided packet.
+ */
+int ff_decode_frame_props_from_pkt(const AVCodecContext *avctx,
+                                   AVFrame *frame, const AVPacket *pkt);
+
+/**
+ * Set various frame properties from the codec context / packet data.
+ */
+int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame);
+
+/**
+ * Make sure avctx.hw_frames_ctx is set. If it's not set, the function will
+ * try to allocate it from hw_device_ctx. If that is not possible, an error
+ * message is printed, and an error code is returned.
+ */
+int ff_decode_get_hw_frames_ctx(AVCodecContext *avctx,
+                                enum AVHWDeviceType dev_type);
+
+int ff_attach_decode_data(AVFrame *frame);
+
+/**
+ * Check whether the side-data of src contains a palette of
+ * size AVPALETTE_SIZE; if so, copy it to dst and return 1;
+ * else return 0.
+ * Also emit an error message upon encountering a palette
+ * with invalid size.
+ */
+int ff_copy_palette(void *dst, const AVPacket *src, void *logctx);
+
+/**
+ * Perform decoder initialization and validation.
+ * Called when opening the decoder, before the FFCodec.init() call.
+ */
+int ff_decode_preinit(AVCodecContext *avctx);
+
+/**
+ * Check that the provided frame dimensions are valid and set them on the codec
+ * context.
+ */
+int ff_set_dimensions(AVCodecContext *s, int width, int height);
+
+/**
+ * Check that the provided sample aspect ratio is valid and set it on the codec
+ * context.
+ */
+int ff_set_sar(AVCodecContext *avctx, AVRational sar);
+
+/**
+ * Select the (possibly hardware accelerated) pixel format.
+ * This is a wrapper around AVCodecContext.get_format() and should be used
+ * instead of calling get_format() directly.
+ *
+ * The list of pixel formats must contain at least one valid entry, and is
+ * terminated with AV_PIX_FMT_NONE.  If it is possible to decode to software,
+ * the last entry in the list must be the most accurate software format.
+ * If it is not possible to decode to software, AVCodecContext.sw_pix_fmt
+ * must be set before calling this function.
+ */
+int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt);
+
+/**
+ * Get a buffer for a frame. This is a wrapper around
+ * AVCodecContext.get_buffer() and should be used instead of calling get_buffer()
+ * directly.
+ */
+int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags);
+
+#define FF_REGET_BUFFER_FLAG_READONLY 1 ///< the returned buffer does not need to be writable
+/**
+ * Identical in function to ff_get_buffer(), except it reuses the existing buffer
+ * if available.
+ */
+int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame, int flags);
+
+/**
+ * Add or update AV_FRAME_DATA_MATRIXENCODING side data.
+ */
+int ff_side_data_update_matrix_encoding(AVFrame *frame,
+                                        enum AVMatrixEncoding matrix_encoding);
+
+#endif /* AVCODEC_DECODE_H */
diff --git a/media/ffvpx/libavcodec/defs.h b/media/ffvpx/libavcodec/defs.h
new file mode 100644
index 0000000000..fbe3254db2
--- /dev/null
+++ b/media/ffvpx/libavcodec/defs.h
@@ -0,0 +1,192 @@
+/*
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DEFS_H
+#define AVCODEC_DEFS_H
+
+/**
+ * @file
+ * @ingroup libavc
+ * Misc types and constants that do not belong anywhere else.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/**
+ * @ingroup lavc_decoding
+ * Required number of additionally allocated bytes at the end of the input bitstream for decoding.
+ * This is mainly needed because some optimized bitstream readers read
+ * 32 or 64 bit at once and could read over the end.<br>
+ * Note: If the first 23 bits of the additional bytes are not 0, then damaged
+ * MPEG bitstreams could cause overread and segfault.
+ */
+#define AV_INPUT_BUFFER_PADDING_SIZE 64
+
+/**
+ * Verify checksums embedded in the bitstream (could be of either encoded or
+ * decoded data, depending on the format) and print an error message on mismatch.
+ * If AV_EF_EXPLODE is also set, a mismatching checksum will result in the
+ * decoder/demuxer returning an error.
+ */
+#define AV_EF_CRCCHECK       (1<<0)
+#define AV_EF_BITSTREAM      (1<<1)   ///< detect bitstream specification deviations
+#define AV_EF_BUFFER         (1<<2)   ///< detect improper bitstream length
+#define AV_EF_EXPLODE        (1<<3)   ///< abort decoding on minor error detection
+
+#define AV_EF_IGNORE_ERR     (1<<15)  ///< ignore errors and continue
+#define AV_EF_CAREFUL        (1<<16)  ///< consider things that violate the spec, are fast to calculate and have not been seen in the wild as errors
+#define AV_EF_COMPLIANT      (1<<17)  ///< consider all spec non compliances as errors
+#define AV_EF_AGGRESSIVE     (1<<18)  ///< consider things that a sane encoder/muxer should not do as an error
+
+#define FF_COMPLIANCE_VERY_STRICT   2 ///< Strictly conform to an older more strict version of the spec or reference software.
+#define FF_COMPLIANCE_STRICT        1 ///< Strictly conform to all the things in the spec no matter what consequences.
+#define FF_COMPLIANCE_NORMAL        0
+#define FF_COMPLIANCE_UNOFFICIAL   -1 ///< Allow unofficial extensions
+#define FF_COMPLIANCE_EXPERIMENTAL -2 ///< Allow nonstandardized experimental things.
+
+/**
+ * @ingroup lavc_decoding
+ */
+enum AVDiscard{
+    /* We leave some space between them for extensions (drop some
+     * keyframes for intra-only or drop just some bidir frames). */
+    AVDISCARD_NONE    =-16, ///< discard nothing
+    AVDISCARD_DEFAULT =  0, ///< discard useless packets like 0 size packets in avi
+    AVDISCARD_NONREF  =  8, ///< discard all non reference
+    AVDISCARD_BIDIR   = 16, ///< discard all bidirectional frames
+    AVDISCARD_NONINTRA= 24, ///< discard all non intra frames
+    AVDISCARD_NONKEY  = 32, ///< discard all frames except keyframes
+    AVDISCARD_ALL     = 48, ///< discard all
+};
+
+enum AVAudioServiceType {
+    AV_AUDIO_SERVICE_TYPE_MAIN              = 0,
+    AV_AUDIO_SERVICE_TYPE_EFFECTS           = 1,
+    AV_AUDIO_SERVICE_TYPE_VISUALLY_IMPAIRED = 2,
+    AV_AUDIO_SERVICE_TYPE_HEARING_IMPAIRED  = 3,
+    AV_AUDIO_SERVICE_TYPE_DIALOGUE          = 4,
+    AV_AUDIO_SERVICE_TYPE_COMMENTARY        = 5,
+    AV_AUDIO_SERVICE_TYPE_EMERGENCY         = 6,
+    AV_AUDIO_SERVICE_TYPE_VOICE_OVER        = 7,
+    AV_AUDIO_SERVICE_TYPE_KARAOKE           = 8,
+    AV_AUDIO_SERVICE_TYPE_NB                   , ///< Not part of ABI
+};
+
+/**
+ * Pan Scan area.
+ * This specifies the area which should be displayed.
+ * Note there may be multiple such areas for one frame.
+ */
+typedef struct AVPanScan {
+    /**
+     * id
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int id;
+
+    /**
+     * width and height in 1/16 pel
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int width;
+    int height;
+
+    /**
+     * position of the top left corner in 1/16 pel for up to 3 fields/frames
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int16_t position[3][2];
+} AVPanScan;
+
+/**
+ * This structure describes the bitrate properties of an encoded bitstream. It
+ * roughly corresponds to a subset of the VBV parameters for MPEG-2 or HRD
+ * parameters for H.264/HEVC.
+ */
+typedef struct AVCPBProperties {
+    /**
+     * Maximum bitrate of the stream, in bits per second.
+     * Zero if unknown or unspecified.
+     */
+    int64_t max_bitrate;
+    /**
+     * Minimum bitrate of the stream, in bits per second.
+     * Zero if unknown or unspecified.
+     */
+    int64_t min_bitrate;
+    /**
+     * Average bitrate of the stream, in bits per second.
+     * Zero if unknown or unspecified.
+     */
+    int64_t avg_bitrate;
+
+    /**
+     * The size of the buffer to which the ratecontrol is applied, in bits.
+     * Zero if unknown or unspecified.
+     */
+    int64_t buffer_size;
+
+    /**
+     * The delay between the time the packet this structure is associated with
+     * is received and the time when it should be decoded, in periods of a 27MHz
+     * clock.
+     *
+     * UINT64_MAX when unknown or unspecified.
+     */
+    uint64_t vbv_delay;
+} AVCPBProperties;
+
+/**
+ * Allocate a CPB properties structure and initialize its fields to default
+ * values.
+ *
+ * @param size if non-NULL, the size of the allocated struct will be written
+ *             here. This is useful for embedding it in side data.
+ *
+ * @return the newly allocated struct or NULL on failure
+ */
+AVCPBProperties *av_cpb_properties_alloc(size_t *size);
+
+/**
+ * This structure supplies correlation between a packet timestamp and a wall clock
+ * production time. The definition follows the Producer Reference Time ('prft')
+ * as defined in ISO/IEC 14496-12
+ */
+typedef struct AVProducerReferenceTime {
+    /**
+     * A UTC timestamp, in microseconds, since Unix epoch (e.g., av_gettime()).
+     */
+    int64_t wallclock;
+    int flags;
+} AVProducerReferenceTime;
+
+/**
+ * Encode extradata length to a buffer. Used by xiph codecs.
+ *
+ * @param s buffer to write to; must be at least (v/255+1) bytes long
+ * @param v size of extradata in bytes
+ * @return number of bytes written to the buffer.
+ */
+unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
+
+#endif // AVCODEC_DEFS_H
diff --git a/media/ffvpx/libavcodec/encode.c b/media/ffvpx/libavcodec/encode.c
new file mode 100644
index 0000000000..041fc7670e
--- /dev/null
+++ b/media/ffvpx/libavcodec/encode.c
@@ -0,0 +1,774 @@
+/*
+ * generic encoding-related code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/frame.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/samplefmt.h"
+
+#include "avcodec.h"
+#include "codec_internal.h"
+#include "encode.h"
+#include "frame_thread_encoder.h"
+#include "internal.h"
+
+int ff_alloc_packet(AVCodecContext *avctx, AVPacket *avpkt, int64_t size)
+{
+    if (size < 0 || size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid minimum required packet size %"PRId64" (max allowed is %d)\n",
+               size, INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE);
+        return AVERROR(EINVAL);
+    }
+
+    av_assert0(!avpkt->data);
+
+    av_fast_padded_malloc(&avctx->internal->byte_buffer,
+                          &avctx->internal->byte_buffer_size, size);
+    avpkt->data = avctx->internal->byte_buffer;
+    if (!avpkt->data) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to allocate packet of size %"PRId64"\n", size);
+        return AVERROR(ENOMEM);
+    }
+    avpkt->size = size;
+
+    return 0;
+}
+
+int avcodec_default_get_encode_buffer(AVCodecContext *avctx, AVPacket *avpkt, int flags)
+{
+    int ret;
+
+    if (avpkt->size < 0 || avpkt->size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
+        return AVERROR(EINVAL);
+
+    if (avpkt->data || avpkt->buf) {
+        av_log(avctx, AV_LOG_ERROR, "avpkt->{data,buf} != NULL in avcodec_default_get_encode_buffer()\n");
+        return AVERROR(EINVAL);
+    }
+
+    ret = av_buffer_realloc(&avpkt->buf, avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to allocate packet of size %d\n", avpkt->size);
+        return ret;
+    }
+    avpkt->data = avpkt->buf->data;
+
+    return 0;
+}
+
+int ff_get_encode_buffer(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int flags)
+{
+    int ret;
+
+    if (size < 0 || size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
+        return AVERROR(EINVAL);
+
+    av_assert0(!avpkt->data && !avpkt->buf);
+
+    avpkt->size = size;
+    ret = avctx->get_encode_buffer(avctx, avpkt, flags);
+    if (ret < 0)
+        goto fail;
+
+    if (!avpkt->data || !avpkt->buf) {
+        av_log(avctx, AV_LOG_ERROR, "No buffer returned by get_encode_buffer()\n");
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+    memset(avpkt->data + avpkt->size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+    ret = 0;
+fail:
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_encode_buffer() failed\n");
+        av_packet_unref(avpkt);
+    }
+
+    return ret;
+}
+
+static int encode_make_refcounted(AVCodecContext *avctx, AVPacket *avpkt)
+{
+    uint8_t *data = avpkt->data;
+    int ret;
+
+    if (avpkt->buf)
+        return 0;
+
+    avpkt->data = NULL;
+    ret = ff_get_encode_buffer(avctx, avpkt, avpkt->size, 0);
+    if (ret < 0)
+        return ret;
+    memcpy(avpkt->data, data, avpkt->size);
+
+    return 0;
+}
+
+/**
+ * Pad last frame with silence.
+ */
+static int pad_last_frame(AVCodecContext *s, AVFrame *frame, const AVFrame *src, int out_samples)
+{
+    int ret;
+
+    frame->format         = src->format;
+    frame->nb_samples     = out_samples;
+    ret = av_channel_layout_copy(&frame->ch_layout, &s->ch_layout);
+    if (ret < 0)
+        goto fail;
+    ret = av_frame_get_buffer(frame, 0);
+    if (ret < 0)
+        goto fail;
+
+    ret = av_frame_copy_props(frame, src);
+    if (ret < 0)
+        goto fail;
+
+    if ((ret = av_samples_copy(frame->extended_data, src->extended_data, 0, 0,
+                               src->nb_samples, s->ch_layout.nb_channels,
+                               s->sample_fmt)) < 0)
+        goto fail;
+    if ((ret = av_samples_set_silence(frame->extended_data, src->nb_samples,
+                                      frame->nb_samples - src->nb_samples,
+                                      s->ch_layout.nb_channels, s->sample_fmt)) < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    av_frame_unref(frame);
+    s->internal->last_audio_frame = 0;
+    return ret;
+}
+
+int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                            const AVSubtitle *sub)
+{
+    int ret;
+    if (sub->start_display_time) {
+        av_log(avctx, AV_LOG_ERROR, "start_display_time must be 0.\n");
+        return -1;
+    }
+
+    ret = ffcodec(avctx->codec)->cb.encode_sub(avctx, buf, buf_size, sub);
+    avctx->frame_num++;
+#if FF_API_AVCTX_FRAME_NUMBER
+FF_DISABLE_DEPRECATION_WARNINGS
+    avctx->frame_number = avctx->frame_num;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    return ret;
+}
+
+int ff_encode_get_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    AVCodecInternal *avci = avctx->internal;
+
+    if (avci->draining)
+        return AVERROR_EOF;
+
+    if (!avci->buffer_frame->buf[0])
+        return AVERROR(EAGAIN);
+
+    av_frame_move_ref(frame, avci->buffer_frame);
+
+    return 0;
+}
+
+int ff_encode_reordered_opaque(AVCodecContext *avctx,
+                               AVPacket *pkt, const AVFrame *frame)
+{
+#if FF_API_REORDERED_OPAQUE
+FF_DISABLE_DEPRECATION_WARNINGS
+    avctx->reordered_opaque = frame->reordered_opaque;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
+        int ret = av_buffer_replace(&pkt->opaque_ref, frame->opaque_ref);
+        if (ret < 0)
+            return ret;
+        pkt->opaque = frame->opaque;
+    }
+
+    return 0;
+}
+
+int ff_encode_encode_cb(AVCodecContext *avctx, AVPacket *avpkt,
+                        AVFrame *frame, int *got_packet)
+{
+    const FFCodec *const codec = ffcodec(avctx->codec);
+    int ret;
+
+    ret = codec->cb.encode(avctx, avpkt, frame, got_packet);
+    emms_c();
+    av_assert0(ret <= 0); // the encode callback must never return a positive value
+
+    if (!ret && *got_packet) {
+        if (avpkt->data) {
+            ret = encode_make_refcounted(avctx, avpkt);
+            if (ret < 0)
+                goto unref;
+            // Data returned by encoders must always be ref-counted
+            av_assert0(avpkt->buf);
+        }
+
+        // set the timestamps for the simple no-delay case
+        // encoders with delay have to set the timestamps themselves
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY) ||
+            (frame && (codec->caps_internal & FF_CODEC_CAP_EOF_FLUSH))) {
+            if (avpkt->pts == AV_NOPTS_VALUE)
+                avpkt->pts = frame->pts;
+
+            if (!avpkt->duration) {
+                if (frame->duration)
+                    avpkt->duration = frame->duration;
+                else if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+                    avpkt->duration = ff_samples_to_time_base(avctx,
+                                                              frame->nb_samples);
+                }
+            }
+
+            ret = ff_encode_reordered_opaque(avctx, avpkt, frame);
+            if (ret < 0)
+                goto unref;
+        }
+
+        // dts equals pts unless there is reordering
+        // there can be no reordering if there is no encoder delay
+        if (!(avctx->codec_descriptor->props & AV_CODEC_PROP_REORDER) ||
+            !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY)        ||
+            (codec->caps_internal & FF_CODEC_CAP_EOF_FLUSH))
+            avpkt->dts = avpkt->pts;
+    } else {
+unref:
+        av_packet_unref(avpkt); // error or no packet produced: hand back an empty packet
+    }
+
+    if (frame)
+        av_frame_unref(frame);
+
+    return ret;
+}
+
+static int encode_simple_internal(AVCodecContext *avctx, AVPacket *avpkt)
+{
+    AVCodecInternal   *avci = avctx->internal;
+    AVFrame          *frame = avci->in_frame;
+    const FFCodec *const codec = ffcodec(avctx->codec);
+    int got_packet;
+    int ret;
+
+    if (avci->draining_done)
+        return AVERROR_EOF;
+
+    if (!frame->buf[0] && !avci->draining) {
+        av_frame_unref(frame);
+        ret = ff_encode_get_frame(avctx, frame);
+        if (ret < 0 && ret != AVERROR_EOF)
+            return ret;
+    }
+
+    if (!frame->buf[0]) {
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
+              avci->frame_thread_encoder))
+            return AVERROR_EOF;
+
+        // Flushing is signaled with a NULL frame
+        frame = NULL;
+    }
+
+    got_packet = 0;
+
+    av_assert0(codec->cb_type == FF_CODEC_CB_TYPE_ENCODE);
+
+    if (CONFIG_FRAME_THREAD_ENCODER && avci->frame_thread_encoder)
+        /* This will unref frame. */
+        ret = ff_thread_video_encode_frame(avctx, avpkt, frame, &got_packet);
+    else {
+        ret = ff_encode_encode_cb(avctx, avpkt, frame, &got_packet);
+    }
+
+    if (avci->draining && !got_packet)
+        avci->draining_done = 1;
+
+    return ret;
+}
+
+static int encode_simple_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+{
+    int ret;
+
+    while (!avpkt->data && !avpkt->side_data) {
+        ret = encode_simple_internal(avctx, avpkt);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+static int encode_receive_packet_internal(AVCodecContext *avctx, AVPacket *avpkt)
+{
+    AVCodecInternal *avci = avctx->internal;
+    int ret;
+
+    if (avci->draining_done)
+        return AVERROR_EOF;
+
+    av_assert0(!avpkt->data && !avpkt->side_data);
+
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if ((avctx->flags & AV_CODEC_FLAG_PASS1) && avctx->stats_out)
+            avctx->stats_out[0] = '\0';
+        if (av_image_check_size2(avctx->width, avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx))
+            return AVERROR(EINVAL);
+    }
+
+    if (ffcodec(avctx->codec)->cb_type == FF_CODEC_CB_TYPE_RECEIVE_PACKET) {
+        ret = ffcodec(avctx->codec)->cb.receive_packet(avctx, avpkt);
+        if (ret < 0)
+            av_packet_unref(avpkt);
+        else
+            // Encoders must always return ref-counted buffers.
+            // Side-data only packets have no data and can be not ref-counted.
+            av_assert0(!avpkt->data || avpkt->buf);
+    } else
+        ret = encode_simple_receive_packet(avctx, avpkt);
+    if (ret >= 0)
+        avpkt->flags |= avci->intra_only_flag;
+
+    if (ret == AVERROR_EOF)
+        avci->draining_done = 1;
+
+    return ret;
+}
+
+#if CONFIG_LCMS2
+static int encode_generate_icc_profile(AVCodecContext *avctx, AVFrame *frame)
+{
+    enum AVColorTransferCharacteristic trc = frame->color_trc;
+    enum AVColorPrimaries prim = frame->color_primaries;
+    const FFCodec *const codec = ffcodec(avctx->codec);
+    AVCodecInternal *avci = avctx->internal;
+    cmsHPROFILE profile;
+    int ret;
+
+    /* don't generate ICC profiles if disabled or unsupported */
+    if (!(avctx->flags2 & AV_CODEC_FLAG2_ICC_PROFILES))
+        return 0;
+    if (!(codec->caps_internal & FF_CODEC_CAP_ICC_PROFILES))
+        return 0;
+
+    if (trc == AVCOL_TRC_UNSPECIFIED)
+        trc = avctx->color_trc;
+    if (prim == AVCOL_PRI_UNSPECIFIED)
+        prim = avctx->color_primaries;
+    if (trc == AVCOL_TRC_UNSPECIFIED || prim == AVCOL_PRI_UNSPECIFIED)
+        return 0; /* can't generate ICC profile with missing csp tags */
+
+    if (av_frame_get_side_data(frame, AV_FRAME_DATA_ICC_PROFILE))
+        return 0; /* don't overwrite existing ICC profile */
+
+    if (!avci->icc.avctx) {
+        ret = ff_icc_context_init(&avci->icc, avctx);
+        if (ret < 0)
+            return ret;
+    }
+
+    ret = ff_icc_profile_generate(&avci->icc, prim, trc, &profile);
+    if (ret < 0)
+        return ret;
+
+    ret = ff_icc_profile_attach(&avci->icc, profile, frame);
+    cmsCloseProfile(profile);
+    return ret;
+}
+#else /* !CONFIG_LCMS2 */
+static int encode_generate_icc_profile(av_unused AVCodecContext *c, av_unused AVFrame *f)
+{
+    return 0;
+}
+#endif
+
+/**
+ * Fill avci->buffer_frame from src: a new reference, or a silence-padded copy of a short last audio frame.
+ */
+static int encode_send_frame_internal(AVCodecContext *avctx, const AVFrame *src)
+{
+    AVCodecInternal *avci = avctx->internal;
+    AVFrame *dst = avci->buffer_frame;
+    int ret;
+
+    if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+        /* extract audio service type metadata */
+        AVFrameSideData *sd = av_frame_get_side_data(src, AV_FRAME_DATA_AUDIO_SERVICE_TYPE);
+        if (sd && sd->size >= sizeof(enum AVAudioServiceType))
+            avctx->audio_service_type = *(enum AVAudioServiceType*)sd->data;
+        /* check for valid frame size */
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)) {
+            /* if we already got an undersized frame, that must have been the last */
+            if (avctx->internal->last_audio_frame) {
+                av_log(avctx, AV_LOG_ERROR, "frame_size (%d) was not respected for a non-last frame\n", avctx->frame_size);
+                return AVERROR(EINVAL);
+            }
+            if (src->nb_samples > avctx->frame_size) {
+                av_log(avctx, AV_LOG_ERROR, "nb_samples (%d) > frame_size (%d)\n", src->nb_samples, avctx->frame_size);
+                return AVERROR(EINVAL);
+            }
+            if (src->nb_samples < avctx->frame_size) {
+                avctx->internal->last_audio_frame = 1;
+                if (!(avctx->codec->capabilities & AV_CODEC_CAP_SMALL_LAST_FRAME)) {
+                    int pad_samples = avci->pad_samples ? avci->pad_samples : avctx->frame_size;
+                    int out_samples = (src->nb_samples + pad_samples - 1) / pad_samples * pad_samples;
+                    if (out_samples != src->nb_samples) {
+                        ret = pad_last_frame(avctx, dst, src, out_samples);
+                        if (ret < 0)
+                            return ret;
+                        goto finish;
+                    }
+                }
+            }
+        }
+    }
+
+    ret = av_frame_ref(dst, src);
+    if (ret < 0)
+        return ret;
+
+finish:
+#if FF_API_PKT_DURATION
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (dst->pkt_duration && dst->pkt_duration != dst->duration)
+        dst->duration = dst->pkt_duration;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        ret = encode_generate_icc_profile(avctx, dst);
+        if (ret < 0) {
+            av_frame_unref(dst); // on failure the frame must not stay queued for encoding
+            return ret;
+        }
+    }
+
+    // unset frame duration unless AV_CODEC_FLAG_FRAME_DURATION is set: otherwise we
+    // cannot know it is in the right timebase, and a wrong value is worse than none
+    if (!(avctx->flags & AV_CODEC_FLAG_FRAME_DURATION))
+        dst->duration = 0;
+
+    return 0;
+}
+
+int attribute_align_arg avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+{
+    AVCodecInternal *avci = avctx->internal;
+    int ret;
+
+    if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
+        return AVERROR(EINVAL);
+
+    if (avci->draining)
+        return AVERROR_EOF;
+
+    if (avci->buffer_frame->buf[0])
+        return AVERROR(EAGAIN);
+
+    if (!frame) {
+        avci->draining = 1;
+    } else {
+        ret = encode_send_frame_internal(avctx, frame);
+        if (ret < 0)
+            return ret;
+    }
+
+    if (!avci->buffer_pkt->data && !avci->buffer_pkt->side_data) {
+        ret = encode_receive_packet_internal(avctx, avci->buffer_pkt);
+        if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
+            return ret;
+    }
+
+    avctx->frame_num++;
+#if FF_API_AVCTX_FRAME_NUMBER
+FF_DISABLE_DEPRECATION_WARNINGS
+    avctx->frame_number = avctx->frame_num;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    return 0;
+}
+
+int attribute_align_arg avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+{
+    AVCodecInternal *avci = avctx->internal;
+    int ret;
+
+    av_packet_unref(avpkt);
+
+    if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
+        return AVERROR(EINVAL);
+
+    if (avci->buffer_pkt->data || avci->buffer_pkt->side_data) {
+        av_packet_move_ref(avpkt, avci->buffer_pkt);
+    } else {
+        ret = encode_receive_packet_internal(avctx, avpkt);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+/**
+ * Validate the video-specific encoder settings in avctx.
+ * @return 0 on success, AVERROR(EINVAL) if a setting is invalid or inconsistent
+ */
+static int encode_preinit_video(AVCodecContext *avctx)
+{
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->pix_fmt);
+    int i;
+
+    if (avctx->codec->pix_fmts) {
+        for (i = 0; avctx->codec->pix_fmts[i] != AV_PIX_FMT_NONE; i++)
+            if (avctx->pix_fmt == avctx->codec->pix_fmts[i])
+                break;
+        if (avctx->codec->pix_fmts[i] == AV_PIX_FMT_NONE) {
+            char buf[128];
+            snprintf(buf, sizeof(buf), "%d", avctx->pix_fmt);
+            av_log(avctx, AV_LOG_ERROR, "Specified pixel format %s is invalid or not supported\n",
+                   (char *)av_x_if_null(av_get_pix_fmt_name(avctx->pix_fmt), buf));
+            return AVERROR(EINVAL);
+        }
+        if (avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ420P ||
+            avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ411P ||
+            avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ422P ||
+            avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ440P ||
+            avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ444P)
+            avctx->color_range = AVCOL_RANGE_JPEG;
+    }
+
+    // pixdesc is NULL if pix_fmt has no descriptor, which nothing above rules out
+    // when the codec has no pix_fmts list; skip the depth check rather than crash
+    if (pixdesc && (avctx->bits_per_raw_sample < 0 ||
+                    (avctx->bits_per_raw_sample > 8 && pixdesc->comp[0].depth <= 8))) {
+        av_log(avctx, AV_LOG_WARNING, "Specified bit depth %d not possible with the specified pixel formats depth %d\n",
+            avctx->bits_per_raw_sample, pixdesc->comp[0].depth);
+        avctx->bits_per_raw_sample = pixdesc->comp[0].depth;
+    }
+    if (avctx->width <= 0 || avctx->height <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "dimensions not set\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->ticks_per_frame && avctx->time_base.num &&
+        avctx->ticks_per_frame > INT_MAX / avctx->time_base.num) {
+        av_log(avctx, AV_LOG_ERROR, "ticks_per_frame %d too large for the timebase %d/%d.\n",
+               avctx->ticks_per_frame, avctx->time_base.num, avctx->time_base.den);
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->hw_frames_ctx) {
+        AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+        if (frames_ctx->format != avctx->pix_fmt) {
+            av_log(avctx, AV_LOG_ERROR, "Mismatching AVCodecContext.pix_fmt and AVHWFramesContext.format\n");
+            return AVERROR(EINVAL);
+        }
+        if (avctx->sw_pix_fmt != AV_PIX_FMT_NONE && avctx->sw_pix_fmt != frames_ctx->sw_format) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Mismatching AVCodecContext.sw_pix_fmt (%s) "
+                   "and AVHWFramesContext.sw_format (%s)\n",
+                   av_get_pix_fmt_name(avctx->sw_pix_fmt), av_get_pix_fmt_name(frames_ctx->sw_format));
+            return AVERROR(EINVAL);
+        }
+        avctx->sw_pix_fmt = frames_ctx->sw_format;
+    }
+
+    return 0;
+}
+
+static int encode_preinit_audio(AVCodecContext *avctx)
+{
+    int i;
+
+    if (avctx->codec->sample_fmts) {
+        for (i = 0; avctx->codec->sample_fmts[i] != AV_SAMPLE_FMT_NONE; i++) {
+            if (avctx->sample_fmt == avctx->codec->sample_fmts[i])
+                break;
+            if (avctx->ch_layout.nb_channels == 1 &&
+                av_get_planar_sample_fmt(avctx->sample_fmt) ==
+                av_get_planar_sample_fmt(avctx->codec->sample_fmts[i])) {
+                avctx->sample_fmt = avctx->codec->sample_fmts[i];
+                break;
+            }
+        }
+        if (avctx->codec->sample_fmts[i] == AV_SAMPLE_FMT_NONE) {
+            char buf[128];
+            snprintf(buf, sizeof(buf), "%d", avctx->sample_fmt);
+            av_log(avctx, AV_LOG_ERROR, "Specified sample format %s is invalid or not supported\n",
+                   (char *)av_x_if_null(av_get_sample_fmt_name(avctx->sample_fmt), buf));
+            return AVERROR(EINVAL);
+        }
+    }
+    if (avctx->codec->supported_samplerates) {
+        for (i = 0; avctx->codec->supported_samplerates[i] != 0; i++)
+            if (avctx->sample_rate == avctx->codec->supported_samplerates[i])
+                break;
+        if (avctx->codec->supported_samplerates[i] == 0) {
+            av_log(avctx, AV_LOG_ERROR, "Specified sample rate %d is not supported\n",
+                   avctx->sample_rate);
+            return AVERROR(EINVAL);
+        }
+    }
+    if (avctx->sample_rate < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Specified sample rate %d is not supported\n",
+                avctx->sample_rate);
+        return AVERROR(EINVAL);
+    }
+    if (avctx->codec->ch_layouts) {
+        for (i = 0; avctx->codec->ch_layouts[i].nb_channels; i++) {
+            if (!av_channel_layout_compare(&avctx->ch_layout, &avctx->codec->ch_layouts[i]))
+                break;
+        }
+        if (!avctx->codec->ch_layouts[i].nb_channels) {
+            char buf[512];
+            int ret = av_channel_layout_describe(&avctx->ch_layout, buf, sizeof(buf));
+            if (ret > 0)
+                av_log(avctx, AV_LOG_ERROR, "Specified channel layout '%s' is not supported\n", buf);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    if (!avctx->bits_per_raw_sample)
+        avctx->bits_per_raw_sample = 8 * av_get_bytes_per_sample(avctx->sample_fmt);
+
+    return 0;
+}
+
+int ff_encode_preinit(AVCodecContext *avctx)
+{
+    AVCodecInternal *avci = avctx->internal;
+    int ret = 0;
+
+    if (avctx->time_base.num <= 0 || avctx->time_base.den <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "The encoder timebase is not set.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE &&
+        !(avctx->codec->capabilities & AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE)) {
+        av_log(avctx, AV_LOG_ERROR, "The copy_opaque flag is set, but the "
+               "encoder does not support it.\n");
+        return AVERROR(EINVAL);
+    }
+
+    switch (avctx->codec_type) {
+    case AVMEDIA_TYPE_VIDEO: ret = encode_preinit_video(avctx); break;
+    case AVMEDIA_TYPE_AUDIO: ret = encode_preinit_audio(avctx); break;
+    }
+    if (ret < 0)
+        return ret;
+
+    if (   (avctx->codec_type == AVMEDIA_TYPE_VIDEO || avctx->codec_type == AVMEDIA_TYPE_AUDIO)
+        && avctx->bit_rate>0 && avctx->bit_rate<1000) {
+        av_log(avctx, AV_LOG_WARNING, "Bitrate %"PRId64" is extremely low, maybe you mean %"PRId64"k\n", avctx->bit_rate, avctx->bit_rate);
+    }
+
+    if (!avctx->rc_initial_buffer_occupancy)
+        avctx->rc_initial_buffer_occupancy = avctx->rc_buffer_size * 3LL / 4;
+
+    if (avctx->codec_descriptor->props & AV_CODEC_PROP_INTRA_ONLY)
+        avctx->internal->intra_only_flag = AV_PKT_FLAG_KEY;
+
+    if (ffcodec(avctx->codec)->cb_type == FF_CODEC_CB_TYPE_ENCODE) {
+        avci->in_frame = av_frame_alloc();
+        if (!avci->in_frame)
+            return AVERROR(ENOMEM);
+    }
+
+    if ((avctx->flags & AV_CODEC_FLAG_RECON_FRAME)) {
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_ENCODER_RECON_FRAME)) {
+            av_log(avctx, AV_LOG_ERROR, "Reconstructed frame output requested "
+                   "from an encoder not supporting it\n");
+            return AVERROR(ENOSYS);
+        }
+
+        avci->recon_frame = av_frame_alloc();
+        if (!avci->recon_frame)
+            return AVERROR(ENOMEM);
+    }
+
+    if (CONFIG_FRAME_THREAD_ENCODER) {
+        ret = ff_frame_thread_encoder_init(avctx);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+int ff_encode_alloc_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    int ret;
+
+    switch (avctx->codec->type) {
+    case AVMEDIA_TYPE_VIDEO:
+        frame->format = avctx->pix_fmt;
+        if (frame->width <= 0 || frame->height <= 0) {
+            frame->width  = FFMAX(avctx->width,  avctx->coded_width);
+            frame->height = FFMAX(avctx->height, avctx->coded_height);
+        }
+
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        frame->sample_rate = avctx->sample_rate;
+        frame->format      = avctx->sample_fmt;
+        if (!frame->ch_layout.nb_channels) {
+            ret = av_channel_layout_copy(&frame->ch_layout, &avctx->ch_layout);
+            if (ret < 0)
+                return ret;
+        }
+        break;
+    }
+
+    ret = avcodec_default_get_buffer2(avctx, frame, 0);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        av_frame_unref(frame);
+        return ret;
+    }
+
+    return 0;
+}
+
+int ff_encode_receive_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    AVCodecInternal *avci = avctx->internal;
+
+    if (!avci->recon_frame)
+        return AVERROR(EINVAL);
+    if (!avci->recon_frame->buf[0])
+        return avci->draining_done ? AVERROR_EOF : AVERROR(EAGAIN);
+
+    av_frame_move_ref(frame, avci->recon_frame);
+    return 0;
+}
diff --git a/media/ffvpx/libavcodec/encode.h b/media/ffvpx/libavcodec/encode.h
new file mode 100644
index 0000000000..26a3304045
--- /dev/null
+++ b/media/ffvpx/libavcodec/encode.h
@@ -0,0 +1,99 @@
+/*
+ * generic encoding-related code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ENCODE_H
+#define AVCODEC_ENCODE_H
+
+#include "libavutil/frame.h"
+
+#include "avcodec.h"
+#include "packet.h"
+
+/**
+ * avcodec_receive_frame() implementation for encoders.
+ */
+int ff_encode_receive_frame(AVCodecContext *avctx, AVFrame *frame);
+
+/**
+ * Called by encoders to get the next frame for encoding.
+ *
+ * @param frame An empty frame to be filled with data.
+ * @return 0 if a new reference has been successfully written to frame
+ *         AVERROR(EAGAIN) if no data is currently available
+ *         AVERROR_EOF if end of stream has been reached, so no more data
+ *                     will be available
+ */
+int ff_encode_get_frame(AVCodecContext *avctx, AVFrame *frame);
+
+/**
+ * Get a buffer for a packet. This is a wrapper around
+ * AVCodecContext.get_encode_buffer() and should be used instead of calling
+ * get_encode_buffer() directly.
+ */
+int ff_get_encode_buffer(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int flags);
+
+/**
+ * Allocate buffers for a frame. Encoder equivalent to ff_get_buffer().
+ */
+int ff_encode_alloc_frame(AVCodecContext *avctx, AVFrame *frame);
+
+/**
+ * Check AVPacket size and allocate data.
+ *
+ * Encoders of type FF_CODEC_CB_TYPE_ENCODE can use this as a convenience to
+ * obtain a big enough buffer for the encoded bitstream.
+ *
+ * @param avctx   the AVCodecContext of the encoder
+ * @param avpkt   The AVPacket: on success, avpkt->data will point to a buffer
+ *                of size at least `size`; the packet will not be refcounted.
+ *                This packet must be initially blank.
+ * @param size    an upper bound of the size of the packet to encode
+ * @return        non negative on success, negative error code on failure
+ */
+int ff_alloc_packet(AVCodecContext *avctx, AVPacket *avpkt, int64_t size);
+
+/**
+ * Propagate user opaque values from the frame to avctx/pkt as needed.
+ */
+int ff_encode_reordered_opaque(AVCodecContext *avctx,
+                               AVPacket *pkt, const AVFrame *frame);
+
+/**
+ * Perform encoder initialization and validation.
+ * Called when opening the encoder, before the FFCodec.init() call.
+ */
+int ff_encode_preinit(AVCodecContext *avctx);
+
+int ff_encode_encode_cb(AVCodecContext *avctx, AVPacket *avpkt,
+                        AVFrame *frame, int *got_packet);
+
+/**
+ * Rescale from sample rate to AVCodecContext.time_base.
+ */
+static av_always_inline int64_t ff_samples_to_time_base(const AVCodecContext *avctx,
+                                                        int64_t samples)
+{
+    /* Source time base: one tick per sample, i.e. 1 / sample_rate. */
+    const AVRational sample_tb = { 1, avctx->sample_rate };
+    return samples == AV_NOPTS_VALUE ? AV_NOPTS_VALUE
+        : av_rescale_q(samples, sample_tb, avctx->time_base);
+}
+
+#endif /* AVCODEC_ENCODE_H */
diff --git a/media/ffvpx/libavcodec/error_resilience.h b/media/ffvpx/libavcodec/error_resilience.h
new file mode 100644
index 0000000000..47cc8a4fc6
--- /dev/null
+++ b/media/ffvpx/libavcodec/error_resilience.h
@@ -0,0 +1,97 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ERROR_RESILIENCE_H
+#define AVCODEC_ERROR_RESILIENCE_H
+
+#include <stdint.h>
+#include <stdatomic.h>
+
+#include "avcodec.h"
+#include "me_cmp.h"
+#include "threadframe.h"
+
+/// current MB is the first after a resync marker
+#define VP_START               1
+#define ER_AC_ERROR            2
+#define ER_DC_ERROR            4
+#define ER_MV_ERROR            8
+#define ER_AC_END              16
+#define ER_DC_END              32
+#define ER_MV_END              64
+
+#define ER_MB_ERROR (ER_AC_ERROR|ER_DC_ERROR|ER_MV_ERROR)
+#define ER_MB_END   (ER_AC_END|ER_DC_END|ER_MV_END)
+
+typedef struct ERPicture {      // used for ERContext's cur_pic / last_pic / next_pic
+    AVFrame *f;
+    ThreadFrame *tf;
+
+    // it is the caller's responsibility to allocate these buffers
+    int16_t (*motion_val[2])[2];    // two pointers, each to an array of int16_t pairs
+    int8_t *ref_index[2];
+
+    uint32_t *mb_type;
+    int field_picture;
+} ERPicture;
+
+typedef struct ERContext {      // context passed to the ff_er_*() functions declared below
+    AVCodecContext *avctx;
+
+    me_cmp_func sad;
+    int mecc_inited;
+
+    int *mb_index2xy;
+    int mb_num;
+    int mb_width, mb_height;
+    ptrdiff_t mb_stride;
+    ptrdiff_t b8_stride;
+
+    atomic_int error_count;     // atomic, unlike the plain-int error_occurred below
+    int error_occurred;
+    uint8_t *error_status_table;
+    uint8_t *er_temp_buffer;
+    int16_t *dc_val[3];
+    uint8_t *mbskip_table;
+    uint8_t *mbintra_table;
+    int mv[2][4][2];            // same shape as the array decode_mb's mv argument points to
+
+    ERPicture cur_pic;
+    ERPicture last_pic;
+    ERPicture next_pic;
+
+    int8_t *ref_index[2];
+    int16_t (*motion_val_base[2])[2];
+
+    uint16_t pp_time;
+    uint16_t pb_time;
+    int quarter_sample;
+    int partitioned_frame;
+
+    void (*decode_mb)(void *opaque, int ref, int mv_dir, int mv_type,
+                      int (*mv)[2][4][2],
+                      int mb_x, int mb_y, int mb_intra, int mb_skipped);
+    void *opaque;   // NOTE(review): presumably handed back to decode_mb -- confirm
+} ERContext;
+
+void ff_er_frame_start(ERContext *s);
+void ff_er_frame_end(ERContext *s);
+void ff_er_add_slice(ERContext *s, int startx, int starty, int endx, int endy,
+                     int status);
+
+#endif /* AVCODEC_ERROR_RESILIENCE_H */
diff --git a/media/ffvpx/libavcodec/faandct.c b/media/ffvpx/libavcodec/faandct.c
new file mode 100644
index 0000000000..38c392bbae
--- /dev/null
+++ b/media/ffvpx/libavcodec/faandct.c
@@ -0,0 +1,215 @@
+/*
+ * Floating point AAN DCT
+ * this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c)
+ *
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2003 Roman Shaposhnik
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/**
+ * @file
+ * @brief
+ *     Floating point AAN DCT
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "faandct.h"
+#include "libavutil/internal.h"
+#include "libavutil/libm.h"
+
+typedef float FLOAT;
+
+/* Numbers generated by arbitrary precision arithmetic followed by truncation
+to 36 fractional digits (enough for a 128-bit IEEE quad, see /usr/include/math.h
+for this approach). Unfortunately, long double is not always supported correctly,
+e.g. ppc has issues.
+TODO: add L suffixes when ppc and toolchains sort out their stuff.
+*/
+#define B0 1.000000000000000000000000000000000000
+#define B1 0.720959822006947913789091890943021267 // (cos(pi*1/16)sqrt(2))^-1
+#define B2 0.765366864730179543456919968060797734 // (cos(pi*2/16)sqrt(2))^-1
+#define B3 0.850430094767256448766702844371412325 // (cos(pi*3/16)sqrt(2))^-1
+#define B4 1.000000000000000000000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
+#define B5 1.272758580572833938461007018281767032 // (cos(pi*5/16)sqrt(2))^-1
+#define B6 1.847759065022573512256366378793576574 // (cos(pi*6/16)sqrt(2))^-1
+#define B7 3.624509785411551372409941227504289587 // (cos(pi*7/16)sqrt(2))^-1
+
+#define A1 M_SQRT1_2              // cos(pi*4/16)
+#define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2)
+#define A5 0.38268343236508977170 // cos(pi*6/16)
+#define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2)
+
+static const FLOAT postscale[64]={ /* postscale[8*r + c] = Br * Bc */
+B0*B0, B0*B1, B0*B2, B0*B3, B0*B4, B0*B5, B0*B6, B0*B7,
+B1*B0, B1*B1, B1*B2, B1*B3, B1*B4, B1*B5, B1*B6, B1*B7,
+B2*B0, B2*B1, B2*B2, B2*B3, B2*B4, B2*B5, B2*B6, B2*B7,
+B3*B0, B3*B1, B3*B2, B3*B3, B3*B4, B3*B5, B3*B6, B3*B7,
+B4*B0, B4*B1, B4*B2, B4*B3, B4*B4, B4*B5, B4*B6, B4*B7,
+B5*B0, B5*B1, B5*B2, B5*B3, B5*B4, B5*B5, B5*B6, B5*B7,
+B6*B0, B6*B1, B6*B2, B6*B3, B6*B4, B6*B5, B6*B6, B6*B7,
+B7*B0, B7*B1, B7*B2, B7*B3, B7*B4, B7*B5, B7*B6, B7*B7,
+};
+
+static av_always_inline void row_fdct(FLOAT temp[64], int16_t *data)
+{
+    FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    FLOAT tmp10, tmp11, tmp12, tmp13;
+    FLOAT z2, z4, z11, z13;
+    int i;
+    /* Row pass: reads data[] and writes temp[]; postscale[] is not applied here. */
+    for (i=0; i<8*8; i+=8) {       /* i = offset of the current group of 8 values */
+        tmp0= data[0 + i] + data[7 + i];
+        tmp7= data[0 + i] - data[7 + i];
+        tmp1= data[1 + i] + data[6 + i];
+        tmp6= data[1 + i] - data[6 + i];
+        tmp2= data[2 + i] + data[5 + i];
+        tmp5= data[2 + i] - data[5 + i];
+        tmp3= data[3 + i] + data[4 + i];
+        tmp4= data[3 + i] - data[4 + i];
+        /* The pair sums tmp0..tmp3 produce outputs 0, 4, 2 and 6. */
+        tmp10= tmp0 + tmp3;
+        tmp13= tmp0 - tmp3;
+        tmp11= tmp1 + tmp2;
+        tmp12= tmp1 - tmp2;
+
+        temp[0 + i]= tmp10 + tmp11;
+        temp[4 + i]= tmp10 - tmp11;
+
+        tmp12 += tmp13;
+        tmp12 *= A1;
+        temp[2 + i]= tmp13 + tmp12;
+        temp[6 + i]= tmp13 - tmp12;
+        /* The pair differences tmp4..tmp7 produce outputs 5, 3, 1 and 7. */
+        tmp4 += tmp5;
+        tmp5 += tmp6;
+        tmp6 += tmp7;
+
+        z2= tmp4*(A2+A5) - tmp6*A5;
+        z4= tmp6*(A4-A5) + tmp4*A5;
+
+        tmp5*=A1;
+
+        z11= tmp7 + tmp5;
+        z13= tmp7 - tmp5;
+
+        temp[5 + i]= z13 + z2;
+        temp[3 + i]= z13 - z2;
+        temp[1 + i]= z11 + z4;
+        temp[7 + i]= z11 - z4;
+    }
+}
+
+void ff_faandct(int16_t *data)
+{
+    FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    FLOAT tmp10, tmp11, tmp12, tmp13;
+    FLOAT z2, z4, z11, z13;
+    FLOAT temp[64];
+    int i;
+    /* In-place 8x8 forward DCT: row pass into temp[], then the column pass below. */
+    emms_c();
+
+    row_fdct(temp, data);
+    /* Column i: results are scaled by postscale[], rounded by lrintf(), stored in data[]. */
+    for (i=0; i<8; i++) {
+        tmp0= temp[8*0 + i] + temp[8*7 + i];
+        tmp7= temp[8*0 + i] - temp[8*7 + i];
+        tmp1= temp[8*1 + i] + temp[8*6 + i];
+        tmp6= temp[8*1 + i] - temp[8*6 + i];
+        tmp2= temp[8*2 + i] + temp[8*5 + i];
+        tmp5= temp[8*2 + i] - temp[8*5 + i];
+        tmp3= temp[8*3 + i] + temp[8*4 + i];
+        tmp4= temp[8*3 + i] - temp[8*4 + i];
+        /* The pair sums tmp0..tmp3 produce output rows 0, 4, 2 and 6. */
+        tmp10= tmp0 + tmp3;
+        tmp13= tmp0 - tmp3;
+        tmp11= tmp1 + tmp2;
+        tmp12= tmp1 - tmp2;
+
+        data[8*0 + i]= lrintf(postscale[8*0 + i] * (tmp10 + tmp11));
+        data[8*4 + i]= lrintf(postscale[8*4 + i] * (tmp10 - tmp11));
+
+        tmp12 += tmp13;
+        tmp12 *= A1;
+        data[8*2 + i]= lrintf(postscale[8*2 + i] * (tmp13 + tmp12));
+        data[8*6 + i]= lrintf(postscale[8*6 + i] * (tmp13 - tmp12));
+        /* The pair differences tmp4..tmp7 produce output rows 5, 3, 1 and 7. */
+        tmp4 += tmp5;
+        tmp5 += tmp6;
+        tmp6 += tmp7;
+
+        z2= tmp4*(A2+A5) - tmp6*A5;
+        z4= tmp6*(A4-A5) + tmp4*A5;
+
+        tmp5*=A1;
+
+        z11= tmp7 + tmp5;
+        z13= tmp7 - tmp5;
+
+        data[8*5 + i]= lrintf(postscale[8*5 + i] * (z13 + z2));
+        data[8*3 + i]= lrintf(postscale[8*3 + i] * (z13 - z2));
+        data[8*1 + i]= lrintf(postscale[8*1 + i] * (z11 + z4));
+        data[8*7 + i]= lrintf(postscale[8*7 + i] * (z11 - z4));
+    }
+}
+
+void ff_faandct248(int16_t *data)
+{
+    FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    FLOAT tmp10, tmp11, tmp12, tmp13;
+    FLOAT temp[64];
+    int i;
+    /* In-place variant: same row pass, but the column pass works on adjacent row pairs. */
+    emms_c();
+
+    row_fdct(temp, data);
+    /* Column i: tmp0..tmp3 are sums of rows (0,1) (2,3) (4,5) (6,7), tmp4..tmp7 differences. */
+    for (i=0; i<8; i++) {
+        tmp0 = temp[8*0 + i] + temp[8*1 + i];
+        tmp1 = temp[8*2 + i] + temp[8*3 + i];
+        tmp2 = temp[8*4 + i] + temp[8*5 + i];
+        tmp3 = temp[8*6 + i] + temp[8*7 + i];
+        tmp4 = temp[8*0 + i] - temp[8*1 + i];
+        tmp5 = temp[8*2 + i] - temp[8*3 + i];
+        tmp6 = temp[8*4 + i] - temp[8*5 + i];
+        tmp7 = temp[8*6 + i] - temp[8*7 + i];
+        /* The sums produce output rows 0, 4, 2 and 6. */
+        tmp10 = tmp0 + tmp3;
+        tmp11 = tmp1 + tmp2;
+        tmp12 = tmp1 - tmp2;
+        tmp13 = tmp0 - tmp3;
+
+        data[8*0 + i] = lrintf(postscale[8*0 + i] * (tmp10 + tmp11));
+        data[8*4 + i] = lrintf(postscale[8*4 + i] * (tmp10 - tmp11));
+
+        tmp12 += tmp13;
+        tmp12 *= A1;
+        data[8*2 + i] = lrintf(postscale[8*2 + i] * (tmp13 + tmp12));
+        data[8*6 + i] = lrintf(postscale[8*6 + i] * (tmp13 - tmp12));
+        /* The differences produce output rows 1, 5, 3 and 7, using postscale rows 0, 4, 2, 6. */
+        tmp10 = tmp4 + tmp7;
+        tmp11 = tmp5 + tmp6;
+        tmp12 = tmp5 - tmp6;
+        tmp13 = tmp4 - tmp7;
+
+        data[8*1 + i] = lrintf(postscale[8*0 + i] * (tmp10 + tmp11));
+        data[8*5 + i] = lrintf(postscale[8*4 + i] * (tmp10 - tmp11));
+
+        tmp12 += tmp13;
+        tmp12 *= A1;
+        data[8*3 + i] = lrintf(postscale[8*2 + i] * (tmp13 + tmp12));
+        data[8*7 + i] = lrintf(postscale[8*6 + i] * (tmp13 - tmp12));
+    }
+}
diff --git a/media/ffvpx/libavcodec/faandct.h b/media/ffvpx/libavcodec/faandct.h
new file mode 100644
index 0000000000..c5ef96dcf1
--- /dev/null
+++ b/media/ffvpx/libavcodec/faandct.h
@@ -0,0 +1,37 @@
+/*
+ * Floating point AAN DCT
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * @brief
+ *     Floating point AAN DCT
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef AVCODEC_FAANDCT_H
+#define AVCODEC_FAANDCT_H
+
+#include <stdint.h>
+
+void ff_faandct(int16_t *data);
+void ff_faandct248(int16_t *data);
+
+#endif /* AVCODEC_FAANDCT_H */
diff --git a/media/ffvpx/libavcodec/faanidct.c b/media/ffvpx/libavcodec/faanidct.c
new file mode 100644
index 0000000000..3921f82dae
--- /dev/null
+++ b/media/ffvpx/libavcodec/faanidct.c
@@ -0,0 +1,166 @@
+/*
+ * Floating point AAN IDCT
+ * Copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "faanidct.h"
+#include "libavutil/common.h"
+
+/* To allow switching to double. */
+typedef float FLOAT;
+
+#define B0 1.0000000000000000000000
+#define B1 1.3870398453221474618216 // cos(pi*1/16)sqrt(2)
+#define B2 1.3065629648763765278566 // cos(pi*2/16)sqrt(2)
+#define B3 1.1758756024193587169745 // cos(pi*3/16)sqrt(2)
+#define B4 1.0000000000000000000000 // cos(pi*4/16)sqrt(2)
+#define B5 0.7856949583871021812779 // cos(pi*5/16)sqrt(2)
+#define B6 0.5411961001461969843997 // cos(pi*6/16)sqrt(2)
+#define B7 0.2758993792829430123360 // cos(pi*7/16)sqrt(2)
+
+#define A4 0.70710678118654752438 // cos(pi*4/16)
+#define A2 0.92387953251128675613 // cos(pi*2/16)
+
+static const FLOAT prescale[64]={ /* prescale[8*r + c] = Br * Bc / 8 */
+B0*B0/8, B0*B1/8, B0*B2/8, B0*B3/8, B0*B4/8, B0*B5/8, B0*B6/8, B0*B7/8,
+B1*B0/8, B1*B1/8, B1*B2/8, B1*B3/8, B1*B4/8, B1*B5/8, B1*B6/8, B1*B7/8,
+B2*B0/8, B2*B1/8, B2*B2/8, B2*B3/8, B2*B4/8, B2*B5/8, B2*B6/8, B2*B7/8,
+B3*B0/8, B3*B1/8, B3*B2/8, B3*B3/8, B3*B4/8, B3*B5/8, B3*B6/8, B3*B7/8,
+B4*B0/8, B4*B1/8, B4*B2/8, B4*B3/8, B4*B4/8, B4*B5/8, B4*B6/8, B4*B7/8,
+B5*B0/8, B5*B1/8, B5*B2/8, B5*B3/8, B5*B4/8, B5*B5/8, B5*B6/8, B5*B7/8,
+B6*B0/8, B6*B1/8, B6*B2/8, B6*B3/8, B6*B4/8, B6*B5/8, B6*B6/8, B6*B7/8,
+B7*B0/8, B7*B1/8, B7*B2/8, B7*B3/8, B7*B4/8, B7*B5/8, B7*B6/8, B7*B7/8,
+};
+
+static inline void p8idct(int16_t data[64], FLOAT temp[64], uint8_t *dest,
+                          ptrdiff_t stride, int x, int y, int type)
+{
+    int i;
+    FLOAT s04, d04, s17, d17, s26, d26, s53, d53;
+    FLOAT os07, os16, os25, os34;
+    FLOAT od07, od16, od25, od34;
+    /* Eight 8-point passes over temp[]: inputs of one pass are x apart, passes are y apart. */
+    for(i=0; i<y*8; i+=y){
+        s17= temp[1*x + i] + temp[7*x + i];
+        d17= temp[1*x + i] - temp[7*x + i];
+        s53= temp[5*x + i] + temp[3*x + i];
+        d53= temp[5*x + i] - temp[3*x + i];
+        /* od* terms are built from the odd-numbered inputs (1, 7, 5, 3) only. */
+        od07=  s17 + s53;
+        od25= (s17 - s53)*(2*A4);
+
+        od34=  d17*(2*(B6-A2)) - d53*(2*A2);
+        od16=  d53*(2*(A2-B2)) + d17*(2*A2);
+
+        od16 -= od07;
+        od25 -= od16;
+        od34 += od25;
+        /* os* terms are built from the even-numbered inputs (2, 6, 0, 4) only. */
+        s26 = temp[2*x + i] + temp[6*x + i];
+        d26 = temp[2*x + i] - temp[6*x + i];
+        d26*= 2*A4;
+        d26-= s26;
+
+        s04= temp[0*x + i] + temp[4*x + i];
+        d04= temp[0*x + i] - temp[4*x + i];
+
+        os07= s04 + s26;
+        os34= s04 - s26;
+        os16= d04 + d26;
+        os25= d04 - d26;
+        /* type selects where the eight outputs (os +/- od) of this pass go. */
+        if(type==0){                /* keep them as floats in temp[] */
+            temp[0*x + i]= os07 + od07;
+            temp[7*x + i]= os07 - od07;
+            temp[1*x + i]= os16 + od16;
+            temp[6*x + i]= os16 - od16;
+            temp[2*x + i]= os25 + od25;
+            temp[5*x + i]= os25 - od25;
+            temp[3*x + i]= os34 - od34;
+            temp[4*x + i]= os34 + od34;
+        }else if(type==1){          /* round with lrintf() into data[] */
+            data[0*x + i]= lrintf(os07 + od07);
+            data[7*x + i]= lrintf(os07 - od07);
+            data[1*x + i]= lrintf(os16 + od16);
+            data[6*x + i]= lrintf(os16 - od16);
+            data[2*x + i]= lrintf(os25 + od25);
+            data[5*x + i]= lrintf(os25 - od25);
+            data[3*x + i]= lrintf(os34 - od34);
+            data[4*x + i]= lrintf(os34 + od34);
+        }else if(type==2){          /* add to dest[k*stride + i], clipped by av_clip_uint8() */
+            dest[0*stride + i]= av_clip_uint8(((int)dest[0*stride + i]) + lrintf(os07 + od07));
+            dest[7*stride + i]= av_clip_uint8(((int)dest[7*stride + i]) + lrintf(os07 - od07));
+            dest[1*stride + i]= av_clip_uint8(((int)dest[1*stride + i]) + lrintf(os16 + od16));
+            dest[6*stride + i]= av_clip_uint8(((int)dest[6*stride + i]) + lrintf(os16 - od16));
+            dest[2*stride + i]= av_clip_uint8(((int)dest[2*stride + i]) + lrintf(os25 + od25));
+            dest[5*stride + i]= av_clip_uint8(((int)dest[5*stride + i]) + lrintf(os25 - od25));
+            dest[3*stride + i]= av_clip_uint8(((int)dest[3*stride + i]) + lrintf(os34 - od34));
+            dest[4*stride + i]= av_clip_uint8(((int)dest[4*stride + i]) + lrintf(os34 + od34));
+        }else{                      /* any other type: overwrite dest[k*stride + i], clipped */
+            dest[0*stride + i]= av_clip_uint8(lrintf(os07 + od07));
+            dest[7*stride + i]= av_clip_uint8(lrintf(os07 - od07));
+            dest[1*stride + i]= av_clip_uint8(lrintf(os16 + od16));
+            dest[6*stride + i]= av_clip_uint8(lrintf(os16 - od16));
+            dest[2*stride + i]= av_clip_uint8(lrintf(os25 + od25));
+            dest[5*stride + i]= av_clip_uint8(lrintf(os25 - od25));
+            dest[3*stride + i]= av_clip_uint8(lrintf(os34 - od34));
+            dest[4*stride + i]= av_clip_uint8(lrintf(os34 + od34));
+        }
+    }
+}
+
+void ff_faanidct(int16_t block[64]){
+    FLOAT scaled[64];
+    int k;
+
+    emms_c();
+    /* Convert to floating point, applying the per-coefficient prescale. */
+    for (k = 0; k < 64; k++)
+        scaled[k] = block[k] * prescale[k];
+    /* type 0 keeps results in scaled[]; type 1 rounds them into block[]. */
+    p8idct(block, scaled, NULL, 0, 1, 8, 0);
+    p8idct(block, scaled, NULL, 0, 8, 1, 1);
+}
+
+void ff_faanidct_add(uint8_t *dest, ptrdiff_t line_size, int16_t block[64])
+{
+    FLOAT scaled[64];
+    int k;
+
+    emms_c();
+    /* Convert to floating point, applying the per-coefficient prescale. */
+    for (k = 0; k < 64; k++)
+        scaled[k] = block[k] * prescale[k];
+    /* type 0 stays in scaled[]; type 2 adds to dest[] and clips to uint8. */
+    p8idct(block, scaled, NULL, 0,         1, 8, 0);
+    p8idct(NULL,  scaled, dest, line_size, 8, 1, 2);
+}
+
+void ff_faanidct_put(uint8_t *dest, ptrdiff_t line_size, int16_t block[64])
+{
+    FLOAT scaled[64];
+    int k;
+
+    emms_c();
+    /* Convert to floating point, applying the per-coefficient prescale. */
+    for (k = 0; k < 64; k++)
+        scaled[k] = block[k] * prescale[k];
+    /* type 0 stays in scaled[]; type 3 stores the clipped result in dest[]. */
+    p8idct(block, scaled, NULL, 0,         1, 8, 0);
+    p8idct(NULL,  scaled, dest, line_size, 8, 1, 3);
+}
diff --git a/media/ffvpx/libavcodec/faanidct.h b/media/ffvpx/libavcodec/faanidct.h
new file mode 100644
index 0000000000..6f4da67c1b
--- /dev/null
+++ b/media/ffvpx/libavcodec/faanidct.h
@@ -0,0 +1,32 @@
+/*
+ * Floating point AAN IDCT
+ * Copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FAANIDCT_H
+#define AVCODEC_FAANIDCT_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_faanidct(int16_t block[64]);
+void ff_faanidct_add(uint8_t *dest, ptrdiff_t line_size, int16_t block[64]);
+void ff_faanidct_put(uint8_t *dest, ptrdiff_t line_size, int16_t block[64]);
+
+#endif /* AVCODEC_FAANIDCT_H */
diff --git a/media/ffvpx/libavcodec/fdctdsp.c b/media/ffvpx/libavcodec/fdctdsp.c
new file mode 100644
index 0000000000..5306c9d047
--- /dev/null
+++ b/media/ffvpx/libavcodec/fdctdsp.c
@@ -0,0 +1,51 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "avcodec.h"
+#include "dct.h"
+#include "faandct.h"
+#include "fdctdsp.h"
+#include "config.h"
+
+av_cold void ff_fdctdsp_init(FDCTDSPContext *c, AVCodecContext *avctx)
+{
+    av_unused const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
+    /* Base choice: depths 9 and 10 get the _10 variants, otherwise dct_algo decides. */
+    if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) {
+        c->fdct    = ff_jpeg_fdct_islow_10;
+        c->fdct248 = ff_fdct248_islow_10;
+    } else if (avctx->dct_algo == FF_DCT_FASTINT) {
+        c->fdct    = ff_fdct_ifast;
+        c->fdct248 = ff_fdct_ifast248;
+#if CONFIG_FAANDCT
+    } else if (avctx->dct_algo == FF_DCT_FAAN) {   /* branch exists only with CONFIG_FAANDCT */
+        c->fdct    = ff_faandct;
+        c->fdct248 = ff_faandct248;
+#endif /* CONFIG_FAANDCT */
+    } else {
+        c->fdct    = ff_jpeg_fdct_islow_8; // slow/accurate/default
+        c->fdct248 = ff_fdct248_islow_8;
+    }
+    /* Then give the PPC or x86 init a chance to adjust c; it also receives high_bit_depth. */
+#if ARCH_PPC
+    ff_fdctdsp_init_ppc(c, avctx, high_bit_depth);
+#elif ARCH_X86
+    ff_fdctdsp_init_x86(c, avctx, high_bit_depth);
+#endif
+}
diff --git a/media/ffvpx/libavcodec/fdctdsp.h b/media/ffvpx/libavcodec/fdctdsp.h
new file mode 100644
index 0000000000..3e1f683b9e
--- /dev/null
+++ b/media/ffvpx/libavcodec/fdctdsp.h
@@ -0,0 +1,37 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FDCTDSP_H
+#define AVCODEC_FDCTDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+
+typedef struct FDCTDSPContext {    /* function-pointer table passed to ff_fdctdsp_init() */
+    void (*fdct)(int16_t *block /* align 16 */);
+    void (*fdct248)(int16_t *block /* align 16 */);
+} FDCTDSPContext;
+
+void ff_fdctdsp_init(FDCTDSPContext *c, AVCodecContext *avctx);
+void ff_fdctdsp_init_ppc(FDCTDSPContext *c, AVCodecContext *avctx,
+                         unsigned high_bit_depth);
+void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx,
+                         unsigned high_bit_depth);
+
+#endif /* AVCODEC_FDCTDSP_H */
diff --git a/media/ffvpx/libavcodec/fdctdsp_init.c b/media/ffvpx/libavcodec/fdctdsp_init.c
new file mode 100644
index 0000000000..0cb5fd625b
--- /dev/null
+++ b/media/ffvpx/libavcodec/fdctdsp_init.c
@@ -0,0 +1,44 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/fdctdsp.h"
+#include "fdct.h"
+
+av_cold void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx,
+                                 unsigned high_bit_depth)
+{
+    const int flags = av_get_cpu_flags();
+    const int algo  = avctx->dct_algo;
+
+    /* Only 8-bit content with dct_algo FF_DCT_AUTO or FF_DCT_MMX is handled
+     * here; in every other case c is left exactly as the caller set it up. */
+    if (high_bit_depth || (algo != FF_DCT_AUTO && algo != FF_DCT_MMX))
+        return;
+
+    /* Checked in this order so that a later match overrides an earlier one. */
+    if (INLINE_MMX(flags))
+        c->fdct = ff_fdct_mmx;
+    if (INLINE_MMXEXT(flags))
+        c->fdct = ff_fdct_mmxext;
+    if (INLINE_SSE2(flags))
+        c->fdct = ff_fdct_sse2;
+}
diff --git a/media/ffvpx/libavcodec/fft-internal.h b/media/ffvpx/libavcodec/fft-internal.h
new file mode 100644
index 0000000000..d89a3e38ca
--- /dev/null
+++ b/media/ffvpx/libavcodec/fft-internal.h
@@ -0,0 +1,62 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FFT_INTERNAL_H
+#define AVCODEC_FFT_INTERNAL_H
+
+#include "libavutil/mathematics.h"
+#include "fft.h"
+
+#if FFT_FLOAT
+
+#define FIX15(v) (v)
+#define sqrthalf (float)M_SQRT1_2
+/* Butterfly: x = a - b, y = a + b. The arguments are expanded unparenthesized. */
+#define BF(x, y, a, b) do {                     \
+        x = a - b;                              \
+        y = a + b;                              \
+    } while (0)
+/* Complex multiply: (dre + i*dim) = (are + i*aim) * (bre + i*bim). */
+#define CMUL(dre, dim, are, aim, bre, bim) do { \
+        (dre) = (are) * (bre) - (aim) * (bim);  \
+        (dim) = (are) * (bim) + (aim) * (bre);  \
+    } while (0)
+
+#else /* FFT_FLOAT */
+/* Integer complex multiply: 64-bit products, 0x40000000 (half of 2^31) added before >> 31. */
+#define CMUL(dre, dim, are, aim, bre, bim) do {             \
+        int64_t accu;                                     \
+        (accu)  = (int64_t)(bre) * (are);                 \
+        (accu) -= (int64_t)(bim) * (aim);                 \
+        (dre)   = (int)(((accu) + 0x40000000) >> 31);       \
+        (accu)  = (int64_t)(bre) * (aim);                 \
+        (accu) += (int64_t)(bim) * (are);                 \
+        (dim)   = (int)(((accu) + 0x40000000) >> 31);       \
+    } while (0)
+
+#endif /* FFT_FLOAT */
+
+#define ff_imdct_calc_c FFT_NAME(ff_imdct_calc_c)
+#define ff_imdct_half_c FFT_NAME(ff_imdct_half_c)
+#define ff_mdct_calc_c  FFT_NAME(ff_mdct_calc_c)
+
+void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+
+#endif /* AVCODEC_FFT_INTERNAL_H */
diff --git a/media/ffvpx/libavcodec/fft.h b/media/ffvpx/libavcodec/fft.h
new file mode 100644
index 0000000000..d46e5a3f0b
--- /dev/null
+++ b/media/ffvpx/libavcodec/fft.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FFT_H
+#define AVCODEC_FFT_H
+
+#ifndef FFT_FLOAT
+#define FFT_FLOAT 1 /* default to the float build; fft_fixed_32.c defines it as 0 before including the template */
+#endif
+
+#include <stdint.h>
+#include "config.h"
+
+#include "libavutil/attributes_internal.h"
+#include "libavutil/mem_internal.h"
+
+#if FFT_FLOAT
+
+#include "avfft.h" /* NOTE(review): presumably supplies FFTSample, FFTComplex and FFTContext for the float build, since only the fixed branch declares them here -- confirm */
+
+#define FFT_NAME(x) x /* float build: mapped symbol names stay unsuffixed */
+
+typedef float FFTDouble;
+
+#else
+
+#define Q31(x) (int)((x)*2147483648.0 + 0.5) /* x scaled by 2^31, plus 0.5, then converted to int */
+#define FFT_NAME(x) x ## _fixed_32 /* fixed build: mapped symbol names get a _fixed_32 suffix */
+
+typedef int32_t FFTSample; /* fixed build samples are 32-bit integers */
+
+typedef struct FFTComplex {
+    FFTSample re, im;
+} FFTComplex;
+
+typedef int    FFTDouble;
+typedef struct FFTContext FFTContext; /* the struct itself is defined further down in this header */
+
+#endif /* FFT_FLOAT */
+
+typedef struct FFTDComplex { /* complex value with FFTDouble parts (float or int, depending on FFT_FLOAT) */
+    FFTDouble re, im;
+} FFTDComplex;
+
+/* FFT computation */
+
+enum fft_permutation_type { /* value type of FFTContext.fft_permutation */
+    FF_FFT_PERM_DEFAULT, /* first enumerator, so its value is 0 */
+    FF_FFT_PERM_SWAP_LSBS,
+    FF_FFT_PERM_AVX,
+};
+
+enum mdct_permutation_type { /* value type of FFTContext.mdct_permutation */
+    FF_MDCT_PERM_NONE, /* first enumerator, so its value is 0 */
+    FF_MDCT_PERM_INTERLEAVE,
+};
+
+struct FFTContext { /* per-transform state plus the function table used by the FFT and MDCT code */
+    int nbits; /* NOTE(review): presumably the nbits given to ff_fft_init(), i.e. log2 of the FFT length -- confirm */
+    int inverse; /* NOTE(review): presumably the inverse flag given to ff_fft_init() -- confirm */
+    uint16_t *revtab; /* 16-bit table; NOTE(review): presumably the permutation applied by fft_permute -- confirm */
+    FFTComplex *tmp_buf;
+    int mdct_size; /* size of MDCT (i.e. number of input data * 2) */
+    int mdct_bits; /* n = 2^nbits */
+    /* pre/post rotation tables */
+    FFTSample *tcos;
+    FFTSample *tsin;
+    /**
+     * Do the permutation needed BEFORE calling fft_calc().
+     */
+    void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
+    /**
+     * Do a complex FFT with the parameters defined in ff_fft_init(). The
+     * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+     */
+    void (*fft_calc)(struct FFTContext *s, FFTComplex *z); /* arch init may override, e.g. ff_fft_init_aarch64() installs ff_fft_calc_neon for nbits < 17 on NEON CPUs */
+    void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); /* same signature as ff_imdct_calc_c() */
+    void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input); /* same signature as ff_imdct_half_c() */
+    void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); /* same signature as ff_mdct_calc_c() */
+    enum fft_permutation_type fft_permutation;
+    enum mdct_permutation_type mdct_permutation;
+    uint32_t *revtab32; /* 32-bit counterpart of revtab by type; which table is used is decided outside this header */
+};
+
+#if CONFIG_HARDCODED_TABLES
+#define COSTABLE_CONST const /* hardcoded tables: the cos tables are declared const */
+#define ff_init_ff_cos_tabs(index) /* ...and a call to the initializer expands to nothing */
+#else
+#define COSTABLE_CONST /* empty: the cos tables are declared without const */
+#define ff_init_ff_cos_tabs FFT_NAME(ff_init_ff_cos_tabs)
+
+/**
+ * Initialize the cosine table in ff_cos_tabs[index]
+ * @param index index in ff_cos_tabs array of the table to initialize
+ */
+void ff_init_ff_cos_tabs(int index);
+#endif
+
+#define COSTABLE(size) /* declarator for FFT_NAME(ff_cos_<size>): an array of size/2 FFTSample entries */ \
+    COSTABLE_CONST attribute_visibility_hidden DECLARE_ALIGNED(32, FFTSample, FFT_NAME(ff_cos_##size))[size/2]
+
+extern COSTABLE(16); /* one table per power of two from 16 up to 131072 */
+extern COSTABLE(32);
+extern COSTABLE(64);
+extern COSTABLE(128);
+extern COSTABLE(256);
+extern COSTABLE(512);
+extern COSTABLE(1024);
+extern COSTABLE(2048);
+extern COSTABLE(4096);
+extern COSTABLE(8192);
+extern COSTABLE(16384);
+extern COSTABLE(32768);
+extern COSTABLE(65536);
+extern COSTABLE(131072);
+extern COSTABLE_CONST FFTSample* const FFT_NAME(ff_cos_tabs)[18]; /* 18 slots for 14 tables; NOTE(review): presumably indexed by log2(size), slots 0-3 unused -- confirm */
+
+#define ff_fft_init FFT_NAME(ff_fft_init) /* float and fixed builds get distinct symbol names via FFT_NAME() */
+#define ff_fft_end  FFT_NAME(ff_fft_end)
+
+/**
+ * Set up a complex FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+int ff_fft_init(FFTContext *s, int nbits, int inverse); /* NOTE(review): the meaning of the int result is not documented here */
+
+void ff_fft_init_aarch64(FFTContext *s); /* per-architecture hooks; these names are not passed through FFT_NAME() */
+void ff_fft_init_x86(FFTContext *s);
+void ff_fft_init_arm(FFTContext *s);
+void ff_fft_init_mips(FFTContext *s);
+void ff_fft_init_ppc(FFTContext *s);
+
+void ff_fft_end(FFTContext *s);
+
+#define ff_mdct_init FFT_NAME(ff_mdct_init) /* suffixed per build through FFT_NAME(), like ff_fft_init above */
+#define ff_mdct_end  FFT_NAME(ff_mdct_end)
+
+int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale); /* NOTE(review): undocumented here; scale is a double in both float and fixed builds */
+void ff_mdct_end(FFTContext *s);
+
+#endif /* AVCODEC_FFT_H */
diff --git a/media/ffvpx/libavcodec/fft_fixed_32.c b/media/ffvpx/libavcodec/fft_fixed_32.c
new file mode 100644
index 0000000000..e18dc83891
--- /dev/null
+++ b/media/ffvpx/libavcodec/fft_fixed_32.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Stanislav Ocovaj (socovaj@mips.com)
+ *           Goran Cordasic   (goran@mips.com)
+ *           Djordje Pesut    (djordje@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define FFT_FLOAT 0 /* fixed-point build; set before the include because fft.h defaults an undefined FFT_FLOAT to 1 */
+#include "fft_template.c"
diff --git a/media/ffvpx/libavcodec/fft_float.c b/media/ffvpx/libavcodec/fft_float.c
new file mode 100644
index 0000000000..a9fd01978d
--- /dev/null
+++ b/media/ffvpx/libavcodec/fft_float.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define FFT_FLOAT 1 /* float build; the same value fft.h falls back to when FFT_FLOAT is undefined */
+#include "fft_template.c"
diff --git a/media/ffvpx/libavcodec/fft_init_table.c b/media/ffvpx/libavcodec/fft_init_table.c
new file mode 100644
index 0000000000..83e35ffb7c
--- /dev/null
+++ b/media/ffvpx/libavcodec/fft_init_table.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Stanislav Ocovaj (socovaj@mips.com)
+ *           Goran Cordasic   (goran@mips.com)
+ *           Djordje Pesut    (djordje@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * definitions and initialization of the lookup table (LUT) for FFT
+ */
+#include "libavutil/thread.h"
+
+#include "libavcodec/fft_table.h"
+
+const int32_t ff_w_tab_sr[MAX_FFT_SIZE/(4*16)] = {
+2147483647, 2147483016, 2147481121, 2147477963, 2147473542, 2147467857, 2147460908, 2147452697,
+2147443222, 2147432484, 2147420483, 2147407218, 2147392690, 2147376899, 2147359845, 2147341527,
+2147321946, 2147301102, 2147278995, 2147255625, 2147230991, 2147205094, 2147177934, 2147149511,
+2147119825, 2147088876, 2147056664, 2147023188, 2146988450, 2146952448, 2146915184, 2146876656,
+2146836866, 2146795813, 2146753497, 2146709917, 2146665076, 2146618971, 2146571603, 2146522973,
+2146473080, 2146421924, 2146369505, 2146315824, 2146260881, 2146204674, 2146147205, 2146088474,
+2146028480, 2145967224, 2145904705, 2145840924, 2145775880, 2145709574, 2145642006, 2145573176,
+2145503083, 2145431729, 2145359112, 2145285233, 2145210092, 2145133690, 2145056025, 2144977098,
+2144896910, 2144815460, 2144732748, 2144648774, 2144563539, 2144477042, 2144389283, 2144300264,
+2144209982, 2144118439, 2144025635, 2143931570, 2143836244, 2143739656, 2143641807, 2143542697,
+2143442326, 2143340694, 2143237802, 2143133648, 2143028234, 2142921559, 2142813624, 2142704427,
+2142593971, 2142482254, 2142369276, 2142255039, 2142139541, 2142022783, 2141904764, 2141785486,
+2141664948, 2141543150, 2141420092, 2141295774, 2141170197, 2141043360, 2140915264, 2140785908,
+2140655293, 2140523418, 2140390284, 2140255892, 2140120240, 2139983329, 2139845159, 2139705730,
+2139565043, 2139423097, 2139279892, 2139135429, 2138989708, 2138842728, 2138694490, 2138544994,
+2138394240, 2138242228, 2138088958, 2137934430, 2137778644, 2137621601, 2137463301, 2137303743,
+2137142927, 2136980855, 2136817525, 2136652938, 2136487095, 2136319994, 2136151637, 2135982023,
+2135811153, 2135639026, 2135465642, 2135291003, 2135115107, 2134937956, 2134759548, 2134579885,
+2134398966, 2134216791, 2134033361, 2133848675, 2133662734, 2133475538, 2133287087, 2133097381,
+2132906420, 2132714204, 2132520734, 2132326009, 2132130030, 2131932796, 2131734309, 2131534567,
+2131333572, 2131131322, 2130927819, 2130723062, 2130517052, 2130309789, 2130101272, 2129891502,
+2129680480, 2129468204, 2129254676, 2129039895, 2128823862, 2128606576, 2128388038, 2128168248,
+2127947206, 2127724913, 2127501367, 2127276570, 2127050522, 2126823222, 2126594672, 2126364870,
+2126133817, 2125901514, 2125667960, 2125433155, 2125197100, 2124959795, 2124721240, 2124481435,
+2124240380, 2123998076, 2123754522, 2123509718, 2123263666, 2123016364, 2122767814, 2122518015,
+2122266967, 2122014670, 2121761126, 2121506333, 2121250292, 2120993003, 2120734467, 2120474683,
+2120213651, 2119951372, 2119687847, 2119423074, 2119157054, 2118889788, 2118621275, 2118351516,
+2118080511, 2117808259, 2117534762, 2117260020, 2116984031, 2116706797, 2116428319, 2116148595,
+2115867626, 2115585412, 2115301954, 2115017252, 2114731305, 2114444114, 2114155680, 2113866001,
+2113575080, 2113282914, 2112989506, 2112694855, 2112398960, 2112101824, 2111803444, 2111503822,
+2111202959, 2110900853, 2110597505, 2110292916, 2109987085, 2109680013, 2109371700, 2109062146,
+2108751352, 2108439317, 2108126041, 2107811526, 2107495770, 2107178775, 2106860540, 2106541065,
+2106220352, 2105898399, 2105575208, 2105250778, 2104925109, 2104598202, 2104270057, 2103940674,
+2103610054, 2103278196, 2102945101, 2102610768, 2102275199, 2101938393, 2101600350, 2101261071,
+2100920556, 2100578805, 2100235819, 2099891596, 2099546139, 2099199446, 2098851519, 2098502357,
+2098151960, 2097800329, 2097447464, 2097093365, 2096738032, 2096381466, 2096023667, 2095664635,
+2095304370, 2094942872, 2094580142, 2094216179, 2093850985, 2093484559, 2093116901, 2092748012,
+2092377892, 2092006541, 2091633960, 2091260147, 2090885105, 2090508833, 2090131331, 2089752599,
+2089372638, 2088991448, 2088609029, 2088225381, 2087840505, 2087454400, 2087067068, 2086678508,
+2086288720, 2085897705, 2085505463, 2085111994, 2084717298, 2084321376, 2083924228, 2083525854,
+2083126254, 2082725429, 2082323379, 2081920103, 2081515603, 2081109879, 2080702930, 2080294757,
+2079885360, 2079474740, 2079062896, 2078649830, 2078235540, 2077820028, 2077403294, 2076985338,
+2076566160, 2076145760, 2075724139, 2075301296, 2074877233, 2074451950, 2074025446, 2073597721,
+2073168777, 2072738614, 2072307231, 2071874629, 2071440808, 2071005769, 2070569511, 2070132035,
+2069693342, 2069253430, 2068812302, 2068369957, 2067926394, 2067481616, 2067035621, 2066588410,
+2066139983, 2065690341, 2065239484, 2064787411, 2064334124, 2063879623, 2063423908, 2062966978,
+2062508835, 2062049479, 2061588910, 2061127128, 2060664133, 2060199927, 2059734508, 2059267877,
+2058800036, 2058330983, 2057860719, 2057389244, 2056916560, 2056442665, 2055967560, 2055491246,
+2055013723, 2054534991, 2054055050, 2053573901, 2053091544, 2052607979, 2052123207, 2051637227,
+2051150040, 2050661647, 2050172048, 2049681242, 2049189231, 2048696014, 2048201592, 2047705965,
+2047209133, 2046711097, 2046211857, 2045711414, 2045209767, 2044706916, 2044202863, 2043697608,
+2043191150, 2042683490, 2042174628, 2041664565, 2041153301, 2040640837, 2040127172, 2039612306,
+2039096241, 2038578976, 2038060512, 2037540850, 2037019988, 2036497928, 2035974670, 2035450215,
+2034924562, 2034397712, 2033869665, 2033340422, 2032809982, 2032278347, 2031745516, 2031211490,
+2030676269, 2030139853, 2029602243, 2029063439, 2028523442, 2027982251, 2027439867, 2026896291,
+2026351522, 2025805561, 2025258408, 2024710064, 2024160529, 2023609803, 2023057887, 2022504780,
+2021950484, 2021394998, 2020838323, 2020280460, 2019721407, 2019161167, 2018599739, 2018037123,
+2017473321, 2016908331, 2016342155, 2015774793, 2015206245, 2014636511, 2014065592, 2013493489,
+2012920201, 2012345729, 2011770073, 2011193233, 2010615210, 2010036005, 2009455617, 2008874047,
+2008291295, 2007707362, 2007122248, 2006535953, 2005948478, 2005359822, 2004769987, 2004178973,
+2003586779, 2002993407, 2002398857, 2001803128, 2001206222, 2000608139, 2000008879, 1999408442,
+1998806829, 1998204040, 1997600076, 1996994937, 1996388622, 1995781134, 1995172471, 1994562635,
+1993951625, 1993339442, 1992726087, 1992111559, 1991495860, 1990878989, 1990260946, 1989641733,
+1989021350, 1988399796, 1987777073, 1987153180, 1986528118, 1985901888, 1985274489, 1984645923,
+1984016189, 1983385288, 1982753220, 1982119985, 1981485585, 1980850019, 1980213288, 1979575392,
+1978936331, 1978296106, 1977654717, 1977012165, 1976368450, 1975723572, 1975077532, 1974430331,
+1973781967, 1973132443, 1972481757, 1971829912, 1971176906, 1970522741, 1969867417, 1969210933,
+1968553292, 1967894492, 1967234535, 1966573420, 1965911148, 1965247720, 1964583136, 1963917396,
+1963250501, 1962582451, 1961913246, 1961242888, 1960571375, 1959898709, 1959224890, 1958549919,
+1957873796, 1957196520, 1956518093, 1955838516, 1955157788, 1954475909, 1953792881, 1953108703,
+1952423377, 1951736902, 1951049279, 1950360508, 1949670589, 1948979524, 1948287312, 1947593954,
+1946899451, 1946203802, 1945507008, 1944809070, 1944109987, 1943409761, 1942708392, 1942005880,
+1941302225, 1940597428, 1939891490, 1939184411, 1938476190, 1937766830, 1937056329, 1936344689,
+1935631910, 1934917992, 1934202936, 1933486742, 1932769411, 1932050943, 1931331338, 1930610597,
+1929888720, 1929165708, 1928441561, 1927716279, 1926989864, 1926262315, 1925533633, 1924803818,
+1924072871, 1923340791, 1922607581, 1921873239, 1921137767, 1920401165, 1919663432, 1918924571,
+1918184581, 1917443462, 1916701216, 1915957841, 1915213340, 1914467712, 1913720958, 1912973078,
+1912224073, 1911473942, 1910722688, 1909970309, 1909216806, 1908462181, 1907706433, 1906949562,
+1906191570, 1905432457, 1904672222, 1903910867, 1903148392, 1902384797, 1901620084, 1900854251,
+1900087301, 1899319232, 1898550047, 1897779744, 1897008325, 1896235790, 1895462140, 1894687374,
+1893911494, 1893134500, 1892356392, 1891577171, 1890796837, 1890015391, 1889232832, 1888449163,
+1887664383, 1886878492, 1886091491, 1885303381, 1884514161, 1883723833, 1882932397, 1882139853,
+1881346202, 1880551444, 1879755580, 1878958610, 1878160535, 1877361354, 1876561070, 1875759681,
+1874957189, 1874153594, 1873348897, 1872543097, 1871736196, 1870928194, 1870119091, 1869308888,
+1868497586, 1867685184, 1866871683, 1866057085, 1865241388, 1864424594, 1863606704, 1862787717,
+1861967634, 1861146456, 1860324183, 1859500816, 1858676355, 1857850800, 1857024153, 1856196413,
+1855367581, 1854537657, 1853706643, 1852874538, 1852041343, 1851207059, 1850371686, 1849535224,
+1848697674, 1847859036, 1847019312, 1846178501, 1845336604, 1844493621, 1843649553, 1842804401,
+1841958164, 1841110844, 1840262441, 1839412956, 1838562388, 1837710739, 1836858008, 1836004197,
+1835149306, 1834293336, 1833436286, 1832578158, 1831718951, 1830858668, 1829997307, 1829134869,
+1828271356, 1827406767, 1826541103, 1825674364, 1824806552, 1823937666, 1823067707, 1822196675,
+1821324572, 1820451397, 1819577151, 1818701835, 1817825449, 1816947994, 1816069469, 1815189877,
+1814309216, 1813427489, 1812544694, 1811660833, 1810775906, 1809889915, 1809002858, 1808114737,
+1807225553, 1806335305, 1805443995, 1804551623, 1803658189, 1802763694, 1801868139, 1800971523,
+1800073849, 1799175115, 1798275323, 1797374472, 1796472565, 1795569601, 1794665580, 1793760504,
+1792854372, 1791947186, 1791038946, 1790129652, 1789219305, 1788307905, 1787395453, 1786481950,
+1785567396, 1784651792, 1783735137, 1782817434, 1781898681, 1780978881, 1780058032, 1779136137,
+1778213194, 1777289206, 1776364172, 1775438094, 1774510970, 1773582803, 1772653593, 1771723340,
+1770792044, 1769859707, 1768926328, 1767991909, 1767056450, 1766119952, 1765182414, 1764243838,
+1763304224, 1762363573, 1761421885, 1760479161, 1759535401, 1758590607, 1757644777, 1756697914,
+1755750017, 1754801087, 1753851126, 1752900132, 1751948107, 1750995052, 1750040966, 1749085851,
+1748129707, 1747172535, 1746214334, 1745255107, 1744294853, 1743333573, 1742371267, 1741407936,
+1740443581, 1739478202, 1738511799, 1737544374, 1736575927, 1735606458, 1734635968, 1733664458,
+1732691928, 1731718378, 1730743810, 1729768224, 1728791620, 1727813999, 1726835361, 1725855708,
+1724875040, 1723893357, 1722910659, 1721926948, 1720942225, 1719956488, 1718969740, 1717981981,
+1716993211, 1716003431, 1715012642, 1714020844, 1713028037, 1712034223, 1711039401, 1710043573,
+1709046739, 1708048900, 1707050055, 1706050207, 1705049355, 1704047500, 1703044642, 1702040783,
+1701035922, 1700030061, 1699023199, 1698015339, 1697006479, 1695996621, 1694985765, 1693973912,
+1692961062, 1691947217, 1690932376, 1689916541, 1688899711, 1687881888, 1686863072, 1685843263,
+1684822463, 1683800672, 1682777890, 1681754118, 1680729357, 1679703608, 1678676870, 1677649144,
+1676620432, 1675590733, 1674560049, 1673528379, 1672495725, 1671462087, 1670427466, 1669391862,
+1668355276, 1667317709, 1666279161, 1665239632, 1664199124, 1663157637, 1662115172, 1661071729,
+1660027308, 1658981911, 1657935539, 1656888190, 1655839867, 1654790570, 1653740300, 1652689057,
+1651636841, 1650583654, 1649529496, 1648474367, 1647418269, 1646361202, 1645303166, 1644244162,
+1643184191, 1642123253, 1641061349, 1639998480, 1638934646, 1637869848, 1636804087, 1635737362,
+1634669676, 1633601027, 1632531418, 1631460848, 1630389319, 1629316830, 1628243383, 1627168978,
+1626093616, 1625017297, 1623940023, 1622861793, 1621782608, 1620702469, 1619621377, 1618539332,
+1617456335, 1616372386, 1615287487, 1614201637, 1613114838, 1612027089, 1610938393, 1609848749,
+1608758157, 1607666620, 1606574136, 1605480708, 1604386335, 1603291018, 1602194758, 1601097555,
+1599999411, 1598900325, 1597800299, 1596699333, 1595597428, 1594494583, 1593390801, 1592286082,
+1591180426, 1590073833, 1588966306, 1587857843, 1586748447, 1585638117, 1584526854, 1583414660,
+1582301533, 1581187476, 1580072489, 1578956572, 1577839726, 1576721952, 1575603251, 1574483623,
+1573363068, 1572241588, 1571119183, 1569995854, 1568871601, 1567746425, 1566620327, 1565493307,
+1564365367, 1563236506, 1562106725, 1560976026, 1559844408, 1558711873, 1557578421, 1556444052,
+1555308768, 1554172569, 1553035455, 1551897428, 1550758488, 1549618636, 1548477872, 1547336197,
+1546193612, 1545050118, 1543905714, 1542760402, 1541614183, 1540467057, 1539319024, 1538170087,
+1537020244, 1535869497, 1534717846, 1533565293, 1532411837, 1531257480, 1530102222, 1528946064,
+1527789007, 1526631051, 1525472197, 1524312445, 1523151797, 1521990252, 1520827813, 1519664478,
+1518500250, 1517335128, 1516169114, 1515002208, 1513834411, 1512665723, 1511496145, 1510325678,
+1509154322, 1507982079, 1506808949, 1505634932, 1504460029, 1503284242, 1502107570, 1500930014,
+1499751576, 1498572255, 1497392053, 1496210969, 1495029006, 1493846163, 1492662441, 1491477842,
+1490292364, 1489106011, 1487918781, 1486730675, 1485541696, 1484351842, 1483161115, 1481969516,
+1480777044, 1479583702, 1478389489, 1477194407, 1475998456, 1474801636, 1473603949, 1472405394,
+1471205974, 1470005688, 1468804538, 1467602523, 1466399645, 1465195904, 1463991302, 1462785838,
+1461579514, 1460372329, 1459164286, 1457955385, 1456745625, 1455535009, 1454323536, 1453111208,
+1451898025, 1450683988, 1449469098, 1448253355, 1447036760, 1445819314, 1444601017, 1443381870,
+1442161874, 1440941030, 1439719338, 1438496799, 1437273414, 1436049184, 1434824109, 1433598189,
+1432371426, 1431143821, 1429915374, 1428686085, 1427455956, 1426224988, 1424993180, 1423760534,
+1422527051, 1421292730, 1420057574, 1418821582, 1417584755, 1416347095, 1415108601, 1413869275,
+1412629117, 1411388129, 1410146309, 1408903661, 1407660183, 1406415878, 1405170745, 1403924785,
+1402678000, 1401430389, 1400181954, 1398932695, 1397682613, 1396431709, 1395179984, 1393927438,
+1392674072, 1391419886, 1390164882, 1388909060, 1387652422, 1386394966, 1385136696, 1383877610,
+1382617710, 1381356997, 1380095472, 1378833134, 1377569986, 1376306026, 1375041258, 1373775680,
+1372509294, 1371242101, 1369974101, 1368705296, 1367435685, 1366165269, 1364894050, 1363622028,
+1362349204, 1361075579, 1359801152, 1358525926, 1357249901, 1355973077, 1354695455, 1353417037,
+1352137822, 1350857812, 1349577007, 1348295409, 1347013017, 1345729833, 1344445857, 1343161090,
+1341875533, 1340589187, 1339302052, 1338014129, 1336725419, 1335435923, 1334145641, 1332854574,
+1331562723, 1330270089, 1328976672, 1327682474, 1326387494, 1325091734, 1323795195, 1322497877,
+1321199781, 1319900907, 1318601257, 1317300832, 1315999631, 1314697657, 1313394909, 1312091388,
+1310787095, 1309482032, 1308176198, 1306869594, 1305562222, 1304254082, 1302945174, 1301635500,
+1300325060, 1299013855, 1297701886, 1296389154, 1295075659, 1293761402, 1292446384, 1291130606,
+1289814068, 1288496772, 1287178717, 1285859905, 1284540337, 1283220013, 1281898935, 1280577102,
+1279254516, 1277931177, 1276607086, 1275282245, 1273956653, 1272630312, 1271303222, 1269975384,
+1268646800, 1267317469, 1265987392, 1264656571, 1263325005, 1261992697, 1260659646, 1259325853,
+1257991320, 1256656047, 1255320034, 1253983283, 1252645794, 1251307568, 1249968606, 1248628909,
+1247288478, 1245947312, 1244605414, 1243262783, 1241919421, 1240575329, 1239230506, 1237884955,
+1236538675, 1235191668, 1233843935, 1232495475, 1231146291, 1229796382, 1228445750, 1227094395,
+1225742318, 1224389521, 1223036002, 1221681765, 1220326809, 1218971135, 1217614743, 1216257636,
+1214899813, 1213541275, 1212182024, 1210822059, 1209461382, 1208099993, 1206737894, 1205375085,
+1204011567, 1202647340, 1201282407, 1199916766, 1198550419, 1197183368, 1195815612, 1194447153,
+1193077991, 1191708127, 1190337562, 1188966297, 1187594332, 1186221669, 1184848308, 1183474250,
+1182099496, 1180724046, 1179347902, 1177971064, 1176593533, 1175215310, 1173836395, 1172456790,
+1171076495, 1169695512, 1168313840, 1166931481, 1165548435, 1164164704, 1162780288, 1161395188,
+1160009405, 1158622939, 1157235792, 1155847964, 1154459456, 1153070269, 1151680403, 1150289860,
+1148898640, 1147506745, 1146114174, 1144720929, 1143327011, 1141932420, 1140537158, 1139141224,
+1137744621, 1136347348, 1134949406, 1133550797, 1132151521, 1130751579, 1129350972, 1127949701,
+1126547765, 1125145168, 1123741908, 1122337987, 1120933406, 1119528166, 1118122267, 1116715710,
+1115308496, 1113900627, 1112492101, 1111082922, 1109673089, 1108262603, 1106851465, 1105439676,
+1104027237, 1102614148, 1101200410, 1099786025, 1098370993, 1096955314, 1095538991, 1094122023,
+1092704411, 1091286156, 1089867259, 1088447722, 1087027544, 1085606726, 1084185270, 1082763176,
+1081340445, 1079917078, 1078493076, 1077068439, 1075643169, 1074217266, 1072790730, 1071363564,
+1069935768, 1068507342, 1067078288, 1065648605, 1064218296, 1062787361, 1061355801, 1059923616,
+1058490808, 1057057377, 1055623324, 1054188651, 1052753357, 1051317443, 1049880912, 1048443763,
+1047005996, 1045567615, 1044128617, 1042689006, 1041248781, 1039807944, 1038366495, 1036924436,
+1035481766, 1034038487, 1032594600, 1031150105, 1029705004, 1028259297, 1026812985, 1025366069,
+1023918550, 1022470428, 1021021705, 1019572382, 1018122458, 1016671936, 1015220816, 1013769098,
+1012316784, 1010863875, 1009410370, 1007956272, 1006501581, 1005046298, 1003590424, 1002133959,
+1000676905, 999219262, 997761031, 996302214, 994842810, 993382821, 991922248, 990461091,
+988999351, 987537030, 986074127, 984610645, 983146583, 981681943, 980216726, 978750932,
+977284562, 975817617, 974350098, 972882006, 971413342, 969944106, 968474300, 967003923,
+965532978, 964061465, 962589385, 961116739, 959643527, 958169751, 956695411, 955220508,
+953745043, 952269017, 950792431, 949315286, 947837582, 946359321, 944880503, 943401129,
+941921200, 940440717, 938959681, 937478092, 935995952, 934513261, 933030021, 931546231,
+930061894, 928577010, 927091579, 925605603, 924119082, 922632018, 921144411, 919656262,
+918167572, 916678342, 915188572, 913698265, 912207419, 910716038, 909224120, 907731667,
+906238681, 904745161, 903251110, 901756526, 900261413, 898765769, 897269597, 895772898,
+894275671, 892777918, 891279640, 889780838, 888281512, 886781663, 885281293, 883780402,
+882278992, 880777062, 879274614, 877771649, 876268167, 874764170, 873259659, 871754633,
+870249095, 868743045, 867236484, 865729413, 864221832, 862713743, 861205147, 859696043,
+858186435, 856676321, 855165703, 853654582, 852142959, 850630835, 849118210, 847605086,
+846091463, 844577343, 843062726, 841547612, 840032004, 838515901, 836999305, 835482217,
+833964638, 832446567, 830928007, 829408958, 827889422, 826369398, 824848888, 823327893,
+821806413, 820284450, 818762005, 817239078, 815715670, 814191782, 812667415, 811142571,
+809617249, 808091450, 806565177, 805038429, 803511207, 801983513, 800455346, 798926709,
+797397602, 795868026, 794337982, 792807470, 791276492, 789745049, 788213141, 786680769,
+785147934, 783614638, 782080880, 780546663, 779011986, 777476851, 775941259, 774405210,
+772868706, 771331747, 769794334, 768256469, 766718151, 765179382, 763640164, 762100496,
+760560380, 759019816, 757478806, 755937350, 754395449, 752853105, 751310318, 749767089,
+748223418, 746679308, 745134758, 743589770, 742044345, 740498483, 738952186, 737405453,
+735858287, 734310688, 732762657, 731214195, 729665303, 728115982, 726566232, 725016055,
+723465451, 721914422, 720362968, 718811090, 717258790, 715706067, 714152924, 712599360,
+711045377, 709490976, 707936158, 706380923, 704825272, 703269207, 701712728, 700155836,
+698598533, 697040818, 695482694, 693924160, 692365218, 690805869, 689246113, 687685952,
+686125387, 684564417, 683003045, 681441272, 679879097, 678316522, 676753549, 675190177,
+673626408, 672062243, 670497682, 668932727, 667367379, 665801638, 664235505, 662668981,
+661102068, 659534766, 657967075, 656398998, 654830535, 653261686, 651692453, 650122837,
+648552838, 646982457, 645411696, 643840556, 642269036, 640697139, 639124865, 637552215,
+635979190, 634405791, 632832018, 631257873, 629683357, 628108471, 626533215, 624957590,
+623381598, 621805239, 620228514, 618651424, 617073971, 615496154, 613917975, 612339436,
+610760536, 609181276, 607601658, 606021683, 604441352, 602860664, 601279623, 599698227,
+598116479, 596534378, 594951927, 593369126, 591785976, 590202477, 588618632, 587034440,
+585449903, 583865021, 582279796, 580694229, 579108320, 577522070, 575935480, 574348552,
+572761285, 571173682, 569585743, 567997469, 566408860, 564819919, 563230645, 561641039,
+560051104, 558460839, 556870245, 555279324, 553688076, 552096502, 550504604, 548912382,
+547319836, 545726969, 544133781, 542540273, 540946445, 539352300, 537757837, 536163058,
+534567963, 532972554, 531376831, 529780796, 528184449, 526587791, 524990824, 523393547,
+521795963, 520198072, 518599875, 517001373, 515402566, 513803457, 512204045, 510604332,
+509004318, 507404005, 505803394, 504202485, 502601279, 500999778, 499397982, 497795892,
+496193509, 494590835, 492987869, 491384614, 489781069, 488177236, 486573117, 484968710,
+483364019, 481759043, 480153784, 478548243, 476942419, 475336316, 473729932, 472123270,
+470516330, 468909114, 467301622, 465693854, 464085813, 462477499, 460868912, 459260055,
+457650927, 456041530, 454431865, 452821933, 451211734, 449601270, 447990541, 446379549,
+444768294, 443156777, 441545000, 439932963, 438320667, 436708113, 435095303, 433482236,
+431868915, 430255339, 428641511, 427027430, 425413098, 423798515, 422183684, 420568604,
+418953276, 417337703, 415721883, 414105819, 412489512, 410872962, 409256170, 407639137,
+406021865, 404404353, 402786604, 401168618, 399550396, 397931939, 396313247, 394694323,
+393075166, 391455778, 389836160, 388216313, 386596237, 384975934, 383355404, 381734649,
+380113669, 378492466, 376871039, 375249392, 373627523, 372005435, 370383128, 368760603,
+367137861, 365514903, 363891730, 362268343, 360644742, 359020930, 357396906, 355772673,
+354148230, 352523578, 350898719, 349273654, 347648383, 346022908, 344397230, 342771348,
+341145265, 339518981, 337892498, 336265816, 334638936, 333011859, 331384586, 329757119,
+328129457, 326501602, 324873555, 323245317, 321616889, 319988272, 318359466, 316730474,
+315101295, 313471930, 311842381, 310212649, 308582734, 306952638, 305322361, 303691904,
+302061269, 300430456, 298799466, 297168301, 295536961, 293905447, 292273760, 290641901,
+289009871, 287377671, 285745302, 284112765, 282480061, 280847190, 279214155, 277580955,
+275947592, 274314066, 272680379, 271046532, 269412525, 267778360, 266144038, 264509558,
+262874923, 261240134, 259605191, 257970095, 256334847, 254699448, 253063900, 251428203,
+249792358, 248156366, 246520228, 244883945, 243247518, 241610947, 239974235, 238337382,
+236700388, 235063255, 233425984, 231788575, 230151030, 228513350, 226875535, 225237587,
+223599506, 221961294, 220322951, 218684479, 217045878, 215407149, 213768293, 212129312,
+210490206, 208850976, 207211624, 205572149, 203932553, 202292838, 200653003, 199013051,
+197372981, 195732795, 194092495, 192452080, 190811551, 189170911, 187530159, 185889297,
+184248325, 182607245, 180966058, 179324764, 177683365, 176041861, 174400254, 172758544,
+171116733, 169474820, 167832808, 166190698, 164548489, 162906184, 161263783, 159621287,
+157978697, 156336015, 154693240, 153050374, 151407418, 149764374, 148121241, 146478021,
+144834714, 143191323, 141547847, 139904288, 138260647, 136616925, 134973122, 133329239,
+131685278, 130041240, 128397125, 126752935, 125108670, 123464332, 121819921, 120175438,
+118530885, 116886262, 115241570, 113596810, 111951983, 110307091, 108662134, 107017112,
+105372028, 103726882, 102081675, 100436408,  98791081,  97145697,  95500255,  93854758,
+ 92209205,  90563597,  88917937,  87272224,  85626460,  83980645,  82334782,  80688869,
+ 79042909,  77396903,  75750851,  74104755,  72458615,  70812432,  69166208,  67519943,
+ 65873638,  64227295,  62580914,  60934496,  59288042,  57641553,  55995030,  54348475,
+ 52701887,  51055268,  49408620,  47761942,  46115236,  44468503,  42821744,  41174960,
+ 39528151,  37881320,  36234466,  34587590,  32940695,  31293780,  29646846,  27999895,
+ 26352928,  24705945,  23058947,  21411936,  19764913,  18117878,  16470832,  14823776,
+ 13176712,  11529640,   9882561,   8235476,   6588387,   4941294,   3294197,   1647099
+};
+
+uint16_t ff_fft_offsets_lut[21845]; /* filled by fft_lut_init(); every entry is an offset stored as off >> 2 */
+
+static void fft_lut_init(uint16_t *table, int off, int size, int *index)
+{
+    const int half = size >> 1, quarter = size >> 2;
+    if (size >= 16) {
+        /* Visit one half-size block, then the two quarter-size blocks behind it. */
+        fft_lut_init(table, off,               half,    index);
+        fft_lut_init(table, off + half,        quarter, index);
+        fft_lut_init(table, off + 3 * quarter, quarter, index);
+        return;
+    }
+    table[(*index)++] = off >> 2; /* leaf: store offset/4 and advance the write cursor */
+}
+
+static void fft_lut_init_start(void)
+{
+    int filled = 0; /* number of table entries written so far */
+    /* Generate the offsets for a 2^17-point transform into the global table. */
+    fft_lut_init(ff_fft_offsets_lut, 0, 1 << 17, &filled);
+}
+
+void ff_fft_lut_init(void)
+{
+    static AVOnce lut_once = AV_ONCE_INIT; /* once-control handed to ff_thread_once */
+    /* Delegate to ff_thread_once so the table fill is tied to a single control object. */
+    ff_thread_once(&lut_once, fft_lut_init_start);
+}
diff --git a/media/ffvpx/libavcodec/fft_table.h b/media/ffvpx/libavcodec/fft_table.h
new file mode 100644
index 0000000000..09df49f2b8
--- /dev/null
+++ b/media/ffvpx/libavcodec/fft_table.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Stanislav Ocovaj (socovaj@mips.com)
+ *           Goran Cordasic   (goran@mips.com)
+ *           Djordje Pesut    (djordje@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * definitions and tables for FFT
+ */
+#ifndef AVCODEC_FFT_TABLE_H
+#define AVCODEC_FFT_TABLE_H
+
+#include "libavcodec/fft.h"
+
+#define MAX_LOG2_NFFT 17 //!< Specifies maximum allowed fft size
+#define MAX_FFT_SIZE (1 << MAX_LOG2_NFFT) //!< Number of points of the largest allowed FFT
+
+extern const int32_t ff_w_tab_sr[]; /* constant coefficient table, defined in another translation unit */
+extern uint16_t ff_fft_offsets_lut[]; /* runtime-filled table; call ff_fft_lut_init() before reading it */
+void ff_fft_lut_init(void); /* fills ff_fft_offsets_lut */
+
+#endif /* AVCODEC_FFT_TABLE_H */
diff --git a/media/ffvpx/libavcodec/fft_template.c b/media/ffvpx/libavcodec/fft_template.c
new file mode 100644
index 0000000000..f2742a3ae8
--- /dev/null
+++ b/media/ffvpx/libavcodec/fft_template.c
@@ -0,0 +1,628 @@
+/*
+ * FFT/IFFT transforms
+ * Copyright (c) 2008 Loren Merritt
+ * Copyright (c) 2002 Fabrice Bellard
+ * Partly based on libdjbfft by D. J. Bernstein
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * FFT/IFFT transforms.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "libavutil/mathematics.h"
+#include "libavutil/thread.h"
+#include "fft.h"
+#include "fft-internal.h"
+
+#if !FFT_FLOAT
+#include "fft_table.h"
+#else /* !FFT_FLOAT */
+
+/* One table per power-of-two size n = 16 .. 131072: cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
+#if !CONFIG_HARDCODED_TABLES
+COSTABLE(16);
+COSTABLE(32);
+COSTABLE(64);
+COSTABLE(128);
+COSTABLE(256);
+COSTABLE(512);
+COSTABLE(1024);
+COSTABLE(2048);
+COSTABLE(4096);
+COSTABLE(8192);
+COSTABLE(16384);
+COSTABLE(32768);
+COSTABLE(65536);
+COSTABLE(131072);
+
+static av_cold void init_ff_cos_tabs(int index)
+{
+    int k;
+    const int size = 1 << index, quarter = size / 4;
+    const double step = 2*M_PI/size;
+    FFTSample *table = FFT_NAME(ff_cos_tabs)[index];
+    for (k = 0; k <= quarter; k++)  /* cos over the first quarter period, end point included */
+        table[k] = FIX15(cos(k*step));
+    for (k = 1; k < quarter; k++)   /* mirror those values into the second quarter of the table */
+        table[size/2 - k] = table[k];
+}
+
+typedef struct CosTabsInitOnce { /* pairs a table builder with the once-control that guards it */
+    void (*func)(void); /* builder for one table size; NULL in unused slots */
+    AVOnce control; /* passed to ff_thread_once() together with func */
+} CosTabsInitOnce;
+
+#define INIT_FF_COS_TABS_FUNC(index, size)          \
+static av_cold void init_ff_cos_tabs_ ## size (void)\
+{                                                   \
+    init_ff_cos_tabs(index);                        \
+}
+/* One argument-less wrapper per table size, with index = log2(size), usable as ff_thread_once() callbacks. */
+INIT_FF_COS_TABS_FUNC(4, 16)
+INIT_FF_COS_TABS_FUNC(5, 32)
+INIT_FF_COS_TABS_FUNC(6, 64)
+INIT_FF_COS_TABS_FUNC(7, 128)
+INIT_FF_COS_TABS_FUNC(8, 256)
+INIT_FF_COS_TABS_FUNC(9, 512)
+INIT_FF_COS_TABS_FUNC(10, 1024)
+INIT_FF_COS_TABS_FUNC(11, 2048)
+INIT_FF_COS_TABS_FUNC(12, 4096)
+INIT_FF_COS_TABS_FUNC(13, 8192)
+INIT_FF_COS_TABS_FUNC(14, 16384)
+INIT_FF_COS_TABS_FUNC(15, 32768)
+INIT_FF_COS_TABS_FUNC(16, 65536)
+INIT_FF_COS_TABS_FUNC(17, 131072)
+
+static CosTabsInitOnce cos_tabs_init_once[] = { /* indexed by log2(table size) */
+    { NULL }, /* indices 0..3: there is no table for sizes below 16 */
+    { NULL },
+    { NULL },
+    { NULL },
+    { init_ff_cos_tabs_16, AV_ONCE_INIT }, /* index 4 */
+    { init_ff_cos_tabs_32, AV_ONCE_INIT },
+    { init_ff_cos_tabs_64, AV_ONCE_INIT },
+    { init_ff_cos_tabs_128, AV_ONCE_INIT },
+    { init_ff_cos_tabs_256, AV_ONCE_INIT },
+    { init_ff_cos_tabs_512, AV_ONCE_INIT },
+    { init_ff_cos_tabs_1024, AV_ONCE_INIT },
+    { init_ff_cos_tabs_2048, AV_ONCE_INIT },
+    { init_ff_cos_tabs_4096, AV_ONCE_INIT },
+    { init_ff_cos_tabs_8192, AV_ONCE_INIT },
+    { init_ff_cos_tabs_16384, AV_ONCE_INIT },
+    { init_ff_cos_tabs_32768, AV_ONCE_INIT },
+    { init_ff_cos_tabs_65536, AV_ONCE_INIT },
+    { init_ff_cos_tabs_131072, AV_ONCE_INIT }, /* index 17 */
+};
+
+av_cold void ff_init_ff_cos_tabs(int index)
+{   /* index is log2 of the table size; slots 0..3 hold no builder, so callers pass 4..17 */
+    ff_thread_once(&(cos_tabs_init_once + index)->control, (cos_tabs_init_once + index)->func);
+}
+#endif
+COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = { /* indexed by log2(table size) */
+    NULL, NULL, NULL, NULL, /* sizes below 16 have no table */
+    FFT_NAME(ff_cos_16),
+    FFT_NAME(ff_cos_32),
+    FFT_NAME(ff_cos_64),
+    FFT_NAME(ff_cos_128),
+    FFT_NAME(ff_cos_256),
+    FFT_NAME(ff_cos_512),
+    FFT_NAME(ff_cos_1024),
+    FFT_NAME(ff_cos_2048),
+    FFT_NAME(ff_cos_4096),
+    FFT_NAME(ff_cos_8192),
+    FFT_NAME(ff_cos_16384),
+    FFT_NAME(ff_cos_32768),
+    FFT_NAME(ff_cos_65536),
+    FFT_NAME(ff_cos_131072),
+};
+
+#endif /* FFT_FLOAT */
+
+static void fft_permute_c(FFTContext *s, FFTComplex *z);
+static void fft_calc_c(FFTContext *s, FFTComplex *z);
+
+static int split_radix_permutation(int i, int n, int inverse)
+{
+    int half = n >> 1, quarter = n >> 2, sub;
+    if (n <= 2)                /* base case: only the lowest bit of i is left */
+        return i & 1;
+    if ((i & half) == 0)       /* bit clear: recurse on the half-size problem, result doubled */
+        return 2 * split_radix_permutation(i, half, inverse);
+    sub = 4 * split_radix_permutation(i, quarter, inverse);
+    return sub + ((inverse == !(i & quarter)) ? 1 : -1); /* +1 or -1 depending on direction and next bit */
+}
+
+
+static const int avx_tab[] = { /* reordering of the 16 positions inside a block, applied by fft_perm_avx() */
+    0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
+};
+
+static int is_second_half_of_fft32(int i, int n)
+{
+    /* Descend through the half / quarter / quarter split until a block of at most 32 remains. */
+    while (n > 32) {
+        const int half = n/2, three_q = 3*n/4;
+        if (i < half)           n  = half;
+        else if (i < three_q) { i -= half;    n /= 4; }
+        else                  { i -= three_q; n /= 4; }
+    }
+    return i >= 16;
+}
+
+static av_cold void fft_perm_avx(FFTContext *s)
+{
+    const int n = 1 << s->nbits;
+    int base, k;
+
+    /* Walk the transform in blocks of 16; the order used inside a block depends
+     * on what is_second_half_of_fft32() reports for the block's first index. */
+    for (base = 0; base < n; base += 16) {
+        const int use_tab = is_second_half_of_fft32(base, n);
+
+        for (k = 0; k < 16; k++) {
+            const int src  = base + k;
+            const int slot = -split_radix_permutation(src, n, s->inverse) & (n - 1);
+
+            if (use_tab)
+                s->revtab[slot] = base + avx_tab[k];
+            else /* rotate the low three bits of src: (b2 b1 b0) -> (b0 b2 b1) */
+                s->revtab[slot] = (src & ~7) | ((src >> 1) & 3) | ((src << 2) & 4);
+        }
+    }
+}
+/** Set up s for a 2^nbits-point FFT (nbits in 2..17); returns 0, or -1 on invalid nbits or allocation failure. */
+av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
+{
+    int i, j, n;
+
+    s->revtab = NULL;
+    s->revtab32 = NULL;
+    s->tmp_buf = NULL; /* the fail path frees all three buffers, so none may hold a stale value */
+    if (nbits < 2 || nbits > 17)
+        goto fail;
+    s->nbits = nbits;
+    n = 1 << nbits;
+
+    if (nbits <= 16) { /* every index fits in 16 bits */
+        s->revtab = av_malloc(n * sizeof(uint16_t));
+        if (!s->revtab)
+            goto fail;
+    } else {
+        s->revtab32 = av_malloc(n * sizeof(uint32_t));
+        if (!s->revtab32)
+            goto fail;
+    }
+    s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
+    if (!s->tmp_buf)
+        goto fail;
+    s->inverse = inverse;
+    s->fft_permutation = FF_FFT_PERM_DEFAULT;
+
+    s->fft_permute = fft_permute_c;
+    s->fft_calc    = fft_calc_c;
+#if CONFIG_MDCT
+    s->imdct_calc  = ff_imdct_calc_c;
+    s->imdct_half  = ff_imdct_half_c;
+    s->mdct_calc   = ff_mdct_calc_c;
+#endif
+
+#if FFT_FLOAT
+#if ARCH_AARCH64
+    ff_fft_init_aarch64(s);
+#elif ARCH_ARM
+    ff_fft_init_arm(s);
+#elif ARCH_PPC
+    ff_fft_init_ppc(s);
+#elif ARCH_X86
+    ff_fft_init_x86(s);
+#endif
+#if HAVE_MIPSFPU
+    ff_fft_init_mips(s);
+#endif
+    for(j=4; j<=nbits; j++) {
+        ff_init_ff_cos_tabs(j);
+    }
+#else /* FFT_FLOAT */
+    ff_fft_lut_init();
+#endif
+
+    /* Fill the reverse table: fft_perm_avx() for the AVX permutation, otherwise one of the loops below. */
+    if (ARCH_X86 && FFT_FLOAT && s->fft_permutation == FF_FFT_PERM_AVX) {
+        fft_perm_avx(s);
+    } else {
+#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\
+    for(i = 0; i < n; i++) {\
+        int k;\
+        j = i;\
+        j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\
+        k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
+        s->revtab##num[k] = j;\
+    } \
+} while(0);
+
+#define PROCESS_FFT_PERM_DEFAULT(num) do {\
+    for(i = 0; i < n; i++) {\
+        int k;\
+        j = i;\
+        k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
+        s->revtab##num[k] = j;\
+    } \
+} while(0);
+
+#define SPLIT_RADIX_PERMUTATION(num) do { \
+    if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\
+        PROCESS_FFT_PERM_SWAP_LSBS(num) \
+    } else {\
+        PROCESS_FFT_PERM_DEFAULT(num) \
+    }\
+} while(0);
+
+    if (s->revtab)
+        SPLIT_RADIX_PERMUTATION()
+    if (s->revtab32)
+        SPLIT_RADIX_PERMUTATION(32)
+
+#undef PROCESS_FFT_PERM_DEFAULT
+#undef PROCESS_FFT_PERM_SWAP_LSBS
+#undef SPLIT_RADIX_PERMUTATION
+    }
+
+    return 0;
+ fail:
+    av_freep(&s->revtab);
+    av_freep(&s->revtab32);
+    av_freep(&s->tmp_buf);
+    return -1;
+}
+
+static void fft_permute_c(FFTContext *s, FFTComplex *z)
+{
+    const int count = 1 << s->nbits;
+    const uint16_t *tab16 = s->revtab;
+    const uint32_t *tab32 = s->revtab32;
+    FFTComplex *scratch = s->tmp_buf;
+    int k;
+
+    /* TODO: handle split-radix permute in a more optimal way, probably in-place */
+    for (k = 0; k < count; k++) /* scatter into the scratch buffer; the 16-bit table is used when present */
+        scratch[tab16 ? tab16[k] : tab32[k]] = z[k];
+
+    memcpy(z, scratch, count * sizeof(FFTComplex)); /* copy the permuted data back over the input */
+}
+
+av_cold void ff_fft_end(FFTContext *s)
+{   /* release every buffer ff_fft_init() may have allocated */
+    av_freep(&s->tmp_buf);
+    av_freep(&s->revtab32);
+    av_freep(&s->revtab);
+}
+
+#if !FFT_FLOAT
+/* Fixed-point FFT of 2^s->nbits points, in place in z: a 4-point stage, an 8-point stage, then one stage per size from 16 up. */
+static void fft_calc_c(FFTContext *s, FFTComplex *z) {
+
+    int nbits, i, n, num_transforms, offset, step;
+    int n4, n2, n34;
+    unsigned tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    FFTComplex *tmpz;
+    const int fft_size = (1 << s->nbits);
+    int64_t accu;
+    /* 21845 blocks for nbits == 17 (the size of ff_fft_offsets_lut); the shift count is never negative for nbits <= 17. */
+    num_transforms = (21845 >> (MAX_LOG2_NFFT - s->nbits)) | 1;
+
+    for (n=0; n<num_transforms; n++){
+        offset = ff_fft_offsets_lut[n] << 2;
+        tmpz = z + offset;
+
+        tmp1 = tmpz[0].re + (unsigned)tmpz[1].re;
+        tmp5 = tmpz[2].re + (unsigned)tmpz[3].re;
+        tmp2 = tmpz[0].im + (unsigned)tmpz[1].im;
+        tmp6 = tmpz[2].im + (unsigned)tmpz[3].im;
+        tmp3 = tmpz[0].re - (unsigned)tmpz[1].re;
+        tmp8 = tmpz[2].im - (unsigned)tmpz[3].im;
+        tmp4 = tmpz[0].im - (unsigned)tmpz[1].im;
+        tmp7 = tmpz[2].re - (unsigned)tmpz[3].re;
+
+        tmpz[0].re = tmp1 + tmp5;
+        tmpz[2].re = tmp1 - tmp5;
+        tmpz[0].im = tmp2 + tmp6;
+        tmpz[2].im = tmp2 - tmp6;
+        tmpz[1].re = tmp3 + tmp8;
+        tmpz[3].re = tmp3 - tmp8;
+        tmpz[1].im = tmp4 - tmp7;
+        tmpz[3].im = tmp4 + tmp7;
+    }
+
+    if (fft_size < 8)
+        return;
+    /* 8-point stage: same offsets table, now scaled by 8. */
+    num_transforms = (num_transforms >> 1) | 1;
+
+    for (n=0; n<num_transforms; n++){
+        offset = ff_fft_offsets_lut[n] << 3;
+        tmpz = z + offset;
+
+        tmp1 = tmpz[4].re + (unsigned)tmpz[5].re;
+        tmp3 = tmpz[6].re + (unsigned)tmpz[7].re;
+        tmp2 = tmpz[4].im + (unsigned)tmpz[5].im;
+        tmp4 = tmpz[6].im + (unsigned)tmpz[7].im;
+        tmp5 = tmp1 + tmp3;
+        tmp7 = tmp1 - tmp3;
+        tmp6 = tmp2 + tmp4;
+        tmp8 = tmp2 - tmp4;
+
+        tmp1 = tmpz[4].re - (unsigned)tmpz[5].re;
+        tmp2 = tmpz[4].im - (unsigned)tmpz[5].im;
+        tmp3 = tmpz[6].re - (unsigned)tmpz[7].re;
+        tmp4 = tmpz[6].im - (unsigned)tmpz[7].im;
+
+        tmpz[4].re = tmpz[0].re - tmp5;
+        tmpz[0].re = tmpz[0].re + tmp5;
+        tmpz[4].im = tmpz[0].im - tmp6;
+        tmpz[0].im = tmpz[0].im + tmp6;
+        tmpz[6].re = tmpz[2].re - tmp8;
+        tmpz[2].re = tmpz[2].re + tmp8;
+        tmpz[6].im = tmpz[2].im + tmp7;
+        tmpz[2].im = tmpz[2].im - tmp7;
+
+        accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp1 + tmp2);
+        tmp5 = (int32_t)((accu + 0x40000000) >> 31);
+        accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 - tmp4);
+        tmp7 = (int32_t)((accu + 0x40000000) >> 31);
+        accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp2 - tmp1);
+        tmp6 = (int32_t)((accu + 0x40000000) >> 31);
+        accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 + tmp4);
+        tmp8 = (int32_t)((accu + 0x40000000) >> 31);
+        tmp1 = tmp5 + tmp7;
+        tmp3 = tmp5 - tmp7;
+        tmp2 = tmp6 + tmp8;
+        tmp4 = tmp6 - tmp8;
+
+        tmpz[5].re = tmpz[1].re - tmp1;
+        tmpz[1].re = tmpz[1].re + tmp1;
+        tmpz[5].im = tmpz[1].im - tmp2;
+        tmpz[1].im = tmpz[1].im + tmp2;
+        tmpz[7].re = tmpz[3].re - tmp4;
+        tmpz[3].re = tmpz[3].re + tmp4;
+        tmpz[7].im = tmpz[3].im + tmp3;
+        tmpz[3].im = tmpz[3].im - tmp3;
+    }
+    /* Remaining stages, sizes 16 .. fft_size: step is the stride into ff_w_tab_sr and halves as the size doubles. */
+    step = 1 << ((MAX_LOG2_NFFT-4) - 4);
+    n4 = 4;
+
+    for (nbits=4; nbits<=s->nbits; nbits++){
+        n2  = 2*n4;
+        n34 = 3*n4;
+        num_transforms = (num_transforms >> 1) | 1;
+
+        for (n=0; n<num_transforms; n++){
+            const FFTSample *w_re_ptr = ff_w_tab_sr + step;
+            const FFTSample *w_im_ptr = ff_w_tab_sr + MAX_FFT_SIZE/(4*16) - step;
+            offset = ff_fft_offsets_lut[n] << nbits;
+            tmpz = z + offset;
+
+            tmp5 = tmpz[ n2].re + (unsigned)tmpz[n34].re;
+            tmp1 = tmpz[ n2].re - (unsigned)tmpz[n34].re;
+            tmp6 = tmpz[ n2].im + (unsigned)tmpz[n34].im;
+            tmp2 = tmpz[ n2].im - (unsigned)tmpz[n34].im;
+
+            tmpz[ n2].re = tmpz[ 0].re - tmp5;
+            tmpz[  0].re = tmpz[ 0].re + tmp5;
+            tmpz[ n2].im = tmpz[ 0].im - tmp6;
+            tmpz[  0].im = tmpz[ 0].im + tmp6;
+            tmpz[n34].re = tmpz[n4].re - tmp2;
+            tmpz[ n4].re = tmpz[n4].re + tmp2;
+            tmpz[n34].im = tmpz[n4].im + tmp1;
+            tmpz[ n4].im = tmpz[n4].im - tmp1;
+
+            for (i=1; i<n4; i++){
+                FFTSample w_re = w_re_ptr[0];
+                FFTSample w_im = w_im_ptr[0];
+                accu  = (int64_t)w_re*tmpz[ n2+i].re;
+                accu += (int64_t)w_im*tmpz[ n2+i].im;
+                tmp1 = (int32_t)((accu + 0x40000000) >> 31);
+                accu  = (int64_t)w_re*tmpz[ n2+i].im;
+                accu -= (int64_t)w_im*tmpz[ n2+i].re;
+                tmp2 = (int32_t)((accu + 0x40000000) >> 31);
+                accu  = (int64_t)w_re*tmpz[n34+i].re;
+                accu -= (int64_t)w_im*tmpz[n34+i].im;
+                tmp3 = (int32_t)((accu + 0x40000000) >> 31);
+                accu  = (int64_t)w_re*tmpz[n34+i].im;
+                accu += (int64_t)w_im*tmpz[n34+i].re;
+                tmp4 = (int32_t)((accu + 0x40000000) >> 31);
+
+                tmp5 = tmp1 + tmp3;
+                tmp1 = tmp1 - tmp3;
+                tmp6 = tmp2 + tmp4;
+                tmp2 = tmp2 - tmp4;
+
+                tmpz[ n2+i].re = tmpz[   i].re - tmp5;
+                tmpz[    i].re = tmpz[   i].re + tmp5;
+                tmpz[ n2+i].im = tmpz[   i].im - tmp6;
+                tmpz[    i].im = tmpz[   i].im + tmp6;
+                tmpz[n34+i].re = tmpz[n4+i].re - tmp2;
+                tmpz[ n4+i].re = tmpz[n4+i].re + tmp2;
+                tmpz[n34+i].im = tmpz[n4+i].im + tmp1;
+                tmpz[ n4+i].im = tmpz[n4+i].im - tmp1;
+
+                w_re_ptr += step;
+                w_im_ptr -= step;
+            }
+        }
+        step >>= 1;
+        n4   <<= 1;
+    }
+}
+
+#else /* !FFT_FLOAT */
+/* Butterfly step shared by TRANSFORM and TRANSFORM_ZERO, which load t1, t2, t5, t6 first (BF is defined outside this chunk). */
+#define BUTTERFLIES(a0,a1,a2,a3) {\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, a0.re, t5);\
+    BF(a3.im, a1.im, a1.im, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, a1.re, t4);\
+    BF(a2.im, a0.im, a0.im, t6);\
+}
+
+// force loading all the inputs before storing any.
+// this is slightly slower for small data, but avoids store->load aliasing
+// for addresses separated by large powers of 2.
+#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
+    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, r0, t5);\
+    BF(a3.im, a1.im, i1, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, r1, t4);\
+    BF(a2.im, a0.im, i0, t6);\
+}
+/* Feed a2 with (wre, -wim) and a3 with (wre, wim) through CMUL (defined elsewhere, presumably a complex multiply), then BUTTERFLIES. */
+#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
+    CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
+    CMUL(t5, t6, a3.re, a3.im, wre,  wim);\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+/* Like TRANSFORM, but a2 and a3 are taken as they are: no CMUL with a twiddle factor. */
+#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
+    t1 = a2.re;\
+    t2 = a2.im;\
+    t5 = a3.re;\
+    t6 = a3.im;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+
+/* PASS(name) defines one pass over z[0...8n-1] with twiddles w[1...2n-1]; the four operands of each TRANSFORM lie 2n apart. */
+#define PASS(name)\
+static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
+{\
+    FFTDouble t1, t2, t3, t4, t5, t6;\
+    int o1 = 2*n;\
+    int o2 = 4*n;\
+    int o3 = 6*n;\
+    const FFTSample *wim = wre+o1;\
+    n--;\
+\
+    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
+    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    do {\
+        z += 2;\
+        wre += 2;\
+        wim -= 2;\
+        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
+        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    } while(--n);\
+}
+/* pass is built with BUTTERFLIES; unless CONFIG_SMALL, pass_big is built with BUTTERFLIES_BIG in its place. */
+PASS(pass)
+#if !CONFIG_SMALL
+#undef BUTTERFLIES
+#define BUTTERFLIES BUTTERFLIES_BIG
+PASS(pass_big)
+#endif
+/* DECL_FFT(n,n2,n4) defines fft##n: fft##n2 on the first half, fft##n4 on each of the last two quarters, then one pass. */
+#define DECL_FFT(n,n2,n4)\
+static void fft##n(FFTComplex *z)\
+{\
+    fft##n2(z);\
+    fft##n4(z+n4*2);\
+    fft##n4(z+n4*3);\
+    pass(z,FFT_NAME(ff_cos_##n),n4/2);\
+}
+/* 4-point transform on z[0..3], written out as eight BF operations (BF is defined outside this chunk). */
+static void fft4(FFTComplex *z)
+{
+    FFTDouble t1, t2, t3, t4, t5, t6, t7, t8;
+
+    BF(t3, t1, z[0].re, z[1].re);
+    BF(t8, t6, z[3].re, z[2].re);
+    BF(z[2].re, z[0].re, t1, t6);
+    BF(t4, t2, z[0].im, z[1].im);
+    BF(t7, t5, z[2].im, z[3].im);
+    BF(z[3].im, z[1].im, t4, t8);
+    BF(z[3].re, z[1].re, t3, t7);
+    BF(z[2].im, z[0].im, t2, t5);
+}
+/* 8-point transform: fft4 on z[0..3], then z[4..7] are folded in with BUTTERFLIES and one sqrthalf TRANSFORM. */
+static void fft8(FFTComplex *z)
+{
+    FFTDouble t1, t2, t3, t4, t5, t6;
+
+    fft4(z);
+
+    BF(t1, z[5].re, z[4].re, -z[5].re);
+    BF(t2, z[5].im, z[4].im, -z[5].im);
+    BF(t5, z[7].re, z[6].re, -z[7].re);
+    BF(t6, z[7].im, z[6].im, -z[7].im);
+
+    BUTTERFLIES(z[0],z[2],z[4],z[6]);
+    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
+}
+/* 16-point transform: written out by hand unless CONFIG_SMALL, in which case DECL_FFT generates it. */
+#if !CONFIG_SMALL
+static void fft16(FFTComplex *z)
+{
+    FFTDouble t1, t2, t3, t4, t5, t6;
+    FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
+    FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
+
+    fft8(z);
+    fft4(z+8);
+    fft4(z+12);
+
+    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
+    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
+    TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
+    TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
+}
+#else
+DECL_FFT(16,8,4)
+#endif
+DECL_FFT(32,16,8) /* sizes 32..512 are built on the plain pass */
+DECL_FFT(64,32,16)
+DECL_FFT(128,64,32)
+DECL_FFT(256,128,64)
+DECL_FFT(512,256,128)
+#if !CONFIG_SMALL
+#define pass pass_big /* from here on DECL_FFT expands to calls of pass_big */
+#endif
+DECL_FFT(1024,512,256)
+DECL_FFT(2048,1024,512)
+DECL_FFT(4096,2048,1024)
+DECL_FFT(8192,4096,2048)
+DECL_FFT(16384,8192,4096)
+DECL_FFT(32768,16384,8192)
+DECL_FFT(65536,32768,16384)
+DECL_FFT(131072,65536,32768)
+/* Transform functions indexed by nbits - 2: entry 0 is fft4, the last entry is fft131072. */
+static void (* const fft_dispatch[])(FFTComplex*) = {
+    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
+    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
+};
+
+static void fft_calc_c(FFTContext *s, FFTComplex *z)
+{   /* fft_dispatch starts at fft4, hence the nbits - 2 index */
+    (*fft_dispatch[s->nbits - 2])(z);
+}
+#endif /* !FFT_FLOAT */
diff --git a/media/ffvpx/libavcodec/flac.c b/media/ffvpx/libavcodec/flac.c
new file mode 100644
index 0000000000..174b4801be
--- /dev/null
+++ b/media/ffvpx/libavcodec/flac.c
@@ -0,0 +1,225 @@
+/*
+ * FLAC common code
+ * Copyright (c) 2009 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/crc.h"
+#include "libavutil/log.h"
+#include "bytestream.h"
+#include "get_bits.h"
+#include "flac.h"
+#include "flacdata.h"
+#include "flac_parse.h"
+
+static const int8_t sample_size_table[] = { 0, 8, 12, 0, 16, 20, 24, 32 }; /* bits per sample, indexed by the 3-bit bps code of a frame header */
+
+static const AVChannelLayout flac_channel_layouts[8] = { /* layout chosen for 1..8 channels; looked up as [channels - 1] */
+    AV_CHANNEL_LAYOUT_MONO,
+    AV_CHANNEL_LAYOUT_STEREO,
+    AV_CHANNEL_LAYOUT_SURROUND,
+    AV_CHANNEL_LAYOUT_QUAD,
+    AV_CHANNEL_LAYOUT_5POINT0,
+    AV_CHANNEL_LAYOUT_5POINT1,
+    AV_CHANNEL_LAYOUT_6POINT1,
+    AV_CHANNEL_LAYOUT_7POINT1
+};
+/* Read one value with the GET_UTF8 macro, pulling 8 bits at a time from gb; returns -1 if the macro takes its error action. */
+static int64_t get_utf8(GetBitContext *gb)
+{
+    int64_t val;
+    GET_UTF8(val, get_bits(gb, 8), return -1;)
+    return val;
+}
+/* Parse one FLAC frame header from gb into fi; returns 0, or AVERROR_INVALIDDATA after logging at AV_LOG_ERROR + log_level_offset. */
+int ff_flac_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb,
+                                FLACFrameInfo *fi, int log_level_offset)
+{
+    int bs_code, sr_code, bps_code;
+
+    /* frame sync code */
+    if ((get_bits(gb, 15) & 0x7FFF) != 0x7FFC) {
+        av_log(avctx, AV_LOG_ERROR + log_level_offset, "invalid sync code\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* variable block size stream code */
+    fi->is_var_size = get_bits1(gb);
+
+    /* block size and sample rate codes */
+    bs_code = get_bits(gb, 4);
+    sr_code = get_bits(gb, 4);
+
+    /* channels and decorrelation */
+    fi->ch_mode = get_bits(gb, 4);
+    if (fi->ch_mode < FLAC_MAX_CHANNELS) {
+        fi->channels = fi->ch_mode + 1;
+        fi->ch_mode = FLAC_CHMODE_INDEPENDENT;
+    } else if (fi->ch_mode < FLAC_MAX_CHANNELS + FLAC_CHMODE_MID_SIDE) {
+        fi->channels = 2;
+        fi->ch_mode -= FLAC_MAX_CHANNELS - 1; /* codes 8..10 become FLAC_CHMODE_LEFT_SIDE..FLAC_CHMODE_MID_SIDE (1..3) */
+    } else {
+        av_log(avctx, AV_LOG_ERROR + log_level_offset,
+               "invalid channel mode: %d\n", fi->ch_mode);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* bits per sample */
+    bps_code = get_bits(gb, 3);
+    if (bps_code == 3) {
+        av_log(avctx, AV_LOG_ERROR + log_level_offset,
+               "invalid sample size code (%d)\n",
+               bps_code);
+        return AVERROR_INVALIDDATA;
+    }
+    fi->bps = sample_size_table[bps_code];
+
+    /* reserved bit */
+    if (get_bits1(gb)) {
+        av_log(avctx, AV_LOG_ERROR + log_level_offset,
+               "broken stream, invalid padding\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* sample or frame count */
+    fi->frame_or_sample_num = get_utf8(gb);
+    if (fi->frame_or_sample_num < 0) {
+        av_log(avctx, AV_LOG_ERROR + log_level_offset,
+               "sample/frame number invalid; utf8 fscked\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* blocksize */
+    if (bs_code == 0) {
+        av_log(avctx, AV_LOG_ERROR + log_level_offset,
+               "reserved blocksize code: 0\n");
+        return AVERROR_INVALIDDATA;
+    } else if (bs_code == 6) {
+        fi->blocksize = get_bits(gb, 8) + 1;
+    } else if (bs_code == 7) {
+        fi->blocksize = get_bits(gb, 16) + 1;
+    } else {
+        fi->blocksize = ff_flac_blocksize_table[bs_code];
+    }
+
+    /* sample rate */
+    if (sr_code < 12) {
+        fi->samplerate = ff_flac_sample_rate_table[sr_code];
+    } else if (sr_code == 12) {
+        fi->samplerate = get_bits(gb, 8) * 1000;
+    } else if (sr_code == 13) {
+        fi->samplerate = get_bits(gb, 16);
+    } else if (sr_code == 14) {
+        fi->samplerate = get_bits(gb, 16) * 10;
+    } else {
+        av_log(avctx, AV_LOG_ERROR + log_level_offset,
+               "illegal sample rate code %d\n",
+               sr_code);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* header CRC-8 check */
+    skip_bits(gb, 8); /* step over the CRC byte so that it is included in the range checked below */
+    if (av_crc(av_crc_get_table(AV_CRC_8_ATM), 0, gb->buffer,
+               get_bits_count(gb)/8)) { /* any non-zero result over the bytes read so far is treated as a mismatch */
+        av_log(avctx, AV_LOG_ERROR + log_level_offset,
+               "header crc mismatch\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+/* Check avctx->extradata and point *streaminfo_start at the STREAMINFO payload; returns 1 if usable, 0 otherwise. */
+int ff_flac_is_extradata_valid(AVCodecContext *avctx,
+                               uint8_t **streaminfo_start)
+{
+    if (!avctx->extradata || avctx->extradata_size < FLAC_STREAMINFO_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "extradata NULL or too small.\n");
+        return 0;
+    }
+    if (AV_RL32(avctx->extradata) != MKTAG('f','L','a','C')) {
+        /* extradata contains STREAMINFO only */
+        if (avctx->extradata_size != FLAC_STREAMINFO_SIZE) {
+            av_log(avctx, AV_LOG_WARNING, "extradata contains %d bytes too many.\n",
+                   avctx->extradata_size-FLAC_STREAMINFO_SIZE); /* size exceeds the minimum here, so this is the positive surplus */
+        }
+        *streaminfo_start = avctx->extradata;
+    } else {
+        if (avctx->extradata_size < 8+FLAC_STREAMINFO_SIZE) {
+            av_log(avctx, AV_LOG_ERROR, "extradata too small.\n");
+            return 0;
+        }
+        *streaminfo_start = &avctx->extradata[8]; /* skip the 8 bytes in front of STREAMINFO, starting with the 'fLaC' tag */
+    }
+    return 1;
+}
+/* Set avctx->ch_layout for `channels`: the table layout for 1..8 channels, otherwise an unspecified-order layout. */
+void ff_flac_set_channel_layout(AVCodecContext *avctx, int channels)
+{
+    if (channels == avctx->ch_layout.nb_channels &&
+        avctx->ch_layout.order != AV_CHANNEL_ORDER_UNSPEC)
+        return; /* layout already matches and has a defined order: keep it */
+
+    av_channel_layout_uninit(&avctx->ch_layout);
+    if (channels > 0 && channels <= FF_ARRAY_ELEMS(flac_channel_layouts)) /* without the lower bound, 0 would index [-1] */
+        avctx->ch_layout = flac_channel_layouts[channels - 1];
+    else
+        avctx->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
+                                              .nb_channels = channels };
+}
+/* Parse the FLAC_STREAMINFO_SIZE-byte STREAMINFO block at buffer into s and avctx; returns 0 or AVERROR_INVALIDDATA. */
+int ff_flac_parse_streaminfo(AVCodecContext *avctx, struct FLACStreaminfo *s,
+                              const uint8_t *buffer)
+{
+    GetBitContext gb;
+    init_get_bits(&gb, buffer, FLAC_STREAMINFO_SIZE*8);
+
+    skip_bits(&gb, 16); /* skip min blocksize */
+    s->max_blocksize = get_bits(&gb, 16);
+    if (s->max_blocksize < FLAC_MIN_BLOCKSIZE) {
+        av_log(avctx, AV_LOG_WARNING, "invalid max blocksize: %d\n",
+               s->max_blocksize);
+        s->max_blocksize = 16; /* leave a valid value behind even though an error is returned */
+        return AVERROR_INVALIDDATA;
+    }
+
+    skip_bits(&gb, 24); /* skip min frame size */
+    s->max_framesize = get_bits(&gb, 24);
+
+    s->samplerate = get_bits(&gb, 20);
+    s->channels = get_bits(&gb, 3) + 1; /* stored as count - 1 */
+    s->bps = get_bits(&gb, 5) + 1; /* stored as bits - 1 */
+
+    if (s->bps < 4) {
+        av_log(avctx, AV_LOG_ERROR, "invalid bps: %d\n", s->bps);
+        s->bps = 16; /* leave a valid value behind even though an error is returned */
+        return AVERROR_INVALIDDATA;
+    }
+
+    avctx->sample_rate = s->samplerate;
+    avctx->bits_per_raw_sample = s->bps;
+    ff_flac_set_channel_layout(avctx, s->channels);
+
+    s->samples = get_bits64(&gb, 36);
+
+    skip_bits_long(&gb, 64); /* md5 sum */
+    skip_bits_long(&gb, 64); /* md5 sum */
+
+    return 0;
+}
diff --git a/media/ffvpx/libavcodec/flac.h b/media/ffvpx/libavcodec/flac.h
new file mode 100644
index 0000000000..00e631ed20
--- /dev/null
+++ b/media/ffvpx/libavcodec/flac.h
@@ -0,0 +1,75 @@
+/*
+ * FLAC (Free Lossless Audio Codec) common stuff
+ * Copyright (c) 2008 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * FLAC (Free Lossless Audio Codec) common stuff
+ */
+
+#ifndef AVCODEC_FLAC_H
+#define AVCODEC_FLAC_H
+
+#include "libavutil/intreadwrite.h"
+
+#define FLAC_STREAMINFO_SIZE   34
+#define FLAC_MAX_CHANNELS       8
+#define FLAC_MIN_BLOCKSIZE     16
+#define FLAC_MAX_BLOCKSIZE  65535
+#define FLAC_MIN_FRAME_SIZE    10
+
+/** Channel decorrelation modes of a frame (stored in FLACFrameInfo.ch_mode). */
+enum {
+    FLAC_CHMODE_INDEPENDENT = 0,
+    FLAC_CHMODE_LEFT_SIDE   = 1,
+    FLAC_CHMODE_RIGHT_SIDE  = 2,
+    FLAC_CHMODE_MID_SIDE    = 3,
+};
+
+/**
+ * Metadata block types, as returned in the type output of
+ * flac_parse_block_header(). Values count up from 0; 127 is the invalid marker.
+ */
+enum {
+    FLAC_METADATA_TYPE_STREAMINFO = 0,
+    FLAC_METADATA_TYPE_PADDING,
+    FLAC_METADATA_TYPE_APPLICATION,
+    FLAC_METADATA_TYPE_SEEKTABLE,
+    FLAC_METADATA_TYPE_VORBIS_COMMENT,
+    FLAC_METADATA_TYPE_CUESHEET,
+    FLAC_METADATA_TYPE_PICTURE,
+    FLAC_METADATA_TYPE_INVALID = 127
+};
+
+/**
+ * Decode the fields of a metadata block header.
+ * Any output pointer may be NULL, in which case that field is not written.
+ * @param[in]  block_header header data, at least 4 bytes
+ * @param[out] last non-zero (0x80) when the last-block flag is set, else 0
+ * @param[out] type metadata block type (low 7 bits of the first byte)
+ * @param[out] size metadata block size (24-bit value read from bytes 1..3)
+ */
+static av_always_inline void flac_parse_block_header(const uint8_t *block_header,
+                                                      int *last, int *type, int *size)
+{
+    const int flag_and_type = block_header[0];
+
+    if (last)
+        *last = flag_and_type & 0x80;
+
+    if (type)
+        *type = flag_and_type & 0x7F;
+
+    if (size)
+        *size = AV_RB24(&block_header[1]);
+}
+
+#endif /* AVCODEC_FLAC_H */
diff --git a/media/ffvpx/libavcodec/flac_parse.h b/media/ffvpx/libavcodec/flac_parse.h
new file mode 100644
index 0000000000..67a7320bea
--- /dev/null
+++ b/media/ffvpx/libavcodec/flac_parse.h
@@ -0,0 +1,89 @@
+/*
+ * FLAC (Free Lossless Audio Codec) decoder/parser common functions
+ * Copyright (c) 2008 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * FLAC (Free Lossless Audio Codec) decoder/parser common functions
+ */
+
+#ifndef AVCODEC_FLAC_PARSE_H
+#define AVCODEC_FLAC_PARSE_H
+
+#include "avcodec.h"
+#include "get_bits.h"
+
+/** Stream-level parameters, as filled in by ff_flac_parse_streaminfo(). */
+typedef struct FLACStreaminfo {
+    int samplerate;         /**< sample rate                             */
+    int channels;           /**< number of channels                      */
+    int bps;                /**< bits-per-sample                         */
+    int max_blocksize;      /**< maximum block size, in samples          */
+    int max_framesize;      /**< maximum frame size, in bytes            */
+    int64_t samples;        /**< total number of samples                 */
+} FLACStreaminfo;
+
+/** Per-frame parameters, the output of ff_flac_decode_frame_header(). */
+typedef struct FLACFrameInfo {
+    int samplerate;         /**< sample rate                             */
+    int channels;           /**< number of channels                      */
+    int bps;                /**< bits-per-sample                         */
+    int blocksize;          /**< block size of the frame                 */
+    int ch_mode;            /**< channel decorrelation mode              */
+    int64_t frame_or_sample_num;    /**< frame number or sample number   */
+    int is_var_size;                /**< specifies if the stream uses variable
+                                         block sizes or a fixed block size;
+                                         also determines the meaning of
+                                         frame_or_sample_num             */
+} FLACFrameInfo;
+
+/**
+ * Parse the Streaminfo metadata block
+ * @param[out] avctx   codec context to set basic stream parameters
+ * @param[out] s       where parsed information is stored
+ * @param[in]  buffer  pointer to start of 34-byte streaminfo data
+ *
+ * @return negative error code on failure or >= 0 on success
+ */
+int ff_flac_parse_streaminfo(AVCodecContext *avctx, struct FLACStreaminfo *s,
+                              const uint8_t *buffer);
+
+/**
+ * Validate the FLAC extradata.
+ * @param[in]  avctx codec context containing the extradata.
+ * @param[out] streaminfo_start pointer to start of 34-byte STREAMINFO data.
+ * @return 1 if valid, 0 if not valid.
+ */
+int ff_flac_is_extradata_valid(AVCodecContext *avctx,
+                               uint8_t **streaminfo_start);
+
+/**
+ * Validate and decode a frame header.
+ * @param      avctx AVCodecContext to use as av_log() context
+ * @param      gb    GetBitContext from which to read frame header
+ * @param[out] fi    frame information
+ * @param      log_level_offset  log level offset. can be used to silence error messages.
+ * @return non-zero on error, 0 if ok
+ */
+int ff_flac_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb,
+                                FLACFrameInfo *fi, int log_level_offset);
+
+void ff_flac_set_channel_layout(AVCodecContext *avctx, int channels);
+
+#endif /* AVCODEC_FLAC_PARSE_H */
diff --git a/media/ffvpx/libavcodec/flacdata.c b/media/ffvpx/libavcodec/flacdata.c
new file mode 100644
index 0000000000..d96e3e0966
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdata.c
@@ -0,0 +1,33 @@
+/*
+ * FLAC data
+ * Copyright (c) 2003 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "flacdata.h"
+
+/* Sample rates in Hz. NOTE(review): presumably indexed by the 4-bit sample
+ * rate code of the frame header, with 0 marking codes that have no fixed
+ * rate in this table -- confirm against the frame header parser. */
+const int ff_flac_sample_rate_table[16] =
+{ 0,
+  88200, 176400, 192000,
+  8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000,
+  0, 0, 0, 0 };
+
+/* Block sizes in samples: 192, then 576 * 2^k for k = 0..3, then
+ * 256 * 2^k for k = 0..7. NOTE(review): presumably indexed by the 4-bit
+ * block size code of the frame header, with 0 marking codes that are not
+ * resolved through this table -- confirm against the frame header parser. */
+const int32_t ff_flac_blocksize_table[16] = {
+     0,    192, 576<<0, 576<<1, 576<<2, 576<<3,      0,      0,
+256<<0, 256<<1, 256<<2, 256<<3, 256<<4, 256<<5, 256<<6, 256<<7
+};
diff --git a/media/ffvpx/libavcodec/flacdata.h b/media/ffvpx/libavcodec/flacdata.h
new file mode 100644
index 0000000000..ef21840777
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdata.h
@@ -0,0 +1,31 @@
+/*
+ * FLAC data header
+ * Copyright (c) 2003 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FLACDATA_H
+#define AVCODEC_FLACDATA_H
+
+#include <stdint.h>
+
+extern const int ff_flac_sample_rate_table[16];
+
+extern const int32_t ff_flac_blocksize_table[16];
+
+#endif /* AVCODEC_FLACDATA_H */
diff --git a/media/ffvpx/libavcodec/flacdec.c b/media/ffvpx/libavcodec/flacdec.c
new file mode 100644
index 0000000000..cc778a8dff
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdec.c
@@ -0,0 +1,846 @@
+/*
+ * FLAC (Free Lossless Audio Codec) decoder
+ * Copyright (c) 2003 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * FLAC (Free Lossless Audio Codec) decoder
+ * @author Alex Beregszaszi
+ * @see http://flac.sourceforge.net/
+ *
+ * This decoder can be used in 1 of 2 ways: Either raw FLAC data can be fed
+ * through, starting from the initial 'fLaC' signature; or by passing the
+ * 34-byte streaminfo structure through avctx->extradata[_size] followed
+ * by data starting with the 0xFFF8 marker.
+ */
+
+#include <limits.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/crc.h"
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "codec_internal.h"
+#include "get_bits.h"
+#include "bytestream.h"
+#include "golomb.h"
+#include "flac.h"
+#include "flacdata.h"
+#include "flacdsp.h"
+#include "flac_parse.h"
+#include "thread.h"
+#include "unary.h"
+
+
+/** Private decoder state, stored in AVCodecContext.priv_data. */
+typedef struct FLACContext {
+    AVClass *class;
+    FLACStreaminfo stream_info;             ///< current stream parameters (from STREAMINFO and/or frame headers)
+
+    AVCodecContext *avctx;                  ///< parent AVCodecContext
+    GetBitContext gb;                       ///< GetBitContext initialized to start at the current frame
+
+    int blocksize;                          ///< number of samples in the current frame
+    int sample_shift;                       ///< shift required to make output samples 16-bit or 32-bit
+    int ch_mode;                            ///< channel decorrelation type in the current frame
+    int got_streaminfo;                     ///< indicates if the STREAMINFO has been read
+
+    int32_t *decoded[FLAC_MAX_CHANNELS];    ///< decoded samples
+    uint8_t *decoded_buffer;                ///< backing allocation that decoded[] points into
+    unsigned int decoded_buffer_size;       ///< allocated size of decoded_buffer, maintained by av_fast_malloc()
+    int64_t *decoded_33bps;                  ///< decoded samples for a 33 bps subframe
+    uint8_t *decoded_buffer_33bps;          ///< backing allocation that decoded_33bps points into
+    unsigned int decoded_buffer_size_33bps; ///< allocated size of decoded_buffer_33bps, maintained by av_fast_malloc()
+    int buggy_lpc;                          ///< use workaround for old lavc encoded files
+
+    FLACDSPContext dsp;                     ///< DSP function table (lpc16/lpc32 are used by the LPC subframe decoder)
+} FLACContext;
+
+static int allocate_buffers(FLACContext *s);
+
+/**
+ * Choose the output sample format and the matching sample shift.
+ *
+ * 32-bit output is used when the stream has more than 16 bits per sample or
+ * when the requested sample format is wider than 2 bytes; otherwise 16-bit
+ * output is used. The planar/packed choice follows the requested format.
+ * sample_shift is the output width minus the stream's bits per sample.
+ */
+static void flac_set_bps(FLACContext *s)
+{
+    const enum AVSampleFormat requested = s->avctx->request_sample_fmt;
+    const int stream_bps  = s->stream_info.bps;
+    const int wants_wide  = av_get_bytes_per_sample(requested) > 2;
+    const int is_planar   = av_sample_fmt_is_planar(requested);
+    const int use_32bit   = stream_bps > 16 || wants_wide;
+
+    if (use_32bit) {
+        s->avctx->sample_fmt = is_planar ? AV_SAMPLE_FMT_S32P
+                                         : AV_SAMPLE_FMT_S32;
+        s->sample_shift      = 32 - stream_bps;
+        return;
+    }
+
+    s->avctx->sample_fmt = is_planar ? AV_SAMPLE_FMT_S16P
+                                     : AV_SAMPLE_FMT_S16;
+    s->sample_shift      = 16 - stream_bps;
+}
+
+/**
+ * Decoder init callback.
+ *
+ * Without extradata nothing is set up here and 0 is returned. With
+ * extradata, the STREAMINFO it contains is validated and parsed, the sample
+ * buffers are allocated, and the output format and DSP functions are chosen.
+ *
+ * @return 0 on success, a negative error code on failure
+ */
+static av_cold int flac_decode_init(AVCodecContext *avctx)
+{
+    FLACContext *ctx = avctx->priv_data;
+    uint8_t *streaminfo;
+    int err;
+
+    ctx->avctx = avctx;
+
+    /* for now, the raw FLAC header is allowed to be passed to the decoder as
+       frame data instead of extradata. */
+    if (!avctx->extradata)
+        return 0;
+
+    if (!ff_flac_is_extradata_valid(avctx, &streaminfo))
+        return AVERROR_INVALIDDATA;
+
+    /* initialize based on the demuxer-supplied streamdata header */
+    if ((err = ff_flac_parse_streaminfo(avctx, &ctx->stream_info, streaminfo)) < 0)
+        return err;
+
+    if ((err = allocate_buffers(ctx)) < 0)
+        return err;
+
+    flac_set_bps(ctx);
+    ff_flacdsp_init(&ctx->dsp, avctx->sample_fmt,
+                    ctx->stream_info.channels);
+    ctx->got_streaminfo = 1;
+
+    return 0;
+}
+
+/** Log the main stream parameters at debug level, one message per field. */
+static void dump_headers(AVCodecContext *avctx, FLACStreaminfo *s)
+{
+    av_log(avctx, AV_LOG_DEBUG, "  Max Blocksize: %d\n", s->max_blocksize);
+    av_log(avctx, AV_LOG_DEBUG, "  Max Framesize: %d\n", s->max_framesize);
+    av_log(avctx, AV_LOG_DEBUG, "  Samplerate: %d\n", s->samplerate);
+    av_log(avctx, AV_LOG_DEBUG, "  Channels: %d\n", s->channels);
+    av_log(avctx, AV_LOG_DEBUG, "  Bits: %d\n", s->bps);
+}
+
+/**
+ * (Re)allocate the sample buffers for the current stream parameters.
+ *
+ * One planar 32-bit buffer holding max_blocksize samples per channel is
+ * always set up behind s->decoded[]. For 32 bps stereo streams a single
+ * 64-bit plane is additionally set up behind s->decoded_33bps.
+ *
+ * @return 0 on success, a negative error code on failure
+ */
+static int allocate_buffers(FLACContext *s)
+{
+    int nb_channels, max_samples;
+    int size, err;
+
+    av_assert0(s->stream_info.max_blocksize);
+
+    nb_channels = s->stream_info.channels;
+    max_samples = s->stream_info.max_blocksize;
+
+    size = av_samples_get_buffer_size(NULL, nb_channels, max_samples,
+                                      AV_SAMPLE_FMT_S32P, 0);
+    if (size < 0)
+        return size;
+
+    av_fast_malloc(&s->decoded_buffer, &s->decoded_buffer_size, size);
+    if (!s->decoded_buffer)
+        return AVERROR(ENOMEM);
+
+    err = av_samples_fill_arrays((uint8_t **)s->decoded, NULL,
+                                 s->decoded_buffer,
+                                 nb_channels, max_samples,
+                                 AV_SAMPLE_FMT_S32P, 0);
+    if (err < 0)
+        return err;
+
+    /* the 64-bit plane is only set up for 32 bps stereo streams */
+    if (s->stream_info.bps != 32 || nb_channels != 2)
+        return 0;
+
+    size = av_samples_get_buffer_size(NULL, 1, max_samples,
+                                      AV_SAMPLE_FMT_S64P, 0);
+    if (size < 0)
+        return size;
+
+    av_fast_malloc(&s->decoded_buffer_33bps, &s->decoded_buffer_size_33bps, size);
+    if (!s->decoded_buffer_33bps)
+        return AVERROR(ENOMEM);
+
+    err = av_samples_fill_arrays((uint8_t **)&s->decoded_33bps, NULL,
+                                 s->decoded_buffer_33bps,
+                                 1, max_samples,
+                                 AV_SAMPLE_FMT_S64P, 0);
+
+    return err < 0 ? err : 0;
+}
+
+/**
+ * Parse the STREAMINFO from an inline header and set up the decoder from it.
+ * @param s the flac decoding context
+ * @param buf input buffer, starting with the "fLaC" marker
+ * @param buf_size buffer size
+ * @return 0 on success or when the buffer is too short to hold the
+ *         STREAMINFO block (nothing is parsed in that case), a negative
+ *         error code if the metadata is invalid or setup fails
+ */
+static int parse_streaminfo(FLACContext *s, const uint8_t *buf, int buf_size)
+{
+    int block_type, block_size;
+    int err;
+
+    /* need more data */
+    if (buf_size < FLAC_STREAMINFO_SIZE+8)
+        return 0;
+
+    /* the first metadata block header follows the 4-byte marker */
+    flac_parse_block_header(&buf[4], NULL, &block_type, &block_size);
+    if (block_type != FLAC_METADATA_TYPE_STREAMINFO)
+        return AVERROR_INVALIDDATA;
+    if (block_size != FLAC_STREAMINFO_SIZE)
+        return AVERROR_INVALIDDATA;
+
+    if ((err = ff_flac_parse_streaminfo(s->avctx, &s->stream_info, &buf[8])) < 0)
+        return err;
+
+    if ((err = allocate_buffers(s)) < 0)
+        return err;
+
+    flac_set_bps(s);
+    ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt,
+                    s->stream_info.channels);
+    s->got_streaminfo = 1;
+
+    return 0;
+}
+
+/**
+ * Determine the size of an inline header.
+ *
+ * Walks the chain of metadata blocks that follows the 4-byte marker until
+ * the block flagged as last has been passed.
+ *
+ * @param buf input buffer, starting with the "fLaC" marker
+ * @param buf_size buffer size
+ * @return number of bytes in the header (marker plus all metadata blocks),
+ *         or AVERROR_INVALIDDATA if the buffer ends before the complete
+ *         header has been seen
+ */
+static int get_metadata_size(const uint8_t *buf, int buf_size)
+{
+    int metadata_last, metadata_size;
+    const uint8_t *buf_end;
+
+    /* Reject buffers that cannot even hold the marker before forming any
+     * pointer beyond them. */
+    if (buf_size < 4)
+        return AVERROR_INVALIDDATA;
+
+    buf_end = buf + buf_size;
+    buf += 4;
+    do {
+        if (buf_end - buf < 4)
+            return AVERROR_INVALIDDATA;
+        flac_parse_block_header(buf, &metadata_last, NULL, &metadata_size);
+        buf += 4;
+        if (buf_end - buf < metadata_size) {
+            /* the buffer does not contain the complete header */
+            return AVERROR_INVALIDDATA;
+        }
+        buf += metadata_size;
+    } while (!metadata_last);
+
+    /* bytes consumed = total size minus what is left after the last block */
+    return buf_size - (buf_end - buf);
+}
+
+/**
+ * Decode the residual section of a subframe into decoded[pred_order..].
+ *
+ * Reading is done on a local copy of the bit reader; s->gb is only advanced
+ * when the whole residual has been decoded successfully.
+ *
+ * @param s          decoder context (blocksize and bit reader are used)
+ * @param decoded    output buffer; the first pred_order entries are skipped
+ * @param pred_order number of warm-up samples preceding the residual
+ * @return 0 on success, AVERROR_INVALIDDATA on a malformed residual
+ */
+static int decode_residuals(FLACContext *s, int32_t *decoded, int pred_order)
+{
+    GetBitContext gb = s->gb;
+    int i, tmp, partition, method_type, rice_order;
+    int rice_bits, rice_esc;
+    int samples;
+
+    method_type = get_bits(&gb, 2);
+    rice_order  = get_bits(&gb, 4);
+
+    /* samples per partition; there are 1 << rice_order partitions */
+    samples   = s->blocksize >> rice_order;
+    /* width of the per-partition parameter: 4 or 5 bits */
+    rice_bits = 4 + method_type;
+    /* an all-ones parameter selects the escape (raw) coding below */
+    rice_esc  = (1 << rice_bits) - 1;
+
+    decoded += pred_order;
+    i        = pred_order;
+
+    if (method_type > 1) {
+        av_log(s->avctx, AV_LOG_ERROR, "illegal residual coding method %d\n",
+               method_type);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* the block size must divide evenly into the partitions */
+    if (samples << rice_order != s->blocksize) {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid rice order: %i blocksize %i\n",
+               rice_order, s->blocksize);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* the warm-up samples must fit inside the first partition */
+    if (pred_order > samples) {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid predictor order: %i > %i\n",
+               pred_order, samples);
+        return AVERROR_INVALIDDATA;
+    }
+
+    for (partition = 0; partition < (1 << rice_order); partition++) {
+        tmp = get_bits(&gb, rice_bits);
+        if (tmp == rice_esc) {
+            /* escape: a 5-bit width follows, then raw signed samples */
+            tmp = get_bits(&gb, 5);
+            for (; i < samples; i++)
+                *decoded++ = get_sbits_long(&gb, tmp);
+        } else {
+            int real_limit = (tmp > 1) ? (INT_MAX >> (tmp - 1)) + 2 : INT_MAX;
+            for (; i < samples; i++) {
+                int v = get_sr_golomb_flac(&gb, tmp, real_limit, 1);
+                /* 0x80000000 is rejected as an invalid residual value */
+                if (v == 0x80000000){
+                    av_log(s->avctx, AV_LOG_ERROR, "invalid residual\n");
+                    return AVERROR_INVALIDDATA;
+                }
+
+                *decoded++ = v;
+            }
+        }
+        /* only the first partition is shortened by the warm-up samples */
+        i= 0;
+    }
+
+    /* commit the read position now that decoding succeeded */
+    s->gb = gb;
+
+    return 0;
+}
+
+/**
+ * Decode a subframe coded with a fixed predictor of order 0..4.
+ *
+ * Reads pred_order warm-up samples of bps bits each, decodes the residual
+ * in place and then reconstructs the samples by repeated running sums.
+ *
+ * @return 0 on success, a negative error code on failure
+ */
+static int decode_subframe_fixed(FLACContext *s, int32_t *decoded,
+                                 int pred_order, int bps)
+{
+    const int blocksize = s->blocksize;
+    /* running differences of order 0..3; only those needed for pred_order
+     * are initialized. NOTE(review): unsigned presumably so that the
+     * accumulations below wrap instead of overflowing -- confirm. */
+    unsigned av_uninit(a), av_uninit(b), av_uninit(c), av_uninit(d);
+    int i;
+    int ret;
+
+    /* warm up samples */
+    for (i = 0; i < pred_order; i++) {
+        decoded[i] = get_sbits_long(&s->gb, bps);
+    }
+
+    if ((ret = decode_residuals(s, decoded, pred_order)) < 0)
+        return ret;
+
+    /* seed the differences from the warm-up samples */
+    if (pred_order > 0)
+        a = decoded[pred_order-1];
+    if (pred_order > 1)
+        b = a - decoded[pred_order-2];
+    if (pred_order > 2)
+        c = b - decoded[pred_order-2] + decoded[pred_order-3];
+    if (pred_order > 3)
+        d = c - decoded[pred_order-2] + 2U*decoded[pred_order-3] - decoded[pred_order-4];
+
+    /* each residual is added to the highest-order difference, which is then
+     * accumulated down to the sample value in a */
+    switch (pred_order) {
+    case 0:
+        break;
+    case 1:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = a += decoded[i];
+        break;
+    case 2:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = a += b += decoded[i];
+        break;
+    case 3:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = a += b += c += decoded[i];
+        break;
+    case 4:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = a += b += c += d += decoded[i];
+        break;
+    default:
+        av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order);
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+/*
+ * Shared tail of the wide fixed-predictor decoders.
+ *
+ * Expects s, decoded and pred_order to be in scope at the expansion site and
+ * expands to a block that ends the enclosing function with a return on every
+ * path. The residual is decoded into the array passed as the macro argument;
+ * each output sample is then computed from that residual plus the previous
+ * pred_order outputs, using 64-bit intermediate arithmetic.
+ */
+#define DECODER_SUBFRAME_FIXED_WIDE(residual) {                       \
+    const int blocksize = s->blocksize;                               \
+    int ret;                                                          \
+                                                                      \
+    if ((ret = decode_residuals(s, residual, pred_order)) < 0)        \
+        return ret;                                                   \
+                                                                      \
+    switch (pred_order) {                                             \
+    case 0:                                                           \
+        for (int i = pred_order; i < blocksize; i++)                  \
+            decoded[i] = residual[i];                                 \
+        break;                                                        \
+    case 1:                                                           \
+        for (int i = pred_order; i < blocksize; i++)                  \
+            decoded[i] = (int64_t)residual[i] + (int64_t)decoded[i-1];\
+        break;                                                        \
+    case 2:                                                           \
+        for (int i = pred_order; i < blocksize; i++)                  \
+            decoded[i] = (int64_t)residual[i] + 2*(int64_t)decoded[i-1] - (int64_t)decoded[i-2];  \
+        break;                                                        \
+    case 3:                                                           \
+        for (int i = pred_order; i < blocksize; i++)                  \
+            decoded[i] = (int64_t)residual[i] + 3*(int64_t)decoded[i-1] - 3*(int64_t)decoded[i-2] + (int64_t)decoded[i-3];   \
+        break;                                                        \
+    case 4:                                                           \
+        for (int i = pred_order; i < blocksize; i++)                  \
+            decoded[i] = (int64_t)residual[i] + 4*(int64_t)decoded[i-1] - 6*(int64_t)decoded[i-2] + 4*(int64_t)decoded[i-3] - (int64_t)decoded[i-4];   \
+        break;                                                        \
+    default:                                                          \
+        av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order);   \
+        return AVERROR_INVALIDDATA;                                   \
+    }                                                                 \
+    return 0;                                                         \
+}
+
+/**
+ * Fixed-predictor subframe decoder using 64-bit intermediate arithmetic.
+ * The residual is decoded in place in decoded[]. The function returns from
+ * inside the DECODER_SUBFRAME_FIXED_WIDE expansion.
+ *
+ * @return 0 on success, a negative error code on failure
+ */
+static int decode_subframe_fixed_wide(FLACContext *s, int32_t *decoded,
+                                      int pred_order, int bps)
+{
+    /* warm up samples */
+    for (int i = 0; i < pred_order; i++) {
+        decoded[i] = get_sbits_long(&s->gb, bps);
+    }
+    DECODER_SUBFRAME_FIXED_WIDE(decoded);
+}
+
+
+/**
+ * Fixed-predictor subframe decoder for 33-bit samples.
+ *
+ * Warm-up samples are read as 33-bit values into the 64-bit decoded[] array,
+ * while the residual is decoded into the separate 32-bit residual[] array.
+ * The function returns from inside the DECODER_SUBFRAME_FIXED_WIDE expansion.
+ *
+ * @param s          decoder context
+ * @param decoded    64-bit output samples
+ * @param residual   32-bit scratch array receiving the residual
+ * @param pred_order predictor order (0..4)
+ * @return 0 on success, a negative error code on failure
+ */
+static int decode_subframe_fixed_33bps(FLACContext *s, int64_t *decoded,
+                                       int32_t *residual, int pred_order)
+{
+    /* warm up samples */
+    for (int i = 0; i < pred_order; i++) {
+        decoded[i] = get_sbits64(&s->gb, 33);
+    }
+    DECODER_SUBFRAME_FIXED_WIDE(residual);
+}
+
+/**
+ * Redo an LPC reconstruction with a 32-bit accumulator when the result of
+ * the previous reconstruction does not fit in bps bits.
+ *
+ * If every sample from index order onward already lies within the signed
+ * bps-bit range, nothing is changed. Otherwise the samples are first turned
+ * back into residuals using a 64-bit prediction sum and then predicted
+ * again using a 32-bit sum.
+ * NOTE(review): presumably this reproduces the wrap-around of an encoder
+ * that used 32-bit arithmetic -- confirm.
+ *
+ * @param decoded samples to fix up in place
+ * @param coeffs  predictor coefficients
+ * @param order   predictor order
+ * @param qlevel  right shift applied to the prediction sum
+ * @param len     number of samples in decoded
+ * @param bps     bits per sample of the subframe
+ */
+static void lpc_analyze_remodulate(SUINT32 *decoded, const int coeffs[32],
+                                   int order, int qlevel, int len, int bps)
+{
+    int i, j;
+    int ebps = 1 << (bps-1);
+    unsigned sigma = 0;
+
+    /* offset each sample by half the range and OR them together: any bit at
+     * or above 2*ebps means at least one sample is out of range */
+    for (i = order; i < len; i++)
+        sigma |= decoded[i] + ebps;
+
+    if (sigma < 2*ebps)
+        return;
+
+    /* back to residuals, walking from the end so that the samples used for
+     * each prediction are still unmodified */
+    for (i = len - 1; i >= order; i--) {
+        int64_t p = 0;
+        for (j = 0; j < order; j++)
+            p += coeffs[j] * (int64_t)(int32_t)decoded[i-order+j];
+        decoded[i] -= p >> qlevel;
+    }
+    /* predict again, this time with a 32-bit accumulator */
+    for (i = order; i < len; i++, decoded++) {
+        int32_t p = 0;
+        for (j = 0; j < order; j++)
+            p += coeffs[j] * (uint32_t)decoded[j];
+        decoded[j] += p >> qlevel;
+    }
+}
+
+/**
+ * Decode an LPC-coded subframe with samples of at most 32 bits.
+ *
+ * Reads the warm-up samples, coefficient precision, shift (qlevel) and
+ * coefficients, decodes the residual in place and runs one of the DSP LPC
+ * routines to reconstruct the samples.
+ *
+ * @param s          decoder context
+ * @param decoded    output samples
+ * @param pred_order predictor order
+ * @param bps        bits per sample of this subframe
+ * @return 0 on success, a negative error code on failure
+ */
+static int decode_subframe_lpc(FLACContext *s, int32_t *decoded, int pred_order,
+                               int bps)
+{
+    int i, ret;
+    int coeff_prec, qlevel;
+    int coeffs[32];
+
+    /* warm up samples */
+    for (i = 0; i < pred_order; i++) {
+        decoded[i] = get_sbits_long(&s->gb, bps);
+    }
+
+    /* precision is stored minus one; the all-ones code (16) is rejected */
+    coeff_prec = get_bits(&s->gb, 4) + 1;
+    if (coeff_prec == 16) {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid coeff precision\n");
+        return AVERROR_INVALIDDATA;
+    }
+    qlevel = get_sbits(&s->gb, 5);
+    if (qlevel < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "qlevel %d not supported, maybe buggy stream\n",
+               qlevel);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* coefficients are stored in reverse of their bitstream order */
+    for (i = 0; i < pred_order; i++) {
+        coeffs[pred_order - i - 1] = get_sbits(&s->gb, coeff_prec);
+    }
+
+    if ((ret = decode_residuals(s, decoded, pred_order)) < 0)
+        return ret;
+
+    /* lpc16 is chosen for <= 16 bps when either the buggy_lpc workaround is
+     * on, or bps + coeff_prec + log2(pred_order) does not exceed 32;
+     * everything else goes through lpc32 */
+    if (   (    s->buggy_lpc && s->stream_info.bps <= 16)
+        || (   !s->buggy_lpc && bps <= 16
+            && bps + coeff_prec + av_log2(pred_order) <= 32)) {
+        s->dsp.lpc16(decoded, coeffs, pred_order, qlevel, s->blocksize);
+    } else {
+        s->dsp.lpc32(decoded, coeffs, pred_order, qlevel, s->blocksize);
+        /* for <= 16 bps streams, fix up results that left the bps range */
+        if (s->stream_info.bps <= 16)
+            lpc_analyze_remodulate(decoded, coeffs, pred_order, qlevel, s->blocksize, bps);
+    }
+
+    return 0;
+}
+
+/**
+ * Decode an LPC-coded subframe with 33-bit samples.
+ *
+ * Same bitstream layout as decode_subframe_lpc(), but warm-up samples are
+ * read as 33-bit values into the 64-bit decoded[] array, the residual goes
+ * into the separate 32-bit residual[] array, and the prediction is computed
+ * inline with a 64-bit sum.
+ *
+ * @param s          decoder context
+ * @param decoded    64-bit output samples
+ * @param residual   32-bit scratch array receiving the residual
+ * @param pred_order predictor order
+ * @return 0 on success, a negative error code on failure
+ */
+static int decode_subframe_lpc_33bps(FLACContext *s, int64_t *decoded,
+                                     int32_t *residual, int pred_order)
+{
+    int i, j, ret;
+    int coeff_prec, qlevel;
+    int coeffs[32];
+
+    /* warm up samples */
+    for (i = 0; i < pred_order; i++) {
+        decoded[i] = get_sbits64(&s->gb, 33);
+    }
+
+    /* precision is stored minus one; the all-ones code (16) is rejected */
+    coeff_prec = get_bits(&s->gb, 4) + 1;
+    if (coeff_prec == 16) {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid coeff precision\n");
+        return AVERROR_INVALIDDATA;
+    }
+    qlevel = get_sbits(&s->gb, 5);
+    if (qlevel < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "qlevel %d not supported, maybe buggy stream\n",
+               qlevel);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* coefficients are stored in reverse of their bitstream order */
+    for (i = 0; i < pred_order; i++) {
+        coeffs[pred_order - i - 1] = get_sbits(&s->gb, coeff_prec);
+    }
+
+    if ((ret = decode_residuals(s, residual, pred_order)) < 0)
+        return ret;
+
+    /* decoded advances with i, so decoded[0..pred_order-1] are always the
+     * previous pred_order outputs and decoded[j] (j == pred_order after the
+     * inner loop) is the sample being produced */
+    for (i = pred_order; i < s->blocksize; i++, decoded++) {
+        int64_t sum = 0;
+        for (j = 0; j < pred_order; j++)
+            sum += (int64_t)coeffs[j] * decoded[j];
+        decoded[j] = residual[i] + (sum >> qlevel);
+    }
+
+    return 0;
+}
+
+/**
+ * Decode one subframe (one channel of the current frame).
+ *
+ * Reads the subframe header (padding bit, 6-bit type, optional wasted-bits
+ * count), dispatches on the type, and finally shifts the samples left by
+ * the number of wasted bits. Samples go to s->decoded[channel], or to
+ * s->decoded_33bps when the subframe needs 33 bits.
+ *
+ * @param s       decoder context
+ * @param channel index of the channel to decode
+ * @return 0 on success, a negative error code on failure
+ */
+static inline int decode_subframe(FLACContext *s, int channel)
+{
+    int32_t *decoded = s->decoded[channel];
+    int type, wasted = 0;
+    int bps = s->stream_info.bps;
+    int i, ret;
+
+    /* one channel of a decorrelated pair is coded with one extra bit:
+     * channel 0 in right-side mode, channel 1 in left-side and mid-side
+     * mode (presumably the side/difference channel -- confirm) */
+    if (channel == 0) {
+        if (s->ch_mode == FLAC_CHMODE_RIGHT_SIDE)
+            bps++;
+    } else {
+        if (s->ch_mode == FLAC_CHMODE_LEFT_SIDE || s->ch_mode == FLAC_CHMODE_MID_SIDE)
+            bps++;
+    }
+
+    /* the first header bit must be zero */
+    if (get_bits1(&s->gb)) {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid subframe padding\n");
+        return AVERROR_INVALIDDATA;
+    }
+    type = get_bits(&s->gb, 6);
+
+    /* wasted-bits flag: a unary-coded count follows */
+    if (get_bits1(&s->gb)) {
+        int left = get_bits_left(&s->gb);
+        /* a set bit must show up within the remaining data and within the
+         * next bps-1 bits, otherwise the count cannot be valid */
+        if ( left <= 0 ||
+            (left < bps && !show_bits_long(&s->gb, left)) ||
+                           !show_bits_long(&s->gb, bps-1)) {
+            av_log(s->avctx, AV_LOG_ERROR,
+                   "Invalid number of wasted bits > available bits (%d) - left=%d\n",
+                   bps, left);
+            return AVERROR_INVALIDDATA;
+        }
+        wasted = 1 + get_unary(&s->gb, 1, get_bits_left(&s->gb));
+        bps -= wasted;
+    }
+
+//FIXME use av_log2 for types
+    if (type == 0) {
+        /* type 0: a single value repeated for the whole block */
+        if (bps < 33) {
+            int32_t tmp = get_sbits_long(&s->gb, bps);
+            for (i = 0; i < s->blocksize; i++)
+                decoded[i] = tmp;
+        } else {
+            int64_t tmp = get_sbits64(&s->gb, 33);
+            for (i = 0; i < s->blocksize; i++)
+                s->decoded_33bps[i] = tmp;
+        }
+    } else if (type == 1) {
+        /* type 1: every sample stored as a raw signed value */
+        if (bps < 33) {
+            for (i = 0; i < s->blocksize; i++)
+                decoded[i] = get_sbits_long(&s->gb, bps);
+        } else {
+            for (i = 0; i < s->blocksize; i++)
+                s->decoded_33bps[i] = get_sbits64(&s->gb, 33);
+        }
+    } else if ((type >= 8) && (type <= 12)) {
+        /* types 8..12: fixed predictor, order is the low bits (0..4) */
+        int order = type & ~0x8;
+        if (bps < 33) {
+            /* the plain decoder is only used while bps + order <= 32;
+             * otherwise the 64-bit variant is used */
+            if (bps + order <= 32) {
+                if ((ret = decode_subframe_fixed(s, decoded, order, bps)) < 0)
+                    return ret;
+            } else {
+                if ((ret = decode_subframe_fixed_wide(s, decoded, order, bps)) < 0)
+                    return ret;
+            }
+        } else {
+            /* decoded[] doubles as the residual scratch array here */
+            if ((ret = decode_subframe_fixed_33bps(s, s->decoded_33bps, decoded, order)) < 0)
+                return ret;
+        }
+    } else if (type >= 32) {
+        /* types 32..63: LPC, order is the low five bits plus one */
+        if (bps < 33) {
+            if ((ret = decode_subframe_lpc(s, decoded, (type & ~0x20)+1, bps)) < 0)
+                return ret;
+        } else {
+            /* decoded[] doubles as the residual scratch array here */
+            if ((ret = decode_subframe_lpc_33bps(s, s->decoded_33bps, decoded, (type & ~0x20)+1)) < 0)
+                return ret;
+        }
+    } else {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid coding type\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* restore the wasted low bits by shifting the samples back up */
+    if (wasted) {
+        if (wasted+bps == 33) {
+            /* the subframe was decoded as 32-bit samples but the full-width
+             * result needs 33 bits, so it is written to the 64-bit buffer */
+            int i;
+            for (i = 0; i < s->blocksize; i++)
+                s->decoded_33bps[i] = (uint64_t)decoded[i] << wasted;
+        } else if (wasted < 32) {
+            int i;
+            for (i = 0; i < s->blocksize; i++)
+                decoded[i] = (unsigned)decoded[i] << wasted;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Decode one frame from s->gb into the internal sample buffers.
+ *
+ * Parses the frame header, reconciles its parameters with the stream
+ * parameters known so far (filling in whichever side is missing), makes
+ * sure buffers are allocated, decodes one subframe per channel and skips
+ * the 16-bit frame footer.
+ *
+ * @return 0 on success, a negative error code on failure
+ */
+static int decode_frame(FLACContext *s)
+{
+    int i, ret;
+    GetBitContext *gb = &s->gb;
+    FLACFrameInfo fi;
+
+    if ((ret = ff_flac_decode_frame_header(s->avctx, gb, &fi, 0)) < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid frame header\n");
+        return ret;
+    }
+
+    /* channel count changed after the stream was set up: buffers are laid
+     * out per channel, so they have to be allocated again */
+    if (   s->stream_info.channels
+        && fi.channels != s->stream_info.channels
+        && s->got_streaminfo) {
+        s->stream_info.channels = fi.channels;
+        ff_flac_set_channel_layout(s->avctx, fi.channels);
+        ret = allocate_buffers(s);
+        if (ret < 0)
+            return ret;
+    }
+    s->stream_info.channels = fi.channels;
+    ff_flac_set_channel_layout(s->avctx, fi.channels);
+    s->ch_mode = fi.ch_mode;
+
+    /* bps must come from the stream parameters or the frame header, and
+     * the two must agree when both are present */
+    if (!s->stream_info.bps && !fi.bps) {
+        av_log(s->avctx, AV_LOG_ERROR, "bps not found in STREAMINFO or frame header\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (!fi.bps) {
+        fi.bps = s->stream_info.bps;
+    } else if (s->stream_info.bps && fi.bps != s->stream_info.bps) {
+        av_log(s->avctx, AV_LOG_ERROR, "switching bps mid-stream is not "
+                                       "supported\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* first time bps becomes known: pick the output sample format */
+    if (!s->stream_info.bps) {
+        s->stream_info.bps = s->avctx->bits_per_raw_sample = fi.bps;
+        flac_set_bps(s);
+    }
+
+    /* with no known maximum, fall back to the largest possible block size */
+    if (!s->stream_info.max_blocksize)
+        s->stream_info.max_blocksize = FLAC_MAX_BLOCKSIZE;
+    if (fi.blocksize > s->stream_info.max_blocksize) {
+        av_log(s->avctx, AV_LOG_ERROR, "blocksize %d > %d\n", fi.blocksize,
+               s->stream_info.max_blocksize);
+        return AVERROR_INVALIDDATA;
+    }
+    s->blocksize = fi.blocksize;
+
+    if (!s->stream_info.samplerate && !fi.samplerate) {
+        av_log(s->avctx, AV_LOG_ERROR, "sample rate not found in STREAMINFO"
+                                        " or frame header\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (fi.samplerate == 0)
+        fi.samplerate = s->stream_info.samplerate;
+    s->stream_info.samplerate = s->avctx->sample_rate = fi.samplerate;
+
+    /* no STREAMINFO was seen: set up from the parameters gathered above */
+    if (!s->got_streaminfo) {
+        ret = allocate_buffers(s);
+        if (ret < 0)
+            return ret;
+        s->got_streaminfo = 1;
+        dump_headers(s->avctx, &s->stream_info);
+    }
+    ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt,
+                    s->stream_info.channels);
+
+//    dump_headers(s->avctx, &s->stream_info);
+
+    /* subframes */
+    for (i = 0; i < s->stream_info.channels; i++) {
+        if ((ret = decode_subframe(s, i)) < 0)
+            return ret;
+    }
+
+    align_get_bits(gb);
+
+    /* frame footer */
+    skip_bits(gb, 16); /* data crc */
+
+    return 0;
+}
+
+/* Resolve stereo decorrelation in place, with decoded_33bps supplying the 64-bit difference ("side") values; other ch_mode values change nothing. */
+static void decorrelate_33bps(int ch_mode, int32_t **decoded, int64_t *decoded_33bps, int len)
+{
+    int n;
+    if (ch_mode == FLAC_CHMODE_LEFT_SIDE) {
+        for (n = 0; n < len; n++)
+            decoded[1][n] = decoded[0][n] - decoded_33bps[n];
+    } else if (ch_mode == FLAC_CHMODE_RIGHT_SIDE) {
+        for (n = 0; n < len; n++)
+            decoded[0][n] = decoded[1][n] + decoded_33bps[n];
+    } else if (ch_mode == FLAC_CHMODE_MID_SIDE) {
+        for (n = 0; n < len; n++) {
+            const int64_t side = decoded_33bps[n];
+            const uint64_t mid = (uint64_t)decoded[0][n] - (side >> 1); /* unsigned, so wrap-around is well defined */
+            decoded[0][n] = mid + side;
+            decoded[1][n] = mid;
+        }
+    }
+}
+
+static int flac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
+                             int *got_frame_ptr, AVPacket *avpkt)
+{
+    const uint8_t *buf = avpkt->data;
+    int buf_size = avpkt->size;
+    FLACContext *s = avctx->priv_data;
+    int bytes_read = 0;
+    int ret;
+    /* Decode callback for one packet: *got_frame_ptr becomes 1 only when audio was written to frame. Returns bytes consumed or a negative error. */
+    *got_frame_ptr = 0;
+    /* Packets carrying a header or a comment block instead of audio are consumed whole, without output. */
+    if (buf_size > 5 && !memcmp(buf, "\177FLAC", 5)) {
+        av_log(s->avctx, AV_LOG_DEBUG, "skipping flac header packet 1\n");
+        return buf_size;
+    }
+
+    if (buf_size > 0 && (*buf & 0x7F) == FLAC_METADATA_TYPE_VORBIS_COMMENT) {
+        av_log(s->avctx, AV_LOG_DEBUG, "skipping vorbis comment\n");
+        return buf_size;
+    }
+
+    /* check that there is at least the smallest decodable amount of data.
+       this amount corresponds to the smallest valid FLAC frame possible.
+       FF F8 69 02 00 00 9A 00 00 34 */
+    if (buf_size < FLAC_MIN_FRAME_SIZE)
+        return buf_size;
+
+    /* check for inline header: "fLaC" marker; stream info is parsed only if none was seen before */
+    if (AV_RB32(buf) == MKBETAG('f','L','a','C')) {
+        if (!s->got_streaminfo && (ret = parse_streaminfo(s, buf, buf_size))) {
+            av_log(s->avctx, AV_LOG_ERROR, "invalid header\n");
+            return ret;
+        }
+        return get_metadata_size(buf, buf_size);
+    }
+
+    /* decode frame */
+    if ((ret = init_get_bits8(&s->gb, buf, buf_size)) < 0)
+        return ret;
+    if ((ret = decode_frame(s)) < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "decode_frame() failed\n");
+        return ret;
+    }
+    bytes_read = get_bits_count(&s->gb)/8;
+    /* Optional CRC-16 over the consumed bytes; a mismatch is logged and is fatal only with AV_EF_EXPLODE. */
+    if ((s->avctx->err_recognition & (AV_EF_CRCCHECK|AV_EF_COMPLIANT)) &&
+        av_crc(av_crc_get_table(AV_CRC_16_ANSI),
+               0, buf, bytes_read)) {
+        av_log(s->avctx, AV_LOG_ERROR, "CRC error at PTS %"PRId64"\n", avpkt->pts);
+        if (s->avctx->err_recognition & AV_EF_EXPLODE)
+            return AVERROR_INVALIDDATA;
+    }
+
+    /* get output buffer */
+    frame->nb_samples = s->blocksize;
+    if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+    /* 32 bps with ch_mode > 0: decorrelate_33bps() resolves the channels first, then the mode-0 routine writes them out. */
+    if (s->stream_info.bps == 32 && s->ch_mode > 0) {
+        decorrelate_33bps(s->ch_mode, s->decoded, s->decoded_33bps, s->blocksize);
+        s->dsp.decorrelate[0](frame->data, s->decoded, s->stream_info.channels,
+                              s->blocksize, s->sample_shift);
+    } else {
+        s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded,
+                                       s->stream_info.channels,
+                                       s->blocksize, s->sample_shift);
+    }
+    /* The size check runs after decoding: an overread is an error, an underread is only logged. */
+    if (bytes_read > buf_size) {
+        av_log(s->avctx, AV_LOG_ERROR, "overread: %d\n", bytes_read - buf_size);
+        return AVERROR_INVALIDDATA;
+    }
+    if (bytes_read < buf_size) {
+        av_log(s->avctx, AV_LOG_DEBUG, "underread: %d orig size: %d\n",
+               buf_size - bytes_read, buf_size);
+    }
+
+    *got_frame_ptr = 1;
+
+    return bytes_read;
+}
+
+/* Close callback: release both sample buffers owned by the private context; always returns 0. */
+static av_cold int flac_decode_close(AVCodecContext *avctx)
+{
+    FLACContext *ctx = avctx->priv_data;
+
+    av_freep(&ctx->decoded_buffer_33bps);
+    av_freep(&ctx->decoded_buffer);
+    return 0;
+}
+
+static const AVOption options[] = { /* decoder-private options; attached through flac_decoder_class.option */
+{ "use_buggy_lpc", "emulate old buggy lavc behavior", offsetof(FLACContext, buggy_lpc), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
+{ NULL }, /* terminating entry */
+};
+
+static const AVClass flac_decoder_class = { /* class descriptor used as ff_flac_decoder.p.priv_class */
+    .class_name = "FLAC decoder",
+    .item_name  = av_default_item_name,
+    .option     = options, /* the private option table defined in this file */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const FFCodec ff_flac_decoder = { /* codec descriptor tying this file's init/decode/close callbacks together */
+    .p.name         = "flac",
+    CODEC_LONG_NAME("FLAC (Free Lossless Audio Codec)"),
+    .p.type         = AVMEDIA_TYPE_AUDIO,
+    .p.id           = AV_CODEC_ID_FLAC,
+    .priv_data_size = sizeof(FLACContext), /* the callbacks read avctx->priv_data as a FLACContext */
+    .init           = flac_decode_init,
+    .close          = flac_decode_close,
+    FF_CODEC_DECODE_CB(flac_decode_frame),
+    .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF |
+                      AV_CODEC_CAP_DR1 |
+                      AV_CODEC_CAP_FRAME_THREADS,
+    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_S32,
+                                                      AV_SAMPLE_FMT_S32P,
+                                                      AV_SAMPLE_FMT_NONE }, /* list ends with AV_SAMPLE_FMT_NONE */
+    .p.priv_class   = &flac_decoder_class,
+};
diff --git a/media/ffvpx/libavcodec/flacdsp.c b/media/ffvpx/libavcodec/flacdsp.c
new file mode 100644
index 0000000000..42e231db53
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdsp.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavutil/samplefmt.h"
+#include "flacdsp.h"
+#include "config.h"
+
+#define SAMPLE_SIZE 16
+#define PLANAR 0
+#include "flacdsp_template.c" /* 16-bit interleaved variants, suffix _16 */
+
+#undef  PLANAR
+#define PLANAR 1
+#include "flacdsp_template.c" /* 16-bit planar variants, suffix _16p */
+
+#undef  SAMPLE_SIZE
+#undef  PLANAR
+#define SAMPLE_SIZE 32
+#define PLANAR 0
+#include "flacdsp_template.c" /* 32-bit interleaved variants, suffix _32 */
+
+#undef  PLANAR
+#define PLANAR 1
+#include "flacdsp_template.c" /* 32-bit planar variants, suffix _32p */
+
+static void flac_lpc_16_c(int32_t *decoded, const int coeffs[32],
+                          int pred_order, int qlevel, int len)
+{
+    int i, j;
+    /* In-place LPC with 32-bit sums; each pass of the main loop updates two consecutive samples. */
+    for (i = pred_order; i < len - 1; i += 2, decoded += 2) {
+        SUINT c = coeffs[0];
+        SUINT d = decoded[0];
+        int s0 = 0, s1 = 0; /* s0 predicts decoded[pred_order], s1 the sample after it */
+        for (j = 1; j < pred_order; j++) {
+            s0 += c*d;
+            d = decoded[j];
+            s1 += c*d;
+            c = coeffs[j];
+        }
+        s0 += c*d;
+        d = decoded[j] += (SUINT)(s0 >> qlevel); /* the just-updated sample is the last input of s1 */
+        s1 += c*d;
+        decoded[j + 1] += (SUINT)(s1 >> qlevel);
+    }
+    if (i < len) { /* one sample left over after the pairs */
+        int sum = 0;
+        for (j = 0; j < pred_order; j++)
+            sum += coeffs[j] * (SUINT)decoded[j];
+        decoded[j] = decoded[j] + (unsigned)(sum >> qlevel);
+    }
+}
+
+/* In-place LPC with a 64-bit sum: from index pred_order on, each sample gets (sum of coeffs[k] * the pred_order samples before it) >> qlevel added. */
+static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
+                          int pred_order, int qlevel, int len)
+{
+    int n, k;
+    for (n = pred_order; n < len; n++) {
+        int32_t *window = decoded + (n - pred_order);
+        int64_t acc = 0;
+        for (k = 0; k < pred_order; k++)
+            acc += (int64_t)coeffs[k] * window[k];
+        window[k] += acc >> qlevel;
+    }
+}
+
+av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels)
+{
+    c->lpc16        = flac_lpc_16_c;
+    c->lpc32        = flac_lpc_32_c;
+
+    switch (fmt) {
+    case AV_SAMPLE_FMT_S32:
+        c->decorrelate[0] = flac_decorrelate_indep_c_32;
+        c->decorrelate[1] = flac_decorrelate_ls_c_32;
+        c->decorrelate[2] = flac_decorrelate_rs_c_32;
+        c->decorrelate[3] = flac_decorrelate_ms_c_32;
+        break;
+
+    case AV_SAMPLE_FMT_S32P:
+        c->decorrelate[0] = flac_decorrelate_indep_c_32p;
+        c->decorrelate[1] = flac_decorrelate_ls_c_32p;
+        c->decorrelate[2] = flac_decorrelate_rs_c_32p;
+        c->decorrelate[3] = flac_decorrelate_ms_c_32p;
+        break;
+
+    case AV_SAMPLE_FMT_S16:
+        c->decorrelate[0] = flac_decorrelate_indep_c_16;
+        c->decorrelate[1] = flac_decorrelate_ls_c_16;
+        c->decorrelate[2] = flac_decorrelate_rs_c_16;
+        c->decorrelate[3] = flac_decorrelate_ms_c_16;
+        break;
+
+    case AV_SAMPLE_FMT_S16P:
+        c->decorrelate[0] = flac_decorrelate_indep_c_16p;
+        c->decorrelate[1] = flac_decorrelate_ls_c_16p;
+        c->decorrelate[2] = flac_decorrelate_rs_c_16p;
+        c->decorrelate[3] = flac_decorrelate_ms_c_16p;
+        break;
+    }
+
+#if ARCH_ARM
+    ff_flacdsp_init_arm(c, fmt, channels);
+#elif ARCH_X86
+    ff_flacdsp_init_x86(c, fmt, channels);
+#endif
+}
diff --git a/media/ffvpx/libavcodec/flacdsp.h b/media/ffvpx/libavcodec/flacdsp.h
new file mode 100644
index 0000000000..9f8ed38b66
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdsp.h
@@ -0,0 +1,43 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FLACDSP_H
+#define AVCODEC_FLACDSP_H
+
+#include <stdint.h>
+
+#include "libavutil/samplefmt.h"
+
+typedef struct FLACDSPContext { /* function-pointer table for the FLAC sample-processing routines */
+    void (*decorrelate[4])(uint8_t **out, int32_t **in, int channels,
+                           int len, int shift); /* output writers, filled per sample format by ff_flacdsp_init() */
+    void (*lpc16)(int32_t *samples, const int coeffs[32], int order,
+                  int qlevel, int len); /* in-place LPC; the C version sums in 32 bits */
+    void (*lpc32)(int32_t *samples, const int coeffs[32], int order,
+                  int qlevel, int len); /* in-place LPC; the C version sums in 64 bits */
+    void (*lpc16_encode)(int32_t *res, const int32_t *smp, int len, int order,
+                         const int32_t coefs[32], int shift); /* not assigned by ff_flacdsp_init() */
+    void (*lpc32_encode)(int32_t *res, const int32_t *smp, int len, int order,
+                         const int32_t coefs[32], int shift); /* not assigned by ff_flacdsp_init() */
+} FLACDSPContext;
+
+void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels);
+void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels);
+void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels);
+
+#endif /* AVCODEC_FLACDSP_H */
diff --git a/media/ffvpx/libavcodec/flacdsp_lpc_template.c b/media/ffvpx/libavcodec/flacdsp_lpc_template.c
new file mode 100644
index 0000000000..dd847d3b32
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdsp_lpc_template.c
@@ -0,0 +1,159 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include "libavutil/common.h"
+#include "mathops.h"
+
+#undef FUNC
+#undef sum_type
+#undef MUL
+#undef CLIP
+#undef FSUF
+/* FUNC(n) appends an underscore and SAMPLE_SIZE to a name, e.g. f_32 for SAMPLE_SIZE 32. */
+#define FUNC(n) AV_JOIN(n ## _, SAMPLE_SIZE)
+/* SAMPLE_SIZE 32: 64-bit accumulator, MUL64 products, result clipped with av_clipl_int32. Otherwise plain 32-bit arithmetic. */
+#if SAMPLE_SIZE == 32
+#   define sum_type  int64_t
+#   define MUL(a, b) MUL64(a, b)
+#   define CLIP(x) av_clipl_int32(x)
+#else
+#   define sum_type  int32_t
+#   define MUL(a, b) ((a) * (b))
+#   define CLIP(x) (x)
+#endif
+/* One tap: adds coefs[x-1] times the current s to p0, loads smp[i-x+1] into s, then adds the same coefficient times it to p1. */
+#define LPC1(x) {           \
+    int c = coefs[(x)-1];   \
+    p0   += MUL(c, s);      \
+    s     = smp[i-(x)+1];   \
+    p1   += MUL(c, s);      \
+}
+
+static av_always_inline void FUNC(lpc_encode_unrolled)(int32_t *res,
+                                  const int32_t *smp, int len, int order,
+                                  const int32_t *coefs, int shift, int big)
+{   /* res[i] = smp[i] - CLIP(prediction >> shift) for i from order on, two outputs per loop pass */
+    int i;
+    for (i = order; i < len; i += 2) {
+        int s  = smp[i-order]; /* oldest sample of the prediction window */
+        sum_type p0 = 0, p1 = 0; /* p0 predicts smp[i], p1 predicts smp[i+1] */
+        if (big) {
+            switch (order) { /* deliberate fall-through: entering at `order` runs taps order..1 */
+            case 32: LPC1(32)
+            case 31: LPC1(31)
+            case 30: LPC1(30)
+            case 29: LPC1(29)
+            case 28: LPC1(28)
+            case 27: LPC1(27)
+            case 26: LPC1(26)
+            case 25: LPC1(25)
+            case 24: LPC1(24)
+            case 23: LPC1(23)
+            case 22: LPC1(22)
+            case 21: LPC1(21)
+            case 20: LPC1(20)
+            case 19: LPC1(19)
+            case 18: LPC1(18)
+            case 17: LPC1(17)
+            case 16: LPC1(16)
+            case 15: LPC1(15)
+            case 14: LPC1(14)
+            case 13: LPC1(13)
+            case 12: LPC1(12)
+            case 11: LPC1(11)
+            case 10: LPC1(10)
+            case  9: LPC1( 9) /* no labels below: with big set, an order under 9 matches nothing and runs no tap */
+                     LPC1( 8)
+                     LPC1( 7)
+                     LPC1( 6)
+                     LPC1( 5)
+                     LPC1( 4)
+                     LPC1( 3)
+                     LPC1( 2)
+                     LPC1( 1)
+            }
+        } else {
+            switch (order) { /* same fall-through scheme for orders 1..8 */
+            case  8: LPC1( 8)
+            case  7: LPC1( 7)
+            case  6: LPC1( 6)
+            case  5: LPC1( 5)
+            case  4: LPC1( 4)
+            case  3: LPC1( 3)
+            case  2: LPC1( 2)
+            case  1: LPC1( 1)
+            }
+        }
+        res[i  ] = smp[i  ] - CLIP(p0 >> shift);
+        res[i+1] = smp[i+1] - CLIP(p1 >> shift); /* NOTE(review): index i+1 can equal len here -- presumably the buffers have room; confirm */
+    }
+}
+
+static void FUNC(flac_lpc_encode_c)(int32_t *res, const int32_t *smp, int len,
+                                    int order, const int32_t *coefs, int shift)
+{   /* Fill res: the first `order` entries are copies of smp, the rest are smp minus the shifted prediction. */
+    int i;
+    for (i = 0; i < order; i++) /* these samples are passed through unchanged */
+        res[i] = smp[i];
+#if CONFIG_SMALL
+    for (i = order; i < len; i += 2) { /* generic loop, two outputs per pass */
+        int j;
+        int s  = smp[i];
+        sum_type p0 = 0, p1 = 0;
+        for (j = 0; j < order; j++) {
+            int c = coefs[j];
+            p1   += MUL(c, s);
+            s     = smp[i-j-1];
+            p0   += MUL(c, s);
+        }
+        res[i  ] = smp[i  ] - CLIP(p0 >> shift);
+        res[i+1] = smp[i+1] - CLIP(p1 >> shift);
+    }
+#else
+    switch (order) { /* literal orders for 1..8, the 32-label variant for everything else */
+    case  1: FUNC(lpc_encode_unrolled)(res, smp, len,     1, coefs, shift, 0); break;
+    case  2: FUNC(lpc_encode_unrolled)(res, smp, len,     2, coefs, shift, 0); break;
+    case  3: FUNC(lpc_encode_unrolled)(res, smp, len,     3, coefs, shift, 0); break;
+    case  4: FUNC(lpc_encode_unrolled)(res, smp, len,     4, coefs, shift, 0); break;
+    case  5: FUNC(lpc_encode_unrolled)(res, smp, len,     5, coefs, shift, 0); break;
+    case  6: FUNC(lpc_encode_unrolled)(res, smp, len,     6, coefs, shift, 0); break;
+    case  7: FUNC(lpc_encode_unrolled)(res, smp, len,     7, coefs, shift, 0); break;
+    case  8: FUNC(lpc_encode_unrolled)(res, smp, len,     8, coefs, shift, 0); break;
+    default: FUNC(lpc_encode_unrolled)(res, smp, len, order, coefs, shift, 1); break;
+    }
+#endif
+}
+
+/* Comment for clarity/de-obfuscation.
+ *
+ * for (int i = order; i < len; i++) {
+ *     int32_t p = 0;
+ *     for (int j = 0; j < order; j++) {
+ *         int c = coefs[j];
+ *         int s = smp[(i-1)-j];
+ *         p    += c*s;
+ *     }
+ *     res[i] = smp[i] - (p >> shift);
+ * }
+ *
+ * The CONFIG_SMALL code above simplifies to this, in the case of SAMPLE_SIZE
+ * not being equal to 32 (at the present time that means for 16-bit audio). The
+ * code above does 2 samples per iteration.  Commit bfdd5bc (made all the way
+ * back in 2007) says that way is faster.
+ */
diff --git a/media/ffvpx/libavcodec/flacdsp_template.c b/media/ffvpx/libavcodec/flacdsp_template.c
new file mode 100644
index 0000000000..0a6fe59e28
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdsp_template.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include "libavutil/macros.h"
+
+#undef FUNC
+#undef FSUF
+#undef sample
+#undef sample_type
+#undef OUT
+#undef S
+/* Output element type: int32_t for SAMPLE_SIZE 32, int16_t for any other value. */
+#if SAMPLE_SIZE == 32
+#   define sample_type  int32_t
+#else
+#   define sample_type  int16_t
+#endif
+/* Planar: S(s, c, i) indexes s[c][i]. Interleaved: out[0] is one stream and each S() use advances the pointer, so the order of S() uses matters. */
+#if PLANAR
+#   define FSUF   AV_JOIN(SAMPLE_SIZE, p)
+#   define sample sample_type *
+#   define OUT(n) n
+#   define S(s, c, i) (s[c][i])
+#else
+#   define FSUF   SAMPLE_SIZE
+#   define sample sample_type
+#   define OUT(n) n[0]
+#   define S(s, c, i) (*s++)
+#endif
+/* Function names end in _<SAMPLE_SIZE>, with a trailing p for the planar variants. */
+#define FUNC(n) AV_JOIN(n ## _, FSUF)
+
+/* Write every channel out with only the left shift applied; channels form the inner loop, so interleaved output is filled one sample position at a time. */
+static void FUNC(flac_decorrelate_indep_c)(uint8_t **out, int32_t **in,
+                                           int channels, int len, int shift)
+{
+    sample *dst = (sample *) OUT(out);
+    int ch, pos;
+    for (pos = 0; pos < len; pos++)
+        for (ch = 0; ch < channels; ch++)
+            S(dst, ch, pos) = (int)((unsigned)in[ch][pos] << shift);
+}
+
+/* Left/side input: output 0 is in[0] and output 1 is in[0] - in[1], both shifted left; only the first two channels are used. */
+static void FUNC(flac_decorrelate_ls_c)(uint8_t **out, int32_t **in,
+                                        int channels, int len, int shift)
+{
+    sample *dst = (sample *) OUT(out);
+    int n;
+    for (n = 0; n < len; n++) {
+        const unsigned left = in[0][n];
+        const unsigned side = in[1][n];
+        S(dst, 0, n) =  left         << shift;
+        S(dst, 1, n) = (left - side) << shift;
+    }
+}
+
+/* Side/right input: output 0 is in[0] + in[1] and output 1 is in[1], both shifted left; only the first two channels are used. */
+static void FUNC(flac_decorrelate_rs_c)(uint8_t **out, int32_t **in,
+                                        int channels, int len, int shift)
+{
+    sample *dst = (sample *) OUT(out);
+    int n;
+    for (n = 0; n < len; n++) {
+        const unsigned side  = in[0][n];
+        const unsigned right = in[1][n];
+        S(dst, 0, n) = (side + right) << shift;
+        S(dst, 1, n) =  right         << shift;
+    }
+}
+
+/* Mid/side input: with m = in[0] - (in[1] >> 1), output 0 is m + in[1] and output 1 is m, both shifted left; sums are unsigned so they wrap. */
+static void FUNC(flac_decorrelate_ms_c)(uint8_t **out, int32_t **in,
+                                        int channels, int len, int shift)
+{
+    sample *dst = (sample *) OUT(out);
+    int n;
+
+    for (n = 0; n < len; n++) {
+        const int side = in[1][n];
+        const unsigned mid = (unsigned)in[0][n] - (side >> 1);
+        S(dst, 0, n) = (mid + side) << shift;
+        S(dst, 1, n) =  mid         << shift;
+    }
+}
diff --git a/media/ffvpx/libavcodec/frame_thread_encoder.h b/media/ffvpx/libavcodec/frame_thread_encoder.h
new file mode 100644
index 0000000000..201cba2a8f
--- /dev/null
+++ b/media/ffvpx/libavcodec/frame_thread_encoder.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2012 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FRAME_THREAD_ENCODER_H
+#define AVCODEC_FRAME_THREAD_ENCODER_H
+
+#include "avcodec.h"
+
+/**
+ * Initialize frame thread encoder.
+ * @note hardware encoders are not supported
+ */
+int ff_frame_thread_encoder_init(AVCodecContext *avctx);
+void ff_frame_thread_encoder_free(AVCodecContext *avctx);
+int ff_thread_video_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                                 AVFrame *frame, int *got_packet_ptr);
+
+#endif /* AVCODEC_FRAME_THREAD_ENCODER_H */
diff --git a/media/ffvpx/libavcodec/get_bits.h b/media/ffvpx/libavcodec/get_bits.h
new file mode 100644
index 0000000000..65dc080ddb
--- /dev/null
+++ b/media/ffvpx/libavcodec/get_bits.h
@@ -0,0 +1,685 @@
+/*
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * bitstream reader API header.
+ */
+
+#ifndef AVCODEC_GET_BITS_H
+#define AVCODEC_GET_BITS_H
+
+#include <stdint.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/avassert.h"
+
+#include "defs.h"
+#include "mathops.h"
+#include "vlc.h"
+
+/*
+ * Safe bitstream reading:
+ * optionally, the get_bits API can check to ensure that we
+ * don't read past input buffer boundaries. This is protected
+ * with CONFIG_SAFE_BITSTREAM_READER at the global level, and
+ * then below that with UNCHECKED_BITSTREAM_READER at the per-
+ * decoder level. This means that decoders that check internally
+ * can "#define UNCHECKED_BITSTREAM_READER 1" to disable
+ * overread checks.
+ * Boundary checking causes a minor performance penalty so for
+ * applications that won't want/need this, it can be disabled
+ * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0".
+ */
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
+#endif
+
+#ifndef CACHED_BITSTREAM_READER
+#define CACHED_BITSTREAM_READER 0
+#endif
+
+#if CACHED_BITSTREAM_READER
+
+// we always want the LE implementation, to provide get_bits_le()
+#define BITSTREAM_LE
+
+#ifndef BITSTREAM_READER_LE
+# define BITSTREAM_BE
+# define BITSTREAM_DEFAULT_BE
+#endif
+
+#include "bitstream.h"
+
+#undef BITSTREAM_LE
+#undef BITSTREAM_BE
+#undef BITSTREAM_DEFAULT_BE
+
+typedef BitstreamContext GetBitContext;
+
+#define get_bits_count      bits_tell
+#define get_bits_left       bits_left
+#define skip_bits_long      bits_skip
+#define skip_bits           bits_skip
+#define get_bits            bits_read_nz
+#define get_bitsz           bits_read
+#define get_bits_long       bits_read
+#define get_bits1           bits_read_bit
+#define get_bits64          bits_read_64
+#define get_xbits           bits_read_xbits
+#define get_sbits           bits_read_signed_nz
+#define get_sbits_long      bits_read_signed
+#define show_bits           bits_peek
+#define show_bits_long      bits_peek
+#define init_get_bits       bits_init
+#define init_get_bits8      bits_init8
+#define align_get_bits      bits_align
+#define get_vlc2            bits_read_vlc
+
+#define init_get_bits8_le(s, buffer, byte_size) bits_init8_le((BitstreamContextLE*)s, buffer, byte_size)
+#define get_bits_le(s, n)                       bits_read_le((BitstreamContextLE*)s, n)
+
+#define show_bits1(s)       bits_peek(s, 1)
+#define skip_bits1(s)       bits_skip(s, 1)
+
+#define skip_1stop_8data_bits bits_skip_1stop_8data
+
+#else   // CACHED_BITSTREAM_READER
+
+typedef struct GetBitContext {
+    const uint8_t *buffer, *buffer_end; /* buffer: base address the reader macros fetch from */
+    int index;                          /* read position in bits, relative to buffer */
+    int size_in_bits;                   /* stream length used by BITS_LEFT() */
+    int size_in_bits_plus8;             /* upper bound the checked reader clamps index to */
+} GetBitContext;
+
+static inline unsigned int get_bits(GetBitContext *s, int n);
+static inline void skip_bits(GetBitContext *s, int n);
+static inline unsigned int show_bits(GetBitContext *s, int n);
+
+/* Bitstream reader API docs:
+ * name
+ *   arbitrary name which is used as prefix for the internal variables
+ *
+ * gb
+ *   getbitcontext
+ *
+ * OPEN_READER(name, gb)
+ *   load gb into local variables
+ *
+ * CLOSE_READER(name, gb)
+ *   store local vars in gb
+ *
+ * UPDATE_CACHE(name, gb)
+ *   Refill the internal cache from the bitstream.
+ *   After this call at least MIN_CACHE_BITS will be available.
+ *
+ * GET_CACHE(name, gb)
+ *   Will output the contents of the internal cache,
+ *   next bit is MSB of 32 or 64 bits (FIXME 64 bits).
+ *
+ * SHOW_UBITS(name, gb, num)
+ *   Will return the next num bits.
+ *
+ * SHOW_SBITS(name, gb, num)
+ *   Will return the next num bits and do sign extension.
+ *
+ * SKIP_BITS(name, gb, num)
+ *   Will skip over the next num bits.
+ *   Note, this is equivalent to SKIP_CACHE; SKIP_COUNTER.
+ *
+ * SKIP_CACHE(name, gb, num)
+ *   Will remove the next num bits from the cache (note SKIP_COUNTER
+ *   MUST be called before UPDATE_CACHE / CLOSE_READER).
+ *
+ * SKIP_COUNTER(name, gb, num)
+ *   Will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS).
+ *
+ * LAST_SKIP_BITS(name, gb, num)
+ *   Like SKIP_BITS, to be used if next call is UPDATE_CACHE or CLOSE_READER.
+ *
+ * BITS_LEFT(name, gb)
+ *   Return the number of bits left
+ *
+ * For examples see get_bits, show_bits, skip_bits, get_vlc.
+ */
+
+#if defined LONG_BITSTREAM_READER
+#   define MIN_CACHE_BITS 32
+#else
+#   define MIN_CACHE_BITS 25
+#endif
+
+#define OPEN_READER_NOSIZE(name, gb)            \
+    unsigned int name ## _index = (gb)->index;  \
+    unsigned int av_unused name ## _cache
+/* The checked reader also copies size_in_bits_plus8 into a local, which BITS_AVAILABLE and SKIP_COUNTER compare the index against. */
+#if UNCHECKED_BITSTREAM_READER
+#define OPEN_READER(name, gb) OPEN_READER_NOSIZE(name, gb)
+
+#define BITS_AVAILABLE(name, gb) 1
+#else
+#define OPEN_READER(name, gb)                   \
+    OPEN_READER_NOSIZE(name, gb);               \
+    unsigned int name ## _size_plus8 = (gb)->size_in_bits_plus8
+
+#define BITS_AVAILABLE(name, gb) name ## _index < name ## _size_plus8
+#endif
+/* Only the index is written back to the context; the cache is a local and is dropped. */
+#define CLOSE_READER(name, gb) (gb)->index = name ## _index
+
+# ifdef LONG_BITSTREAM_READER
+
+# define UPDATE_CACHE_LE(name, gb) name ## _cache = \
+      AV_RL64((gb)->buffer + (name ## _index >> 3)) >> (name ## _index & 7)
+
+# define UPDATE_CACHE_BE(name, gb) name ## _cache = \
+      AV_RB64((gb)->buffer + (name ## _index >> 3)) >> (32 - (name ## _index & 7))
+
+#else
+
+# define UPDATE_CACHE_LE(name, gb) name ## _cache = \
+      AV_RL32((gb)->buffer + (name ## _index >> 3)) >> (name ## _index & 7)
+
+# define UPDATE_CACHE_BE(name, gb) name ## _cache = \
+      AV_RB32((gb)->buffer + (name ## _index >> 3)) << (name ## _index & 7)
+
+#endif
+
+
+#ifdef BITSTREAM_READER_LE
+
+# define UPDATE_CACHE(name, gb) UPDATE_CACHE_LE(name, gb)
+
+# define SKIP_CACHE(name, gb, num) name ## _cache >>= (num)
+
+#else
+
+# define UPDATE_CACHE(name, gb) UPDATE_CACHE_BE(name, gb)
+
+# define SKIP_CACHE(name, gb, num) name ## _cache <<= (num)
+
+#endif
+
+#if UNCHECKED_BITSTREAM_READER
+#   define SKIP_COUNTER(name, gb, num) name ## _index += (num)
+#else
+#   define SKIP_COUNTER(name, gb, num) \
+    name ## _index = FFMIN(name ## _size_plus8, name ## _index + (num))
+#endif
+
+#define BITS_LEFT(name, gb) ((int)((gb)->size_in_bits - name ## _index))
+
+#define SKIP_BITS(name, gb, num)                \
+    do {                                        \
+        SKIP_CACHE(name, gb, num);              \
+        SKIP_COUNTER(name, gb, num);            \
+    } while (0)
+
+#define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
+
+#define SHOW_UBITS_LE(name, gb, num) zero_extend(name ## _cache, num)
+#define SHOW_SBITS_LE(name, gb, num) sign_extend(name ## _cache, num)
+
+#define SHOW_UBITS_BE(name, gb, num) NEG_USR32(name ## _cache, num)
+#define SHOW_SBITS_BE(name, gb, num) NEG_SSR32(name ## _cache, num)
+
+#ifdef BITSTREAM_READER_LE
+#   define SHOW_UBITS(name, gb, num) SHOW_UBITS_LE(name, gb, num)
+#   define SHOW_SBITS(name, gb, num) SHOW_SBITS_LE(name, gb, num)
+#else
+#   define SHOW_UBITS(name, gb, num) SHOW_UBITS_BE(name, gb, num)
+#   define SHOW_SBITS(name, gb, num) SHOW_SBITS_BE(name, gb, num)
+#endif
+
+#define GET_CACHE(name, gb) ((uint32_t) name ## _cache)
+
+
+static inline int get_bits_count(const GetBitContext *s)
+{
+    return s->index; /* current read position in bits, i.e. the offset of the next bit to be read */
+}
+
+/**
+ * Skip the specified number of bits; a negative n moves the position back.
+ * @param n the number of bits to skip. With UNCHECKED_BITSTREAM_READER it is
+ *          added to the position as is, so the distance from the start must
+ *          not overflow int32_t (staying within the bitstream + padding is
+ *          sufficient). Otherwise the step is clipped so that the position
+ *          stays within 0 .. size_in_bits_plus8. */
+static inline void skip_bits_long(GetBitContext *s, int n)
+{
+#if UNCHECKED_BITSTREAM_READER
+    s->index += n;
+#else
+    s->index += av_clip(n, -s->index, s->size_in_bits_plus8 - s->index);
+#endif
+}
+
+/**
+ * Read an MPEG-1 DC-style VLC value (sign bit followed by a mantissa that
+ * has no MSB). If the MSB is not set, the value is negative.
+ * @param n length in bits (asserted to be 1-25)
+ */
+static inline int get_xbits(GetBitContext *s, int n)
+{
+    register int sign;
+    register int32_t cache;
+    OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE(re, s);
+    cache = GET_CACHE(re, s);
+    sign  = ~cache >> 31; // derived from the inverted top bit of the cache
+    LAST_SKIP_BITS(re, s, n);
+    CLOSE_READER(re, s);
+    return (NEG_USR32(sign ^ cache, n) ^ sign) - sign;
+}
+/** Variant of get_xbits() that always uses the little-endian (_LE) refill and extraction. */
+static inline int get_xbits_le(GetBitContext *s, int n)
+{
+    register int sign;
+    register int32_t cache;
+    OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE_LE(re, s);
+    cache = GET_CACHE(re, s);
+    sign  = sign_extend(~cache, n) >> 31;
+    LAST_SKIP_BITS(re, s, n);
+    CLOSE_READER(re, s);
+    return (zero_extend(sign ^ cache, n) ^ sign) - sign;
+}
+/** Read 1-25 bits through SHOW_SBITS (the signed peek) and advance past them. */
+static inline int get_sbits(GetBitContext *s, int n)
+{
+    register int tmp;
+    OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE(re, s);
+    tmp = SHOW_SBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n);
+    CLOSE_READER(re, s);
+    return tmp;
+}
+
+/**
+ * Read 1-25 bits as an unsigned value and advance the read position.
+ */
+static inline unsigned int get_bits(GetBitContext *s, int n)
+{
+    register unsigned int tmp;
+    OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE(re, s);
+    tmp = SHOW_UBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n);
+    CLOSE_READER(re, s);
+    av_assert2(tmp < UINT64_C(1) << n); // the result must fit in n bits
+    return tmp;
+}
+
+/** Read 0-25 bits; a request for 0 bits yields 0 without calling get_bits(). */
+static av_always_inline int get_bitsz(GetBitContext *s, int n)
+{
+    if (!n)
+        return 0;
+    return get_bits(s, n);
+}
+/** Read 1-25 bits with the little-endian (_LE) macros, whatever bit order is compiled in. */
+static inline unsigned int get_bits_le(GetBitContext *s, int n)
+{
+    register int tmp;
+    OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE_LE(re, s);
+    tmp = SHOW_UBITS_LE(re, s, n);
+    LAST_SKIP_BITS(re, s, n);
+    CLOSE_READER(re, s);
+    return tmp;
+}
+
+/**
+ * Show 1-25 bits; nothing is skipped and there is no CLOSE_READER write-back.
+ */
+static inline unsigned int show_bits(GetBitContext *s, int n)
+{
+    register unsigned int tmp;
+    OPEN_READER_NOSIZE(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE(re, s);
+    tmp = SHOW_UBITS(re, s, n);
+    return tmp;
+}
+/** Advance the read position by n bits without reading them (index update only). */
+static inline void skip_bits(GetBitContext *s, int n)
+{
+    OPEN_READER(re, s);
+    LAST_SKIP_BITS(re, s, n);
+    CLOSE_READER(re, s);
+}
+/** Read one bit and advance; the checked reader stops advancing at size_in_bits_plus8. */
+static inline unsigned int get_bits1(GetBitContext *s)
+{
+    unsigned int index = s->index;
+    uint8_t result     = s->buffer[index >> 3];
+#ifdef BITSTREAM_READER_LE
+    result >>= index & 7;
+    result  &= 1;
+#else
+    result <<= index & 7; // bring the wanted bit to bit 7; uint8_t drops what is above
+    result >>= 8 - 1;     // then move it down to bit 0
+#endif
+#if !UNCHECKED_BITSTREAM_READER
+    if (s->index < s->size_in_bits_plus8)
+#endif
+        index++;
+    s->index = index;
+
+    return result;
+}
+/** Peek at the next bit; shorthand for show_bits(s, 1). */
+static inline unsigned int show_bits1(GetBitContext *s)
+{
+    return show_bits(s, 1);
+}
+/** Skip one bit; shorthand for skip_bits(s, 1). */
+static inline void skip_bits1(GetBitContext *s)
+{
+    skip_bits(s, 1);
+}
+
+/**
+ * Read 0-32 bits; more than MIN_CACHE_BITS are fetched as two get_bits() calls.
+ */
+static inline unsigned int get_bits_long(GetBitContext *s, int n)
+{
+    av_assert2(n>=0 && n<=32);
+    if (!n) {
+        return 0;
+    } else if (n <= MIN_CACHE_BITS) {
+        return get_bits(s, n);
+    } else {
+#ifdef BITSTREAM_READER_LE
+        unsigned ret = get_bits(s, 16); // the first 16 bits read form the low half
+        return ret | (get_bits(s, n - 16) << 16);
+#else
+        unsigned ret = get_bits(s, 16) << (n - 16); // the first 16 bits read form the top part
+        return ret | get_bits(s, n - 16);
+#endif
+    }
+}
+
+/**
+ * Read 0-64 bits; more than 32 are assembled from two get_bits_long() calls.
+ */
+static inline uint64_t get_bits64(GetBitContext *s, int n)
+{
+    if (n <= 32) {
+        return get_bits_long(s, n);
+    } else {
+#ifdef BITSTREAM_READER_LE
+        uint64_t ret = get_bits_long(s, 32); // the first 32 bits read form the low word
+        return ret | (uint64_t) get_bits_long(s, n - 32) << 32;
+#else
+        uint64_t ret = (uint64_t) get_bits_long(s, n - 32) << 32; // the first n - 32 bits read form the high word
+        return ret | get_bits_long(s, 32);
+#endif
+    }
+}
+
+/**
+ * Read 0-32 bits and sign-extend the result from n bits.
+ */
+static inline int get_sbits_long(GetBitContext *s, int n)
+{
+    unsigned int raw;
+    if (n == 0) // sign_extend(x, 0) is undefined
+        return 0;
+    raw = get_bits_long(s, n);
+    return sign_extend(raw, n);
+}
+
+/**
+ * Read 0-64 bits and sign-extend the result from n bits.
+ */
+static inline int64_t get_sbits64(GetBitContext *s, int n)
+{
+    uint64_t raw;
+    if (n == 0) // sign_extend(x, 0) is undefined
+        return 0;
+    raw = get_bits64(s, n);
+    return sign_extend64(raw, n);
+}
+
+/** Show 0-32 bits without consuming them; n == 0 yields 0, as in get_bits_long(). */
+static inline unsigned int show_bits_long(GetBitContext *s, int n)
+{
+    if (!n) {
+        return 0; // show_bits() asserts n > 0, so 0 must not reach it
+    } else if (n <= MIN_CACHE_BITS) {
+        return show_bits(s, n);
+    } else {
+        GetBitContext gb = *s; // read from a copy so the caller's position is untouched
+        return get_bits_long(&gb, n);
+    }
+}
+
+
+/**
+ * Initialize GetBitContext.
+ * @param buffer bitstream buffer, must be AV_INPUT_BUFFER_PADDING_SIZE bytes
+ *        larger than the actual read bits because some optimized bitstream
+ *        readers read 32 or 64 bit at once and could read over the end
+ * @param bit_size the size of the buffer in bits
+ * @return 0, or AVERROR_INVALIDDATA (empty reader) if buffer is NULL or bit_size is negative or too large.
+ */
+static inline int init_get_bits(GetBitContext *s, const uint8_t *buffer,
+                                int bit_size)
+{
+    int buffer_size;
+    int ret = 0;
+    /* Bad arguments: still initialize the context, as an empty reader, and report the error. */
+    if (bit_size >= INT_MAX - FFMAX(7, AV_INPUT_BUFFER_PADDING_SIZE*8) || bit_size < 0 || !buffer) {
+        bit_size    = 0;
+        buffer      = NULL;
+        ret         = AVERROR_INVALIDDATA;
+    }
+
+    buffer_size = (bit_size + 7) >> 3; // bit_size rounded up to whole bytes
+
+    s->buffer             = buffer;
+    s->size_in_bits       = bit_size;
+    s->size_in_bits_plus8 = bit_size + 8;
+    s->buffer_end         = buffer + buffer_size;
+    s->index              = 0;
+
+    return ret;
+}
+
+/**
+ * Initialize GetBitContext.
+ * @param buffer bitstream buffer, must be AV_INPUT_BUFFER_PADDING_SIZE bytes
+ *        larger than the actual read bits because some optimized bitstream
+ *        readers read 32 or 64 bit at once and could read over the end
+ * @param byte_size the size of the buffer in bytes
+ * @return 0 on success, AVERROR_INVALIDDATA if byte_size is negative or exceeds INT_MAX / 8, or if init_get_bits() fails.
+ */
+static inline int init_get_bits8(GetBitContext *s, const uint8_t *buffer,
+                                 int byte_size)
+{
+    if (byte_size > INT_MAX / 8 || byte_size < 0)
+        byte_size = -1; // becomes a negative bit size, which init_get_bits() rejects
+    return init_get_bits(s, buffer, byte_size * 8);
+}
+/** In this reader the body is the same as init_get_bits8(): validate byte_size, then init_get_bits(). */
+static inline int init_get_bits8_le(GetBitContext *s, const uint8_t *buffer,
+                                    int byte_size)
+{
+    if (byte_size > INT_MAX / 8 || byte_size < 0)
+        byte_size = -1; // becomes a negative bit size, which init_get_bits() rejects
+    return init_get_bits(s, buffer, byte_size * 8);
+}
+/** Skip ahead to the next byte boundary (if not already on one) and return a pointer to that byte. */
+static inline const uint8_t *align_get_bits(GetBitContext *s)
+{
+    const int pad = -get_bits_count(s) & 7;
+    if (pad != 0)
+        skip_bits(s, pad);
+    return s->buffer + (s->index >> 3);
+}
+
+/**
+ * If the vlc code is invalid and max_depth=1, then no bits will be removed.
+ * If the vlc code is invalid and max_depth>1, then the number of bits removed
+ * is undefined.
+ */
+#define GET_VLC(code, name, gb, table, bits, max_depth)         \
+    do {                                                        \
+        int n, nb_bits;                                         \
+        unsigned int index;                                     \
+                                                                \
+        index = SHOW_UBITS(name, gb, bits);                     \
+        code  = table[index].sym;                               \
+        n     = table[index].len;                               \
+                                                                \
+        if (max_depth > 1 && n < 0) {                           \
+            LAST_SKIP_BITS(name, gb, bits);                     \
+            UPDATE_CACHE(name, gb);                             \
+                                                                \
+            nb_bits = -n;                                       \
+                                                                \
+            index = SHOW_UBITS(name, gb, nb_bits) + code;       \
+            code  = table[index].sym;                           \
+            n     = table[index].len;                           \
+            if (max_depth > 2 && n < 0) {                       \
+                LAST_SKIP_BITS(name, gb, nb_bits);              \
+                UPDATE_CACHE(name, gb);                         \
+                                                                \
+                nb_bits = -n;                                   \
+                                                                \
+                index = SHOW_UBITS(name, gb, nb_bits) + code;   \
+                code  = table[index].sym;                       \
+                n     = table[index].len;                       \
+            }                                                   \
+        }                                                       \
+        SKIP_BITS(name, gb, n);                                 \
+    } while (0)
+
+#define GET_RL_VLC(level, run, name, gb, table, bits,  \
+                   max_depth, need_update)                      \
+    do {                                                        \
+        int n, nb_bits;                                         \
+        unsigned int index;                                     \
+                                                                \
+        index = SHOW_UBITS(name, gb, bits);                     \
+        level = table[index].level;                             \
+        n     = table[index].len;                               \
+                                                                \
+        if (max_depth > 1 && n < 0) {                           \
+            SKIP_BITS(name, gb, bits);                          \
+            if (need_update) {                                  \
+                UPDATE_CACHE(name, gb);                         \
+            }                                                   \
+                                                                \
+            nb_bits = -n;                                       \
+                                                                \
+            index = SHOW_UBITS(name, gb, nb_bits) + level;      \
+            level = table[index].level;                         \
+            n     = table[index].len;                           \
+            if (max_depth > 2 && n < 0) {                       \
+                LAST_SKIP_BITS(name, gb, nb_bits);              \
+                if (need_update) {                              \
+                    UPDATE_CACHE(name, gb);                     \
+                }                                               \
+                nb_bits = -n;                                   \
+                                                                \
+                index = SHOW_UBITS(name, gb, nb_bits) + level;  \
+                level = table[index].level;                     \
+                n     = table[index].len;                       \
+            }                                                   \
+        }                                                       \
+        run = table[index].run;                                 \
+        SKIP_BITS(name, gb, n);                                 \
+    } while (0)
+
+/**
+ * Parse a vlc code.
+ * @param bits is the number of bits which will be read at once, must be
+ *             identical to nb_bits in init_vlc()
+ * @param max_depth is the number of times `bits` bits must be read to
+ *                  completely read the longest vlc code
+ *                  = (max_vlc_length + bits - 1) / bits
+ * @returns the code parsed or -1 if no vlc matches
+ */
+static av_always_inline int get_vlc2(GetBitContext *s, const VLCElem *table,
+                                     int bits, int max_depth)
+{
+    int code;
+
+    OPEN_READER(re, s);
+    UPDATE_CACHE(re, s);
+
+    GET_VLC(code, re, s, table, bits, max_depth);
+
+    CLOSE_READER(re, s);
+
+    return code;
+}
+
+/* Read a one- or two-bit code with a value in 0..2: */
+/* "0" -> 0, "10" -> 1, "11" -> 2. */
+static inline int decode012(GetBitContext *gb)
+{
+    if (!get_bits1(gb))
+        return 0;
+
+    return get_bits1(gb) + 1;
+}
+/** Read a one- or two-bit code: "1" -> 0, "01" -> 1, "00" -> 2. */
+static inline int decode210(GetBitContext *gb)
+{
+    if (!get_bits1(gb))
+        return 2 - get_bits1(gb);
+
+    return 0;
+}
+/** Return size_in_bits minus the current position; negative once the position has passed size_in_bits. */
+static inline int get_bits_left(GetBitContext *gb)
+{
+    return gb->size_in_bits - get_bits_count(gb);
+}
+
+/** Skip (1 flag bit + 8 data bits) groups until a 0 flag bit is read;
+ *  returns 0, or AVERROR_INVALIDDATA when no bits are left before a
+ *  flag bit can be read. */
+static inline int skip_1stop_8data_bits(GetBitContext *gb)
+{
+    for (;;) {
+        if (get_bits_left(gb) <= 0)
+            return AVERROR_INVALIDDATA;
+        if (!get_bits1(gb))
+            return 0;
+        skip_bits(gb, 8);
+    }
+}
+
+#endif // CACHED_BITSTREAM_READER
+
+#endif /* AVCODEC_GET_BITS_H */
diff --git a/media/ffvpx/libavcodec/get_buffer.c b/media/ffvpx/libavcodec/get_buffer.c
new file mode 100644
index 0000000000..a04fd878de
--- /dev/null
+++ b/media/ffvpx/libavcodec/get_buffer.c
@@ -0,0 +1,304 @@
+/*
+ * The default get_buffer2() implementation
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/avutil.h"
+#include "libavutil/buffer.h"
+#include "libavutil/frame.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/samplefmt.h"
+#include "libavutil/version.h"
+
+#include "avcodec.h"
+#include "internal.h"
+
+typedef struct FramePool {
+    /**
+     * Pools for each data plane. For audio all the planes have the same size,
+     * so only pools[0] is used.
+     */
+    AVBufferPool *pools[4];
+
+    /*
+     * Pool parameters
+     */
+    int format;
+    int width, height;
+    int stride_align[AV_NUM_DATA_POINTERS];
+    int linesize[4];
+    int planes;
+    int channels;
+    int samples;
+} FramePool;
+/* Free callback passed to av_buffer_create(): uninit every plane pool, then free the FramePool itself. */
+static void frame_pool_free(void *opaque, uint8_t *data)
+{
+    FramePool *pool = (FramePool*)data;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(pool->pools); i++)
+        av_buffer_pool_uninit(&pool->pools[i]);
+
+    av_freep(&data);
+}
+/* Allocate a zeroed FramePool wrapped in a new AVBufferRef (released through frame_pool_free). */
+static AVBufferRef *frame_pool_alloc(void)
+{
+    AVBufferRef *ref;
+    FramePool *fp;
+
+    fp = av_mallocz(sizeof(*fp));
+    if (!fp)
+        return NULL;
+
+    ref = av_buffer_create((uint8_t*)fp, sizeof(*fp),
+                           frame_pool_free, NULL, 0);
+    /* Wrapping failed: the struct was not taken over, so free it here. */
+    if (!ref)
+        av_freep(&fp);
+
+    return ref;
+}
+/* Make avctx->internal->pool match frame's format and size; returns 0 or a negative error code. */
+static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame)
+{
+    FramePool *pool = avctx->internal->pool ?
+                      (FramePool*)avctx->internal->pool->data : NULL;
+    AVBufferRef *pool_buf;
+    int i, ret, ch, planes;
+    /* ch and planes are assigned here for audio only; the audio paths below are their only readers. */
+    if (avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
+        int planar = av_sample_fmt_is_planar(frame->format);
+        ch     = frame->ch_layout.nb_channels;
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+        if (!ch)
+            ch = frame->channels;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        planes = planar ? ch : 1;
+    }
+    /* Keep the existing pool when its recorded parameters already match this frame. */
+    if (pool && pool->format == frame->format) {
+        if (avctx->codec_type == AVMEDIA_TYPE_VIDEO &&
+            pool->width == frame->width && pool->height == frame->height)
+            return 0;
+        if (avctx->codec_type == AVMEDIA_TYPE_AUDIO && pool->planes == planes &&
+            pool->channels == ch && frame->nb_samples == pool->samples)
+            return 0;
+    }
+    /* Build the replacement in a fresh buffer; it is installed only after every step succeeded. */
+    pool_buf = frame_pool_alloc();
+    if (!pool_buf)
+        return AVERROR(ENOMEM);
+    pool = (FramePool*)pool_buf->data;
+
+    switch (avctx->codec_type) {
+    case AVMEDIA_TYPE_VIDEO: {
+        int linesize[4];
+        int w = frame->width;
+        int h = frame->height;
+        int unaligned;
+        ptrdiff_t linesize1[4];
+        size_t size[4];
+
+        avcodec_align_dimensions2(avctx, &w, &h, pool->stride_align);
+
+        do {
+            // NOTE: do not align linesizes individually, this breaks e.g. assumptions
+            // that linesize[0] == 2*linesize[1] in the MPEG-encoder for 4:2:2
+            ret = av_image_fill_linesizes(linesize, avctx->pix_fmt, w);
+            if (ret < 0)
+                goto fail;
+            // increase alignment of w for next try (rhs gives the lowest bit set in w)
+            w += w & ~(w - 1);
+
+            unaligned = 0;
+            for (i = 0; i < 4; i++)
+                unaligned |= linesize[i] % pool->stride_align[i];
+        } while (unaligned);
+        /* Copy the int linesizes into the ptrdiff_t array handed to av_image_fill_plane_sizes(). */
+        for (i = 0; i < 4; i++)
+            linesize1[i] = linesize[i];
+        ret = av_image_fill_plane_sizes(size, avctx->pix_fmt, h, linesize1);
+        if (ret < 0)
+            goto fail;
+        /* Create one buffer pool per plane with a non-zero size, with extra room added to each buffer. */
+        for (i = 0; i < 4; i++) {
+            pool->linesize[i] = linesize[i];
+            if (size[i]) {
+                if (size[i] > INT_MAX - (16 + STRIDE_ALIGN - 1)) {
+                    ret = AVERROR(EINVAL);
+                    goto fail;
+                }
+                pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1,
+                                                     CONFIG_MEMORY_POISONING ?
+                                                        NULL :
+                                                        av_buffer_allocz);
+                if (!pool->pools[i]) {
+                    ret = AVERROR(ENOMEM);
+                    goto fail;
+                }
+            }
+        }
+        pool->format = frame->format;
+        pool->width  = frame->width;
+        pool->height = frame->height;
+
+        break;
+        }
+    case AVMEDIA_TYPE_AUDIO: {
+        ret = av_samples_get_buffer_size(&pool->linesize[0], ch,
+                                         frame->nb_samples, frame->format, 0);
+        if (ret < 0)
+            goto fail;
+
+        pool->pools[0] = av_buffer_pool_init(pool->linesize[0], NULL);
+        if (!pool->pools[0]) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        pool->format     = frame->format;
+        pool->planes     = planes;
+        pool->channels   = ch;
+        pool->samples = frame->nb_samples;
+        break;
+        }
+    default: av_assert0(0);
+    }
+    /* Success: drop the reference to the old pool and install the new one. */
+    av_buffer_unref(&avctx->internal->pool);
+    avctx->internal->pool = pool_buf;
+
+    return 0;
+fail:
+    av_buffer_unref(&pool_buf);
+    return ret;
+}
+/* Fill an audio frame with one pooled buffer per plane; returns 0 or AVERROR(ENOMEM). */
+static int audio_get_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+    FramePool *pool = (FramePool*)avctx->internal->pool->data;
+    int planes = pool->planes;
+    int i;
+
+    frame->linesize[0] = pool->linesize[0];
+
+    if (planes > AV_NUM_DATA_POINTERS) {
+        /* More planes than frame->data can hold: allocate the extended arrays. */
+        frame->extended_data = av_calloc(planes, sizeof(*frame->extended_data));
+        frame->extended_buf  = av_calloc(planes - AV_NUM_DATA_POINTERS,
+                                          sizeof(*frame->extended_buf));
+        if (!frame->extended_data || !frame->extended_buf) {
+            av_freep(&frame->extended_data);
+            av_freep(&frame->extended_buf);
+            return AVERROR(ENOMEM);
+        }
+        frame->nb_extended_buf = planes - AV_NUM_DATA_POINTERS; // set only once extended_buf exists
+    } else {
+        frame->extended_data = frame->data;
+        av_assert0(frame->nb_extended_buf == 0);
+    }
+    /* The first AV_NUM_DATA_POINTERS planes go in buf[]/data[], the rest in extended_buf[]. */
+    for (i = 0; i < FFMIN(planes, AV_NUM_DATA_POINTERS); i++) {
+        frame->buf[i] = av_buffer_pool_get(pool->pools[0]);
+        if (!frame->buf[i])
+            goto fail;
+        frame->extended_data[i] = frame->data[i] = frame->buf[i]->data;
+    }
+    for (i = 0; i < frame->nb_extended_buf; i++) {
+        frame->extended_buf[i] = av_buffer_pool_get(pool->pools[0]);
+        if (!frame->extended_buf[i])
+            goto fail;
+        frame->extended_data[i + AV_NUM_DATA_POINTERS] = frame->extended_buf[i]->data;
+    }
+
+    if (avctx->debug & FF_DEBUG_BUFFERS)
+        av_log(avctx, AV_LOG_DEBUG, "default_get_buffer called on frame %p\n", frame);
+
+    return 0;
+fail:
+    av_frame_unref(frame);
+    return AVERROR(ENOMEM);
+}
+/* Fill a video frame with one pooled buffer per plane; returns 0, -1 if pic already has data, or AVERROR(ENOMEM). */
+static int video_get_buffer(AVCodecContext *s, AVFrame *pic)
+{
+    FramePool *pool = (FramePool*)s->internal->pool->data;
+    int i;
+
+    if (pic->data[0] || pic->data[1] || pic->data[2] || pic->data[3]) {
+        av_log(s, AV_LOG_ERROR, "pic->data[*]!=NULL in avcodec_default_get_buffer\n");
+        return -1;
+    }
+
+    memset(pic->data, 0, sizeof(pic->data));
+    pic->extended_data = pic->data;
+    /* Take a buffer for each plane that has a pool; the first missing pool ends the loop. */
+    for (i = 0; i < 4 && pool->pools[i]; i++) {
+        pic->linesize[i] = pool->linesize[i];
+
+        pic->buf[i] = av_buffer_pool_get(pool->pools[i]);
+        if (!pic->buf[i])
+            goto fail;
+
+        pic->data[i] = pic->buf[i]->data;
+    }
+    for (; i < AV_NUM_DATA_POINTERS; i++) { // clear the entries that got no buffer
+        pic->data[i] = NULL;
+        pic->linesize[i] = 0;
+    }
+
+    if (s->debug & FF_DEBUG_BUFFERS)
+        av_log(s, AV_LOG_DEBUG, "default_get_buffer called on pic %p\n", pic);
+
+    return 0;
+fail:
+    av_frame_unref(pic);
+    return AVERROR(ENOMEM);
+}
+/* Default get_buffer2(): use hw_frames_ctx when set, otherwise the internal frame pool; flags is not read here. */
+int avcodec_default_get_buffer2(AVCodecContext *avctx, AVFrame *frame, int flags)
+{
+    int ret;
+
+    if (avctx->hw_frames_ctx) {
+        ret = av_hwframe_get_buffer(avctx->hw_frames_ctx, frame, 0);
+        frame->width  = avctx->coded_width; // assigned whether or not the call above succeeded
+        frame->height = avctx->coded_height;
+        return ret;
+    }
+
+    if ((ret = update_frame_pool(avctx, frame)) < 0)
+        return ret;
+
+    switch (avctx->codec_type) {
+    case AVMEDIA_TYPE_VIDEO:
+        return video_get_buffer(avctx, frame);
+    case AVMEDIA_TYPE_AUDIO:
+        return audio_get_buffer(avctx, frame);
+    default:
+        return -1; // only video and audio are served by the pool
+    }
+}
diff --git a/media/ffvpx/libavcodec/golomb.c b/media/ffvpx/libavcodec/golomb.c
new file mode 100644
index 0000000000..f9ca8149eb
--- /dev/null
+++ b/media/ffvpx/libavcodec/golomb.c
@@ -0,0 +1,173 @@
+/*
+ * exp golomb vlc stuff
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * @brief
+ *     exp golomb vlc stuff
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include <stdint.h>
+
+const uint8_t ff_golomb_vlc_len[512]={
+19,17,15,15,13,13,13,13,11,11,11,11,11,11,11,11,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+
+const uint8_t ff_ue_golomb_vlc_code[512]={
+32,32,32,32,32,32,32,32,31,32,32,32,32,32,32,32,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,
+ 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,11,11,11,11,12,12,12,12,13,13,13,13,14,14,14,14,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+const int8_t ff_se_golomb_vlc_code[512]={
+ 17, 17, 17, 17, 17, 17, 17, 17, 16, 17, 17, 17, 17, 17, 17, 17,  8, -8,  9, -9, 10,-10, 11,-11, 12,-12, 13,-13, 14,-14, 15,-15,
+  4,  4,  4,  4, -4, -4, -4, -4,  5,  5,  5,  5, -5, -5, -5, -5,  6,  6,  6,  6, -6, -6, -6, -6,  7,  7,  7,  7, -7, -7, -7, -7,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+};
+
+
+const uint8_t ff_ue_golomb_len[256]={
+ 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,11,
+11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,13,
+13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,17,
+};
+
+const uint8_t ff_interleaved_golomb_vlc_len[256]={
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+};
+
+const uint8_t ff_interleaved_ue_golomb_vlc_code[256]={
+ 15,16,7, 7, 17,18,8, 8, 3, 3, 3, 3, 3, 3, 3, 3,
+ 19,20,9, 9, 21,22,10,10,4, 4, 4, 4, 4, 4, 4, 4,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 23,24,11,11,25,26,12,12,5, 5, 5, 5, 5, 5, 5, 5,
+ 27,28,13,13,29,30,14,14,6, 6, 6, 6, 6, 6, 6, 6,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+const int8_t ff_interleaved_se_golomb_vlc_code[256]={
+  8, -8,  4,  4,  9, -9, -4, -4,  2,  2,  2,  2,  2,  2,  2,  2,
+ 10,-10,  5,  5, 11,-11, -5, -5, -2, -2, -2, -2, -2, -2, -2, -2,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+ 12,-12,  6,  6, 13,-13, -6, -6,  3,  3,  3,  3,  3,  3,  3,  3,
+ 14,-14,  7,  7, 15,-15, -7, -7, -3, -3, -3, -3, -3, -3, -3, -3,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+};
+
+const uint8_t ff_interleaved_dirac_golomb_vlc_code[256]={
+0, 1, 0, 0, 2, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+4, 5, 2, 2, 6, 7, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+8, 9, 4, 4, 10,11,5, 5, 2, 2, 2, 2, 2, 2, 2, 2,
+12,13,6, 6, 14,15,7, 7, 3, 3, 3, 3, 3, 3, 3, 3,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,};
diff --git a/media/ffvpx/libavcodec/golomb.h b/media/ffvpx/libavcodec/golomb.h
new file mode 100644
index 0000000000..164c2583b6
--- /dev/null
+++ b/media/ffvpx/libavcodec/golomb.h
@@ -0,0 +1,616 @@
+/*
+ * exp golomb vlc stuff
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * @brief
+ *     exp golomb vlc stuff
+ * @author Michael Niedermayer <michaelni@gmx.at> and Alex Beregszaszi
+ */
+
+#ifndef AVCODEC_GOLOMB_H
+#define AVCODEC_GOLOMB_H
+
+#include <stdint.h>
+
+#include "get_bits.h"
+
+#define INVALID_VLC           0x80000000
+
+extern const uint8_t ff_golomb_vlc_len[512];
+extern const uint8_t ff_ue_golomb_vlc_code[512];
+extern const  int8_t ff_se_golomb_vlc_code[512];
+
+extern const uint8_t ff_interleaved_golomb_vlc_len[256];
+extern const uint8_t ff_interleaved_ue_golomb_vlc_code[256];
+extern const  int8_t ff_interleaved_se_golomb_vlc_code[256];
+extern const uint8_t ff_interleaved_dirac_golomb_vlc_code[256];
+
+/**
+ * Read an unsigned Exp-Golomb code in the range 0 to 8190.
+ *
+ * @returns the read value or a negative error code.
+ */
+static inline int get_ue_golomb(GetBitContext *gb)
+{
+    unsigned int buf;
+
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32); /* peek at the next 32 bits without consuming them */
+
+    if (buf >= (1 << 27)) { /* at most 4 leading zero bits: decode by table lookup */
+        buf >>= 32 - 9; /* the top 9 bits are the table index */
+        skip_bits_long(gb, ff_golomb_vlc_len[buf]);
+
+        return ff_ue_golomb_vlc_code[buf];
+    } else {
+        int log = 2 * av_log2(buf) - 31; /* 32 - log == 2 * leading_zeros + 1 */
+
+        skip_bits_long(gb, 32 - log);
+        if (log < 7) /* av_log2(buf) < 19, i.e. more than 12 leading zero bits */
+            return AVERROR_INVALIDDATA;
+        buf >>= log; /* drop everything below the codeword */
+        buf--; /* the codeword stores value + 1 */
+
+        return buf;
+    }
+#else /* !CACHED_BITSTREAM_READER: same decoding through the reader macros */
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    if (buf >= (1 << 27)) { /* at most 4 leading zero bits: decode by table lookup */
+        buf >>= 32 - 9; /* the top 9 bits are the table index */
+        LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_ue_golomb_vlc_code[buf];
+    } else {
+        int log = 2 * av_log2(buf) - 31; /* 32 - log == 2 * leading_zeros + 1 */
+        LAST_SKIP_BITS(re, gb, 32 - log);
+        CLOSE_READER(re, gb);
+        if (log < 7) /* av_log2(buf) < 19, i.e. more than 12 leading zero bits */
+            return AVERROR_INVALIDDATA;
+        buf >>= log; /* drop everything below the codeword */
+        buf--; /* the codeword stores value + 1 */
+
+        return buf;
+    }
+#endif
+}
+
+/**
+ * Read an unsigned Exp-Golomb code in the range 0 to UINT32_MAX-1.
+ */
+static inline unsigned get_ue_golomb_long(GetBitContext *gb)
+{
+    /* count of leading zero bits within the next 32 bits of the stream */
+    const unsigned zeros = 31 - av_log2(show_bits_long(gb, 32));
+
+    skip_bits_long(gb, zeros);
+
+    /* the following zeros + 1 bits hold value + 1 */
+    return get_bits_long(gb, zeros + 1) - 1;
+}
+
+/**
+ * read unsigned exp golomb code, constrained to a max of 31.
+ * If the value encountered is not in 0..31, the return value
+ * is outside the range 0..30.
+ */
+static inline int get_ue_golomb_31(GetBitContext *gb)
+{
+    unsigned int buf;
+
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32); /* peek at the next 32 bits */
+
+    buf >>= 32 - 9; /* the top 9 bits are the table index; no range check is made here */
+    skip_bits_long(gb, ff_golomb_vlc_len[buf]);
+#else
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    buf >>= 32 - 9; /* the top 9 bits are the table index; no range check is made here */
+    LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
+    CLOSE_READER(re, gb);
+#endif
+
+    return ff_ue_golomb_vlc_code[buf]; /* both reader variants end in the same lookup */
+}
+
+static inline unsigned get_interleaved_ue_golomb(GetBitContext *gb) /* read an unsigned interleaved Exp-Golomb code */
+{
+    uint32_t buf;
+
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32); /* peek at the next 32 bits */
+
+    if (buf & 0xAA800000) { /* any of bits 31, 29, 27, 25, 23 set: one table lookup suffices */
+        buf >>= 32 - 8; /* the top 8 bits are the table index */
+        skip_bits_long(gb, ff_interleaved_golomb_vlc_len[buf]);
+
+        return ff_interleaved_ue_golomb_vlc_code[buf];
+    } else {
+        unsigned ret = 1; /* starts at 1; the 1 is subtracted again on return */
+
+        do { /* consume the code in chunks of at most 8 bits */
+            buf >>= 32 - 8;
+            skip_bits_long(gb, FFMIN(ff_interleaved_golomb_vlc_len[buf], 8));
+
+            if (ff_interleaved_golomb_vlc_len[buf] != 9) { /* presumably 9 marks a code that continues past this chunk */
+                ret <<= (ff_interleaved_golomb_vlc_len[buf] - 1) >> 1;
+                ret  |= ff_interleaved_dirac_golomb_vlc_code[buf];
+                break;
+            }
+            ret = (ret << 4) | ff_interleaved_dirac_golomb_vlc_code[buf]; /* append 4 more value bits */
+            buf = show_bits_long(gb, 32);
+        } while (get_bits_left(gb) > 0); /* stop when the input is exhausted */
+
+        return ret - 1;
+    }
+#else /* !CACHED_BITSTREAM_READER: same decoding through the reader macros */
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    if (buf & 0xAA800000) { /* any of bits 31, 29, 27, 25, 23 set: one table lookup suffices */
+        buf >>= 32 - 8; /* the top 8 bits are the table index */
+        LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_interleaved_ue_golomb_vlc_code[buf];
+    } else {
+        unsigned ret = 1; /* starts at 1; the 1 is subtracted again on return */
+
+        do { /* consume the code in chunks of at most 8 bits */
+            buf >>= 32 - 8;
+            LAST_SKIP_BITS(re, gb,
+                           FFMIN(ff_interleaved_golomb_vlc_len[buf], 8));
+
+            if (ff_interleaved_golomb_vlc_len[buf] != 9) { /* presumably 9 marks a code that continues past this chunk */
+                ret <<= (ff_interleaved_golomb_vlc_len[buf] - 1) >> 1;
+                ret  |= ff_interleaved_dirac_golomb_vlc_code[buf];
+                break;
+            }
+            ret = (ret << 4) | ff_interleaved_dirac_golomb_vlc_code[buf]; /* append 4 more value bits */
+            UPDATE_CACHE(re, gb);
+            buf = GET_CACHE(re, gb);
+        } while (ret<0x8000000U && BITS_AVAILABLE(re, gb)); /* unlike the cached variant, this also bounds ret */
+
+        CLOSE_READER(re, gb);
+        return ret - 1;
+    }
+#endif
+}
+
+/**
+ * Read an unsigned truncated Exp-Golomb code; a range of 1 reads no bits.
+ */
+static inline int get_te0_golomb(GetBitContext *gb, int range)
+{
+    av_assert2(range >= 1);
+    switch (range) {
+    case 1: /* only one possible value: nothing is read */
+        return 0;
+    case 2: /* two possible values: a single inverted bit */
+        return get_bits1(gb) ^ 1;
+    }
+    return get_ue_golomb(gb);
+}
+
+/**
+ * Read an unsigned truncated Exp-Golomb code.
+ */
+static inline int get_te_golomb(GetBitContext *gb, int range)
+{
+    av_assert2(range >= 1);
+
+    /* every range other than 2 is a plain unsigned Exp-Golomb code */
+    if (range != 2)
+        return get_ue_golomb(gb);
+    return get_bits1(gb) ^ 1; /* range 2: a single inverted bit */
+}
+
+/**
+ * read signed exp golomb code.
+ */
+static inline int get_se_golomb(GetBitContext *gb)
+{
+    unsigned int buf;
+
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32); /* peek at the next 32 bits */
+
+    if (buf >= (1 << 27)) { /* at most 4 leading zero bits: decode by table lookup */
+        buf >>= 32 - 9; /* the top 9 bits are the table index */
+        skip_bits_long(gb, ff_golomb_vlc_len[buf]);
+
+        return ff_se_golomb_vlc_code[buf];
+    } else {
+        int log = 2 * av_log2(buf) - 31; /* 32 - log == 2 * leading_zeros + 1 */
+        buf >>= log;
+
+        skip_bits_long(gb, 32 - log);
+
+        if (buf & 1) /* the low bit of the codeword selects the sign */
+            buf = -(buf >> 1);
+        else
+            buf = (buf >> 1);
+
+        return buf;
+    }
+#else /* !CACHED_BITSTREAM_READER */
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    if (buf >= (1 << 27)) { /* at most 4 leading zero bits: decode by table lookup */
+        buf >>= 32 - 9; /* the top 9 bits are the table index */
+        LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_se_golomb_vlc_code[buf];
+    } else {
+        int log = av_log2(buf), sign;
+        LAST_SKIP_BITS(re, gb, 31 - log); /* skip the 31 - log leading zero bits first */
+        UPDATE_CACHE(re, gb); /* refill so the rest of the codeword is in the cache */
+        buf = GET_CACHE(re, gb);
+
+        buf >>= log; /* keep the top 32 - log bits */
+
+        LAST_SKIP_BITS(re, gb, 32 - log);
+        CLOSE_READER(re, gb);
+
+        sign = -(buf & 1); /* 0, or -1 (all ones) when the low bit is set */
+        buf  = ((buf >> 1) ^ sign) - sign; /* negate buf >> 1 when sign is -1 */
+
+        return buf;
+    }
+#endif
+}
+
+static inline int get_se_golomb_long(GetBitContext *gb)
+{
+    unsigned int code = get_ue_golomb_long(gb); /* unsigned code that gets mapped to a signed value */
+    int mask = (code & 1) - 1; /* 0 for odd codes, -1 (all ones) for even codes */
+    return ((code >> 1) ^ mask) + 1;
+}
+
+static inline int get_interleaved_se_golomb(GetBitContext *gb) /* read a signed interleaved Exp-Golomb code */
+{
+    unsigned int buf;
+
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32); /* peek at the next 32 bits */
+
+    if (buf & 0xAA800000) { /* any of bits 31, 29, 27, 25, 23 set: one table lookup suffices */
+        buf >>= 32 - 8; /* the top 8 bits are the table index */
+        skip_bits_long(gb, ff_interleaved_golomb_vlc_len[buf]);
+
+        return ff_interleaved_se_golomb_vlc_code[buf];
+    } else {
+        int log;
+        skip_bits(gb, 8);
+        buf |= 1 | show_bits(gb, 24); /* ORs in the next 24 bits and forces bit 0 to 1 */
+
+        if ((buf & 0xAAAAAAAA) == 0) /* no odd-numbered bit is set: reported as invalid */
+            return INVALID_VLC;
+
+        for (log = 31; (buf & 0x80000000) == 0; log--) /* runs until bit 31 of buf is set */
+            buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
+
+        skip_bits_long(gb, 63 - 2 * log - 8); /* the extra - 8 accounts for the 8 bits skipped above */
+
+        return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
+    }
+#else /* !CACHED_BITSTREAM_READER: same decoding through the reader macros */
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    if (buf & 0xAA800000) { /* any of bits 31, 29, 27, 25, 23 set: one table lookup suffices */
+        buf >>= 32 - 8; /* the top 8 bits are the table index */
+        LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_interleaved_se_golomb_vlc_code[buf];
+    } else {
+        int log;
+        LAST_SKIP_BITS(re, gb, 8);
+        UPDATE_CACHE(re, gb);
+        buf |= 1 | (GET_CACHE(re, gb) >> 8); /* ORs in the next 24 bits and forces bit 0 to 1 */
+
+        if ((buf & 0xAAAAAAAA) == 0) /* no odd-numbered bit is set: reported as invalid */
+            return INVALID_VLC;
+
+        for (log = 31; (buf & 0x80000000) == 0; log--) /* runs until bit 31 of buf is set */
+            buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
+
+        LAST_SKIP_BITS(re, gb, 63 - 2 * log - 8); /* the extra - 8 accounts for the 8 bits skipped above */
+        CLOSE_READER(re, gb);
+
+        return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
+    }
+#endif
+}
+
+static inline int dirac_get_se_golomb(GetBitContext *gb)
+{
+    uint32_t magnitude = get_interleaved_ue_golomb(gb);
+    int sign;
+
+    if (!magnitude) /* a zero magnitude is not followed by a sign bit */
+        return magnitude;
+
+    sign = -get_bits1(gb); /* 0, or -1 (all ones) when the sign bit is set */
+    return (magnitude ^ sign) - sign; /* negates magnitude when sign is -1 */
+}
+
+/**
+ * read unsigned golomb rice code (ffv1): returns (leading_zeros << k) + k-bit remainder, or, when
+ * no bit is set within the first limit bits, the next esc_len bits + limit - 1. */
+static inline int get_ur_golomb(GetBitContext *gb, int k, int limit,
+                                int esc_len)
+{
+    unsigned int buf;
+    int log;
+
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32); /* peek at the next 32 bits */
+
+    log = av_log2(buf); /* 31 - log is the number of leading zero bits */
+
+    if (log > 31 - limit) { /* fewer than limit leading zeros: regular code */
+        buf >>= log - k; /* keep the terminating 1 bit and the k remainder bits */
+        buf  += (30U - log) << k; /* unsigned like the non-cached path: a signed (30 - log) << k is undefined for log == 31 */
+        skip_bits_long(gb, 32 + k - log);
+
+        return buf;
+    } else { /* escape: limit bits of prefix, then esc_len raw bits */
+        skip_bits_long(gb, limit);
+        buf = get_bits_long(gb, esc_len);
+
+        return buf + limit - 1;
+    }
+#else /* !CACHED_BITSTREAM_READER: same decoding through the reader macros */
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    log = av_log2(buf); /* 31 - log is the number of leading zero bits */
+
+    if (log > 31 - limit) { /* fewer than limit leading zeros: regular code */
+        buf >>= log - k; /* keep the terminating 1 bit and the k remainder bits */
+        buf  += (30U - log) << k; /* replaces the leading 1 by (leading_zeros << k), modulo 2^32 */
+        LAST_SKIP_BITS(re, gb, 32 + k - log);
+        CLOSE_READER(re, gb);
+
+        return buf;
+    } else { /* escape: limit bits of prefix, then esc_len raw bits */
+        LAST_SKIP_BITS(re, gb, limit);
+        UPDATE_CACHE(re, gb);
+
+        buf = SHOW_UBITS(re, gb, esc_len);
+
+        LAST_SKIP_BITS(re, gb, esc_len);
+        CLOSE_READER(re, gb);
+
+        return buf + limit - 1;
+    }
+#endif
+}
+
+/**
+ * read unsigned golomb rice code (jpegls): k-bit remainder, at most limit prefix bits, esc_len-bit
+ * escape value. Returns -1 when the prefix is not terminated within limit bits. */
+static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit,
+                                       int esc_len)
+{
+    unsigned int buf;
+    int log;
+
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32); /* peek at the next 32 bits */
+
+    log = av_log2(buf); /* 31 - log is the number of leading zero bits */
+
+    if (log - k >= 1 && 32 - log < limit) { /* fast path: the whole code is inside the peeked bits */
+        buf >>= log - k; /* keep the terminating 1 bit and the k remainder bits */
+        buf  += (30U - log) << k; /* unsigned like the non-cached path: a signed (30 - log) << k is undefined for log == 31 */
+        skip_bits_long(gb, 32 + k - log);
+
+        return buf;
+    } else {
+        int i;
+        for (i = 0; /* count zero prefix bits one at a time */
+             i < limit && get_bits1(gb) == 0 && get_bits_left(gb) > 0;
+             i++);
+
+        if (i < limit - 1) { /* regular code: k remainder bits follow */
+            buf = get_bits_long(gb, k);
+
+            return buf + (i << k);
+        } else if (i == limit - 1) { /* escape code: esc_len raw bits follow */
+            buf = get_bits_long(gb, esc_len);
+
+            return buf + 1;
+        } else /* prefix not terminated within limit bits */
+            return -1;
+    }
+#else /* !CACHED_BITSTREAM_READER */
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    log = av_log2(buf); /* 31 - log is the number of leading zero bits */
+
+    av_assert2(k <= 31);
+
+    if (log - k >= 32 - MIN_CACHE_BITS + (MIN_CACHE_BITS == 32) &&
+        32 - log < limit) { /* fast path: the whole code is inside the cache */
+        buf >>= log - k; /* keep the terminating 1 bit and the k remainder bits */
+        buf  += (30U - log) << k; /* replaces the leading 1 by (leading_zeros << k), modulo 2^32 */
+        LAST_SKIP_BITS(re, gb, 32 + k - log);
+        CLOSE_READER(re, gb);
+
+        return buf;
+    } else {
+        int i;
+        for (i = 0; i + MIN_CACHE_BITS <= limit && SHOW_UBITS(re, gb, MIN_CACHE_BITS) == 0; i += MIN_CACHE_BITS) { /* skip zero bits MIN_CACHE_BITS at a time */
+            if (gb->size_in_bits <= re_index) { /* read position is at or past the end of the input */
+                CLOSE_READER(re, gb);
+                return -1;
+            }
+            LAST_SKIP_BITS(re, gb, MIN_CACHE_BITS);
+            UPDATE_CACHE(re, gb);
+        }
+        for (; i < limit && SHOW_UBITS(re, gb, 1) == 0; i++) { /* count the remaining zero bits one at a time */
+            SKIP_BITS(re, gb, 1);
+        }
+        LAST_SKIP_BITS(re, gb, 1); /* skip the bit that ended the loop above */
+        UPDATE_CACHE(re, gb);
+
+        if (i < limit - 1) { /* regular code: k remainder bits follow */
+            if (k) {
+                if (k > MIN_CACHE_BITS - 1) { /* read the remainder in two parts: 16 bits, then k - 16 bits */
+                    buf = SHOW_UBITS(re, gb, 16) << (k-16);
+                    LAST_SKIP_BITS(re, gb, 16);
+                    UPDATE_CACHE(re, gb);
+                    buf |= SHOW_UBITS(re, gb, k-16);
+                    LAST_SKIP_BITS(re, gb, k-16);
+                } else {
+                    buf = SHOW_UBITS(re, gb, k);
+                    LAST_SKIP_BITS(re, gb, k);
+                }
+            } else { /* k == 0: there is no remainder to read */
+                buf = 0;
+            }
+
+            buf += ((SUINT)i << k);
+        } else if (i == limit - 1) { /* escape code: esc_len raw bits follow */
+            buf = SHOW_UBITS(re, gb, esc_len);
+            LAST_SKIP_BITS(re, gb, esc_len);
+
+            buf ++;
+        } else { /* prefix not terminated within limit bits */
+            buf = -1;
+        }
+        CLOSE_READER(re, gb);
+        return buf;
+    }
+#endif
+}
+
+/** Read a signed Golomb-Rice code (ffv1). */
+static inline int get_sr_golomb(GetBitContext *gb, int k, int limit,
+                                int esc_len)
+{
+    /* even codes yield code >> 1, odd codes its bitwise complement */
+    const unsigned code = get_ur_golomb(gb, k, limit, esc_len);
+    const unsigned flip = -(code & 1);
+    return (code >> 1) ^ flip;
+}
+
+/** Read a signed Golomb-Rice code (flac). */
+static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit,
+                                     int esc_len)
+{
+    /* even codes yield code >> 1, odd codes its bitwise complement */
+    const unsigned code = get_ur_golomb_jpegls(gb, k, limit, esc_len);
+    const unsigned flip = -(code & 1);
+    return (code >> 1) ^ flip;
+}
+
+/**
+ * read unsigned golomb rice code (shorten): get_ur_golomb_jpegls() with limit INT_MAX and esc_len 0.
+ */
+static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k)
+{
+    return get_ur_golomb_jpegls(gb, k, INT_MAX, 0); /* the int result is converted to unsigned on return */
+}
+
+/** Read a signed Golomb-Rice code (shorten). */
+static inline int get_sr_golomb_shorten(GetBitContext *gb, int k)
+{
+    /* read with k + 1 remainder bits; the low bit of the code selects the complement */
+    const int code = get_ur_golomb_jpegls(gb, k + 1, INT_MAX, 0);
+    const int flip = -(code & 1);
+    return (code >> 1) ^ flip;
+}
+
+#ifdef TRACE
+
+static inline int get_ue(GetBitContext *s, const char *file, const char *func, /* TRACE wrapper: decode with get_ue_golomb() and log the result */
+                         int line)
+{
+    int show = show_bits(s, 24); /* peek at the next 24 bits before decoding */
+    int pos  = get_bits_count(s); /* read position before decoding */
+    int i    = get_ue_golomb(s);
+    int len  = get_bits_count(s) - pos; /* number of bits the decoder consumed */
+    int bits = show >> (24 - len); /* the consumed bits themselves (assumes len <= 24) */
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d ue  @%5d in %s %s:%d\n",
+           bits, len, i, pos, file, func, line);
+
+    return i;
+}
+
+static inline int get_se(GetBitContext *s, const char *file, const char *func, /* TRACE wrapper: decode with get_se_golomb() and log the result */
+                         int line)
+{
+    int show = show_bits(s, 24); /* peek at the next 24 bits before decoding */
+    int pos  = get_bits_count(s); /* read position before decoding */
+    int i    = get_se_golomb(s);
+    int len  = get_bits_count(s) - pos; /* number of bits the decoder consumed */
+    int bits = show >> (24 - len); /* the consumed bits themselves (assumes len <= 24) */
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d se  @%5d in %s %s:%d\n",
+           bits, len, i, pos, file, func, line);
+
+    return i;
+}
+
+static inline int get_te(GetBitContext *s, int r, const char *file, const char *func, /* TRACE wrapper: decode with get_te0_golomb() and log the result */
+                         int line)
+{
+    int show = show_bits(s, 24); /* peek at the next 24 bits before decoding */
+    int pos  = get_bits_count(s); /* read position before decoding */
+    int i    = get_te0_golomb(s, r);
+    int len  = get_bits_count(s) - pos; /* number of bits the decoder consumed */
+    int bits = show >> (24 - len); /* the consumed bits themselves (assumes len <= 24) */
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d te  @%5d in %s %s:%d\n",
+           bits, len, i, pos, file, func, line);
+
+    return i;
+}
+
+#define get_ue_golomb(a) get_ue(a, __FILE__, __func__, __LINE__)
+#define get_se_golomb(a) get_se(a, __FILE__, __func__, __LINE__)
+#define get_te_golomb(a, r)  get_te(a, r, __FILE__, __func__, __LINE__)
+#define get_te0_golomb(a, r) get_te(a, r, __FILE__, __func__, __LINE__)
+
+#endif /* TRACE */
+#endif /* AVCODEC_GOLOMB_H */
diff --git a/media/ffvpx/libavcodec/h263dsp.h b/media/ffvpx/libavcodec/h263dsp.h
new file mode 100644
index 0000000000..1abea3ca8c
--- /dev/null
+++ b/media/ffvpx/libavcodec/h263dsp.h
@@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_H263DSP_H
+#define AVCODEC_H263DSP_H
+
+#include <stdint.h>
+
+extern const uint8_t ff_h263_loop_filter_strength[32];
+
+typedef struct H263DSPContext { /* function-pointer table for the H.263 loop filters */
+    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); /* presumably the horizontal variant - confirm against the implementation */
+    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); /* presumably the vertical variant - confirm against the implementation */
+} H263DSPContext;
+
+void ff_h263dsp_init(H263DSPContext *ctx);
+void ff_h263dsp_init_x86(H263DSPContext *ctx);
+void ff_h263dsp_init_mips(H263DSPContext *ctx);
+
+#endif /* AVCODEC_H263DSP_H */
diff --git a/media/ffvpx/libavcodec/h264chroma.h b/media/ffvpx/libavcodec/h264chroma.h
new file mode 100644
index 0000000000..b8f9c8f4fc
--- /dev/null
+++ b/media/ffvpx/libavcodec/h264chroma.h
@@ -0,0 +1,41 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_H264CHROMA_H
+#define AVCODEC_H264CHROMA_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef void (*h264_chroma_mc_func)(uint8_t *dst /*align 8*/, const uint8_t *src /*align 1*/, ptrdiff_t srcStride, int h, int x, int y); /* signature shared by every entry of the tables below */
+
+typedef struct H264ChromaContext { /* function-pointer tables for H.264 chroma motion compensation */
+    h264_chroma_mc_func put_h264_chroma_pixels_tab[4]; /* four "put" variants; the meaning of the index is not visible here */
+    h264_chroma_mc_func avg_h264_chroma_pixels_tab[4]; /* four "avg" variants, same indexing as the put table presumably */
+} H264ChromaContext;
+
+void ff_h264chroma_init(H264ChromaContext *c, int bit_depth);
+
+void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_loongarch(H264ChromaContext *c, int bit_depth);
+
+#endif /* AVCODEC_H264CHROMA_H */
diff --git a/media/ffvpx/libavcodec/h264dsp.h b/media/ffvpx/libavcodec/h264dsp.h
new file mode 100644
index 0000000000..e0880c4d88
--- /dev/null
+++ b/media/ffvpx/libavcodec/h264dsp.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 DSP functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef AVCODEC_H264DSP_H
+#define AVCODEC_H264DSP_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+typedef void (*h264_weight_func)(uint8_t *block, ptrdiff_t stride, int height, /* signature of the weight_h264_pixels_tab entries */
+                                 int log2_denom, int weight, int offset);
+typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, /* signature of the biweight_h264_pixels_tab entries */
+                                   ptrdiff_t stride, int height, int log2_denom,
+                                   int weightd, int weights, int offset);
+
+/**
+ * Context for storing H.264 DSP functions (a table of function pointers).
+ */
+typedef struct H264DSPContext {
+    /* weighted MC */
+    h264_weight_func weight_h264_pixels_tab[4];
+    h264_biweight_func biweight_h264_pixels_tab[4];
+
+    /* loop filter */
+    void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, ptrdiff_t stride,
+                                    int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, ptrdiff_t stride,
+                                    int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, ptrdiff_t stride,
+                                          int alpha, int beta, int8_t *tc0);
+    /* v/h_loop_filter_luma_intra: align 16 */
+    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
+                                          int alpha, int beta);
+    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
+                                          int alpha, int beta);
+    void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/,
+                                                ptrdiff_t stride, int alpha, int beta);
+    void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, ptrdiff_t stride,
+                                      int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, ptrdiff_t stride,
+                                      int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/,
+                                            ptrdiff_t stride, int alpha, int beta,
+                                            int8_t *tc0);
+    void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
+                                            ptrdiff_t stride, int alpha, int beta);
+    void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
+                                            ptrdiff_t stride, int alpha, int beta);
+    void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/,
+                                                  ptrdiff_t stride, int alpha, int beta);
+    // h264_loop_filter_strength: simd only. the C version is inlined in h264_loopfilter.c
+    void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40],
+                                      int8_t ref[2][40], int16_t mv[2][40][2],
+                                      int bidir, int edges, int step,
+                                      int mask_mv0, int mask_mv1, int field);
+
+    /* IDCT */
+    void (*h264_idct_add)(uint8_t *dst /*align 4*/,
+                          int16_t *block /*align 16*/, int stride);
+    void (*h264_idct8_add)(uint8_t *dst /*align 8*/,
+                           int16_t *block /*align 16*/, int stride);
+    void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/,
+                             int16_t *block /*align 16*/, int stride);
+    void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/,
+                              int16_t *block /*align 16*/, int stride);
+
+    void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset,
+                            int16_t *block /*align 16*/, int stride,
+                            const uint8_t nnzc[5 * 8]);
+    void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset,
+                            int16_t *block /*align 16*/, int stride,
+                            const uint8_t nnzc[5 * 8]);
+    void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset,
+                           int16_t *block /*align 16*/, int stride,
+                           const uint8_t nnzc[15 * 8]);
+    void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, const int *blockoffset,
+                                 int16_t *block /*align 16*/,
+                                 int stride, const uint8_t nnzc[5 * 8]);
+    void (*h264_luma_dc_dequant_idct)(int16_t *output,
+                                      int16_t *input /*align 16*/, int qmul);
+    void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul);
+
+    /* bypass-transform */
+    void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
+    void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
+
+    /**
+     * Search buf from the start for up to size bytes. Return the index
+     * of a zero byte, or >= size if not found. Ideally, use lookahead
+     * to filter out any zero bytes that are known to not be followed by
+     * one or more further zero bytes and a one byte. Better still, filter
+     * out any bytes that form the trailing_zero_8bits syntax element too.
+     */
+    int (*startcode_find_candidate)(const uint8_t *buf, int size);
+} H264DSPContext;
+
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
+                     const int chroma_format_idc);
+void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
+                             const int chroma_format_idc);
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
+                         const int chroma_format_idc);
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth,
+                         const int chroma_format_idc);
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
+                         const int chroma_format_idc);
+void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth,
+                          const int chroma_format_idc);
+void ff_h264dsp_init_loongarch(H264DSPContext *c, const int bit_depth,
+                               const int chroma_format_idc);
+
+#endif /* AVCODEC_H264DSP_H */
diff --git a/media/ffvpx/libavcodec/h264pred.c b/media/ffvpx/libavcodec/h264pred.c
new file mode 100644
index 0000000000..25f9995a0b
--- /dev/null
+++ b/media/ffvpx/libavcodec/h264pred.c
@@ -0,0 +1,602 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG-4 part10 prediction functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+#include "codec_id.h"
+#include "h264pred.h"
+#include "mathops.h"
+
+#define BIT_DEPTH 8
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 9
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 12
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 14
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+/**
+ * Fill a 4x4 block of 8-bit pixels with the constant value 127.
+ *
+ * @param src      top-left pixel of the block; each row is written with a
+ *                 single AV_WN32A store
+ * @param topright unused
+ * @param _stride  offset between the starts of consecutive rows
+ */
+static void pred4x4_127_dc_c(uint8_t *src, const uint8_t *topright,
+                             ptrdiff_t _stride)
+{
+    const uint32_t fill = 0x7F7F7F7FU; /* 0x7F (127) in each of the 4 bytes */
+    int stride = _stride;
+    int row;
+
+    for (row = 0; row < 4; row++)
+        AV_WN32A(src + row * stride, fill);
+}
+
+/**
+ * Fill a 4x4 block of 8-bit pixels with the constant value 129.
+ *
+ * @param src      top-left pixel of the block; each row is written with a
+ *                 single AV_WN32A store
+ * @param topright unused
+ * @param _stride  offset between the starts of consecutive rows
+ */
+static void pred4x4_129_dc_c(uint8_t *src, const uint8_t *topright,
+                             ptrdiff_t _stride)
+{
+    const uint32_t fill = 0x81818181U; /* 0x81 (129) in each of the 4 bytes */
+    int stride = _stride;
+    int row;
+
+    for (row = 0; row < 4; row++)
+        AV_WN32A(src + row * stride, fill);
+}
+
+/**
+ * 4x4 vertical predictor installed as pred4x4[VERT_PRED] for VP7/VP8.
+ *
+ * Builds four pixels, each a rounded (1, 2, 1) / 4 blend of three
+ * horizontally adjacent edge values (lt, t0..t4), and writes that same
+ * packed group to all four rows of the block.
+ *
+ * @param src      top-left pixel of the block
+ * @param topright not referenced directly; presumably read by
+ *                 LOAD_TOP_RIGHT_EDGE (macro defined outside this view)
+ * @param stride   offset between the starts of consecutive rows
+ */
+static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright,
+                                   ptrdiff_t stride)
+{
+    const unsigned lt = src[-stride - 1]; /* pixel above-left of the block */
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    const uint32_t filtered = PACK_4U8((lt + 2*t0 + t1 + 2) >> 2,
+                                       (t0 + 2*t1 + t2 + 2) >> 2,
+                                       (t1 + 2*t2 + t3 + 2) >> 2,
+                                       (t2 + 2*t3 + t4 + 2) >> 2);
+    int row;
+
+    for (row = 0; row < 4; row++)
+        AV_WN32A(src + row * stride, filtered);
+}
+
+/**
+ * 4x4 horizontal predictor installed as pred4x4[HOR_PRED] for VP7/VP8.
+ *
+ * Every row is filled with one value: a rounded (1, 2, 1) / 4 blend of
+ * three vertically adjacent edge values (lt, l0..l3).
+ *
+ * @param src      top-left pixel of the block
+ * @param topright unused
+ * @param stride   offset between the starts of consecutive rows
+ */
+static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright,
+                                     ptrdiff_t stride)
+{
+    const unsigned lt = src[-stride - 1]; /* pixel above-left of the block */
+    LOAD_LEFT_EDGE
+    const unsigned row0 = (lt + 2*l0 + l1 + 2) >> 2;
+    const unsigned row1 = (l0 + 2*l1 + l2 + 2) >> 2;
+    const unsigned row2 = (l1 + 2*l2 + l3 + 2) >> 2;
+    /* l3 is counted a second time in place of a value below it */
+    const unsigned row3 = (l2 + 2*l3 + l3 + 2) >> 2;
+
+    /* multiplying by 0x01010101 replicates the byte into all four lanes */
+    AV_WN32A(src + 0*stride, row0 * 0x01010101);
+    AV_WN32A(src + 1*stride, row1 * 0x01010101);
+    AV_WN32A(src + 2*stride, row2 * 0x01010101);
+    AV_WN32A(src + 3*stride, row3 * 0x01010101);
+}
+
+/**
+ * 4x4 down-left predictor installed as pred4x4[DIAG_DOWN_LEFT_PRED] for SVQ3.
+ *
+ * Pixels are constant along anti-diagonals (x + y): diagonal 0 gets
+ * (l1 + t1) >> 1, diagonal 1 gets (l2 + t2) >> 1 and every remaining pixel
+ * gets (l3 + t3) >> 1.
+ *
+ * @param src      top-left pixel of the block
+ * @param topright unused
+ * @param stride   offset between the starts of consecutive rows
+ */
+static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright,
+                                     ptrdiff_t stride)
+{
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+    const uint8_t diag[3] = {
+        (l1 + t1) >> 1,
+        (l2 + t2) >> 1,
+        (l3 + t3) >> 1,
+    };
+    int x, y;
+
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            const int d = x + y;
+
+            /* all diagonals from the third onwards share the last value */
+            src[x + y*stride] = diag[d < 2 ? d : 2];
+        }
+    }
+}
+
+/**
+ * 4x4 down-left predictor installed as pred4x4[DIAG_DOWN_LEFT_PRED] for RV40.
+ *
+ * Pixels are constant along anti-diagonals (x + y). Each diagonal value
+ * sums a (1, 2, 1) filter over the t* values and the same filter over the
+ * l* values, then divides by 8 with rounding; the last diagonal uses only
+ * t6, t7, l6 and l7.
+ *
+ * @param src      top-left pixel of the block
+ * @param topright not referenced directly; presumably read by
+ *                 LOAD_TOP_RIGHT_EDGE (macro defined outside this view)
+ * @param stride   offset between the starts of consecutive rows
+ */
+static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright,
+                                     ptrdiff_t stride)
+{
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    LOAD_LEFT_EDGE
+    LOAD_DOWN_LEFT_EDGE
+    /* one entry per anti-diagonal, indexed by x + y */
+    const uint8_t diag[7] = {
+        (t0 + 2*t1 + t2 + l0 + 2*l1 + l2 + 4) >> 3,
+        (t1 + 2*t2 + t3 + l1 + 2*l2 + l3 + 4) >> 3,
+        (t2 + 2*t3 + t4 + l2 + 2*l3 + l4 + 4) >> 3,
+        (t3 + 2*t4 + t5 + l3 + 2*l4 + l5 + 4) >> 3,
+        (t4 + 2*t5 + t6 + l4 + 2*l5 + l6 + 4) >> 3,
+        (t5 + 2*t6 + t7 + l5 + 2*l6 + l7 + 4) >> 3,
+        (t6 + t7 + l6 + l7 + 2) >> 2,
+    };
+    int x, y;
+
+    for (y = 0; y < 4; y++)
+        for (x = 0; x < 4; x++)
+            src[x + y*stride] = diag[x + y];
+}
+
+/**
+ * RV40 4x4 down-left predictor that does not use LOAD_DOWN_LEFT_EDGE;
+ * installed as pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN].
+ *
+ * Same anti-diagonal layout as pred4x4_down_left_rv40_c(), but l3 stands in
+ * wherever that function would use l4..l7.
+ *
+ * @param src      top-left pixel of the block
+ * @param topright not referenced directly; presumably read by
+ *                 LOAD_TOP_RIGHT_EDGE (macro defined outside this view)
+ * @param stride   offset between the starts of consecutive rows
+ */
+static void pred4x4_down_left_rv40_nodown_c(uint8_t *src,
+                                            const uint8_t *topright,
+                                            ptrdiff_t stride)
+{
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    LOAD_LEFT_EDGE
+    /* one entry per anti-diagonal, indexed by x + y */
+    const uint8_t diag[7] = {
+        (t0 + 2*t1 + t2 + l0 + 2*l1 + l2 + 4) >> 3,
+        (t1 + 2*t2 + t3 + l1 + 2*l2 + l3 + 4) >> 3,
+        (t2 + 2*t3 + t4 + l2 + 3*l3 + 4) >> 3,
+        (t3 + 2*t4 + t5 + 4*l3 + 4) >> 3,
+        (t4 + 2*t5 + t6 + 4*l3 + 4) >> 3,
+        (t5 + 2*t6 + t7 + 4*l3 + 4) >> 3,
+        (t6 + t7 + 2*l3 + 2) >> 2,
+    };
+    int x, y;
+
+    for (y = 0; y < 4; y++)
+        for (x = 0; x < 4; x++)
+            src[x + y*stride] = diag[x + y];
+}
+
+/**
+ * Shared body of the two RV40 4x4 vertical-left predictors.
+ *
+ * Only src[0+0*stride] and src[0+1*stride] blend in the left-edge values
+ * (l1..l4); every other pixel is computed from t1..t6 alone, using 2-tap
+ * rounded averages on rows 0 and 2 and 3-tap (1, 2, 1) filters on rows 1
+ * and 3.
+ *
+ * @param src      top-left pixel of the block
+ * @param topright not referenced directly; presumably read by
+ *                 LOAD_TOP_RIGHT_EDGE (macro defined outside this view)
+ * @param stride   offset between the starts of consecutive rows
+ * @param l0       not used by this helper
+ * @param l1       left-edge value supplied by the caller
+ * @param l2       left-edge value supplied by the caller
+ * @param l3       left-edge value supplied by the caller
+ * @param l4       value following l3; the "nodown" caller passes l3 again
+ */
+static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright,
+                                       ptrdiff_t stride,
+                                       const int l0, const int l1, const int l2,
+                                       const int l3, const int l4)
+{
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+
+    src[0+0*stride]=(2*t0 + 2*t1 + l1 + 2*l2 + l3 + 4)>>3;
+    src[1+0*stride]=
+    src[0+2*stride]=(t1 + t2 + 1)>>1;
+    src[2+0*stride]=
+    src[1+2*stride]=(t2 + t3 + 1)>>1;
+    src[3+0*stride]=
+    src[2+2*stride]=(t3 + t4+ 1)>>1;
+    src[3+2*stride]=(t4 + t5+ 1)>>1;
+    src[0+1*stride]=(t0 + 2*t1 + t2 + l2 + 2*l3 + l4 + 4)>>3;
+    src[1+1*stride]=
+    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[2+1*stride]=
+    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+}
+
+/**
+ * RV40 4x4 vertical-left predictor, installed as pred4x4[VERT_LEFT_PRED]
+ * for RV40.
+ *
+ * Obtains l0..l3 via LOAD_LEFT_EDGE and l4 via LOAD_DOWN_LEFT_EDGE, then
+ * forwards them to pred4x4_vertical_left_rv40().
+ */
+static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright,
+                                         ptrdiff_t stride)
+{
+    LOAD_LEFT_EDGE
+    LOAD_DOWN_LEFT_EDGE
+
+    pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4);
+}
+
+/**
+ * RV40 4x4 vertical-left predictor that does not use LOAD_DOWN_LEFT_EDGE;
+ * installed as pred4x4[VERT_LEFT_PRED_RV40_NODOWN].
+ *
+ * Forwards to pred4x4_vertical_left_rv40() with l3 passed a second time in
+ * the l4 position.
+ */
+static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src,
+                                                const uint8_t *topright,
+                                                ptrdiff_t stride)
+{
+    LOAD_LEFT_EDGE
+
+    pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3);
+}
+
+/**
+ * 4x4 vertical-left predictor installed as pred4x4[VERT_LEFT_PRED] for
+ * VP7/VP8.
+ *
+ * Uses only t0..t7. Rows 0 and 2 take 2-tap rounded averages and rows 1
+ * and 3 take 3-tap (1, 2, 1) filters, with two exceptions in the last
+ * column: src[3+2*stride] uses the 3-tap filter over t4..t6 and
+ * src[3+3*stride] the 3-tap filter over t5..t7.
+ *
+ * @param src      top-left pixel of the block
+ * @param topright not referenced directly; presumably read by
+ *                 LOAD_TOP_RIGHT_EDGE (macro defined outside this view)
+ * @param stride   offset between the starts of consecutive rows
+ */
+static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright,
+                                        ptrdiff_t stride)
+{
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+
+    src[0+0*stride]=(t0 + t1 + 1)>>1;
+    src[1+0*stride]=
+    src[0+2*stride]=(t1 + t2 + 1)>>1;
+    src[2+0*stride]=
+    src[1+2*stride]=(t2 + t3 + 1)>>1;
+    src[3+0*stride]=
+    src[2+2*stride]=(t3 + t4 + 1)>>1;
+    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[1+1*stride]=
+    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[2+1*stride]=
+    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+    src[3+2*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+    src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2;
+}
+
+/**
+ * 4x4 horizontal-up predictor installed as pred4x4[HOR_UP_PRED] for RV40.
+ *
+ * Most pixels blend a filter over the t* values with a filter over the l*
+ * values (divide by 8 with rounding). The pixels src[3+2*stride],
+ * src[1+3*stride], src[2+3*stride] and src[3+3*stride] are computed from
+ * l3..l6 only.
+ *
+ * @param src      top-left pixel of the block
+ * @param topright not referenced directly; presumably read by
+ *                 LOAD_TOP_RIGHT_EDGE (macro defined outside this view)
+ * @param stride   offset between the starts of consecutive rows
+ */
+static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright,
+                                         ptrdiff_t stride)
+{
+    LOAD_LEFT_EDGE
+    LOAD_DOWN_LEFT_EDGE
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+
+    src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
+    src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
+    src[2+0*stride]=
+    src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
+    src[3+0*stride]=
+    src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
+    src[2+1*stride]=
+    src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
+    src[3+1*stride]=
+    src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
+    src[3+2*stride]=
+    src[1+3*stride]=(l3 + 2*l4 + l5 + 2)>>2;
+    src[0+3*stride]=
+    src[2+2*stride]=(t6 + t7 + l3 + l4 + 2)>>2;
+    src[2+3*stride]=(l4 + l5 + 1)>>1;
+    src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2;
+}
+
+/**
+ * RV40 4x4 horizontal-up predictor that does not use LOAD_DOWN_LEFT_EDGE;
+ * installed as pred4x4[HOR_UP_PRED_RV40_NODOWN].
+ *
+ * The first ten assignments match pred4x4_horizontal_up_rv40_c(). The
+ * remaining pixels substitute l3 for the values that function takes from
+ * l4..l6: src[3+2*stride], src[1+3*stride], src[2+3*stride] and
+ * src[3+3*stride] are set to l3 itself.
+ *
+ * @param src      top-left pixel of the block
+ * @param topright not referenced directly; presumably read by
+ *                 LOAD_TOP_RIGHT_EDGE (macro defined outside this view)
+ * @param stride   offset between the starts of consecutive rows
+ */
+static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src,
+                                                const uint8_t *topright,
+                                                ptrdiff_t stride)
+{
+    LOAD_LEFT_EDGE
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+
+    src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
+    src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
+    src[2+0*stride]=
+    src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
+    src[3+0*stride]=
+    src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
+    src[2+1*stride]=
+    src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
+    src[3+1*stride]=
+    src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
+    src[3+2*stride]=
+    src[1+3*stride]=l3;
+    src[0+3*stride]=
+    src[2+2*stride]=(t6 + t7 + 2*l3 + 2)>>2;
+    src[2+3*stride]=
+    src[3+3*stride]=l3;
+}
+
+/**
+ * 4x4 "True Motion" predictor installed as pred4x4[TM_VP8_PRED] for VP7/VP8.
+ *
+ * Each pixel is looked up in ff_crop_tab at index
+ * MAX_NEG_CROP + left + above - topleft, where left is the pixel left of
+ * the current row, above the pixel in the row above the block and topleft
+ * the pixel above-left of the block. ff_crop_tab is defined outside this
+ * view; presumably it clamps that sum to the valid pixel range.
+ *
+ * @param src      top-left pixel of the block
+ * @param topright unused
+ * @param stride   offset between the starts of consecutive rows
+ */
+static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright,
+                             ptrdiff_t stride)
+{
+    const uint8_t *const above = src - stride;
+    /* table base pre-biased by -topleft, shared by every row */
+    const uint8_t *const clip  = ff_crop_tab + MAX_NEG_CROP - src[-1-stride];
+    int x, y;
+
+    for (y = 0; y < 4; y++, src += stride) {
+        /* add this row's left neighbour to the bias */
+        const uint8_t *const row_clip = clip + src[-1];
+
+        for (x = 0; x < 4; x++)
+            src[x] = row_clip[above[x]];
+    }
+}
+
+/**
+ * 16x16 plane predictor installed as pred16x16[PLANE_PRED8x8] for SVQ3.
+ *
+ * Thin wrapper around the 8-bit pred16x16_plane_compat_8_c() with the two
+ * trailing arguments set to (1, 0). That function is defined outside this
+ * view; presumably the arguments are the SVQ3 and RV40 selectors (confirm
+ * against the template).
+ */
+static void pred16x16_plane_svq3_c(uint8_t *src, ptrdiff_t stride)
+{
+    pred16x16_plane_compat_8_c(src, stride, 1, 0);
+}
+
+/**
+ * 16x16 plane predictor installed as pred16x16[PLANE_PRED8x8] for RV40.
+ *
+ * Thin wrapper around the 8-bit pred16x16_plane_compat_8_c() with the two
+ * trailing arguments set to (0, 1). That function is defined outside this
+ * view; presumably the arguments are the SVQ3 and RV40 selectors (confirm
+ * against the template).
+ */
+static void pred16x16_plane_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
+    pred16x16_plane_compat_8_c(src, stride, 0, 1);
+}
+
+/**
+ * 16x16 "True Motion" predictor installed as pred16x16[PLANE_PRED8x8] for
+ * VP7/VP8.
+ *
+ * Each pixel is looked up in ff_crop_tab at index
+ * MAX_NEG_CROP + left + above - topleft (left: pixel left of the current
+ * row; above: pixel in the row above the block; topleft: pixel above-left
+ * of the block). ff_crop_tab is defined outside this view; presumably it
+ * clamps that sum to the valid pixel range.
+ *
+ * @param src    top-left pixel of the block
+ * @param stride offset between the starts of consecutive rows
+ */
+static void pred16x16_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
+{
+    const uint8_t *const above = src - stride;
+    /* table base pre-biased by -topleft, shared by every row */
+    const uint8_t *const clip  = ff_crop_tab + MAX_NEG_CROP - src[-1-stride];
+    int x, y;
+
+    for (y = 0; y < 16; y++, src += stride) {
+        /* add this row's left neighbour to the bias */
+        const uint8_t *const row_clip = clip + src[-1];
+
+        for (x = 0; x < 16; x++)
+            src[x] = row_clip[above[x]];
+    }
+}
+
+/**
+ * 8x8 DC predictor that uses only the left edge; installed as
+ * pred8x8[LEFT_DC_PRED8x8] for 8-bit RV40/VP7/VP8.
+ *
+ * Fills the block with the rounded mean of the eight pixels in the column
+ * immediately left of it.
+ *
+ * @param src    top-left pixel of the block; rows are written through
+ *               uint32_t pointers
+ * @param stride offset between the starts of consecutive rows
+ */
+static void pred8x8_left_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
+    unsigned sum = 0;
+    unsigned fill;
+    int row;
+
+    for (row = 0; row < 8; row++)
+        sum += src[row*stride - 1];
+    /* rounded mean, replicated into all four bytes of a 32-bit word */
+    fill = 0x01010101 * ((sum + 4) >> 3);
+
+    for (row = 0; row < 8; row++) {
+        uint32_t *dst = (uint32_t *)(src + row*stride);
+
+        dst[1] = fill;
+        dst[0] = fill;
+    }
+}
+
+/**
+ * 8x8 DC predictor that uses only the top edge; installed as
+ * pred8x8[TOP_DC_PRED8x8] for 8-bit RV40/VP7/VP8.
+ *
+ * Fills the block with the rounded mean of the eight pixels in the row
+ * immediately above it.
+ *
+ * @param src    top-left pixel of the block; rows are written through
+ *               uint32_t pointers
+ * @param stride offset between the starts of consecutive rows
+ */
+static void pred8x8_top_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
+    const uint8_t *above = src - stride;
+    unsigned sum = 0;
+    unsigned fill;
+    int k;
+
+    for (k = 0; k < 8; k++)
+        sum += above[k];
+    /* rounded mean, replicated into all four bytes of a 32-bit word */
+    fill = 0x01010101 * ((sum + 4) >> 3);
+
+    for (k = 0; k < 8; k++) {
+        uint32_t *dst = (uint32_t *)(src + k*stride);
+
+        dst[1] = fill;
+        dst[0] = fill;
+    }
+}
+
+/**
+ * 8x8 DC predictor installed as pred8x8[DC_PRED8x8] for 8-bit RV40/VP7/VP8.
+ *
+ * Fills the whole block with the rounded mean of the sixteen neighbouring
+ * pixels: the eight in the column left of the block and the eight in the
+ * row above it.
+ *
+ * @param src    top-left pixel of the block; rows are written through
+ *               uint32_t pointers
+ * @param stride offset between the starts of consecutive rows
+ */
+static void pred8x8_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
+    unsigned sum = 0;
+    unsigned fill;
+    int k;
+
+    for (k = 0; k < 8; k++)
+        sum += src[k*stride - 1] + src[k - stride];
+    /* rounded mean of 16 samples, replicated into a 32-bit word */
+    fill = 0x01010101 * ((sum + 8) >> 4);
+
+    for (k = 0; k < 8; k++) {
+        uint32_t *dst = (uint32_t *)(src + k*stride);
+
+        dst[0] = fill;
+        dst[1] = fill;
+    }
+}
+
+/**
+ * 8x8 "True Motion" predictor installed as pred8x8[PLANE_PRED8x8] for
+ * VP7/VP8.
+ *
+ * Each pixel is looked up in ff_crop_tab at index
+ * MAX_NEG_CROP + left + above - topleft (left: pixel left of the current
+ * row; above: pixel in the row above the block; topleft: pixel above-left
+ * of the block). ff_crop_tab is defined outside this view; presumably it
+ * clamps that sum to the valid pixel range.
+ *
+ * @param src    top-left pixel of the block
+ * @param stride offset between the starts of consecutive rows
+ */
+static void pred8x8_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
+{
+    const uint8_t *const above = src - stride;
+    /* table base pre-biased by -topleft, shared by every row */
+    const uint8_t *const clip  = ff_crop_tab + MAX_NEG_CROP - src[-1-stride];
+    int x, y;
+
+    for (y = 0; y < 8; y++, src += stride) {
+        /* add this row's left neighbour to the bias */
+        const uint8_t *const row_clip = clip + src[-1];
+
+        for (x = 0; x < 8; x++)
+            src[x] = row_clip[above[x]];
+    }
+}
+
+/**
+ * Set the intra prediction function pointers.
+ *
+ * Fills every table of @p h with the C implementations for the requested
+ * bit depth, applies codec-specific overrides (8-bit path only), and
+ * finally calls the architecture-specific init selected by the ARCH_*
+ * macros, which receives the same arguments.
+ *
+ * @param h                 context whose function-pointer tables are filled
+ * @param codec_id          AV_CODEC_ID_* value; SVQ3, RV40, VP7 and VP8 get
+ *                          dedicated predictors when the 8-bit path is taken
+ * @param bit_depth         9, 10, 12 and 14 select the matching template
+ *                          instantiation; any other value must be <= 8
+ *                          (asserted) and selects the 8-bit functions
+ * @param chroma_format_idc values <= 1 install the pred8x8_* chroma
+ *                          functions, larger values the pred8x16_* ones
+ */
+av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id,
+                               const int bit_depth,
+                               int chroma_format_idc)
+{
+#undef FUNC
+#undef FUNCC
+/* Name-mangling helpers: FUNC -> name_depth, FUNCC -> name_depth_c,
+ * FUNCD -> name_c (depth-independent functions defined in this file). */
+#define FUNC(a, depth) a ## _ ## depth
+#define FUNCC(a, depth) a ## _ ## depth ## _c
+#define FUNCD(a) a ## _c
+
+/* Installs the full set of predictors for one bit depth. The 8x8 DC
+ * entries fall back to the *_rv40 variants only when depth is 8 and the
+ * codec is RV40, VP7 or VP8. */
+#define H264_PRED(depth) \
+    h->pred4x4[VERT_PRED           ] = FUNCC(pred4x4_vertical,        depth);\
+    h->pred4x4[HOR_PRED            ] = FUNCC(pred4x4_horizontal,      depth);\
+    h->pred4x4[DC_PRED             ] = FUNCC(pred4x4_dc,              depth);\
+    h->pred4x4[DIAG_DOWN_LEFT_PRED ] = FUNCC(pred4x4_down_left,       depth);\
+    h->pred4x4[DIAG_DOWN_RIGHT_PRED] = FUNCC(pred4x4_down_right,      depth);\
+    h->pred4x4[VERT_RIGHT_PRED     ] = FUNCC(pred4x4_vertical_right,  depth);\
+    h->pred4x4[HOR_DOWN_PRED       ] = FUNCC(pred4x4_horizontal_down, depth);\
+    h->pred4x4[VERT_LEFT_PRED      ] = FUNCC(pred4x4_vertical_left,   depth);\
+    h->pred4x4[HOR_UP_PRED         ] = FUNCC(pred4x4_horizontal_up,   depth);\
+    h->pred4x4[LEFT_DC_PRED        ] = FUNCC(pred4x4_left_dc,         depth);\
+    h->pred4x4[TOP_DC_PRED         ] = FUNCC(pred4x4_top_dc,          depth);\
+    if (depth > 8 || codec_id != AV_CODEC_ID_VP8)\
+        h->pred4x4[DC_128_PRED     ] = FUNCC(pred4x4_128_dc,          depth);\
+\
+    h->pred8x8l[VERT_PRED           ]= FUNCC(pred8x8l_vertical            , depth);\
+    h->pred8x8l[HOR_PRED            ]= FUNCC(pred8x8l_horizontal          , depth);\
+    h->pred8x8l[DC_PRED             ]= FUNCC(pred8x8l_dc                  , depth);\
+    h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred8x8l_down_left           , depth);\
+    h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred8x8l_down_right          , depth);\
+    h->pred8x8l[VERT_RIGHT_PRED     ]= FUNCC(pred8x8l_vertical_right      , depth);\
+    h->pred8x8l[HOR_DOWN_PRED       ]= FUNCC(pred8x8l_horizontal_down     , depth);\
+    h->pred8x8l[VERT_LEFT_PRED      ]= FUNCC(pred8x8l_vertical_left       , depth);\
+    h->pred8x8l[HOR_UP_PRED         ]= FUNCC(pred8x8l_horizontal_up       , depth);\
+    h->pred8x8l[LEFT_DC_PRED        ]= FUNCC(pred8x8l_left_dc             , depth);\
+    h->pred8x8l[TOP_DC_PRED         ]= FUNCC(pred8x8l_top_dc              , depth);\
+    h->pred8x8l[DC_128_PRED         ]= FUNCC(pred8x8l_128_dc              , depth);\
+\
+    if (chroma_format_idc <= 1) {\
+        h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x8_vertical               , depth);\
+        h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x8_horizontal             , depth);\
+        h->pred8x8[PLANE_PRED8x8] = FUNCC(pred8x8_plane,       depth);\
+    } else {\
+        h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x16_vertical              , depth);\
+        h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x16_horizontal            , depth);\
+        h->pred8x8[PLANE_PRED8x8] = FUNCC(pred8x16_plane,      depth);\
+    }\
+    if (depth > 8 || (codec_id != AV_CODEC_ID_RV40 && \
+                      codec_id != AV_CODEC_ID_VP7  && \
+                      codec_id != AV_CODEC_ID_VP8)) { \
+        if (chroma_format_idc <= 1) {\
+            h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x8_dc                     , depth);\
+            h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc                , depth);\
+            h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc                 , depth);\
+            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
+            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
+            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
+            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
+        } else {\
+            h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x16_dc                    , depth);\
+            h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc               , depth);\
+            h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc                , depth);\
+            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l0t, depth);\
+            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0lt, depth);\
+            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l00, depth);\
+            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0l0, depth);\
+        }\
+    }else{\
+        h->pred8x8[DC_PRED8x8     ]= FUNCD(pred8x8_dc_rv40);\
+        h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
+        h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\
+    }\
+    if (chroma_format_idc <= 1) {\
+        h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc                 , depth);\
+    } else {\
+        h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x16_128_dc                , depth);\
+    }\
+\
+    h->pred16x16[DC_PRED8x8     ]= FUNCC(pred16x16_dc                     , depth);\
+    h->pred16x16[VERT_PRED8x8   ]= FUNCC(pred16x16_vertical               , depth);\
+    h->pred16x16[HOR_PRED8x8    ]= FUNCC(pred16x16_horizontal             , depth);\
+    h->pred16x16[PLANE_PRED8x8  ]= FUNCC(pred16x16_plane                  , depth);\
+    h->pred16x16[LEFT_DC_PRED8x8]= FUNCC(pred16x16_left_dc                , depth);\
+    h->pred16x16[TOP_DC_PRED8x8 ]= FUNCC(pred16x16_top_dc                 , depth);\
+    h->pred16x16[DC_128_PRED8x8 ]= FUNCC(pred16x16_128_dc                 , depth);\
+\
+    /* special lossless h/v prediction for H.264 */ \
+    h->pred4x4_add  [VERT_PRED   ]= FUNCC(pred4x4_vertical_add            , depth);\
+    h->pred4x4_add  [ HOR_PRED   ]= FUNCC(pred4x4_horizontal_add          , depth);\
+    h->pred8x8l_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_add           , depth);\
+    h->pred8x8l_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_add         , depth);\
+    h->pred8x8l_filter_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_filter_add           , depth);\
+    h->pred8x8l_filter_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_filter_add         , depth);\
+    if (chroma_format_idc <= 1) {\
+        h->pred8x8_add[VERT_PRED8x8] = FUNCC(pred8x8_vertical_add,    depth);\
+        h->pred8x8_add[ HOR_PRED8x8] = FUNCC(pred8x8_horizontal_add,  depth);\
+    } else {\
+        h->pred8x8_add  [VERT_PRED8x8]= FUNCC(pred8x16_vertical_add            , depth);\
+        h->pred8x8_add  [ HOR_PRED8x8]= FUNCC(pred8x16_horizontal_add          , depth);\
+    }\
+    h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add          , depth);\
+    h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add        , depth);\
+
+    /* Pick the template instantiation; only the 8-bit default path applies
+     * the codec-specific overrides below. */
+    switch (bit_depth) {
+        case 9:
+            H264_PRED(9)
+            break;
+        case 10:
+            H264_PRED(10)
+            break;
+        case 12:
+            H264_PRED(12)
+            break;
+        case 14:
+            H264_PRED(14)
+            break;
+        default:
+            av_assert0(bit_depth<=8);
+            H264_PRED(8)
+            /* Replace selected generic entries with codec-specific ones. */
+            switch (codec_id) {
+            case AV_CODEC_ID_SVQ3:
+                h->pred4x4[DIAG_DOWN_LEFT_PRED] = FUNCD(pred4x4_down_left_svq3);
+                h->pred16x16[PLANE_PRED8x8    ] = FUNCD(pred16x16_plane_svq3);
+                break;
+            case AV_CODEC_ID_RV40:
+                h->pred4x4[DIAG_DOWN_LEFT_PRED] = FUNCD(pred4x4_down_left_rv40);
+                h->pred4x4[VERT_LEFT_PRED     ] = FUNCD(pred4x4_vertical_left_rv40);
+                h->pred4x4[HOR_UP_PRED        ] = FUNCD(pred4x4_horizontal_up_rv40);
+                h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = FUNCD(pred4x4_down_left_rv40_nodown);
+                h->pred4x4[HOR_UP_PRED_RV40_NODOWN] = FUNCD(pred4x4_horizontal_up_rv40_nodown);
+                h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN] = FUNCD(pred4x4_vertical_left_rv40_nodown);
+                h->pred16x16[PLANE_PRED8x8    ] = FUNCD(pred16x16_plane_rv40);
+                break;
+            case AV_CODEC_ID_VP7:
+            case AV_CODEC_ID_VP8:
+                h->pred4x4[VERT_PRED       ] = FUNCD(pred4x4_vertical_vp8);
+                h->pred4x4[HOR_PRED        ] = FUNCD(pred4x4_horizontal_vp8);
+                h->pred4x4[VERT_LEFT_PRED  ] = FUNCD(pred4x4_vertical_left_vp8);
+                h->pred4x4[TM_VP8_PRED     ] = FUNCD(pred4x4_tm_vp8);
+                h->pred4x4[VERT_VP8_PRED   ] = FUNCC(pred4x4_vertical, 8);
+                h->pred4x4[DC_127_PRED     ] = FUNCD(pred4x4_127_dc);
+                h->pred4x4[DC_129_PRED     ] = FUNCD(pred4x4_129_dc);
+                h->pred4x4[HOR_VP8_PRED    ] = FUNCC(pred4x4_horizontal, 8);
+                h->pred8x8[PLANE_PRED8x8   ] = FUNCD(pred8x8_tm_vp8);
+                h->pred8x8[DC_127_PRED8x8  ] = FUNCC(pred8x8_127_dc, 8);
+                h->pred8x8[DC_129_PRED8x8  ] = FUNCC(pred8x8_129_dc, 8);
+                h->pred16x16[PLANE_PRED8x8 ] = FUNCD(pred16x16_tm_vp8);
+                h->pred16x16[DC_127_PRED8x8] = FUNCC(pred16x16_127_dc, 8);
+                h->pred16x16[DC_129_PRED8x8] = FUNCC(pred16x16_129_dc, 8);
+                break;
+            }
+            break;
+    }
+
+    /* Architecture-specific init runs last, after the C defaults are set. */
+#if ARCH_AARCH64
+    ff_h264_pred_init_aarch64(h, codec_id, bit_depth, chroma_format_idc);
+#elif ARCH_ARM
+    ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc);
+#elif ARCH_X86
+    ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc);
+#elif ARCH_MIPS
+    ff_h264_pred_init_mips(h, codec_id, bit_depth, chroma_format_idc);
+#elif ARCH_LOONGARCH
+    ff_h264_pred_init_loongarch(h, codec_id, bit_depth, chroma_format_idc);
+#endif
+}
diff --git a/media/ffvpx/libavcodec/h264pred.h b/media/ffvpx/libavcodec/h264pred.h
new file mode 100644
index 0000000000..cb008548fc
--- /dev/null
+++ b/media/ffvpx/libavcodec/h264pred.h
@@ -0,0 +1,130 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG-4 part10 prediction functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef AVCODEC_H264PRED_H
+#define AVCODEC_H264PRED_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Prediction types
+ */
+//@{
+#define VERT_PRED              0
+#define HOR_PRED               1
+#define DC_PRED                2
+#define DIAG_DOWN_LEFT_PRED    3
+#define DIAG_DOWN_RIGHT_PRED   4
+#define VERT_RIGHT_PRED        5
+#define HOR_DOWN_PRED          6
+#define VERT_LEFT_PRED         7
+#define HOR_UP_PRED            8
+
+// DC edge (not for VP8)
+#define LEFT_DC_PRED           9
+#define TOP_DC_PRED           10
+#define DC_128_PRED           11
+
+// RV40 specific
+#define DIAG_DOWN_LEFT_PRED_RV40_NODOWN   12
+#define HOR_UP_PRED_RV40_NODOWN           13
+#define VERT_LEFT_PRED_RV40_NODOWN        14
+
+// VP8 specific
+#define TM_VP8_PRED            9    ///< "True Motion", used instead of plane
+#define VERT_VP8_PRED         10    ///< for VP8, #VERT_PRED is the average of
+                                    ///< (left col+cur col x2+right col) / 4;
+                                    ///< this is the "unaveraged" one
+#define HOR_VP8_PRED          14    ///< unaveraged version of #HOR_PRED, see
+                                    ///< #VERT_VP8_PRED for details
+#define DC_127_PRED           12
+#define DC_129_PRED           13
+
+#define DC_PRED8x8             0
+#define HOR_PRED8x8            1
+#define VERT_PRED8x8           2
+#define PLANE_PRED8x8          3
+
+// DC edge
+#define LEFT_DC_PRED8x8        4
+#define TOP_DC_PRED8x8         5
+#define DC_128_PRED8x8         6
+
+// H.264/SVQ3 (8x8) specific
+#define ALZHEIMER_DC_L0T_PRED8x8  7
+#define ALZHEIMER_DC_0LT_PRED8x8  8
+#define ALZHEIMER_DC_L00_PRED8x8  9
+#define ALZHEIMER_DC_0L0_PRED8x8 10
+
+// VP8 specific
+#define DC_127_PRED8x8         7
+#define DC_129_PRED8x8         8
+//@}
+
+#define PART_NOT_AVAILABLE -2
+
+/**
+ * Context for storing H.264 prediction functions
+ *
+ * Each member is a table of function pointers indexed by the prediction
+ * type macros defined above; ff_h264_pred_init() fills the tables.
+ */
+typedef struct H264PredContext {
+    /* 15 entries, indexed by the *_PRED macros (values 0..14) */
+    void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright,
+                              ptrdiff_t stride);
+    /* 12 entries, indexed by VERT_PRED..DC_128_PRED (values 0..11) */
+    void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright,
+                           ptrdiff_t stride);
+    /* 11 entries, indexed by the *_PRED8x8 macros (values 0..10) */
+    void(*pred8x8[4 + 3 + 4])(uint8_t *src, ptrdiff_t stride);
+    /* 9 entries, indexed by the *_PRED8x8 macros (values 0..8) */
+    void(*pred16x16[4 + 3 + 2])(uint8_t *src, ptrdiff_t stride);
+
+    /* "add" variants; ff_h264_pred_init() sets only the vertical and
+     * horizontal entries (VERT_PRED/HOR_PRED, or VERT_PRED8x8/HOR_PRED8x8
+     * for the three-entry tables) */
+    void(*pred4x4_add[2])(uint8_t *pix /*align  4*/,
+                          int16_t *block /*align 16*/, ptrdiff_t stride);
+    void(*pred8x8l_add[2])(uint8_t *pix /*align  8*/,
+                           int16_t *block /*align 16*/, ptrdiff_t stride);
+    void(*pred8x8l_filter_add[2])(uint8_t *pix /*align  8*/,
+                           int16_t *block /*align 16*/, int topleft, int topright, ptrdiff_t stride);
+    void(*pred8x8_add[3])(uint8_t *pix /*align  8*/,
+                          const int *block_offset,
+                          int16_t *block /*align 16*/, ptrdiff_t stride);
+    void(*pred16x16_add[3])(uint8_t *pix /*align 16*/,
+                            const int *block_offset,
+                            int16_t *block /*align 16*/, ptrdiff_t stride);
+} H264PredContext;
+
+void ff_h264_pred_init(H264PredContext *h, int codec_id,
+                       const int bit_depth, const int chroma_format_idc);
+void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id,
+                               const int bit_depth,
+                               const int chroma_format_idc);
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id,
+                           const int bit_depth, const int chroma_format_idc);
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
+                           const int bit_depth, const int chroma_format_idc);
+void ff_h264_pred_init_mips(H264PredContext *h, int codec_id,
+                            const int bit_depth, const int chroma_format_idc);
+void ff_h264_pred_init_loongarch(H264PredContext *h, int codec_id,
+                                 const int bit_depth, const int chroma_format_idc);
+
+#endif /* AVCODEC_H264PRED_H */
diff --git a/media/ffvpx/libavcodec/h264pred_template.c b/media/ffvpx/libavcodec/h264pred_template.c
new file mode 100644
index 0000000000..b5bc942a5e
--- /dev/null
+++ b/media/ffvpx/libavcodec/h264pred_template.c
@@ -0,0 +1,1333 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG-4 part10 prediction functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/intreadwrite.h"
+
+#include "mathops.h"
+
+#include "bit_depth_template.c"
+
+/**
+ * 4x4 vertical prediction: the four pixels directly above the block are
+ * copied into each of the block's four rows.
+ * topright is not used by this mode.
+ */
+static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,
+                                    ptrdiff_t _stride)
+{
+    pixel *dst = (pixel*)_src;
+    const int pitch = _stride>>(sizeof(pixel)-1);
+    const pixel4 above = AV_RN4PA(dst-pitch);
+    int row;
+
+    for (row = 0; row < 4; row++)
+        AV_WN4PA(dst+row*pitch, above);
+}
+
+/**
+ * 4x4 horizontal prediction: each row is filled with the pixel located
+ * immediately to its left. topright is not used by this mode.
+ */
+static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
+                                      ptrdiff_t _stride)
+{
+    pixel *dst = (pixel*)_src;
+    const int pitch = _stride>>(sizeof(pixel)-1);
+    int row;
+
+    for (row = 0; row < 4; row++) {
+        pixel *line = dst + row*pitch;
+
+        AV_WN4PA(line, PIXEL_SPLAT_X4(line[-1]));
+    }
+}
+
+/**
+ * 4x4 DC prediction: fills the block with the rounded mean of the four
+ * pixels above and the four pixels to the left.
+ * topright is not used by this mode.
+ */
+static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
+                              ptrdiff_t _stride)
+{
+    pixel *dst = (pixel*)_src;
+    const int pitch = _stride>>(sizeof(pixel)-1);
+    int sum = 4; /* rounding term for the division by 8 */
+    int k;
+    pixel4 fill;
+
+    for (k = 0; k < 4; k++)
+        sum += dst[k-pitch] + dst[-1+k*pitch];
+    sum >>= 3;
+    fill = PIXEL_SPLAT_X4(sum);
+
+    for (k = 0; k < 4; k++)
+        AV_WN4PA(dst+k*pitch, fill);
+}
+
+/**
+ * 4x4 DC prediction from the left column only: fills the block with the
+ * rounded mean of the four pixels to its left.
+ * topright is not used by this mode.
+ */
+static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
+                                   ptrdiff_t _stride)
+{
+    pixel *dst = (pixel*)_src;
+    const int pitch = _stride>>(sizeof(pixel)-1);
+    int sum = 2; /* rounding term for the division by 4 */
+    int k;
+    pixel4 fill;
+
+    for (k = 0; k < 4; k++)
+        sum += dst[-1+k*pitch];
+    sum >>= 2;
+    fill = PIXEL_SPLAT_X4(sum);
+
+    for (k = 0; k < 4; k++)
+        AV_WN4PA(dst+k*pitch, fill);
+}
+
+/**
+ * 4x4 DC prediction from the top row only: fills the block with the
+ * rounded mean of the four pixels above it.
+ * topright is not used by this mode.
+ */
+static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
+                                  ptrdiff_t _stride)
+{
+    pixel *dst = (pixel*)_src;
+    const int pitch = _stride>>(sizeof(pixel)-1);
+    int sum = 2; /* rounding term for the division by 4 */
+    int k;
+    pixel4 fill;
+
+    for (k = 0; k < 4; k++)
+        sum += dst[k-pitch];
+    sum >>= 2;
+    fill = PIXEL_SPLAT_X4(sum);
+
+    for (k = 0; k < 4; k++)
+        AV_WN4PA(dst+k*pitch, fill);
+}
+
+/**
+ * 4x4 DC prediction without neighbours: fills the block with the constant
+ * 1<<(BIT_DEPTH-1). topright is not used by this mode.
+ */
+static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,
+                                  ptrdiff_t _stride)
+{
+    pixel *dst = (pixel*)_src;
+    const int pitch = _stride>>(sizeof(pixel)-1);
+    const pixel4 mid = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
+    int row;
+
+    for (row = 0; row < 4; row++)
+        AV_WN4PA(dst+row*pitch, mid);
+}
+
+
+/*
+ * Edge loaders for the 4x4 predictors. Each macro declares four
+ * `const unsigned` locals (marked av_unused, so not all need to be read)
+ * and expects `src` and `stride` -- or `topright` for the top-right
+ * variant -- to be in scope at the expansion site.
+ * Every definition ends with a line continuation, so the blank line that
+ * follows each macro is part of it and must be kept.
+ */
+/* t4..t7: the four pixels read through the topright pointer. */
+#define LOAD_TOP_RIGHT_EDGE\
+    const unsigned av_unused t4 = topright[0];\
+    const unsigned av_unused t5 = topright[1];\
+    const unsigned av_unused t6 = topright[2];\
+    const unsigned av_unused t7 = topright[3];\
+
+/* l4..l7: the left-neighbour pixels of rows 4..7 (below the 4x4 block). */
+#define LOAD_DOWN_LEFT_EDGE\
+    const unsigned av_unused l4 = src[-1+4*stride];\
+    const unsigned av_unused l5 = src[-1+5*stride];\
+    const unsigned av_unused l6 = src[-1+6*stride];\
+    const unsigned av_unused l7 = src[-1+7*stride];\
+
+/* l0..l3: the left-neighbour pixels of rows 0..3. */
+#define LOAD_LEFT_EDGE\
+    const unsigned av_unused l0 = src[-1+0*stride];\
+    const unsigned av_unused l1 = src[-1+1*stride];\
+    const unsigned av_unused l2 = src[-1+2*stride];\
+    const unsigned av_unused l3 = src[-1+3*stride];\
+
+/* t0..t3: the four pixels in the row directly above the block. */
+#define LOAD_TOP_EDGE\
+    const unsigned av_unused t0 = src[ 0-1*stride];\
+    const unsigned av_unused t1 = src[ 1-1*stride];\
+    const unsigned av_unused t2 = src[ 2-1*stride];\
+    const unsigned av_unused t3 = src[ 3-1*stride];\
+
+/**
+ * 4x4 diagonal down-right prediction.
+ *
+ * Reads the top-left corner pixel (lt), the four pixels above (t0..t3) and
+ * the four pixels to the left (l0..l3). All pixels on the same
+ * top-left-to-bottom-right diagonal receive one shared value, a rounded
+ * (1,2,1)/4 filter over three consecutive neighbours.
+ * topright is not used by this mode.
+ */
+static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
+                                      ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    /* byte stride converted to a stride in pixels */
+    int stride = _stride>>(sizeof(pixel)-1);
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+
+    /* Chained assignments: one filtered value per diagonal, starting at the
+     * bottom-left corner and ending at the top-right corner. */
+    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
+    src[0+2*stride]=
+    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
+    src[0+1*stride]=
+    src[1+2*stride]=
+    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
+    src[0+0*stride]=
+    src[1+1*stride]=
+    src[2+2*stride]=
+    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[1+0*stride]=
+    src[2+1*stride]=
+    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[2+0*stride]=
+    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+}
+
+/**
+ * 4x4 diagonal down-left prediction.
+ *
+ * Reads the four pixels above the block (t0..t3) and four more pixels
+ * through _topright (t4..t7). All pixels with the same x+y receive one
+ * shared value, a rounded (1,2,1)/4 filter over three consecutive top
+ * pixels; the bottom-right pixel uses (t6 + 3*t7 + 2)>>2 instead.
+ */
+static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
+                                     ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    const pixel *topright = (const pixel*)_topright;
+    /* byte stride converted to a stride in pixels */
+    int stride = _stride>>(sizeof(pixel)-1);
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+//    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
+    src[1+0*stride]=
+    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
+    src[2+0*stride]=
+    src[1+1*stride]=
+    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
+    src[3+0*stride]=
+    src[2+1*stride]=
+    src[1+2*stride]=
+    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+2*stride]=
+    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
+    src[3+2*stride]=
+    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
+    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
+}
+
+/**
+ * 4x4 vertical-right prediction.
+ *
+ * Reads the top-left corner (lt), the top row (t0..t3) and the left column
+ * (l0..l2 are used; l3 is loaded but not read). Values are either rounded
+ * two-pixel averages or rounded (1,2,1)/4 filters of neighbouring edge
+ * pixels; pixels paired in a chained assignment share one value.
+ * topright is not used by this mode.
+ */
+static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,
+                                          const uint8_t *topright,
+                                          ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    /* byte stride converted to a stride in pixels */
+    int stride = _stride>>(sizeof(pixel)-1);
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=
+    src[1+2*stride]=(lt + t0 + 1)>>1;
+    src[1+0*stride]=
+    src[2+2*stride]=(t0 + t1 + 1)>>1;
+    src[2+0*stride]=
+    src[3+2*stride]=(t1 + t2 + 1)>>1;
+    src[3+0*stride]=(t2 + t3 + 1)>>1;
+    src[0+1*stride]=
+    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[1+1*stride]=
+    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[2+1*stride]=
+    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    /* the first column of rows 2 and 3 is derived from the left edge */
+    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+}
+
+/**
+ * 4x4 vertical-left prediction.
+ *
+ * Reads the top row (t0..t3) and the pixels reached through _topright
+ * (t4..t6 are used; t7 is loaded but not read). Rows 0 and 2 hold rounded
+ * two-pixel averages, rows 1 and 3 hold rounded (1,2,1)/4 filters; pixels
+ * paired in a chained assignment share one value.
+ */
+static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,
+                                         const uint8_t *_topright,
+                                         ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    const pixel *topright = (const pixel*)_topright;
+    /* byte stride converted to a stride in pixels */
+    int stride = _stride>>(sizeof(pixel)-1);
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+
+    src[0+0*stride]=(t0 + t1 + 1)>>1;
+    src[1+0*stride]=
+    src[0+2*stride]=(t1 + t2 + 1)>>1;
+    src[2+0*stride]=
+    src[1+2*stride]=(t2 + t3 + 1)>>1;
+    src[3+0*stride]=
+    src[2+2*stride]=(t3 + t4+ 1)>>1;
+    src[3+2*stride]=(t4 + t5+ 1)>>1;
+    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[1+1*stride]=
+    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[2+1*stride]=
+    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+}
+
+/**
+ * 4x4 horizontal-up prediction.
+ *
+ * Uses only the left column (l0..l3). Values alternate between rounded
+ * two-pixel averages and rounded (1,2,1)/4 filters moving down the left
+ * edge; once the edge is exhausted the remaining pixels are set to l3.
+ * topright is not used by this mode.
+ */
+static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
+                                         ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    /* byte stride converted to a stride in pixels */
+    int stride = _stride>>(sizeof(pixel)-1);
+    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=(l0 + l1 + 1)>>1;
+    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+    src[2+0*stride]=
+    src[0+1*stride]=(l1 + l2 + 1)>>1;
+    src[3+0*stride]=
+    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+    src[2+1*stride]=
+    src[0+2*stride]=(l2 + l3 + 1)>>1;
+    src[3+1*stride]=
+    /* l3 stands in for the missing pixel below it, giving weights (1,3) */
+    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
+    src[3+2*stride]=
+    src[1+3*stride]=
+    src[0+3*stride]=
+    src[2+2*stride]=
+    src[2+3*stride]=
+    src[3+3*stride]=l3;
+}
+
+/**
+ * 4x4 horizontal-down prediction.
+ *
+ * Reads the top-left corner (lt), the top row (t0..t2 are used; t3 is
+ * loaded but not read) and the left column (l0..l3). Values are rounded
+ * two-pixel averages or rounded (1,2,1)/4 filters of neighbouring edge
+ * pixels; pixels paired in a chained assignment share one value.
+ * topright is not used by this mode.
+ */
+static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,
+                                           const uint8_t *topright,
+                                           ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    /* byte stride converted to a stride in pixels */
+    int stride = _stride>>(sizeof(pixel)-1);
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=
+    src[2+1*stride]=(lt + l0 + 1)>>1;
+    src[1+0*stride]=
+    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[0+1*stride]=
+    src[2+2*stride]=(l0 + l1 + 1)>>1;
+    src[1+1*stride]=
+    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+    src[0+2*stride]=
+    src[2+3*stride]=(l1 + l2+ 1)>>1;
+    src[1+2*stride]=
+    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+    src[0+3*stride]=(l2 + l3 + 1)>>1;
+    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+}
+
+/**
+ * 16x16 vertical prediction: the 16 pixels directly above the block are
+ * read as four pixel4 groups and copied into each of the 16 rows.
+ */
+static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
+{
+    pixel *dst = (pixel*)_src;
+    const int pitch = _stride>>(sizeof(pixel)-1);
+    pixel4 *above = (pixel4*)(dst-pitch);
+    const pixel4 q0 = AV_RN4PA(above+0);
+    const pixel4 q1 = AV_RN4PA(above+1);
+    const pixel4 q2 = AV_RN4PA(above+2);
+    const pixel4 q3 = AV_RN4PA(above+3);
+    int row;
+
+    for (row = 0; row < 16; row++) {
+        pixel4 *line = (pixel4*)(dst+row*pitch);
+
+        AV_WN4PA(line+0, q0);
+        AV_WN4PA(line+1, q1);
+        AV_WN4PA(line+2, q2);
+        AV_WN4PA(line+3, q3);
+    }
+}
+
+/**
+ * 16x16 horizontal prediction: each of the 16 rows is filled with the
+ * pixel located immediately to its left.
+ */
+static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
+{
+    pixel *dst = (pixel*)_src;
+    int row, grp;
+    stride >>= sizeof(pixel)-1;
+
+    for (row = 0; row < 16; row++) {
+        pixel *line = dst + row*stride;
+        const pixel4 fill = PIXEL_SPLAT_X4(line[-1]);
+
+        for (grp = 0; grp < 4; grp++)
+            AV_WN4PA(((pixel4*)line)+grp, fill);
+    }
+}
+
+/*
+ * Fill a 16-pixel-wide, 16-row block with the pixel4 value v.
+ * Expects `i`, `src` (pixel pointer) and `stride` (in pixels) to be in
+ * scope at the expansion site; `src` is advanced by 16 rows as a side
+ * effect, and v is evaluated on every store.
+ */
+#define PREDICT_16x16_DC(v)\
+    for(i=0; i<16; i++){\
+        AV_WN4PA(src+ 0, v);\
+        AV_WN4PA(src+ 4, v);\
+        AV_WN4PA(src+ 8, v);\
+        AV_WN4PA(src+12, v);\
+        src += stride;\
+    }
+
+/**
+ * 16x16 DC prediction: fills the block with the rounded mean of the 16
+ * pixels to its left and the 16 pixels above it.
+ */
+static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
+{
+    int i;
+    int sum = 16; /* rounding term for the division by 32 */
+    pixel *src = (pixel*)_src;
+    pixel4 dcsplat;
+    stride >>= sizeof(pixel)-1;
+
+    /* a single pass collects both the left column and the top row */
+    for (i = 0; i < 16; i++)
+        sum += src[-1+i*stride] + src[i-stride];
+    sum >>= 5;
+
+    dcsplat = PIXEL_SPLAT_X4(sum);
+    PREDICT_16x16_DC(dcsplat);
+}
+
+/**
+ * 16x16 DC prediction from the left column only: fills the block with the
+ * rounded mean of the 16 pixels to its left.
+ */
+static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
+{
+    int i;
+    int sum = 8; /* rounding term for the division by 16 */
+    pixel *src = (pixel*)_src;
+    pixel4 dcsplat;
+    stride >>= sizeof(pixel)-1;
+
+    for (i = 0; i < 16; i++)
+        sum += src[-1+i*stride];
+    sum >>= 4;
+
+    dcsplat = PIXEL_SPLAT_X4(sum);
+    PREDICT_16x16_DC(dcsplat);
+}
+
+/**
+ * 16x16 DC prediction from the top row only: fills the block with the
+ * rounded mean of the 16 pixels above it.
+ */
+static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
+{
+    int i;
+    int sum = 8; /* rounding term for the division by 16 */
+    pixel *src = (pixel*)_src;
+    pixel4 dcsplat;
+    stride >>= sizeof(pixel)-1;
+
+    for (i = 0; i < 16; i++)
+        sum += src[i-stride];
+    sum >>= 4;
+
+    dcsplat = PIXEL_SPLAT_X4(sum);
+    PREDICT_16x16_DC(dcsplat);
+}
+
+/*
+ * Generator for constant-fill 16x16 predictors: PRED16x16_X(n, v) defines
+ * pred16x16_<n>_dc, which fills the whole block with the value v.
+ */
+#define PRED16x16_X(n, v) \
+static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
+{\
+    int i;\
+    pixel *src = (pixel*)_src;\
+    stride >>= sizeof(pixel)-1;\
+    PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
+}
+
+/* The mid-level variant exists for every bit depth ... */
+PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
+/* ... while the 127/129 variants are only instantiated for 8-bit pixels. */
+#if BIT_DEPTH == 8
+PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
+PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
+#endif
+
+/**
+ * 16x16 plane prediction with selectable gradient scaling.
+ *
+ * A horizontal gradient H is accumulated from the row above the block and
+ * a vertical gradient V from the column to its left; the block is then
+ * filled with a clipped linear ramp built from them.
+ *
+ * @param _src    top-left pixel of the block to fill
+ * @param _stride line stride in bytes
+ * @param svq3    non-zero selects the 5*(x/4)/16 scaling and swaps H and V
+ * @param rv40    non-zero (with svq3 == 0) selects (x + (x>>2)) >> 4
+ *
+ * With both flags zero the scaling is (5*x+32) >> 6.
+ */
+static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
+                                                 ptrdiff_t _stride,
+                                                 const int svq3,
+                                                 const int rv40)
+{
+  int i, j, k;
+  int a;
+  INIT_CLIP
+  pixel *src = (pixel*)_src;
+  int stride = _stride>>(sizeof(pixel)-1);
+  /* src0: column 7 of the row above; src1/src2: left column, rows 8 and 6 */
+  const pixel * const src0 = src +7-stride;
+  const pixel *       src1 = src +8*stride-1;
+  const pixel *       src2 = src1-2*stride;    // == src+6*stride-1;
+  int H = src0[1] - src0[-1];
+  int V = src1[0] - src2[ 0];
+  /* weighted differences of pixels at distance k from the centre, k = 2..8 */
+  for(k=2; k<=8; ++k) {
+    src1 += stride; src2 -= stride;
+    H += k*(src0[k] - src0[-k]);
+    V += k*(src1[0] - src2[ 0]);
+  }
+  if(svq3){
+    H = ( 5*(H/4) ) / 16;
+    V = ( 5*(V/4) ) / 16;
+
+    /* required for 100% accuracy */
+    i = H; H = V; V = i;
+  }else if(rv40){
+    H = ( H + (H>>2) ) >> 4;
+    V = ( V + (V>>2) ) >> 4;
+  }else{
+    H = ( 5*H+32 ) >> 6;
+    V = ( 5*V+32 ) >> 6;
+  }
+
+  /* After the loop src1 is the left pixel of row 15 and src2 the top-left
+   * corner, so src2[16] is column 15 of the row above. */
+  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
+  for(j=16; j>0; --j) {
+    int b = a;
+    a += V;
+    /* i runs over -16..-4 in steps of 4, so 16+i covers columns 0..15 */
+    for(i=-16; i<0; i+=4) {
+      src[16+i] = CLIP((b    ) >> 5);
+      src[17+i] = CLIP((b+  H) >> 5);
+      src[18+i] = CLIP((b+2*H) >> 5);
+      src[19+i] = CLIP((b+3*H) >> 5);
+      b += 4*H;
+    }
+    src += stride;
+  }
+}
+
+/**
+ * 16x16 plane prediction using the default gradient scaling: forwards to
+ * pred16x16_plane_compat with both the svq3 and rv40 flags cleared.
+ */
+static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)
+{
+    FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
+}
+
+/**
+ * 8x8 vertical prediction: the 8 pixels directly above the block are
+ * copied into each of the 8 rows.
+ */
+static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
+{
+    pixel *dst = (pixel*)_src;
+    const int pitch = _stride>>(sizeof(pixel)-1);
+    pixel4 *above = (pixel4*)(dst-pitch);
+    const pixel4 lo = AV_RN4PA(above+0);
+    const pixel4 hi = AV_RN4PA(above+1);
+    int row;
+
+    for (row = 0; row < 8; row++) {
+        pixel4 *line = (pixel4*)(dst+row*pitch);
+
+        AV_WN4PA(line+0, lo);
+        AV_WN4PA(line+1, hi);
+    }
+}
+
+/**
+ * 8x16 vertical prediction: the 8 pixels directly above the block are
+ * copied into each of the 16 rows.
+ */
+static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
+{
+    pixel *dst = (pixel*)_src;
+    const int pitch = _stride>>(sizeof(pixel)-1);
+    pixel4 *above = (pixel4*)(dst-pitch);
+    const pixel4 lo = AV_RN4PA(above+0);
+    const pixel4 hi = AV_RN4PA(above+1);
+    int row;
+
+    for (row = 0; row < 16; row++) {
+        pixel4 *line = (pixel4*)(dst+row*pitch);
+
+        AV_WN4PA(line+0, lo);
+        AV_WN4PA(line+1, hi);
+    }
+}
+
+/**
+ * 8x8 horizontal prediction: each of the 8 rows is filled with the pixel
+ * located immediately to its left.
+ */
+static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
+{
+    pixel *dst = (pixel*)_src;
+    int row;
+    stride >>= sizeof(pixel)-1;
+
+    for (row = 0; row < 8; row++) {
+        pixel *line = dst + row*stride;
+        const pixel4 fill = PIXEL_SPLAT_X4(line[-1]);
+
+        AV_WN4PA(((pixel4*)line)+0, fill);
+        AV_WN4PA(((pixel4*)line)+1, fill);
+    }
+}
+
+/**
+ * 8x16 horizontal prediction: each of the 16 rows is filled with the
+ * pixel located immediately to its left.
+ */
+static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
+{
+    pixel *dst = (pixel*)_src;
+    int row;
+    stride >>= sizeof(pixel)-1;
+
+    for (row = 0; row < 16; row++) {
+        pixel *line = dst + row*stride;
+        const pixel4 fill = PIXEL_SPLAT_X4(line[-1]);
+
+        AV_WN4PA(((pixel4*)line)+0, fill);
+        AV_WN4PA(((pixel4*)line)+1, fill);
+    }
+}
+
+/*
+ * Generator for constant-fill 8x8 predictors: PRED8x8_X(n, v) defines
+ * pred8x8_<n>_dc, which fills all 8 rows of the block with the value v.
+ */
+#define PRED8x8_X(n, v)\
+static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
+{\
+    int i;\
+    const pixel4 a = PIXEL_SPLAT_X4(v);\
+    pixel *src = (pixel*)_src;\
+    stride >>= sizeof(pixel)-1;\
+    for(i=0; i<8; i++){\
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
+    }\
+}
+
+/* The mid-level variant exists for every bit depth ... */
+PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
+/* ... while the 127/129 variants are only instantiated for 8-bit pixels. */
+#if BIT_DEPTH == 8
+PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
+PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
+#endif
+
+/**
+ * 8x16 constant-fill prediction, built from two stacked 8x8 fills.
+ */
+static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)
+{
+    FUNCC(pred8x8_128_dc)(_src, stride);
+    /* _src is a byte pointer and stride is in bytes: advance 8 rows */
+    FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
+}
+
+/**
+ * 8x8 DC prediction from the left column only. Rows 0..3 are filled with
+ * the rounded mean of the left pixels of rows 0..3, rows 4..7 with the
+ * rounded mean of the left pixels of rows 4..7.
+ */
+static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
+{
+    int i;
+    int upper = 0, lower = 0;
+    pixel4 splat[2];
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1;
+
+    for (i = 0; i < 4; i++) {
+        upper += src[-1+i*stride];
+        lower += src[-1+(i+4)*stride];
+    }
+    splat[0] = PIXEL_SPLAT_X4((upper + 2)>>2);
+    splat[1] = PIXEL_SPLAT_X4((lower + 2)>>2);
+
+    /* i>>2 selects the upper (rows 0..3) or lower (rows 4..7) value */
+    for (i = 0; i < 8; i++) {
+        pixel4 *line = (pixel4*)(src+i*stride);
+
+        AV_WN4PA(line+0, splat[i>>2]);
+        AV_WN4PA(line+1, splat[i>>2]);
+    }
+}
+
+/**
+ * 8x16 DC prediction from the left column only, built from two stacked
+ * 8x8 left-DC predictions.
+ */
+static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
+{
+    FUNCC(pred8x8_left_dc)(_src, stride);
+    /* _src is a byte pointer and stride is in bytes: advance 8 rows */
+    FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
+}
+
+/**
+ * 8x8 DC prediction from the top row only. Columns 0..3 are filled with
+ * the rounded mean of the top pixels of columns 0..3, columns 4..7 with
+ * the rounded mean of the top pixels of columns 4..7.
+ */
+static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
+{
+    int i;
+    int lsum = 0, rsum = 0;
+    pixel4 lsplat, rsplat;
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1;
+
+    for (i = 0; i < 4; i++) {
+        lsum += src[i-stride];
+        rsum += src[4+i-stride];
+    }
+    lsplat = PIXEL_SPLAT_X4((lsum + 2)>>2);
+    rsplat = PIXEL_SPLAT_X4((rsum + 2)>>2);
+
+    /* every row gets the same left/right pair */
+    for (i = 0; i < 8; i++) {
+        pixel4 *line = (pixel4*)(src+i*stride);
+
+        AV_WN4PA(line+0, lsplat);
+        AV_WN4PA(line+1, rsplat);
+    }
+}
+
+/**
+ * 8x16 DC prediction from the top row only. Columns 0..3 are filled with
+ * the rounded mean of the top pixels of columns 0..3, columns 4..7 with
+ * the rounded mean of the top pixels of columns 4..7, over all 16 rows.
+ */
+static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
+{
+    int i;
+    int lsum = 0, rsum = 0;
+    pixel4 lsplat, rsplat;
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1;
+
+    for (i = 0; i < 4; i++) {
+        lsum += src[i-stride];
+        rsum += src[4+i-stride];
+    }
+    lsplat = PIXEL_SPLAT_X4((lsum + 2)>>2);
+    rsplat = PIXEL_SPLAT_X4((rsum + 2)>>2);
+
+    for (i = 0; i < 16; i++) {
+        pixel4 *line = (pixel4*)(src+i*stride);
+
+        AV_WN4PA(line+0, lsplat);
+        AV_WN4PA(line+1, rsplat);
+    }
+}
+
+/**
+ * 8x8 DC prediction computed per 4x4 quadrant:
+ *  - top-left:     mean of the 4 top and 4 left pixels next to it
+ *  - top-right:    mean of the 4 top pixels above it
+ *  - bottom-left:  mean of the 4 left pixels next to it
+ *  - bottom-right: mean of the top-right and bottom-left sums
+ */
+static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
+{
+    int i;
+    int corner = 0, top_r = 0, left_b = 0;
+    pixel4 lsplat[2], rsplat[2];
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1;
+
+    for (i = 0; i < 4; i++) {
+        corner += src[-1+i*stride] + src[i-stride];
+        top_r  += src[4+i-stride];
+        left_b += src[-1+(i+4)*stride];
+    }
+    lsplat[0] = PIXEL_SPLAT_X4((corner + 4)>>3);
+    rsplat[0] = PIXEL_SPLAT_X4((top_r + 2)>>2);
+    lsplat[1] = PIXEL_SPLAT_X4((left_b + 2)>>2);
+    rsplat[1] = PIXEL_SPLAT_X4((top_r + left_b + 4)>>3);
+
+    /* i>>2 selects the upper (rows 0..3) or lower (rows 4..7) pair */
+    for (i = 0; i < 8; i++) {
+        pixel4 *line = (pixel4*)(src+i*stride);
+
+        AV_WN4PA(line+0, lsplat[i>>2]);
+        AV_WN4PA(line+1, rsplat[i>>2]);
+    }
+}
+
+/**
+ * 8x16 DC prediction computed per 4x4 sub-block (2 columns x 4 bands).
+ *
+ * The top-left sub-block uses the mean of its 4 top and 4 left neighbours
+ * and the top-right one the mean of the 4 top pixels above it. In each
+ * lower band the left sub-block uses the mean of its 4 left neighbours and
+ * the right one the mean of those combined with the top-right sum.
+ */
+static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
+{
+    int i, band;
+    int top[2]  = { 0, 0 };       /* top-row sums: columns 0..3 and 4..7 */
+    int left[4] = { 0, 0, 0, 0 }; /* left-column sums, one per 4-row band */
+    pixel4 lsplat[4], rsplat[4];
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1;
+
+    for (i = 0; i < 4; i++) {
+        top[0] += src[i-stride];
+        top[1] += src[4+i-stride];
+        for (band = 0; band < 4; band++)
+            left[band] += src[-1+(i+4*band)*stride];
+    }
+
+    lsplat[0] = PIXEL_SPLAT_X4((left[0] + top[0] + 4)>>3);
+    rsplat[0] = PIXEL_SPLAT_X4((top[1] + 2)>>2);
+    for (band = 1; band < 4; band++) {
+        lsplat[band] = PIXEL_SPLAT_X4((left[band] + 2)>>2);
+        rsplat[band] = PIXEL_SPLAT_X4((top[1] + left[band] + 4)>>3);
+    }
+
+    /* i>>2 is the band index of row i */
+    for (i = 0; i < 16; i++) {
+        pixel4 *line = (pixel4*)(src+i*stride);
+
+        AV_WN4PA(line+0, lsplat[i>>2]);
+        AV_WN4PA(line+1, rsplat[i>>2]);
+    }
+}
+
+//The following mad_cow functions (8x8 and 8x16 variants) should not be optimized!
+/**
+ * Composite 8x8 DC predictor: runs the top-DC 8x8 prediction over the
+ * whole block, then overwrites the top-left 4x4 with pred4x4_dc.
+ */
+static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
+{
+    FUNCC(pred8x8_top_dc)(src, stride);
+    FUNCC(pred4x4_dc)(src, NULL, stride);
+}
+
+/**
+ * Composite 8x16 DC predictor: runs the top-DC 8x16 prediction over the
+ * whole block, then overwrites the top-left 4x4 with pred4x4_dc.
+ */
+static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
+{
+    FUNCC(pred8x16_top_dc)(src, stride);
+    FUNCC(pred4x4_dc)(src, NULL, stride);
+}
+
+/**
+ * Composite 8x8 DC predictor: runs the full 8x8 DC prediction, then
+ * overwrites the top-left 4x4 with pred4x4_top_dc.
+ */
+static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
+{
+    FUNCC(pred8x8_dc)(src, stride);
+    FUNCC(pred4x4_top_dc)(src, NULL, stride);
+}
+
+/**
+ * Composite 8x16 DC predictor: runs the full 8x16 DC prediction, then
+ * overwrites the top-left 4x4 with pred4x4_top_dc.
+ */
+static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
+{
+    FUNCC(pred8x16_dc)(src, stride);
+    FUNCC(pred4x4_top_dc)(src, NULL, stride);
+}
+
+/**
+ * Composite 8x8 DC predictor: runs the left-DC 8x8 prediction, then
+ * overwrites both 4x4 blocks of rows 4..7 with the constant-fill
+ * pred4x4_128_dc.
+ */
+static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
+{
+    FUNCC(pred8x8_left_dc)(src, stride);
+    /* src is a byte pointer: 4*stride skips 4 rows, 4*sizeof(pixel) 4 columns */
+    FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride);
+    FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
+}
+
+/**
+ * Composite 8x16 DC predictor: runs the left-DC 8x16 prediction, then
+ * overwrites both 4x4 blocks of rows 4..7 with the constant-fill
+ * pred4x4_128_dc.
+ */
+static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
+{
+    FUNCC(pred8x16_left_dc)(src, stride);
+    /* src is a byte pointer: 4*stride skips 4 rows, 4*sizeof(pixel) 4 columns */
+    FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride);
+    FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
+}
+
+/**
+ * Composite 8x8 DC predictor: runs the left-DC 8x8 prediction, then
+ * overwrites both 4x4 blocks of rows 0..3 with the constant-fill
+ * pred4x4_128_dc.
+ */
+static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
+{
+    FUNCC(pred8x8_left_dc)(src, stride);
+    /* src is a byte pointer: 4*sizeof(pixel) skips 4 columns */
+    FUNCC(pred4x4_128_dc)(src                  , NULL, stride);
+    FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
+}
+
+/**
+ * Composite 8x16 DC predictor: runs the left-DC 8x16 prediction, then
+ * overwrites both 4x4 blocks of rows 0..3 with the constant-fill
+ * pred4x4_128_dc.
+ */
+static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
+{
+    FUNCC(pred8x16_left_dc)(src, stride);
+    /* src is a byte pointer: 4*sizeof(pixel) skips 4 columns */
+    FUNCC(pred4x4_128_dc)(src                  , NULL, stride);
+    FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
+}
+
+/**
+ * 8x8 plane prediction.
+ *
+ * A horizontal gradient H is accumulated from the row above the block and
+ * a vertical gradient V from the column to its left, both scaled by
+ * (17*x+16)>>5. The block is then filled with the clipped linear ramp
+ * (a + x*H + y*V) >> 5.
+ */
+static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
+{
+  int x, y, k;
+  int a;
+  INIT_CLIP
+  pixel *src = (pixel*)_src;
+  int stride = _stride>>(sizeof(pixel)-1);
+  const pixel * const top = src +3-stride;
+  /* both walkers start on the left pixel of row 3 and move apart */
+  const pixel *       lower = src +3*stride-1;
+  const pixel *       upper = lower;
+  int H = 0;
+  int V = 0;
+
+  for(k=1; k<=4; ++k) {
+    lower += stride; upper -= stride;
+    H += k*(top[k] - top[-k]);
+    V += k*(lower[0] - upper[0]);
+  }
+  H = ( 17*H+16 ) >> 5;
+  V = ( 17*V+16 ) >> 5;
+
+  /* lower is now the left pixel of row 7, upper the top-left corner */
+  a = 16*(lower[0] + upper[8]+1) - 3*(V+H);
+  for(y=0; y<8; ++y) {
+    pixel *row = src + y*stride;
+    const int b = a;
+    a += V;
+    for(x=0; x<8; ++x)
+      row[x] = CLIP((b+x*H) >> 5);
+  }
+}
+
+/**
+ * 8x16 plane prediction.
+ *
+ * The horizontal gradient H is accumulated over 4 pixel pairs of the row
+ * above the block and scaled by (17*x+16)>>5; the vertical gradient V is
+ * accumulated over 8 pixel pairs of the left column and scaled by
+ * (5*x+32)>>6. The block is then filled with the clipped linear ramp
+ * (a + x*H + y*V) >> 5.
+ */
+static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)
+{
+  int x, y, k;
+  int a;
+  INIT_CLIP
+  pixel *src = (pixel*)_src;
+  int stride = _stride>>(sizeof(pixel)-1);
+  const pixel * const top = src +3-stride;
+  /* both walkers start on the left pixel of row 7 and move apart */
+  const pixel *       lower = src +7*stride-1;
+  const pixel *       upper = lower;
+  int H = 0;
+  int V = 0;
+
+  for (k = 1; k <= 8; ++k) {
+      lower += stride; upper -= stride;
+      /* only the first four steps contribute to the horizontal gradient */
+      if (k <= 4)
+          H += k*(top[k] - top[-k]);
+      V += k*(lower[0] - upper[0]);
+  }
+
+  H = (17*H+16) >> 5;
+  V = (5*V+32) >> 6;
+
+  /* lower is now the left pixel of row 15, upper the top-left corner */
+  a = 16*(lower[0] + upper[8] + 1) - 7*V - 3*H;
+  for(y=0; y<16; ++y) {
+    pixel *row = src + y*stride;
+    const int b = a;
+    a += V;
+    for(x=0; x<8; ++x)
+      row[x] = CLIP((b+x*H) >> 5);
+  }
+}
+
+/*
+ * Helpers for the pred8x8l_* predictors. All of them expect `src` (pixel
+ * pointer) and `stride` (in pixels) in scope; the LOAD_LEFT / LOAD_TOP /
+ * LOAD_TOPRIGHT variants additionally read `has_topleft` / `has_topright`.
+ */
+/* Pixel at column x, row y relative to the block's top-left pixel. */
+#define SRC(x,y) src[(x)+(y)*stride]
+/* l<y>: left-column pixel of row y smoothed with a rounded (1,2,1)/4 filter. */
+#define PL(y) \
+    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
+/* Declares filtered left pixels l0..l7. l0 substitutes SRC(-1,0) for the
+ * corner when has_topleft is zero; l7 uses weights (1,3). */
+#define PREDICT_8x8_LOAD_LEFT \
+    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
+                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
+    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
+    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
+
+/* t<x>: top-row pixel of column x smoothed with a rounded (1,2,1)/4 filter. */
+#define PT(x) \
+    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+/* Declares filtered top pixels t0..t7. t0 substitutes SRC(0,-1) for the
+ * corner when has_topleft is zero; t7 substitutes SRC(7,-1) for SRC(8,-1)
+ * when has_topright is zero. */
+#define PREDICT_8x8_LOAD_TOP \
+    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
+                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
+    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
+    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
+                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
+
+/* Same filter as PT(), but assigns to an already declared t<x>. */
+#define PTR(x) \
+    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+/* Declares t8..t15: filtered top-right pixels when has_topright is set,
+ * otherwise all eight are set to the unfiltered SRC(7,-1). */
+#define PREDICT_8x8_LOAD_TOPRIGHT \
+    int t8, t9, t10, t11, t12, t13, t14, t15; \
+    if(has_topright) { \
+        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
+        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
+    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
+
+/* lt: top-left corner pixel smoothed with its left and top neighbours. */
+#define PREDICT_8x8_LOAD_TOPLEFT \
+    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
+
+/* Fills 8 rows of 8 pixels with the pixel4 value v. Declares `y` and
+ * advances `src` by 8 rows as a side effect. */
+#define PREDICT_8x8_DC(v) \
+    int y; \
+    for( y = 0; y < 8; y++ ) { \
+        AV_WN4PA(((pixel4*)src)+0, v); \
+        AV_WN4PA(((pixel4*)src)+1, v); \
+        src += stride; \
+    }
+
+/**
+ * 8x8 luma DC prediction without neighbours: fills the block with the
+ * constant 1<<(BIT_DEPTH-1). has_topleft and has_topright are not used.
+ */
+static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
+                                   int has_topright, ptrdiff_t _stride)
+{
+    pixel *dst = (pixel*)_src;
+    const int pitch = _stride>>(sizeof(pixel)-1);
+    const pixel4 mid = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
+    int row;
+
+    for (row = 0; row < 8; row++) {
+        pixel4 *line = (pixel4*)(dst + row*pitch);
+
+        AV_WN4PA(line+0, mid);
+        AV_WN4PA(line+1, mid);
+    }
+}
+/**
+ * 8x8 luma DC prediction from the left column only: fills the block with
+ * the rounded mean of the eight filtered left pixels l0..l7.
+ */
+static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
+                                    int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
+    int row;
+
+    PREDICT_8x8_LOAD_LEFT;
+    const pixel4 fill = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
+
+    for (row = 0; row < 8; row++) {
+        pixel4 *line = (pixel4*)(src + row*stride);
+
+        AV_WN4PA(line+0, fill);
+        AV_WN4PA(line+1, fill);
+    }
+}
+/**
+ * 8x8 luma DC prediction from the top row only: fills the block with the
+ * rounded mean of the eight filtered top pixels t0..t7.
+ */
+static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
+                                   int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
+    int row;
+
+    PREDICT_8x8_LOAD_TOP;
+    const pixel4 fill = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
+
+    for (row = 0; row < 8; row++) {
+        pixel4 *line = (pixel4*)(src + row*stride);
+
+        AV_WN4PA(line+0, fill);
+        AV_WN4PA(line+1, fill);
+    }
+}
+/**
+ * 8x8 luma DC prediction: fills the block with the rounded mean of the
+ * eight filtered left pixels and the eight filtered top pixels.
+ */
+static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
+                               int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
+    int row;
+
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOP;
+    const pixel4 fill = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
+                                       +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
+
+    for (row = 0; row < 8; row++) {
+        pixel4 *line = (pixel4*)(src + row*stride);
+
+        AV_WN4PA(line+0, fill);
+        AV_WN4PA(line+1, fill);
+    }
+}
+/**
+ * 8x8 luma horizontal prediction: row y is filled with the filtered left
+ * pixel l<y>. has_topright is not used by this mode.
+ */
+static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
+                                       int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
+    pixel4 a;
+    int row;
+
+    PREDICT_8x8_LOAD_LEFT;
+    /* gather the filtered edge so the rows can be written in a loop */
+    const int left[8] = { l0, l1, l2, l3, l4, l5, l6, l7 };
+
+    for (row = 0; row < 8; row++) {
+        a = PIXEL_SPLAT_X4(left[row]);
+        AV_WN4PA(src+row*stride, a);
+        AV_WN4PA(src+row*stride+4, a);
+    }
+}
+/**
+ * 8x8 luma vertical prediction: the filtered top pixels t0..t7 are stored
+ * into row 0, which is then copied into rows 1..7.
+ */
+static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
+                                     int has_topright, ptrdiff_t _stride)
+{
+    int x, y;
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
+    pixel4 lo, hi;
+
+    PREDICT_8x8_LOAD_TOP;
+    const int top[8] = { t0, t1, t2, t3, t4, t5, t6, t7 };
+
+    for (x = 0; x < 8; x++)
+        src[x] = top[x];
+
+    /* read row 0 back as two pixel4 groups and replicate it downwards */
+    lo = AV_RN4PA(((pixel4*)src)+0);
+    hi = AV_RN4PA(((pixel4*)src)+1);
+    for (y = 1; y < 8; y++) {
+        pixel4 *line = (pixel4*)(src+y*stride);
+
+        AV_WN4PA(line+0, lo);
+        AV_WN4PA(line+1, hi);
+    }
+}
+/**
+ * 8x8 luma diagonal down-left prediction.
+ *
+ * Uses the filtered top pixels t0..t7 and the top-right values t8..t15.
+ * All pixels with the same x+y receive one shared value, a rounded
+ * (1,2,1)/4 filter over three consecutive t values; the bottom-right pixel
+ * uses (t14 + 3*t15 + 2) >> 2 instead.
+ */
+static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
+                                      int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    /* byte stride converted to a stride in pixels */
+    int stride = _stride>>(sizeof(pixel)-1);
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_TOPRIGHT;
+    /* one line per anti-diagonal, from the top-left to the bottom-right */
+    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
+    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
+    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
+    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
+    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
+    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
+    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
+    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
+    SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
+}
+/**
+ * 8x8 luma diagonal down-right prediction.
+ *
+ * Uses the filtered top pixels (t0..t7), filtered left pixels (l0..l7) and
+ * the filtered top-left corner (lt). All pixels on the same
+ * top-left-to-bottom-right diagonal receive one shared value, a rounded
+ * (1,2,1)/4 filter over three consecutive edge values.
+ */
+static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
+                                       int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    /* byte stride converted to a stride in pixels */
+    int stride = _stride>>(sizeof(pixel)-1);
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    /* one line per diagonal, from the bottom-left to the top-right corner */
+    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
+    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
+    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
+    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
+    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
+    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+}
+/**
+ * 8x8 luma vertical-right prediction.
+ *
+ * Uses the filtered top pixels (t0..t7), filtered left pixels (l0..l6 are
+ * read; l7 is not) and the filtered top-left corner (lt). Values are
+ * rounded two-value averages or rounded (1,2,1)/4 filters of neighbouring
+ * edge values; a value is shared by pixels one column right and two rows
+ * down from each other.
+ */
+static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
+                                           int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    /* byte stride converted to a stride in pixels */
+    int stride = _stride>>(sizeof(pixel)-1);
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
+    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
+    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
+    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
+    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
+    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
+    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
+    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
+    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
+    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
+    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
+    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(7,0)= (t6 + t7 + 1) >> 1;
+}
+static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
+                                            int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); /* _stride divided by sizeof(pixel) when pixel is 1 or 2 bytes wide */
+    PREDICT_8x8_LOAD_TOP;     /* presumably declares t0..t7 (macro defined earlier in this file); only t0..t6 are used below */
+    PREDICT_8x8_LOAD_LEFT;    /* presumably declares l0..l7 */
+    PREDICT_8x8_LOAD_TOPLEFT; /* presumably declares lt */
+    SRC(0,7)= (l6 + l7 + 1) >> 1; /* rounded 2-tap average */
+    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2; /* rounded (1,2,1)/4 blend; the left-neighbour lines alternate these two forms */
+    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
+    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
+    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
+    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
+    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
+    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
+    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
+    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
+    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
+    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
+    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
+    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
+    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
+    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2; /* from here on only 3-tap blends of top values */
+    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
+    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
+    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
+    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
+    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
+}
+static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
+                                          int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); /* _stride divided by sizeof(pixel) when pixel is 1 or 2 bytes wide */
+    PREDICT_8x8_LOAD_TOP;      /* presumably declares t0..t7 (macro defined earlier in this file) */
+    PREDICT_8x8_LOAD_TOPRIGHT; /* presumably declares t8 and up; t8..t12 are used below */
+    SRC(0,0)= (t0 + t1 + 1) >> 1; /* rounded 2-tap average */
+    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2; /* rounded (1,2,1)/4 blend; lines alternate these two forms to the end */
+    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
+    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
+    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
+    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
+    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
+    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
+    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
+    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
+    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
+    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
+    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
+    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
+    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
+    SRC(7,6)= (t10 + t11 + 1) >> 1;
+    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2; /* t12 is the highest top value used here */
+}
+static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
+                                          int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); /* _stride divided by sizeof(pixel) when pixel is 1 or 2 bytes wide */
+    PREDICT_8x8_LOAD_LEFT; /* presumably declares l0..l7 (macro defined earlier in this file); no top values are used here */
+    SRC(0,0)= (l0 + l1 + 1) >> 1; /* rounded 2-tap average */
+    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2; /* rounded (1,2,1)/4 blend; lines alternate these two forms */
+    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
+    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
+    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
+    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
+    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
+    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
+    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
+    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
+    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
+    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
+    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
+    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2; /* same 3-tap shape with l7 counted in place of a value past l7 */
+    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
+    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
+    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
+    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; /* all remaining positions are a plain copy of l7 */
+}
+
+/* Adds an 8x8 coefficient block column by column: each column starts from its
+ * t0..t7 value (supplied by PREDICT_8x8_LOAD_TOP) and accumulates downwards. */
+static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
+                                     int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    const dctcoef *block = (const dctcoef*)_block;
+    int stride = _stride>>(sizeof(pixel)-1);
+    int col, row;
+    pixel seed[8];
+    PREDICT_8x8_LOAD_TOP;
+
+    /* Store the starting values as pixels before any destination write. */
+    seed[0] = t0;
+    seed[1] = t1;
+    seed[2] = t2;
+    seed[3] = t3;
+    seed[4] = t4;
+    seed[5] = t5;
+    seed[6] = t6;
+    seed[7] = t7;
+
+    for (col = 0; col < 8; col++) {
+        /* The running sum lives in the pixel type, like the stored samples. */
+        pixel acc = seed[col];
+
+        for (row = 0; row < 8; row++) {
+            acc += block[col + 8*row];
+            src[col + row*stride] = acc;
+        }
+    }
+
+    /* The coefficient block has been consumed: clear all 64 entries. */
+    memset(_block, 0, sizeof(dctcoef) * 64);
+}
+
+/* Adds an 8x8 coefficient block row by row: each row starts from its l0..l7
+ * value (supplied by PREDICT_8x8_LOAD_LEFT) and accumulates left to right. */
+static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
+                               int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    const dctcoef *block = (const dctcoef*)_block;
+    int stride = _stride>>(sizeof(pixel)-1);
+    int row, col;
+    pixel seed[8];
+    PREDICT_8x8_LOAD_LEFT;
+
+    /* Store the starting values as pixels before any destination write. */
+    seed[0] = l0;
+    seed[1] = l1;
+    seed[2] = l2;
+    seed[3] = l3;
+    seed[4] = l4;
+    seed[5] = l5;
+    seed[6] = l6;
+    seed[7] = l7;
+
+    for (row = 0; row < 8; row++) {
+        /* The running sum lives in the pixel type, like the stored samples. */
+        pixel acc = seed[row];
+
+        for (col = 0; col < 8; col++) {
+            acc += block[8*row + col];
+            src[row*stride + col] = acc;
+        }
+    }
+
+    /* The coefficient block has been consumed: clear all 64 entries. */
+    memset(_block, 0, sizeof(dctcoef) * 64);
+}
+
+#undef PREDICT_8x8_LOAD_LEFT
+#undef PREDICT_8x8_LOAD_TOP
+#undef PREDICT_8x8_LOAD_TOPLEFT
+#undef PREDICT_8x8_LOAD_TOPRIGHT
+#undef PREDICT_8x8_DC
+#undef PTR
+#undef PT
+#undef PL
+#undef SRC
+
+/* Adds a 4x4 coefficient block column by column: each column is a running sum
+ * seeded with the sample one stride before the block's first row. */
+static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block,
+                                        ptrdiff_t stride)
+{
+    pixel *dst = (pixel*)_pix;
+    const dctcoef *coef = (const dctcoef*)_block;
+    int col, row;
+
+    stride >>= sizeof(pixel)-1;
+    for (col = 0; col < 4; col++) {
+        pixel acc = dst[col - stride];
+        for (row = 0; row < 4; row++) {
+            acc += coef[col + 4*row];
+            dst[col + row*stride] = acc;
+        }
+    }
+
+    memset(_block, 0, sizeof(dctcoef) * 16);
+}
+
+/* Adds a 4x4 coefficient block row by row: each row is a running sum seeded
+ * with the sample immediately before the row's first pixel (index -1). */
+static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block,
+                                          ptrdiff_t stride)
+{
+    pixel *dst = (pixel*)_pix;
+    const dctcoef *coef = (const dctcoef*)_block;
+    int row, col;
+
+    stride >>= sizeof(pixel)-1;
+    for (row = 0; row < 4; row++, dst += stride, coef += 4) {
+        pixel acc = dst[-1];
+        for (col = 0; col < 4; col++)
+            dst[col] = acc += coef[col];
+    }
+
+    /* All 16 coefficients have been consumed. */
+    memset(_block, 0, sizeof(dctcoef) * 16);
+}
+
+/* Adds an 8x8 coefficient block column by column: each column is a running sum
+ * seeded with the sample one stride before the block's first row. */
+static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block,
+                                         ptrdiff_t stride)
+{
+    pixel *dst = (pixel*)_pix;
+    const dctcoef *coef = (const dctcoef*)_block;
+    int col, row;
+
+    stride >>= sizeof(pixel)-1;
+
+    for (col = 0; col < 8; col++) {
+        /* Seed: the sample one stride before this column's first pixel. */
+        pixel acc = dst[col - stride];
+
+        for (row = 0; row < 8; row++) {
+            acc += coef[col + 8*row];
+            dst[col + row*stride] = acc;
+        }
+    }
+
+    /* All 64 coefficients have been consumed. */
+    memset(_block, 0, sizeof(dctcoef) * 64);
+}
+
+/* Adds an 8x8 coefficient block row by row: each row is a running sum seeded
+ * with the sample immediately before the row's first pixel (index -1). */
+static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block,
+                                           ptrdiff_t stride)
+{
+    pixel *dst = (pixel*)_pix;
+    const dctcoef *coef = (const dctcoef*)_block;
+    int row, col;
+
+    stride >>= sizeof(pixel)-1;
+
+    for (row = 0; row < 8; row++) {
+        /* The running sum lives in the pixel type, like the stored samples. */
+        pixel acc = dst[-1];
+
+        for (col = 0; col < 8; col++)
+            dst[col] = acc += coef[col];
+        dst  += stride;
+        coef += 8;
+    }
+
+    memset(_block, 0, sizeof(dctcoef) * 64);
+}
+
+static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
+                                          int16_t *block,
+                                          ptrdiff_t stride)
+{
+    const size_t step = 16*sizeof(pixel); /* int16_t entries between consecutive 4x4 sub-blocks */
+    for (int n = 0; n < 16; n++)
+        FUNCC(pred4x4_vertical_add)(pix + block_offset[n], block + n*step, stride);
+}
+
+static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
+                                            const int *block_offset,
+                                            int16_t *block,
+                                            ptrdiff_t stride)
+{
+    const size_t step = 16*sizeof(pixel); /* int16_t entries between consecutive 4x4 sub-blocks */
+    for (int n = 0; n < 16; n++)
+        FUNCC(pred4x4_horizontal_add)(pix + block_offset[n], block + n*step, stride);
+}
+
+static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
+                                        int16_t *block, ptrdiff_t stride)
+{
+    const size_t step = 16*sizeof(pixel); /* int16_t entries between consecutive 4x4 sub-blocks */
+    for (int n = 0; n < 4; n++)
+        FUNCC(pred4x4_vertical_add)(pix + block_offset[n], block + n*step, stride);
+}
+
+static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
+                                         int16_t *block, ptrdiff_t stride)
+{
+    const size_t step = 16*sizeof(pixel);
+    /* Sub-blocks 0-3 use block_offset[0..3]; sub-blocks 4-7 use block_offset[8..11]. */
+    for (int n = 0; n < 8; n++)
+        FUNCC(pred4x4_vertical_add)(pix + block_offset[n < 4 ? n : n+4],
+                                    block + n*step, stride);
+}
+
+static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
+                                          int16_t *block,
+                                          ptrdiff_t stride)
+{
+    const size_t step = 16*sizeof(pixel); /* int16_t entries between consecutive 4x4 sub-blocks */
+    for (int n = 0; n < 4; n++)
+        FUNCC(pred4x4_horizontal_add)(pix + block_offset[n], block + n*step, stride);
+}
+
+static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,
+                                           const int *block_offset,
+                                           int16_t *block, ptrdiff_t stride)
+{
+    const size_t step = 16*sizeof(pixel);
+    /* Sub-blocks 0-3 use block_offset[0..3]; sub-blocks 4-7 use block_offset[8..11]. */
+    for (int n = 0; n < 8; n++)
+        FUNCC(pred4x4_horizontal_add)(pix + block_offset[n < 4 ? n : n+4],
+                                      block + n*step, stride);
+}
diff --git a/media/ffvpx/libavcodec/hpeldsp.h b/media/ffvpx/libavcodec/hpeldsp.h
new file mode 100644
index 0000000000..45e81b10a5
--- /dev/null
+++ b/media/ffvpx/libavcodec/hpeldsp.h
@@ -0,0 +1,107 @@
+/*
+ * Half-pel DSP functions.
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Half-pel DSP functions.
+ */
+
+#ifndef AVCODEC_HPELDSP_H
+#define AVCODEC_HPELDSP_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* add and put pixel (decoding) */
+// blocksizes for op_pixels_func are 8x4, 8x8, 16x8, 16x16
+// h for op_pixels_func is limited to {width/2, width} but never larger
+// than 16 and never smaller than 4
+typedef void (*op_pixels_func)(uint8_t *block /*align width (8 or 16)*/,
+                               const uint8_t *pixels /*align 1*/,
+                               ptrdiff_t line_size, int h);
+
+/**
+ * Half-pel DSP context.
+ */
+typedef struct HpelDSPContext {
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * This is an array[4][4] of motion compensation functions for 4
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * This is an array[4][4] of motion compensation functions for 4
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b+1)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * This is an array[4][4] of motion compensation functions for 2
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     * @note The size is kept at [4][4] to match the above pixel_tabs and avoid
+     *       out of bounds reads in the motion estimation code.
+     */
+    op_pixels_func put_no_rnd_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * This is an array[4] of motion compensation functions for 1
+     * horizontal blocksize (16) and the 4 halfpel positions<br>
+     * *pixels_tab[ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_no_rnd_pixels_tab[4];
+} HpelDSPContext;
+
+void ff_hpeldsp_init(HpelDSPContext *c, int flags);
+
+void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_loongarch(HpelDSPContext *c, int flags);
+
+#endif /* AVCODEC_HPELDSP_H */
diff --git a/media/ffvpx/libavcodec/hwaccel.h b/media/ffvpx/libavcodec/hwaccel.h
new file mode 100644
index 0000000000..3aaa92571c
--- /dev/null
+++ b/media/ffvpx/libavcodec/hwaccel.h
@@ -0,0 +1,84 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HWACCEL_H
+#define AVCODEC_HWACCEL_H
+
+#include "avcodec.h"
+#include "hwaccels.h"
+
+
+#define HWACCEL_CAP_ASYNC_SAFE      (1 << 0)
+
+
+typedef struct AVCodecHWConfigInternal {
+    /**
+     * This is the structure which will be returned to the user by
+     * avcodec_get_hw_config().
+     */
+    AVCodecHWConfig public;
+    /**
+     * If this configuration uses a hwaccel, a pointer to it.
+     * If not, NULL.
+     */
+    const AVHWAccel *hwaccel;
+} AVCodecHWConfigInternal;
+
+
+// These macros are used to simplify AVCodecHWConfigInternal definitions.
+// HW_CONFIG_HWACCEL: device/frames/ad_hoc each enable one METHOD_* bit; name is the AVHWAccel object to point at.
+#define HW_CONFIG_HWACCEL(device, frames, ad_hoc, format, device_type_, name) \
+    &(const AVCodecHWConfigInternal) { \
+        .public          = { \
+            .pix_fmt     = AV_PIX_FMT_ ## format, \
+            .methods     = (device ? AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX : 0) | \
+                           (frames ? AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX : 0) | \
+                           (ad_hoc ? AV_CODEC_HW_CONFIG_METHOD_AD_HOC        : 0),  \
+            .device_type = AV_HWDEVICE_TYPE_ ## device_type_, \
+        }, \
+        .hwaccel         = &name, \
+    }
+// HW_CONFIG_INTERNAL: only METHOD_INTERNAL is set, with device type NONE and no hwaccel.
+#define HW_CONFIG_INTERNAL(format) \
+    &(const AVCodecHWConfigInternal) { \
+        .public          = { \
+            .pix_fmt     = AV_PIX_FMT_ ## format, \
+            .methods     = AV_CODEC_HW_CONFIG_METHOD_INTERNAL, \
+            .device_type = AV_HWDEVICE_TYPE_NONE, \
+        }, \
+        .hwaccel         = NULL, \
+    }
+
+#define HWACCEL_DXVA2(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, DXVA2_VLD,    DXVA2,        ff_ ## codec ## _dxva2_hwaccel)
+#define HWACCEL_D3D11VA2(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 0, D3D11,        D3D11VA,      ff_ ## codec ## _d3d11va2_hwaccel)
+#define HWACCEL_NVDEC(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 0, CUDA,         CUDA,         ff_ ## codec ## _nvdec_hwaccel)
+#define HWACCEL_VAAPI(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, VAAPI,        VAAPI,        ff_ ## codec ## _vaapi_hwaccel)
+#define HWACCEL_VDPAU(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, VDPAU,        VDPAU,        ff_ ## codec ## _vdpau_hwaccel)
+#define HWACCEL_VIDEOTOOLBOX(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, VIDEOTOOLBOX, VIDEOTOOLBOX, ff_ ## codec ## _videotoolbox_hwaccel)
+#define HWACCEL_D3D11VA(codec) \
+    HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD,  NONE,         ff_ ## codec ## _d3d11va_hwaccel)
+#define HWACCEL_XVMC(codec) \
+    HW_CONFIG_HWACCEL(0, 0, 1, XVMC,         NONE,         ff_ ## codec ## _xvmc_hwaccel)
+
+#endif /* AVCODEC_HWACCEL_H */
diff --git a/media/ffvpx/libavcodec/hwaccels.h b/media/ffvpx/libavcodec/hwaccels.h
new file mode 100644
index 0000000000..aca55831f3
--- /dev/null
+++ b/media/ffvpx/libavcodec/hwaccels.h
@@ -0,0 +1,85 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HWACCELS_H
+#define AVCODEC_HWACCELS_H
+
+#include "avcodec.h"
+
+extern const AVHWAccel ff_av1_d3d11va_hwaccel;
+extern const AVHWAccel ff_av1_d3d11va2_hwaccel;
+extern const AVHWAccel ff_av1_dxva2_hwaccel;
+extern const AVHWAccel ff_av1_nvdec_hwaccel;
+extern const AVHWAccel ff_av1_vaapi_hwaccel;
+extern const AVHWAccel ff_av1_vdpau_hwaccel;
+extern const AVHWAccel ff_h263_vaapi_hwaccel;
+extern const AVHWAccel ff_h263_videotoolbox_hwaccel;
+extern const AVHWAccel ff_h264_d3d11va_hwaccel;
+extern const AVHWAccel ff_h264_d3d11va2_hwaccel;
+extern const AVHWAccel ff_h264_dxva2_hwaccel;
+extern const AVHWAccel ff_h264_nvdec_hwaccel;
+extern const AVHWAccel ff_h264_vaapi_hwaccel;
+extern const AVHWAccel ff_h264_vdpau_hwaccel;
+extern const AVHWAccel ff_h264_videotoolbox_hwaccel;
+extern const AVHWAccel ff_hevc_d3d11va_hwaccel;
+extern const AVHWAccel ff_hevc_d3d11va2_hwaccel;
+extern const AVHWAccel ff_hevc_dxva2_hwaccel;
+extern const AVHWAccel ff_hevc_nvdec_hwaccel;
+extern const AVHWAccel ff_hevc_vaapi_hwaccel;
+extern const AVHWAccel ff_hevc_vdpau_hwaccel;
+extern const AVHWAccel ff_hevc_videotoolbox_hwaccel;
+extern const AVHWAccel ff_mjpeg_nvdec_hwaccel;
+extern const AVHWAccel ff_mjpeg_vaapi_hwaccel;
+extern const AVHWAccel ff_mpeg1_nvdec_hwaccel;
+extern const AVHWAccel ff_mpeg1_vdpau_hwaccel;
+extern const AVHWAccel ff_mpeg1_videotoolbox_hwaccel;
+extern const AVHWAccel ff_mpeg2_d3d11va_hwaccel;
+extern const AVHWAccel ff_mpeg2_d3d11va2_hwaccel;
+extern const AVHWAccel ff_mpeg2_nvdec_hwaccel;
+extern const AVHWAccel ff_mpeg2_dxva2_hwaccel;
+extern const AVHWAccel ff_mpeg2_vaapi_hwaccel;
+extern const AVHWAccel ff_mpeg2_vdpau_hwaccel;
+extern const AVHWAccel ff_mpeg2_videotoolbox_hwaccel;
+extern const AVHWAccel ff_mpeg4_nvdec_hwaccel;
+extern const AVHWAccel ff_mpeg4_vaapi_hwaccel;
+extern const AVHWAccel ff_mpeg4_vdpau_hwaccel;
+extern const AVHWAccel ff_mpeg4_videotoolbox_hwaccel;
+extern const AVHWAccel ff_prores_videotoolbox_hwaccel;
+extern const AVHWAccel ff_vc1_d3d11va_hwaccel;
+extern const AVHWAccel ff_vc1_d3d11va2_hwaccel;
+extern const AVHWAccel ff_vc1_dxva2_hwaccel;
+extern const AVHWAccel ff_vc1_nvdec_hwaccel;
+extern const AVHWAccel ff_vc1_vaapi_hwaccel;
+extern const AVHWAccel ff_vc1_vdpau_hwaccel;
+extern const AVHWAccel ff_vp8_nvdec_hwaccel;
+extern const AVHWAccel ff_vp8_vaapi_hwaccel;
+extern const AVHWAccel ff_vp9_d3d11va_hwaccel;
+extern const AVHWAccel ff_vp9_d3d11va2_hwaccel;
+extern const AVHWAccel ff_vp9_dxva2_hwaccel;
+extern const AVHWAccel ff_vp9_nvdec_hwaccel;
+extern const AVHWAccel ff_vp9_vaapi_hwaccel;
+extern const AVHWAccel ff_vp9_vdpau_hwaccel;
+extern const AVHWAccel ff_vp9_videotoolbox_hwaccel;
+extern const AVHWAccel ff_wmv3_d3d11va_hwaccel;
+extern const AVHWAccel ff_wmv3_d3d11va2_hwaccel;
+extern const AVHWAccel ff_wmv3_dxva2_hwaccel;
+extern const AVHWAccel ff_wmv3_nvdec_hwaccel;
+extern const AVHWAccel ff_wmv3_vaapi_hwaccel;
+extern const AVHWAccel ff_wmv3_vdpau_hwaccel;
+
+#endif /* AVCODEC_HWACCELS_H */
diff --git a/media/ffvpx/libavcodec/hwconfig.h b/media/ffvpx/libavcodec/hwconfig.h
new file mode 100644
index 0000000000..721424912c
--- /dev/null
+++ b/media/ffvpx/libavcodec/hwconfig.h
@@ -0,0 +1,100 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HWCONFIG_H
+#define AVCODEC_HWCONFIG_H
+
+#include "avcodec.h"
+#include "hwaccels.h"
+
+
+#define HWACCEL_CAP_ASYNC_SAFE      (1 << 0)
+
+
+typedef struct AVCodecHWConfigInternal {
+    /**
+     * This is the structure which will be returned to the user by
+     * avcodec_get_hw_config().
+     */
+    AVCodecHWConfig public;
+    /**
+     * If this configuration uses a hwaccel, a pointer to it.
+     * If not, NULL.
+     */
+    const AVHWAccel *hwaccel;
+} AVCodecHWConfigInternal;
+
+
+// These macros are used to simplify AVCodecHWConfigInternal definitions.
+// HW_CONFIG_HWACCEL: device/frames/ad_hoc each enable one METHOD_* bit; name is the AVHWAccel object to point at.
+#define HW_CONFIG_HWACCEL(device, frames, ad_hoc, format, device_type_, name) \
+    &(const AVCodecHWConfigInternal) { \
+        .public          = { \
+            .pix_fmt     = AV_PIX_FMT_ ## format, \
+            .methods     = (device ? AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX : 0) | \
+                           (frames ? AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX : 0) | \
+                           (ad_hoc ? AV_CODEC_HW_CONFIG_METHOD_AD_HOC        : 0),  \
+            .device_type = AV_HWDEVICE_TYPE_ ## device_type_, \
+        }, \
+        .hwaccel         = &name, \
+    }
+// HW_CONFIG_INTERNAL: only METHOD_INTERNAL is set, with device type NONE and no hwaccel.
+#define HW_CONFIG_INTERNAL(format) \
+    &(const AVCodecHWConfigInternal) { \
+        .public          = { \
+            .pix_fmt     = AV_PIX_FMT_ ## format, \
+            .methods     = AV_CODEC_HW_CONFIG_METHOD_INTERNAL, \
+            .device_type = AV_HWDEVICE_TYPE_NONE, \
+        }, \
+        .hwaccel         = NULL, \
+    }
+
+#define HWACCEL_DXVA2(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, DXVA2_VLD,    DXVA2,        ff_ ## codec ## _dxva2_hwaccel)
+#define HWACCEL_D3D11VA2(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 0, D3D11,        D3D11VA,      ff_ ## codec ## _d3d11va2_hwaccel)
+#define HWACCEL_NVDEC(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 0, CUDA,         CUDA,         ff_ ## codec ## _nvdec_hwaccel)
+#define HWACCEL_VAAPI(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, VAAPI,        VAAPI,        ff_ ## codec ## _vaapi_hwaccel)
+#define HWACCEL_VDPAU(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, VDPAU,        VDPAU,        ff_ ## codec ## _vdpau_hwaccel)
+#define HWACCEL_VIDEOTOOLBOX(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, VIDEOTOOLBOX, VIDEOTOOLBOX, ff_ ## codec ## _videotoolbox_hwaccel)
+#define HWACCEL_D3D11VA(codec) \
+    HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD,  NONE,         ff_ ## codec ## _d3d11va_hwaccel)
+// HW_CONFIG_ENCODER: device/frames/ad_hoc each enable one METHOD_* bit; .hwaccel is always NULL.
+#define HW_CONFIG_ENCODER(device, frames, ad_hoc, format, device_type_) \
+    &(const AVCodecHWConfigInternal) { \
+        .public          = { \
+            .pix_fmt     = AV_PIX_FMT_ ## format, \
+            .methods     = (device ? AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX : 0) | \
+                           (frames ? AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX : 0) | \
+                           (ad_hoc ? AV_CODEC_HW_CONFIG_METHOD_AD_HOC        : 0),  \
+            .device_type = AV_HWDEVICE_TYPE_ ## device_type_, \
+        }, \
+        .hwaccel         = NULL, \
+    }
+// Shorthands: _DEVICE sets only the HW_DEVICE_CTX method, _FRAMES only the HW_FRAMES_CTX method.
+#define HW_CONFIG_ENCODER_DEVICE(format, device_type_) \
+    HW_CONFIG_ENCODER(1, 0, 0, format, device_type_)
+
+#define HW_CONFIG_ENCODER_FRAMES(format, device_type_) \
+    HW_CONFIG_ENCODER(0, 1, 0, format, device_type_)
+
+#endif /* AVCODEC_HWCONFIG_H */
diff --git a/media/ffvpx/libavcodec/idctdsp.c b/media/ffvpx/libavcodec/idctdsp.c
new file mode 100644
index 0000000000..7216afb094
--- /dev/null
+++ b/media/ffvpx/libavcodec/idctdsp.c
@@ -0,0 +1,315 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "config_components.h"
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+#include "avcodec.h"
+#include "dct.h"
+#include "faanidct.h"
+#include "idctdsp.h"
+#include "simple_idct.h"
+#include "xvididct.h"
+
+/**
+ * Compose a scan table with a permutation: for each of the 64 positions,
+ * dst[k] = permutation[src[k]].
+ *
+ * @param dst         receives the permuted table (64 entries)
+ * @param src         input scan table (64 entries); every entry is used as an
+ *                    index into permutation and is not range-checked here
+ * @param permutation lookup table indexed by the entries of src
+ */
+av_cold void ff_permute_scantable(uint8_t dst[64], const uint8_t src[64],
+                                  const uint8_t permutation[64])
+{
+    int pos = 0;
+
+    while (pos < 64) {
+        dst[pos] = permutation[src[pos]];
+        pos++;
+    }
+}
+
+/**
+ * Fill a 64-entry IDCT input permutation table for the requested type.
+ *
+ * On x86 builds the architecture helper is consulted first; a non-zero
+ * return from it ends this function before the generic code runs. A type
+ * the generic code does not handle is reported through av_log() and the
+ * table is left unmodified.
+ *
+ * @param idct_permutation table to fill (entries 0..63 are written)
+ * @param perm_type        permutation to generate
+ */
+av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
+                                           enum idct_permutation_type perm_type)
+{
+#if ARCH_X86
+    if (ff_init_scantable_permutation_x86(idct_permutation, perm_type))
+        return;
+#endif
+
+    if (perm_type == FF_IDCT_PERM_NONE) {
+        /* identity mapping */
+        for (int k = 0; k < 64; k++)
+            idct_permutation[k] = k;
+    } else if (perm_type == FF_IDCT_PERM_LIBMPEG2) {
+        /* keep bits 3-5, rotate the low three bits right by one */
+        for (int k = 0; k < 64; k++)
+            idct_permutation[k] = (k & 0x38) | ((k & 6) >> 1) | ((k & 1) << 2);
+    } else if (perm_type == FF_IDCT_PERM_TRANSPOSE) {
+        /* exchange the low and high 3-bit halves of the index */
+        for (int k = 0; k < 64; k++)
+            idct_permutation[k] = ((k & 7) << 3) | (k >> 3);
+    } else if (perm_type == FF_IDCT_PERM_PARTTRANS) {
+        /* keep bits 2 and 5, exchange bits 0-1 with bits 3-4 */
+        for (int k = 0; k < 64; k++)
+            idct_permutation[k] = (k & 0x24) | ((k & 3) << 3) | ((k >> 3) & 3);
+    } else {
+        av_log(NULL, AV_LOG_ERROR,
+               "Internal error, IDCT permutation not set\n");
+    }
+}
+
+/**
+ * Store an 8x8 block of 16-bit samples as bytes, passing every sample
+ * through av_clip_uint8().
+ *
+ * @param block     64 samples, 8 per row, rows contiguous
+ * @param pixels    destination of the first row
+ * @param line_size distance in bytes between consecutive destination rows
+ */
+void ff_put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
+                             ptrdiff_t line_size)
+{
+    for (int row = 0; row < 8; row++) {
+        for (int col = 0; col < 8; col++)
+            pixels[col] = av_clip_uint8(block[col]);
+
+        /* step both cursors to the next row */
+        block  += 8;
+        pixels += line_size;
+    }
+}
+
+/**
+ * Store the top-left 4x4 corner of a block as bytes, passing every sample
+ * through av_clip_uint8().
+ *
+ * Source rows are 8 samples apart; destination rows are line_size bytes
+ * apart. line_size is a ptrdiff_t, matching ff_put_pixels_clamped_c() and
+ * the ptrdiff_t stride the caller in this file passes, so the stride is not
+ * narrowed to int on the way in.
+ *
+ * @param block     source samples (row stride of 8)
+ * @param pixels    destination of the first row
+ * @param line_size distance in bytes between consecutive destination rows
+ */
+static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
+                                  ptrdiff_t line_size)
+{
+    for (int row = 0; row < 4; row++) {
+        for (int col = 0; col < 4; col++)
+            pixels[col] = av_clip_uint8(block[col]);
+
+        pixels += line_size;
+        block  += 8;
+    }
+}
+
+/**
+ * Store the top-left 2x2 corner of a block as bytes, passing every sample
+ * through av_clip_uint8().
+ *
+ * Source rows are 8 samples apart; destination rows are line_size bytes
+ * apart. line_size is a ptrdiff_t, matching ff_put_pixels_clamped_c() and
+ * the ptrdiff_t stride the caller in this file passes, so the stride is not
+ * narrowed to int on the way in.
+ *
+ * @param block     source samples (row stride of 8)
+ * @param pixels    destination of the first row
+ * @param line_size distance in bytes between consecutive destination rows
+ */
+static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
+                                  ptrdiff_t line_size)
+{
+    for (int row = 0; row < 2; row++) {
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
+
+        pixels += line_size;
+        block  += 8;
+    }
+}
+
+/**
+ * Store an 8x8 block of signed samples as unsigned bytes with a +128 bias.
+ *
+ * Samples below -128 become 0, samples above 127 become 255, and everything
+ * in between is stored as sample + 128.
+ *
+ * @param block     64 samples, 8 per row, rows contiguous
+ * @param pixels    destination of the first row
+ * @param line_size distance in bytes between consecutive destination rows
+ */
+static void put_signed_pixels_clamped_c(const int16_t *block,
+                                        uint8_t *av_restrict pixels,
+                                        ptrdiff_t line_size)
+{
+    for (int row = 0; row < 8; row++) {
+        for (int col = 0; col < 8; col++) {
+            const int sample = block[col];
+
+            pixels[col] = sample < -128 ? 0
+                        : sample >  127 ? 255
+                        : (uint8_t) (sample + 128);
+        }
+        block  += 8;
+        pixels += line_size;
+    }
+}
+
+/**
+ * Add an 8x8 block of 16-bit samples to the bytes already in pixels, passing
+ * each sum through av_clip_uint8() before storing it back.
+ *
+ * @param block     64 samples, 8 per row, rows contiguous
+ * @param pixels    first row of the bytes to update in place
+ * @param line_size distance in bytes between consecutive rows of pixels
+ */
+void ff_add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
+                             ptrdiff_t line_size)
+{
+    for (int row = 0; row < 8; row++) {
+        for (int col = 0; col < 8; col++)
+            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
+
+        /* step both cursors to the next row */
+        block  += 8;
+        pixels += line_size;
+    }
+}
+
+/**
+ * Add the top-left 4x4 corner of a block to the bytes already in pixels,
+ * passing each sum through av_clip_uint8() before storing it back.
+ *
+ * Source rows are 8 samples apart; destination rows are line_size bytes
+ * apart. line_size is a ptrdiff_t, matching ff_add_pixels_clamped_c() and
+ * the ptrdiff_t stride the caller in this file passes, so the stride is not
+ * narrowed to int on the way in.
+ *
+ * @param block     source samples (row stride of 8)
+ * @param pixels    first row of the bytes to update in place
+ * @param line_size distance in bytes between consecutive rows of pixels
+ */
+static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
+                                  ptrdiff_t line_size)
+{
+    for (int row = 0; row < 4; row++) {
+        for (int col = 0; col < 4; col++)
+            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
+
+        pixels += line_size;
+        block  += 8;
+    }
+}
+
+/**
+ * Add the top-left 2x2 corner of a block to the bytes already in pixels,
+ * passing each sum through av_clip_uint8() before storing it back.
+ *
+ * Source rows are 8 samples apart; destination rows are line_size bytes
+ * apart. line_size is a ptrdiff_t, matching ff_add_pixels_clamped_c() and
+ * the ptrdiff_t stride the caller in this file passes, so the stride is not
+ * narrowed to int on the way in.
+ *
+ * @param block     source samples (row stride of 8)
+ * @param pixels    first row of the bytes to update in place
+ * @param line_size distance in bytes between consecutive rows of pixels
+ */
+static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
+                                  ptrdiff_t line_size)
+{
+    for (int row = 0; row < 2; row++) {
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+
+        pixels += line_size;
+        block  += 8;
+    }
+}
+
+/* idct_put callback installed for lowres == 1: calls ff_j_rev_dct4() on block,
+ * then hands block to put_pixels_clamped4_c() to store a 4x4 area at dest. */
+static void ff_jref_idct4_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    ff_j_rev_dct4 (block);
+    put_pixels_clamped4_c(block, dest, line_size);
+}
+/* idct_add callback installed for lowres == 1: calls ff_j_rev_dct4() on block,
+ * then hands block to add_pixels_clamped4_c() to update a 4x4 area at dest. */
+static void ff_jref_idct4_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    ff_j_rev_dct4 (block);
+    add_pixels_clamped4_c(block, dest, line_size);
+}
+
+/* idct_put callback installed for lowres == 2: calls ff_j_rev_dct2() on block,
+ * then hands block to put_pixels_clamped2_c() to store a 2x2 area at dest. */
+static void ff_jref_idct2_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    ff_j_rev_dct2 (block);
+    put_pixels_clamped2_c(block, dest, line_size);
+}
+/* idct_add callback installed for lowres == 2: calls ff_j_rev_dct2() on block,
+ * then hands block to add_pixels_clamped2_c() to update a 2x2 area at dest. */
+static void ff_jref_idct2_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    ff_j_rev_dct2 (block);
+    add_pixels_clamped2_c(block, dest, line_size);
+}
+
+/* idct_put callback installed for lowres == 3: writes a single byte,
+ * (block[0] + 4) >> 3 passed through av_clip_uint8(). line_size is unused. */
+static void ff_jref_idct1_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    const int dc = (block[0] + 4) >> 3;
+
+    dest[0] = av_clip_uint8(dc);
+}
+/* idct_add callback installed for lowres == 3: adds (block[0] + 4) >> 3 to the
+ * single byte at dest and stores the av_clip_uint8() of the sum. line_size is
+ * unused. */
+static void ff_jref_idct1_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    const int dc = (block[0] + 4) >> 3;
+
+    dest[0] = av_clip_uint8(dest[0] + dc);
+}
+
+/**
+ * Fill an IDCTDSPContext with C implementations chosen from avctx, run the
+ * architecture-specific init selected at build time, then build the
+ * permutation table.
+ *
+ * Reads avctx->lowres, avctx->bits_per_raw_sample and avctx->idct_algo.
+ * c->mpeg4_studio_profile is also read (on the 9/10-bit path), so the caller
+ * must have set it before calling.
+ *
+ * @param c     context to initialise
+ * @param avctx codec context supplying the selection parameters
+ */
+av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
+{
+    /* only passed on to the arch-specific init calls below */
+    av_unused const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
+
+    /* lowres 1/2/3 select the 4x4 / 2x2 / 1x1 jref wrappers defined above;
+     * these are chosen regardless of bit depth or idct_algo */
+    if (avctx->lowres==1) {
+        c->idct_put  = ff_jref_idct4_put;
+        c->idct_add  = ff_jref_idct4_add;
+        c->idct      = ff_j_rev_dct4;
+        c->perm_type = FF_IDCT_PERM_NONE;
+    } else if (avctx->lowres==2) {
+        c->idct_put  = ff_jref_idct2_put;
+        c->idct_add  = ff_jref_idct2_add;
+        c->idct      = ff_j_rev_dct2;
+        c->perm_type = FF_IDCT_PERM_NONE;
+    } else if (avctx->lowres==3) {
+        c->idct_put  = ff_jref_idct1_put;
+        c->idct_add  = ff_jref_idct1_add;
+        c->idct      = ff_j_rev_dct1;
+        c->perm_type = FF_IDCT_PERM_NONE;
+    } else {
+        /* any other lowres value: choose by bit depth, then by idct_algo */
+        if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) {
+            /* 10-bit MPEG-4 Simple Studio Profile requires a higher precision IDCT
+               However, it only uses idct_put */
+            if (c->mpeg4_studio_profile) {
+                c->idct_put              = ff_simple_idct_put_int32_10bit;
+                c->idct_add              = NULL;
+                c->idct                  = NULL;
+            } else {
+                c->idct_put              = ff_simple_idct_put_int16_10bit;
+                c->idct_add              = ff_simple_idct_add_int16_10bit;
+                c->idct                  = ff_simple_idct_int16_10bit;
+            }
+            c->perm_type             = FF_IDCT_PERM_NONE;
+        } else if (avctx->bits_per_raw_sample == 12) {
+            c->idct_put              = ff_simple_idct_put_int16_12bit;
+            c->idct_add              = ff_simple_idct_add_int16_12bit;
+            c->idct                  = ff_simple_idct_int16_12bit;
+            c->perm_type             = FF_IDCT_PERM_NONE;
+        } else {
+            /* every remaining bit depth takes the 8-bit selection below */
+            if (avctx->idct_algo == FF_IDCT_INT) {
+                c->idct_put  = ff_jref_idct_put;
+                c->idct_add  = ff_jref_idct_add;
+                c->idct      = ff_j_rev_dct;
+                c->perm_type = FF_IDCT_PERM_LIBMPEG2;
+#if CONFIG_FAANIDCT
+            } else if (avctx->idct_algo == FF_IDCT_FAAN) {
+                c->idct_put  = ff_faanidct_put;
+                c->idct_add  = ff_faanidct_add;
+                c->idct      = ff_faanidct;
+                c->perm_type = FF_IDCT_PERM_NONE;
+#endif /* CONFIG_FAANIDCT */
+            } else { // accurate/default
+                c->idct_put  = ff_simple_idct_put_int16_8bit;
+                c->idct_add  = ff_simple_idct_add_int16_8bit;
+                c->idct      = ff_simple_idct_int16_8bit;
+                c->perm_type = FF_IDCT_PERM_NONE;
+            }
+        }
+    }
+
+    /* the clamped pixel helpers are set unconditionally */
+    c->put_pixels_clamped        = ff_put_pixels_clamped_c;
+    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
+    c->add_pixels_clamped        = ff_add_pixels_clamped_c;
+
+    /* runs after the selection above, including after the lowres branches */
+    if (CONFIG_MPEG4_DECODER && avctx->idct_algo == FF_IDCT_XVID)
+        ff_xvid_idct_init(c, avctx);
+
+    /* At most one arch init runs, picked by the first ARCH_* macro that is set.
+     * NOTE(review): these presumably replace some of the pointers (and possibly
+     * perm_type) set above with optimised versions -- not visible here. */
+#if ARCH_AARCH64
+    ff_idctdsp_init_aarch64(c, avctx, high_bit_depth);
+#elif ARCH_ALPHA
+    ff_idctdsp_init_alpha(c, avctx, high_bit_depth);
+#elif ARCH_ARM
+    ff_idctdsp_init_arm(c, avctx, high_bit_depth);
+#elif ARCH_PPC
+    ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
+#elif ARCH_RISCV
+    ff_idctdsp_init_riscv(c, avctx, high_bit_depth);
+#elif ARCH_X86
+    ff_idctdsp_init_x86(c, avctx, high_bit_depth);
+#elif ARCH_MIPS
+    ff_idctdsp_init_mips(c, avctx, high_bit_depth);
+#elif ARCH_LOONGARCH
+    ff_idctdsp_init_loongarch(c, avctx, high_bit_depth);
+#endif
+
+    /* built last, from whatever perm_type is in effect at this point */
+    ff_init_scantable_permutation(c->idct_permutation,
+                                  c->perm_type);
+}
diff --git a/media/ffvpx/libavcodec/idctdsp.h b/media/ffvpx/libavcodec/idctdsp.h
new file mode 100644
index 0000000000..7224463349
--- /dev/null
+++ b/media/ffvpx/libavcodec/idctdsp.h
@@ -0,0 +1,117 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_IDCTDSP_H
+#define AVCODEC_IDCTDSP_H
+
+#include <stdint.h>
+
+#include "config.h"
+
+#include "avcodec.h"
+
+/**
+ * Kind of input permutation an IDCT implementation expects; consumed by
+ * ff_init_scantable_permutation() to build the 64-entry table.
+ */
+enum idct_permutation_type {
+    FF_IDCT_PERM_NONE,      /* identity */
+    FF_IDCT_PERM_LIBMPEG2,  /* low three index bits rotated, bits 3-5 kept */
+    FF_IDCT_PERM_SIMPLE,    /* NOTE(review): no case in the generic C switch;
+                             * presumably handled by the x86 helper -- confirm */
+    FF_IDCT_PERM_TRANSPOSE, /* low and high 3-bit index halves exchanged */
+    FF_IDCT_PERM_PARTTRANS, /* bits 0-1 exchanged with bits 3-4, bits 2 and 5 kept */
+    FF_IDCT_PERM_SSE2,      /* NOTE(review): no case in the generic C switch;
+                             * presumably handled by the x86 helper -- confirm */
+};
+
+/**
+ * Write dst[i] = permutation[src[i]] for each of the 64 entries.
+ */
+void ff_permute_scantable(uint8_t dst[64], const uint8_t src[64],
+                          const uint8_t permutation[64]);
+/**
+ * Fill the 64-entry table idct_permutation according to perm_type.
+ */
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
+                                   enum idct_permutation_type perm_type);
+/**
+ * x86 counterpart, called first by ff_init_scantable_permutation() on x86
+ * builds; a non-zero return makes the caller skip its generic code.
+ */
+int ff_init_scantable_permutation_x86(uint8_t *idct_permutation,
+                                      enum idct_permutation_type perm_type);
+
+/**
+ * Function table and permutation state for the inverse DCT; filled in by
+ * ff_idctdsp_init().
+ */
+typedef struct IDCTDSPContext {
+    /* pixel ops : interface with DCT */
+    void (*put_pixels_clamped)(const int16_t *block /* align 16 */,
+                               uint8_t *av_restrict pixels /* align 8 */,
+                               ptrdiff_t line_size);
+    void (*put_signed_pixels_clamped)(const int16_t *block /* align 16 */,
+                                      uint8_t *av_restrict pixels /* align 8 */,
+                                      ptrdiff_t line_size);
+    void (*add_pixels_clamped)(const int16_t *block /* align 16 */,
+                               uint8_t *av_restrict pixels /* align 8 */,
+                               ptrdiff_t line_size);
+
+    /* Transform only, no store; ff_idctdsp_init() may leave this NULL. */
+    void (*idct)(int16_t *block /* align 16 */);
+
+    /**
+     * block -> idct -> clip to unsigned 8 bit -> dest.
+     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_put)(uint8_t *dest /* align 8 */,
+                     ptrdiff_t line_size, int16_t *block /* align 16 */);
+
+    /**
+     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
+     * ff_idctdsp_init() may leave this NULL.
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_add)(uint8_t *dest /* align 8 */,
+                     ptrdiff_t line_size, int16_t *block /* align 16 */);
+
+    /**
+     * IDCT input permutation.
+     * Several optimized IDCTs need a permuted input (relative to the
+     * normal order of the reference IDCT).
+     * This permutation must be performed before the idct_put/add.
+     * Note, normally this can be merged with the zigzag/alternate scan.
+     * An example to avoid confusion:
+     * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...)
+     * - (x -> reference DCT -> reference IDCT -> x)
+     * - (x -> reference DCT -> simple_mmx_perm = idct_permutation
+     *    -> simple_idct_mmx -> x)
+     * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant
+     *    -> simple_idct_mmx -> ...)
+     */
+    uint8_t idct_permutation[64];
+    enum idct_permutation_type perm_type;
+
+    /* Input to ff_idctdsp_init(): for 9/10-bit content a non-zero value selects
+     * ff_simple_idct_put_int32_10bit and leaves idct / idct_add NULL. */
+    int mpeg4_studio_profile;
+} IDCTDSPContext;
+
+/* C implementations that ff_idctdsp_init() installs as the put_pixels_clamped
+ * and add_pixels_clamped callbacks; both process an 8x8 block. */
+void ff_put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
+                             ptrdiff_t line_size);
+void ff_add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
+                             ptrdiff_t line_size);
+
+/* Fill c from avctx; c->mpeg4_studio_profile is read, so set it first. */
+void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx);
+
+/* Architecture-specific init hooks. ff_idctdsp_init() calls at most one of
+ * them, selected by the ARCH_* build macros, after its C defaults are in
+ * place; high_bit_depth is non-zero when bits_per_raw_sample > 8. */
+void ff_idctdsp_init_aarch64(IDCTDSPContext *c, AVCodecContext *avctx,
+                             unsigned high_bit_depth);
+void ff_idctdsp_init_alpha(IDCTDSPContext *c, AVCodecContext *avctx,
+                           unsigned high_bit_depth);
+void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,
+                         unsigned high_bit_depth);
+void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
+                         unsigned high_bit_depth);
+void ff_idctdsp_init_riscv(IDCTDSPContext *c, AVCodecContext *avctx,
+                           unsigned high_bit_depth);
+void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
+                         unsigned high_bit_depth);
+void ff_idctdsp_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
+                          unsigned high_bit_depth);
+void ff_idctdsp_init_loongarch(IDCTDSPContext *c, AVCodecContext *avctx,
+                               unsigned high_bit_depth);
+
+#endif /* AVCODEC_IDCTDSP_H */
diff --git a/media/ffvpx/libavcodec/imgconvert.c b/media/ffvpx/libavcodec/imgconvert.c
new file mode 100644
index 0000000000..96511ac7d6
--- /dev/null
+++ b/media/ffvpx/libavcodec/imgconvert.c
@@ -0,0 +1,48 @@
+/*
+ * Misc image conversion routines
+ * Copyright (c) 2001, 2002, 2003 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * misc image conversion routines
+ */
+
+#include "avcodec.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/pixfmt.h"
+
+/**
+ * Walk an AV_PIX_FMT_NONE-terminated list and keep the format that
+ * av_find_best_pix_fmt_of_2() prefers at each step.
+ *
+ * @param pix_fmt_list candidate formats, terminated by AV_PIX_FMT_NONE
+ * @param src_pix_fmt  source format, forwarded to every comparison
+ * @param has_alpha    forwarded to every comparison
+ * @param loss_ptr     optional; its incoming value is handed to every
+ *                     comparison, and on return it holds the loss reported by
+ *                     the last comparison. With an empty list it is left at
+ *                     its incoming value (previously an uninitialized local
+ *                     was stored into it).
+ * @return the selected format, or AV_PIX_FMT_NONE if the list is empty
+ */
+enum AVPixelFormat avcodec_find_best_pix_fmt_of_list(const enum AVPixelFormat *pix_fmt_list,
+                                            enum AVPixelFormat src_pix_fmt,
+                                            int has_alpha, int *loss_ptr){
+    int i;
+
+    enum AVPixelFormat best = AV_PIX_FMT_NONE;
+    /* *loss_ptr is never written inside the loop, so read it once up front */
+    const int loss_in = loss_ptr ? *loss_ptr : 0;
+    int loss = loss_in;
+
+    for (i=0; pix_fmt_list[i] != AV_PIX_FMT_NONE; i++) {
+        loss = loss_in;
+        best = av_find_best_pix_fmt_of_2(best, pix_fmt_list[i], src_pix_fmt, has_alpha, &loss);
+    }
+
+    if (loss_ptr)
+        *loss_ptr = loss;
+    return best;
+}
+
diff --git a/media/ffvpx/libavcodec/internal.h b/media/ffvpx/libavcodec/internal.h
new file mode 100644
index 0000000000..a283c52e01
--- /dev/null
+++ b/media/ffvpx/libavcodec/internal.h
@@ -0,0 +1,247 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * common internal api header.
+ */
+
+#ifndef AVCODEC_INTERNAL_H
+#define AVCODEC_INTERNAL_H
+
+#include <stdint.h>
+
+#include "libavutil/buffer.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/pixfmt.h"
+#include "avcodec.h"
+#include "config.h"
+
+#if CONFIG_LCMS2
+# include "fflcms2.h"
+#endif
+
+/* NOTE(review): judging by the name, an upper bound for plausible channel
+ * counts; its users are not visible in this header -- confirm. */
+#define FF_SANE_NB_CHANNELS 512U
+
+/* STRIDE_ALIGN is 64, 32 or 16 according to the first HAVE_SIMD_ALIGN_* macro
+ * that is set, and 8 when none is. */
+#if HAVE_SIMD_ALIGN_64
+#   define STRIDE_ALIGN 64 /* AVX-512 */
+#elif HAVE_SIMD_ALIGN_32
+#   define STRIDE_ALIGN 32
+#elif HAVE_SIMD_ALIGN_16
+#   define STRIDE_ALIGN 16
+#else
+#   define STRIDE_ALIGN 8
+#endif
+
+/**
+ * Internal bookkeeping state for a codec context. Going by the field
+ * comments below it serves both the decoding and the encoding paths.
+ */
+typedef struct AVCodecInternal {
+    /**
+     * When using frame-threaded decoding, this field is set for the first
+     * worker thread (e.g. to decode extradata just once).
+     */
+    int is_copy;
+
+    /**
+     * An audio frame with less than required samples has been submitted (and
+     * potentially padded with silence). Reject all subsequent frames.
+     */
+    int last_audio_frame;
+
+    /**
+     * Audio encoders can set this flag during init to indicate that they
+     * want the small last frame to be padded to a multiple of pad_samples.
+     */
+    int pad_samples;
+
+    AVBufferRef *pool;
+
+    void *thread_ctx;
+
+    /**
+     * This packet is used to hold the packet given to decoders
+     * implementing the .decode API; it is unused by the generic
+     * code for decoders implementing the .receive_frame API and
+     * may be freely used (but not freed) by them with the caveat
+     * that the packet will be unreferenced generically in
+     * avcodec_flush_buffers().
+     */
+    AVPacket *in_pkt;
+    struct AVBSFContext *bsf;
+
+    /**
+     * Properties (timestamps+side data) extracted from the last packet passed
+     * for decoding.
+     */
+    AVPacket *last_pkt_props;
+
+    /**
+     * temporary buffer used for encoders to store their bitstream
+     */
+    uint8_t *byte_buffer;
+    unsigned int byte_buffer_size;
+
+    /**
+     * This is set to AV_PKT_FLAG_KEY for encoders that encode intra-only
+     * formats (i.e. whose codec descriptor has AV_CODEC_PROP_INTRA_ONLY set).
+     * This is used to set said flag generically for said encoders.
+     */
+    int intra_only_flag;
+
+    void *frame_thread_encoder;
+
+    /**
+     * The input frame is stored here for encoders implementing the simple
+     * encode API.
+     *
+     * Not allocated in other cases.
+     */
+    AVFrame *in_frame;
+
+    /**
+     * When the AV_CODEC_FLAG_RECON_FRAME flag is used, the encoder should store
+     * here the reconstructed frame corresponding to the last returned packet.
+     *
+     * Not allocated in other cases.
+     */
+    AVFrame *recon_frame;
+
+    /**
+     * If this is set, then FFCodec->close (if existing) needs to be called
+     * for the parent AVCodecContext.
+     */
+    int needs_close;
+
+    /**
+     * Number of audio samples to skip at the start of the next decoded frame
+     */
+    int skip_samples;
+
+    /**
+     * hwaccel-specific private data
+     */
+    void *hwaccel_priv_data;
+
+    /**
+     * checks API usage: after codec draining, flush is required to resume operation
+     */
+    int draining;
+
+    /**
+     * Temporary buffers for newly received or not yet output packets/frames.
+     */
+    AVPacket *buffer_pkt;
+    AVFrame *buffer_frame;
+    int draining_done;
+
+    int showed_multi_packet_warning;
+
+    /* to prevent infinite loop on errors when draining */
+    int nb_draining_errors;
+
+    /* used when avctx flag AV_CODEC_FLAG_DROPCHANGED is set */
+    int changed_frames_dropped;
+    int initial_format;
+    int initial_width, initial_height;
+    int initial_sample_rate;
+    AVChannelLayout initial_ch_layout;
+
+    /* present only in builds with CONFIG_LCMS2 enabled */
+#if CONFIG_LCMS2
+    FFIccContext icc; /* used to read and write embedded ICC profiles */
+#endif
+} AVCodecInternal;
+
+/**
+ * Return the index into tab at which {a,b} match elements {[0],[1]} of tab.
+ * If there is no such matching pair then size is returned.
+ */
+int ff_match_2uint16(const uint16_t (*tab)[2], int size, int a, int b);
+
+unsigned int ff_toupper4(unsigned int x);
+
+void ff_color_frame(AVFrame *frame, const int color[4]);
+
+/**
+ * Maximum size in bytes of extradata.
+ * This value was chosen such that every bit of the buffer is
+ * addressable by a 32-bit signed integer as used by get_bits.
+ */
+#define FF_MAX_EXTRADATA_SIZE ((1 << 28) - AV_INPUT_BUFFER_PADDING_SIZE)
+
+/**
+ * 2^x for integer x, built directly from an integer bit pattern handed to
+ * av_int2float() rather than computed arithmetically.
+ *
+ * - x > 128:           INFINITY
+ * - -126 <= x <= 128:  (x + 127) placed in the exponent field (bits 23 and up)
+ * - -149 <= x <= -127: a single mantissa bit, 1 << (x + 149) (subnormal range)
+ * - x <= -150:         0
+ *
+ * @return correctly rounded float
+ */
+static av_always_inline float ff_exp2fi(int x) {
+    /* Too large */
+    if (x > 128)
+        return INFINITY;
+
+    /* Normal range */
+    if (x >= -126)
+        return av_int2float((x+127) << 23);
+
+    /* Subnormal numbers */
+    if (x > -150)
+        return av_int2float(1 << (x+149));
+
+    /* Negligibly small */
+    return 0;
+}
+
+int avpriv_h264_has_num_reorder_frames(AVCodecContext *avctx);
+
+int avpriv_codec_get_cap_skip_frame_fill_param(const AVCodec *codec);
+
+/**
+ * Add a CPB properties side data to an encoding context.
+ */
+AVCPBProperties *ff_add_cpb_side_data(AVCodecContext *avctx);
+
+/**
+ * Check AVFrame for S12M timecode side data and allocate and fill TC SEI message with timecode info
+ *
+ * @param frame      Raw frame to get S12M timecode side data from
+ * @param rate       The frame rate
+ * @param prefix_len Number of bytes to allocate before SEI message
+ * @param data       Pointer to a variable to store allocated memory
+ *                   Upon return the variable will hold NULL on error or if frame has no S12M timecode info.
+ *                   Otherwise it will point to prefix_len uninitialized bytes followed by
+ *                   *sei_size SEI message
+ * @param sei_size   Pointer to a variable to store generated SEI message length
+ * @return           Zero on success, negative error code on failure
+ */
+int ff_alloc_timecode_sei(const AVFrame *frame, AVRational rate, size_t prefix_len,
+                     void **data, size_t *sei_size);
+
+/**
+ * Get an estimated video bitrate based on frame size, frame rate and coded
+ * bits per pixel.
+ */
+int64_t ff_guess_coded_bitrate(AVCodecContext *avctx);
+
+/**
+ * Check if a value is in the list. If not, return the default value
+ *
+ * @param ctx                Context for the log msg
+ * @param val_name           Name of the checked value, for log msg
+ * @param array_valid_values Array of valid int, ended with INT_MAX
+ * @param default_value      Value returned if checked value is not in the array
+ * @return                   Value or default_value.
+ */
+int ff_int_from_list_or_default(void *ctx, const char * val_name, int val,
+                                const int * array_valid_values, int default_value);
+
+#endif /* AVCODEC_INTERNAL_H */
diff --git a/media/ffvpx/libavcodec/jfdctfst.c b/media/ffvpx/libavcodec/jfdctfst.c
new file mode 100644
index 0000000000..805e05808c
--- /dev/null
+++ b/media/ffvpx/libavcodec/jfdctfst.c
@@ -0,0 +1,331 @@
+/*
+ * This file is part of the Independent JPEG Group's software.
+ *
+ * The authors make NO WARRANTY or representation, either express or implied,
+ * with respect to this software, its quality, accuracy, merchantability, or
+ * fitness for a particular purpose.  This software is provided "AS IS", and
+ * you, its user, assume the entire risk as to its quality and accuracy.
+ *
+ * This software is copyright (C) 1994-1996, Thomas G. Lane.
+ * All Rights Reserved except as specified below.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * software (or portions thereof) for any purpose, without fee, subject to
+ * these conditions:
+ * (1) If any part of the source code for this software is distributed, then
+ * this README file must be included, with this copyright and no-warranty
+ * notice unaltered; and any additions, deletions, or changes to the original
+ * files must be clearly indicated in accompanying documentation.
+ * (2) If only executable code is distributed, then the accompanying
+ * documentation must state that "this software is based in part on the work
+ * of the Independent JPEG Group".
+ * (3) Permission for use of this software is granted only if the user accepts
+ * full responsibility for any undesirable consequences; the authors accept
+ * NO LIABILITY for damages of any kind.
+ *
+ * These conditions apply to any software derived from or based on the IJG
+ * code, not just to the unmodified library.  If you use our work, you ought
+ * to acknowledge us.
+ *
+ * Permission is NOT granted for the use of any IJG author's name or company
+ * name in advertising or publicity relating to this software or products
+ * derived from it.  This software may be referred to only as "the Independent
+ * JPEG Group's software".
+ *
+ * We specifically permit and encourage the use of this software as the basis
+ * of commercial products, provided that all warranty or liability claims are
+ * assumed by the product vendor.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+/**
+ * @file
+ * Independent JPEG Group's fast AAN dct.
+ */
+
+#include <stdint.h>
+#include "libavutil/attributes.h"
+#include "dct.h"
+
+#define DCTSIZE 8
+#define GLOBAL(x) x
+#define RIGHT_SHIFT(x, n) ((x) >> (n))
+
+/*
+ * This module is specialized to the case DCTSIZE = 8; fail the build otherwise.
+ */
+
+#if DCTSIZE != 8
+#error  "Sorry, this code only copes with 8x8 DCTs."
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jfdctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * Again to save a few shifts, the intermediate results between pass 1 and
+ * pass 2 are not upscaled, but are represented only to integral precision.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#define CONST_BITS  8
+
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_0_382683433  ((int32_t)   98)       /* FIX(0.382683433) */
+#define FIX_0_541196100  ((int32_t)  139)       /* FIX(0.541196100) */
+#define FIX_0_707106781  ((int32_t)  181)       /* FIX(0.707106781) */
+#define FIX_1_306562965  ((int32_t)  334)       /* FIX(1.306562965) */
+#else
+#define FIX_0_382683433  FIX(0.382683433)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_707106781  FIX(0.707106781)
+#define FIX_1_306562965  FIX(1.306562965)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a int16_t variable by an int32_t constant, and immediately
+ * descale to yield a int16_t result.
+ */
+
+#define MULTIPLY(var,const)  ((int16_t) DESCALE((var) * (const), CONST_BITS))
+
+static av_always_inline void row_fdct(int16_t * data){
+  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int tmp10, tmp11, tmp12, tmp13;
+  int z1, z2, z3, z4, z5, z11, z13;
+  int16_t *dataptr;             /* first sample of the current row */
+  int ctr;                      /* rows still to process */
+  /* Row pass shared by ff_fdct_ifast() and ff_fdct_ifast248(); works in place. */
+  /* Pass 1: process rows. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* one row per iteration */
+    tmp0 = dataptr[0] + dataptr[7]; /* sums/differences of mirrored samples */
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+    /* MULTIPLY() descales by CONST_BITS and casts the product to int16_t. */
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;    /* phase 5 */
+    dataptr[6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;    /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;    /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781);         /* c4 */
+
+    z11 = tmp7 + z3;            /* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;      /* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
+  }
+}
+
+/* Perform the forward DCT on one block of samples, in place.
+ * data holds DCTSIZE rows of DCTSIZE int16_t values; rows are transformed
+ * by row_fdct(), then the loop below transforms each column. */
+
+GLOBAL(void)
+ff_fdct_ifast (int16_t * data)
+{
+  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int tmp10, tmp11, tmp12, tmp13;
+  int z1, z2, z3, z4, z5, z11, z13;
+  int16_t *dataptr;             /* top sample of the current column */
+  int ctr;                      /* columns still to process */
+
+  row_fdct(data);               /* pass 1: rows */
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* one column per iteration */
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; /* stride DCTSIZE = next row */
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;            /* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+/* Perform the forward 2-4-8 DCT on one block of samples, in place.
+ * Rows go through row_fdct(); each column is then split into sums and
+ * differences of adjacent row pairs, both run through the even part. */
+
+GLOBAL(void)
+ff_fdct_ifast248 (int16_t * data)
+{
+  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int tmp10, tmp11, tmp12, tmp13;
+  int z1;
+  int16_t *dataptr;             /* top sample of the current column */
+  int ctr;                      /* columns still to process */
+
+  row_fdct(data);               /* pass 1: rows */
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* one column per iteration */
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1]; /* tmp0..3: row-pair sums */
+    tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+    tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+    tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1]; /* tmp4..7: row-pair differences */
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+    tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+    tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    tmp13 = tmp0 - tmp3;
+
+    dataptr[DCTSIZE*0] = tmp10 + tmp11;
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
+    dataptr[DCTSIZE*2] = tmp13 + z1;
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+    /* Same even-part butterfly again, on the differences; fills rows 1, 5, 3, 7. */
+    tmp10 = tmp4 + tmp7;
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp5 - tmp6;
+    tmp13 = tmp4 - tmp7;
+
+    dataptr[DCTSIZE*1] = tmp10 + tmp11;
+    dataptr[DCTSIZE*5] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
+    dataptr[DCTSIZE*3] = tmp13 + z1;
+    dataptr[DCTSIZE*7] = tmp13 - z1;
+
+    dataptr++;                        /* advance pointer to next column */
+  }
+}
+
+
+#undef GLOBAL
+#undef CONST_BITS
+#undef DESCALE
+#undef FIX_0_541196100
+#undef FIX_1_306562965
diff --git a/media/ffvpx/libavcodec/jfdctint.c b/media/ffvpx/libavcodec/jfdctint.c
new file mode 100644
index 0000000000..6a39578f88
--- /dev/null
+++ b/media/ffvpx/libavcodec/jfdctint.c
@@ -0,0 +1,25 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BIT_DEPTH 8     /* first instantiation: template sees BITS_IN_JSAMPLE == 8 */
+#include "jfdctint_template.c" /* NOTE(review): FUNC() presumably yields per-depth names -- confirm in bit_depth_template.c */
+#undef BIT_DEPTH        /* so it can be redefined for the next instantiation */
+
+#define BIT_DEPTH 10    /* second instantiation: template takes its non-8-bit branch */
+#include "jfdctint_template.c"
+#undef BIT_DEPTH
diff --git a/media/ffvpx/libavcodec/jfdctint_template.c b/media/ffvpx/libavcodec/jfdctint_template.c
new file mode 100644
index 0000000000..67fb77b5e1
--- /dev/null
+++ b/media/ffvpx/libavcodec/jfdctint_template.c
@@ -0,0 +1,398 @@
+/*
+ * This file is part of the Independent JPEG Group's software.
+ *
+ * The authors make NO WARRANTY or representation, either express or implied,
+ * with respect to this software, its quality, accuracy, merchantability, or
+ * fitness for a particular purpose.  This software is provided "AS IS", and
+ * you, its user, assume the entire risk as to its quality and accuracy.
+ *
+ * This software is copyright (C) 1991-1996, Thomas G. Lane.
+ * All Rights Reserved except as specified below.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * software (or portions thereof) for any purpose, without fee, subject to
+ * these conditions:
+ * (1) If any part of the source code for this software is distributed, then
+ * this README file must be included, with this copyright and no-warranty
+ * notice unaltered; and any additions, deletions, or changes to the original
+ * files must be clearly indicated in accompanying documentation.
+ * (2) If only executable code is distributed, then the accompanying
+ * documentation must state that "this software is based in part on the work
+ * of the Independent JPEG Group".
+ * (3) Permission for use of this software is granted only if the user accepts
+ * full responsibility for any undesirable consequences; the authors accept
+ * NO LIABILITY for damages of any kind.
+ *
+ * These conditions apply to any software derived from or based on the IJG
+ * code, not just to the unmodified library.  If you use our work, you ought
+ * to acknowledge us.
+ *
+ * Permission is NOT granted for the use of any IJG author's name or company
+ * name in advertising or publicity relating to this software or products
+ * derived from it.  This software may be referred to only as "the Independent
+ * JPEG Group's software".
+ *
+ * We specifically permit and encourage the use of this software as the basis
+ * of commercial products, provided that all warranty or liability claims are
+ * assumed by the product vendor.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ */
+
+/**
+ * @file
+ * Independent JPEG Group's slow & accurate dct.
+ */
+
+#include "libavutil/common.h"
+#include "dct.h"
+
+#include "bit_depth_template.c"
+
+#define DCTSIZE 8
+#define BITS_IN_JSAMPLE BIT_DEPTH
+#define GLOBAL(x) x
+#define RIGHT_SHIFT(x, n) ((x) >> (n))
+#define MULTIPLY16C16(var,const) ((var)*(const))
+#define DESCALE(x,n)  RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+#error  "Sorry, this code only copes with 8x8 DCTs."
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true DCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D DCT,
+ * because the y0 and y4 outputs need not be divided by sqrt(N).
+ * In the IJG code, this factor of 8 is removed by the quantization step
+ * (in jcdctmgr.c), NOT in this module.
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (For 12-bit sample data, the intermediate
+ * array is int32_t anyway.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#undef CONST_BITS
+#undef PASS1_BITS
+#undef OUT_SHIFT
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  4   /* set this to 2 if 16x16 multiplies are faster */
+#define OUT_SHIFT   PASS1_BITS
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
+#define OUT_SHIFT   (PASS1_BITS + 1)
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((int32_t)  2446)      /* FIX(0.298631336) */
+#define FIX_0_390180644  ((int32_t)  3196)      /* FIX(0.390180644) */
+#define FIX_0_541196100  ((int32_t)  4433)      /* FIX(0.541196100) */
+#define FIX_0_765366865  ((int32_t)  6270)      /* FIX(0.765366865) */
+#define FIX_0_899976223  ((int32_t)  7373)      /* FIX(0.899976223) */
+#define FIX_1_175875602  ((int32_t)  9633)      /* FIX(1.175875602) */
+#define FIX_1_501321110  ((int32_t)  12299)     /* FIX(1.501321110) */
+#define FIX_1_847759065  ((int32_t)  15137)     /* FIX(1.847759065) */
+#define FIX_1_961570560  ((int32_t)  16069)     /* FIX(1.961570560) */
+#define FIX_2_053119869  ((int32_t)  16819)     /* FIX(2.053119869) */
+#define FIX_2_562915447  ((int32_t)  20995)     /* FIX(2.562915447) */
+#define FIX_3_072711026  ((int32_t)  25172)     /* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+static av_always_inline void FUNC(row_fdct)(int16_t *data)
+{
+  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int tmp10, tmp11, tmp12, tmp13;
+  int z1, z2, z3, z4, z5;
+  int16_t *dataptr;             /* first sample of the current row */
+  int ctr;                      /* rows still to process */
+  /* Row pass used by both column-pass functions in this file; works in place. */
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* one row per iteration */
+    tmp0 = dataptr[0] + dataptr[7]; /* sums/differences of mirrored samples */
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[0] = (int16_t) ((tmp10 + tmp11) * (1 << PASS1_BITS));
+    dataptr[4] = (int16_t) ((tmp10 - tmp11) * (1 << PASS1_BITS));
+    /* NOTE(review): "* (1 << n)" presumably avoids left-shifting negative sums -- confirm. */
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[2] = (int16_t) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                   CONST_BITS-PASS1_BITS); /* keeps the 2**PASS1_BITS scale */
+    dataptr[6] = (int16_t) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                   CONST_BITS-PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    dataptr[7] = (int16_t) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (int16_t) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (int16_t) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (int16_t) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
+  }
+}
+
+/* Perform the forward DCT on one block of samples, in place.
+ * data holds DCTSIZE rows of DCTSIZE int16_t values; rows are transformed
+ * by FUNC(row_fdct), then the loop below transforms each column. */
+
+GLOBAL(void)
+FUNC(ff_jpeg_fdct_islow)(int16_t *data)
+{
+  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int tmp10, tmp11, tmp12, tmp13;
+  int z1, z2, z3, z4, z5;
+  int16_t *dataptr;             /* top sample of the current column */
+  int ctr;                      /* columns still to process */
+
+  FUNC(row_fdct)(data);         /* pass 1: rows, scaled up by 2**PASS1_BITS */
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8 (4 when OUT_SHIFT is PASS1_BITS + 1).
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* one column per iteration */
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; /* stride DCTSIZE = next row */
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
+    dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                 CONST_BITS + OUT_SHIFT); /* drop constant and pass-1 scaling */
+    dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                 CONST_BITS + OUT_SHIFT);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    dataptr[DCTSIZE*7] = DESCALE(tmp4 + z1 + z3, CONST_BITS + OUT_SHIFT);
+    dataptr[DCTSIZE*5] = DESCALE(tmp5 + z2 + z4, CONST_BITS + OUT_SHIFT);
+    dataptr[DCTSIZE*3] = DESCALE(tmp6 + z2 + z3, CONST_BITS + OUT_SHIFT);
+    dataptr[DCTSIZE*1] = DESCALE(tmp7 + z1 + z4, CONST_BITS + OUT_SHIFT);
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+/*
+ * The secret of DCT2-4-8 is really simple -- you do the usual 1-D DCT
+ * on the rows and then, instead of doing the even and odd parts on the
+ * columns, you do the even part two times.
+ */
+GLOBAL(void)
+FUNC(ff_fdct248_islow)(int16_t *data)
+{
+  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int tmp10, tmp11, tmp12, tmp13;
+  int z1;
+  int16_t *dataptr;             /* top sample of the current column */
+  int ctr;                      /* columns still to process */
+
+  FUNC(row_fdct)(data);         /* pass 1: rows, scaled up by 2**PASS1_BITS */
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8 (4 when OUT_SHIFT is PASS1_BITS + 1).
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* one column per iteration */
+     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1]; /* tmp0..3: row-pair sums */
+     tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+     tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+     tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+     tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1]; /* tmp4..7: row-pair differences */
+     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+     tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+     tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+     tmp10 = tmp0 + tmp3;
+     tmp11 = tmp1 + tmp2;
+     tmp12 = tmp1 - tmp2;
+     tmp13 = tmp0 - tmp3;
+
+     dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
+     dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
+
+     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+     dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                  CONST_BITS+OUT_SHIFT);
+     dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                  CONST_BITS+OUT_SHIFT);
+     /* Same even-part butterfly again, on the differences; fills rows 1, 5, 3, 7. */
+     tmp10 = tmp4 + tmp7;
+     tmp11 = tmp5 + tmp6;
+     tmp12 = tmp5 - tmp6;
+     tmp13 = tmp4 - tmp7;
+
+     dataptr[DCTSIZE*1] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
+     dataptr[DCTSIZE*5] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
+
+     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+     dataptr[DCTSIZE*3] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                  CONST_BITS + OUT_SHIFT);
+     dataptr[DCTSIZE*7] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                  CONST_BITS + OUT_SHIFT);
+
+     dataptr++;                 /* advance pointer to next column */
+  }
+}
diff --git a/media/ffvpx/libavcodec/jrevdct.c b/media/ffvpx/libavcodec/jrevdct.c
new file mode 100644
index 0000000000..7f1863515f
--- /dev/null
+++ b/media/ffvpx/libavcodec/jrevdct.c
@@ -0,0 +1,1172 @@
+/*
+ * This file is part of the Independent JPEG Group's software.
+ *
+ * The authors make NO WARRANTY or representation, either express or implied,
+ * with respect to this software, its quality, accuracy, merchantability, or
+ * fitness for a particular purpose.  This software is provided "AS IS", and
+ * you, its user, assume the entire risk as to its quality and accuracy.
+ *
+ * This software is copyright (C) 1991, 1992, Thomas G. Lane.
+ * All Rights Reserved except as specified below.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * software (or portions thereof) for any purpose, without fee, subject to
+ * these conditions:
+ * (1) If any part of the source code for this software is distributed, then
+ * this README file must be included, with this copyright and no-warranty
+ * notice unaltered; and any additions, deletions, or changes to the original
+ * files must be clearly indicated in accompanying documentation.
+ * (2) If only executable code is distributed, then the accompanying
+ * documentation must state that "this software is based in part on the work
+ * of the Independent JPEG Group".
+ * (3) Permission for use of this software is granted only if the user accepts
+ * full responsibility for any undesirable consequences; the authors accept
+ * NO LIABILITY for damages of any kind.
+ *
+ * These conditions apply to any software derived from or based on the IJG
+ * code, not just to the unmodified library.  If you use our work, you ought
+ * to acknowledge us.
+ *
+ * Permission is NOT granted for the use of any IJG author's name or company
+ * name in advertising or publicity relating to this software or products
+ * derived from it.  This software may be referred to only as "the Independent
+ * JPEG Group's software".
+ *
+ * We specifically permit and encourage the use of this software as the basis
+ * of commercial products, provided that all warranty or liability claims are
+ * assumed by the product vendor.
+ *
+ * This file contains the basic inverse-DCT transformation subroutine.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * I've made lots of modifications to attempt to take advantage of the
+ * sparse nature of the DCT matrices we're getting.  Although the logic
+ * is cumbersome, it's straightforward and the resulting code is much
+ * faster.
+ *
+ * A better way to do this would be to pass in the DCT block as a sparse
+ * matrix, perhaps with the different cases encoded.
+ */
+
+/**
+ * @file
+ * Independent JPEG Group's LLM idct.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/intreadwrite.h"
+
+#include "dct.h"
+#include "idctdsp.h"
+
+#define EIGHT_BIT_SAMPLES /* selects PASS1_BITS == 2 and gates the 16-bit MULTIPLY variants */
+
+#define DCTSIZE 8 /* block edge length; this file refuses to compile for any other value */
+#define DCTSIZE2 64 /* number of coefficients in one block */
+
+#define GLOBAL /* expands to nothing */
+
+#define RIGHT_SHIFT(x, n) ((x) >> (n)) /* DESCALE assumes this rounds towards minus infinity */
+
+typedef int16_t DCTBLOCK[DCTSIZE2]; /* one block: DCTSIZE2 16-bit coefficients */
+
+#define CONST_BITS 13 /* fractional bits kept in the fixed-point FIX_* constants */
+
+/*
+ * This routine is specialized to the case DCTSIZE = 8; other values fail to compile.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax error */
+#endif
+
+
+/*
+ * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true IDCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D IDCT,
+ * because the y0 and y4 inputs need not be divided by sqrt(N).
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (To scale up 12-bit sample data further, an
+ * intermediate int32 array would be needed.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#ifdef EIGHT_BIT_SAMPLES
+#define PASS1_BITS  2 /* extra fractional bits kept in the first-pass outputs */
+#else
+#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
+#endif
+
+#define ONE         ((int32_t) 1) /* int32_t-typed 1, used as the shift operand below */
+
+#define CONST_SCALE (ONE << CONST_BITS) /* the value 1.0 in the constants' fixed-point scale */
+
+/* Convert a positive real constant to an integer scaled by CONST_SCALE.
+ * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
+ * you will pay a significant penalty in run time.  In that case, figure
+ * the correct integer constant values and insert them by hand.
+ */
+
+/* FIX is no longer used: the FIX_* constants below are all precomputed. */
+#define FIX(x)  ((int32_t) ((x) * CONST_SCALE + 0.5))
+
+/* Descale and correctly round an int32_t value that's scaled by N bits (N >= 1).
+ * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
+ * the fudge factor, ONE << (N-1), is correct for either sign of X.
+ */
+
+#define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
+
+/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
+ * this provides a useful speedup on many machines.
+ * There is no way to specify a 16x16->32 multiply in portable C, but
+ * some C compilers will do the right thing if you provide the correct
+ * combination of casts (opt in by defining SHORTxSHORT_32 or SHORTxLCONST_32).
+ * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#ifdef EIGHT_BIT_SAMPLES /* the 16-bit shortcuts are only offered for 8-bit samples */
+#ifdef SHORTxSHORT_32           /* may work if 'int' is 32 bits */
+#define MULTIPLY(var,const)  (((int16_t) (var)) * ((int16_t) (const)))
+#endif /* SHORTxSHORT_32 */
+#ifdef SHORTxLCONST_32          /* known to work with Microsoft C 6.0 */
+#define MULTIPLY(var,const)  (((int16_t) (var)) * ((int32_t) (const)))
+#endif /* SHORTxLCONST_32 */
+#endif /* EIGHT_BIT_SAMPLES */
+
+#ifndef MULTIPLY                /* default: plain multiply when no variant above was selected */
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/* Precomputed FIX() values: FIX_a_b is a.b * CONST_SCALE, rounded to nearest.
+ * Unlike our decoder, where the FIX constants are approximated, exact values are
+ * needed here, or successive P-frames drift too much with reference frame coding.
+ */
+#define FIX_0_211164243 1730
+#define FIX_0_275899380 2260
+#define FIX_0_298631336 2446
+#define FIX_0_390180644 3196
+#define FIX_0_509795579 4176
+#define FIX_0_541196100 4433
+#define FIX_0_601344887 4926
+#define FIX_0_765366865 6270
+#define FIX_0_785694958 6436
+#define FIX_0_899976223 7373
+#define FIX_1_061594337 8697
+#define FIX_1_111140466 9102
+#define FIX_1_175875602 9633
+#define FIX_1_306562965 10703
+#define FIX_1_387039845 11363
+#define FIX_1_451774981 11893
+#define FIX_1_501321110 12299
+#define FIX_1_662939225 13623
+#define FIX_1_847759065 15137
+#define FIX_1_961570560 16069
+#define FIX_2_053119869 16819
+#define FIX_2_172734803 17799
+#define FIX_2_562915447 20995
+#define FIX_3_072711026 25172
+
+/*
+ * Perform the inverse DCT on one block of coefficients.
+ */
+
+void ff_j_rev_dct(DCTBLOCK data)
+{
+  int32_t tmp0, tmp1, tmp2, tmp3;
+  int32_t tmp10, tmp11, tmp12, tmp13;
+  int32_t z1, z2, z3, z4, z5;
+  int32_t d0, d1, d2, d3, d4, d5, d6, d7;
+  register int16_t *dataptr;
+  int rowctr;
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any row in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * row DCT calculations can be simplified this way.
+     */
+
+    register uint8_t *idataptr = (uint8_t*)dataptr;
+
+    /* WARNING: we do the same permutation as MMX idct to simplify the
+       video core */
+    d0 = dataptr[0];
+    d2 = dataptr[1];
+    d4 = dataptr[2];
+    d6 = dataptr[3];
+    d1 = dataptr[4];
+    d3 = dataptr[5];
+    d5 = dataptr[6];
+    d7 = dataptr[7];
+
+    if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
+      /* AC terms all zero */
+      if (d0) {
+          /* Compute a 32 bit value to assign. */
+          int16_t dcval = (int16_t) (d0 * (1 << PASS1_BITS));
+          register unsigned v = (dcval & 0xffff) | ((uint32_t)dcval << 16);
+
+          AV_WN32A(&idataptr[ 0], v);
+          AV_WN32A(&idataptr[ 4], v);
+          AV_WN32A(&idataptr[ 8], v);
+          AV_WN32A(&idataptr[12], v);
+      }
+
+      dataptr += DCTSIZE;       /* advance pointer to next row */
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+{
+    if (d6) {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) * CONST_SCALE;
+                    tmp11 = tmp12 = (d0 - d4) * CONST_SCALE;
+            }
+      }
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    if (d7) {
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 = z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z3;
+                    tmp1 += z4;
+                    tmp2 = z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 = z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z3 = d7 + d3;
+
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    tmp2 = MULTIPLY(d3, FIX_0_509795579);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z5 = MULTIPLY(z3, FIX_1_175875602);
+                    z3 = MULTIPLY(-z3, FIX_0_785694958);
+
+                    tmp0 += z3;
+                    tmp1 = z2 + z5;
+                    tmp2 += z3;
+                    tmp3 = z1 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z5 = MULTIPLY(z1, FIX_1_175875602);
+
+                    z1 = MULTIPLY(z1, FIX_0_275899380);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp0 = MULTIPLY(-d7, FIX_1_662939225);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+                    tmp3 = MULTIPLY(d1, FIX_1_111140466);
+
+                    tmp0 += z1;
+                    tmp1 = z4 + z5;
+                    tmp2 = z3 + z5;
+                    tmp3 += z1;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_1_387039845);
+                    tmp1 = MULTIPLY(d7, FIX_1_175875602);
+                    tmp2 = MULTIPLY(-d7, FIX_0_785694958);
+                    tmp3 = MULTIPLY(d7, FIX_0_275899380);
+                }
+            }
+        }
+    } else {
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
+
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 = z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+
+                    z5 = MULTIPLY(z2, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_1_662939225);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z2 = MULTIPLY(-z2, FIX_1_387039845);
+                    tmp2 = MULTIPLY(d3, FIX_1_111140466);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+
+                    tmp0 = z3 + z5;
+                    tmp1 += z2;
+                    tmp2 += z2;
+                    tmp3 = z4 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
+                    z4 = d5 + d1;
+
+                    z5 = MULTIPLY(z4, FIX_1_175875602);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    tmp3 = MULTIPLY(d1, FIX_0_601344887);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(z4, FIX_0_785694958);
+
+                    tmp0 = z1 + z5;
+                    tmp1 += z4;
+                    tmp2 = z2 + z5;
+                    tmp3 += z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d5, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_0_275899380);
+                    tmp2 = MULTIPLY(-d5, FIX_1_387039845);
+                    tmp3 = MULTIPLY(d5, FIX_0_785694958);
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
+                    z5 = d1 + d3;
+                    tmp3 = MULTIPLY(d1, FIX_0_211164243);
+                    tmp2 = MULTIPLY(-d3, FIX_1_451774981);
+                    z1 = MULTIPLY(d1, FIX_1_061594337);
+                    z2 = MULTIPLY(-d3, FIX_2_172734803);
+                    z4 = MULTIPLY(z5, FIX_0_785694958);
+                    z5 = MULTIPLY(z5, FIX_1_175875602);
+
+                    tmp0 = z1 - z4;
+                    tmp1 = z2 + z4;
+                    tmp2 += z5;
+                    tmp3 += z5;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(-d3, FIX_0_785694958);
+                    tmp1 = MULTIPLY(-d3, FIX_1_387039845);
+                    tmp2 = MULTIPLY(-d3, FIX_0_275899380);
+                    tmp3 = MULTIPLY(d3, FIX_1_175875602);
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d1, FIX_0_275899380);
+                    tmp1 = MULTIPLY(d1, FIX_0_785694958);
+                    tmp2 = MULTIPLY(d1, FIX_1_175875602);
+                    tmp3 = MULTIPLY(d1, FIX_1_387039845);
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = tmp1 = tmp2 = tmp3 = 0;
+                }
+            }
+        }
+    }
+}
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[0] = (int16_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (int16_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (int16_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[6] = (int16_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[2] = (int16_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (int16_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (int16_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[4] = (int16_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  dataptr = data;
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Columns of zeroes can be exploited in the same way as we did with rows.
+     * However, the row calculation has created many nonzero AC terms, so the
+     * simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+
+    d0 = dataptr[DCTSIZE*0];
+    d1 = dataptr[DCTSIZE*1];
+    d2 = dataptr[DCTSIZE*2];
+    d3 = dataptr[DCTSIZE*3];
+    d4 = dataptr[DCTSIZE*4];
+    d5 = dataptr[DCTSIZE*5];
+    d6 = dataptr[DCTSIZE*6];
+    d7 = dataptr[DCTSIZE*7];
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) * CONST_SCALE;
+                    tmp11 = tmp12 = (d0 - d4) * CONST_SCALE;
+            }
+    }
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+    if (d7) {
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z3 = d7;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 = z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z3;
+                    tmp1 += z4;
+                    tmp2 = z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 = z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z3 = d7 + d3;
+
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    tmp2 = MULTIPLY(d3, FIX_0_509795579);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z5 = MULTIPLY(z3, FIX_1_175875602);
+                    z3 = MULTIPLY(-z3, FIX_0_785694958);
+
+                    tmp0 += z3;
+                    tmp1 = z2 + z5;
+                    tmp2 += z3;
+                    tmp3 = z1 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z5 = MULTIPLY(z1, FIX_1_175875602);
+
+                    z1 = MULTIPLY(z1, FIX_0_275899380);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp0 = MULTIPLY(-d7, FIX_1_662939225);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+                    tmp3 = MULTIPLY(d1, FIX_1_111140466);
+
+                    tmp0 += z1;
+                    tmp1 = z4 + z5;
+                    tmp2 = z3 + z5;
+                    tmp3 += z1;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_1_387039845);
+                    tmp1 = MULTIPLY(d7, FIX_1_175875602);
+                    tmp2 = MULTIPLY(-d7, FIX_0_785694958);
+                    tmp3 = MULTIPLY(d7, FIX_0_275899380);
+                }
+            }
+        }
+    } else {
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
+
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 = z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+
+                    z5 = MULTIPLY(z2, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_1_662939225);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z2 = MULTIPLY(-z2, FIX_1_387039845);
+                    tmp2 = MULTIPLY(d3, FIX_1_111140466);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+
+                    tmp0 = z3 + z5;
+                    tmp1 += z2;
+                    tmp2 += z2;
+                    tmp3 = z4 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
+                    z4 = d5 + d1;
+
+                    z5 = MULTIPLY(z4, FIX_1_175875602);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    tmp3 = MULTIPLY(d1, FIX_0_601344887);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(z4, FIX_0_785694958);
+
+                    tmp0 = z1 + z5;
+                    tmp1 += z4;
+                    tmp2 = z2 + z5;
+                    tmp3 += z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d5, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_0_275899380);
+                    tmp2 = MULTIPLY(-d5, FIX_1_387039845);
+                    tmp3 = MULTIPLY(d5, FIX_0_785694958);
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
+                    z5 = d1 + d3;
+                    tmp3 = MULTIPLY(d1, FIX_0_211164243);
+                    tmp2 = MULTIPLY(-d3, FIX_1_451774981);
+                    z1 = MULTIPLY(d1, FIX_1_061594337);
+                    z2 = MULTIPLY(-d3, FIX_2_172734803);
+                    z4 = MULTIPLY(z5, FIX_0_785694958);
+                    z5 = MULTIPLY(z5, FIX_1_175875602);
+
+                    tmp0 = z1 - z4;
+                    tmp1 = z2 + z4;
+                    tmp2 += z5;
+                    tmp3 += z5;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(-d3, FIX_0_785694958);
+                    tmp1 = MULTIPLY(-d3, FIX_1_387039845);
+                    tmp2 = MULTIPLY(-d3, FIX_0_275899380);
+                    tmp3 = MULTIPLY(d3, FIX_1_175875602);
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d1, FIX_0_275899380);
+                    tmp1 = MULTIPLY(d1, FIX_0_785694958);
+                    tmp2 = MULTIPLY(d1, FIX_1_175875602);
+                    tmp3 = MULTIPLY(d1, FIX_1_387039845);
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = tmp1 = tmp2 = tmp3 = 0;
+                }
+            }
+        }
+    }
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[DCTSIZE*0] = (int16_t) DESCALE(tmp10 + tmp3,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*7] = (int16_t) DESCALE(tmp10 - tmp3,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*1] = (int16_t) DESCALE(tmp11 + tmp2,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*6] = (int16_t) DESCALE(tmp11 - tmp2,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*2] = (int16_t) DESCALE(tmp12 + tmp1,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*5] = (int16_t) DESCALE(tmp12 - tmp1,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*3] = (int16_t) DESCALE(tmp13 + tmp0,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*4] = (int16_t) DESCALE(tmp13 - tmp0,
+                                           CONST_BITS+PASS1_BITS+3);
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+#undef DCTSIZE        /* discard any earlier DCTSIZE so it can be redefined for the 4x4 routine */
+#define DCTSIZE 4     /* number of rows and of columns processed by ff_j_rev_dct4() */
+#define DCTSTRIDE 8   /* distance, in int16_t elements, between vertically adjacent coefficients */
+
+void ff_j_rev_dct4(DCTBLOCK data)
+{
+  int32_t tmp0, tmp1, tmp2, tmp3;
+  int32_t tmp10, tmp11, tmp12, tmp13;
+  int32_t z1;
+  int32_t d0, d2, d4, d6;
+  register int16_t *dataptr;
+  int rowctr;
+  /* In-place inverse DCT of the 4x4 block at the start of data; rows are DCTSTRIDE elements apart. */
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* +4 on the DC term: after both passes it equals half the final divisor, so the plain >> in pass 2 rounds. */
+  data[0] += 4;
+
+  dataptr = data;
+
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any row in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * row DCT calculations can be simplified this way.
+     */
+
+    register uint8_t *idataptr = (uint8_t*)dataptr; /* byte view of the row, used by the stores in the shortcut below */
+    /* d0, d2, d4, d6 are coefficients 0, 1, 2, 3 of this row. */
+    d0 = dataptr[0];
+    d2 = dataptr[1];
+    d4 = dataptr[2];
+    d6 = dataptr[3];
+
+    if ((d2 | d4 | d6) == 0) {
+      /* AC terms all zero */
+      if (d0) { /* with d0 == 0 the row is already all zero, so nothing is written */
+          /* Pack two copies of the scaled DC value into one 32-bit word. */
+          int16_t dcval = (int16_t) (d0 * (1 << PASS1_BITS));
+          register unsigned v = (dcval & 0xffff) | ((uint32_t)dcval << 16);
+          /* Two stores at byte offsets 0 and 4 cover all four int16_t entries of the row. */
+          AV_WN32A(&idataptr[0], v);
+          AV_WN32A(&idataptr[4], v);
+      }
+
+      dataptr += DCTSTRIDE;     /* advance pointer to next row */
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+            if (d2) {
+                    /* d2 != 0, d6 != 0 (d0 and d4 are not tested) */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) * (1 << CONST_BITS);
+                    tmp1 = (d0 - d4) * (1 << CONST_BITS);
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d2 == 0, d6 != 0 (d0 and d4 are not tested) */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) * (1 << CONST_BITS);
+                    tmp1 = (d0 - d4) * (1 << CONST_BITS);
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d2 != 0, d6 == 0 (d0 and d4 are not tested) */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) * (1 << CONST_BITS);
+                    tmp1 = (d0 - d4) * (1 << CONST_BITS);
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d2 == 0, d6 == 0 (so d4 != 0, otherwise the shortcut above was taken) */
+                    tmp10 = tmp13 = (d0 + d4) * (1 << CONST_BITS);
+                    tmp11 = tmp12 = (d0 - d4) * (1 << CONST_BITS);
+            }
+      }
+
+    /* Final output stage: only tmp10..tmp13 are used, this routine has no odd part */
+
+    dataptr[0] = (int16_t) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (int16_t) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+    dataptr[2] = (int16_t) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (int16_t) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSTRIDE;       /* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  dataptr = data;
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { /* rowctr counts columns in this pass */
+    /* Unlike pass 1, no shortcut for all-zero AC terms is taken here: every
+     * column runs through the full even-part computation below.
+     * d0, d2, d4, d6 are the entries of rows 0..3 of the current column.
+     * The results are written back with a plain right shift rather than
+     * DESCALE(); the rounding comes from the +4 added to the DC term at
+     * the top of this function.
+     */
+
+    d0 = dataptr[DCTSTRIDE*0];
+    d2 = dataptr[DCTSTRIDE*1];
+    d4 = dataptr[DCTSTRIDE*2];
+    d6 = dataptr[DCTSTRIDE*3];
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+            if (d2) {
+                    /* d2 != 0, d6 != 0 (d0 and d4 are not tested) */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) * (1 << CONST_BITS);
+                    tmp1 = (d0 - d4) * (1 << CONST_BITS);
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d2 == 0, d6 != 0 (d0 and d4 are not tested) */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) * (1 << CONST_BITS);
+                    tmp1 = (d0 - d4) * (1 << CONST_BITS);
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d2 != 0, d6 == 0 (d0 and d4 are not tested) */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) * (1 << CONST_BITS);
+                    tmp1 = (d0 - d4) * (1 << CONST_BITS);
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d2 == 0, d6 == 0 (d0 and d4 are not tested) */
+                    tmp10 = tmp13 = (d0 + d4) * (1 << CONST_BITS);
+                    tmp11 = tmp12 = (d0 - d4) * (1 << CONST_BITS);
+            }
+    }
+
+    /* Final output stage: only tmp10..tmp13 are used; plain shift, no DESCALE rounding */
+
+    dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3);
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+void ff_j_rev_dct2(DCTBLOCK data){
+  /* In-place 2x2 inverse transform; the +4 on the DC term rounds the final >>3 of all four outputs. */
+  int16_t *top = data, *bottom = data + DCTSTRIDE;
+  int top_sum, top_diff, bottom_sum, bottom_diff;
+  top[0] += 4;
+  top_sum     = top[0]    + top[1];
+  top_diff    = top[0]    - top[1];
+  bottom_sum  = bottom[0] + bottom[1];
+  bottom_diff = bottom[0] - bottom[1];
+  top[0]    = (top_sum  + bottom_sum)  >> 3;
+  top[1]    = (top_diff + bottom_diff) >> 3;
+  bottom[0] = (top_sum  - bottom_sum)  >> 3;
+  bottom[1] = (top_diff - bottom_diff) >> 3;
+}
+
+void ff_j_rev_dct1(DCTBLOCK data){ /* DC-only case: (DC + 4) >> 3, written back in place */
+  *data = (*data + 4) >> 3;
+}
+
+#undef FIX            /* keep the FIX macro from leaking into the code below */
+#undef CONST_BITS     /* likewise for CONST_BITS */
+
+void ff_jref_idct_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    ff_j_rev_dct(block);                             /* inverse DCT, overwriting block with the result */
+    ff_put_pixels_clamped_c(block, dest, line_size); /* presumably stores the clamped samples into dest (helper defined elsewhere) */
+}
+
+void ff_jref_idct_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    ff_j_rev_dct(block);                             /* inverse DCT, overwriting block with the result */
+    ff_add_pixels_clamped_c(block, dest, line_size); /* presumably adds the samples to dest with clamping (helper defined elsewhere) */
+}
diff --git a/media/ffvpx/libavcodec/libdav1d.c b/media/ffvpx/libavcodec/libdav1d.c
new file mode 100644
index 0000000000..2488a709c7
--- /dev/null
+++ b/media/ffvpx/libavcodec/libdav1d.c
@@ -0,0 +1,642 @@
+/*
+ * Copyright (c) 2018 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (c) 2018 James Almer <jamrial gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <dav1d/dav1d.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/cpu.h"
+#include "libavutil/film_grain_params.h"
+#include "libavutil/mastering_display_metadata.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+
+#include "atsc_a53.h"
+#include "avcodec.h"
+#include "bytestream.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "internal.h"
+
+#define FF_DAV1D_VERSION_AT_LEAST(x,y) /* true when the dav1d API version is at least x.y */ \
+    ((DAV1D_API_VERSION_MAJOR == (x) && DAV1D_API_VERSION_MINOR >= (y)) || DAV1D_API_VERSION_MAJOR > (x))
+
+typedef struct Libdav1dContext {
+    AVClass *class;
+    Dav1dContext *c;      /* decoder handle filled in by dav1d_open() in libdav1d_init() */
+    AVBufferPool *pool;   /* picture buffer pool managed by libdav1d_picture_allocator() */
+    int pool_size;        /* image buffer size the current pool was built for; reset to 0 if pool creation fails */
+
+    Dav1dData data;       /* pending input for dav1d_send_data(); refilled from a new packet when data.sz is 0 */
+    int tile_threads;     /* nonzero overrides the automatically chosen thread count in libdav1d_init() */
+    int frame_threads;    /* nonzero overrides the automatically chosen thread count in libdav1d_init() */
+    int max_frame_delay;  /* dav1d API >= 6.0: passed on unless low-delay forces 1; older API: caps frame threads when > 0 */
+    int apply_grain;      /* >= 0: used as-is; negative: derived from AV_CODEC_EXPORT_DATA_FILM_GRAIN */
+    int operating_point;  /* handed to dav1d only when >= 0 */
+    int all_layers;       /* copied to Dav1dSettings.all_layers */
+} Libdav1dContext;
+
+static const enum AVPixelFormat pix_fmt[][3] = { /* [dav1d pixel layout][hbd] -> AVPixelFormat; columns are the 8-, 10- and 12-bit variants */
+    [DAV1D_PIXEL_LAYOUT_I400] = { AV_PIX_FMT_GRAY8,   AV_PIX_FMT_GRAY10,    AV_PIX_FMT_GRAY12 },
+    [DAV1D_PIXEL_LAYOUT_I420] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P12 },
+    [DAV1D_PIXEL_LAYOUT_I422] = { AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV422P12 },
+    [DAV1D_PIXEL_LAYOUT_I444] = { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV444P12 },
+};
+
+static const enum AVPixelFormat pix_fmt_rgb[3] = { /* indexed by hbd; chosen by libdav1d_init_params() for I444 + identity matrix + BT.709 + sRGB */
+    AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12,
+};
+
+static void libdav1d_log_callback(void *opaque, const char *fmt, va_list vl)
+{
+    AVCodecContext *const avctx = opaque; /* logger cookie installed by libdav1d_init() */
+    /* The callback receives no severity, so every message is reported at AV_LOG_ERROR. */
+    av_vlog(avctx, AV_LOG_ERROR, fmt, vl);
+}
+
+static int libdav1d_picture_allocator(Dav1dPicture *p, void *cookie)
+{
+    Libdav1dContext *const ctx = cookie;
+    const enum AVPixelFormat fmt = pix_fmt[p->p.layout][p->seq_hdr->hbd];
+    /* Both dimensions are rounded up to a multiple of 128 before sizing the buffer. */
+    const int padded_w = FFALIGN(p->p.w, 128);
+    const int padded_h = FFALIGN(p->p.h, 128);
+    int image_size, err, strides[4];
+    uint8_t *planes[4], *base;
+    AVBufferRef *ref;
+
+    image_size = av_image_get_buffer_size(fmt, padded_w, padded_h, DAV1D_PICTURE_ALIGNMENT);
+    if (image_size < 0)
+        return image_size;
+
+    /* The pool is created for one buffer size, so rebuild it whenever that size changes. */
+    if (image_size != ctx->pool_size) {
+        av_buffer_pool_uninit(&ctx->pool);
+        /* Twice the alignment as slack, so that base below can be rounded up. */
+        ctx->pool = av_buffer_pool_init(image_size + DAV1D_PICTURE_ALIGNMENT * 2, NULL);
+        ctx->pool_size = ctx->pool ? image_size : 0;
+        if (!ctx->pool)
+            return AVERROR(ENOMEM);
+    }
+    ref = av_buffer_pool_get(ctx->pool);
+    if (!ref)
+        return AVERROR(ENOMEM);
+
+    /* av_malloc() may return less than DAV1D_PICTURE_ALIGNMENT alignment (e.g. when
+     * AVX is disabled at configure time), so round the start address up ourselves. */
+    base = (uint8_t *)FFALIGN((uintptr_t)ref->data, DAV1D_PICTURE_ALIGNMENT);
+    err  = av_image_fill_arrays(planes, strides, base, fmt, padded_w, padded_h,
+                                DAV1D_PICTURE_ALIGNMENT);
+    if (err < 0) {
+        av_buffer_unref(&ref);
+        return err;
+    }
+
+    p->data[0]   = planes[0];
+    p->data[1]   = planes[1];
+    p->data[2]   = planes[2];
+    p->stride[0] = strides[0];
+    p->stride[1] = strides[1];
+    p->allocator_data = ref; /* released again in libdav1d_picture_release() */
+
+    return 0;
+}
+
+static void libdav1d_picture_release(Dav1dPicture *p, void *cookie)
+{
+    /* allocator_data holds the AVBufferRef stored by libdav1d_picture_allocator(). */
+    AVBufferRef *picture_ref = p->allocator_data;
+    av_buffer_unref(&picture_ref); /* drops that reference; p itself is not modified */
+}
+
+static void libdav1d_init_params(AVCodecContext *c, const Dav1dSequenceHeader *seq)
+{
+    /* RGB output is selected for 4:4:4 with identity matrix, BT.709 primaries and sRGB transfer. */
+    const int is_rgb = seq->layout == DAV1D_PIXEL_LAYOUT_I444 &&
+                       seq->mtrx   == DAV1D_MC_IDENTITY &&
+                       seq->pri    == DAV1D_COLOR_PRI_BT709 &&
+                       seq->trc    == DAV1D_TRC_SRGB;
+
+    c->profile = seq->profile;
+    /* level: (major_level - 2) shifted left by two bits, OR-ed with minor_level */
+    c->level   = ((seq->operating_points[0].major_level - 2) << 2) |
+                 seq->operating_points[0].minor_level;
+
+    /* any other chr value leaves the current chroma sample location untouched */
+    if (seq->chr == DAV1D_CHR_VERTICAL)
+        c->chroma_sample_location = AVCHROMA_LOC_LEFT;
+    else if (seq->chr == DAV1D_CHR_COLOCATED)
+        c->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;
+
+    c->colorspace      = (enum AVColorSpace) seq->mtrx;
+    c->color_primaries = (enum AVColorPrimaries) seq->pri;
+    c->color_trc       = (enum AVColorTransferCharacteristic) seq->trc;
+    if (seq->color_range)
+        c->color_range = AVCOL_RANGE_JPEG;
+    else
+        c->color_range = AVCOL_RANGE_MPEG;
+
+    c->pix_fmt = is_rgb ? pix_fmt_rgb[seq->hbd] : pix_fmt[seq->layout][seq->hbd];
+    /* framerate = time_scale / num_units_in_tick, hence framerate.den is passed first */
+    if (seq->num_units_in_tick && seq->time_scale) {
+        av_reduce(&c->framerate.den, &c->framerate.num,
+                  seq->num_units_in_tick, seq->time_scale, INT_MAX);
+        if (seq->equal_picture_interval)
+            c->ticks_per_frame = seq->num_ticks_per_picture;
+    }
+
+    c->properties = seq->film_grain_present ? (c->properties |  FF_CODEC_PROPERTY_FILM_GRAIN)
+                                            : (c->properties & ~FF_CODEC_PROPERTY_FILM_GRAIN);
+}
+
+static av_cold int libdav1d_parse_extradata(AVCodecContext *c)
+{
+    Dav1dSequenceHeader seq_hdr;
+    size_t obu_start = 0; /* byte offset of the first OBU inside extradata */
+    int ret;
+
+    if (!c->extradata || c->extradata_size <= 0)
+        return 0; /* nothing to parse */
+
+    /* A set top bit is treated as a 4-byte configuration header whose low 7 bits are its version. */
+    if (c->extradata[0] & 0x80) {
+        const int version  = c->extradata[0] & 0x7F;
+        const int is_valid = version == 1 && c->extradata_size >= 4;
+
+        if (!is_valid) {
+            /* Fatal only when the caller asked for AV_EF_EXPLODE; otherwise just warn. */
+            if (c->err_recognition & AV_EF_EXPLODE) {
+                av_log(c, AV_LOG_ERROR, "Error decoding extradata\n");
+                return AVERROR_INVALIDDATA;
+            }
+            av_log(c, AV_LOG_WARNING, "Error decoding extradata\n");
+            return 0;
+        }
+        if (c->extradata_size == 4)
+            return 0; /* header only: no configOBUs follow */
+        obu_start = 4;
+    }
+
+    ret = dav1d_parse_sequence_header(&seq_hdr, c->extradata + obu_start,
+                                      c->extradata_size - obu_start);
+    if (ret < 0)
+        return 0; /* treated as "no sequence header OBU present", not as an error */
+
+    libdav1d_init_params(c, &seq_hdr);
+    ret = ff_set_dimensions(c, seq_hdr.max_width, seq_hdr.max_height);
+
+    return ret < 0 ? ret : 0;
+}
+
+static av_cold int libdav1d_init(AVCodecContext *c)
+{
+    Libdav1dContext *dav1d = c->priv_data;
+    Dav1dSettings s;
+#if FF_DAV1D_VERSION_AT_LEAST(6,0)
+    int threads = c->thread_count; /* used unscaled; only capped by DAV1D_MAX_THREADS below */
+#else
+    int threads = (c->thread_count ? c->thread_count : av_cpu_count()) * 3 / 2; /* 1.5x the requested (or CPU) count, split into tile/frame threads below */
+#endif
+    int res;
+    /* Fill a Dav1dSettings from c and the private options, parse extradata, then open the decoder. */
+    av_log(c, AV_LOG_INFO, "libdav1d %s\n", dav1d_version());
+
+    dav1d_default_settings(&s);
+    s.logger.cookie = c;
+    s.logger.callback = libdav1d_log_callback;
+    s.allocator.cookie = dav1d;
+    s.allocator.alloc_picture_callback = libdav1d_picture_allocator;
+    s.allocator.release_picture_callback = libdav1d_picture_release;
+    s.frame_size_limit = c->max_pixels;
+    if (dav1d->apply_grain >= 0) /* explicit option value wins */
+        s.apply_grain = dav1d->apply_grain;
+    else /* otherwise apply grain in the decoder unless the caller wants it exported as side data */
+        s.apply_grain = !(c->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN);
+
+    s.all_layers = dav1d->all_layers;
+    if (dav1d->operating_point >= 0) /* negative keeps the dav1d default */
+        s.operating_point = dav1d->operating_point;
+#if FF_DAV1D_VERSION_AT_LEAST(6,2)
+    s.strict_std_compliance = c->strict_std_compliance > 0;
+#endif
+    /* dav1d API >= 6.0 takes a single thread count; older versions take separate tile/frame counts. */
+#if FF_DAV1D_VERSION_AT_LEAST(6,0)
+    if (dav1d->frame_threads || dav1d->tile_threads) /* either private option overrides thread_count */
+        s.n_threads = FFMAX(dav1d->frame_threads, dav1d->tile_threads);
+    else
+        s.n_threads = FFMIN(threads, DAV1D_MAX_THREADS);
+    if (dav1d->max_frame_delay > 0 && (c->flags & AV_CODEC_FLAG_LOW_DELAY))
+        av_log(c, AV_LOG_WARNING, "Low delay mode requested, forcing max_frame_delay 1\n");
+    s.max_frame_delay = (c->flags & AV_CODEC_FLAG_LOW_DELAY) ? 1 : dav1d->max_frame_delay;
+    av_log(c, AV_LOG_DEBUG, "Using %d threads, %d max_frame_delay\n",
+           s.n_threads, s.max_frame_delay);
+#else
+    s.n_tile_threads = dav1d->tile_threads
+                     ? dav1d->tile_threads
+                     : FFMIN(floor(sqrt(threads)), DAV1D_MAX_TILE_THREADS);
+    s.n_frame_threads = dav1d->frame_threads
+                      ? dav1d->frame_threads
+                      : FFMIN(ceil(threads / s.n_tile_threads), DAV1D_MAX_FRAME_THREADS); /* NOTE(review): if both operands are ints the division truncates and ceil() is a no-op -- confirm intent */
+    if (dav1d->max_frame_delay > 0)
+        s.n_frame_threads = FFMIN(s.n_frame_threads, dav1d->max_frame_delay);
+    av_log(c, AV_LOG_DEBUG, "Using %d frame threads, %d tile threads\n",
+           s.n_frame_threads, s.n_tile_threads);
+#endif
+    /* Map skip_frame onto dav1d's decode_frame_type, strictest level first (dav1d API >= 6.8 only). */
+#if FF_DAV1D_VERSION_AT_LEAST(6,8)
+    if (c->skip_frame >= AVDISCARD_NONKEY)
+        s.decode_frame_type = DAV1D_DECODEFRAMETYPE_KEY;
+    else if (c->skip_frame >= AVDISCARD_NONINTRA)
+        s.decode_frame_type = DAV1D_DECODEFRAMETYPE_INTRA;
+    else if (c->skip_frame >= AVDISCARD_NONREF)
+        s.decode_frame_type = DAV1D_DECODEFRAMETYPE_REFERENCE;
+#endif
+
+    res = libdav1d_parse_extradata(c); /* may set stream parameters and dimensions before the decoder is opened */
+    if (res < 0)
+        return res;
+
+    res = dav1d_open(&dav1d->c, &s);
+    if (res < 0) /* NOTE(review): every dav1d_open() failure is reported as ENOMEM; the original code in res is dropped */
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static void libdav1d_flush(AVCodecContext *c)
+{
+    Libdav1dContext *const ctx = c->priv_data;
+    /* Release any input still held in ctx->data, then flush the decoder itself. */
+    dav1d_data_unref(&ctx->data);
+    dav1d_flush(ctx->c);
+}
+
+typedef struct OpaqueData { /* per-packet values saved in libdav1d_receive_frame() while the packet is inside dav1d */
+    void    *pkt_orig_opaque;   /* the AVPacket.opaque value the packet arrived with */
+#if FF_API_REORDERED_OPAQUE
+    int64_t  reordered_opaque;  /* AVCodecContext.reordered_opaque at the time the packet was wrapped */
+#endif
+} OpaqueData;
+
+static void libdav1d_data_free(const uint8_t *data, void *opaque) {
+    /* opaque is the packet's AVBufferRef handed to dav1d_data_wrap(); data is unused. */
+    AVBufferRef *packet_buf = opaque;
+    av_buffer_unref(&packet_buf);
+}
+
+static void libdav1d_user_data_free(const uint8_t *data, void *opaque) {
+    AVPacket *packet = opaque;     /* the AVPacket registered via dav1d_data_wrap_user_data() */
+    av_assert0(data == opaque);    /* user-data pointer and cookie must be the same packet */
+    av_free(packet->opaque);       /* OpaqueData attached in libdav1d_receive_frame(), or NULL */
+    av_packet_free(&packet);
+}
+
+static int libdav1d_receive_frame(AVCodecContext *c, AVFrame *frame)
+{
+    Libdav1dContext *dav1d = c->priv_data;
+    Dav1dData *data = &dav1d->data;
+    Dav1dPicture pic = { 0 }, *p = &pic;
+    AVPacket *pkt;
+    OpaqueData *od = NULL;
+#if FF_DAV1D_VERSION_AT_LEAST(5,1)
+    enum Dav1dEventFlags event_flags = 0;
+#endif
+    int res;
+
+    if (!data->sz) {
+        pkt = av_packet_alloc();
+
+        if (!pkt)
+            return AVERROR(ENOMEM);
+
+        res = ff_decode_get_packet(c, pkt);
+        if (res < 0 && res != AVERROR_EOF) {
+            av_packet_free(&pkt);
+            return res;
+        }
+
+        if (pkt->size) {
+            res = dav1d_data_wrap(data, pkt->data, pkt->size,
+                                  libdav1d_data_free, pkt->buf);
+            if (res < 0) {
+                av_packet_free(&pkt);
+                return res;
+            }
+
+            pkt->buf = NULL;
+
+FF_DISABLE_DEPRECATION_WARNINGS
+            if (
+#if FF_API_REORDERED_OPAQUE
+                c->reordered_opaque != AV_NOPTS_VALUE ||
+#endif
+                (pkt->opaque && (c->flags & AV_CODEC_FLAG_COPY_OPAQUE))) {
+                od = av_mallocz(sizeof(*od));
+                if (!od) {
+                    av_packet_free(&pkt);
+                    dav1d_data_unref(data);
+                    return AVERROR(ENOMEM);
+                }
+                od->pkt_orig_opaque  = pkt->opaque;
+#if FF_API_REORDERED_OPAQUE
+                od->reordered_opaque = c->reordered_opaque;
+#endif
+FF_ENABLE_DEPRECATION_WARNINGS
+            }
+            pkt->opaque = od;
+
+            res = dav1d_data_wrap_user_data(data, (const uint8_t *)pkt,
+                                            libdav1d_user_data_free, pkt);
+            if (res < 0) {
+                av_free(pkt->opaque);
+                av_packet_free(&pkt);
+                dav1d_data_unref(data);
+                return res;
+            }
+            pkt = NULL;
+        } else {
+            av_packet_free(&pkt);
+            if (res >= 0)
+                return AVERROR(EAGAIN);
+        }
+    }
+
+    res = dav1d_send_data(dav1d->c, data);
+    if (res < 0) {
+        if (res == AVERROR(EINVAL))
+            res = AVERROR_INVALIDDATA;
+        if (res != AVERROR(EAGAIN)) {
+            dav1d_data_unref(data);
+            return res;
+        }
+    }
+
+    res = dav1d_get_picture(dav1d->c, p);
+    if (res < 0) {
+        if (res == AVERROR(EINVAL))
+            res = AVERROR_INVALIDDATA;
+        else if (res == AVERROR(EAGAIN) && c->internal->draining)
+            res = AVERROR_EOF;
+
+        return res;
+    }
+
+    av_assert0(p->data[0] && p->allocator_data);
+
+    // This requires the custom allocator above
+    frame->buf[0] = av_buffer_ref(p->allocator_data);
+    if (!frame->buf[0]) {
+        dav1d_picture_unref(p);
+        return AVERROR(ENOMEM);
+    }
+
+    frame->data[0] = p->data[0];
+    frame->data[1] = p->data[1];
+    frame->data[2] = p->data[2];
+    frame->linesize[0] = p->stride[0];
+    frame->linesize[1] = p->stride[1];
+    frame->linesize[2] = p->stride[1];
+
+#if FF_DAV1D_VERSION_AT_LEAST(5,1)
+    dav1d_get_event_flags(dav1d->c, &event_flags);
+    if (c->pix_fmt == AV_PIX_FMT_NONE ||
+        event_flags & DAV1D_EVENT_FLAG_NEW_SEQUENCE)
+#endif
+    libdav1d_init_params(c, p->seq_hdr);
+    res = ff_decode_frame_props(c, frame);
+    if (res < 0)
+        goto fail;
+
+    frame->width = p->p.w;
+    frame->height = p->p.h;
+    if (c->width != p->p.w || c->height != p->p.h) {
+        res = ff_set_dimensions(c, p->p.w, p->p.h);
+        if (res < 0)
+            goto fail;
+    }
+
+    av_reduce(&frame->sample_aspect_ratio.num,
+              &frame->sample_aspect_ratio.den,
+              frame->height * (int64_t)p->frame_hdr->render_width,
+              frame->width  * (int64_t)p->frame_hdr->render_height,
+              INT_MAX);
+    ff_set_sar(c, frame->sample_aspect_ratio);
+
+    pkt = (AVPacket *)p->m.user_data.data;
+    od  = pkt->opaque;
+#if FF_API_REORDERED_OPAQUE
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (od && od->reordered_opaque != AV_NOPTS_VALUE)
+        frame->reordered_opaque = od->reordered_opaque;
+    else
+        frame->reordered_opaque = AV_NOPTS_VALUE;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    // restore the original user opaque value for
+    // ff_decode_frame_props_from_pkt()
+    pkt->opaque = od ? od->pkt_orig_opaque : NULL;
+    av_freep(&od);
+
+    // match timestamps and packet size
+    res = ff_decode_frame_props_from_pkt(c, frame, pkt);
+    pkt->opaque = NULL;
+    if (res < 0)
+        goto fail;
+
+    frame->pkt_dts = pkt->pts;
+    frame->key_frame = p->frame_hdr->frame_type == DAV1D_FRAME_TYPE_KEY;
+
+    switch (p->frame_hdr->frame_type) {
+    case DAV1D_FRAME_TYPE_KEY:
+    case DAV1D_FRAME_TYPE_INTRA:
+        frame->pict_type = AV_PICTURE_TYPE_I;
+        break;
+    case DAV1D_FRAME_TYPE_INTER:
+        frame->pict_type = AV_PICTURE_TYPE_P;
+        break;
+    case DAV1D_FRAME_TYPE_SWITCH:
+        frame->pict_type = AV_PICTURE_TYPE_SP;
+        break;
+    default:
+        res = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    if (p->mastering_display) {
+        AVMasteringDisplayMetadata *mastering = av_mastering_display_metadata_create_side_data(frame);
+        if (!mastering) {
+            res = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        for (int i = 0; i < 3; i++) {
+            mastering->display_primaries[i][0] = av_make_q(p->mastering_display->primaries[i][0], 1 << 16);
+            mastering->display_primaries[i][1] = av_make_q(p->mastering_display->primaries[i][1], 1 << 16);
+        }
+        mastering->white_point[0] = av_make_q(p->mastering_display->white_point[0], 1 << 16);
+        mastering->white_point[1] = av_make_q(p->mastering_display->white_point[1], 1 << 16);
+
+        mastering->max_luminance = av_make_q(p->mastering_display->max_luminance, 1 << 8);
+        mastering->min_luminance = av_make_q(p->mastering_display->min_luminance, 1 << 14);
+
+        mastering->has_primaries = 1;
+        mastering->has_luminance = 1;
+    }
+    if (p->content_light) {
+        AVContentLightMetadata *light = av_content_light_metadata_create_side_data(frame);
+        if (!light) {
+            res = AVERROR(ENOMEM);
+            goto fail;
+        }
+        light->MaxCLL = p->content_light->max_content_light_level;
+        light->MaxFALL = p->content_light->max_frame_average_light_level;
+    }
+    if (p->itut_t35) {
+        GetByteContext gb;
+        unsigned int user_identifier;
+
+        bytestream2_init(&gb, p->itut_t35->payload, p->itut_t35->payload_size);
+        bytestream2_skip(&gb, 1); // terminal provider code
+        bytestream2_skip(&gb, 1); // terminal provider oriented code
+        user_identifier = bytestream2_get_be32(&gb);
+        switch (user_identifier) {
+        case MKBETAG('G', 'A', '9', '4'): { // closed captions
+            AVBufferRef *buf = NULL;
+
+            res = ff_parse_a53_cc(&buf, gb.buffer, bytestream2_get_bytes_left(&gb));
+            if (res < 0)
+                goto fail;
+            if (!res)
+                break;
+
+            if (!av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_A53_CC, buf))
+                av_buffer_unref(&buf);
+
+            c->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+            break;
+        }
+        default: // ignore unsupported identifiers
+            break;
+        }
+    }
+    if (p->frame_hdr->film_grain.present && (!dav1d->apply_grain ||
+        (c->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))) {
+        AVFilmGrainParams *fgp = av_film_grain_params_create_side_data(frame);
+        if (!fgp) {
+            res = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        fgp->type = AV_FILM_GRAIN_PARAMS_AV1;
+        fgp->seed = p->frame_hdr->film_grain.data.seed;
+        fgp->codec.aom.num_y_points = p->frame_hdr->film_grain.data.num_y_points;
+        fgp->codec.aom.chroma_scaling_from_luma = p->frame_hdr->film_grain.data.chroma_scaling_from_luma;
+        fgp->codec.aom.scaling_shift = p->frame_hdr->film_grain.data.scaling_shift;
+        fgp->codec.aom.ar_coeff_lag = p->frame_hdr->film_grain.data.ar_coeff_lag;
+        fgp->codec.aom.ar_coeff_shift = p->frame_hdr->film_grain.data.ar_coeff_shift;
+        fgp->codec.aom.grain_scale_shift = p->frame_hdr->film_grain.data.grain_scale_shift;
+        fgp->codec.aom.overlap_flag = p->frame_hdr->film_grain.data.overlap_flag;
+        fgp->codec.aom.limit_output_range = p->frame_hdr->film_grain.data.clip_to_restricted_range;
+
+        memcpy(&fgp->codec.aom.y_points, &p->frame_hdr->film_grain.data.y_points,
+               sizeof(fgp->codec.aom.y_points));
+        memcpy(&fgp->codec.aom.num_uv_points, &p->frame_hdr->film_grain.data.num_uv_points,
+               sizeof(fgp->codec.aom.num_uv_points));
+        memcpy(&fgp->codec.aom.uv_points, &p->frame_hdr->film_grain.data.uv_points,
+               sizeof(fgp->codec.aom.uv_points));
+        memcpy(&fgp->codec.aom.ar_coeffs_y, &p->frame_hdr->film_grain.data.ar_coeffs_y,
+               sizeof(fgp->codec.aom.ar_coeffs_y));
+        memcpy(&fgp->codec.aom.ar_coeffs_uv[0], &p->frame_hdr->film_grain.data.ar_coeffs_uv[0],
+               sizeof(fgp->codec.aom.ar_coeffs_uv[0]));
+        memcpy(&fgp->codec.aom.ar_coeffs_uv[1], &p->frame_hdr->film_grain.data.ar_coeffs_uv[1],
+               sizeof(fgp->codec.aom.ar_coeffs_uv[1]));
+        memcpy(&fgp->codec.aom.uv_mult, &p->frame_hdr->film_grain.data.uv_mult,
+               sizeof(fgp->codec.aom.uv_mult));
+        memcpy(&fgp->codec.aom.uv_mult_luma, &p->frame_hdr->film_grain.data.uv_luma_mult,
+               sizeof(fgp->codec.aom.uv_mult_luma));
+        memcpy(&fgp->codec.aom.uv_offset, &p->frame_hdr->film_grain.data.uv_offset,
+               sizeof(fgp->codec.aom.uv_offset));
+    }
+
+    res = 0;
+fail:
+    dav1d_picture_unref(p);
+    if (res < 0)
+        av_frame_unref(frame);
+    return res;
+}
+
+static av_cold int libdav1d_close(AVCodecContext *c) /* codec close callback: releases everything owned by the private context; always returns 0 */
+{
+    Libdav1dContext *dav1d = c->priv_data;
+
+    av_buffer_pool_uninit(&dav1d->pool); /* tear down the buffer pool kept in the context */
+    dav1d_data_unref(&dav1d->data);      /* drop the reference on the data object kept in the context */
+    dav1d_close(&dav1d->c);              /* close the dav1d decoder handle last */
+
+    return 0;
+}
+
+#ifndef DAV1D_MAX_FRAME_THREADS /* dav1d headers without a frame-thread limit: reuse the generic thread limit */
+#define DAV1D_MAX_FRAME_THREADS DAV1D_MAX_THREADS
+#endif
+#ifndef DAV1D_MAX_TILE_THREADS /* likewise for the tile-thread limit */
+#define DAV1D_MAX_TILE_THREADS DAV1D_MAX_THREADS
+#endif
+#ifndef DAV1D_MAX_FRAME_DELAY /* no dedicated frame-delay limit: bound it by the frame-thread limit */
+#define DAV1D_MAX_FRAME_DELAY DAV1D_MAX_FRAME_THREADS
+#endif
+
+#define OFFSET(x) offsetof(Libdav1dContext, x) /* byte offset of option field x inside the private context */
+#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM /* flags shared by every option below; unparenthesized, so only combine it with | */
+static const AVOption libdav1d_options[] = { /* each entry: name, help, field offset, type, default, min, max, flags */
+    { "tilethreads", "Tile threads", OFFSET(tile_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, DAV1D_MAX_TILE_THREADS, VD | AV_OPT_FLAG_DEPRECATED },
+    { "framethreads", "Frame threads", OFFSET(frame_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, DAV1D_MAX_FRAME_THREADS, VD | AV_OPT_FLAG_DEPRECATED },
+    { "max_frame_delay", "Max frame delay", OFFSET(max_frame_delay), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, DAV1D_MAX_FRAME_DELAY, VD },
+    { "filmgrain", "Apply Film Grain", OFFSET(apply_grain), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VD | AV_OPT_FLAG_DEPRECATED },
+    { "oppoint",  "Select an operating point of the scalable bitstream", OFFSET(operating_point), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 31, VD },
+    { "alllayers", "Output all spatial layers", OFFSET(all_layers), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
+    { NULL } /* terminator entry */
+};
+
+static const AVClass libdav1d_class = { /* AVClass of the private context: names it and attaches libdav1d_options */
+    .class_name = "libdav1d decoder",
+    .item_name  = av_default_item_name,
+    .option     = libdav1d_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const FFCodec ff_libdav1d_decoder = { /* codec table entry for the AV1 decoder backed by libdav1d */
+    .p.name         = "libdav1d",
+    CODEC_LONG_NAME("dav1d AV1 decoder by VideoLAN"),
+    .p.type         = AVMEDIA_TYPE_VIDEO,
+    .p.id           = AV_CODEC_ID_AV1,
+    .priv_data_size = sizeof(Libdav1dContext),
+    .init           = libdav1d_init,
+    .close          = libdav1d_close,
+    .flush          = libdav1d_flush,
+    FF_CODEC_RECEIVE_FRAME_CB(libdav1d_receive_frame), /* frames are pulled through the receive-frame callback */
+    .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_OTHER_THREADS,
+    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_FRAME_PROPS |
+                      FF_CODEC_CAP_AUTO_THREADS,
+    .p.priv_class   = &libdav1d_class, /* exposes the options table declared above */
+    .p.wrapper_name = "libdav1d",
+};
diff --git a/media/ffvpx/libavcodec/log2_tab.c b/media/ffvpx/libavcodec/log2_tab.c
new file mode 100644
index 0000000000..47a1df03b7
--- /dev/null
+++ b/media/ffvpx/libavcodec/log2_tab.c
@@ -0,0 +1 @@
+#include "libavutil/log2_tab.c"
diff --git a/media/ffvpx/libavcodec/mathops.h b/media/ffvpx/libavcodec/mathops.h
new file mode 100644
index 0000000000..a1dc323304
--- /dev/null
+++ b/media/ffvpx/libavcodec/mathops.h
@@ -0,0 +1,255 @@
+/*
+ * simple math operations
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_MATHOPS_H
+#define AVCODEC_MATHOPS_H
+
+#include <stdint.h>
+
+#include "libavutil/attributes_internal.h"
+#include "libavutil/common.h"
+#include "config.h"
+
+#define MAX_NEG_CROP 1024 /* extra entries on each side of the 256 in-range values of ff_crop_tab */
+
+extern const uint32_t ff_inverse[257]; /* reciprocal table indexed by the divisor in FASTDIV() */
+extern const uint8_t ff_log2_run[41];
+extern const uint8_t ff_sqrt_tab[256]; /* lookup table used by ff_sqrt() */
+extern const uint8_t attribute_visibility_hidden ff_crop_tab[256 + 2 * MAX_NEG_CROP];
+extern const uint8_t ff_zigzag_direct[64];
+extern const uint8_t ff_zigzag_scan[16+1];
+
+#if   ARCH_ARM
+#   include "arm/mathops.h"
+#elif ARCH_AVR32
+#   include "avr32/mathops.h"
+#elif ARCH_MIPS
+#   include "mips/mathops.h"
+#elif ARCH_PPC
+#   include "ppc/mathops.h"
+#elif ARCH_X86
+#   include "x86/mathops.h"
+#endif
+
+/* generic implementation */
+
+#ifndef MUL64 /* full 64-bit signed product: both operands are widened to int64_t first */
+#   define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
+#endif
+
+#ifndef MULL /* 64-bit signed product of a and b shifted right by s bits */
+#   define MULL(a,b,s) (MUL64(a, b) >> (s))
+#endif
+
+#ifndef MULH /* signed multiply: 64-bit product shifted right by 32, returned as int */
+static av_always_inline int MULH(int a, int b){
+    return MUL64(a, b) >> 32;
+}
+#endif
+
+#ifndef UMULH /* unsigned multiply: upper 32 bits of the 64-bit product */
+static av_always_inline unsigned UMULH(unsigned a, unsigned b){
+    return ((uint64_t)(a) * (uint64_t)(b))>>32;
+}
+#endif
+
+#ifndef MAC64 /* multiply-accumulate: d += 64-bit product of a and b */
+#   define MAC64(d, a, b) ((d) += MUL64(a, b))
+#endif
+
+#ifndef MLS64 /* multiply-subtract: d -= 64-bit product of a and b */
+#   define MLS64(d, a, b) ((d) -= MUL64(a, b))
+#endif
+
+/* signed 16x16 -> 32 multiply add accumulate; fully parenthesized (like MLS16/MAC64) so it is safe inside larger expressions */
+#ifndef MAC16
+#   define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))
+#endif
+
+/* signed 16x16 -> 32 multiply */
+#ifndef MUL16
+#   define MUL16(ra, rb) ((ra) * (rb))
+#endif
+
+#ifndef MLS16 /* signed 16x16 -> 32 multiply subtract accumulate */
+#   define MLS16(rt, ra, rb) ((rt) -= (ra) * (rb))
+#endif
+
+/* median of 3: returns whichever of a, b and c lies between the other two */
+#ifndef mid_pred
+#define mid_pred mid_pred
+static inline av_const int mid_pred(int a, int b, int c)
+{
+    /* b doubles as the result: it is only overwritten when it is not
+     * already the middle value. */
+    if (a > b) {
+        /* b < a: the median is min(a, c) unless c <= b, in which case
+         * b itself is the median. */
+        if (c > b)
+            b = c > a ? a : c;
+    } else if (b > c) {
+        /* a <= b and c < b: the median is max(a, c). */
+        b = c > a ? c : a;
+    }
+    return b;
+}
+#endif
+
+#ifndef median4
+#define median4 median4
+static inline av_const int median4(int a, int b, int c, int d)
+{
+    /* Order each pair, then average the two middle values of the four:
+     * the smaller of the two pair maxima and the larger of the two pair
+     * minima. The division truncates exactly as integer '/' does. */
+    const int lo_ab = a < b ? a : b, hi_ab = a < b ? b : a;
+    const int lo_cd = c < d ? c : d, hi_cd = c < d ? d : c;
+
+    return (FFMIN(hi_ab, hi_cd) + FFMAX(lo_ab, lo_cd)) / 2;
+}
+#endif
+
+#define FF_SIGNBIT(x) ((x) >> CHAR_BIT * sizeof(x) - 1) /* shifts x right by (bit width of x) - 1, leaving only its top bit (replicated if >> is arithmetic) */
+
+#ifndef sign_extend /* generic fallback: sign-extend the low `bits` bits of val to a full int */
+static inline av_const int sign_extend(int val, unsigned bits)
+{
+    unsigned shift = 8 * sizeof(int) - bits; /* number of high bits to discard; bits == 0 would make this the full int width */
+    union { unsigned u; int s; } v = { (unsigned) val << shift }; /* shift left as unsigned, then reinterpret the result as signed */
+    return v.s >> shift; /* shift back down; relies on >> of a negative int replicating the sign bit */
+}
+#endif
+
+#ifndef sign_extend64 /* generic fallback: sign-extend the low `bits` bits of val to a full int64_t */
+static inline av_const int64_t sign_extend64(int64_t val, unsigned bits)
+{
+    unsigned shift = 8 * sizeof(int64_t) - bits; /* number of high bits to discard; bits == 0 would make this the full 64-bit width */
+    union { uint64_t u; int64_t s; } v = { (uint64_t) val << shift }; /* shift left as unsigned, then reinterpret the result as signed */
+    return v.s >> shift; /* shift back down; relies on >> of a negative value replicating the sign bit */
+}
+#endif
+
+#ifndef zero_extend /* generic fallback: keep the low `bits` bits of val and clear everything above them */
+static inline av_const unsigned zero_extend(unsigned val, unsigned bits)
+{
+    return (val << ((8 * sizeof(int)) - bits)) >> ((8 * sizeof(int)) - bits); /* shift the unwanted high bits out, then shift zeros back in */
+}
+#endif
+
+#ifndef COPY3_IF_LT /* if y < x then x = y, a = b, c = d; expands to a bare if-block (no do/while wrapper), so do not use it as the body of an unbraced if/else */
+#define COPY3_IF_LT(x, y, a, b, c, d)\
+if ((y) < (x)) {\
+    (x) = (y);\
+    (a) = (b);\
+    (c) = (d);\
+}
+#endif
+
+#ifndef MASK_ABS /* mask = level >> 31, then level = (level ^ mask) - mask (absolute value when mask is the 32-bit sign mask); arguments are not parenthesized, so pass plain lvalues */
+#define MASK_ABS(mask, level) do {              \
+        mask  = level >> 31;                    \
+        level = (level ^ mask) - mask;          \
+    } while (0)
+#endif
+
+#ifndef NEG_SSR32 /* signed: a cast to int32_t and shifted right by (32 - s); s == 0 would shift by the full width */
+#   define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
+#endif
+
+#ifndef NEG_USR32 /* unsigned: a cast to uint32_t and shifted right by (32 - s), i.e. its top s bits */
+#   define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
+#endif
+
+#if HAVE_BIGENDIAN /* big endian: the first argument goes into the most significant position */
+# ifndef PACK_2U8
+#   define PACK_2U8(a,b)     (((a) <<  8) | (b))
+# endif
+# ifndef PACK_4U8
+#   define PACK_4U8(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
+# endif
+# ifndef PACK_2U16
+#   define PACK_2U16(a,b)    (((a) << 16) | (b))
+# endif
+#else /* little endian: the first argument goes into the least significant position */
+# ifndef PACK_2U8
+#   define PACK_2U8(a,b)     (((b) <<  8) | (a))
+# endif
+# ifndef PACK_4U8 /* guard must name the macro it protects (was PACK_4U2), so an earlier definition is not redefined */
+#   define PACK_4U8(a,b,c,d) (((d) << 24) | ((c) << 16) | ((b) << 8) | (a))
+# endif
+# ifndef PACK_2U16
+#   define PACK_2U16(a,b)    (((b) << 16) | (a))
+# endif
+#endif
+
+#ifndef PACK_2S8 /* signed variants: mask each argument to its field width, then pack with the unsigned macro */
+#   define PACK_2S8(a,b)     PACK_2U8((a)&255, (b)&255)
+#endif
+#ifndef PACK_4S8
+#   define PACK_4S8(a,b,c,d) PACK_4U8((a)&255, (b)&255, (c)&255, (d)&255)
+#endif
+#ifndef PACK_2S16
+#   define PACK_2S16(a,b)    PACK_2U16((a)&0xffff, (b)&0xffff)
+#endif
+
+#ifndef FASTDIV /* a / b via (a * ff_inverse[b]) >> 32; b indexes the 257-entry ff_inverse table; `a` is not parenthesized after the cast, so pass a simple expression */
+#   define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32))
+#endif /* FASTDIV */
+
+#ifndef ff_sqrt /* generic integer square root built on ff_sqrt_tab (presumably floor(sqrt(a)) - confirm against callers) */
+#define ff_sqrt ff_sqrt
+static inline av_const unsigned int ff_sqrt(unsigned int a)
+{
+    unsigned int b; /* candidate root, corrected by at most one on return */
+
+    if (a < 255) return (ff_sqrt_tab[a + 1] - 1) >> 4; /* small inputs: answered directly from the table */
+    else if (a < (1 << 12)) b = ff_sqrt_tab[a >> 4] >> 2; /* table estimate from the scaled-down input */
+#if !CONFIG_SMALL /* extra table ranges; when compiled out these inputs take the general path below */
+    else if (a < (1 << 14)) b = ff_sqrt_tab[a >> 6] >> 1;
+    else if (a < (1 << 16)) b = ff_sqrt_tab[a >> 8]   ;
+#endif
+    else {
+        int s = av_log2_16bit(a >> 16) >> 1; /* half of the log2 of the upper 16 bits of a */
+        unsigned int c = a >> (s + 2);
+        b = ff_sqrt_tab[c >> (s + 8)]; /* initial table estimate */
+        b = FASTDIV(c,b) + (b << s);   /* one refinement step combining c / b with the scaled estimate */
+    }
+
+    return b - (a < b * b); /* step down by one when the candidate squared exceeds a */
+}
+#endif
+
+static inline av_const float ff_sqrf(float a) /* returns a squared */
+{
+    return a*a;
+}
+
+static inline int8_t ff_u8_to_s8(uint8_t a)
+{
+    /* Reinterpret the byte through a union instead of converting its value. */
+    const union {
+        uint8_t u8;
+        int8_t  s8;
+    } pun = { .u8 = a };
+    return pun.s8;
+}
+
+#endif /* AVCODEC_MATHOPS_H */
diff --git a/media/ffvpx/libavcodec/mathtables.c b/media/ffvpx/libavcodec/mathtables.c
new file mode 100644
index 0000000000..8b0031eb00
--- /dev/null
+++ b/media/ffvpx/libavcodec/mathtables.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "mathops.h"
+
+/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256;
+ * for a>16909558, the result is an overestimate by less than 1 part in 1<<24 */
+const uint32_t ff_inverse[257]={
+         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
+ 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
+ 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
+ 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
+ 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
+ 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
+  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
+  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
+  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
+  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
+  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
+  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
+  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
+  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
+  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
+  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
+  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
+  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
+  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
+  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
+  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
+  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
+  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
+  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
+  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
+  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
+  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
+  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
+  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
+  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
+  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
+  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
+  16777216
+};
+
+const uint8_t ff_sqrt_tab[256]={ /* entry i is approximately 16*sqrt(i), capped at 255; consumed by ff_sqrt() */
+  0, 16, 23, 28, 32, 36, 40, 43, 46, 48, 51, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 77, 79, 80, 82, 84, 85, 87, 88, 90,
+ 91, 92, 94, 95, 96, 98, 99,100,102,103,104,105,107,108,109,110,111,112,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,151,151,152,153,154,155,156,156,
+157,158,159,160,160,161,162,163,164,164,165,166,167,168,168,169,170,171,171,172,173,174,174,175,176,176,177,178,179,179,180,181,
+182,182,183,184,184,185,186,186,187,188,188,189,190,190,191,192,192,193,194,194,195,196,196,197,198,198,199,200,200,201,202,202,
+203,204,204,205,205,206,207,207,208,208,209,210,210,211,212,212,213,213,214,215,215,216,216,217,218,218,219,219,220,220,221,222,
+222,223,223,224,224,225,226,226,227,227,228,228,229,230,230,231,231,232,232,233,233,234,235,235,236,236,237,237,238,238,239,239,
+240,240,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,255,255
+};
+
+#define times4(x) x, x, x, x
+#define times256(x) times4(times4(times4(times4(times4(x))))) /* NOTE: despite the name, five nested times4 yield 4^5 = 1024 copies, i.e. MAX_NEG_CROP entries */
+
+const uint8_t ff_crop_tab[256 + 2 * MAX_NEG_CROP] = { /* 1024 zeros, the identity ramp 0x00..0xFF, then 1024 entries of 0xFF */
+times256(0x00),
+0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
+0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
+0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
+0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
+0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
+0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
+0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
+0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
+0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
+0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
+0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
+0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
+0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
+0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
+0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
+times256(0xFF)
+};
+
+const uint8_t ff_zigzag_direct[64] = { /* zigzag scan of an 8x8 block: entry k is the raster index (row * 8 + column) of the k-th coefficient */
+    0,   1,  8, 16,  9,  2,  3, 10,
+    17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+const uint8_t ff_zigzag_scan[16+1] = { /* 4x4 zigzag written as x + y * 4; only 16 initializers are given, so the 17th entry is zero-initialized */
+    0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
+    1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
+    1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
+    3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
+};
+
+const uint8_t ff_log2_run[41] = { /* non-decreasing values 0..24; presumably log2 run lengths for a run-length coder - confirm at the users */
+ 0,  0,  0,  0,  1,  1,  1,  1,
+ 2,  2,  2,  2,  3,  3,  3,  3,
+ 4,  4,  5,  5,  6,  6,  7,  7,
+ 8,  9, 10, 11, 12, 13, 14, 15,
+16, 17, 18, 19, 20, 21, 22, 23,
+24,
+};
diff --git a/media/ffvpx/libavcodec/me_cmp.h b/media/ffvpx/libavcodec/me_cmp.h
new file mode 100644
index 0000000000..90ea76c891
--- /dev/null
+++ b/media/ffvpx/libavcodec/me_cmp.h
@@ -0,0 +1,96 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ME_CMP_H
+#define AVCODEC_ME_CMP_H
+
+#include <stdint.h>
+
+#include "libavutil/attributes_internal.h"
+
+#include "avcodec.h"
+
+extern const uint32_t attribute_visibility_hidden ff_square_tab[512];
+
+
+/* minimum alignment rules ;)
+ * If you notice errors in the align stuff, need more alignment for some ASM code
+ * for some CPU or need to use a function with less aligned data then send a mail
+ * to the ffmpeg-devel mailing list, ...
+ *
+ * !warning These alignments might not match reality, (missing attribute((align))
+ * stuff somewhere possible).
+ * I (Michael) did not check them, these are just the alignments which I think
+ * could be reached easily ...
+ *
+ * !future video codecs might need functions with less strict alignment
+ */
+
+struct MpegEncContext;
+/* Motion estimation:
+ * h is limited to { width / 2, width, 2 * width },
+ * but never larger than 16 and never smaller than 2.
+ * Although currently h < 4 is not used as functions with
+ * width < 8 are neither used nor implemented. */
+typedef int (*me_cmp_func)(struct MpegEncContext *c,
+                           const uint8_t *blk1 /* align width (8 or 16) */,
+                           const uint8_t *blk2 /* align 1 */, ptrdiff_t stride,
+                           int h);
+
+typedef struct MECmpContext {
+    int (*sum_abs_dctelem)(const int16_t *block /* align 16 */);
+
+    me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
+    me_cmp_func sse[6];
+    me_cmp_func hadamard8_diff[6];
+    me_cmp_func dct_sad[6];
+    me_cmp_func quant_psnr[6];
+    me_cmp_func bit[6];
+    me_cmp_func rd[6];
+    me_cmp_func vsad[6];
+    me_cmp_func vsse[6];
+    me_cmp_func nsse[6];
+    me_cmp_func w53[6];
+    me_cmp_func w97[6];
+    me_cmp_func dct_max[6];
+    me_cmp_func dct264_sad[6];
+
+    me_cmp_func me_pre_cmp[6];
+    me_cmp_func me_cmp[6];
+    me_cmp_func me_sub_cmp[6];
+    me_cmp_func mb_cmp[6];
+    me_cmp_func ildct_cmp[6]; // only width 16 used
+    me_cmp_func frame_skip_cmp[6]; // only width 8 used
+
+    me_cmp_func pix_abs[2][4];
+    me_cmp_func median_sad[6];
+} MECmpContext;
+
+void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_aarch64(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_mips(MECmpContext *c, AVCodecContext *avctx);
+
+void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type);
+
+void ff_dsputil_init_dwt(MECmpContext *c);
+
+#endif /* AVCODEC_ME_CMP_H */
diff --git a/media/ffvpx/libavcodec/motion_est.h b/media/ffvpx/libavcodec/motion_est.h
new file mode 100644
index 0000000000..f6a563b08c
--- /dev/null
+++ b/media/ffvpx/libavcodec/motion_est.h
@@ -0,0 +1,132 @@
+/*
+ * Motion estimation
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MOTION_EST_H
+#define AVCODEC_MOTION_EST_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+#include "hpeldsp.h"
+#include "qpeldsp.h"
+
+struct MpegEncContext;
+
+#if ARCH_IA64 // Limit static arrays to avoid gcc failing "short data segment overflowed"
+#define MAX_MV 1024
+#else
+#define MAX_MV 4096
+#endif
+#define MAX_DMV (2*MAX_MV)
+#define ME_MAP_SIZE 64
+
+#define FF_ME_ZERO 0
+#define FF_ME_EPZS 1
+#define FF_ME_XONE 2
+
+/**
+ * Motion estimation context.
+ */
+typedef struct MotionEstContext {
+    AVCodecContext *avctx;
+    int skip;                       ///< set if ME is skipped for the current MB
+    int co_located_mv[4][2];        ///< mv from last P-frame for direct mode ME
+    int direct_basis_mv[4][2];
+    uint8_t *scratchpad;            /**< data area for the ME algo, so that
+                                     * the ME does not need to malloc/free. */
+    uint8_t *temp;
+    uint32_t *map;                  ///< map to avoid duplicate evaluations
+    uint32_t *score_map;            ///< map to store the scores
+    unsigned map_generation;
+    int pre_penalty_factor;
+    int penalty_factor;             /**< an estimate of the bits required to
+                                     * code a given mv value, e.g. (1,0) takes
+                                     * more bits than (0,0). We have to
+                                     * estimate whether any reduction in
+                                     * residual is worth the extra bits. */
+    int sub_penalty_factor;
+    int mb_penalty_factor;
+    int flags;
+    int sub_flags;
+    int mb_flags;
+    int pre_pass;                   ///< = 1 for the pre pass
+    int dia_size;
+    int xmin;
+    int xmax;
+    int ymin;
+    int ymax;
+    int pred_x;
+    int pred_y;
+    const uint8_t *src[4][4];
+    const uint8_t *ref[4][4];
+    int stride;
+    int uvstride;
+    /* temp variables for picture complexity calculation */
+    int64_t mc_mb_var_sum_temp;
+    int64_t mb_var_sum_temp;
+    int scene_change_score;
+
+    op_pixels_func(*hpel_put)[4];
+    op_pixels_func(*hpel_avg)[4];
+    qpel_mc_func(*qpel_put)[16];
+    qpel_mc_func(*qpel_avg)[16];
+    const uint8_t (*mv_penalty)[MAX_DMV * 2 + 1]; ///< bit amount needed to encode a MV
+    const uint8_t *current_mv_penalty;
+    int (*sub_motion_search)(struct MpegEncContext *s,
+                             int *mx_ptr, int *my_ptr, int dmin,
+                             int src_index, int ref_index,
+                             int size, int h);
+} MotionEstContext;
+
+static inline int ff_h263_round_chroma(int x)
+{
+    //FIXME static or not?
+    /* rounding term selected by the low four bits of x, added to x >> 3 */
+    static const uint8_t round_by_low_nibble[16] = {
+        0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,
+    };
+    return (x >> 3) + round_by_low_nibble[x & 0xf];
+}
+
+int ff_init_me(struct MpegEncContext *s);
+
+void ff_estimate_p_frame_motion(struct MpegEncContext *s, int mb_x, int mb_y);
+void ff_estimate_b_frame_motion(struct MpegEncContext *s, int mb_x, int mb_y);
+
+int ff_pre_estimate_p_frame_motion(struct MpegEncContext *s,
+                                   int mb_x, int mb_y);
+
+int ff_epzs_motion_search(struct MpegEncContext *s, int *mx_ptr, int *my_ptr,
+                          int P[10][2], int src_index, int ref_index,
+                          const int16_t (*last_mv)[2], int ref_mv_scale,
+                          int size, int h);
+
+int ff_get_mb_score(struct MpegEncContext *s, int mx, int my, int src_index,
+                    int ref_index, int size, int h, int add_rate);
+
+int ff_get_best_fcode(struct MpegEncContext *s,
+                      const int16_t (*mv_table)[2], int type);
+
+void ff_fix_long_p_mvs(struct MpegEncContext *s, int type);
+void ff_fix_long_mvs(struct MpegEncContext *s, uint8_t *field_select_table,
+                     int field_select, int16_t (*mv_table)[2], int f_code,
+                     int type, int truncate);
+
+#endif /* AVCODEC_MOTION_EST_H */
diff --git a/media/ffvpx/libavcodec/moz.build b/media/ffvpx/libavcodec/moz.build
new file mode 100644
index 0000000000..61d9962a71
--- /dev/null
+++ b/media/ffvpx/libavcodec/moz.build
@@ -0,0 +1,143 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Due to duplicate file names, we compile libavcodec/x86 in its own
+# moz.build file.
+if CONFIG['FFVPX_ASFLAGS']:
+    if CONFIG['CPU_ARCH'] == 'x86' or CONFIG['CPU_ARCH'] == 'x86_64':
+        DIRS += ['x86']
+    elif CONFIG['CPU_ARCH'] == 'arm':
+        DIRS += ['arm']
+
+if CONFIG['CPU_ARCH'] == 'aarch64':
+    DIRS += ['aarch64']
+
+SharedLibrary('mozavcodec')
+SOURCES += [
+    'allcodecs.c',
+    'avcodec.c',
+    'avdct.c',
+    'avpacket.c',
+    'bitstream.c',
+    'bitstream_filters.c',
+    'bsf.c',
+    'codec_desc.c',
+    'codec_par.c',
+    'dct.c',
+    'dct32_fixed.c',
+    'dct32_float.c',
+    'decode.c',
+    'encode.c',
+    'faandct.c',
+    'faanidct.c',
+    'fdctdsp.c',
+    'fft_fixed_32.c',
+    'fft_float.c',
+    'fft_init_table.c',
+    'flac.c',
+    'flacdata.c',
+    'flacdec.c',
+    'flacdsp.c',
+    'get_buffer.c',
+    'idctdsp.c',
+    'jfdctfst.c',
+    'jfdctint.c',
+    'jrevdct.c',
+    'log2_tab.c',
+    'mpegaudio.c',
+    'mpegaudiodata.c',
+    'mpegaudiodec_common.c',
+    'mpegaudiodec_fixed.c',
+    'mpegaudiodecheader.c',
+    'mpegaudiodsp.c',
+    'mpegaudiodsp_data.c',
+    'mpegaudiodsp_fixed.c',
+    'mpegaudiodsp_float.c',
+    'mpegaudiotabs.c',
+    'null_bsf.c',
+    'options.c',
+    'parser.c',
+    'parsers.c',
+    'profiles.c',
+    'pthread.c',
+    'pthread_frame.c',
+    'pthread_slice.c',
+    'rdft.c',
+    'reverse.c',
+    'simple_idct.c',
+    'utils.c',
+    'version.c',
+    'vlc.c',
+    'vorbis_parser.c',
+    'xiph.c'
+]
+
+if not CONFIG['MOZ_FFVPX_AUDIOONLY']:
+    SOURCES += [
+        'atsc_a53.c',
+        'av1_frame_split_bsf.c',
+        'av1dec.c',
+        'avpicture.c',
+        'cbs.c',
+        'cbs_av1.c',
+        'golomb.c',
+        'h264pred.c',
+        'imgconvert.c',
+        'libdav1d.c',
+        'mathtables.c',
+        'qsv_api.c',
+        'raw.c',
+        'videodsp.c',
+        'vp8.c',
+        'vp8_parser.c',
+        'vp8dsp.c',
+        'vp9.c',
+        'vp9_parser.c',
+        'vp9_superframe_split_bsf.c',
+        'vp9block.c',
+        'vp9data.c',
+        'vp9dsp.c',
+        'vp9dsp_10bpp.c',
+        'vp9dsp_12bpp.c',
+        'vp9dsp_8bpp.c',
+        'vp9lpf.c',
+        'vp9mvs.c',
+        'vp9prob.c',
+        'vp9recon.c',
+        'vpx_rac.c',
+    ]
+    USE_LIBS += [
+        'dav1d',
+        'media_libdav1d_asm',
+    ]
+    if CONFIG['MOZ_WAYLAND']:
+        LOCAL_INCLUDES += ['/media/mozva']
+        SOURCES += [
+            'vaapi_av1.c',
+            'vaapi_decode.c',
+            'vaapi_vp8.c',
+            'vaapi_vp9.c',
+        ]
+        USE_LIBS += [
+          'mozva'
+        ]
+
+if CONFIG['MOZ_LIBAV_FFT']:
+    SOURCES += [
+        'avfft.c',
+    ]
+
+SYMBOLS_FILE = 'avcodec.symbols'
+NoVisibilityFlags()
+
+USE_LIBS += [
+     'mozavutil'
+]
+
+if CONFIG['OS_TARGET'] != 'WINNT':
+    OS_LIBS += ['m']
+
+include("../ffvpxcommon.mozbuild")
diff --git a/media/ffvpx/libavcodec/mpeg12data.h b/media/ffvpx/libavcodec/mpeg12data.h
new file mode 100644
index 0000000000..bc39655fbf
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpeg12data.h
@@ -0,0 +1,53 @@
+/*
+ * MPEG-1/2 tables
+ * copyright (c) 2000,2001 Fabrice Bellard
+ * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPEG-1/2 tables.
+ */
+
+#ifndef AVCODEC_MPEG12DATA_H
+#define AVCODEC_MPEG12DATA_H
+
+#include <stdint.h>
+#include "libavutil/rational.h"
+
+extern const uint16_t ff_mpeg1_default_intra_matrix[];
+extern const uint16_t ff_mpeg1_default_non_intra_matrix[64];
+
+extern const uint16_t ff_mpeg12_vlc_dc_lum_code[12];
+extern const unsigned char ff_mpeg12_vlc_dc_lum_bits[12];
+extern const uint16_t ff_mpeg12_vlc_dc_chroma_code[12];
+extern const unsigned char ff_mpeg12_vlc_dc_chroma_bits[12];
+
+extern const uint8_t ff_mpeg12_mbAddrIncrTable[36][2];
+extern const uint8_t ff_mpeg12_mbPatTable[64][2];
+
+extern const uint8_t ff_mpeg12_mbMotionVectorTable[17][2];
+
+extern const AVRational ff_mpeg12_frame_rate_tab[];
+extern const AVRational ff_mpeg2_frame_rate_tab[];
+
+extern const float ff_mpeg1_aspect[16];
+extern const AVRational ff_mpeg2_aspect[16];
+
+#endif /* AVCODEC_MPEG12DATA_H */
diff --git a/media/ffvpx/libavcodec/mpegaudio.c b/media/ffvpx/libavcodec/mpegaudio.c
new file mode 100644
index 0000000000..cba52992ef
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudio.c
@@ -0,0 +1,50 @@
+/*
+ * MPEG Audio common code
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPEG Audio common code.
+ */
+
+#include "mpegaudio.h"
+
+/* Return a layer 2 table index in 0..4, chosen from the per-channel bitrate, freq and lsf. */
+/* bitrate is in kb/s; nb_channels is used as a divisor and is not checked for zero here */
+int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf)
+{
+    int ch_bitrate, table;
+
+    ch_bitrate = bitrate / nb_channels; /* integer division: bitrate per channel, still in kb/s */
+    if (!lsf) { /* lsf == 0: pick one of tables 0..3; the tests below are tried in order */
+        if ((freq == 48000 && ch_bitrate >= 56) ||
+            (ch_bitrate >= 56 && ch_bitrate <= 80))
+            table = 0;
+        else if (freq != 48000 && ch_bitrate >= 96)
+            table = 1;
+        else if (freq != 32000 && ch_bitrate <= 48)
+            table = 2;
+        else /* everything the three tests above rejected */
+            table = 3;
+    } else { /* lsf != 0: table 4 regardless of bitrate and freq */
+        table = 4;
+    }
+    return table; /* presumably an index into the 5-entry ff_mpa_sblimit_table / ff_mpa_alloc_tables - confirm against callers */
+}
diff --git a/media/ffvpx/libavcodec/mpegaudio.h b/media/ffvpx/libavcodec/mpegaudio.h
new file mode 100644
index 0000000000..74590a8e8b
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudio.h
@@ -0,0 +1,81 @@
+/*
+ * copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPEG audio declarations for both encoder and decoder.
+ */
+
+#ifndef AVCODEC_MPEGAUDIO_H
+#define AVCODEC_MPEGAUDIO_H
+
+#ifndef USE_FLOATS
+#   define USE_FLOATS 0
+#endif
+
+#include <stdint.h>
+#include "libavutil/internal.h"
+
+/* max frame size, in samples */
+#define MPA_FRAME_SIZE 1152
+
+/* max compressed frame size */
+#define MPA_MAX_CODED_FRAME_SIZE 1792
+
+#define MPA_MAX_CHANNELS 2
+
+#define SBLIMIT 32 /* number of subbands */
+
+#define MPA_STEREO  0
+#define MPA_JSTEREO 1
+#define MPA_DUAL    2
+#define MPA_MONO    3
+
+#ifndef FRAC_BITS
+#define FRAC_BITS   23   /* fractional bits for sb_samples and dct */
+#define WFRAC_BITS  16   /* fractional bits for window */
+#endif
+
+#define IMDCT_SCALAR 1.759 /* mpegaudio_tablegen.h defines the same value and divides its table entries by it */
+
+#define FRAC_ONE    (1 << FRAC_BITS) /* the value 1.0 expressed with FRAC_BITS fractional bits */
+
+#define FIX(a)   ((int)((a) * FRAC_ONE)) /* scale by FRAC_ONE, then truncate through the int cast */
+
+#if USE_FLOATS /* float build: every sample type below is float */
+#   define INTFLOAT float
+#   define SUINTFLOAT float
+typedef float MPA_INT;
+typedef float OUT_INT;
+#elif FRAC_BITS <= 15 /* integer build with at most 15 fractional bits: MPA_INT is 16 bits wide */
+#   define INTFLOAT int
+#   define SUINTFLOAT SUINT
+typedef int16_t MPA_INT;
+typedef int16_t OUT_INT;
+#else /* integer build with more than 15 fractional bits (23 unless FRAC_BITS was predefined): MPA_INT is 32 bits wide */
+#   define INTFLOAT int
+#   define SUINTFLOAT SUINT
+typedef int32_t MPA_INT;
+typedef int16_t OUT_INT;
+#endif
+
+int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
+
+#endif /* AVCODEC_MPEGAUDIO_H */
diff --git a/media/ffvpx/libavcodec/mpegaudio_tablegen.h b/media/ffvpx/libavcodec/mpegaudio_tablegen.h
new file mode 100644
index 0000000000..bae6962ac0
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudio_tablegen.h
@@ -0,0 +1,89 @@
+/*
+ * Header file for hardcoded mpegaudiodec tables
+ *
+ * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGAUDIO_TABLEGEN_H
+#define AVCODEC_MPEGAUDIO_TABLEGEN_H
+
+#include <stdint.h>
+#include <math.h>
+#include "libavutil/attributes.h"
+
+#if CONFIG_HARDCODED_TABLES
+#define mpegaudio_tableinit()
+#include "libavcodec/mpegaudio_tables.h"
+#else
+#if defined(BUILD_TABLES) || !USE_FLOATS
+#define FIXED_TABLE
+static uint32_t exp_table_fixed[512];
+static uint32_t expval_table_fixed[512][16];
+#endif
+
+#if defined(BUILD_TABLES) || USE_FLOATS
+#define FLOAT_TABLE
+static float exp_table_float[512];
+static float expval_table_float[512][16];
+#endif
+
+#define IMDCT_SCALAR 1.759
+/* Fill whichever exp_table_* / expval_table_* arrays FIXED_TABLE / FLOAT_TABLE enabled above. */
+static av_cold void mpegaudio_tableinit(void)
+{
+    int i, value, exponent;
+    static const double exp2_lut[4] = {
+        1.00000000000000000000, /* 2 ^ (0 * 0.25) */
+        1.18920711500272106672, /* 2 ^ (1 * 0.25) */
+        M_SQRT2               , /* 2 ^ (2 * 0.25) */
+        1.68179283050742908606, /* 2 ^ (3 * 0.25) */
+    };
+    double pow43_lut[16]; /* value^(4/3) for value = 0..15, filled by the first loop */
+    double exp2_base = 2.11758236813575084767080625169910490512847900390625e-22; // 2^(-72)
+    double exp2_val;
+
+    for (i = 0; i < 16; ++i)
+        pow43_lut[i] = i * cbrt(i); /* i * i^(1/3) == i^(4/3) */
+
+    for (exponent = 0; exponent < 512; exponent++) {
+        if (exponent && (exponent & 3) == 0)
+            exp2_base *= 2; /* one more whole power of two on every 4th exponent */
+        exp2_val = exp2_base * exp2_lut[exponent & 3] / IMDCT_SCALAR; /* 2^(exponent/4.0 - 72) / IMDCT_SCALAR */
+        for (value = 0; value < 16; value++) {
+            double f = pow43_lut[value] * exp2_val;
+#ifdef FIXED_TABLE
+            expval_table_fixed[exponent][value] = (f < 0xFFFFFFFF ? llrint(f) : 0xFFFFFFFF); /* rounded, saturating at 0xFFFFFFFF */
+#endif
+#ifdef FLOAT_TABLE
+            expval_table_float[exponent][value] = f;
+#endif
+        }
+#ifdef FIXED_TABLE
+        exp_table_fixed[exponent] = expval_table_fixed[exponent][1]; /* column 1: pow43_lut[1] == 1, so this is exp2_val after rounding */
+#endif
+#ifdef FLOAT_TABLE
+        exp_table_float[exponent] = expval_table_float[exponent][1]; /* column 1 again: exp2_val narrowed to float */
+#endif
+    }
+}
+#undef FLOAT_TABLE
+#undef FIXED_TABLE
+#endif /* CONFIG_HARDCODED_TABLES */
+
+#endif /* AVCODEC_MPEGAUDIO_TABLEGEN_H */
diff --git a/media/ffvpx/libavcodec/mpegaudiodata.c b/media/ffvpx/libavcodec/mpegaudiodata.c
new file mode 100644
index 0000000000..669590908f
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodata.c
@@ -0,0 +1,133 @@
+/*
+ * MPEG Audio common tables
+ * copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPEG audio layer common tables.
+ */
+
+#include "mpegaudiodata.h"
+
+/*******************************************************/
+/* layer 2 tables */
+
+const int ff_mpa_sblimit_table[5] = { 27 , 30 , 8, 12 , 30 }; /* 5 entries, like ff_mpa_alloc_tables; presumably indexed by the result of ff_mpa_l2_select_table() - confirm */
+
+const int ff_mpa_quant_steps[17] = { /* 3, 5, 7, 9, then 2^k - 1 for k = 4..16 */
+    3,     5,    7,    9,    15,
+    31,    63,  127,  255,   511,
+    1023,  2047, 4095, 8191, 16383,
+    32767, 65535
+};
+
+/* we use a negative value if grouped */
+const int ff_mpa_quant_bits[17] = { /* same length as ff_mpa_quant_steps; the negative entries sit at the positions of steps 3, 5 and 9 */
+    -5,  -7,  3, -10, 4,
+     5,  6,  7,  8,  9,
+    10, 11, 12, 13, 14,
+    15, 16
+};
+
+/* encoding tables which give the quantization index. Each row starts with a
+   value b (4, 3 or 2) and is 1 << b bytes long in total, so rows pack tightly. */
+static const unsigned char alloc_table_1[] = { /* 30 rows: 11 of 16 bytes, 12 of 8 bytes, 7 of 4 bytes */
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+};
+
+static const unsigned char alloc_table_3[] = { /* 12 rows: 2 of 16 bytes, 10 of 8 bytes; each row starts with b and is 1 << b bytes long */
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+};
+
+static const unsigned char alloc_table_4[] = {
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+};
+
+const unsigned char * const ff_mpa_alloc_tables[5] = /* slots 0-1 share alloc_table_1, slots 2-3 share alloc_table_3, slot 4 is alloc_table_4 */
+{ alloc_table_1, alloc_table_1, alloc_table_3, alloc_table_3, alloc_table_4, };
diff --git a/media/ffvpx/libavcodec/mpegaudiodata.h b/media/ffvpx/libavcodec/mpegaudiodata.h
new file mode 100644
index 0000000000..a4148a1ffe
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodata.h
@@ -0,0 +1,82 @@
+/*
+ * MPEG Audio common tables
+ * copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPEG audio layer common tables.
+ */
+
+#ifndef AVCODEC_MPEGAUDIODATA_H
+#define AVCODEC_MPEGAUDIODATA_H
+
+#include <stdint.h>
+
+#include "config.h"
+
+#include "vlc.h"
+
+#define MODE_EXT_MS_STEREO 2
+#define MODE_EXT_I_STEREO  1
+
+extern const uint16_t ff_mpa_bitrate_tab[2][3][15];
+extern const uint16_t ff_mpa_freq_tab[3];
+extern const int ff_mpa_sblimit_table[5];
+extern const int ff_mpa_quant_steps[17];
+extern const int ff_mpa_quant_bits[17];
+extern const unsigned char * const ff_mpa_alloc_tables[5];
+
+#define TABLE_4_3_SIZE ((8191 + 16)*4)
+#if CONFIG_HARDCODED_TABLES
+extern const int8_t   ff_table_4_3_exp  [TABLE_4_3_SIZE];
+extern const uint32_t ff_table_4_3_value[TABLE_4_3_SIZE];
+#else
+extern int8_t   ff_table_4_3_exp  [TABLE_4_3_SIZE];
+extern uint32_t ff_table_4_3_value[TABLE_4_3_SIZE];
+#endif
+
+/* VLCs for decoding layer 3 huffman tables */
+extern VLC ff_huff_vlc[16];
+extern VLC ff_huff_quad_vlc[2];
+
+/* layer3 scale factor size */
+extern const uint8_t ff_slen_table[2][16];
+/* number of lsf scale factors for a given size */
+extern const uint8_t ff_lsf_nsf_table[6][3][4];
+extern const uint8_t ff_mpa_huff_data[32][2];
+
+/* band size tables */
+extern const uint8_t ff_band_size_long[9][22];
+extern const uint8_t ff_band_size_short[9][13];
+/* computed from ff_band_size_long */
+extern uint16_t ff_band_index_long[9][23];
+
+extern int16_t *const ff_division_tabs[4];
+
+/* lower 2 bits: modulo 3, higher bits: shift */
+extern uint16_t ff_scale_factor_modshift[64];
+
+extern const uint8_t ff_mpa_pretab[2][22];
+
+/* Initialize tables shared between the fixed and
+ * floating point MPEG audio decoders. */
+void ff_mpegaudiodec_common_init_static(void);
+
+#endif /* AVCODEC_MPEGAUDIODATA_H */
diff --git a/media/ffvpx/libavcodec/mpegaudiodec_common.c b/media/ffvpx/libavcodec/mpegaudiodec_common.c
new file mode 100644
index 0000000000..5fcb39b325
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodec_common.c
@@ -0,0 +1,483 @@
+/*
+ * MPEG Audio decoder
+ * copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPEG audio layer decoder tables.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/libm.h"
+#include "libavutil/thread.h"
+
+#include "mpegaudiodata.h"
+
+#include "mpegaudiodec_common_tablegen.h"
+
+uint16_t ff_scale_factor_modshift[64]; /* described in mpegaudiodata.h as "lower 2 bits: modulo 3, higher bits: shift" */
+/* Three zero-initialized tables of 64, 256 and 2048 entries; presumably filled by ff_mpegaudiodec_common_init_static() - not visible here. */
+static int16_t division_tab3[1 << 6 ];
+static int16_t division_tab5[1 << 8 ];
+static int16_t division_tab9[1 << 11];
+/* Lookup array over the three tables above; slot 2 is NULL. */
+int16_t *const ff_division_tabs[4] = {
+    division_tab3, division_tab5, NULL, division_tab9
+};
+
+
+/*******************************************************/
+/* layer 3 tables */
+
+const uint8_t ff_slen_table[2][16] = { /* described in mpegaudiodata.h as "layer3 scale factor size" */
+    { 0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
+    { 0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3 },
+};
+/* Described in mpegaudiodata.h as "number of lsf scale factors for a given size". */
+const uint8_t ff_lsf_nsf_table[6][3][4] = {
+    { {  6,  5,  5, 5 }, {  9,  9,  9, 9 }, {  6,  9,  9, 9 } },
+    { {  6,  5,  7, 3 }, {  9,  9, 12, 6 }, {  6,  9, 12, 6 } },
+    { { 11, 10,  0, 0 }, { 18, 18,  0, 0 }, { 15, 18,  0, 0 } },
+    { {  7,  7,  7, 0 }, { 12, 12, 12, 0 }, {  6, 15, 12, 0 } },
+    { {  6,  6,  6, 3 }, { 12,  9,  9, 6 }, {  6, 12,  9, 6 } },
+    { {  8,  8,  5, 0 }, { 15, 12,  9, 0 }, {  6, 18,  9, 0 } },
+};
+
+/* mpegaudio layer 3 huffman tables */
+VLC ff_huff_vlc[16];
+static VLCElem huff_vlc_tables[128 + 128 + 128 + 130 + 128 + 154 + 166 + 142 + /* 15 sizes summed; presumably one per Huffman table in mpa_hufflens - confirm in the init code */
+                               204 + 190 + 170 + 542 + 460 + 662 + 414];
+VLC ff_huff_quad_vlc[2];
+static VLCElem huff_quad_vlc_tables[64 + 16]; /* two sizes summed; presumably one per ff_huff_quad_vlc entry - confirm in the init code */
+
+static const uint8_t mpa_hufflens[] = { /* 15 tables stored back to back; mpa_huffsymbols uses the same sections and entry counts, presumably pairing each length with a symbol */
+    /* Huffman table 1 - 4 entries */
+     3,  3,  2,  1,
+    /* Huffman table 2 - 9 entries */
+     6,  6,  5,  5,  5,  3,  3,  3,  1,
+    /* Huffman table 3 - 9 entries */
+     6,  6,  5,  5,  5,  3,  2,  2,  2,
+    /* Huffman table 5 - 16 entries */
+     8,  8,  7,  6,  7,  7,  7,  7,  6,  6,  6,  6,  3,  3,  3,  1,
+    /* Huffman table 6 - 16 entries */
+     7,  7,  6,  6,  6,  5,  5,  5,  5,  4,  4,  4,  3,  2,  3,  3,
+    /* Huffman table 7 - 36 entries */
+    10, 10, 10, 10,  9,  9,  9,  9,  8,  8,  9,  9,  8,  9,  9,  8,  8,  7,  7,
+     7,  8,  8,  8,  8,  7,  7,  7,  7,  6,  5,  6,  6,  4,  3,  3,  1,
+    /* Huffman table 8 - 36 entries */
+    11, 11, 10,  9, 10, 10,  9,  9,  9,  8,  8,  9,  9,  9,  9,  8,  8,  8,  7,
+     8,  8,  8,  8,  8,  8,  8,  8,  6,  6,  6,  4,  4,  2,  3,  3,  2,
+    /* Huffman table 9 - 36 entries */
+     9,  9,  8,  8,  9,  9,  8,  8,  8,  8,  7,  7,  7,  8,  8,  7,  7,  7,  7,
+     6,  6,  6,  6,  5,  5,  6,  6,  5,  5,  4,  4,  4,  3,  3,  3,  3,
+    /* Huffman table 10 - 64 entries */
+    11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 11, 11, 10,  9,  9, 10,
+    10,  9,  9, 10, 10,  9, 10, 10,  8,  8,  9,  9, 10, 10,  9,  9, 10, 10,  8,
+     8,  8,  9,  9,  9,  9,  9,  9,  8,  8,  8,  8,  8,  8,  7,  7,  7,  7,  6,
+     6,  6,  6,  4,  3,  3,  1,
+    /* Huffman table 11 - 64 entries */
+    10, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10,  9,  9,  9, 10, 10, 10, 10,  8,
+     8,  9,  9,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  8,  7,  8,  8,  7,  7,
+     8,  8,  8,  9,  9,  8,  8,  8,  8,  8,  8,  7,  7,  6,  6,  7,  7,  6,  5,
+     4,  5,  5,  3,  3,  3,  2,
+    /* Huffman table 12 - 64 entries */
+    10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,  9,  9,  8,  8,  8,  8,  8,  8,
+     9,  9,  8,  8,  8,  8,  8,  9,  9,  7,  7,  7,  8,  8,  8,  8,  8,  8,  7,
+     7,  7,  7,  8,  8,  7,  7,  7,  6,  6,  6,  6,  7,  7,  6,  5,  5,  5,  4,
+     4,  5,  5,  4,  3,  3,  3,
+    /* Huffman table 13 - 256 entries */
+    19, 19, 18, 17, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 15, 15, 16,
+    16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 15, 16, 16, 14, 14, 15,
+    15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 14, 13, 14,
+    14, 13, 13, 14, 14, 13, 14, 14, 13, 14, 14, 13, 14, 14, 13, 13, 14, 14, 12,
+    12, 12, 13, 13, 13, 13, 13, 13, 12, 13, 13, 12, 12, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 12, 12, 13, 13, 12, 12, 12, 12, 13, 13, 13, 13, 12,
+    13, 13, 12, 11, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 12, 12, 11,
+    11, 12, 12, 11, 12, 12, 12, 12, 11, 11, 12, 12, 11, 12, 12, 11, 12, 12, 11,
+    12, 12, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 11, 11,
+    10, 11, 11, 10, 11, 11, 11, 11, 10, 10, 11, 11, 10, 10, 11, 11, 11, 11, 11,
+    11,  9,  9, 10, 10, 10, 10, 10, 11, 11,  9,  9,  9, 10, 10,  9,  9, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10,  8,  9,  9,  9,  9,  9,  9, 10, 10,  9,  9,
+     9,  8,  8,  9,  9,  9,  9,  9,  9,  8,  7,  8,  8,  8,  8,  7,  7,  7,  7,
+     7,  6,  6,  6,  6,  4,  4,  3,  1,
+    /* Huffman table 15 - 256 entries */
+    13, 13, 13, 13, 12, 13, 13, 13, 13, 13, 13, 12, 13, 13, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,
+    13, 11, 11, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 12, 12, 11, 11, 11, 11,
+    11, 11, 11, 11, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 11, 11, 11, 11, 11,
+    11, 10, 11, 11, 11, 11, 11, 11, 10, 10, 11, 11, 10, 10, 10, 10, 11, 11, 10,
+    10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 11, 11,  9, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10,  9, 10, 10, 10, 10,  9, 10, 10,  9, 10,
+    10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  9, 10, 10,  9,  9,  9,
+     9,  9,  9, 10, 10,  9,  9,  9,  9,  9,  9,  8,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,  8,
+     8,  8,  9,  9,  8,  8,  8,  8,  8,  8,  8,  9,  9,  8,  7,  8,  8,  7,  7,
+     7,  7,  8,  8,  7,  7,  7,  7,  7,  6,  7,  7,  6,  6,  7,  7,  6,  6,  6,
+     5,  5,  5,  5,  5,  3,  4,  4,  3,
+    /* Huffman table 16 - 256 entries */
+    11, 11, 11, 11, 11, 11, 11, 11, 10, 11, 11, 11, 11, 10, 10, 10, 10, 10,  8,
+    10, 10,  9,  9,  9,  9, 10, 16, 17, 17, 15, 15, 16, 16, 14, 15, 15, 14, 14,
+    15, 15, 14, 14, 15, 15, 15, 15, 14, 15, 15, 14, 13,  8,  9,  9,  8,  8, 13,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 14, 14, 14, 14, 13, 14, 14,
+    13, 13, 13, 14, 14, 14, 14, 13, 13, 14, 14, 13, 14, 14, 12, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 13, 13, 13, 13, 13, 13, 12, 13,
+    13, 12, 12, 13, 13, 11, 12, 12, 12, 12, 12, 12, 12, 13, 13, 11, 12, 12, 12,
+    12, 11, 12, 12, 12, 12, 12, 12, 12, 12, 11, 12, 12, 11, 11, 11, 11, 12, 12,
+    12, 12, 12, 12, 12, 12, 11, 12, 12, 11, 12, 12, 11, 12, 12, 11, 12, 12, 11,
+    10, 10, 11, 11, 11, 11, 11, 11, 10, 10, 11, 11, 10, 10, 11, 11, 11, 11, 11,
+    11, 11, 11, 10, 11, 11, 10, 10, 10, 11, 11, 10, 10, 11, 11, 10, 10, 11, 11,
+    10,  9,  9, 10, 10, 10, 10, 10, 10,  9,  9,  9, 10, 10,  9, 10, 10,  9,  9,
+     8,  9,  9,  9,  9,  9,  9,  9,  9,  8,  8,  9,  9,  8,  8,  7,  7,  8,  8,
+     7,  6,  6,  6,  6,  4,  4,  3,  1,
+    /* Huffman table 24 - 256 entries */
+     8,  8,  8,  8,  8,  8,  8,  8,  7,  8,  8,  7,  7,  8,  8,  7,  7,  7,  7,
+     7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  9, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11,  4, 11, 11, 11, 11, 12, 12, 11, 10, 11, 11, 10, 10, 10, 10, 11, 11, 10,
+    10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 11, 11, 10, 11, 11, 10,  9, 10, 10, 10, 10, 11, 11, 10,  9,  9, 10,
+    10,  9, 10, 10, 10, 10,  9,  9, 10, 10,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9,  9,  9,  9,  9,  9, 10, 10,  9,  9,  9, 10, 10,  8,  9,  9,  8,
+     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  8,  8,  8,  8,  8,
+     8,  9,  9,  7,  8,  8,  7,  7,  7,  7,  7,  8,  8,  7,  7,  6,  6,  7,  7,
+     6,  5,  5,  6,  6,  4,  4,  4,  4,
+};
+
+static const uint8_t mpa_huffsymbols[] = {
+    /* Huffman table 1 - 4 entries */
+    0x11, 0x01, 0x10, 0x00,
+    /* Huffman table 2 - 9 entries */
+    0x22, 0x02, 0x12, 0x21, 0x20, 0x11, 0x01, 0x10, 0x00,
+    /* Huffman table 3 - 9 entries */
+    0x22, 0x02, 0x12, 0x21, 0x20, 0x10, 0x11, 0x01, 0x00,
+    /* Huffman table 5 - 16 entries */
+    0x33, 0x23, 0x32, 0x31, 0x13, 0x03, 0x30, 0x22, 0x12, 0x21, 0x02, 0x20,
+    0x11, 0x01, 0x10, 0x00,
+    /* Huffman table 6 - 16 entries */
+    0x33, 0x03, 0x23, 0x32, 0x30, 0x13, 0x31, 0x22, 0x02, 0x12, 0x21, 0x20,
+    0x01, 0x11, 0x10, 0x00,
+    /* Huffman table 7 - 36 entries */
+    0x55, 0x45, 0x54, 0x53, 0x35, 0x44, 0x25, 0x52, 0x15, 0x51, 0x05, 0x34,
+    0x50, 0x43, 0x33, 0x24, 0x42, 0x14, 0x41, 0x40, 0x04, 0x23, 0x32, 0x03,
+    0x13, 0x31, 0x30, 0x22, 0x12, 0x21, 0x02, 0x20, 0x11, 0x01, 0x10, 0x00,
+    /* Huffman table 8 - 36 entries */
+    0x55, 0x54, 0x45, 0x53, 0x35, 0x44, 0x25, 0x52, 0x05, 0x15, 0x51, 0x34,
+    0x43, 0x50, 0x33, 0x24, 0x42, 0x14, 0x41, 0x04, 0x40, 0x23, 0x32, 0x13,
+    0x31, 0x03, 0x30, 0x22, 0x02, 0x20, 0x12, 0x21, 0x11, 0x01, 0x10, 0x00,
+    /* Huffman table 9 - 36 entries */
+    0x55, 0x45, 0x35, 0x53, 0x54, 0x05, 0x44, 0x25, 0x52, 0x15, 0x51, 0x34,
+    0x43, 0x50, 0x04, 0x24, 0x42, 0x33, 0x40, 0x14, 0x41, 0x23, 0x32, 0x13,
+    0x31, 0x03, 0x30, 0x22, 0x02, 0x12, 0x21, 0x20, 0x11, 0x01, 0x10, 0x00,
+    /* Huffman table 10 - 64 entries */
+    0x77, 0x67, 0x76, 0x57, 0x75, 0x66, 0x47, 0x74, 0x56, 0x65, 0x37, 0x73,
+    0x46, 0x55, 0x54, 0x63, 0x27, 0x72, 0x64, 0x07, 0x70, 0x62, 0x45, 0x35,
+    0x06, 0x53, 0x44, 0x17, 0x71, 0x36, 0x26, 0x25, 0x52, 0x15, 0x51, 0x34,
+    0x43, 0x16, 0x61, 0x60, 0x05, 0x50, 0x24, 0x42, 0x33, 0x04, 0x14, 0x41,
+    0x40, 0x23, 0x32, 0x03, 0x13, 0x31, 0x30, 0x22, 0x12, 0x21, 0x02, 0x20,
+    0x11, 0x01, 0x10, 0x00,
+    /* Huffman table 11 - 64 entries */
+    0x77, 0x67, 0x76, 0x75, 0x66, 0x47, 0x74, 0x57, 0x55, 0x56, 0x65, 0x37,
+    0x73, 0x46, 0x45, 0x54, 0x35, 0x53, 0x27, 0x72, 0x64, 0x07, 0x71, 0x17,
+    0x70, 0x36, 0x63, 0x60, 0x44, 0x25, 0x52, 0x05, 0x15, 0x62, 0x26, 0x06,
+    0x16, 0x61, 0x51, 0x34, 0x50, 0x43, 0x33, 0x24, 0x42, 0x14, 0x41, 0x04,
+    0x40, 0x23, 0x32, 0x13, 0x31, 0x03, 0x30, 0x22, 0x21, 0x12, 0x02, 0x20,
+    0x11, 0x01, 0x10, 0x00,
+    /* Huffman table 12 - 64 entries */
+    0x77, 0x67, 0x76, 0x57, 0x75, 0x66, 0x47, 0x74, 0x65, 0x56, 0x37, 0x73,
+    0x55, 0x27, 0x72, 0x46, 0x64, 0x17, 0x71, 0x07, 0x70, 0x36, 0x63, 0x45,
+    0x54, 0x44, 0x06, 0x05, 0x26, 0x62, 0x61, 0x16, 0x60, 0x35, 0x53, 0x25,
+    0x52, 0x15, 0x51, 0x34, 0x43, 0x50, 0x04, 0x24, 0x42, 0x14, 0x33, 0x41,
+    0x23, 0x32, 0x40, 0x03, 0x30, 0x13, 0x31, 0x22, 0x12, 0x21, 0x02, 0x20,
+    0x00, 0x11, 0x01, 0x10,
+    /* Huffman table 13 - 256 entries */
+    0xFE, 0xFC, 0xFD, 0xED, 0xFF, 0xEF, 0xDF, 0xEE, 0xCF, 0xDE, 0xBF, 0xFB,
+    0xCE, 0xDC, 0xAF, 0xE9, 0xEC, 0xDD, 0xFA, 0xCD, 0xBE, 0xEB, 0x9F, 0xF9,
+    0xEA, 0xBD, 0xDB, 0x8F, 0xF8, 0xCC, 0xAE, 0x9E, 0x8E, 0x7F, 0x7E, 0xF7,
+    0xDA, 0xAD, 0xBC, 0xCB, 0xF6, 0x6F, 0xE8, 0x5F, 0x9D, 0xD9, 0xF5, 0xE7,
+    0xAC, 0xBB, 0x4F, 0xF4, 0xCA, 0xE6, 0xF3, 0x3F, 0x8D, 0xD8, 0x2F, 0xF2,
+    0x6E, 0x9C, 0x0F, 0xC9, 0x5E, 0xAB, 0x7D, 0xD7, 0x4E, 0xC8, 0xD6, 0x3E,
+    0xB9, 0x9B, 0xAA, 0x1F, 0xF1, 0xF0, 0xBA, 0xE5, 0xE4, 0x8C, 0x6D, 0xE3,
+    0xE2, 0x2E, 0x0E, 0x1E, 0xE1, 0xE0, 0x5D, 0xD5, 0x7C, 0xC7, 0x4D, 0x8B,
+    0xB8, 0xD4, 0x9A, 0xA9, 0x6C, 0xC6, 0x3D, 0xD3, 0x7B, 0x2D, 0xD2, 0x1D,
+    0xB7, 0x5C, 0xC5, 0x99, 0x7A, 0xC3, 0xA7, 0x97, 0x4B, 0xD1, 0x0D, 0xD0,
+    0x8A, 0xA8, 0x4C, 0xC4, 0x6B, 0xB6, 0x3C, 0x2C, 0xC2, 0x5B, 0xB5, 0x89,
+    0x1C, 0xC1, 0x98, 0x0C, 0xC0, 0xB4, 0x6A, 0xA6, 0x79, 0x3B, 0xB3, 0x88,
+    0x5A, 0x2B, 0xA5, 0x69, 0xA4, 0x78, 0x87, 0x94, 0x77, 0x76, 0xB2, 0x1B,
+    0xB1, 0x0B, 0xB0, 0x96, 0x4A, 0x3A, 0xA3, 0x59, 0x95, 0x2A, 0xA2, 0x1A,
+    0xA1, 0x0A, 0x68, 0xA0, 0x86, 0x49, 0x93, 0x39, 0x58, 0x85, 0x67, 0x29,
+    0x92, 0x57, 0x75, 0x38, 0x83, 0x66, 0x47, 0x74, 0x56, 0x65, 0x73, 0x19,
+    0x91, 0x09, 0x90, 0x48, 0x84, 0x72, 0x46, 0x64, 0x28, 0x82, 0x18, 0x37,
+    0x27, 0x17, 0x71, 0x55, 0x07, 0x70, 0x36, 0x63, 0x45, 0x54, 0x26, 0x62,
+    0x35, 0x81, 0x08, 0x80, 0x16, 0x61, 0x06, 0x60, 0x53, 0x44, 0x25, 0x52,
+    0x05, 0x15, 0x51, 0x34, 0x43, 0x50, 0x24, 0x42, 0x33, 0x14, 0x41, 0x04,
+    0x40, 0x23, 0x32, 0x13, 0x31, 0x03, 0x30, 0x22, 0x12, 0x21, 0x02, 0x20,
+    0x11, 0x01, 0x10, 0x00,
+    /* Huffman table 15 - 256 entries */
+    0xFF, 0xEF, 0xFE, 0xDF, 0xEE, 0xFD, 0xCF, 0xFC, 0xDE, 0xED, 0xBF, 0xFB,
+    0xCE, 0xEC, 0xDD, 0xAF, 0xFA, 0xBE, 0xEB, 0xCD, 0xDC, 0x9F, 0xF9, 0xEA,
+    0xBD, 0xDB, 0x8F, 0xF8, 0xCC, 0x9E, 0xE9, 0x7F, 0xF7, 0xAD, 0xDA, 0xBC,
+    0x6F, 0xAE, 0x0F, 0xCB, 0xF6, 0x8E, 0xE8, 0x5F, 0x9D, 0xF5, 0x7E, 0xE7,
+    0xAC, 0xCA, 0xBB, 0xD9, 0x8D, 0x4F, 0xF4, 0x3F, 0xF3, 0xD8, 0xE6, 0x2F,
+    0xF2, 0x6E, 0xF0, 0x1F, 0xF1, 0x9C, 0xC9, 0x5E, 0xAB, 0xBA, 0xE5, 0x7D,
+    0xD7, 0x4E, 0xE4, 0x8C, 0xC8, 0x3E, 0x6D, 0xD6, 0xE3, 0x9B, 0xB9, 0x2E,
+    0xAA, 0xE2, 0x1E, 0xE1, 0x0E, 0xE0, 0x5D, 0xD5, 0x7C, 0xC7, 0x4D, 0x8B,
+    0xD4, 0xB8, 0x9A, 0xA9, 0x6C, 0xC6, 0x3D, 0xD3, 0xD2, 0x2D, 0x0D, 0x1D,
+    0x7B, 0xB7, 0xD1, 0x5C, 0xD0, 0xC5, 0x8A, 0xA8, 0x4C, 0xC4, 0x6B, 0xB6,
+    0x99, 0x0C, 0x3C, 0xC3, 0x7A, 0xA7, 0xA6, 0xC0, 0x0B, 0xC2, 0x2C, 0x5B,
+    0xB5, 0x1C, 0x89, 0x98, 0xC1, 0x4B, 0xB4, 0x6A, 0x3B, 0x79, 0xB3, 0x97,
+    0x88, 0x2B, 0x5A, 0xB2, 0xA5, 0x1B, 0xB1, 0xB0, 0x69, 0x96, 0x4A, 0xA4,
+    0x78, 0x87, 0x3A, 0xA3, 0x59, 0x95, 0x2A, 0xA2, 0x1A, 0xA1, 0x0A, 0xA0,
+    0x68, 0x86, 0x49, 0x94, 0x39, 0x93, 0x77, 0x09, 0x58, 0x85, 0x29, 0x67,
+    0x76, 0x92, 0x91, 0x19, 0x90, 0x48, 0x84, 0x57, 0x75, 0x38, 0x83, 0x66,
+    0x47, 0x28, 0x82, 0x18, 0x81, 0x74, 0x08, 0x80, 0x56, 0x65, 0x37, 0x73,
+    0x46, 0x27, 0x72, 0x64, 0x17, 0x55, 0x71, 0x07, 0x70, 0x36, 0x63, 0x45,
+    0x54, 0x26, 0x62, 0x16, 0x06, 0x60, 0x35, 0x61, 0x53, 0x44, 0x25, 0x52,
+    0x15, 0x51, 0x05, 0x50, 0x34, 0x43, 0x24, 0x42, 0x33, 0x41, 0x14, 0x04,
+    0x23, 0x32, 0x40, 0x03, 0x13, 0x31, 0x30, 0x22, 0x12, 0x21, 0x02, 0x20,
+    0x11, 0x01, 0x10, 0x00,
+    /* Huffman table 16 - 256 entries */
+    0xEF, 0xFE, 0xDF, 0xFD, 0xCF, 0xFC, 0xBF, 0xFB, 0xAF, 0xFA, 0x9F, 0xF9,
+    0xF8, 0x8F, 0x7F, 0xF7, 0x6F, 0xF6, 0xFF, 0x5F, 0xF5, 0x4F, 0xF4, 0xF3,
+    0xF0, 0x3F, 0xCE, 0xEC, 0xDD, 0xDE, 0xE9, 0xEA, 0xD9, 0xEE, 0xED, 0xEB,
+    0xBE, 0xCD, 0xDC, 0xDB, 0xAE, 0xCC, 0xAD, 0xDA, 0x7E, 0xAC, 0xCA, 0xC9,
+    0x7D, 0x5E, 0xBD, 0xF2, 0x2F, 0x0F, 0x1F, 0xF1, 0x9E, 0xBC, 0xCB, 0x8E,
+    0xE8, 0x9D, 0xE7, 0xBB, 0x8D, 0xD8, 0x6E, 0xE6, 0x9C, 0xAB, 0xBA, 0xE5,
+    0xD7, 0x4E, 0xE4, 0x8C, 0xC8, 0x3E, 0x6D, 0xD6, 0x9B, 0xB9, 0xAA, 0xE1,
+    0xD4, 0xB8, 0xA9, 0x7B, 0xB7, 0xD0, 0xE3, 0x0E, 0xE0, 0x5D, 0xD5, 0x7C,
+    0xC7, 0x4D, 0x8B, 0x9A, 0x6C, 0xC6, 0x3D, 0x5C, 0xC5, 0x0D, 0x8A, 0xA8,
+    0x99, 0x4C, 0xB6, 0x7A, 0x3C, 0x5B, 0x89, 0x1C, 0xC0, 0x98, 0x79, 0xE2,
+    0x2E, 0x1E, 0xD3, 0x2D, 0xD2, 0xD1, 0x3B, 0x97, 0x88, 0x1D, 0xC4, 0x6B,
+    0xC3, 0xA7, 0x2C, 0xC2, 0xB5, 0xC1, 0x0C, 0x4B, 0xB4, 0x6A, 0xA6, 0xB3,
+    0x5A, 0xA5, 0x2B, 0xB2, 0x1B, 0xB1, 0x0B, 0xB0, 0x69, 0x96, 0x4A, 0xA4,
+    0x78, 0x87, 0xA3, 0x3A, 0x59, 0x2A, 0x95, 0x68, 0xA1, 0x86, 0x77, 0x94,
+    0x49, 0x57, 0x67, 0xA2, 0x1A, 0x0A, 0xA0, 0x39, 0x93, 0x58, 0x85, 0x29,
+    0x92, 0x76, 0x09, 0x19, 0x91, 0x90, 0x48, 0x84, 0x75, 0x38, 0x83, 0x66,
+    0x28, 0x82, 0x47, 0x74, 0x18, 0x81, 0x80, 0x08, 0x56, 0x37, 0x73, 0x65,
+    0x46, 0x27, 0x72, 0x64, 0x55, 0x07, 0x17, 0x71, 0x70, 0x36, 0x63, 0x45,
+    0x54, 0x26, 0x62, 0x16, 0x61, 0x06, 0x60, 0x53, 0x35, 0x44, 0x25, 0x52,
+    0x51, 0x15, 0x05, 0x34, 0x43, 0x50, 0x24, 0x42, 0x33, 0x14, 0x41, 0x04,
+    0x40, 0x23, 0x32, 0x13, 0x31, 0x03, 0x30, 0x22, 0x12, 0x21, 0x02, 0x20,
+    0x11, 0x01, 0x10, 0x00,
+    /* Huffman table 24 - 256 entries */
+    0xEF, 0xFE, 0xDF, 0xFD, 0xCF, 0xFC, 0xBF, 0xFB, 0xFA, 0xAF, 0x9F, 0xF9,
+    0xF8, 0x8F, 0x7F, 0xF7, 0x6F, 0xF6, 0x5F, 0xF5, 0x4F, 0xF4, 0x3F, 0xF3,
+    0x2F, 0xF2, 0xF1, 0x1F, 0xF0, 0x0F, 0xEE, 0xDE, 0xED, 0xCE, 0xEC, 0xDD,
+    0xBE, 0xEB, 0xCD, 0xDC, 0xAE, 0xEA, 0xBD, 0xDB, 0xCC, 0x9E, 0xE9, 0xAD,
+    0xDA, 0xBC, 0xCB, 0x8E, 0xE8, 0x9D, 0xD9, 0x7E, 0xE7, 0xAC, 0xFF, 0xCA,
+    0xBB, 0x8D, 0xD8, 0x0E, 0xE0, 0x0D, 0xE6, 0x6E, 0x9C, 0xC9, 0x5E, 0xBA,
+    0xE5, 0xAB, 0x7D, 0xD7, 0xE4, 0x8C, 0xC8, 0x4E, 0x2E, 0x3E, 0x6D, 0xD6,
+    0xE3, 0x9B, 0xB9, 0xAA, 0xE2, 0x1E, 0xE1, 0x5D, 0xD5, 0x7C, 0xC7, 0x4D,
+    0x8B, 0xB8, 0xD4, 0x9A, 0xA9, 0x6C, 0xC6, 0x3D, 0xD3, 0x2D, 0xD2, 0x1D,
+    0x7B, 0xB7, 0xD1, 0x5C, 0xC5, 0x8A, 0xA8, 0x99, 0x4C, 0xC4, 0x6B, 0xB6,
+    0xD0, 0x0C, 0x3C, 0xC3, 0x7A, 0xA7, 0x2C, 0xC2, 0x5B, 0xB5, 0x1C, 0x89,
+    0x98, 0xC1, 0x4B, 0xC0, 0x0B, 0x3B, 0xB0, 0x0A, 0x1A, 0xB4, 0x6A, 0xA6,
+    0x79, 0x97, 0xA0, 0x09, 0x90, 0xB3, 0x88, 0x2B, 0x5A, 0xB2, 0xA5, 0x1B,
+    0xB1, 0x69, 0x96, 0xA4, 0x4A, 0x78, 0x87, 0x3A, 0xA3, 0x59, 0x95, 0x2A,
+    0xA2, 0xA1, 0x68, 0x86, 0x77, 0x49, 0x94, 0x39, 0x93, 0x58, 0x85, 0x29,
+    0x67, 0x76, 0x92, 0x19, 0x91, 0x48, 0x84, 0x57, 0x75, 0x38, 0x83, 0x66,
+    0x28, 0x82, 0x18, 0x47, 0x74, 0x81, 0x08, 0x80, 0x56, 0x65, 0x17, 0x07,
+    0x70, 0x73, 0x37, 0x27, 0x72, 0x46, 0x64, 0x55, 0x71, 0x36, 0x63, 0x45,
+    0x54, 0x26, 0x62, 0x16, 0x61, 0x06, 0x60, 0x35, 0x53, 0x44, 0x25, 0x52,
+    0x15, 0x05, 0x50, 0x51, 0x34, 0x43, 0x24, 0x42, 0x33, 0x14, 0x41, 0x04,
+    0x40, 0x23, 0x32, 0x13, 0x31, 0x03, 0x30, 0x22, 0x12, 0x21, 0x02, 0x20,
+    0x11, 0x01, 0x10, 0x00,
+};
+
+static const uint8_t mpa_huff_sizes_minus_one[] = /* code count minus one for each of the 15 Huffman tables, in init order */
+{
+    3, 8, 8, 15, 15, 35, 35, 35, 63, 63, 63, 255, 255, 255, 255
+};
+
+const uint8_t ff_mpa_huff_data[32][2] = {
+{ 0, 0 },
+{ 1, 0 },
+{ 2, 0 },
+{ 3, 0 },
+{ 0, 0 },
+{ 4, 0 },
+{ 5, 0 },
+{ 6, 0 },
+{ 7, 0 },
+{ 8, 0 },
+{ 9, 0 },
+{ 10, 0 },
+{ 11, 0 },
+{ 12, 0 },
+{ 0, 0 },
+{ 13, 0 },
+{ 14, 1 },
+{ 14, 2 },
+{ 14, 3 },
+{ 14, 4 },
+{ 14, 6 },
+{ 14, 8 },
+{ 14, 10 },
+{ 14, 13 },
+{ 15, 4 },
+{ 15, 5 },
+{ 15, 6 },
+{ 15, 7 },
+{ 15, 8 },
+{ 15, 9 },
+{ 15, 11 },
+{ 15, 13 },
+};
+
+
+/* Huffman tables for quadruples */
+static const uint8_t mpa_quad_codes[2][16] = {
+    {  1,  5,  4,  5,  6,  5,  4,  4, 7,  3,  6,  0,  7,  2,  3,  1, },
+    { 15, 14, 13, 12, 11, 10,  9,  8, 7,  6,  5,  4,  3,  2,  1,  0, },
+};
+
+static const uint8_t mpa_quad_bits[2][16] = {
+    { 1, 4, 4, 5, 4, 6, 5, 6, 4, 5, 5, 6, 5, 6, 6, 6, },
+    { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, },
+};
+
+const uint8_t ff_band_size_long[9][22] = {
+{ 4, 4, 4, 4, 4, 4, 6, 6, 8, 8, 10,
+  12, 16, 20, 24, 28, 34, 42, 50, 54, 76, 158, }, /* 44100 */
+{ 4, 4, 4, 4, 4, 4, 6, 6, 6, 8, 10,
+  12, 16, 18, 22, 28, 34, 40, 46, 54, 54, 192, }, /* 48000 */
+{ 4, 4, 4, 4, 4, 4, 6, 6, 8, 10, 12,
+  16, 20, 24, 30, 38, 46, 56, 68, 84, 102, 26, }, /* 32000 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 22050 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  18, 22, 26, 32, 38, 46, 54, 62, 70, 76, 36, }, /* 24000 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 16000 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 11025 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 12000 */
+{ 12, 12, 12, 12, 12, 12, 16, 20, 24, 28, 32,
+  40, 48, 56, 64, 76, 90, 2, 2, 2, 2, 2, }, /* 8000 */
+};
+
+const uint8_t ff_band_size_short[9][13] = {
+{ 4, 4, 4, 4, 6, 8, 10, 12, 14, 18, 22, 30, 56, }, /* 44100 */
+{ 4, 4, 4, 4, 6, 6, 10, 12, 14, 16, 20, 26, 66, }, /* 48000 */
+{ 4, 4, 4, 4, 6, 8, 12, 16, 20, 26, 34, 42, 12, }, /* 32000 */
+{ 4, 4, 4, 6, 6, 8, 10, 14, 18, 26, 32, 42, 18, }, /* 22050 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 32, 44, 12, }, /* 24000 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 16000 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 11025 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 12000 */
+{ 8, 8, 8, 12, 16, 20, 24, 28, 36, 2, 2, 2, 26, }, /* 8000 */
+};
+
+uint16_t ff_band_index_long[9][23]; /* running sums of ff_band_size_long[i][j] >> 1; filled in by mpegaudiodec_common_init_static() */
+
+const uint8_t ff_mpa_pretab[2][22] = {
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 0 },
+};
+
+static av_cold void mpegaudiodec_common_init_static(void)
+{ /* fills the static tables written below: modshift, Huffman VLCs, band indexes, division tabs */
+    const uint8_t *huff_sym = mpa_huffsymbols, *huff_lens = mpa_hufflens;
+    int offset;
+
+    /* scale factors table for layer 1/2: entry i packs i % 3 and i / 3 */
+    for (int i = 0; i < 64; i++) {
+        int shift, mod;
+        /* 1.0 (i = 3) is normalized to 2 ^ FRAC_BITS */
+        shift = i / 3;
+        mod   = i % 3;
+        ff_scale_factor_modshift[i] = mod | (shift << 2); /* mod in bits 0-1, shift in the bits above */
+    }
+
+    /* Huffman decode tables: 15 tables carved consecutively out of huff_vlc_tables */
+    offset = 0;
+    for (int i = 0; i < 15;) { /* i is advanced by the ++i below */
+        uint16_t tmp_symbols[256];
+        int nb_codes_minus_one = mpa_huff_sizes_minus_one[i];
+        int j;
+
+        for (j = 0; j <= nb_codes_minus_one; j++) {
+            uint8_t high = huff_sym[j] & 0xF0, low = huff_sym[j] & 0xF;
+
+            tmp_symbols[j] = high << 1 | ((high && low) << 4) | low; /* high nibble moved up one bit; bit 4 set when both nibbles are nonzero */
+        }
+
+        ff_huff_vlc[++i].table         = huff_vlc_tables + offset; /* VLCs land in ff_huff_vlc[1..15] */
+        ff_huff_vlc[i].table_allocated = FF_ARRAY_ELEMS(huff_vlc_tables) - offset;
+        ff_init_vlc_from_lengths(&ff_huff_vlc[i], 7, j,
+                                 huff_lens, 1, tmp_symbols, 2, 2,
+                                 0, INIT_VLC_STATIC_OVERLONG, NULL);
+        offset    += ff_huff_vlc[i].table_size;
+        huff_lens += j; /* j == nb_codes_minus_one + 1 here: step to the next table's data */
+        huff_sym  += j;
+    }
+    av_assert0(offset == FF_ARRAY_ELEMS(huff_vlc_tables)); /* the static buffer must be consumed exactly */
+
+    offset = 0;
+    for (int i = 0; i < 2; i++) {
+        int bits = i == 0 ? 6 : 4; /* equals the longest code length listed in mpa_quad_bits[i] */
+        ff_huff_quad_vlc[i].table = huff_quad_vlc_tables + offset;
+        ff_huff_quad_vlc[i].table_allocated = 1 << bits;
+        offset                             += 1 << bits;
+        init_vlc(&ff_huff_quad_vlc[i], bits, 16,
+                 mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1,
+                 INIT_VLC_USE_NEW_STATIC);
+    }
+    av_assert0(offset == FF_ARRAY_ELEMS(huff_quad_vlc_tables));
+
+    for (int i = 0; i < 9; i++) {
+        int k = 0;
+        for (int j = 0; j < 22; j++) {
+            ff_band_index_long[i][j] = k; /* start of band j: sum of the preceding half band sizes */
+            k += ff_band_size_long[i][j] >> 1;
+        }
+        ff_band_index_long[i][22] = k; /* final entry: total over all 22 bands */
+    }
+
+    for (int i = 0; i < 4; i++) {
+        if (ff_mpa_quant_bits[i] < 0) { /* only entries with a negative bit count get a division table */
+            for (int j = 0; j < (1 << (-ff_mpa_quant_bits[i] + 1)); j++) {
+                int val1, val2, val3, steps;
+                int val = j;
+                steps   = ff_mpa_quant_steps[i];
+                val1    = val % steps;
+                val    /= steps;
+                val2    = val % steps;
+                val3    = val / steps;
+                ff_division_tabs[i][j] = val1 + (val2 << 4) + (val3 << 8); /* j split into base-steps digits, 4 bits apart */
+            }
+        }
+    }
+    mpegaudiodec_common_tableinit();
+}
+
+av_cold void ff_mpegaudiodec_common_init_static(void)
+{ /* public entry point: runs mpegaudiodec_common_init_static() through ff_thread_once() */
+    static AVOnce init_static_once = AV_ONCE_INIT;
+
+    ff_thread_once(&init_static_once, mpegaudiodec_common_init_static);
+}
diff --git a/media/ffvpx/libavcodec/mpegaudiodec_common_tablegen.h b/media/ffvpx/libavcodec/mpegaudiodec_common_tablegen.h
new file mode 100644
index 0000000000..bf402c9d84
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodec_common_tablegen.h
@@ -0,0 +1,72 @@
+/*
+ * Header file for hardcoded shared mpegaudiodec tables
+ *
+ * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
+ * Copyright (c) 2020 Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGAUDIODEC_COMMON_TABLEGEN_H
+#define AVCODEC_MPEGAUDIODEC_COMMON_TABLEGEN_H
+
+#include <stdint.h>
+
+#define TABLE_4_3_SIZE ((8191 + 16)*4)
+
+#if CONFIG_HARDCODED_TABLES
+#define mpegaudiodec_common_tableinit()
+#include "libavcodec/mpegaudiodec_common_tables.h"
+#else
+#include <math.h>
+#include "libavutil/attributes.h"
+
+int8_t   ff_table_4_3_exp  [TABLE_4_3_SIZE];
+uint32_t ff_table_4_3_value[TABLE_4_3_SIZE];
+
+#define FRAC_BITS 23
+#define IMDCT_SCALAR 1.759
+
+static av_cold void mpegaudiodec_common_tableinit(void)
+{ /* fills ff_table_4_3_value[] / ff_table_4_3_exp[] for indices 1 .. TABLE_4_3_SIZE - 1 */
+    static const double exp2_lut[4] = {
+        1.00000000000000000000, /* 2 ^ (0 * 0.25) */
+        1.18920711500272106672, /* 2 ^ (1 * 0.25) */
+        M_SQRT2               , /* 2 ^ (2 * 0.25) */
+        1.68179283050742908606, /* 2 ^ (3 * 0.25) */
+    };
+    double pow43_val = 0;
+
+    for (int i = 1; i < TABLE_4_3_SIZE; i++) { /* index 0 is not written by this loop */
+        double f, fm;
+        int e, m;
+        double value = i / 4; /* integer division: value is floor(i / 4) */
+        if ((i & 3) == 0)
+            pow43_val = value / IMDCT_SCALAR * cbrt(value); /* value^(4/3) / IMDCT_SCALAR, refreshed every fourth index */
+        f  = pow43_val * exp2_lut[i & 3]; /* times 2^((i & 3) / 4) */
+        fm = frexp(f, &e); /* f == fm * 2^e */
+        m  = llrint(fm * (1LL << 31));
+        e += FRAC_BITS - 31 + 5 - 100;
+
+        /* normalized to FRAC_BITS; the exponent is stored negated */
+        ff_table_4_3_value[i] =  m;
+        ff_table_4_3_exp  [i] = -e;
+    }
+}
+
+#endif /* CONFIG_HARDCODED_TABLES */
+#endif /* AVCODEC_MPEGAUDIODEC_COMMON_TABLEGEN_H */
diff --git a/media/ffvpx/libavcodec/mpegaudiodec_fixed.c b/media/ffvpx/libavcodec/mpegaudiodec_fixed.c
new file mode 100644
index 0000000000..b5b6822a19
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodec_fixed.c
@@ -0,0 +1,148 @@
+/*
+ * Fixed-point MPEG audio decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "config_components.h"
+#include "libavutil/samplefmt.h"
+
+#define USE_FLOATS 0
+
+#include "codec_internal.h"
+#include "mpegaudio.h"
+
+#define SHR(a,b)       (((int)(a))>>(b))
+/* WARNING: only correct for positive numbers */
+#define FIXR_OLD(a)    ((int)((a) * FRAC_ONE + 0.5))
+#define FIXR(a)        ((int)((a) * FRAC_ONE + 0.5))
+#define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
+#define MULH3(x, y, s) MULH((s)*(x), y)
+#define MULLx(x, y, s) MULL((int)(x),(y),s)
+#define RENAME(a)      a ## _fixed
+#define OUT_FMT   AV_SAMPLE_FMT_S16
+#define OUT_FMT_P AV_SAMPLE_FMT_S16P
+
+/* Intensity stereo table. See commit b91d46614df189e7905538e7f5c4ed9c7ed0d274
+ * (float based mp1/mp2/mp3 decoders.) for how they were created. */
+static const int32_t is_table[2][16] = { /* only 7 of the 16 entries per row are listed; the rest are zero-initialized */
+    { 0x000000, 0x1B0CB1, 0x2ED9EC, 0x400000, 0x512614, 0x64F34F, 0x800000 },
+    { 0x800000, 0x64F34F, 0x512614, 0x400000, 0x2ED9EC, 0x1B0CB1, 0x000000 } /* row 0 in reverse order */
+};
+
+/* Antialiasing table. See commit ce4a29c066cddfc180979ed86396812f24337985
+ * (optimize antialias) for how they were created. */
+static const int32_t csa_table[8][4] = {
+    { 0x36E129F8, 0xDF128056, 0x15F3AA4E, 0xA831565E },
+    { 0x386E75F2, 0xE1CF24A5, 0x1A3D9A97, 0xA960AEB3 },
+    { 0x3CC6B73A, 0xEBF19FA6, 0x28B856E0, 0xAF2AE86C },
+    { 0x3EEEA054, 0xF45B88BC, 0x334A2910, 0xB56CE868 },
+    { 0x3FB6905C, 0xF9F27F18, 0x39A90F74, 0xBA3BEEBC },
+    { 0x3FF23F20, 0xFD60D1E4, 0x3D531104, 0xBD6E92C4 },
+    { 0x3FFE5932, 0xFF175EE4, 0x3F15B816, 0xBF1905B2 },
+    { 0x3FFFE34A, 0xFFC3612F, 0x3FC34479, 0xBFC37DE5 }
+};
+
+#include "mpegaudiodec_template.c"
+
+#if CONFIG_MP1_DECODER
+const FFCodec ff_mp1_decoder = {
+    .p.name         = "mp1",
+    CODEC_LONG_NAME("MP1 (MPEG audio layer 1)"),
+    .p.type         = AVMEDIA_TYPE_AUDIO,
+    .p.id           = AV_CODEC_ID_MP1,
+    .priv_data_size = sizeof(MPADecodeContext),
+    .init           = decode_init,
+    FF_CODEC_DECODE_CB(decode_frame),
+    .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF |
+                      AV_CODEC_CAP_DR1,
+    .flush          = flush,
+    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_NONE },
+};
+#endif
+#if CONFIG_MP2_DECODER
+const FFCodec ff_mp2_decoder = {
+    .p.name         = "mp2",
+    CODEC_LONG_NAME("MP2 (MPEG audio layer 2)"),
+    .p.type         = AVMEDIA_TYPE_AUDIO,
+    .p.id           = AV_CODEC_ID_MP2,
+    .priv_data_size = sizeof(MPADecodeContext),
+    .init           = decode_init,
+    FF_CODEC_DECODE_CB(decode_frame),
+    .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF |
+                      AV_CODEC_CAP_DR1,
+    .flush          = flush,
+    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_NONE },
+};
+#endif
+#if CONFIG_MP3_DECODER
+const FFCodec ff_mp3_decoder = {
+    .p.name         = "mp3",
+    CODEC_LONG_NAME("MP3 (MPEG audio layer 3)"),
+    .p.type         = AVMEDIA_TYPE_AUDIO,
+    .p.id           = AV_CODEC_ID_MP3,
+    .priv_data_size = sizeof(MPADecodeContext),
+    .init           = decode_init,
+    FF_CODEC_DECODE_CB(decode_frame),
+    .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF |
+                      AV_CODEC_CAP_DR1,
+    .flush          = flush,
+    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_NONE },
+};
+#endif
+#if CONFIG_MP3ADU_DECODER
+const FFCodec ff_mp3adu_decoder = {
+    .p.name         = "mp3adu",
+    CODEC_LONG_NAME("ADU (Application Data Unit) MP3 (MPEG audio layer 3)"),
+    .p.type         = AVMEDIA_TYPE_AUDIO,
+    .p.id           = AV_CODEC_ID_MP3ADU,
+    .priv_data_size = sizeof(MPADecodeContext),
+    .init           = decode_init,
+    FF_CODEC_DECODE_CB(decode_frame_adu),
+    .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF |
+                      AV_CODEC_CAP_DR1,
+    .flush          = flush,
+    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_NONE },
+};
+#endif
+#if CONFIG_MP3ON4_DECODER
+const FFCodec ff_mp3on4_decoder = {
+    .p.name         = "mp3on4",
+    CODEC_LONG_NAME("MP3onMP4"),
+    .p.type         = AVMEDIA_TYPE_AUDIO,
+    .p.id           = AV_CODEC_ID_MP3ON4,
+    .priv_data_size = sizeof(MP3On4DecodeContext),
+    .init           = decode_init_mp3on4,
+    .close          = decode_close_mp3on4,
+    FF_CODEC_DECODE_CB(decode_frame_mp3on4),
+    .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF |
+                      AV_CODEC_CAP_DR1,
+    .flush          = flush_mp3on4,
+    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_NONE },
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+};
+#endif
diff --git a/media/ffvpx/libavcodec/mpegaudiodec_template.c b/media/ffvpx/libavcodec/mpegaudiodec_template.c
new file mode 100644
index 0000000000..3e4ee79be6
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodec_template.c
@@ -0,0 +1,1899 @@
+/*
+ * MPEG Audio decoder
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPEG Audio decoder
+ */
+
+#include "config_components.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/crc.h"
+#include "libavutil/float_dsp.h"
+#include "libavutil/libm.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/thread.h"
+
+#include "avcodec.h"
+#include "decode.h"
+#include "get_bits.h"
+#include "mathops.h"
+#include "mpegaudiodsp.h"
+
+/*
+ * TODO:
+ *  - test lsf / mpeg25 extensively.
+ */
+
+#include "mpegaudio.h"
+#include "mpegaudiodecheader.h"
+
+#define BACKSTEP_SIZE 512 /* used twice in LAST_BUF_SIZE below */
+#define EXTRABYTES 24 /* extra bytes added on top in LAST_BUF_SIZE */
+#define LAST_BUF_SIZE (2 * BACKSTEP_SIZE + EXTRABYTES) /* parenthesized so the macro expands safely inside larger expressions */
+
+/* layer 3 "granule" */
+typedef struct GranuleDef {
+    uint8_t scfsi;
+    int part2_3_length;
+    int big_values;
+    int global_gain;
+    int scalefac_compress;
+    uint8_t block_type;
+    uint8_t switch_point;
+    int table_select[3];
+    int subblock_gain[3];
+    uint8_t scalefac_scale;
+    uint8_t count1table_select;
+    int region_size[3]; /* number of huffman codes in each region */
+    int preflag;
+    int short_start, long_end; /* long/short band indexes */
+    uint8_t scale_factors[40];
+    DECLARE_ALIGNED(16, INTFLOAT, sb_hybrid)[SBLIMIT * 18]; /* 576 samples */
+} GranuleDef;
+
+typedef struct MPADecodeContext {
+    MPA_DECODE_HEADER
+    uint8_t last_buf[LAST_BUF_SIZE];
+    int last_buf_size;
+    int extrasize;
+    /* next header (used in free format parsing) */
+    uint32_t free_format_next_header;
+    GetBitContext gb;
+    GetBitContext in_gb;
+    DECLARE_ALIGNED(32, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
+    int synth_buf_offset[MPA_MAX_CHANNELS];
+    DECLARE_ALIGNED(32, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
+    INTFLOAT mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
+    GranuleDef granules[2][2]; /* Used in Layer 3 */
+    int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3
+    int dither_state;
+    int err_recognition;
+    AVCodecContext* avctx;
+    MPADSPContext mpadsp;
+    void (*butterflies_float)(float *av_restrict v1, float *av_restrict v2, int len);
+    AVFrame *frame;
+    uint32_t crc;
+} MPADecodeContext;
+
+#define HEADER_SIZE 4
+
+#include "mpegaudiodata.h"
+
+#include "mpegaudio_tablegen.h"
+/* intensity stereo coef table */
+static INTFLOAT is_table_lsf[2][2][16];
+
+/* [i][j]:  2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */
+static int32_t scale_factor_mult[15][3];
+/* mult table for layer 2 group quantization */
+
+#define SCALE_GEN(v) \
+{ FIXR_OLD(1.0 * (v)), FIXR_OLD(0.7937005259 * (v)), FIXR_OLD(0.6299605249 * (v)) }
+
+static const int32_t scale_factor_mult2[3][3] = {
+    SCALE_GEN(4.0 / 3.0), /* 3 steps */
+    SCALE_GEN(4.0 / 5.0), /* 5 steps */
+    SCALE_GEN(4.0 / 9.0), /* 9 steps */
+};
+
+/**
+ * Turn the cumulative region end offsets held in g->region_size[] into
+ * per-region lengths, clamping every offset to g->big_values.
+ */
+static void region_offset2size(GranuleDef *g)
+{
+    int prev_end = 0;
+    g->region_size[2] = 576 / 2; /* third region's offset is fixed here before clamping */
+    for (int r = 0; r < 3; r++) {
+        const int end = FFMIN(g->region_size[r], g->big_values);
+        g->region_size[r] = end - prev_end;
+        prev_end = end;
+    }
+}
+
+static void init_short_region(MPADecodeContext *s, GranuleDef *g)
+{
+    /* Region 0 depends on block type and sample-rate index (8 is the
+     * entry labelled 8000 in the band tables); region 1 is 576 / 2. */
+    const int rate_idx = s->sample_rate_index;
+    int first_region;
+
+    if (g->block_type == 2)
+        first_region = rate_idx != 8 ? 36 / 2 : 72 / 2;
+    else if (rate_idx <= 2)
+        first_region = 36 / 2;
+    else
+        first_region = rate_idx != 8 ? 54 / 2 : 108 / 2;
+
+    g->region_size[0] = first_region;
+    g->region_size[1] = 576 / 2;
+}
+
+static void init_long_region(MPADecodeContext *s, GranuleDef *g,
+                             int ra1, int ra2)
+{
+    /* Look up both region ends in this sample rate's band-index row. */
+    const uint16_t *band_index = ff_band_index_long[s->sample_rate_index];
+    /* clamp to 22, the last entry of the 23-element row */
+    const int second_end = FFMIN(ra1 + ra2 + 2, 22);
+    g->region_size[0] = band_index[ra1 + 1];
+    g->region_size[1] = band_index[second_end];
+}
+
+static void compute_band_indexes(MPADecodeContext *s, GranuleDef *g)
+{ /* sets g->long_end and g->short_start from block_type, switch_point and the sample-rate index */
+    if (g->block_type == 2) {
+        if (g->switch_point) {
+            if(s->sample_rate_index == 8)
+                avpriv_request_sample(s->avctx, "switch point in 8khz");
+            /* if switched mode, we handle the 36 first samples as
+                long blocks.  For 8000Hz, we handle the 72 first
+                exponents as long blocks */
+            if (s->sample_rate_index <= 2)
+                g->long_end = 8;
+            else
+                g->long_end = 6;
+
+            g->short_start = 3;
+        } else {
+            g->long_end    = 0; /* block_type 2 without a switch point: no long bands */
+            g->short_start = 0;
+        }
+    } else {
+        g->short_start = 13; /* other block types: 13 and 22 match the row widths of ff_band_size_short / ff_band_size_long */
+        g->long_end    = 22;
+    }
+}
+
+/* layer 1 unscaling */
+/* n = number of bits of the mantissa minus 1 */
+static inline int l1_unscale(int n, int mant, int scale_factor)
+{
+    int shift, mod;
+    int64_t val;
+
+    shift   = ff_scale_factor_modshift[scale_factor]; /* packed entry: low 2 bits and the bits above are split below */
+    mod     = shift & 3;
+    shift >>= 2;
+    val     = MUL64((int)(mant + (-1U << n) + 1), scale_factor_mult[n-1][mod]); /* (mant - 2^n + 1), formed in unsigned arithmetic */
+    shift  += n;
+    /* NOTE: at this point, 1 <= shift <= 21 + 15 */
+    return (int)((val + (1LL << (shift - 1))) >> shift); /* right shift with rounding */
+}
+
+static inline int l2_unscale_group(int steps, int mant, int scale_factor)
+{ /* unscale one mantissa of a grouped layer 2 sample */
+    int shift, mod, val;
+
+    shift   = ff_scale_factor_modshift[scale_factor]; /* packed entry: low 2 bits and the bits above are split below */
+    mod     = shift & 3;
+    shift >>= 2;
+
+    val = (mant - (steps >> 1)) * scale_factor_mult2[steps >> 2][mod]; /* steps 3/5/9 select rows 0/1/2 */
+    /* NOTE: at this point, 0 <= shift <= 21 */
+    if (shift > 0)
+        val = (val + (1 << (shift - 1))) >> shift; /* right shift with rounding */
+    return val;
+}
+
+/* Compute value^(4/3) * 2^(exponent/4). The result is normalized to FRAC_BITS. */
+static inline int l3_unscale(int value, int exponent)
+{
+    unsigned int m;
+    int e;
+
+    e  = ff_table_4_3_exp  [4 * value + (exponent & 3)]; /* four table entries per value, picked by exponent & 3 */
+    m  = ff_table_4_3_value[4 * value + (exponent & 3)];
+    e -= exponent >> 2; /* the remaining part of exponent reduces the shift amount */
+#ifdef DEBUG
+    if(e < 1)
+        av_log(NULL, AV_LOG_WARNING, "l3_unscale: e is %d\n", e);
+#endif
+    if (e > (SUINT)31)
+        return 0;
+    m = (m + ((1U << e) >> 1)) >> e; /* right shift by e with rounding */
+
+    return m;
+}
+
+static av_cold void decode_init_static(void)
+{ /* fills scale_factor_mult and is_table_lsf, then calls the table/synth init helpers; invoked from decode_init() via ff_thread_once() */
+    int i, j;
+
+    /* scale factor multiply for layer 1 */
+    for (i = 0; i < 15; i++) {
+        int n, norm;
+        n = i + 2;
+        norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1); /* FRAC_ONE * 2^n / (2^n - 1) */
+        scale_factor_mult[i][0] = MULLx(norm, FIXR(1.0          * 2.0), FRAC_BITS);
+        scale_factor_mult[i][1] = MULLx(norm, FIXR(0.7937005259 * 2.0), FRAC_BITS); /* 0.7937... is 2^(-1/3) */
+        scale_factor_mult[i][2] = MULLx(norm, FIXR(0.6299605249 * 2.0), FRAC_BITS); /* 0.6299... is 2^(-2/3) */
+        ff_dlog(NULL, "%d: norm=%x s=%"PRIx32" %"PRIx32" %"PRIx32"\n", i,
+                (unsigned)norm,
+                scale_factor_mult[i][0],
+                scale_factor_mult[i][1],
+                scale_factor_mult[i][2]);
+    }
+
+    /* compute n ^ (4/3) and store it in mantissa/exp format */
+
+    mpegaudio_tableinit();
+
+    for (i = 0; i < 16; i++) {
+        double f;
+        int e, k;
+
+        for (j = 0; j < 2; j++) {
+            e = -(j + 1) * ((i + 1) >> 1);
+            f = exp2(e / 4.0); /* f = 2^(e/4) */
+            k = i & 1; /* odd i: f goes to row 0 and 1.0 to row 1; even i: the reverse */
+            is_table_lsf[j][k ^ 1][i] = FIXR(f);
+            is_table_lsf[j][k    ][i] = FIXR(1.0);
+            ff_dlog(NULL, "is_table_lsf %d %d: %f %f\n",
+                    i, j, (float) is_table_lsf[j][0][i],
+                    (float) is_table_lsf[j][1][i]);
+        }
+    }
+    RENAME(ff_mpa_synth_init)();
+    ff_mpegaudiodec_common_init_static();
+}
+
+static av_cold int decode_init(AVCodecContext * avctx)
+{ /* per-context init; returns 0, or AVERROR(ENOMEM) if the float DSP allocation fails (USE_FLOATS builds only) */
+    static AVOnce init_static_once = AV_ONCE_INIT;
+    MPADecodeContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+
+#if USE_FLOATS
+    {
+        AVFloatDSPContext *fdsp;
+        fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
+        if (!fdsp)
+            return AVERROR(ENOMEM);
+        s->butterflies_float = fdsp->butterflies_float;
+        av_free(fdsp); /* only the function pointer is kept */
+    }
+#endif
+
+    ff_mpadsp_init(&s->mpadsp);
+
+    if (avctx->request_sample_fmt == OUT_FMT &&
+        avctx->codec_id != AV_CODEC_ID_MP3ON4)
+        avctx->sample_fmt = OUT_FMT; /* honoured only on request, and never for MP3ON4 */
+    else
+        avctx->sample_fmt = OUT_FMT_P;
+    s->err_recognition = avctx->err_recognition;
+
+    if (avctx->codec_id == AV_CODEC_ID_MP3ADU)
+        s->adu_mode = 1;
+
+    ff_thread_once(&init_static_once, decode_init_static); /* static tables are built once */
+
+    return 0;
+}
+
+#define C3 FIXHR(0.86602540378443864676/2) //cos(pi/6)
+#define C4 FIXHR(0.70710678118654752439/2) //0.5 / cos(pi*(9)/36)
+#define C5 FIXHR(0.51763809020504152469/2) //0.5 / cos(pi*(3)/36)
+#define C6 FIXHR(1.93185165257813657349/4) //0.5 / cos(pi*(15)/36)
+
+/* 12 points IMDCT. We compute it "by hand" by factorizing obvious
+   cases. Reads in[0], in[3], ..., in[15]; writes out[0..11]. */
+static void imdct12(INTFLOAT *out, SUINTFLOAT *in)
+{
+    SUINTFLOAT in0, in1, in2, in3, in4, in5, t1, t2;
+
+    in0  = in[0*3]; /* inputs are taken with a stride of 3 */
+    in1  = in[1*3] + in[0*3];
+    in2  = in[2*3] + in[1*3];
+    in3  = in[3*3] + in[2*3];
+    in4  = in[4*3] + in[3*3];
+    in5  = in[5*3] + in[4*3];
+    in5 += in3;
+    in3 += in1;
+
+    in2  = MULH3(in2, C3, 2);
+    in3  = MULH3(in3, C3, 4);
+
+    t1   = in0 - in4;
+    t2   = MULH3(in1 - in5, C4, 2);
+
+    out[ 7] =
+    out[10] = t1 + t2; /* every result is stored to two output slots */
+    out[ 1] =
+    out[ 4] = t1 - t2;
+
+    in0    += SHR(in4, 1);
+    in4     = in0 + in2;
+    in5    += 2*in1;
+    in1     = MULH3(in5 + in3, C5, 1);
+    out[ 8] =
+    out[ 9] = in4 + in1;
+    out[ 2] =
+    out[ 3] = in4 - in1;
+
+    in0    -= in2;
+    in5     = MULH3(in5 - in3, C6, 2);
+    out[ 0] =
+    out[ 5] = in0 - in5;
+    out[ 6] =
+    out[11] = in0 + in5;
+}
+
+static int handle_crc(MPADecodeContext *s, int sec_len)
+{ /* sec_len is a bit count; returns 0, or AVERROR_INVALIDDATA on a mismatch when AV_EF_EXPLODE is set */
+    if (s->error_protection && (s->err_recognition & AV_EF_CRCCHECK)) {
+        const uint8_t *buf = s->gb.buffer - HEADER_SIZE; /* NOTE(review): presumably gb.buffer starts right after the frame header - confirm */
+        int sec_byte_len  = sec_len >> 3;
+        int sec_rem_bits  = sec_len & 7;
+        const AVCRC *crc_tab = av_crc_get_table(AV_CRC_16_ANSI);
+        uint8_t tmp_buf[4];
+        uint32_t crc_val = av_crc(crc_tab, UINT16_MAX, &buf[2], 2); /* seeded with 0xFFFF; covers buf[2..3] */
+        crc_val = av_crc(crc_tab, crc_val, &buf[6], sec_byte_len); /* buf[4..5] are skipped; whole bytes of the section */
+
+        AV_WB32(tmp_buf,
+                ((buf[6 + sec_byte_len] & (0xFF00U >> sec_rem_bits)) << 24) +
+                ((s->crc << 16) >> sec_rem_bits)); /* leftover sec_rem_bits bits, then s->crc directly after them */
+
+        crc_val = av_crc(crc_tab, crc_val, tmp_buf, 3);
+
+        if (crc_val) { /* nonzero result is reported as a mismatch */
+            av_log(s->avctx, AV_LOG_ERROR, "CRC mismatch %X!\n", crc_val);
+            if (s->err_recognition & AV_EF_EXPLODE)
+                return AVERROR_INVALIDDATA;
+        }
+    }
+    return 0;
+}
+/* Decode layer 1 data into s->sb_samples; returns 12 (sample sets written per subband) or a negative error from handle_crc() */
+static int mp_decode_layer1(MPADecodeContext *s)
+{
+    int bound, i, v, n, ch, j, mant;
+    uint8_t allocation[MPA_MAX_CHANNELS][SBLIMIT];
+    uint8_t scale_factors[MPA_MAX_CHANNELS][SBLIMIT];
+    int ret;
+
+    ret = handle_crc(s, (s->nb_channels == 1) ? 8*16  : 8*32); /* protected section length in bits */
+    if (ret < 0)
+        return ret;
+
+    if (s->mode == MPA_JSTEREO)
+        bound = (s->mode_ext + 1) * 4; /* subbands from bound upwards share one allocation and mantissa */
+    else
+        bound = SBLIMIT;
+
+    /* allocation bits */
+    for (i = 0; i < bound; i++) {
+        for (ch = 0; ch < s->nb_channels; ch++) {
+            allocation[ch][i] = get_bits(&s->gb, 4);
+        }
+    }
+    for (i = bound; i < SBLIMIT; i++)
+        allocation[0][i] = get_bits(&s->gb, 4); /* shared subbands: only channel 0's entry is read */
+
+    /* scale factors */
+    for (i = 0; i < bound; i++) {
+        for (ch = 0; ch < s->nb_channels; ch++) {
+            if (allocation[ch][i])
+                scale_factors[ch][i] = get_bits(&s->gb, 6); /* left unset when the allocation is 0 */
+        }
+    }
+    for (i = bound; i < SBLIMIT; i++) {
+        if (allocation[0][i]) {
+            scale_factors[0][i] = get_bits(&s->gb, 6);
+            scale_factors[1][i] = get_bits(&s->gb, 6);
+        }
+    }
+
+    /* compute samples */
+    for (j = 0; j < 12; j++) {
+        for (i = 0; i < bound; i++) {
+            for (ch = 0; ch < s->nb_channels; ch++) {
+                n = allocation[ch][i];
+                if (n) {
+                    mant = get_bits(&s->gb, n + 1); /* mantissa is n + 1 bits wide */
+                    v = l1_unscale(n, mant, scale_factors[ch][i]);
+                } else {
+                    v = 0;
+                }
+                s->sb_samples[ch][j][i] = v;
+            }
+        }
+        for (i = bound; i < SBLIMIT; i++) {
+            n = allocation[0][i];
+            if (n) {
+                mant = get_bits(&s->gb, n + 1); /* one mantissa, unscaled with each channel's own scale factor */
+                v = l1_unscale(n, mant, scale_factors[0][i]);
+                s->sb_samples[0][j][i] = v;
+                v = l1_unscale(n, mant, scale_factors[1][i]);
+                s->sb_samples[1][j][i] = v;
+            } else {
+                s->sb_samples[0][j][i] = 0;
+                s->sb_samples[1][j][i] = 0;
+            }
+        }
+    }
+    return 12;
+}
+/* Layer II: read bit allocation, scale factors and samples into s->sb_samples; returns 3 * 12, or handle_crc()'s negative result. */
+static int mp_decode_layer2(MPADecodeContext *s)
+{
+    int sblimit; /* number of used subbands */
+    const unsigned char *alloc_table; /* per subband: field width followed by quantizer indexes */
+    int table, bit_alloc_bits, i, j, ch, bound, v;
+    unsigned char bit_alloc[MPA_MAX_CHANNELS][SBLIMIT]; /* entry chosen in the subband's alloc_table row; 0 = nothing coded */
+    unsigned char scale_code[MPA_MAX_CHANNELS][SBLIMIT]; /* 2-bit code: how the 3 scale factors are transmitted */
+    unsigned char scale_factors[MPA_MAX_CHANNELS][SBLIMIT][3], *sf; /* one scale factor per part k */
+    int scale, qindex, bits, steps, k, l, m, b;
+    int ret;
+
+    /* select decoding table */
+    table = ff_mpa_l2_select_table(s->bit_rate / 1000, s->nb_channels,
+                                   s->sample_rate, s->lsf);
+    sblimit     = ff_mpa_sblimit_table[table];
+    alloc_table = ff_mpa_alloc_tables[table];
+
+    if (s->mode == MPA_JSTEREO)
+        bound = (s->mode_ext + 1) * 4; /* subbands >= bound are coded once for both channels */
+    else
+        bound = sblimit; /* every subband is coded per channel */
+
+    ff_dlog(s->avctx, "bound=%d sblimit=%d\n", bound, sblimit);
+
+    /* sanity check: bound must not exceed the number of used subbands */
+    if (bound > sblimit)
+        bound = sblimit;
+
+    /* parse bit allocation */
+    j = 0; /* j is the offset of the current subband's row in alloc_table */
+    for (i = 0; i < bound; i++) {
+        bit_alloc_bits = alloc_table[j]; /* first entry of the row: width of the allocation field */
+        for (ch = 0; ch < s->nb_channels; ch++)
+            bit_alloc[ch][i] = get_bits(&s->gb, bit_alloc_bits);
+        j += 1 << bit_alloc_bits; /* a row holds 1 << bit_alloc_bits entries */
+    }
+    for (i = bound; i < sblimit; i++) { /* shared subbands: read once, store for both channels */
+        bit_alloc_bits = alloc_table[j];
+        v = get_bits(&s->gb, bit_alloc_bits);
+        bit_alloc[0][i] = v;
+        bit_alloc[1][i] = v;
+        j += 1 << bit_alloc_bits;
+    }
+
+    /* scale codes: 2 bits for every subband/channel with a non-zero allocation */
+    for (i = 0; i < sblimit; i++) {
+        for (ch = 0; ch < s->nb_channels; ch++) {
+            if (bit_alloc[ch][i])
+                scale_code[ch][i] = get_bits(&s->gb, 2);
+        }
+    }
+
+    ret = handle_crc(s, get_bits_count(&s->gb) - 16); /* NOTE(review): passes the bit count read so far minus 16; its meaning is defined by handle_crc() (not shown here) */
+    if (ret < 0)
+        return ret;
+
+    /* scale factors: 6 bits each, 1 to 3 of them depending on the scale code */
+    for (i = 0; i < sblimit; i++) {
+        for (ch = 0; ch < s->nb_channels; ch++) {
+            if (bit_alloc[ch][i]) {
+                sf = scale_factors[ch][i];
+                switch (scale_code[ch][i]) {
+                default: /* scale_code was read as 2 bits, so this only shares case 0 */
+                case 0: /* three separate scale factors */
+                    sf[0] = get_bits(&s->gb, 6);
+                    sf[1] = get_bits(&s->gb, 6);
+                    sf[2] = get_bits(&s->gb, 6);
+                    break;
+                case 2: /* one scale factor for all three parts */
+                    sf[0] = get_bits(&s->gb, 6);
+                    sf[1] = sf[0];
+                    sf[2] = sf[0];
+                    break;
+                case 1: /* two read: the first serves parts 0 and 1 */
+                    sf[0] = get_bits(&s->gb, 6);
+                    sf[2] = get_bits(&s->gb, 6);
+                    sf[1] = sf[0];
+                    break;
+                case 3: /* two read: the second serves parts 1 and 2 */
+                    sf[0] = get_bits(&s->gb, 6);
+                    sf[2] = get_bits(&s->gb, 6);
+                    sf[1] = sf[2];
+                    break;
+                }
+            }
+        }
+    }
+
+    /* samples: 3 parts (k), each made of 4 groups (l) of 3 consecutive rows */
+    for (k = 0; k < 3; k++) {
+        for (l = 0; l < 12; l += 3) {
+            j = 0; /* restart at the first alloc_table row for every group */
+            for (i = 0; i < bound; i++) {
+                bit_alloc_bits = alloc_table[j];
+                for (ch = 0; ch < s->nb_channels; ch++) {
+                    b = bit_alloc[ch][i];
+                    if (b) {
+                        scale = scale_factors[ch][i][k];
+                        qindex = alloc_table[j+b]; /* row entry b selects the quantizer */
+                        bits = ff_mpa_quant_bits[qindex]; /* negative: -bits bits hold 3 grouped samples */
+                        if (bits < 0) {
+                            int v2;
+                            /* 3 values at the same time */
+                            v = get_bits(&s->gb, -bits);
+                            v2 = ff_division_tabs[qindex][v]; /* lookup returns the 3 samples packed at bits 0-3, 4-7 and 8+ */
+                            steps  = ff_mpa_quant_steps[qindex];
+
+                            s->sb_samples[ch][k * 12 + l + 0][i] =
+                                l2_unscale_group(steps,  v2       & 15, scale);
+                            s->sb_samples[ch][k * 12 + l + 1][i] =
+                                l2_unscale_group(steps, (v2 >> 4) & 15, scale);
+                            s->sb_samples[ch][k * 12 + l + 2][i] =
+                                l2_unscale_group(steps,  v2 >> 8      , scale);
+                        } else {
+                            for (m = 0; m < 3; m++) { /* 3 samples, each read as its own bits-wide field */
+                                v = get_bits(&s->gb, bits);
+                                v = l1_unscale(bits - 1, v, scale);
+                                s->sb_samples[ch][k * 12 + l + m][i] = v;
+                            }
+                        }
+                    } else { /* nothing coded for this subband/channel: the 3 rows are zero */
+                        s->sb_samples[ch][k * 12 + l + 0][i] = 0;
+                        s->sb_samples[ch][k * 12 + l + 1][i] = 0;
+                        s->sb_samples[ch][k * 12 + l + 2][i] = 0;
+                    }
+                }
+                /* next subband in alloc table */
+                j += 1 << bit_alloc_bits;
+            }
+            /* XXX: find a way to avoid this duplication of code */
+            for (i = bound; i < sblimit; i++) { /* shared subbands: one mantissa, scaled per channel */
+                bit_alloc_bits = alloc_table[j];
+                b = bit_alloc[0][i];
+                if (b) {
+                    int mant, scale0, scale1;
+                    scale0 = scale_factors[0][i][k];
+                    scale1 = scale_factors[1][i][k];
+                    qindex = alloc_table[j + b];
+                    bits = ff_mpa_quant_bits[qindex];
+                    if (bits < 0) {
+                        /* 3 values at the same time */
+                        v = get_bits(&s->gb, -bits);
+                        steps = ff_mpa_quant_steps[qindex];
+                        mant = v % steps; /* here the group is split as base-`steps` digits instead of via ff_division_tabs */
+                        v = v / steps;
+                        s->sb_samples[0][k * 12 + l + 0][i] =
+                            l2_unscale_group(steps, mant, scale0);
+                        s->sb_samples[1][k * 12 + l + 0][i] =
+                            l2_unscale_group(steps, mant, scale1);
+                        mant = v % steps;
+                        v = v / steps;
+                        s->sb_samples[0][k * 12 + l + 1][i] =
+                            l2_unscale_group(steps, mant, scale0);
+                        s->sb_samples[1][k * 12 + l + 1][i] =
+                            l2_unscale_group(steps, mant, scale1);
+                        s->sb_samples[0][k * 12 + l + 2][i] =
+                            l2_unscale_group(steps, v, scale0);
+                        s->sb_samples[1][k * 12 + l + 2][i] =
+                            l2_unscale_group(steps, v, scale1);
+                    } else {
+                        for (m = 0; m < 3; m++) {
+                            mant = get_bits(&s->gb, bits);
+                            s->sb_samples[0][k * 12 + l + m][i] =
+                                l1_unscale(bits - 1, mant, scale0);
+                            s->sb_samples[1][k * 12 + l + m][i] =
+                                l1_unscale(bits - 1, mant, scale1);
+                        }
+                    }
+                } else {
+                    s->sb_samples[0][k * 12 + l + 0][i] = 0;
+                    s->sb_samples[0][k * 12 + l + 1][i] = 0;
+                    s->sb_samples[0][k * 12 + l + 2][i] = 0;
+                    s->sb_samples[1][k * 12 + l + 0][i] = 0;
+                    s->sb_samples[1][k * 12 + l + 1][i] = 0;
+                    s->sb_samples[1][k * 12 + l + 2][i] = 0;
+                }
+                /* next subband in alloc table */
+                j += 1 << bit_alloc_bits;
+            }
+            /* fill remaining samples to zero */
+            for (i = sblimit; i < SBLIMIT; i++) {
+                for (ch = 0; ch < s->nb_channels; ch++) {
+                    s->sb_samples[ch][k * 12 + l + 0][i] = 0;
+                    s->sb_samples[ch][k * 12 + l + 1][i] = 0;
+                    s->sb_samples[ch][k * 12 + l + 2][i] = 0;
+                }
+            }
+        }
+    }
+    return 3 * 12; /* number of sample rows written per channel (k * 12 + l + 0..2) */
+}
+/* SPLIT(dst, sf, n): for n in 3..6 set dst = sf % n and sf = sf / n; for any other n set dst = 0 and keep sf. dst and sf must be lvalues. */
+#define SPLIT(dst,sf,n)             /* NOTE(review): bare if/else chain with unparenthesized arguments; do not use as an unbraced if/else body */ \
+    if (n == 3) {                   \
+        int m = (sf * 171) >> 9;    /* m == sf / 3 for 0 <= sf < 512 */ \
+        dst   = sf - 3 * m;         \
+        sf    = m;                  \
+    } else if (n == 4) {            \
+        dst  = sf & 3;              /* power of two: mask and shift */ \
+        sf >>= 2;                   \
+    } else if (n == 5) {            \
+        int m = (sf * 205) >> 10;   /* m == sf / 5 for 0 <= sf < 1024 */ \
+        dst   = sf - 5 * m;         \
+        sf    = m;                  \
+    } else if (n == 6) {            \
+        int m = (sf * 171) >> 10;   /* m == sf / 6 for 0 <= sf < 512 */ \
+        dst   = sf - 6 * m;         \
+        sf    = m;                  \
+    } else {                        /* any other n: no digit is extracted */ \
+        dst = 0;                    \
+    }
+/* Split sf into four digits: slen[3], slen[2], slen[1] are the SPLIT results for n3, n2, n1 (in that order); slen[0] gets what is left of sf. */
+static av_always_inline void lsf_sf_expand(int *slen, int sf, int n1, int n2,
+                                           int n3)
+{
+    SPLIT(slen[3], sf, n3) /* SPLIT also updates the local sf, so the order of these three lines matters */
+    SPLIT(slen[2], sf, n2)
+    SPLIT(slen[1], sf, n1)
+    slen[0] = sf; /* whatever remains after the three splits */
+}
+/* Fill exponents[] with one gain exponent per frequency line, computed from g's global gain and scale factors. */
+static void exponents_from_scale_factors(MPADecodeContext *s, GranuleDef *g,
+                                         int16_t *exponents)
+{
+    const uint8_t *widths, *pre;
+    int band, win, width, n, sf_idx, value, sf_shift, base, window_base[3];
+    int16_t *out = exponents;
+
+    base     = g->global_gain - 210;
+    sf_shift = g->scalefac_scale + 1;
+
+    /* long bands: all lines of a band share one exponent */
+    widths = ff_band_size_long[s->sample_rate_index];
+    pre    = ff_mpa_pretab[g->preflag];
+    for (band = 0; band < g->long_end; band++) {
+        value = base - ((g->scale_factors[band] + pre[band]) << sf_shift) + 400;
+        for (n = widths[band]; n > 0; n--)
+            *out++ = value;
+    }
+
+    if (g->short_start >= 13)
+        return; /* no short bands to expand */
+
+    /* short bands: three consecutive runs per band, each with its own subblock gain */
+    widths = ff_band_size_short[s->sample_rate_index];
+    for (win = 0; win < 3; win++)
+        window_base[win] = base - (g->subblock_gain[win] << 3);
+    sf_idx = g->long_end; /* short-band scale factors follow the long ones */
+    for (band = g->short_start; band < 13; band++) {
+        width = widths[band];
+        for (win = 0; win < 3; win++) {
+            value = window_base[win] - (g->scale_factors[sf_idx++] << sf_shift) + 400;
+            for (n = width; n > 0; n--)
+                *out++ = value;
+        }
+    }
+}
+/* Once *pos reaches the last s->extrasize bytes of s->gb, continue in the saved reader s->in_gb and rebase *pos, *end_pos, *end_pos2 onto it. */
+static void switch_buffer(MPADecodeContext *s, int *pos, int *end_pos,
+                          int *end_pos2)
+{
+    if (!s->in_gb.buffer || *pos < s->gb.size_in_bits - s->extrasize * 8)
+        return; /* no saved reader, or the current one is not exhausted yet */
+    s->gb           = s->in_gb;
+    s->in_gb.buffer = NULL; /* a NULL buffer makes later calls return early */
+    s->extrasize    = 0;
+    av_assert2((get_bits_count(&s->gb) & 7) == 0);
+    skip_bits_long(&s->gb, *pos - *end_pos); /* advance the new reader by the distance *pos is past *end_pos */
+    *end_pos2 = *end_pos2 + get_bits_count(&s->gb) - *pos;
+    *end_pos  = *end_pos2;
+    *pos      = get_bits_count(&s->gb);
+}
+
+/* The macros below are an optimized equivalent of
+            INTFLOAT v = *src
+            if(get_bits1(&s->gb))
+                v = -v;
+            *dst = v;
+   They use `s` and an integer `v` that must be declared at the point of use. */
+#if USE_FLOATS
+#define READ_FLIP_SIGN(dst,src)                     /* XORs the bit just read into bit 31 of the 32-bit word at src */ \
+    v = AV_RN32A(src) ^ (get_bits1(&s->gb) << 31);  \
+    AV_WN32A(dst, v); /* two statements, not wrapped in do { } while (0): keep uses inside braces */
+#else
+#define READ_FLIP_SIGN(dst,src)     /* v becomes 0 or -1; (x ^ v) - v is x for 0 and -x for -1 */ \
+    v      = -get_bits1(&s->gb);    \
+    *(dst) = (*(src) ^ v) - v;
+#endif
+/* Huffman-decode one granule's spectrum into g->sb_hybrid[0..575], reading up to bit position end_pos2; always returns 0. */
+static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
+                          int16_t *exponents, int end_pos2)
+{
+    int s_index; /* next index to write in g->sb_hybrid */
+    int i;
+    int last_pos, bits_left;
+    VLC *vlc;
+    int end_pos = FFMIN(end_pos2, s->gb.size_in_bits - s->extrasize * 8); /* stop before the last s->extrasize bytes of s->gb */
+
+    /* low frequencies (called big values) */
+    s_index = 0;
+    for (i = 0; i < 3; i++) { /* three regions, each with its own table_select */
+        int j, k, l, linbits;
+        j = g->region_size[i]; /* number of value pairs in this region */
+        if (j == 0)
+            continue;
+        /* select vlc table */
+        k       = g->table_select[i];
+        l       = ff_mpa_huff_data[k][0];
+        linbits = ff_mpa_huff_data[k][1]; /* extra bits read when a value is the escape 15 */
+        vlc     = &ff_huff_vlc[l];
+
+        if (!l) { /* table 0: the region is all zeros and consumes no bits */
+            memset(&g->sb_hybrid[s_index], 0, sizeof(*g->sb_hybrid) * 2 * j);
+            s_index += 2 * j;
+            continue;
+        }
+
+        /* read huffcode and compute each couple */
+        for (; j > 0; j--) {
+            int exponent, x, y;
+            int v; /* scratch used by READ_FLIP_SIGN */
+            int pos = get_bits_count(&s->gb);
+
+            if (pos >= end_pos){ /* current limit reached: try to continue in s->in_gb */
+                switch_buffer(s, &pos, &end_pos, &end_pos2);
+                if (pos >= end_pos)
+                    break; /* still nothing left to read: stop this region */
+            }
+            y = get_vlc2(&s->gb, vlc->table, 7, 3);
+
+            if (!y) { /* code 0: both values of the pair are zero */
+                g->sb_hybrid[s_index    ] =
+                g->sb_hybrid[s_index + 1] = 0;
+                s_index += 2;
+                continue;
+            }
+
+            exponent= exponents[s_index]; /* both values of the pair use the exponent of the first */
+
+            ff_dlog(s->avctx, "region=%d n=%d y=%d exp=%d\n",
+                    i, g->region_size[i] - j, y, exponent);
+            if (y & 16) { /* bit 4 set: two sign-carrying values, x in bits 5 and up, y in bits 0-3 */
+                x = y >> 5;
+                y = y & 0x0f;
+                if (x < 15) { /* small magnitude: table lookup plus sign bit */
+                    READ_FLIP_SIGN(g->sb_hybrid + s_index, RENAME(expval_table)[exponent] + x)
+                } else { /* 15 is the escape: add linbits more bits, then scale directly */
+                    x += get_bitsz(&s->gb, linbits);
+                    v  = l3_unscale(x, exponent);
+                    if (get_bits1(&s->gb))
+                        v = -v;
+                    g->sb_hybrid[s_index] = v;
+                }
+                if (y < 15) {
+                    READ_FLIP_SIGN(g->sb_hybrid + s_index + 1, RENAME(expval_table)[exponent] + y)
+                } else {
+                    y += get_bitsz(&s->gb, linbits);
+                    v  = l3_unscale(y, exponent);
+                    if (get_bits1(&s->gb))
+                        v = -v;
+                    g->sb_hybrid[s_index + 1] = v;
+                }
+            } else { /* bit 4 clear: a single value is coded; presumably at most one of x, y is non-zero here */
+                x = y >> 5;
+                y = y & 0x0f;
+                x += y; /* magnitude of the single coded value */
+                if (x < 15) {
+                    READ_FLIP_SIGN(g->sb_hybrid + s_index + !!y, RENAME(expval_table)[exponent] + x)
+                } else {
+                    x += get_bitsz(&s->gb, linbits);
+                    v  = l3_unscale(x, exponent);
+                    if (get_bits1(&s->gb))
+                        v = -v;
+                    g->sb_hybrid[s_index+!!y] = v;
+                }
+                g->sb_hybrid[s_index + !y] = 0; /* the other slot of the pair is zero */
+            }
+            s_index += 2;
+        }
+    }
+
+    /* high frequencies */
+    vlc = &ff_huff_quad_vlc[g->count1table_select];
+    last_pos = 0;
+    while (s_index <= 572) { /* groups of 4 values while they still fit below 576 */
+        int pos, code;
+        pos = get_bits_count(&s->gb);
+        if (pos >= end_pos) {
+            if (pos > end_pos2 && last_pos) {
+                /* some encoders generate an incorrect size for this
+                   part. We must go back into the data */
+                s_index -= 4; /* drop the group that was read past the end */
+                skip_bits_long(&s->gb, last_pos - pos); /* move back to where that group started */
+                av_log(s->avctx, AV_LOG_INFO, "overread, skip %d enddists: %d %d\n", last_pos - pos, end_pos-pos, end_pos2-pos);
+                if(s->err_recognition & (AV_EF_BITSTREAM|AV_EF_COMPLIANT))
+                    s_index=0; /* the memset below then clears the whole granule */
+                break;
+            }
+            switch_buffer(s, &pos, &end_pos, &end_pos2);
+            if (pos >= end_pos)
+                break;
+        }
+        last_pos = pos; /* remember where this group starts so it can be undone */
+
+        code = get_vlc2(&s->gb, vlc->table, vlc->bits, 1);
+        ff_dlog(s->avctx, "t=%d code=%d\n", g->count1table_select, code);
+        g->sb_hybrid[s_index + 0] =
+        g->sb_hybrid[s_index + 1] =
+        g->sb_hybrid[s_index + 2] =
+        g->sb_hybrid[s_index + 3] = 0;
+        while (code) { /* one sign bit is read for every set bit of code */
+            static const int idxtab[16] = { 3,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0 }; /* slot of the highest set bit: 8 -> 0, 4 -> 1, 2 -> 2, 1 -> 3 */
+            int v;
+            int pos = s_index + idxtab[code];
+            code   ^= 8 >> idxtab[code]; /* clear the bit just handled */
+            READ_FLIP_SIGN(g->sb_hybrid + pos, RENAME(exp_table)+exponents[pos])
+        }
+        s_index += 4;
+    }
+    /* skip extension bits */
+    bits_left = end_pos2 - get_bits_count(&s->gb);
+    if (bits_left < 0 && (s->err_recognition & (AV_EF_BUFFER|AV_EF_COMPLIANT))) { /* read past end_pos2 */
+        av_log(s->avctx, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
+        s_index=0;
+    } else if (bits_left > 0 && (s->err_recognition & (AV_EF_BUFFER|AV_EF_AGGRESSIVE))) { /* bits left unused before end_pos2 */
+        av_log(s->avctx, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
+        s_index = 0;
+    }
+    memset(&g->sb_hybrid[s_index], 0, sizeof(*g->sb_hybrid) * (576 - s_index)); /* everything from s_index on is zero */
+    skip_bits_long(&s->gb, bits_left); /* leave the reader exactly at end_pos2 */
+
+    i = get_bits_count(&s->gb);
+    switch_buffer(s, &i, &end_pos, &end_pos2); /* switch readers now if the current one is used up */
+
+    return 0;
+}
+
+/* Reorder short blocks from bitstream order (three runs of `width` values per band) to
+   interleaved order (value n of run 0, 1, 2, then value n + 1, ...). Doing this while
+   parsing would be faster, but the code would be far more complicated. */
+static void reorder_block(MPADecodeContext *s, GranuleDef *g)
+{
+    INTFLOAT scratch[576];
+    INTFLOAT *band = g->sb_hybrid;
+    int sfb, n;
+    int width;
+
+    /* only block type 2 is reordered */
+    if (g->block_type != 2)
+        return;
+
+    /* with a switch point the leading 36 values (72 for sample rate index 8)
+       are left untouched */
+    if (g->switch_point)
+        band += (s->sample_rate_index != 8) ? 36 : 72;
+
+    /* one pass per short band; band always points at the band's first run */
+    for (sfb = g->short_start; sfb < 13; sfb++) {
+        width = ff_band_size_short[s->sample_rate_index][sfb];
+
+        /* gather value n of each of the three runs side by side */
+        for (n = 0; n < width; n++) {
+            scratch[3 * n + 0] = band[n];
+            scratch[3 * n + 1] = band[n + width];
+            scratch[3 * n + 2] = band[n + 2 * width];
+        }
+
+        /* write the interleaved band back and step over all three runs */
+        memcpy(band, scratch, width * 3 * sizeof(*band));
+        band += 3 * width;
+    }
+}
+
+#define ISQRT2 FIXR(0.70710678118654752440) /* 1/sqrt(2), converted by FIXR() */
+/* Joint stereo, in place on g0->sb_hybrid and g1->sb_hybrid: intensity stereo (with optional MS elsewhere) or MS only, per s->mode_ext. */
+static void compute_stereo(MPADecodeContext *s, GranuleDef *g0, GranuleDef *g1)
+{
+    int i, j, k, l;
+    int sf_max, sf, len, non_zero_found;
+    INTFLOAT *tab0, *tab1, v1, v2;
+    const INTFLOAT (*is_tab)[16]; /* is_tab[0][sf] scales channel 0, is_tab[1][sf] produces channel 1 */
+    SUINTFLOAT tmp0, tmp1;
+    int non_zero_found_short[3]; /* per l: set once a band with non-zero data in g1 was seen */
+
+    /* intensity stereo */
+    if (s->mode_ext & MODE_EXT_I_STEREO) {
+        if (!s->lsf) {
+            is_tab = is_table;
+            sf_max = 7; /* scale factors >= sf_max switch intensity processing off for that band */
+        } else {
+            is_tab = is_table_lsf[g1->scalefac_compress & 1];
+            sf_max = 16;
+        }
+
+        tab0 = g0->sb_hybrid + 576; /* both spectra are walked backwards from their end */
+        tab1 = g1->sb_hybrid + 576;
+
+        non_zero_found_short[0] = 0;
+        non_zero_found_short[1] = 0;
+        non_zero_found_short[2] = 0;
+        k = (13 - g1->short_start) * 3 + g1->long_end - 3; /* index of the last short band's first scale factor */
+        for (i = 12; i >= g1->short_start; i--) {
+            /* for last band, use previous scale factor */
+            if (i != 11)
+                k -= 3;
+            len = ff_band_size_short[s->sample_rate_index][i];
+            for (l = 2; l >= 0; l--) { /* the three runs of the band, last one first */
+                tab0 -= len;
+                tab1 -= len;
+                if (!non_zero_found_short[l]) {
+                    /* test if non zero band. if so, stop doing i-stereo */
+                    for (j = 0; j < len; j++) {
+                        if (tab1[j] != 0) {
+                            non_zero_found_short[l] = 1;
+                            goto found1;
+                        }
+                    }
+                    sf = g1->scale_factors[k + l];
+                    if (sf >= sf_max)
+                        goto found1; /* no intensity stereo for this run; the flag stays clear */
+
+                    v1 = is_tab[0][sf];
+                    v2 = is_tab[1][sf];
+                    for (j = 0; j < len; j++) { /* both outputs are derived from channel 0 only */
+                        tmp0    = tab0[j];
+                        tab0[j] = MULLx(tmp0, v1, FRAC_BITS);
+                        tab1[j] = MULLx(tmp0, v2, FRAC_BITS);
+                    }
+                } else {
+found1: /* reached for runs that are not intensity coded */
+                    if (s->mode_ext & MODE_EXT_MS_STEREO) {
+                        /* lower part of the spectrum : do ms stereo
+                           if enabled */
+                        for (j = 0; j < len; j++) {
+                            tmp0    = tab0[j];
+                            tmp1    = tab1[j];
+                            tab0[j] = MULLx(tmp0 + tmp1, ISQRT2, FRAC_BITS);
+                            tab1[j] = MULLx(tmp0 - tmp1, ISQRT2, FRAC_BITS);
+                        }
+                    }
+                }
+            }
+        }
+
+        non_zero_found = non_zero_found_short[0] | /* data found in any short run also ends i-stereo for the long bands */
+                         non_zero_found_short[1] |
+                         non_zero_found_short[2];
+
+        for (i = g1->long_end - 1;i >= 0;i--) {
+            len   = ff_band_size_long[s->sample_rate_index][i];
+            tab0 -= len;
+            tab1 -= len;
+            /* test if non zero band. if so, stop doing i-stereo */
+            if (!non_zero_found) {
+                for (j = 0; j < len; j++) {
+                    if (tab1[j] != 0) {
+                        non_zero_found = 1;
+                        goto found2;
+                    }
+                }
+                /* for last band, use previous scale factor */
+                k  = (i == 21) ? 20 : i;
+                sf = g1->scale_factors[k];
+                if (sf >= sf_max)
+                    goto found2;
+                v1 = is_tab[0][sf];
+                v2 = is_tab[1][sf];
+                for (j = 0; j < len; j++) {
+                    tmp0    = tab0[j];
+                    tab0[j] = MULLx(tmp0, v1, FRAC_BITS);
+                    tab1[j] = MULLx(tmp0, v2, FRAC_BITS);
+                }
+            } else {
+found2: /* same handling as found1, for long bands */
+                if (s->mode_ext & MODE_EXT_MS_STEREO) {
+                    /* lower part of the spectrum : do ms stereo
+                       if enabled */
+                    for (j = 0; j < len; j++) {
+                        tmp0    = tab0[j];
+                        tmp1    = tab1[j];
+                        tab0[j] = MULLx(tmp0 + tmp1, ISQRT2, FRAC_BITS);
+                        tab1[j] = MULLx(tmp0 - tmp1, ISQRT2, FRAC_BITS);
+                    }
+                }
+            }
+        }
+    } else if (s->mode_ext & MODE_EXT_MS_STEREO) {
+        /* ms stereo ONLY */
+        /* NOTE: the 1/sqrt(2) normalization factor is included in the
+           global gain */
+#if USE_FLOATS
+       s->butterflies_float(g0->sb_hybrid, g1->sb_hybrid, 576); /* NOTE(review): presumably the same sum/difference as the loop below - confirm */
+#else
+        tab0 = g0->sb_hybrid;
+        tab1 = g1->sb_hybrid;
+        for (i = 0; i < 576; i++) { /* plain sum and difference, no scaling here */
+            tmp0    = tab0[i];
+            tmp1    = tab1[i];
+            tab0[i] = tmp0 + tmp1;
+            tab1[i] = tmp0 - tmp1;
+        }
+#endif
+    }
+}
+
+#if USE_FLOATS
+#if HAVE_MIPSFPU
+#   include "mips/compute_antialias_float.h"
+#endif /* HAVE_MIPSFPU */
+#else
+#if HAVE_MIPSDSP
+#   include "mips/compute_antialias_fixed.h"
+#endif /* HAVE_MIPSDSP */
+#endif /* USE_FLOATS */
+
+#ifndef compute_antialias /* NOTE(review): presumably the MIPS headers included above may define compute_antialias as a macro - confirm */
+#if USE_FLOATS
+#define AA(j) do {                                                      /* butterfly on the pair ptr[-1-j], ptr[j] */ \
+        float tmp0 = ptr[-1-j];                                         \
+        float tmp1 = ptr[   j];                                         \
+        ptr[-1-j] = tmp0 * csa_table[j][0] - tmp1 * csa_table[j][1];    /* coefficients come from csa_table[j][0..1] */ \
+        ptr[   j] = tmp0 * csa_table[j][1] + tmp1 * csa_table[j][0];    \
+    } while (0)
+#else
+#define AA(j) do {                                              /* fixed-point butterfly on the pair ptr[-1-j], ptr[j] */ \
+        SUINT tmp0 = ptr[-1-j];                                   \
+        SUINT tmp1 = ptr[   j];                                   \
+        SUINT tmp2 = MULH(tmp0 + tmp1, csa_table[j][0]);          /* shared product, reused by both outputs */ \
+        ptr[-1-j] = 4 * (tmp2 - MULH(tmp1, csa_table[j][2]));   /* NOTE(review): entries [2] and [3] are presumably precomputed coefficient combinations - confirm where csa_table is filled */ \
+        ptr[   j] = 4 * (tmp2 + MULH(tmp0, csa_table[j][3]));   \
+    } while (0)
+#endif
+/* Apply the 8 AA() butterflies, in place, across boundaries spaced 18 values apart in g->sb_hybrid. */
+static void compute_antialias(MPADecodeContext *s, GranuleDef *g)
+{
+    INTFLOAT *ptr; /* points at the first value after the current boundary */
+    int n, i; /* n = number of boundaries to process */
+
+    /* we antialias only "long" bands */
+    if (g->block_type == 2) {
+        if (!g->switch_point)
+            return; /* block type 2 without switch point: nothing is antialiased */
+        /* XXX: check this for 8000Hz case */
+        n = 1; /* only the boundary at index 18 */
+    } else {
+        n = SBLIMIT - 1; /* one boundary between each pair of adjacent 18-value blocks */
+    }
+
+    ptr = g->sb_hybrid + 18; /* first boundary */
+    for (i = n; i > 0; i--) {
+        AA(0); /* pairs (ptr[-1], ptr[0]) up to (ptr[-8], ptr[7]) */
+        AA(1);
+        AA(2);
+        AA(3);
+        AA(4);
+        AA(5);
+        AA(6);
+        AA(7);
+
+        ptr += 18; /* next boundary */
+    }
+}
+#endif /* compute_antialias */
+/* Inverse MDCT of g->sb_hybrid: output goes to sb_samples (row stride SBLIMIT); mdct_buf supplies and receives the overlap data. */
+static void compute_imdct(MPADecodeContext *s, GranuleDef *g,
+                          INTFLOAT *sb_samples, INTFLOAT *mdct_buf)
+{
+    INTFLOAT *win, *out_ptr, *ptr, *buf, *ptr1;
+    INTFLOAT out2[12]; /* result of one imdct12() call */
+    int i, j, mdct_long_end, sblimit;
+
+    /* find last non zero block */
+    ptr  = g->sb_hybrid + 576;
+    ptr1 = g->sb_hybrid + 2 * 18; /* the backwards scan stops once ptr drops below this */
+    while (ptr >= ptr1) {
+        int32_t *p;
+        ptr -= 6;
+        p    = (int32_t*)ptr; /* test 6 values at once through their raw 32-bit words */
+        if (p[0] | p[1] | p[2] | p[3] | p[4] | p[5])
+            break;
+    }
+    sblimit = ((ptr - g->sb_hybrid) / 18) + 1; /* 18-value blocks up to and including the one holding ptr */
+
+    if (g->block_type == 2) {
+        /* XXX: check for 8000 Hz */
+        if (g->switch_point)
+            mdct_long_end = 2; /* blocks below mdct_long_end go to imdct36_blocks, the rest to imdct12 below */
+        else
+            mdct_long_end = 0;
+    } else {
+        mdct_long_end = sblimit;
+    }
+
+    s->mpadsp.RENAME(imdct36_blocks)(sb_samples, mdct_buf, g->sb_hybrid,
+                                     mdct_long_end, g->switch_point,
+                                     g->block_type);
+
+    buf = mdct_buf + 4*18*(mdct_long_end >> 2) + (mdct_long_end & 3); /* block j lives at 4*18*(j >> 2) + (j & 3), element stride 4 */
+    ptr = g->sb_hybrid + 18 * mdct_long_end;
+
+    for (j = mdct_long_end; j < sblimit; j++) {
+        /* select frequency inversion */
+        win     = RENAME(ff_mdct_win)[2 + (4  & -(j & 1))]; /* window 2 for even j, window 6 for odd j */
+        out_ptr = sb_samples + j;
+
+        for (i = 0; i < 6; i++) { /* rows 0-5: stored overlap only */
+            *out_ptr = buf[4*i];
+            out_ptr += SBLIMIT;
+        }
+        imdct12(out2, ptr + 0); /* NOTE(review): the three calls start at ptr + 0/1/2, presumably reading every third value - confirm in imdct12() */
+        for (i = 0; i < 6; i++) { /* rows 6-11: first half of out2 plus overlap; second half is kept in buf */
+            *out_ptr     = MULH3(out2[i    ], win[i    ], 1) + buf[4*(i + 6*1)];
+            buf[4*(i + 6*2)] = MULH3(out2[i + 6], win[i + 6], 1);
+            out_ptr += SBLIMIT;
+        }
+        imdct12(out2, ptr + 1);
+        for (i = 0; i < 6; i++) { /* rows 12-17 */
+            *out_ptr     = MULH3(out2[i    ], win[i    ], 1) + buf[4*(i + 6*2)];
+            buf[4*(i + 6*0)] = MULH3(out2[i + 6], win[i + 6], 1);
+            out_ptr += SBLIMIT;
+        }
+        imdct12(out2, ptr + 2);
+        for (i = 0; i < 6; i++) { /* no output rows: only the overlap kept in buf is updated */
+            buf[4*(i + 6*0)] = MULH3(out2[i    ], win[i    ], 1) + buf[4*(i + 6*0)];
+            buf[4*(i + 6*1)] = MULH3(out2[i + 6], win[i + 6], 1);
+            buf[4*(i + 6*2)] = 0;
+        }
+        ptr += 18;
+        buf += (j&3) != 3 ? 1 : (4*18-3); /* next block inside the group of 4, or jump to the next group */
+    }
+    /* zero bands */
+    for (j = sblimit; j < SBLIMIT; j++) {
+        /* overlap */
+        out_ptr = sb_samples + j;
+        for (i = 0; i < 18; i++) { /* output the stored overlap and clear it */
+            *out_ptr = buf[4*i];
+            buf[4*i]   = 0;
+            out_ptr += SBLIMIT;
+        }
+        buf += (j&3) != 3 ? 1 : (4*18-3);
+    }
+}
+
+/* main layer3 decoding function */
+static int mp_decode_layer3(MPADecodeContext *s)
+{
+    int nb_granules, main_data_begin;
+    int gr, ch, blocksplit_flag, i, j, k, n, bits_pos;
+    GranuleDef *g;
+    int16_t exponents[576]; //FIXME try INTFLOAT
+    int ret;
+
+    /* read side info */
+    if (s->lsf) {
+        ret = handle_crc(s, ((s->nb_channels == 1) ? 8*9  : 8*17));
+        main_data_begin = get_bits(&s->gb, 8);
+        skip_bits(&s->gb, s->nb_channels);
+        nb_granules = 1;
+    } else {
+        ret = handle_crc(s, ((s->nb_channels == 1) ? 8*17 : 8*32));
+        main_data_begin = get_bits(&s->gb, 9);
+        if (s->nb_channels == 2)
+            skip_bits(&s->gb, 3);
+        else
+            skip_bits(&s->gb, 5);
+        nb_granules = 2;
+        for (ch = 0; ch < s->nb_channels; ch++) {
+            s->granules[ch][0].scfsi = 0;/* all scale factors are transmitted */
+            s->granules[ch][1].scfsi = get_bits(&s->gb, 4);
+        }
+    }
+    if (ret < 0)
+        return ret;
+
+    for (gr = 0; gr < nb_granules; gr++) {
+        for (ch = 0; ch < s->nb_channels; ch++) {
+            ff_dlog(s->avctx, "gr=%d ch=%d: side_info\n", gr, ch);
+            g = &s->granules[ch][gr];
+            g->part2_3_length = get_bits(&s->gb, 12);
+            g->big_values     = get_bits(&s->gb,  9);
+            if (g->big_values > 288) {
+                av_log(s->avctx, AV_LOG_ERROR, "big_values too big\n");
+                return AVERROR_INVALIDDATA;
+            }
+
+            g->global_gain = get_bits(&s->gb, 8);
+            /* if MS stereo only is selected, we precompute the
+               1/sqrt(2) renormalization factor */
+            if ((s->mode_ext & (MODE_EXT_MS_STEREO | MODE_EXT_I_STEREO)) ==
+                MODE_EXT_MS_STEREO)
+                g->global_gain -= 2;
+            if (s->lsf)
+                g->scalefac_compress = get_bits(&s->gb, 9);
+            else
+                g->scalefac_compress = get_bits(&s->gb, 4);
+            blocksplit_flag = get_bits1(&s->gb);
+            if (blocksplit_flag) {
+                g->block_type = get_bits(&s->gb, 2);
+                if (g->block_type == 0) {
+                    av_log(s->avctx, AV_LOG_ERROR, "invalid block type\n");
+                    return AVERROR_INVALIDDATA;
+                }
+                g->switch_point = get_bits1(&s->gb);
+                for (i = 0; i < 2; i++)
+                    g->table_select[i] = get_bits(&s->gb, 5);
+                for (i = 0; i < 3; i++)
+                    g->subblock_gain[i] = get_bits(&s->gb, 3);
+                init_short_region(s, g);
+            } else {
+                int region_address1, region_address2;
+                g->block_type = 0;
+                g->switch_point = 0;
+                for (i = 0; i < 3; i++)
+                    g->table_select[i] = get_bits(&s->gb, 5);
+                /* compute huffman coded region sizes */
+                region_address1 = get_bits(&s->gb, 4);
+                region_address2 = get_bits(&s->gb, 3);
+                ff_dlog(s->avctx, "region1=%d region2=%d\n",
+                        region_address1, region_address2);
+                init_long_region(s, g, region_address1, region_address2);
+            }
+            region_offset2size(g);
+            compute_band_indexes(s, g);
+
+            g->preflag = 0;
+            if (!s->lsf)
+                g->preflag = get_bits1(&s->gb);
+            g->scalefac_scale     = get_bits1(&s->gb);
+            g->count1table_select = get_bits1(&s->gb);
+            ff_dlog(s->avctx, "block_type=%d switch_point=%d\n",
+                    g->block_type, g->switch_point);
+        }
+    }
+
+    if (!s->adu_mode) {
+        int skip;
+        const uint8_t *ptr = s->gb.buffer + (get_bits_count(&s->gb) >> 3);
+        s->extrasize = av_clip((get_bits_left(&s->gb) >> 3) - s->extrasize, 0,
+                               FFMAX(0, LAST_BUF_SIZE - s->last_buf_size));
+        av_assert1((get_bits_count(&s->gb) & 7) == 0);
+        /* now we get bits from the main_data_begin offset */
+        ff_dlog(s->avctx, "seekback:%d, lastbuf:%d\n",
+                main_data_begin, s->last_buf_size);
+
+        memcpy(s->last_buf + s->last_buf_size, ptr, s->extrasize);
+        s->in_gb = s->gb;
+        init_get_bits(&s->gb, s->last_buf, (s->last_buf_size + s->extrasize) * 8);
+        s->last_buf_size <<= 3;
+        for (gr = 0; gr < nb_granules && (s->last_buf_size >> 3) < main_data_begin; gr++) {
+            for (ch = 0; ch < s->nb_channels; ch++) {
+                g = &s->granules[ch][gr];
+                s->last_buf_size += g->part2_3_length;
+                memset(g->sb_hybrid, 0, sizeof(g->sb_hybrid));
+                compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]);
+            }
+        }
+        skip = s->last_buf_size - 8 * main_data_begin;
+        if (skip >= s->gb.size_in_bits - s->extrasize * 8 && s->in_gb.buffer) {
+            skip_bits_long(&s->in_gb, skip - s->gb.size_in_bits + s->extrasize * 8);
+            s->gb           = s->in_gb;
+            s->in_gb.buffer = NULL;
+            s->extrasize    = 0;
+        } else {
+            skip_bits_long(&s->gb, skip);
+        }
+    } else {
+        gr = 0;
+        s->extrasize = 0;
+    }
+
+    for (; gr < nb_granules; gr++) {
+        for (ch = 0; ch < s->nb_channels; ch++) {
+            g = &s->granules[ch][gr];
+            bits_pos = get_bits_count(&s->gb);
+
+            if (!s->lsf) {
+                uint8_t *sc;
+                int slen, slen1, slen2;
+
+                /* MPEG-1 scale factors */
+                slen1 = ff_slen_table[0][g->scalefac_compress];
+                slen2 = ff_slen_table[1][g->scalefac_compress];
+                ff_dlog(s->avctx, "slen1=%d slen2=%d\n", slen1, slen2);
+                if (g->block_type == 2) {
+                    n = g->switch_point ? 17 : 18;
+                    j = 0;
+                    if (slen1) {
+                        for (i = 0; i < n; i++)
+                            g->scale_factors[j++] = get_bits(&s->gb, slen1);
+                    } else {
+                        for (i = 0; i < n; i++)
+                            g->scale_factors[j++] = 0;
+                    }
+                    if (slen2) {
+                        for (i = 0; i < 18; i++)
+                            g->scale_factors[j++] = get_bits(&s->gb, slen2);
+                        for (i = 0; i < 3; i++)
+                            g->scale_factors[j++] = 0;
+                    } else {
+                        for (i = 0; i < 21; i++)
+                            g->scale_factors[j++] = 0;
+                    }
+                } else {
+                    sc = s->granules[ch][0].scale_factors;
+                    j = 0;
+                    for (k = 0; k < 4; k++) {
+                        n = k == 0 ? 6 : 5;
+                        if ((g->scfsi & (0x8 >> k)) == 0) {
+                            slen = (k < 2) ? slen1 : slen2;
+                            if (slen) {
+                                for (i = 0; i < n; i++)
+                                    g->scale_factors[j++] = get_bits(&s->gb, slen);
+                            } else {
+                                for (i = 0; i < n; i++)
+                                    g->scale_factors[j++] = 0;
+                            }
+                        } else {
+                            /* simply copy from last granule */
+                            for (i = 0; i < n; i++) {
+                                g->scale_factors[j] = sc[j];
+                                j++;
+                            }
+                        }
+                    }
+                    g->scale_factors[j++] = 0;
+                }
+            } else {
+                int tindex, tindex2, slen[4], sl, sf;
+
+                /* LSF scale factors */
+                if (g->block_type == 2)
+                    tindex = g->switch_point ? 2 : 1;
+                else
+                    tindex = 0;
+
+                sf = g->scalefac_compress;
+                if ((s->mode_ext & MODE_EXT_I_STEREO) && ch == 1) {
+                    /* intensity stereo case */
+                    sf >>= 1;
+                    if (sf < 180) {
+                        lsf_sf_expand(slen, sf, 6, 6, 0);
+                        tindex2 = 3;
+                    } else if (sf < 244) {
+                        lsf_sf_expand(slen, sf - 180, 4, 4, 0);
+                        tindex2 = 4;
+                    } else {
+                        lsf_sf_expand(slen, sf - 244, 3, 0, 0);
+                        tindex2 = 5;
+                    }
+                } else {
+                    /* normal case */
+                    if (sf < 400) {
+                        lsf_sf_expand(slen, sf, 5, 4, 4);
+                        tindex2 = 0;
+                    } else if (sf < 500) {
+                        lsf_sf_expand(slen, sf - 400, 5, 4, 0);
+                        tindex2 = 1;
+                    } else {
+                        lsf_sf_expand(slen, sf - 500, 3, 0, 0);
+                        tindex2 = 2;
+                        g->preflag = 1;
+                    }
+                }
+
+                j = 0;
+                for (k = 0; k < 4; k++) {
+                    n  = ff_lsf_nsf_table[tindex2][tindex][k];
+                    sl = slen[k];
+                    if (sl) {
+                        for (i = 0; i < n; i++)
+                            g->scale_factors[j++] = get_bits(&s->gb, sl);
+                    } else {
+                        for (i = 0; i < n; i++)
+                            g->scale_factors[j++] = 0;
+                    }
+                }
+                /* XXX: should compute exact size */
+                for (; j < 40; j++)
+                    g->scale_factors[j] = 0;
+            }
+
+            exponents_from_scale_factors(s, g, exponents);
+
+            /* read Huffman coded residue */
+            huffman_decode(s, g, exponents, bits_pos + g->part2_3_length);
+        } /* ch */
+
+        if (s->mode == MPA_JSTEREO)
+            compute_stereo(s, &s->granules[0][gr], &s->granules[1][gr]);
+
+        for (ch = 0; ch < s->nb_channels; ch++) {
+            g = &s->granules[ch][gr];
+
+            reorder_block(s, g);
+            compute_antialias(s, g);
+            compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]);
+        }
+    } /* gr */
+    if (get_bits_count(&s->gb) < 0)
+        skip_bits_long(&s->gb, -get_bits_count(&s->gb));
+    return nb_granules * 18;
+}
+
+static int mp_decode_frame(MPADecodeContext *s, OUT_INT **samples,
+                           const uint8_t *buf, int buf_size)
+{
+    int i, nb_frames, ch, ret;
+    OUT_INT *samples_ptr;
+    /* Decodes one frame from buf; output goes to samples, or to s->frame when samples is NULL. */
+    init_get_bits(&s->gb, buf + HEADER_SIZE, (buf_size - HEADER_SIZE) * 8); /* payload starts after the header */
+    if (s->error_protection)
+        s->crc = get_bits(&s->gb, 16); /* only stored here; not verified in this function */
+
+    switch(s->layer) {
+    case 1:
+        s->avctx->frame_size = 384;
+        nb_frames = mp_decode_layer1(s);
+        break;
+    case 2:
+        s->avctx->frame_size = 1152;
+        nb_frames = mp_decode_layer2(s);
+        break;
+    case 3:
+        s->avctx->frame_size = s->lsf ? 576 : 1152;
+    default: /* case 3 deliberately falls through to here */
+        nb_frames = mp_decode_layer3(s);
+        /* Save the bytes that were not consumed into last_buf for the next call. */
+        s->last_buf_size=0;
+        if (s->in_gb.buffer) { /* s->gb is still reading last_buf; in_gb holds the saved packet reader */
+            align_get_bits(&s->gb);
+            i = (get_bits_left(&s->gb) >> 3) - s->extrasize;
+            if (i >= 0 && i <= BACKSTEP_SIZE) {
+                memmove(s->last_buf, s->gb.buffer + (get_bits_count(&s->gb) >> 3), i);
+                s->last_buf_size=i;
+            } else
+                av_log(s->avctx, AV_LOG_ERROR, "invalid old backstep %d\n", i);
+            s->gb           = s->in_gb; /* switch back to the packet reader */
+            s->in_gb.buffer = NULL;
+            s->extrasize    = 0;
+        }
+
+        align_get_bits(&s->gb);
+        av_assert1((get_bits_count(&s->gb) & 7) == 0);
+        i = (get_bits_left(&s->gb) >> 3) - s->extrasize;
+        if (i < 0 || i > BACKSTEP_SIZE || nb_frames < 0) { /* unusable count: keep the tail of the payload instead */
+            if (i < 0)
+                av_log(s->avctx, AV_LOG_ERROR, "invalid new backstep %d\n", i);
+            i = FFMIN(BACKSTEP_SIZE, buf_size - HEADER_SIZE);
+        }
+        av_assert1(i <= buf_size - HEADER_SIZE && i >= 0);
+        memcpy(s->last_buf + s->last_buf_size, s->gb.buffer + buf_size - HEADER_SIZE - i, i); /* last i payload bytes */
+        s->last_buf_size += i;
+    }
+
+    if(nb_frames < 0) /* error code from the layer decoder */
+        return nb_frames;
+
+    /* get output buffer */
+    if (!samples) {
+        av_assert0(s->frame);
+        s->frame->nb_samples = s->avctx->frame_size;
+        if ((ret = ff_get_buffer(s->avctx, s->frame, 0)) < 0)
+            return ret;
+        samples = (OUT_INT **)s->frame->extended_data;
+    }
+
+    /* apply the synthesis filter */
+    for (ch = 0; ch < s->nb_channels; ch++) {
+        int sample_stride;
+        if (s->avctx->sample_fmt == OUT_FMT_P) { /* one output pointer per channel, contiguous samples */
+            samples_ptr   = samples[ch];
+            sample_stride = 1;
+        } else { /* all channels share samples[0], interleaved */
+            samples_ptr   = samples[0] + ch;
+            sample_stride = s->nb_channels;
+        }
+        for (i = 0; i < nb_frames; i++) {
+            RENAME(ff_mpa_synth_filter)(&s->mpadsp, s->synth_buf[ch],
+                                        &(s->synth_buf_offset[ch]),
+                                        RENAME(ff_mpa_synth_window),
+                                        &s->dither_state, samples_ptr,
+                                        sample_stride, s->sb_samples[ch][i]);
+            samples_ptr += 32 * sample_stride; /* step past 32 samples of this channel per call */
+        }
+    }
+
+    return nb_frames * 32 * sizeof(OUT_INT) * s->nb_channels; /* output size in bytes over all channels */
+}
+
+static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
+                        int *got_frame_ptr, AVPacket *avpkt)
+{
+    const uint8_t *buf  = avpkt->data;
+    int buf_size        = avpkt->size;
+    MPADecodeContext *s = avctx->priv_data;
+    uint32_t header;
+    int ret;
+    /* Decodes the first MPEG audio frame of the packet; returns bytes consumed or an error code. */
+    int skipped = 0;
+    while(buf_size && !*buf){ /* step over leading zero bytes, remembering how many */
+        buf++;
+        buf_size--;
+        skipped++;
+    }
+
+    if (buf_size < HEADER_SIZE)
+        return AVERROR_INVALIDDATA;
+
+    header = AV_RB32(buf);
+    if (header >> 8 == AV_RB32("TAG") >> 8) { /* only the first three bytes are compared */
+        av_log(avctx, AV_LOG_DEBUG, "discarding ID3 tag\n");
+        return buf_size + skipped; /* the whole packet is reported as consumed */
+    }
+    ret = avpriv_mpegaudio_decode_header((MPADecodeHeader *)s, header); /* fills the header fields of s */
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Header missing\n");
+        return AVERROR_INVALIDDATA;
+    } else if (ret == 1) {
+        /* free format: prepare to compute frame size */
+        s->frame_size = -1;
+        return AVERROR_INVALIDDATA;
+    }
+    /* update codec info */
+    av_channel_layout_uninit(&avctx->ch_layout);
+    avctx->ch_layout = s->nb_channels == 1 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO :
+                                             (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
+    if (!avctx->bit_rate) /* only set when no bit rate is present yet */
+        avctx->bit_rate = s->bit_rate;
+
+    if (s->frame_size <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
+        return AVERROR_INVALIDDATA;
+    } else if (s->frame_size < buf_size) {
+        av_log(avctx, AV_LOG_DEBUG, "incorrect frame size - multiple frames in buffer?\n");
+        buf_size= s->frame_size; /* decode and consume only the first frame */
+    }
+
+    s->frame = frame;
+
+    ret = mp_decode_frame(s, NULL, buf, buf_size); /* NULL: output is written into s->frame */
+    if (ret >= 0) {
+        s->frame->nb_samples = avctx->frame_size;
+        *got_frame_ptr       = 1;
+        avctx->sample_rate   = s->sample_rate;
+        //FIXME maybe move the other codec info stuff from above here too
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Error while decoding MPEG audio frame.\n");
+        /* Only return an error if the bad frame makes up the whole packet or
+         * the error is related to buffer management.
+         * If there is more data in the packet, just consume the bad frame
+         * instead of returning an error, which would discard the whole
+         * packet. */
+        *got_frame_ptr = 0;
+        if (buf_size == avpkt->size || ret != AVERROR_INVALIDDATA)
+            return ret;
+    }
+    s->frame_size = 0;
+    return buf_size + skipped; /* frame length plus the zero bytes skipped at the start */
+}
+
+static void mp_flush(MPADecodeContext *ctx)
+{
+    ctx->dither_state  = 0; /* scalar state first, then the history buffers */
+    ctx->last_buf_size = 0; /* no bytes carried over from an earlier frame */
+    memset(ctx->mdct_buf,  0, sizeof ctx->mdct_buf);
+    memset(ctx->synth_buf, 0, sizeof ctx->synth_buf);
+}
+
+static void flush(AVCodecContext *avctx)
+{
+    mp_flush((MPADecodeContext *)avctx->priv_data); /* codec flush entry point: reset the decoder state */
+}
+
+#if CONFIG_MP3ADU_DECODER || CONFIG_MP3ADUFLOAT_DECODER
+static int decode_frame_adu(AVCodecContext *avctx, AVFrame *frame,
+                            int *got_frame_ptr, AVPacket *avpkt)
+{
+    const uint8_t *buf  = avpkt->data;
+    int buf_size        = avpkt->size;
+    MPADecodeContext *s = avctx->priv_data;
+    uint32_t header;
+    int len, ret;
+    /* Decodes one ADU packet as a single frame; returns buf_size on success or an error code. */
+    len = buf_size;
+
+    // Discard too short frames
+    if (buf_size < HEADER_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* NOTE(review): len is clamped here, but mp_decode_frame() below is still passed buf_size - confirm intended. */
+    if (len > MPA_MAX_CODED_FRAME_SIZE)
+        len = MPA_MAX_CODED_FRAME_SIZE;
+
+    // Get header and restore sync word
+    header = AV_RB32(buf) | 0xffe00000;
+
+    ret = avpriv_mpegaudio_decode_header((MPADecodeHeader *)s, header);
+    if (ret < 0) { /* only negative results abort; a return of 1 continues below */
+        av_log(avctx, AV_LOG_ERROR, "Invalid frame header\n");
+        return ret;
+    }
+    /* update codec info */
+    avctx->sample_rate = s->sample_rate;
+    av_channel_layout_uninit(&avctx->ch_layout);
+    avctx->ch_layout = s->nb_channels == 1 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO :
+                                             (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
+    if (!avctx->bit_rate)
+        avctx->bit_rate = s->bit_rate;
+
+    s->frame_size = len; /* overrides the size computed from the header */
+
+    s->frame = frame;
+
+    ret = mp_decode_frame(s, NULL, buf, buf_size); /* NULL: output is written into s->frame */
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Error while decoding MPEG audio frame.\n");
+        return ret;
+    }
+
+    *got_frame_ptr = 1;
+
+    return buf_size; /* the whole packet is always consumed */
+}
+#endif /* CONFIG_MP3ADU_DECODER || CONFIG_MP3ADUFLOAT_DECODER */
+
+#if CONFIG_MP3ON4_DECODER || CONFIG_MP3ON4FLOAT_DECODER
+
+/**
+ * Context for MP3On4 decoder
+ */
+typedef struct MP3On4DecodeContext {
+    int frames;                     ///< number of mp3 frames per block (number of mp3 decoder instances), taken from mp3Frames[]
+    int syncword;                   ///< sync bits OR-ed into each frame header before it is parsed
+    const uint8_t *coff;            ///< row of chan_offset[]: index of the first output plane of each frame
+    MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance; only the first `frames` entries are allocated
+} MP3On4DecodeContext;
+
+#include "mpeg4audio.h"
+
+/* Next 3 arrays are indexed by channel config number (passed via codecdata) */
+
+/* number of mp3 decoder instances */
+static const uint8_t mp3Frames[8] = { 0, 1, 1, 2, 3, 3, 4, 5 }; /* entry 0 is never used: decode_init_mp3on4() rejects config 0 */
+
+/* offsets into output buffer, assume output order is FL FR C LFE BL BR SL SR */
+static const uint8_t chan_offset[8][5] = { /* [channel config][decoder instance] -> index of that instance's first output plane */
+    { 0             },  // config 0 is rejected by decode_init_mp3on4()
+    { 0             },  // C
+    { 0             },  // FLR
+    { 2, 0          },  // C FLR
+    { 2, 0, 3       },  // C FLR BS
+    { 2, 0, 3       },  // C FLR BLRS
+    { 2, 0, 4, 3    },  // C FLR BLRS LFE
+    { 2, 0, 6, 4, 3 },  // C FLR BLRS BLR LFE
+};
+
+/* mp3on4 channel layouts */
+static const int16_t chan_layout[8] = { /* NOTE(review): int16_t is only safe while every mask below fits in 15 bits - confirm */
+    0,                     /* config 0: rejected by decode_init_mp3on4() */
+    AV_CH_LAYOUT_MONO,
+    AV_CH_LAYOUT_STEREO,
+    AV_CH_LAYOUT_SURROUND,
+    AV_CH_LAYOUT_4POINT0,
+    AV_CH_LAYOUT_5POINT0,
+    AV_CH_LAYOUT_5POINT1,
+    AV_CH_LAYOUT_7POINT1   /* config 7, the highest value decode_init_mp3on4() accepts */
+};
+
+static av_cold int decode_close_mp3on4(AVCodecContext * avctx)
+{
+    MP3On4DecodeContext *ctx = avctx->priv_data;
+    int idx = 0;
+
+    /* Release every per-frame decoder instance; always reports success. */
+    while (idx < ctx->frames)
+        av_freep(&ctx->mp3decctx[idx++]);
+    return 0;
+}
+
+
+static av_cold int decode_init_mp3on4(AVCodecContext * avctx)
+{
+    MP3On4DecodeContext *s = avctx->priv_data;
+    MPEG4AudioConfig cfg = { 0 }; /* zeroed: the parse result below is not checked, so never read indeterminate fields */
+    int i, ret;
+    /* Sets up one MPADecodeContext per mp3 frame of a block; returns 0 or a negative error code. */
+    if ((avctx->extradata_size < 2) || !avctx->extradata) {
+        av_log(avctx, AV_LOG_ERROR, "Codec extradata missing or too short.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    avpriv_mpeg4audio_get_config2(&cfg, avctx->extradata,
+                                  avctx->extradata_size, 1, avctx);
+    if (!cfg.chan_config || cfg.chan_config > 7) { /* also catches a cfg left at zero by a failed parse */
+        av_log(avctx, AV_LOG_ERROR, "Invalid channel config number.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    s->frames             = mp3Frames[cfg.chan_config];
+    s->coff               = chan_offset[cfg.chan_config];
+    av_channel_layout_uninit(&avctx->ch_layout);
+    av_channel_layout_from_mask(&avctx->ch_layout, chan_layout[cfg.chan_config]);
+
+    if (cfg.sample_rate < 16000) /* selects which sync bits are patched into each frame header */
+        s->syncword = 0xffe00000;
+    else
+        s->syncword = 0xfff00000;
+
+    /* Init the first mp3 decoder in standard way, so that all tables get built.
+     * We replace avctx->priv_data with the context of the first decoder so that
+     * decode_init() does not have to be changed.
+     * Other decoders will be initialized here copying data from the first context
+     */
+    // Allocate zeroed memory for the first decoder context
+    s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext));
+    if (!s->mp3decctx[0])
+        return AVERROR(ENOMEM);
+    // Put decoder context in place to make decode_init() happy
+    avctx->priv_data = s->mp3decctx[0];
+    ret = decode_init(avctx);
+    // Restore mp3on4 context pointer
+    avctx->priv_data = s;
+    if (ret < 0)
+        return ret;
+    s->mp3decctx[0]->adu_mode = 1; // Set adu mode
+
+    /* Create a separate codec/context for each frame (first is already ok).
+     * Each frame is 1 or 2 channels - up to 5 frames allowed
+     */
+    for (i = 1; i < s->frames; i++) {
+        s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext));
+        if (!s->mp3decctx[i])
+            return AVERROR(ENOMEM);
+        s->mp3decctx[i]->adu_mode = 1;
+        s->mp3decctx[i]->avctx = avctx;
+        s->mp3decctx[i]->mpadsp = s->mp3decctx[0]->mpadsp; /* reuse what decode_init() set up for instance 0 */
+        s->mp3decctx[i]->butterflies_float = s->mp3decctx[0]->butterflies_float;
+    }
+
+    return 0;
+}
+
+
+static void flush_mp3on4(AVCodecContext *avctx)
+{
+    MP3On4DecodeContext *ctx = avctx->priv_data;
+    int idx = 0;
+
+    while (idx < ctx->frames) /* reset each per-frame decoder instance in turn */
+        mp_flush(ctx->mp3decctx[idx++]);
+}
+
+
+static int decode_frame_mp3on4(AVCodecContext *avctx, AVFrame *frame,
+                               int *got_frame_ptr, AVPacket *avpkt)
+{
+    const uint8_t *buf     = avpkt->data;
+    int buf_size           = avpkt->size;
+    MP3On4DecodeContext *s = avctx->priv_data;
+    MPADecodeContext *m;
+    int fsize, len = buf_size, out_size = 0;
+    uint32_t header;
+    OUT_INT **out_samples;
+    OUT_INT *outptr[2];
+    int fr, ch, ret;
+    /* Decodes the s->frames mp3 frames of one packet into frame; returns buf_size or an error code. */
+    /* get output buffer */
+    frame->nb_samples = MPA_FRAME_SIZE;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+    out_samples = (OUT_INT **)frame->extended_data;
+
+    // Discard too short frames
+    if (buf_size < HEADER_SIZE)
+        return AVERROR_INVALIDDATA;
+
+    avctx->bit_rate = 0; /* rebuilt below as the sum over all frames */
+
+    ch = 0;
+    for (fr = 0; fr < s->frames; fr++) {
+        fsize = AV_RB16(buf) >> 4; /* frame size: the top 12 bits of the first two bytes */
+        fsize = FFMIN3(fsize, len, MPA_MAX_CODED_FRAME_SIZE);
+        m     = s->mp3decctx[fr];
+        av_assert1(m);
+
+        if (fsize < HEADER_SIZE) {
+            av_log(avctx, AV_LOG_ERROR, "Frame size smaller than header size\n");
+            return AVERROR_INVALIDDATA;
+        }
+        header = (AV_RB32(buf) & 0x000fffff) | s->syncword; // patch header
+
+        ret = avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Bad header, discard block\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (ch + m->nb_channels > avctx->ch_layout.nb_channels ||
+            s->coff[fr] + m->nb_channels > avctx->ch_layout.nb_channels) {
+            av_log(avctx, AV_LOG_ERROR, "frame channel count exceeds codec "
+                                        "channel count\n");
+            return AVERROR_INVALIDDATA;
+        }
+        ch += m->nb_channels;
+
+        outptr[0] = out_samples[s->coff[fr]];
+        if (m->nb_channels > 1) /* outptr[1] is left unset for a one-channel frame */
+            outptr[1] = out_samples[s->coff[fr] + 1];
+
+        if ((ret = mp_decode_frame(m, outptr, buf, fsize)) < 0) { /* ch was already advanced past this frame */
+            av_log(avctx, AV_LOG_ERROR, "failed to decode channel %d\n", ch);
+            memset(outptr[0], 0, MPA_FRAME_SIZE*sizeof(OUT_INT));
+            if (m->nb_channels > 1)
+                memset(outptr[1], 0, MPA_FRAME_SIZE*sizeof(OUT_INT));
+            ret = m->nb_channels * MPA_FRAME_SIZE*sizeof(OUT_INT); /* count the zeroed planes as a full frame */
+        }
+
+        out_size += ret;
+        buf      += fsize;
+        len      -= fsize;
+
+        avctx->bit_rate += m->bit_rate;
+    }
+    if (ch != avctx->ch_layout.nb_channels) {
+        av_log(avctx, AV_LOG_ERROR, "failed to decode all channels\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* update codec info */
+    avctx->sample_rate = s->mp3decctx[0]->sample_rate;
+
+    frame->nb_samples = out_size / (avctx->ch_layout.nb_channels * sizeof(OUT_INT)); /* bytes -> samples per channel */
+    *got_frame_ptr    = 1;
+
+    return buf_size;
+}
+#endif /* CONFIG_MP3ON4_DECODER || CONFIG_MP3ON4FLOAT_DECODER */
diff --git a/media/ffvpx/libavcodec/mpegaudiodecheader.c b/media/ffvpx/libavcodec/mpegaudiodecheader.c
new file mode 100644
index 0000000000..ef63befbf4
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodecheader.c
@@ -0,0 +1,152 @@
+/*
+ * MPEG Audio header decoder
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPEG Audio header decoder.
+ */
+
+#include "libavutil/macros.h"
+
+#include "mpegaudio.h"
+#include "mpegaudiodata.h"
+#include "mpegaudiodecheader.h"
+
+
+/**
+ * Parse a 32-bit MPEG audio frame header and fill in *s.
+ * @return a negative value if ff_mpa_check_header() rejects the header,
+ *         1 for a free format header (bitrate index 0; s->frame_size and
+ *         s->bit_rate are then left unwritten), 0 on success
+ */
+int avpriv_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header)
+{
+    int sample_rate, frame_size, mpeg25, padding;
+    int sample_rate_index, bitrate_index, ret;
+
+    ret = ff_mpa_check_header(header);
+    if (ret < 0)
+        return ret;
+
+    if (header & (1<<20)) {
+        s->lsf = (header & (1<<19)) ? 0 : 1;
+        mpeg25 = 0;
+    } else {
+        s->lsf = 1; /* both flags set: the sample rate is shifted down twice below */
+        mpeg25 = 1;
+    }
+
+    s->layer = 4 - ((header >> 17) & 3);
+    /* extract frequency */
+    sample_rate_index = (header >> 10) & 3;
+    if (sample_rate_index >= FF_ARRAY_ELEMS(ff_mpa_freq_tab))
+        sample_rate_index = 0;
+    sample_rate = ff_mpa_freq_tab[sample_rate_index] >> (s->lsf + mpeg25);
+    sample_rate_index += 3 * (s->lsf + mpeg25); /* final index is between 0 and 8 */
+    s->sample_rate_index = sample_rate_index;
+    s->error_protection = ((header >> 16) & 1) ^ 1; /* set when header bit 16 is clear */
+    s->sample_rate = sample_rate;
+
+    bitrate_index = (header >> 12) & 0xf;
+    padding = (header >> 9) & 1;
+    //extension = (header >> 8) & 1;
+    s->mode = (header >> 6) & 3;
+    s->mode_ext = (header >> 4) & 3;
+    //copyright = (header >> 3) & 1;
+    //original = (header >> 2) & 1;
+    //emphasis = header & 3;
+
+    s->nb_channels = s->mode == MPA_MONO ? 1 : 2;
+
+    /* free format: no frame size can be computed here, signal it */
+    if (bitrate_index == 0)
+        return 1;
+    frame_size = ff_mpa_bitrate_tab[s->lsf][s->layer - 1][bitrate_index];
+    s->bit_rate = frame_size * 1000;
+    switch(s->layer) {
+    case 1:
+        frame_size = (frame_size * 12000) / sample_rate;
+        frame_size = (frame_size + padding) * 4;
+        break;
+    case 2:
+        frame_size = (frame_size * 144000) / sample_rate;
+        frame_size += padding;
+        break;
+    default:
+    case 3:
+        frame_size = (frame_size * 144000) / (sample_rate << s->lsf);
+        frame_size += padding;
+        break;
+    }
+    s->frame_size = frame_size;
+
+#if defined(DEBUG)
+    ff_dlog(NULL, "layer%d, %d Hz, %d kbits/s, ",
+           s->layer, s->sample_rate, s->bit_rate / 1000); /* bit_rate is stored in bit/s */
+    if (s->nb_channels == 2) {
+        if (s->layer == 3) {
+            if (s->mode_ext & MODE_EXT_MS_STEREO)
+                ff_dlog(NULL, "ms-");
+            if (s->mode_ext & MODE_EXT_I_STEREO)
+                ff_dlog(NULL, "i-");
+        }
+        ff_dlog(NULL, "stereo");
+    } else {
+        ff_dlog(NULL, "mono");
+    }
+    ff_dlog(NULL, "\n");
+#endif
+    return 0;
+}
+
+int ff_mpa_decode_header(uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate, enum AVCodecID *codec_id)
+{
+    /*
+     * Convenience wrapper: parse an MPEG audio header and report the stream
+     * parameters through the output pointers.
+     * *codec_id is also an input: an incoming AV_CODEC_ID_MP3ADU is kept for
+     * layer 3 instead of being replaced by AV_CODEC_ID_MP3.
+     *
+     * Returns -1 when the header is rejected or is free format (nothing is
+     * written in that case), otherwise the coded frame size in bytes.
+     */
+    MPADecodeHeader hdr;
+
+    if (avpriv_mpegaudio_decode_header(&hdr, head))
+        return -1;
+
+    if (hdr.layer == 1) {
+        *codec_id   = AV_CODEC_ID_MP1;
+        *frame_size = 384;
+    } else if (hdr.layer == 2) {
+        *codec_id   = AV_CODEC_ID_MP2;
+        *frame_size = 1152;
+    } else {
+        /* layer 3, which is also the fallback for any other value */
+        if (*codec_id != AV_CODEC_ID_MP3ADU)
+            *codec_id = AV_CODEC_ID_MP3;
+        *frame_size = hdr.lsf ? 576 : 1152;
+    }
+    *sample_rate = hdr.sample_rate;
+    *channels    = hdr.nb_channels;
+    *bit_rate    = hdr.bit_rate;
+    return hdr.frame_size;
+}
diff --git a/media/ffvpx/libavcodec/mpegaudiodecheader.h b/media/ffvpx/libavcodec/mpegaudiodecheader.h
new file mode 100644
index 0000000000..ed5d1f3b33
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodecheader.h
@@ -0,0 +1,81 @@
+/*
+ * MPEG Audio header decoder
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPEG Audio header decoder.
+ */
+
+#ifndef AVCODEC_MPEGAUDIODECHEADER_H
+#define AVCODEC_MPEGAUDIODECHEADER_H
+
+#include <stdint.h>
+#include "codec_id.h"
+
+#define MP3_MASK 0xFFFE0CCF
+
+#define MPA_DECODE_HEADER \
+    int frame_size; /* coded frame size in bytes */ \
+    int error_protection; /* nonzero when header bit 16 is clear */ \
+    int layer; /* 1, 2 or 3 */ \
+    int sample_rate; /* in Hz */ \
+    int sample_rate_index; /* between 0 and 8 */ \
+    int bit_rate; /* bitrate table value * 1000 */ \
+    int nb_channels; /* 1 when mode is MPA_MONO, otherwise 2 */ \
+    int mode; /* header bits 7..6 */ \
+    int mode_ext; /* header bits 5..4 */ \
+    int lsf; /* 0 or 1; 1 halves the sample rate taken from the frequency table */
+
+typedef struct MPADecodeHeader {
+  MPA_DECODE_HEADER
+} MPADecodeHeader;
+
+/* Header decoding. Returns a negative value if ff_mpa_check_header()
+   rejects the header, 1 if a free format header is found (the frame size
+   must then be computed externally), and 0 otherwise. */
+int avpriv_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header);
+
+/* useful helper to get MPEG audio stream info. Return -1 if error in
+   header, otherwise the coded frame size in bytes */
+int ff_mpa_decode_header(uint32_t head, int *sample_rate,
+                         int *channels, int *frame_size, int *bitrate, enum AVCodecID *codec_id);
+
+/* fast header check for resync */
+static inline int ff_mpa_check_header(uint32_t header){
+    /*
+     * Returns 0 when the header passes every check below and -1 otherwise.
+     * The checks do not depend on each other, so each one is computed as a
+     * flag and the flags are combined at the end.
+     */
+    const int sync_missing  = (header & 0xffe00000) != 0xffe00000; /* top 11 bits must all be set */
+    const int bad_version   = ((header >> 19) & 3) == 1;
+    const int bad_layer     = ((header >> 17) & 3) == 0;
+    const int bad_bitrate   = ((header >> 12) & 0xf) == 0xf;
+    const int bad_frequency = ((header >> 10) & 3) == 3;
+
+    if (sync_missing || bad_version || bad_layer)
+        return -1;
+    if (bad_bitrate || bad_frequency)
+        return -1;
+    return 0;
+}
+
+#endif /* AVCODEC_MPEGAUDIODECHEADER_H */
diff --git a/media/ffvpx/libavcodec/mpegaudiodectab.h b/media/ffvpx/libavcodec/mpegaudiodectab.h
new file mode 100644
index 0000000000..accd12b8e2
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodectab.h
@@ -0,0 +1,615 @@
+/*
+ * MPEG Audio decoder
+ * copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * mpeg audio layer decoder tables.
+ */
+
+#ifndef AVCODEC_MPEGAUDIODECTAB_H
+#define AVCODEC_MPEGAUDIODECTAB_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "mpegaudio.h"
+
+/*******************************************************/
+/* layer 3 tables */
+
+/* layer 3 huffman tables: one descriptor per entry of mpa_huff_tables */
+typedef struct HuffTable {
+    int xsize;             /* side length; the arrays in this file hold xsize*xsize entries */
+    const uint8_t *bits;   /* points at a mpa_huffbits_N array (NULL in slot 0) */
+    const uint16_t *codes; /* points at the matching mpa_huffcodes_N array (NULL in slot 0) */
+} HuffTable;
+
+/* layer3 scale factor size */
+static const uint8_t slen_table[2][16] = {
+    { 0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
+    { 0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3 },
+};
+
+/* number of lsf scale factors for a given size */
+static const uint8_t lsf_nsf_table[6][3][4] = {
+    { {  6,  5,  5, 5 }, {  9,  9,  9, 9 }, {  6,  9,  9, 9 } },
+    { {  6,  5,  7, 3 }, {  9,  9, 12, 6 }, {  6,  9, 12, 6 } },
+    { { 11, 10,  0, 0 }, { 18, 18,  0, 0 }, { 15, 18,  0, 0 } },
+    { {  7,  7,  7, 0 }, { 12, 12, 12, 0 }, {  6, 15, 12, 0 } },
+    { {  6,  6,  6, 3 }, { 12,  9,  9, 6 }, {  6, 12,  9, 6 } },
+    { {  8,  8,  5, 0 }, { 15, 12,  9, 0 }, {  6, 18,  9, 0 } },
+};
+
+/* mpegaudio layer 3 huffman tables */
+
+static const uint16_t mpa_huffcodes_1[4] = {
+ 0x0001, 0x0001, 0x0001, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_1[4] = {
+  1,  3,  2,  3,
+};
+
+static const uint16_t mpa_huffcodes_2[9] = {
+ 0x0001, 0x0002, 0x0001, 0x0003, 0x0001, 0x0001, 0x0003, 0x0002,
+ 0x0000,
+};
+
+static const uint8_t mpa_huffbits_2[9] = {
+  1,  3,  6,  3,  3,  5,  5,  5,
+  6,
+};
+
+static const uint16_t mpa_huffcodes_3[9] = {
+ 0x0003, 0x0002, 0x0001, 0x0001, 0x0001, 0x0001, 0x0003, 0x0002,
+ 0x0000,
+};
+
+static const uint8_t mpa_huffbits_3[9] = {
+  2,  2,  6,  3,  2,  5,  5,  5,
+  6,
+};
+
+static const uint16_t mpa_huffcodes_5[16] = {
+ 0x0001, 0x0002, 0x0006, 0x0005, 0x0003, 0x0001, 0x0004, 0x0004,
+ 0x0007, 0x0005, 0x0007, 0x0001, 0x0006, 0x0001, 0x0001, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_5[16] = {
+  1,  3,  6,  7,  3,  3,  6,  7,
+  6,  6,  7,  8,  7,  6,  7,  8,
+};
+
+static const uint16_t mpa_huffcodes_6[16] = {
+ 0x0007, 0x0003, 0x0005, 0x0001, 0x0006, 0x0002, 0x0003, 0x0002,
+ 0x0005, 0x0004, 0x0004, 0x0001, 0x0003, 0x0003, 0x0002, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_6[16] = {
+  3,  3,  5,  7,  3,  2,  4,  5,
+  4,  4,  5,  6,  6,  5,  6,  7,
+};
+
+static const uint16_t mpa_huffcodes_7[36] = {
+ 0x0001, 0x0002, 0x000a, 0x0013, 0x0010, 0x000a, 0x0003, 0x0003,
+ 0x0007, 0x000a, 0x0005, 0x0003, 0x000b, 0x0004, 0x000d, 0x0011,
+ 0x0008, 0x0004, 0x000c, 0x000b, 0x0012, 0x000f, 0x000b, 0x0002,
+ 0x0007, 0x0006, 0x0009, 0x000e, 0x0003, 0x0001, 0x0006, 0x0004,
+ 0x0005, 0x0003, 0x0002, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_7[36] = {
+  1,  3,  6,  8,  8,  9,  3,  4,
+  6,  7,  7,  8,  6,  5,  7,  8,
+  8,  9,  7,  7,  8,  9,  9,  9,
+  7,  7,  8,  9,  9, 10,  8,  8,
+  9, 10, 10, 10,
+};
+
+static const uint16_t mpa_huffcodes_8[36] = {
+ 0x0003, 0x0004, 0x0006, 0x0012, 0x000c, 0x0005, 0x0005, 0x0001,
+ 0x0002, 0x0010, 0x0009, 0x0003, 0x0007, 0x0003, 0x0005, 0x000e,
+ 0x0007, 0x0003, 0x0013, 0x0011, 0x000f, 0x000d, 0x000a, 0x0004,
+ 0x000d, 0x0005, 0x0008, 0x000b, 0x0005, 0x0001, 0x000c, 0x0004,
+ 0x0004, 0x0001, 0x0001, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_8[36] = {
+  2,  3,  6,  8,  8,  9,  3,  2,
+  4,  8,  8,  8,  6,  4,  6,  8,
+  8,  9,  8,  8,  8,  9,  9, 10,
+  8,  7,  8,  9, 10, 10,  9,  8,
+  9,  9, 11, 11,
+};
+
+static const uint16_t mpa_huffcodes_9[36] = {
+ 0x0007, 0x0005, 0x0009, 0x000e, 0x000f, 0x0007, 0x0006, 0x0004,
+ 0x0005, 0x0005, 0x0006, 0x0007, 0x0007, 0x0006, 0x0008, 0x0008,
+ 0x0008, 0x0005, 0x000f, 0x0006, 0x0009, 0x000a, 0x0005, 0x0001,
+ 0x000b, 0x0007, 0x0009, 0x0006, 0x0004, 0x0001, 0x000e, 0x0004,
+ 0x0006, 0x0002, 0x0006, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_9[36] = {
+  3,  3,  5,  6,  8,  9,  3,  3,
+  4,  5,  6,  8,  4,  4,  5,  6,
+  7,  8,  6,  5,  6,  7,  7,  8,
+  7,  6,  7,  7,  8,  9,  8,  7,
+  8,  8,  9,  9,
+};
+
+static const uint16_t mpa_huffcodes_10[64] = {
+ 0x0001, 0x0002, 0x000a, 0x0017, 0x0023, 0x001e, 0x000c, 0x0011,
+ 0x0003, 0x0003, 0x0008, 0x000c, 0x0012, 0x0015, 0x000c, 0x0007,
+ 0x000b, 0x0009, 0x000f, 0x0015, 0x0020, 0x0028, 0x0013, 0x0006,
+ 0x000e, 0x000d, 0x0016, 0x0022, 0x002e, 0x0017, 0x0012, 0x0007,
+ 0x0014, 0x0013, 0x0021, 0x002f, 0x001b, 0x0016, 0x0009, 0x0003,
+ 0x001f, 0x0016, 0x0029, 0x001a, 0x0015, 0x0014, 0x0005, 0x0003,
+ 0x000e, 0x000d, 0x000a, 0x000b, 0x0010, 0x0006, 0x0005, 0x0001,
+ 0x0009, 0x0008, 0x0007, 0x0008, 0x0004, 0x0004, 0x0002, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_10[64] = {
+  1,  3,  6,  8,  9,  9,  9, 10,
+  3,  4,  6,  7,  8,  9,  8,  8,
+  6,  6,  7,  8,  9, 10,  9,  9,
+  7,  7,  8,  9, 10, 10,  9, 10,
+  8,  8,  9, 10, 10, 10, 10, 10,
+  9,  9, 10, 10, 11, 11, 10, 11,
+  8,  8,  9, 10, 10, 10, 11, 11,
+  9,  8,  9, 10, 10, 11, 11, 11,
+};
+
+static const uint16_t mpa_huffcodes_11[64] = {
+ 0x0003, 0x0004, 0x000a, 0x0018, 0x0022, 0x0021, 0x0015, 0x000f,
+ 0x0005, 0x0003, 0x0004, 0x000a, 0x0020, 0x0011, 0x000b, 0x000a,
+ 0x000b, 0x0007, 0x000d, 0x0012, 0x001e, 0x001f, 0x0014, 0x0005,
+ 0x0019, 0x000b, 0x0013, 0x003b, 0x001b, 0x0012, 0x000c, 0x0005,
+ 0x0023, 0x0021, 0x001f, 0x003a, 0x001e, 0x0010, 0x0007, 0x0005,
+ 0x001c, 0x001a, 0x0020, 0x0013, 0x0011, 0x000f, 0x0008, 0x000e,
+ 0x000e, 0x000c, 0x0009, 0x000d, 0x000e, 0x0009, 0x0004, 0x0001,
+ 0x000b, 0x0004, 0x0006, 0x0006, 0x0006, 0x0003, 0x0002, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_11[64] = {
+  2,  3,  5,  7,  8,  9,  8,  9,
+  3,  3,  4,  6,  8,  8,  7,  8,
+  5,  5,  6,  7,  8,  9,  8,  8,
+  7,  6,  7,  9,  8, 10,  8,  9,
+  8,  8,  8,  9,  9, 10,  9, 10,
+  8,  8,  9, 10, 10, 11, 10, 11,
+  8,  7,  7,  8,  9, 10, 10, 10,
+  8,  7,  8,  9, 10, 10, 10, 10,
+};
+
+static const uint16_t mpa_huffcodes_12[64] = {
+ 0x0009, 0x0006, 0x0010, 0x0021, 0x0029, 0x0027, 0x0026, 0x001a,
+ 0x0007, 0x0005, 0x0006, 0x0009, 0x0017, 0x0010, 0x001a, 0x000b,
+ 0x0011, 0x0007, 0x000b, 0x000e, 0x0015, 0x001e, 0x000a, 0x0007,
+ 0x0011, 0x000a, 0x000f, 0x000c, 0x0012, 0x001c, 0x000e, 0x0005,
+ 0x0020, 0x000d, 0x0016, 0x0013, 0x0012, 0x0010, 0x0009, 0x0005,
+ 0x0028, 0x0011, 0x001f, 0x001d, 0x0011, 0x000d, 0x0004, 0x0002,
+ 0x001b, 0x000c, 0x000b, 0x000f, 0x000a, 0x0007, 0x0004, 0x0001,
+ 0x001b, 0x000c, 0x0008, 0x000c, 0x0006, 0x0003, 0x0001, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_12[64] = {
+  4,  3,  5,  7,  8,  9,  9,  9,
+  3,  3,  4,  5,  7,  7,  8,  8,
+  5,  4,  5,  6,  7,  8,  7,  8,
+  6,  5,  6,  6,  7,  8,  8,  8,
+  7,  6,  7,  7,  8,  8,  8,  9,
+  8,  7,  8,  8,  8,  9,  8,  9,
+  8,  7,  7,  8,  8,  9,  9, 10,
+  9,  8,  8,  9,  9,  9,  9, 10,
+};
+
+static const uint16_t mpa_huffcodes_13[256] = {
+ 0x0001, 0x0005, 0x000e, 0x0015, 0x0022, 0x0033, 0x002e, 0x0047,
+ 0x002a, 0x0034, 0x0044, 0x0034, 0x0043, 0x002c, 0x002b, 0x0013,
+ 0x0003, 0x0004, 0x000c, 0x0013, 0x001f, 0x001a, 0x002c, 0x0021,
+ 0x001f, 0x0018, 0x0020, 0x0018, 0x001f, 0x0023, 0x0016, 0x000e,
+ 0x000f, 0x000d, 0x0017, 0x0024, 0x003b, 0x0031, 0x004d, 0x0041,
+ 0x001d, 0x0028, 0x001e, 0x0028, 0x001b, 0x0021, 0x002a, 0x0010,
+ 0x0016, 0x0014, 0x0025, 0x003d, 0x0038, 0x004f, 0x0049, 0x0040,
+ 0x002b, 0x004c, 0x0038, 0x0025, 0x001a, 0x001f, 0x0019, 0x000e,
+ 0x0023, 0x0010, 0x003c, 0x0039, 0x0061, 0x004b, 0x0072, 0x005b,
+ 0x0036, 0x0049, 0x0037, 0x0029, 0x0030, 0x0035, 0x0017, 0x0018,
+ 0x003a, 0x001b, 0x0032, 0x0060, 0x004c, 0x0046, 0x005d, 0x0054,
+ 0x004d, 0x003a, 0x004f, 0x001d, 0x004a, 0x0031, 0x0029, 0x0011,
+ 0x002f, 0x002d, 0x004e, 0x004a, 0x0073, 0x005e, 0x005a, 0x004f,
+ 0x0045, 0x0053, 0x0047, 0x0032, 0x003b, 0x0026, 0x0024, 0x000f,
+ 0x0048, 0x0022, 0x0038, 0x005f, 0x005c, 0x0055, 0x005b, 0x005a,
+ 0x0056, 0x0049, 0x004d, 0x0041, 0x0033, 0x002c, 0x002b, 0x002a,
+ 0x002b, 0x0014, 0x001e, 0x002c, 0x0037, 0x004e, 0x0048, 0x0057,
+ 0x004e, 0x003d, 0x002e, 0x0036, 0x0025, 0x001e, 0x0014, 0x0010,
+ 0x0035, 0x0019, 0x0029, 0x0025, 0x002c, 0x003b, 0x0036, 0x0051,
+ 0x0042, 0x004c, 0x0039, 0x0036, 0x0025, 0x0012, 0x0027, 0x000b,
+ 0x0023, 0x0021, 0x001f, 0x0039, 0x002a, 0x0052, 0x0048, 0x0050,
+ 0x002f, 0x003a, 0x0037, 0x0015, 0x0016, 0x001a, 0x0026, 0x0016,
+ 0x0035, 0x0019, 0x0017, 0x0026, 0x0046, 0x003c, 0x0033, 0x0024,
+ 0x0037, 0x001a, 0x0022, 0x0017, 0x001b, 0x000e, 0x0009, 0x0007,
+ 0x0022, 0x0020, 0x001c, 0x0027, 0x0031, 0x004b, 0x001e, 0x0034,
+ 0x0030, 0x0028, 0x0034, 0x001c, 0x0012, 0x0011, 0x0009, 0x0005,
+ 0x002d, 0x0015, 0x0022, 0x0040, 0x0038, 0x0032, 0x0031, 0x002d,
+ 0x001f, 0x0013, 0x000c, 0x000f, 0x000a, 0x0007, 0x0006, 0x0003,
+ 0x0030, 0x0017, 0x0014, 0x0027, 0x0024, 0x0023, 0x0035, 0x0015,
+ 0x0010, 0x0017, 0x000d, 0x000a, 0x0006, 0x0001, 0x0004, 0x0002,
+ 0x0010, 0x000f, 0x0011, 0x001b, 0x0019, 0x0014, 0x001d, 0x000b,
+ 0x0011, 0x000c, 0x0010, 0x0008, 0x0001, 0x0001, 0x0000, 0x0001,
+};
+
+static const uint8_t mpa_huffbits_13[256] = {
+  1,  4,  6,  7,  8,  9,  9, 10,
+  9, 10, 11, 11, 12, 12, 13, 13,
+  3,  4,  6,  7,  8,  8,  9,  9,
+  9,  9, 10, 10, 11, 12, 12, 12,
+  6,  6,  7,  8,  9,  9, 10, 10,
+  9, 10, 10, 11, 11, 12, 13, 13,
+  7,  7,  8,  9,  9, 10, 10, 10,
+ 10, 11, 11, 11, 11, 12, 13, 13,
+  8,  7,  9,  9, 10, 10, 11, 11,
+ 10, 11, 11, 12, 12, 13, 13, 14,
+  9,  8,  9, 10, 10, 10, 11, 11,
+ 11, 11, 12, 11, 13, 13, 14, 14,
+  9,  9, 10, 10, 11, 11, 11, 11,
+ 11, 12, 12, 12, 13, 13, 14, 14,
+ 10,  9, 10, 11, 11, 11, 12, 12,
+ 12, 12, 13, 13, 13, 14, 16, 16,
+  9,  8,  9, 10, 10, 11, 11, 12,
+ 12, 12, 12, 13, 13, 14, 15, 15,
+ 10,  9, 10, 10, 11, 11, 11, 13,
+ 12, 13, 13, 14, 14, 14, 16, 15,
+ 10, 10, 10, 11, 11, 12, 12, 13,
+ 12, 13, 14, 13, 14, 15, 16, 17,
+ 11, 10, 10, 11, 12, 12, 12, 12,
+ 13, 13, 13, 14, 15, 15, 15, 16,
+ 11, 11, 11, 12, 12, 13, 12, 13,
+ 14, 14, 15, 15, 15, 16, 16, 16,
+ 12, 11, 12, 13, 13, 13, 14, 14,
+ 14, 14, 14, 15, 16, 15, 16, 16,
+ 13, 12, 12, 13, 13, 13, 15, 14,
+ 14, 17, 15, 15, 15, 17, 16, 16,
+ 12, 12, 13, 14, 14, 14, 15, 14,
+ 15, 15, 16, 16, 19, 18, 19, 16,
+};
+
+static const uint16_t mpa_huffcodes_15[256] = {
+ 0x0007, 0x000c, 0x0012, 0x0035, 0x002f, 0x004c, 0x007c, 0x006c,
+ 0x0059, 0x007b, 0x006c, 0x0077, 0x006b, 0x0051, 0x007a, 0x003f,
+ 0x000d, 0x0005, 0x0010, 0x001b, 0x002e, 0x0024, 0x003d, 0x0033,
+ 0x002a, 0x0046, 0x0034, 0x0053, 0x0041, 0x0029, 0x003b, 0x0024,
+ 0x0013, 0x0011, 0x000f, 0x0018, 0x0029, 0x0022, 0x003b, 0x0030,
+ 0x0028, 0x0040, 0x0032, 0x004e, 0x003e, 0x0050, 0x0038, 0x0021,
+ 0x001d, 0x001c, 0x0019, 0x002b, 0x0027, 0x003f, 0x0037, 0x005d,
+ 0x004c, 0x003b, 0x005d, 0x0048, 0x0036, 0x004b, 0x0032, 0x001d,
+ 0x0034, 0x0016, 0x002a, 0x0028, 0x0043, 0x0039, 0x005f, 0x004f,
+ 0x0048, 0x0039, 0x0059, 0x0045, 0x0031, 0x0042, 0x002e, 0x001b,
+ 0x004d, 0x0025, 0x0023, 0x0042, 0x003a, 0x0034, 0x005b, 0x004a,
+ 0x003e, 0x0030, 0x004f, 0x003f, 0x005a, 0x003e, 0x0028, 0x0026,
+ 0x007d, 0x0020, 0x003c, 0x0038, 0x0032, 0x005c, 0x004e, 0x0041,
+ 0x0037, 0x0057, 0x0047, 0x0033, 0x0049, 0x0033, 0x0046, 0x001e,
+ 0x006d, 0x0035, 0x0031, 0x005e, 0x0058, 0x004b, 0x0042, 0x007a,
+ 0x005b, 0x0049, 0x0038, 0x002a, 0x0040, 0x002c, 0x0015, 0x0019,
+ 0x005a, 0x002b, 0x0029, 0x004d, 0x0049, 0x003f, 0x0038, 0x005c,
+ 0x004d, 0x0042, 0x002f, 0x0043, 0x0030, 0x0035, 0x0024, 0x0014,
+ 0x0047, 0x0022, 0x0043, 0x003c, 0x003a, 0x0031, 0x0058, 0x004c,
+ 0x0043, 0x006a, 0x0047, 0x0036, 0x0026, 0x0027, 0x0017, 0x000f,
+ 0x006d, 0x0035, 0x0033, 0x002f, 0x005a, 0x0052, 0x003a, 0x0039,
+ 0x0030, 0x0048, 0x0039, 0x0029, 0x0017, 0x001b, 0x003e, 0x0009,
+ 0x0056, 0x002a, 0x0028, 0x0025, 0x0046, 0x0040, 0x0034, 0x002b,
+ 0x0046, 0x0037, 0x002a, 0x0019, 0x001d, 0x0012, 0x000b, 0x000b,
+ 0x0076, 0x0044, 0x001e, 0x0037, 0x0032, 0x002e, 0x004a, 0x0041,
+ 0x0031, 0x0027, 0x0018, 0x0010, 0x0016, 0x000d, 0x000e, 0x0007,
+ 0x005b, 0x002c, 0x0027, 0x0026, 0x0022, 0x003f, 0x0034, 0x002d,
+ 0x001f, 0x0034, 0x001c, 0x0013, 0x000e, 0x0008, 0x0009, 0x0003,
+ 0x007b, 0x003c, 0x003a, 0x0035, 0x002f, 0x002b, 0x0020, 0x0016,
+ 0x0025, 0x0018, 0x0011, 0x000c, 0x000f, 0x000a, 0x0002, 0x0001,
+ 0x0047, 0x0025, 0x0022, 0x001e, 0x001c, 0x0014, 0x0011, 0x001a,
+ 0x0015, 0x0010, 0x000a, 0x0006, 0x0008, 0x0006, 0x0002, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_15[256] = {
+  3,  4,  5,  7,  7,  8,  9,  9,
+  9, 10, 10, 11, 11, 11, 12, 13,
+  4,  3,  5,  6,  7,  7,  8,  8,
+  8,  9,  9, 10, 10, 10, 11, 11,
+  5,  5,  5,  6,  7,  7,  8,  8,
+  8,  9,  9, 10, 10, 11, 11, 11,
+  6,  6,  6,  7,  7,  8,  8,  9,
+  9,  9, 10, 10, 10, 11, 11, 11,
+  7,  6,  7,  7,  8,  8,  9,  9,
+  9,  9, 10, 10, 10, 11, 11, 11,
+  8,  7,  7,  8,  8,  8,  9,  9,
+  9,  9, 10, 10, 11, 11, 11, 12,
+  9,  7,  8,  8,  8,  9,  9,  9,
+  9, 10, 10, 10, 11, 11, 12, 12,
+  9,  8,  8,  9,  9,  9,  9, 10,
+ 10, 10, 10, 10, 11, 11, 11, 12,
+  9,  8,  8,  9,  9,  9,  9, 10,
+ 10, 10, 10, 11, 11, 12, 12, 12,
+  9,  8,  9,  9,  9,  9, 10, 10,
+ 10, 11, 11, 11, 11, 12, 12, 12,
+ 10,  9,  9,  9, 10, 10, 10, 10,
+ 10, 11, 11, 11, 11, 12, 13, 12,
+ 10,  9,  9,  9, 10, 10, 10, 10,
+ 11, 11, 11, 11, 12, 12, 12, 13,
+ 11, 10,  9, 10, 10, 10, 11, 11,
+ 11, 11, 11, 11, 12, 12, 13, 13,
+ 11, 10, 10, 10, 10, 11, 11, 11,
+ 11, 12, 12, 12, 12, 12, 13, 13,
+ 12, 11, 11, 11, 11, 11, 11, 11,
+ 12, 12, 12, 12, 13, 13, 12, 13,
+ 12, 11, 11, 11, 11, 11, 11, 12,
+ 12, 12, 12, 12, 13, 13, 13, 13,
+};
+
+static const uint16_t mpa_huffcodes_16[256] = {
+ 0x0001, 0x0005, 0x000e, 0x002c, 0x004a, 0x003f, 0x006e, 0x005d,
+ 0x00ac, 0x0095, 0x008a, 0x00f2, 0x00e1, 0x00c3, 0x0178, 0x0011,
+ 0x0003, 0x0004, 0x000c, 0x0014, 0x0023, 0x003e, 0x0035, 0x002f,
+ 0x0053, 0x004b, 0x0044, 0x0077, 0x00c9, 0x006b, 0x00cf, 0x0009,
+ 0x000f, 0x000d, 0x0017, 0x0026, 0x0043, 0x003a, 0x0067, 0x005a,
+ 0x00a1, 0x0048, 0x007f, 0x0075, 0x006e, 0x00d1, 0x00ce, 0x0010,
+ 0x002d, 0x0015, 0x0027, 0x0045, 0x0040, 0x0072, 0x0063, 0x0057,
+ 0x009e, 0x008c, 0x00fc, 0x00d4, 0x00c7, 0x0183, 0x016d, 0x001a,
+ 0x004b, 0x0024, 0x0044, 0x0041, 0x0073, 0x0065, 0x00b3, 0x00a4,
+ 0x009b, 0x0108, 0x00f6, 0x00e2, 0x018b, 0x017e, 0x016a, 0x0009,
+ 0x0042, 0x001e, 0x003b, 0x0038, 0x0066, 0x00b9, 0x00ad, 0x0109,
+ 0x008e, 0x00fd, 0x00e8, 0x0190, 0x0184, 0x017a, 0x01bd, 0x0010,
+ 0x006f, 0x0036, 0x0034, 0x0064, 0x00b8, 0x00b2, 0x00a0, 0x0085,
+ 0x0101, 0x00f4, 0x00e4, 0x00d9, 0x0181, 0x016e, 0x02cb, 0x000a,
+ 0x0062, 0x0030, 0x005b, 0x0058, 0x00a5, 0x009d, 0x0094, 0x0105,
+ 0x00f8, 0x0197, 0x018d, 0x0174, 0x017c, 0x0379, 0x0374, 0x0008,
+ 0x0055, 0x0054, 0x0051, 0x009f, 0x009c, 0x008f, 0x0104, 0x00f9,
+ 0x01ab, 0x0191, 0x0188, 0x017f, 0x02d7, 0x02c9, 0x02c4, 0x0007,
+ 0x009a, 0x004c, 0x0049, 0x008d, 0x0083, 0x0100, 0x00f5, 0x01aa,
+ 0x0196, 0x018a, 0x0180, 0x02df, 0x0167, 0x02c6, 0x0160, 0x000b,
+ 0x008b, 0x0081, 0x0043, 0x007d, 0x00f7, 0x00e9, 0x00e5, 0x00db,
+ 0x0189, 0x02e7, 0x02e1, 0x02d0, 0x0375, 0x0372, 0x01b7, 0x0004,
+ 0x00f3, 0x0078, 0x0076, 0x0073, 0x00e3, 0x00df, 0x018c, 0x02ea,
+ 0x02e6, 0x02e0, 0x02d1, 0x02c8, 0x02c2, 0x00df, 0x01b4, 0x0006,
+ 0x00ca, 0x00e0, 0x00de, 0x00da, 0x00d8, 0x0185, 0x0182, 0x017d,
+ 0x016c, 0x0378, 0x01bb, 0x02c3, 0x01b8, 0x01b5, 0x06c0, 0x0004,
+ 0x02eb, 0x00d3, 0x00d2, 0x00d0, 0x0172, 0x017b, 0x02de, 0x02d3,
+ 0x02ca, 0x06c7, 0x0373, 0x036d, 0x036c, 0x0d83, 0x0361, 0x0002,
+ 0x0179, 0x0171, 0x0066, 0x00bb, 0x02d6, 0x02d2, 0x0166, 0x02c7,
+ 0x02c5, 0x0362, 0x06c6, 0x0367, 0x0d82, 0x0366, 0x01b2, 0x0000,
+ 0x000c, 0x000a, 0x0007, 0x000b, 0x000a, 0x0011, 0x000b, 0x0009,
+ 0x000d, 0x000c, 0x000a, 0x0007, 0x0005, 0x0003, 0x0001, 0x0003,
+};
+
+static const uint8_t mpa_huffbits_16[256] = {
+  1,  4,  6,  8,  9,  9, 10, 10,
+ 11, 11, 11, 12, 12, 12, 13,  9,
+  3,  4,  6,  7,  8,  9,  9,  9,
+ 10, 10, 10, 11, 12, 11, 12,  8,
+  6,  6,  7,  8,  9,  9, 10, 10,
+ 11, 10, 11, 11, 11, 12, 12,  9,
+  8,  7,  8,  9,  9, 10, 10, 10,
+ 11, 11, 12, 12, 12, 13, 13, 10,
+  9,  8,  9,  9, 10, 10, 11, 11,
+ 11, 12, 12, 12, 13, 13, 13,  9,
+  9,  8,  9,  9, 10, 11, 11, 12,
+ 11, 12, 12, 13, 13, 13, 14, 10,
+ 10,  9,  9, 10, 11, 11, 11, 11,
+ 12, 12, 12, 12, 13, 13, 14, 10,
+ 10,  9, 10, 10, 11, 11, 11, 12,
+ 12, 13, 13, 13, 13, 15, 15, 10,
+ 10, 10, 10, 11, 11, 11, 12, 12,
+ 13, 13, 13, 13, 14, 14, 14, 10,
+ 11, 10, 10, 11, 11, 12, 12, 13,
+ 13, 13, 13, 14, 13, 14, 13, 11,
+ 11, 11, 10, 11, 12, 12, 12, 12,
+ 13, 14, 14, 14, 15, 15, 14, 10,
+ 12, 11, 11, 11, 12, 12, 13, 14,
+ 14, 14, 14, 14, 14, 13, 14, 11,
+ 12, 12, 12, 12, 12, 13, 13, 13,
+ 13, 15, 14, 14, 14, 14, 16, 11,
+ 14, 12, 12, 12, 13, 13, 14, 14,
+ 14, 16, 15, 15, 15, 17, 15, 11,
+ 13, 13, 11, 12, 14, 14, 13, 14,
+ 14, 15, 16, 15, 17, 15, 14, 11,
+  9,  8,  8,  9,  9, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 11,  8,
+};
+
+static const uint16_t mpa_huffcodes_24[256] = {
+ 0x000f, 0x000d, 0x002e, 0x0050, 0x0092, 0x0106, 0x00f8, 0x01b2,
+ 0x01aa, 0x029d, 0x028d, 0x0289, 0x026d, 0x0205, 0x0408, 0x0058,
+ 0x000e, 0x000c, 0x0015, 0x0026, 0x0047, 0x0082, 0x007a, 0x00d8,
+ 0x00d1, 0x00c6, 0x0147, 0x0159, 0x013f, 0x0129, 0x0117, 0x002a,
+ 0x002f, 0x0016, 0x0029, 0x004a, 0x0044, 0x0080, 0x0078, 0x00dd,
+ 0x00cf, 0x00c2, 0x00b6, 0x0154, 0x013b, 0x0127, 0x021d, 0x0012,
+ 0x0051, 0x0027, 0x004b, 0x0046, 0x0086, 0x007d, 0x0074, 0x00dc,
+ 0x00cc, 0x00be, 0x00b2, 0x0145, 0x0137, 0x0125, 0x010f, 0x0010,
+ 0x0093, 0x0048, 0x0045, 0x0087, 0x007f, 0x0076, 0x0070, 0x00d2,
+ 0x00c8, 0x00bc, 0x0160, 0x0143, 0x0132, 0x011d, 0x021c, 0x000e,
+ 0x0107, 0x0042, 0x0081, 0x007e, 0x0077, 0x0072, 0x00d6, 0x00ca,
+ 0x00c0, 0x00b4, 0x0155, 0x013d, 0x012d, 0x0119, 0x0106, 0x000c,
+ 0x00f9, 0x007b, 0x0079, 0x0075, 0x0071, 0x00d7, 0x00ce, 0x00c3,
+ 0x00b9, 0x015b, 0x014a, 0x0134, 0x0123, 0x0110, 0x0208, 0x000a,
+ 0x01b3, 0x0073, 0x006f, 0x006d, 0x00d3, 0x00cb, 0x00c4, 0x00bb,
+ 0x0161, 0x014c, 0x0139, 0x012a, 0x011b, 0x0213, 0x017d, 0x0011,
+ 0x01ab, 0x00d4, 0x00d0, 0x00cd, 0x00c9, 0x00c1, 0x00ba, 0x00b1,
+ 0x00a9, 0x0140, 0x012f, 0x011e, 0x010c, 0x0202, 0x0179, 0x0010,
+ 0x014f, 0x00c7, 0x00c5, 0x00bf, 0x00bd, 0x00b5, 0x00ae, 0x014d,
+ 0x0141, 0x0131, 0x0121, 0x0113, 0x0209, 0x017b, 0x0173, 0x000b,
+ 0x029c, 0x00b8, 0x00b7, 0x00b3, 0x00af, 0x0158, 0x014b, 0x013a,
+ 0x0130, 0x0122, 0x0115, 0x0212, 0x017f, 0x0175, 0x016e, 0x000a,
+ 0x028c, 0x015a, 0x00ab, 0x00a8, 0x00a4, 0x013e, 0x0135, 0x012b,
+ 0x011f, 0x0114, 0x0107, 0x0201, 0x0177, 0x0170, 0x016a, 0x0006,
+ 0x0288, 0x0142, 0x013c, 0x0138, 0x0133, 0x012e, 0x0124, 0x011c,
+ 0x010d, 0x0105, 0x0200, 0x0178, 0x0172, 0x016c, 0x0167, 0x0004,
+ 0x026c, 0x012c, 0x0128, 0x0126, 0x0120, 0x011a, 0x0111, 0x010a,
+ 0x0203, 0x017c, 0x0176, 0x0171, 0x016d, 0x0169, 0x0165, 0x0002,
+ 0x0409, 0x0118, 0x0116, 0x0112, 0x010b, 0x0108, 0x0103, 0x017e,
+ 0x017a, 0x0174, 0x016f, 0x016b, 0x0168, 0x0166, 0x0164, 0x0000,
+ 0x002b, 0x0014, 0x0013, 0x0011, 0x000f, 0x000d, 0x000b, 0x0009,
+ 0x0007, 0x0006, 0x0004, 0x0007, 0x0005, 0x0003, 0x0001, 0x0003,
+};
+
+static const uint8_t mpa_huffbits_24[256] = {
+  4,  4,  6,  7,  8,  9,  9, 10,
+ 10, 11, 11, 11, 11, 11, 12,  9,
+  4,  4,  5,  6,  7,  8,  8,  9,
+  9,  9, 10, 10, 10, 10, 10,  8,
+  6,  5,  6,  7,  7,  8,  8,  9,
+  9,  9,  9, 10, 10, 10, 11,  7,
+  7,  6,  7,  7,  8,  8,  8,  9,
+  9,  9,  9, 10, 10, 10, 10,  7,
+  8,  7,  7,  8,  8,  8,  8,  9,
+  9,  9, 10, 10, 10, 10, 11,  7,
+  9,  7,  8,  8,  8,  8,  9,  9,
+  9,  9, 10, 10, 10, 10, 10,  7,
+  9,  8,  8,  8,  8,  9,  9,  9,
+  9, 10, 10, 10, 10, 10, 11,  7,
+ 10,  8,  8,  8,  9,  9,  9,  9,
+ 10, 10, 10, 10, 10, 11, 11,  8,
+ 10,  9,  9,  9,  9,  9,  9,  9,
+  9, 10, 10, 10, 10, 11, 11,  8,
+ 10,  9,  9,  9,  9,  9,  9, 10,
+ 10, 10, 10, 10, 11, 11, 11,  8,
+ 11,  9,  9,  9,  9, 10, 10, 10,
+ 10, 10, 10, 11, 11, 11, 11,  8,
+ 11, 10,  9,  9,  9, 10, 10, 10,
+ 10, 10, 10, 11, 11, 11, 11,  8,
+ 11, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 11, 11, 11, 11, 11,  8,
+ 11, 10, 10, 10, 10, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 11,  8,
+ 12, 10, 10, 10, 10, 10, 10, 11,
+ 11, 11, 11, 11, 11, 11, 11,  8,
+  8,  7,  7,  7,  7,  7,  7,  7,
+  7,  7,  7,  8,  8,  8,  8,  4,
+};
+
+static const HuffTable mpa_huff_tables[16] = { /* rows are { xsize, bits, codes }; slot 0 has no arrays */
+{ 1, NULL, NULL },
+{ 2, mpa_huffbits_1, mpa_huffcodes_1 },
+{ 3, mpa_huffbits_2, mpa_huffcodes_2 },
+{ 3, mpa_huffbits_3, mpa_huffcodes_3 },
+{ 4, mpa_huffbits_5, mpa_huffcodes_5 },
+{ 4, mpa_huffbits_6, mpa_huffcodes_6 },
+{ 6, mpa_huffbits_7, mpa_huffcodes_7 },
+{ 6, mpa_huffbits_8, mpa_huffcodes_8 },
+{ 6, mpa_huffbits_9, mpa_huffcodes_9 },
+{ 8, mpa_huffbits_10, mpa_huffcodes_10 },
+{ 8, mpa_huffbits_11, mpa_huffcodes_11 },
+{ 8, mpa_huffbits_12, mpa_huffcodes_12 },
+{ 16, mpa_huffbits_13, mpa_huffcodes_13 },
+{ 16, mpa_huffbits_15, mpa_huffcodes_15 },
+{ 16, mpa_huffbits_16, mpa_huffcodes_16 },
+{ 16, mpa_huffbits_24, mpa_huffcodes_24 },
+};
+
+static const uint8_t mpa_huff_data[32][2] = { /* col 0 stays in 0..15, presumably a mpa_huff_tables index; col 1 presumably linbits -- TODO confirm against the decoder */
+{ 0, 0 },
+{ 1, 0 },
+{ 2, 0 },
+{ 3, 0 },
+{ 0, 0 },
+{ 4, 0 },
+{ 5, 0 },
+{ 6, 0 },
+{ 7, 0 },
+{ 8, 0 },
+{ 9, 0 },
+{ 10, 0 },
+{ 11, 0 },
+{ 12, 0 },
+{ 0, 0 },
+{ 13, 0 },
+{ 14, 1 },
+{ 14, 2 },
+{ 14, 3 },
+{ 14, 4 },
+{ 14, 6 },
+{ 14, 8 },
+{ 14, 10 },
+{ 14, 13 },
+{ 15, 4 },
+{ 15, 5 },
+{ 15, 6 },
+{ 15, 7 },
+{ 15, 8 },
+{ 15, 9 },
+{ 15, 11 },
+{ 15, 13 },
+};
+
+
+/* huffman tables for quadruples */
+static const uint8_t mpa_quad_codes[2][16] = {
+    {  1,  5,  4,  5,  6,  5,  4,  4, 7,  3,  6,  0,  7,  2,  3,  1, },
+    { 15, 14, 13, 12, 11, 10,  9,  8, 7,  6,  5,  4,  3,  2,  1,  0, },
+};
+
+static const uint8_t mpa_quad_bits[2][16] = {
+    { 1, 4, 4, 5, 4, 6, 5, 6, 4, 5, 5, 6, 5, 6, 6, 6, },
+    { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, },
+};
+
+/* band size tables: one row per sample rate; every long row sums to 576 */
+static const uint8_t band_size_long[9][22] = {
+{ 4, 4, 4, 4, 4, 4, 6, 6, 8, 8, 10,
+  12, 16, 20, 24, 28, 34, 42, 50, 54, 76, 158, }, /* 44100 */
+{ 4, 4, 4, 4, 4, 4, 6, 6, 6, 8, 10,
+  12, 16, 18, 22, 28, 34, 40, 46, 54, 54, 192, }, /* 48000 */
+{ 4, 4, 4, 4, 4, 4, 6, 6, 8, 10, 12,
+  16, 20, 24, 30, 38, 46, 56, 68, 84, 102, 26, }, /* 32000 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 22050 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  18, 22, 26, 32, 38, 46, 52, 64, 70, 76, 36, }, /* 24000 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 16000 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 11025 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 12000 */
+{ 12, 12, 12, 12, 12, 12, 16, 20, 24, 28, 32,
+  40, 48, 56, 64, 76, 90, 2, 2, 2, 2, 2, }, /* 8000 */
+};
+
+static const uint8_t band_size_short[9][13] = { /* one row per sample rate; every row sums to 192 */
+{ 4, 4, 4, 4, 6, 8, 10, 12, 14, 18, 22, 30, 56, }, /* 44100 */
+{ 4, 4, 4, 4, 6, 6, 10, 12, 14, 16, 20, 26, 66, }, /* 48000 */
+{ 4, 4, 4, 4, 6, 8, 12, 16, 20, 26, 34, 42, 12, }, /* 32000 */
+{ 4, 4, 4, 6, 6, 8, 10, 14, 18, 26, 32, 42, 18, }, /* 22050 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 32, 44, 12, }, /* 24000 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 16000 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 11025 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 12000 */
+{ 8, 8, 8, 12, 16, 20, 24, 28, 36, 2, 2, 2, 26, }, /* 8000 */
+};
+
+static const uint8_t mpa_pretab[2][22] = {
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 0 },
+};
+
+/* table for alias reduction (XXX: store it as integer !) */
+static const float ci_table[8] = {
+    -0.6, -0.535, -0.33, -0.185, -0.095, -0.041, -0.0142, -0.0037,
+};
+
+#endif /* AVCODEC_MPEGAUDIODECTAB_H */
diff --git a/media/ffvpx/libavcodec/mpegaudiodsp.c b/media/ffvpx/libavcodec/mpegaudiodsp.c
new file mode 100644
index 0000000000..5a5a679d91
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodsp.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/thread.h"
+#include "mpegaudio.h"
+#include "mpegaudiodsp.h"
+#include "dct.h"
+#include "dct32.h"
+
+static AVOnce mpadsp_table_init = AV_ONCE_INIT;
+
+static av_cold void mpadsp_init_tabs(void) /* fills ff_mdct_win_float and ff_mdct_win_fixed */
+{
+    int i, j;
+    /* compute mdct windows: j picks one of 4 base shapes, i the coefficient */
+    for (i = 0; i < 36; i++) {
+        for (j = 0; j < 4; j++) {
+            double d;
+
+            if (j == 2 && i % 3 != 1) /* shape 2 keeps only i = 1, 4, ..., 34 (12 values) */
+                continue;
+
+            d = sin(M_PI * (i + 0.5) / 36.0); /* base sine; used unchanged for j == 0 and j == 2 */
+            if (j == 1) { /* shape 1: base sine for i < 18, then 1, a falling 12-point sine, then 0 */
+                if      (i >= 30) d = 0;
+                else if (i >= 24) d = sin(M_PI * (i - 18 + 0.5) / 12.0);
+                else if (i >= 18) d = 1;
+            } else if (j == 3) { /* shape 3: 0, a rising 12-point sine, then 1, base sine for i >= 18 */
+                if      (i <   6) d = 0;
+                else if (i <  12) d = sin(M_PI * (i -  6 + 0.5) / 12.0);
+                else if (i <  18) d = 1;
+            }
+            //merge last stage of imdct into the window coefficients
+            d *= 0.5 * IMDCT_SCALAR / cos(M_PI * (2 * i + 19) / 72);
+
+            if (j == 2) {
+                ff_mdct_win_float[j][i/3] = d / (1 << 5); /* i/3 packs the 12 values at indices 0..11 */
+                ff_mdct_win_fixed[j][i/3] = d / (1 << 5) * (1LL << 32) + 0.5; /* same value times 2^32; +0.5 precedes the conversion to int */
+            } else {
+                int idx = i < 18 ? i : i + (MDCT_BUF_SIZE/2 - 18); /* i >= 18 is stored from MDCT_BUF_SIZE/2 onwards */
+                ff_mdct_win_float[j][idx] = d / (1 << 5);
+                ff_mdct_win_fixed[j][idx] = d / (1 << 5) * (1LL << 32) + 0.5;
+            }
+        }
+    }
+
+    /* NOTE: we do frequency inversion after the MDCT by changing
+        the sign of the right window coefs */
+    for (j = 0; j < 4; j++) { /* windows 4..7 are derived from windows 0..3 */
+        for (i = 0; i < MDCT_BUF_SIZE; i += 2) { /* even index copied, odd index negated */
+            ff_mdct_win_float[j + 4][i    ] =  ff_mdct_win_float[j][i    ];
+            ff_mdct_win_float[j + 4][i + 1] = -ff_mdct_win_float[j][i + 1];
+            ff_mdct_win_fixed[j + 4][i    ] =  ff_mdct_win_fixed[j][i    ];
+            ff_mdct_win_fixed[j + 4][i + 1] = -ff_mdct_win_fixed[j][i + 1];
+        }
+    }
+
+#if ARCH_X86
+    ff_mpadsp_init_x86_tabs(); /* extra table setup compiled in for x86 builds only */
+#endif
+}
+
+av_cold void ff_mpadsp_init(MPADSPContext *s) /* sets all six pointers in s, then calls the build-selected arch hooks */
+{
+    DCTContext dct; /* local scratch context; only dct.dct32 is read from it below */
+
+    ff_dct_init(&dct, 5, DCT_II);
+    ff_thread_once(&mpadsp_table_init, &mpadsp_init_tabs); /* table setup is guarded by an AVOnce */
+
+    s->apply_window_float = ff_mpadsp_apply_window_float;
+    s->apply_window_fixed = ff_mpadsp_apply_window_fixed;
+
+    s->dct32_float = dct.dct32; /* float DCT pointer comes from the context initialised above */
+    s->dct32_fixed = ff_dct32_fixed;
+
+    s->imdct36_blocks_float = ff_imdct36_blocks_float;
+    s->imdct36_blocks_fixed = ff_imdct36_blocks_fixed;
+
+#if ARCH_AARCH64
+    ff_mpadsp_init_aarch64(s); /* the #if/#elif chain compiles in at most one of these four hooks */
+#elif ARCH_ARM
+    ff_mpadsp_init_arm(s);
+#elif ARCH_PPC
+    ff_mpadsp_init_ppc(s);
+#elif ARCH_X86
+    ff_mpadsp_init_x86(s);
+#endif
+#if HAVE_MIPSFPU
+    ff_mpadsp_init_mipsfpu(s); /* separate #if: not part of the chain above */
+#endif
+#if HAVE_MIPSDSP
+    ff_mpadsp_init_mipsdsp(s); /* separate #if: may be compiled in alongside the FPU hook */
+#endif
+}
diff --git a/media/ffvpx/libavcodec/mpegaudiodsp.h b/media/ffvpx/libavcodec/mpegaudiodsp.h
new file mode 100644
index 0000000000..7bc635191a
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodsp.h
@@ -0,0 +1,92 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGAUDIODSP_H
+#define AVCODEC_MPEGAUDIODSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/macros.h"
+
+typedef struct MPADSPContext { /* table of six function pointers; ff_mpadsp_init() assigns each one */
+    void (*apply_window_float)(float *synth_buf, float *window,
+                               int *dither_state, float *samples,
+                               ptrdiff_t incr);
+    void (*apply_window_fixed)(int32_t *synth_buf, int32_t *window,
+                               int *dither_state, int16_t *samples,
+                               ptrdiff_t incr);
+    void (*dct32_float)(float *dst, const float *src);
+    void (*dct32_fixed)(int *dst, const int *src);
+
+    void (*imdct36_blocks_float)(float *out, float *buf, float *in,
+                                 int count, int switch_point, int block_type);
+    void (*imdct36_blocks_fixed)(int *out, int *buf, int *in,
+                                 int count, int switch_point, int block_type);
+} MPADSPContext;
+
+void ff_mpadsp_init(MPADSPContext *s);
+
+extern int32_t ff_mpa_synth_window_fixed[];
+extern float   ff_mpa_synth_window_float[];
+
+extern const int32_t ff_mpa_enwindow[257];
+
+void ff_mpa_synth_filter_fixed(MPADSPContext *s,
+                               int32_t *synth_buf_ptr, int *synth_buf_offset,
+                               int32_t *window, int *dither_state,
+                               int16_t *samples, ptrdiff_t incr,
+                               int32_t *sb_samples);
+
+void ff_mpa_synth_filter_float(MPADSPContext *s,
+                               float *synth_buf_ptr, int *synth_buf_offset,
+                               float *window, int *dither_state,
+                               float *samples, ptrdiff_t incr,
+                               float *sb_samples);
+
+void ff_mpadsp_init_aarch64(MPADSPContext *s);
+void ff_mpadsp_init_arm(MPADSPContext *s);
+void ff_mpadsp_init_ppc(MPADSPContext *s);
+void ff_mpadsp_init_x86(MPADSPContext *s);
+void ff_mpadsp_init_x86_tabs(void);
+void ff_mpadsp_init_mipsfpu(MPADSPContext *s);
+void ff_mpadsp_init_mipsdsp(MPADSPContext *s);
+
+void ff_mpa_synth_init_float(void);
+void ff_mpa_synth_init_fixed(void);
+
+void ff_mpadsp_apply_window_float(float *synth_buf, float *window,
+                                  int *dither_state, float *samples,
+                                  ptrdiff_t incr);
+void ff_mpadsp_apply_window_fixed(int32_t *synth_buf, int32_t *window,
+                                  int *dither_state, int16_t *samples,
+                                  ptrdiff_t incr);
+
+void ff_imdct36_blocks_float(float *out, float *buf, float *in,
+                             int count, int switch_point, int block_type);
+
+void ff_imdct36_blocks_fixed(int *out, int *buf, int *in,
+                             int count, int switch_point, int block_type);
+
+/** For SSE implementation, MDCT_BUF_SIZE/2 should be 128-bit aligned */
+#define MDCT_BUF_SIZE FFALIGN(36, 2*4)
+
+extern int ff_mdct_win_fixed[8][MDCT_BUF_SIZE];
+extern float ff_mdct_win_float[8][MDCT_BUF_SIZE];
+
+#endif /* AVCODEC_MPEGAUDIODSP_H */
diff --git a/media/ffvpx/libavcodec/mpegaudiodsp_data.c b/media/ffvpx/libavcodec/mpegaudiodsp_data.c
new file mode 100644
index 0000000000..4550de9b80
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodsp_data.c
@@ -0,0 +1,56 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "mpegaudiodsp.h"
+
+/* half mpeg encoding window (full precision): 257 values, expanded to 512 taps by mpa_synth_init() in mpegaudiodsp_template.c */
+const int32_t ff_mpa_enwindow[257] = {
+     0,    -1,    -1,    -1,    -1,    -1,    -1,    -2,
+    -2,    -2,    -2,    -3,    -3,    -4,    -4,    -5,
+    -5,    -6,    -7,    -7,    -8,    -9,   -10,   -11,
+   -13,   -14,   -16,   -17,   -19,   -21,   -24,   -26,
+   -29,   -31,   -35,   -38,   -41,   -45,   -49,   -53,
+   -58,   -63,   -68,   -73,   -79,   -85,   -91,   -97,
+  -104,  -111,  -117,  -125,  -132,  -139,  -147,  -154,
+  -161,  -169,  -176,  -183,  -190,  -196,  -202,  -208,
+   213,   218,   222,   225,   227,   228,   228,   227,
+   224,   221,   215,   208,   200,   189,   177,   163,
+   146,   127,   106,    83,    57,    29,    -2,   -36,
+   -72,  -111,  -153,  -197,  -244,  -294,  -347,  -401,
+  -459,  -519,  -581,  -645,  -711,  -779,  -848,  -919,
+  -991, -1064, -1137, -1210, -1283, -1356, -1428, -1498,
+ -1567, -1634, -1698, -1759, -1817, -1870, -1919, -1962,
+ -2001, -2032, -2057, -2075, -2085, -2087, -2080, -2063,
+  2037,  2000,  1952,  1893,  1822,  1739,  1644,  1535,
+  1414,  1280,  1131,   970,   794,   605,   402,   185,
+   -45,  -288,  -545,  -814, -1095, -1388, -1692, -2006,
+ -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788,
+ -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597,
+ -7910, -8209, -8491, -8755, -8998, -9219, -9416, -9585,
+ -9727, -9838, -9916, -9959, -9966, -9935, -9863, -9750,
+ -9592, -9389, -9139, -8840, -8492, -8092, -7640, -7134,
+  6574,  5959,  5288,  4561,  3776,  2935,  2037,  1082,
+    70,  -998, -2122, -3300, -4533, -5818, -7154, -8540,
+ -9975,-11455,-12980,-14548,-16155,-17799,-19478,-21189,
+-22929,-24694,-26482,-28289,-30112,-31947,-33791,-35640,
+-37489,-39336,-41176,-43006,-44821,-46617,-48390,-50137,
+-51853,-53534,-55178,-56778,-58333,-59838,-61289,-62684,
+-64019,-65290,-66494,-67629,-68692,-69679,-70590,-71420,
+-72169,-72835,-73415,-73908,-74313,-74630,-74856,-74992,
+ 75038,
+};
diff --git a/media/ffvpx/libavcodec/mpegaudiodsp_fixed.c b/media/ffvpx/libavcodec/mpegaudiodsp_fixed.c
new file mode 100644
index 0000000000..83c9d66095
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodsp_fixed.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FLOATS 0
+#include "mpegaudiodsp_template.c"
diff --git a/media/ffvpx/libavcodec/mpegaudiodsp_float.c b/media/ffvpx/libavcodec/mpegaudiodsp_float.c
new file mode 100644
index 0000000000..c45b136089
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodsp_float.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FLOATS 1
+#include "mpegaudiodsp_template.c"
diff --git a/media/ffvpx/libavcodec/mpegaudiodsp_template.c b/media/ffvpx/libavcodec/mpegaudiodsp_template.c
new file mode 100644
index 0000000000..fbbd94e486
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiodsp_template.c
@@ -0,0 +1,372 @@
+/*
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/thread.h"
+
+#include "dct32.h"
+#include "mathops.h"
+#include "mpegaudiodsp.h"
+#include "mpegaudio.h"
+
+#if USE_FLOATS
+#define RENAME(n) n##_float
+
+static inline float round_sample(float *sum)  /* float variant: no scaling or clipping */
+{
+    float sum1=*sum;   /* hand back the accumulated value unchanged */
+    *sum = 0;          /* and restart the caller's accumulator at zero */
+    return sum1;
+}
+
+#define MACS(rt, ra, rb) rt+=(ra)*(rb)
+#define MULS(ra, rb) ((ra)*(rb))
+#define MULH3(x, y, s) ((s)*(y)*(x))
+#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
+#define MULLx(x, y, s) ((y)*(x))
+#define FIXHR(x)        ((float)(x))
+#define FIXR(x)        ((float)(x))
+#define SHR(a,b)       ((a)*(1.0f/(1<<(b))))
+
+#else
+
+#define RENAME(n) n##_fixed
+#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
+
+static inline int round_sample(int64_t *sum)  /* fixed-point variant */
+{
+    int sum1;
+    sum1 = (int)((*sum) >> OUT_SHIFT);  /* drop the low OUT_SHIFT bits */
+    *sum &= (1<<OUT_SHIFT)-1;           /* keep only those dropped bits in *sum for the next sample */
+    return av_clip_int16(sum1);         /* presumably saturates to the int16_t range - see av_clip_int16() */
+}
+
+#   define MULS(ra, rb) MUL64(ra, rb)
+#   define MACS(rt, ra, rb) MAC64(rt, ra, rb)
+#   define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
+#   define MULH3(x, y, s) MULH((s)*(x), y)
+#   define MULLx(x, y, s) MULL((int)(x),(y),s)
+#   define SHR(a,b)       (((int)(a))>>(b))
+#   define FIXR(a)        ((int)((a) * FRAC_ONE + 0.5))
+#   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
+#endif
+
+/** Window for MDCT. Only the elements in [0,17] and
+    [MDCT_BUF_SIZE/2, MDCT_BUF_SIZE/2 + 17] are actually used. The rest
+    is just padding to preserve alignment for SIMD implementations.
+*/
+DECLARE_ALIGNED(16, INTFLOAT, RENAME(ff_mdct_win))[8][MDCT_BUF_SIZE];
+
+DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256];
+
+#define SUM8(op, sum, w, p)               \
+{   /* op(sum, w[k*64], p[k*64]) for k = 0..7 */ \
+    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
+    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
+    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
+    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
+    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
+    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
+    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
+    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
+}
+
+#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
+{   /* per tap k=0..7: tmp = p[k*64]; op1(sum1, w1[k*64], tmp); op2(sum2, w2[k*64], tmp) - one load of p feeds both sums */ \
+    INTFLOAT tmp;\
+    tmp = (p)[0 * 64];\
+    op1(sum1, (w1)[0 * 64], tmp);\
+    op2(sum2, (w2)[0 * 64], tmp);\
+    tmp = (p)[1 * 64];\
+    op1(sum1, (w1)[1 * 64], tmp);\
+    op2(sum2, (w2)[1 * 64], tmp);\
+    tmp = (p)[2 * 64];\
+    op1(sum1, (w1)[2 * 64], tmp);\
+    op2(sum2, (w2)[2 * 64], tmp);\
+    tmp = (p)[3 * 64];\
+    op1(sum1, (w1)[3 * 64], tmp);\
+    op2(sum2, (w2)[3 * 64], tmp);\
+    tmp = (p)[4 * 64];\
+    op1(sum1, (w1)[4 * 64], tmp);\
+    op2(sum2, (w2)[4 * 64], tmp);\
+    tmp = (p)[5 * 64];\
+    op1(sum1, (w1)[5 * 64], tmp);\
+    op2(sum2, (w2)[5 * 64], tmp);\
+    tmp = (p)[6 * 64];\
+    op1(sum1, (w1)[6 * 64], tmp);\
+    op2(sum2, (w2)[6 * 64], tmp);\
+    tmp = (p)[7 * 64];\
+    op1(sum1, (w1)[7 * 64], tmp);\
+    op2(sum2, (w2)[7 * 64], tmp);\
+}
+
+void RENAME(ff_mpadsp_apply_window)(MPA_INT *synth_buf, MPA_INT *window,
+                                  int *dither_state, OUT_INT *samples,
+                                  ptrdiff_t incr)
+{   /* Writes 32 output samples at samples[0], samples[incr], ..., samples[31*incr]; *dither_state is read and updated. */
+    register const MPA_INT *w, *w2, *p;
+    int j;
+    OUT_INT *samples2;
+#if USE_FLOATS
+    float sum, sum2;
+#else
+    int64_t sum, sum2;
+#endif
+
+    /* copy to avoid wrap: the first 32 entries are duplicated at synth_buf[512..543] */
+    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
+
+    samples2 = samples + 31 * incr;   /* last output slot; this pointer walks backwards */
+    w = window;
+    w2 = window + 31;
+
+    sum = *dither_state;              /* seed the accumulator from the caller-kept state */
+    p = synth_buf + 16;
+    SUM8(MACS, sum, w, p);
+    p = synth_buf + 48;
+    SUM8(MLSS, sum, w + 32, p);
+    *samples = round_sample(&sum);    /* output sample 0 */
+    samples += incr;
+    w++;
+
+    /* we calculate two samples at the same time to avoid one memory
+       access per two samples */
+    for(j=1;j<16;j++) {
+        sum2 = 0;
+        p = synth_buf + 16 + j;
+        SUM8P2(sum, MACS, sum2, MLSS, w, w2, p);
+        p = synth_buf + 48 - j;
+        SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p);
+
+        *samples = round_sample(&sum);    /* output sample j */
+        samples += incr;
+        sum += sum2;                      /* what round_sample() left in sum carries into the mirrored sample */
+        *samples2 = round_sample(&sum);   /* output sample 32 - j */
+        samples2 -= incr;
+        w++;
+        w2--;
+    }
+
+    p = synth_buf + 32;
+    SUM8(MLSS, sum, w + 32, p);
+    *samples = round_sample(&sum);    /* output sample 16 */
+    *dither_state= sum;               /* store what round_sample() left in sum for the next call */
+}
+
+/* 32 sub band synthesis filter. Input: 32 sub band samples (sb_samples),
+   Output: 32 samples (samples, stride incr). *synth_buf_offset is updated. */
+void RENAME(ff_mpa_synth_filter)(MPADSPContext *s, MPA_INT *synth_buf_ptr,
+                                 int *synth_buf_offset,
+                                 MPA_INT *window, int *dither_state,
+                                 OUT_INT *samples, ptrdiff_t incr,
+                                 MPA_INT *sb_samples)
+{
+    MPA_INT *synth_buf;
+    int offset;
+
+    offset = *synth_buf_offset;
+    synth_buf = synth_buf_ptr + offset;   /* current slot inside the caller's buffer */
+
+    s->RENAME(dct32)(synth_buf, sb_samples);   /* both steps go through the function pointers stored in s */
+    s->RENAME(apply_window)(synth_buf, window, dither_state, samples, incr);
+
+    offset = (offset - 32) & 511;   /* step back 32 entries, wrapping within 0..511 */
+    *synth_buf_offset = offset;
+}
+
+static av_cold void mpa_synth_init(MPA_INT *window)   /* fills window[0 .. 767] from ff_mpa_enwindow[] */
+{
+    int i, j;
+
+    /* max = 18760, max sum over all 16 coefs : 44736 */
+    for(i=0;i<257;i++) {
+        INTFLOAT v;
+        v = ff_mpa_enwindow[i];
+#if USE_FLOATS
+        v *= 1.0 / (1LL<<(16 + FRAC_BITS));   /* float build only: scale the integer table down */
+#endif
+        window[i] = v;
+        if ((i & 63) != 0)                    /* the mirrored copy is negated unless i is a multiple of 64 */
+            v = -v;
+        if (i != 0)                           /* i == 0 has no mirror entry in 0..511 */
+            window[512 - i] = v;
+    }
+
+
+    // Needed for avoiding shuffles in ASM implementations
+    for(i=0; i < 8; i++)
+        for(j=0; j < 16; j++)
+            window[512+16*i+j] = window[64*i+32-j];       /* entries 512..639: reversed 16-entry runs ending at 64*i+32 */
+
+    for(i=0; i < 8; i++)
+        for(j=0; j < 16; j++)
+            window[512+128+16*i+j] = window[64*i+48-j];   /* entries 640..767: same, ending at 64*i+48 */
+}
+
+static av_cold void mpa_synth_window_init(void)   /* argument-less wrapper passed to ff_thread_once() */
+{
+    mpa_synth_init(RENAME(ff_mpa_synth_window));   /* fill the _float or _fixed global window, per USE_FLOATS */
+}
+
+av_cold void RENAME(ff_mpa_synth_init)(void)
+{   /* Public entry point that builds RENAME(ff_mpa_synth_window). */
+    static AVOnce init_static_once = AV_ONCE_INIT;
+    ff_thread_once(&init_static_once, mpa_synth_window_init);   /* presumably runs the callback at most once, even with concurrent callers - see ff_thread_once() */
+}
+
+/* cos(pi*i/18) */
+#define C1 FIXHR(0.98480775301220805936/2)
+#define C2 FIXHR(0.93969262078590838405/2)
+#define C3 FIXHR(0.86602540378443864676/2)
+#define C4 FIXHR(0.76604444311897803520/2)
+#define C5 FIXHR(0.64278760968653932632/2)
+#define C6 FIXHR(0.5/2)
+#define C7 FIXHR(0.34202014332566873304/2)
+#define C8 FIXHR(0.17364817766693034885/2)
+
+/* 0.5 / cos(pi*(2*i+1)/36) */
+static const INTFLOAT icos36[9] = {
+    FIXR(0.50190991877167369479),
+    FIXR(0.51763809020504152469), //0
+    FIXR(0.55168895948124587824),
+    FIXR(0.61038729438072803416),
+    FIXR(0.70710678118654752439), //1
+    FIXR(0.87172339781054900991),
+    FIXR(1.18310079157624925896),
+    FIXR(1.93185165257813657349), //2
+    FIXR(5.73685662283492756461),
+};
+
+/* 0.5 / cos(pi*(2*i+1)/36), pre-divided by 2 (entries 0-5) or by 4 (entries 6-7); entry 8 has no initializer and is 0 */
+static const INTFLOAT icos36h[9] = {
+    FIXHR(0.50190991877167369479/2),
+    FIXHR(0.51763809020504152469/2), //0
+    FIXHR(0.55168895948124587824/2),
+    FIXHR(0.61038729438072803416/2),
+    FIXHR(0.70710678118654752439/2), //1
+    FIXHR(0.87172339781054900991/2),
+    FIXHR(1.18310079157624925896/4),
+    FIXHR(1.93185165257813657349/4), //2
+//    FIXHR(5.73685662283492756461),
+};
+
+/* using Lee like decomposition followed by hand coded 9 points DCT; writes 18 values to out[] (stride SBLIMIT), rewrites 18 values of buf[] (stride 4) and overwrites in[0..17] */
+static void imdct36(INTFLOAT *out, INTFLOAT *buf, SUINTFLOAT *in, INTFLOAT *win)
+{
+    int i, j;
+    SUINTFLOAT t0, t1, t2, t3, s0, s1, s2, s3;
+    SUINTFLOAT tmp[18], *tmp1, *in1;
+
+    for (i = 17; i >= 1; i--)      /* in place: the caller's in[] is modified */
+        in[i] += in[i-1];
+    for (i = 17; i >= 3; i -= 2)   /* second pass updates odd indices only */
+        in[i] += in[i-2];
+
+    for (j = 0; j < 2; j++) {      /* j = 0: even in[]/tmp[] entries, j = 1: odd entries */
+        tmp1 = tmp + j;
+        in1 = in + j;
+
+        t2 = in1[2*4] + in1[2*8] - in1[2*2];
+
+        t3 = in1[2*0] + SHR(in1[2*6],1);
+        t1 = in1[2*0] - in1[2*6];
+        tmp1[ 6] = t1 - SHR(t2,1);
+        tmp1[16] = t1 + t2;
+
+        t0 = MULH3(in1[2*2] + in1[2*4] ,    C2, 2);
+        t1 = MULH3(in1[2*4] - in1[2*8] , -2*C8, 1);
+        t2 = MULH3(in1[2*2] + in1[2*8] ,   -C4, 2);
+
+        tmp1[10] = t3 - t0 - t2;
+        tmp1[ 2] = t3 + t0 + t1;
+        tmp1[14] = t3 + t2 - t1;
+
+        tmp1[ 4] = MULH3(in1[2*5] + in1[2*7] - in1[2*1], -C3, 2);
+        t2 = MULH3(in1[2*1] + in1[2*5],    C1, 2);
+        t3 = MULH3(in1[2*5] - in1[2*7], -2*C7, 1);
+        t0 = MULH3(in1[2*3], C3, 2);
+
+        t1 = MULH3(in1[2*1] + in1[2*7],   -C5, 2);
+
+        tmp1[ 0] = t2 + t3 + t0;
+        tmp1[12] = t2 + t1 - t0;
+        tmp1[ 8] = t3 - t1 - t0;
+    }
+
+    i = 0;
+    for (j = 0; j < 4; j++) {      /* each pass reads tmp[i..i+3] and produces four out[] and four buf[] values */
+        t0 = tmp[i];
+        t1 = tmp[i + 2];
+        s0 = t1 + t0;
+        s2 = t1 - t0;
+
+        t2 = tmp[i + 1];
+        t3 = tmp[i + 3];
+        s1 = MULH3(t3 + t2, icos36h[    j], 2);
+        s3 = MULLx(t3 - t2, icos36 [8 - j], FRAC_BITS);
+
+        t0 = s0 + s1;              /* t0 is windowed into buf[] (read back by the next call on this buf) */
+        t1 = s0 - s1;              /* t1 is windowed and added to the old buf[] value to give out[] */
+        out[(9 + j) * SBLIMIT] = MULH3(t1, win[     9 + j], 1) + buf[4*(9 + j)];
+        out[(8 - j) * SBLIMIT] = MULH3(t1, win[     8 - j], 1) + buf[4*(8 - j)];
+        buf[4 * ( 9 + j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + j], 1);
+        buf[4 * ( 8 - j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 8 - j], 1);
+
+        t0 = s2 + s3;
+        t1 = s2 - s3;
+        out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[     9 + 8 - j], 1) + buf[4*(9 + 8 - j)];
+        out[         j  * SBLIMIT] = MULH3(t1, win[             j], 1) + buf[4*(        j)];
+        buf[4 * ( 9 + 8 - j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + 8 - j], 1);
+        buf[4 * (         j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2         + j], 1);
+        i += 4;
+    }
+
+    s0 = tmp[16];                  /* remaining pair tmp[16], tmp[17]: output indices 4 and 13 */
+    s1 = MULH3(tmp[17], icos36h[4], 2);
+    t0 = s0 + s1;
+    t1 = s0 - s1;
+    out[(9 + 4) * SBLIMIT] = MULH3(t1, win[     9 + 4], 1) + buf[4*(9 + 4)];
+    out[(8 - 4) * SBLIMIT] = MULH3(t1, win[     8 - 4], 1) + buf[4*(8 - 4)];
+    buf[4 * ( 9 + 4     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + 4], 1);
+    buf[4 * ( 8 - 4     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 8 - 4], 1);
+}
+
+void RENAME(ff_imdct36_blocks)(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in,
+                               int count, int switch_point, int block_type)
+{   /* Runs imdct36() on count consecutive 18-value input blocks; in[] is modified by imdct36(). */
+    int j;
+    for (j=0 ; j < count; j++) {
+        /* apply window & overlap with previous buffer */
+
+        /* select window: blocks 0 and 1 use window 0 when switch_point is set */
+        int win_idx = (switch_point && j < 2) ? 0 : block_type;
+        INTFLOAT *win = RENAME(ff_mdct_win)[win_idx + (4 & -(j & 1))];   /* odd j selects entry win_idx + 4 */
+
+        imdct36(out, buf, in, win);
+
+        in  += 18;
+        buf += ((j&3) != 3 ? 1 : (72-3));   /* +1, +1, +1, then +69: a net advance of 72 every four blocks */
+        out++;
+    }
+}
+
diff --git a/media/ffvpx/libavcodec/mpegaudiotab.h b/media/ffvpx/libavcodec/mpegaudiotab.h
new file mode 100644
index 0000000000..bb2e5de4ea
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiotab.h
@@ -0,0 +1,102 @@
+/*
+ * mpeg audio layer 2 tables. Most of them come from the mpeg audio
+ * specification.
+ *
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * mpeg audio layer 2 tables.
+ * Most of them come from the mpeg audio specification.
+ */
+
+#ifndef AVCODEC_MPEGAUDIOTAB_H
+#define AVCODEC_MPEGAUDIOTAB_H
+
+#include <stdint.h>
+#include "mpegaudio.h"
+
+static const int costab32[30] = {
+    FIX(0.54119610014619701222),
+    FIX(1.3065629648763763537),
+
+    FIX(0.50979557910415917998),
+    FIX(2.5629154477415054814),
+    FIX(0.89997622313641556513),
+    FIX(0.60134488693504528634),
+
+    FIX(0.5024192861881556782),
+    FIX(5.1011486186891552563),
+    FIX(0.78815462345125020249),
+    FIX(0.64682178335999007679),
+    FIX(0.56694403481635768927),
+    FIX(1.0606776859903470633),
+    FIX(1.7224470982383341955),
+    FIX(0.52249861493968885462),
+
+    FIX(10.19000812354803287),
+    FIX(0.674808341455005678),
+    FIX(1.1694399334328846596),
+    FIX(0.53104259108978413284),
+    FIX(2.0577810099534108446),
+    FIX(0.58293496820613388554),
+    FIX(0.83934964541552681272),
+    FIX(0.50547095989754364798),
+    FIX(3.4076084184687189804),
+    FIX(0.62250412303566482475),
+    FIX(0.97256823786196078263),
+    FIX(0.51544730992262455249),
+    FIX(1.4841646163141661852),
+    FIX(0.5531038960344445421),
+    FIX(0.74453627100229857749),
+    FIX(0.5006029982351962726),
+};
+
+static const int bitinv32[32] = {
+    0,  16,  8, 24,  4,  20,  12,  28,
+    2,  18, 10, 26,  6,  22,  14,  30,
+    1,  17,  9, 25,  5,  21,  13,  29,
+    3,  19, 11, 27,  7,  23,  15,  31
+};
+
+
+/* signal to noise ratio of each quantization step (could be
+   computed from quant_steps[]). The values are in dB multiplied by 10.
+*/
+static const unsigned short quant_snr[17] = {
+     70, 110, 160, 208,
+    253, 316, 378, 439,
+    499, 559, 620, 680,
+    740, 800, 861, 920,
+    980
+};
+
+/* fixed psycho acoustic model. Values of SNR taken from the 'toolame'
+   project */
+static const float fixed_smr[SBLIMIT] =  {
+    30, 17, 16, 10, 3, 12, 8, 2.5,
+    5, 5, 6, 6, 5, 6, 10, 6,
+    -4, -10, -21, -30, -42, -55, -68, -75,
+    -75, -75, -75, -75, -91, -107, -110, -108
+};
+
+static const unsigned char nb_scale_factors[4] = { 3, 2, 1, 2 };
+
+#endif /* AVCODEC_MPEGAUDIOTAB_H */
diff --git a/media/ffvpx/libavcodec/mpegaudiotabs.c b/media/ffvpx/libavcodec/mpegaudiotabs.c
new file mode 100644
index 0000000000..eaa380c808
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiotabs.c
@@ -0,0 +1,22 @@
+/*
+ * MPEG Audio common tables
+ * copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "mpegaudiotabs.h"
diff --git a/media/ffvpx/libavcodec/mpegaudiotabs.h b/media/ffvpx/libavcodec/mpegaudiotabs.h
new file mode 100644
index 0000000000..671b83848d
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegaudiotabs.h
@@ -0,0 +1,39 @@
+/*
+ * MPEG Audio common tables
+ * copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGAUDIOTABS_H
+#define AVCODEC_MPEGAUDIOTABS_H
+
+#include <stdint.h>
+
+const uint16_t ff_mpa_bitrate_tab[2][3][15] = { /* NOTE(review): a definition, not a declaration - only one translation unit per link may include this header */
+    { { 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448 },
+      { 0, 32, 48, 56,  64,  80,  96, 112, 128, 160, 192, 224, 256, 320, 384 },
+      { 0, 32, 40, 48,  56,  64,  80,  96, 112, 128, 160, 192, 224, 256, 320 } },
+    { { 0, 32, 48, 56,  64,  80,  96, 112, 128, 144, 160, 176, 192, 224, 256 },
+      { 0,  8, 16, 24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160 },
+      { 0,  8, 16, 24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160 }
+    }
+}; /* presumably kbit/s, indexed [version][layer][bitrate index] - TODO confirm against callers */
+
+const uint16_t ff_mpa_freq_tab[3] = { 44100, 48000, 32000 }; /* presumably sample rates in Hz - TODO confirm */
+
+#endif
diff --git a/media/ffvpx/libavcodec/mpegpicture.h b/media/ffvpx/libavcodec/mpegpicture.h
new file mode 100644
index 0000000000..7919aa402c
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegpicture.h
@@ -0,0 +1,105 @@
+/*
+ * Mpeg video formats-related defines and utility functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGPICTURE_H
+#define AVCODEC_MPEGPICTURE_H
+
+#include <stdint.h>
+
+#include "libavutil/frame.h"
+
+#include "avcodec.h"
+#include "motion_est.h"
+#include "threadframe.h"
+
+#define MPEGVIDEO_MAX_PLANES 4
+#define MAX_PICTURE_COUNT 36
+#define EDGE_WIDTH 16
+
+typedef struct ScratchpadContext {
+    uint8_t *edge_emu_buffer;     ///< temporary buffer used when MVs point to out-of-frame data
+    uint8_t *rd_scratchpad;       ///< scratchpad for rate distortion mb decision
+    uint8_t *obmc_scratchpad;
+    uint8_t *b_scratchpad;        ///< scratchpad used for writing into write only buffers
+} ScratchpadContext;
+
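+/**
+ * Picture: an AVFrame with its ThreadFrame plus buffer-backed per-picture tables (qscale, motion vectors, mb types, skip flags, reference indices).
+ */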
+typedef struct Picture {
+    struct AVFrame *f;
+    ThreadFrame tf;
+
+    AVBufferRef *qscale_table_buf;
+    int8_t *qscale_table;
+
+    AVBufferRef *motion_val_buf[2];
+    int16_t (*motion_val[2])[2];
+
+    AVBufferRef *mb_type_buf;
+    uint32_t *mb_type;          ///< types and macros are defined in mpegutils.h
+
+    AVBufferRef *mbskip_table_buf;
+    uint8_t *mbskip_table;
+
+    AVBufferRef *ref_index_buf[2];
+    int8_t *ref_index[2];
+
+    int alloc_mb_width;         ///< mb_width used to allocate tables
+    int alloc_mb_height;        ///< mb_height used to allocate tables
+    int alloc_mb_stride;        ///< mb_stride used to allocate tables
+
+    AVBufferRef *hwaccel_priv_buf;
+    void *hwaccel_picture_private; ///< Hardware accelerator private data
+
+    int field_picture;          ///< whether or not the picture was encoded in separate fields
+
+    int b_frame_score;
+    int needs_realloc;          ///< Picture needs to be reallocated (eg due to a frame size change)
+
+    int reference;
+    int shared;
+
+    int display_picture_number;
+    int coded_picture_number;
+} Picture;
+
+/**
+ * Allocate a Picture.
+ * The pixels are allocated/set by calling get_buffer() if shared = 0.
+ */
+int ff_alloc_picture(AVCodecContext *avctx, Picture *pic, MotionEstContext *me,
+                     ScratchpadContext *sc, int shared, int encoding,
+                     int chroma_x_shift, int chroma_y_shift, int out_format,
+                     int mb_stride, int mb_width, int mb_height, int b8_stride,
+                     ptrdiff_t *linesize, ptrdiff_t *uvlinesize);
+
+int ff_mpeg_framesize_alloc(AVCodecContext *avctx, MotionEstContext *me,
+                            ScratchpadContext *sc, int linesize);
+
+int ff_mpeg_ref_picture(AVCodecContext *avctx, Picture *dst, Picture *src);
+void ff_mpeg_unref_picture(AVCodecContext *avctx, Picture *picture);
+
+void ff_mpv_picture_free(AVCodecContext *avctx, Picture *pic);
+int ff_update_picture_tables(Picture *dst, const Picture *src);
+
+int ff_find_unused_picture(AVCodecContext *avctx, Picture *picture, int shared);
+
+#endif /* AVCODEC_MPEGPICTURE_H */
diff --git a/media/ffvpx/libavcodec/mpegutils.h b/media/ffvpx/libavcodec/mpegutils.h
new file mode 100644
index 0000000000..386110bb8c
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegutils.h
@@ -0,0 +1,142 @@
+/*
+ * Mpeg video formats-related defines and utility functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGUTILS_H
+#define AVCODEC_MPEGUTILS_H
+
+#include <stdint.h>
+
+#include "libavutil/frame.h"
+
+#include "avcodec.h"
+
+/**
+ * Return value for header parsers if frame is not coded.
+ * */
+#define FRAME_SKIPPED 100
+
+/* picture type */
+#define PICT_TOP_FIELD     1
+#define PICT_BOTTOM_FIELD  2
+#define PICT_FRAME         3
+
+#define MAX_MB_BYTES    (30 * 16 * 16 * 3 / 8 + 120)
+#define MAX_FCODE        7
+
+/* MB types */
+#define MB_TYPE_INTRA4x4   (1 <<  0)
+#define MB_TYPE_INTRA16x16 (1 <<  1) // FIXME H.264-specific
+#define MB_TYPE_INTRA_PCM  (1 <<  2) // FIXME H.264-specific
+#define MB_TYPE_16x16      (1 <<  3)
+#define MB_TYPE_16x8       (1 <<  4)
+#define MB_TYPE_8x16       (1 <<  5)
+#define MB_TYPE_8x8        (1 <<  6)
+#define MB_TYPE_INTERLACED (1 <<  7)
+#define MB_TYPE_DIRECT2    (1 <<  8) // FIXME
+#define MB_TYPE_ACPRED     (1 <<  9)
+#define MB_TYPE_GMC        (1 << 10)
+#define MB_TYPE_SKIP       (1 << 11)
+#define MB_TYPE_P0L0       (1 << 12)
+#define MB_TYPE_P1L0       (1 << 13)
+#define MB_TYPE_P0L1       (1 << 14)
+#define MB_TYPE_P1L1       (1 << 15)
+#define MB_TYPE_L0         (MB_TYPE_P0L0 | MB_TYPE_P1L0)
+#define MB_TYPE_L1         (MB_TYPE_P0L1 | MB_TYPE_P1L1)
+#define MB_TYPE_L0L1       (MB_TYPE_L0   | MB_TYPE_L1)
+#define MB_TYPE_QUANT      (1 << 16)
+#define MB_TYPE_CBP        (1 << 17)
+
+#define MB_TYPE_INTRA    MB_TYPE_INTRA4x4 // default mb_type if there is just one type
+
+#define IS_INTRA4x4(a)   ((a) & MB_TYPE_INTRA4x4)
+#define IS_INTRA16x16(a) ((a) & MB_TYPE_INTRA16x16)
+#define IS_PCM(a)        ((a) & MB_TYPE_INTRA_PCM) // same test as IS_INTRA_PCM
+#define IS_INTRA(a)      ((a) & 7) // 7 == MB_TYPE_INTRA4x4 | MB_TYPE_INTRA16x16 | MB_TYPE_INTRA_PCM (bits 0-2)
+#define IS_INTER(a)      ((a) & (MB_TYPE_16x16 | MB_TYPE_16x8 | \
+                                 MB_TYPE_8x16  | MB_TYPE_8x8))
+#define IS_SKIP(a)       ((a) & MB_TYPE_SKIP)
+#define IS_INTRA_PCM(a)  ((a) & MB_TYPE_INTRA_PCM)
+#define IS_INTERLACED(a) ((a) & MB_TYPE_INTERLACED)
+#define IS_DIRECT(a)     ((a) & MB_TYPE_DIRECT2)
+#define IS_GMC(a)        ((a) & MB_TYPE_GMC)
+#define IS_16X16(a)      ((a) & MB_TYPE_16x16)
+#define IS_16X8(a)       ((a) & MB_TYPE_16x8)
+#define IS_8X16(a)       ((a) & MB_TYPE_8x16)
+#define IS_8X8(a)        ((a) & MB_TYPE_8x8)
+#define IS_SUB_8X8(a)    ((a) & MB_TYPE_16x16) // note reused
+#define IS_SUB_8X4(a)    ((a) & MB_TYPE_16x8)  // note reused
+#define IS_SUB_4X8(a)    ((a) & MB_TYPE_8x16)  // note reused
+#define IS_SUB_4X4(a)    ((a) & MB_TYPE_8x8)   // note reused
+#define IS_ACPRED(a)     ((a) & MB_TYPE_ACPRED)
+#define IS_QUANT(a)      ((a) & MB_TYPE_QUANT)
+#define IS_DIR(a, part, list) ((a) & (MB_TYPE_P0L0 << ((part) + 2 * (list))))
+
+// does this mb use listX, note does not work if subMBs
+#define USES_LIST(a, list) ((a) & ((MB_TYPE_P0L0 | MB_TYPE_P1L0) << (2 * (list))))
+
+#define HAS_CBP(a)       ((a) & MB_TYPE_CBP)
+
+/* MB types for encoding */
+#define CANDIDATE_MB_TYPE_INTRA      (1 <<  0)
+#define CANDIDATE_MB_TYPE_INTER      (1 <<  1)
+#define CANDIDATE_MB_TYPE_INTER4V    (1 <<  2)
+#define CANDIDATE_MB_TYPE_SKIPPED    (1 <<  3)
+
+#define CANDIDATE_MB_TYPE_DIRECT     (1 <<  4)
+#define CANDIDATE_MB_TYPE_FORWARD    (1 <<  5)
+#define CANDIDATE_MB_TYPE_BACKWARD   (1 <<  6)
+#define CANDIDATE_MB_TYPE_BIDIR      (1 <<  7)
+
+#define CANDIDATE_MB_TYPE_INTER_I    (1 <<  8)
+#define CANDIDATE_MB_TYPE_FORWARD_I  (1 <<  9)
+#define CANDIDATE_MB_TYPE_BACKWARD_I (1 << 10)
+#define CANDIDATE_MB_TYPE_BIDIR_I    (1 << 11)
+
+#define CANDIDATE_MB_TYPE_DIRECT0    (1 << 12)
+
+#define INPLACE_OFFSET 16
+
+enum OutputFormat {
+    FMT_MPEG1,
+    FMT_H261,
+    FMT_H263,
+    FMT_MJPEG,
+    FMT_SPEEDHQ,
+};
+
+
+/**
+ * Draw a horizontal band if supported.
+ *
+ * @param h is the normal height, this will be reduced automatically if needed
+ */
+void ff_draw_horiz_band(AVCodecContext *avctx, const AVFrame *cur, const AVFrame *last,
+                        int y, int h, int picture_structure, int first_field,
+                        int low_delay);
+
+/**
+ * Print debugging info for the given picture.
+ */
+void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict,
+                          const uint8_t *mbskip_table, const uint32_t *mbtype_table,
+                          const int8_t *qscale_table, int16_t (*const motion_val[2])[2],
+                          int mb_width, int mb_height, int mb_stride, int quarter_sample);
+
+#endif /* AVCODEC_MPEGUTILS_H */
diff --git a/media/ffvpx/libavcodec/mpegvideo.h b/media/ffvpx/libavcodec/mpegvideo.h
new file mode 100644
index 0000000000..55828e6102
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegvideo.h
@@ -0,0 +1,612 @@
+/*
+ * Generic DCT based hybrid video encoder
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * mpegvideo header.
+ */
+
+#ifndef AVCODEC_MPEGVIDEO_H
+#define AVCODEC_MPEGVIDEO_H
+
+#include "avcodec.h"
+#include "blockdsp.h"
+#include "error_resilience.h"
+#include "fdctdsp.h"
+#include "get_bits.h"
+#include "h264chroma.h"
+#include "h263dsp.h"
+#include "hpeldsp.h"
+#include "idctdsp.h"
+#include "me_cmp.h"
+#include "motion_est.h"
+#include "mpegpicture.h"
+#include "mpegvideoencdsp.h"
+#include "pixblockdsp.h"
+#include "put_bits.h"
+#include "ratecontrol.h"
+#include "mpegutils.h"
+#include "qpeldsp.h"
+#include "videodsp.h"
+
+#define MAX_THREADS 32
+
+#define MAX_B_FRAMES 16
+
+/**
+ * Scantable.
+ */
+typedef struct ScanTable {
+    const uint8_t *scantable;
+    uint8_t permutated[64];
+    uint8_t raster_end[64];
+} ScanTable;
+
+/**
+ * MpegEncContext.
+ */
+typedef struct MpegEncContext {
+    AVClass *class;
+
+    int y_dc_scale, c_dc_scale;
+    int ac_pred;
+    int block_last_index[12];  ///< last non zero coefficient in block
+    int h263_aic;              ///< Advanced INTRA Coding (AIC)
+
+    /* scantables */
+    ScanTable inter_scantable; ///< if inter == intra then intra should be used to reduce the cache usage
+
+    /* WARNING: changes above this line require updates to hardcoded
+     *          offsets used in ASM. */
+
+    ScanTable intra_scantable;
+    uint8_t permutated_intra_h_scantable[64];
+    uint8_t permutated_intra_v_scantable[64];
+
+    struct AVCodecContext *avctx;
+    /* The following pointer is intended for codecs sharing code
+     * between decoder and encoder and in need of a common context to do so. */
+    void *private_ctx;
+    /* the following parameters must be initialized before encoding */
+    int width, height;///< picture size. must be a multiple of 16
+    int gop_size;
+    int intra_only;   ///< if true, only intra pictures are generated
+    int64_t bit_rate; ///< wanted bit rate
+    enum OutputFormat out_format; ///< output format
+    int h263_pred;    ///< use MPEG-4/H.263 ac/dc predictions
+    int pb_frame;     ///< PB-frame mode (0 = none, 1 = base, 2 = improved)
+
+/* the following codec id fields are deprecated in favor of codec_id */
+    int h263_plus;    ///< H.263+ headers
+    int h263_flv;     ///< use flv H.263 header
+
+    enum AVCodecID codec_id;     /* see AV_CODEC_ID_xxx */
+    int fixed_qscale; ///< fixed qscale if non zero
+    int encoding;     ///< true if we are encoding (vs decoding)
+    int max_b_frames; ///< max number of B-frames for encoding
+    int luma_elim_threshold;
+    int chroma_elim_threshold;
+    int workaround_bugs;       ///< workaround bugs in encoders which cannot be detected automatically
+    int codec_tag;             ///< internal codec_tag upper case converted from avctx codec_tag
+    /* the following fields are managed internally by the encoder */
+
+    /* sequence parameters */
+    int context_initialized;
+    int input_picture_number;  ///< used to set pic->display_picture_number, should not be used for/by anything else
+    int coded_picture_number;  ///< used to set pic->coded_picture_number, should not be used for/by anything else
+    int picture_number;       //FIXME remove, unclear definition
+    int extradata_parsed;
+    int picture_in_gop_number; ///< 0-> first pic in gop, ...
+    int mb_width, mb_height;   ///< number of MBs horizontally & vertically
+    int mb_stride;             ///< mb_width+1 used for some arrays to allow simple addressing of left & top MBs without sig11
+    int b8_stride;             ///< 2*mb_width+1 used for some 8x8 block arrays to allow simple addressing
+    int h_edge_pos, v_edge_pos;///< horizontal / vertical position of the right/bottom edge (pixel replication)
+    int mb_num;                ///< number of MBs of a picture
+    ptrdiff_t linesize;        ///< line size, in bytes, may be different from width
+    ptrdiff_t uvlinesize;      ///< line size, for chroma in bytes, may be different from width
+    Picture *picture;          ///< main picture buffer
+    Picture **input_picture;   ///< next pictures on display order for encoding
+    Picture **reordered_input_picture; ///< pointer to the next pictures in coded order for encoding
+
+    int64_t user_specified_pts; ///< last non-zero pts from AVFrame which was passed into avcodec_send_frame()
+    /**
+     * pts difference between the first and second input frame, used for
+     * calculating dts of the first frame when there's a delay */
+    int64_t dts_delta;
+    /**
+     * reordered pts to be used as dts for the next output frame when there's
+     * a delay */
+    int64_t reordered_pts;
+
+    /** bit output */
+    PutBitContext pb;
+
+    int start_mb_y;            ///< start mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
+    int end_mb_y;              ///< end   mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
+    struct MpegEncContext *thread_context[MAX_THREADS];
+    int slice_context_count;   ///< number of used thread_contexts
+
+    /**
+     * copy of the previous picture structure.
+     * note, linesize & data, might not match the previous picture (for field pictures)
+     */
+    Picture last_picture;
+
+    /**
+     * copy of the next picture structure.
+     * note, linesize & data, might not match the next picture (for field pictures)
+     */
+    Picture next_picture;
+
+    /**
+     * Reference to the source picture for encoding.
+     * note, linesize & data, might not match the source picture (for field pictures)
+     */
+    AVFrame *new_picture;
+
+    /**
+     * copy of the current picture structure.
+     * note, linesize & data, might not match the current picture (for field pictures)
+     */
+    Picture current_picture;    ///< buffer to store the decompressed current picture
+
+    Picture *last_picture_ptr;     ///< pointer to the previous picture.
+    Picture *next_picture_ptr;     ///< pointer to the next picture (for bidir pred)
+    Picture *current_picture_ptr;  ///< pointer to the current picture
+    int skipped_last_frame;
+    int last_dc[3];                ///< last DC values for MPEG-1
+    int16_t *dc_val_base;
+    int16_t *dc_val[3];            ///< used for MPEG-4 DC prediction, all 3 arrays must be continuous
+    const uint8_t *y_dc_scale_table;     ///< qscale -> y_dc_scale table
+    const uint8_t *c_dc_scale_table;     ///< qscale -> c_dc_scale table
+    const uint8_t *chroma_qscale_table;  ///< qscale -> chroma_qscale (H.263)
+    uint8_t *coded_block_base;
+    uint8_t *coded_block;          ///< used for coded block pattern prediction (msmpeg4v3, wmv1)
+    int16_t (*ac_val_base)[16];
+    int16_t (*ac_val[3])[16];      ///< used for MPEG-4 AC prediction, all 3 arrays must be continuous
+    int mb_skipped;                ///< MUST BE SET only during DECODING
+    uint8_t *mbskip_table;        /**< used to avoid copy if macroblock skipped (for black regions for example)
+                                   and used for B-frame encoding & decoding (contains skip table of next P-frame) */
+    uint8_t *mbintra_table;       ///< used to avoid setting {ac, dc, cbp}-pred stuff to zero on inter MB decoding
+    uint8_t *cbp_table;           ///< used to store cbp, ac_pred for partitioned decoding
+    uint8_t *pred_dir_table;      ///< used to store pred_dir for partitioned decoding
+
+    ScratchpadContext sc;
+
+    int qscale;                 ///< QP
+    int chroma_qscale;          ///< chroma QP
+    unsigned int lambda;        ///< Lagrange multiplier used in rate distortion
+    unsigned int lambda2;       ///< (lambda*lambda) >> FF_LAMBDA_SHIFT
+    int *lambda_table;
+    int adaptive_quant;         ///< use adaptive quantization
+    int dquant;                 ///< qscale difference to prev qscale
+    int pict_type;              ///< AV_PICTURE_TYPE_I, AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, ...
+    int vbv_delay;
+    int last_pict_type; //FIXME remove
+    int last_non_b_pict_type;   ///< used for MPEG-4 gmc B-frames & ratecontrol
+    int droppable;
+    int last_lambda_for[5];     ///< last lambda for a specific pict type
+    int skipdct;                ///< skip dct and code zero residual
+
+    /* motion compensation */
+    int unrestricted_mv;        ///< mv can point outside of the coded picture
+    int h263_long_vectors;      ///< use horrible H.263v1 long vector mode
+
+    BlockDSPContext bdsp;
+    FDCTDSPContext fdsp;
+    H264ChromaContext h264chroma;
+    HpelDSPContext hdsp;
+    IDCTDSPContext idsp;
+    MECmpContext mecc;
+    MpegvideoEncDSPContext mpvencdsp;
+    PixblockDSPContext pdsp;
+    QpelDSPContext qdsp;
+    VideoDSPContext vdsp;
+    H263DSPContext h263dsp;
+    int f_code;                 ///< forward MV resolution
+    int b_code;                 ///< backward MV resolution for B-frames (MPEG-4)
+    int16_t (*p_mv_table_base)[2];
+    int16_t (*b_forw_mv_table_base)[2];
+    int16_t (*b_back_mv_table_base)[2];
+    int16_t (*b_bidir_forw_mv_table_base)[2];
+    int16_t (*b_bidir_back_mv_table_base)[2];
+    int16_t (*b_direct_mv_table_base)[2];
+    int16_t (*p_field_mv_table_base)[2];
+    int16_t (*b_field_mv_table_base)[2];
+    int16_t (*p_mv_table)[2];            ///< MV table (1MV per MB) P-frame encoding
+    int16_t (*b_forw_mv_table)[2];       ///< MV table (1MV per MB) forward mode B-frame encoding
+    int16_t (*b_back_mv_table)[2];       ///< MV table (1MV per MB) backward mode B-frame encoding
+    int16_t (*b_bidir_forw_mv_table)[2]; ///< MV table (1MV per MB) bidir mode B-frame encoding
+    int16_t (*b_bidir_back_mv_table)[2]; ///< MV table (1MV per MB) bidir mode B-frame encoding
+    int16_t (*b_direct_mv_table)[2];     ///< MV table (1MV per MB) direct mode B-frame encoding
+    int16_t (*p_field_mv_table[2][2])[2];   ///< MV table (2MV per MB) interlaced P-frame encoding
+    int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced B-frame encoding
+    uint8_t (*p_field_select_table[2]);  ///< Only the first element is allocated
+    uint8_t (*b_field_select_table[2][2]); ///< Only the first element is allocated
+
+    /* The following fields are encoder-only */
+    uint16_t *mb_var;           ///< Table for MB variances
+    uint16_t *mc_mb_var;        ///< Table for motion compensated MB variances
+    uint8_t *mb_mean;           ///< Table for MB luminance
+    int64_t mb_var_sum;         ///< sum of MB variance for current frame
+    int64_t mc_mb_var_sum;      ///< motion compensated MB variance for current frame
+    uint64_t encoding_error[MPEGVIDEO_MAX_PLANES];
+
+    int motion_est;                      ///< ME algorithm
+    int me_penalty_compensation;
+    int me_pre;                          ///< prepass for motion estimation
+    int mv_dir;
+#define MV_DIR_FORWARD   1
+#define MV_DIR_BACKWARD  2
+#define MV_DIRECT        4 ///< bidirectional mode where the difference equals the MV of the last P/S/I-Frame (MPEG-4)
+    int mv_type;
+#define MV_TYPE_16X16       0   ///< 1 vector for the whole mb
+#define MV_TYPE_8X8         1   ///< 4 vectors (H.263, MPEG-4 4MV)
+#define MV_TYPE_16X8        2   ///< 2 vectors, one per 16x8 block
+#define MV_TYPE_FIELD       3   ///< 2 vectors, one per field
+#define MV_TYPE_DMV         4   ///< 2 vectors, special mpeg2 Dual Prime Vectors
+    /**motion vectors for a macroblock
+       first coordinate : 0 = forward 1 = backward
+       second "         : depend on type
+       third  "         : 0 = x, 1 = y
+    */
+    int mv[2][4][2];
+    int field_select[2][2];
+    int last_mv[2][2][2];             ///< last MV, used for MV prediction in MPEG-1 & B-frame MPEG-4
+    const uint8_t *fcode_tab;         ///< smallest fcode needed for each MV
+    int16_t direct_scale_mv[2][64];   ///< precomputed to avoid divisions in ff_mpeg4_set_direct_mv
+
+    MotionEstContext me;
+
+    int no_rounding;  /**< apply no rounding to motion compensation (MPEG-4, msmpeg4, ...)
+                        for B-frames rounding mode is always 0 */
+
+    /* macroblock layer */
+    int mb_x, mb_y;
+    int mb_skip_run;
+    int mb_intra;
+    uint16_t *mb_type;  ///< Table for candidate MB types for encoding (defines in mpegutils.h)
+
+    int block_index[6]; ///< index to current MB in block based arrays with edges
+    int block_wrap[6];
+    uint8_t *dest[3];
+
+    int *mb_index2xy;        ///< mb_index -> mb_x + mb_y*mb_stride
+
+    /** matrix transmitted in the bitstream */
+    uint16_t intra_matrix[64];
+    uint16_t chroma_intra_matrix[64];
+    uint16_t inter_matrix[64];
+    uint16_t chroma_inter_matrix[64];
+
+    int intra_quant_bias;    ///< bias for the quantizer
+    int inter_quant_bias;    ///< bias for the quantizer
+    int min_qcoeff;          ///< minimum encodable coefficient
+    int max_qcoeff;          ///< maximum encodable coefficient
+    int ac_esc_length;       ///< num of bits needed to encode the longest esc
+    uint8_t *intra_ac_vlc_length;
+    uint8_t *intra_ac_vlc_last_length;
+    uint8_t *intra_chroma_ac_vlc_length;
+    uint8_t *intra_chroma_ac_vlc_last_length;
+    uint8_t *inter_ac_vlc_length;
+    uint8_t *inter_ac_vlc_last_length;
+    uint8_t *luma_dc_vlc_length;
+
+    int coded_score[12];
+
+    /** precomputed matrix (combine qscale and DCT renorm) */
+    int (*q_intra_matrix)[64];
+    int (*q_chroma_intra_matrix)[64];
+    int (*q_inter_matrix)[64];
+    /** identical to the above but for MMX & these are not permutated, second 64 entries are bias*/
+    uint16_t (*q_intra_matrix16)[2][64];
+    uint16_t (*q_chroma_intra_matrix16)[2][64];
+    uint16_t (*q_inter_matrix16)[2][64];
+
+    /* noise reduction */
+    int (*dct_error_sum)[64];
+    int dct_count[2];
+    uint16_t (*dct_offset)[64];
+
+    /* bit rate control */
+    int64_t total_bits;
+    int frame_bits;                ///< bits used for the current frame
+    int stuffing_bits;             ///< bits used for stuffing
+    int next_lambda;               ///< next lambda used for retrying to encode a frame
+    RateControlContext rc_context; ///< contains stuff only accessed in ratecontrol.c
+
+    /* statistics, used for 2-pass encoding */
+    int mv_bits;
+    int header_bits;
+    int i_tex_bits;
+    int p_tex_bits;
+    int i_count;
+    int skip_count;
+    int misc_bits; ///< cbp, mb_type
+    int last_bits; ///< temp var used for calculating the above vars
+
+    /* error concealment / resync */
+    int resync_mb_x;                 ///< x position of last resync marker
+    int resync_mb_y;                 ///< y position of last resync marker
+    GetBitContext last_resync_gb;    ///< used to search for the next resync marker
+    int mb_num_left;                 ///< number of MBs left in this video packet (for partitioned Slices only)
+
+    /* H.263 specific */
+    int gob_index;
+    int obmc;                       ///< overlapped block motion compensation
+    int mb_info;                    ///< interval for outputting info about mb offsets as side data
+    int prev_mb_info, last_mb_info;
+    uint8_t *mb_info_ptr;
+    int mb_info_size;
+    int ehc_mode;
+
+    /* H.263+ specific */
+    int umvplus;                    ///< == H.263+ && unrestricted_mv
+    int h263_aic_dir;               ///< AIC direction: 0 = left, 1 = top
+    int h263_slice_structured;
+    int alt_inter_vlc;              ///< alternative inter vlc
+    int modified_quant;
+    int loop_filter;
+    int custom_pcf;
+
+    /* MPEG-4 specific */
+    int studio_profile;
+    int dct_precision;
+    /// number of bits to represent the fractional part of time (encoder only)
+    int time_increment_bits;
+    int last_time_base;
+    int time_base;                  ///< time in seconds of last I,P,S Frame
+    int64_t time;                   ///< time of current frame
+    int64_t last_non_b_time;
+    uint16_t pp_time;               ///< time distance between the last 2 p,s,i frames
+    uint16_t pb_time;               ///< time distance between the last b and p,s,i frame
+    uint16_t pp_field_time;
+    uint16_t pb_field_time;         ///< like above, just for interlaced
+    int mcsel;
+    int quant_precision;
+    int quarter_sample;              ///< 1->qpel, 0->half pel ME/MC
+    int data_partitioning;           ///< data partitioning flag from header
+    int partitioned_frame;           ///< is current frame partitioned
+    int low_delay;                   ///< no reordering needed / has no B-frames
+    PutBitContext tex_pb;            ///< used for data partitioned VOPs
+    PutBitContext pb2;               ///< used for data partitioned VOPs
+    int mpeg_quant;
+    int padding_bug_score;             ///< used to detect the VERY common padding bug in MPEG-4
+
+    /* divx specific, used to workaround (many) bugs in divx5 */
+    int divx_packed;
+    uint8_t *bitstream_buffer; //Divx 5.01 puts several frames in a single one, this is used to reorder them
+    int bitstream_buffer_size;
+    unsigned int allocated_bitstream_buffer_size;
+
+    /* RV10 specific */
+    int rv10_version; ///< RV10 version: 0 or 3
+    int rv10_first_dc_coded[3];
+
+    /* MJPEG specific */
+    struct MJpegContext *mjpeg_ctx;
+    int esc_pos;
+
+    /* MSMPEG4 specific */
+    int mv_table_index;
+    int rl_table_index;
+    int rl_chroma_table_index;
+    int dc_table_index;
+    int use_skip_mb_code;
+    int slice_height;      ///< in macroblocks
+    int first_slice_line;  ///< used in MPEG-4 too to handle resync markers
+    int flipflop_rounding;
+    int msmpeg4_version;   ///< 0=not msmpeg4, 1=mp41, 2=mp42, 3=mp43/divx3 4=wmv1/7 5=wmv2/8
+    int per_mb_rl_table;
+    int esc3_level_length;
+    int esc3_run_length;
+    int inter_intra_pred;
+    int mspel;
+
+    /* decompression specific */
+    GetBitContext gb;
+
+    /* MPEG-1 specific */
+    int last_mv_dir;         ///< last mv_dir, used for B-frame encoding
+    int vbv_delay_pos;       ///< offset of vbv_delay in the bitstream
+
+    /* MPEG-2-specific - I wished not to have to support this mess. */
+    int progressive_sequence;
+    int mpeg_f_code[2][2];
+
+    // picture structure defines are loaded from mpegutils.h
+    int picture_structure;
+
+    int intra_dc_precision;
+    int frame_pred_frame_dct;
+    int top_field_first;
+    int concealment_motion_vectors;
+    int q_scale_type;
+    int brd_scale;
+    int intra_vlc_format;
+    int alternate_scan;
+    int repeat_first_field;
+    int chroma_420_type;
+    int chroma_format;
+#define CHROMA_420 1
+#define CHROMA_422 2
+#define CHROMA_444 3
+    int chroma_x_shift;//depends on pix_format, which depends on chroma_format
+    int chroma_y_shift;
+
+    int progressive_frame;
+    int full_pel[2];
+    int interlaced_dct;
+    int first_field;         ///< is 1 for the first field of a field picture 0 otherwise
+
+    /* RTP specific */
+    int rtp_mode;
+    int rtp_payload_size;
+
+    uint8_t *ptr_lastgob;
+    int16_t (*pblocks[12])[64];
+
+    int16_t (*block)[64]; ///< points to one of the following blocks
+    int16_t (*blocks)[12][64]; // for HQ mode we need to keep the best block
+    int (*decode_mb)(struct MpegEncContext *s, int16_t block[12][64]); // used by some codecs to avoid a switch()
+
+#define SLICE_OK         0
+#define SLICE_ERROR     -1
+#define SLICE_END       -2 ///<end marker found
+#define SLICE_NOEND     -3 ///<no end marker or error found but mb count exceeded
+
+    void (*dct_unquantize_mpeg1_intra)(struct MpegEncContext *s,
+                           int16_t *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_mpeg1_inter)(struct MpegEncContext *s,
+                           int16_t *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_mpeg2_intra)(struct MpegEncContext *s,
+                           int16_t *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_mpeg2_inter)(struct MpegEncContext *s,
+                           int16_t *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_h263_intra)(struct MpegEncContext *s,
+                           int16_t *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_h263_inter)(struct MpegEncContext *s,
+                           int16_t *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_intra)(struct MpegEncContext *s, // unquantizer to use (MPEG-4 can use both)
+                           int16_t *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_inter)(struct MpegEncContext *s, // unquantizer to use (MPEG-4 can use both)
+                           int16_t *block/*align 16*/, int n, int qscale);
+    int (*dct_quantize)(struct MpegEncContext *s, int16_t *block/*align 16*/, int n, int qscale, int *overflow);
+    int (*fast_dct_quantize)(struct MpegEncContext *s, int16_t *block/*align 16*/, int n, int qscale, int *overflow);
+    void (*denoise_dct)(struct MpegEncContext *s, int16_t *block);
+
+    int mpv_flags;      ///< flags set by private options
+    int quantizer_noise_shaping;
+
+    /**
+     * ratecontrol qmin qmax limiting method
+     * 0-> clipping, 1-> use a nice continuous function to limit qscale within qmin/qmax.
+     */
+    float rc_qsquish;
+    float rc_qmod_amp;
+    int   rc_qmod_freq;
+    float rc_initial_cplx;
+    float rc_buffer_aggressivity;
+    float border_masking;
+    int lmin, lmax;
+    int vbv_ignore_qmax;
+
+    char *rc_eq;
+
+    /* temp buffers for rate control */
+    float *cplx_tab, *bits_tab;
+
+    /* flag to indicate a reinitialization is required, e.g. after
+     * a frame size change */
+    int context_reinit;
+
+    ERContext er;
+
+    int error_rate;
+
+    /* temporary frames used by b_frame_strategy = 2 */
+    AVFrame *tmp_frames[MAX_B_FRAMES + 2];
+    int b_frame_strategy;
+    int b_sensitivity;
+
+    /* frame skip options for encoding */
+    int frame_skip_threshold;
+    int frame_skip_factor;
+    int frame_skip_exp;
+    int frame_skip_cmp;
+
+    int scenechange_threshold;
+    int noise_reduction;
+
+    int intra_penalty;
+} MpegEncContext;
+
+
+/**
+ * Set the given MpegEncContext to common defaults (same for encoding
+ * and decoding).  The changed fields will not depend upon the prior
+ * state of the MpegEncContext.
+ */
+void ff_mpv_common_defaults(MpegEncContext *s);
+
+int ff_mpv_common_init(MpegEncContext *s);
+void ff_mpv_common_init_arm(MpegEncContext *s);
+void ff_mpv_common_init_axp(MpegEncContext *s);
+void ff_mpv_common_init_neon(MpegEncContext *s);
+void ff_mpv_common_init_ppc(MpegEncContext *s);
+void ff_mpv_common_init_x86(MpegEncContext *s);
+void ff_mpv_common_init_mips(MpegEncContext *s);
+/**
+ * Initialize an MpegEncContext's thread contexts. Presumes that
+ * slice_context_count is already set and that all the fields
+ * that are freed/reset in free_duplicate_context() are NULL.
+ */
+int ff_mpv_init_duplicate_contexts(MpegEncContext *s);
+/**
+ * Initialize and allocate MpegEncContext fields dependent on the resolution.
+ */
+int ff_mpv_init_context_frame(MpegEncContext *s);
+/**
+ * Frees and resets MpegEncContext fields depending on the resolution
+ * as well as the slice thread contexts.
+ * Is used during resolution changes to avoid a full reinitialization of the
+ * codec.
+ */
+void ff_mpv_free_context_frame(MpegEncContext *s);
+
+void ff_mpv_common_end(MpegEncContext *s);
+
+void ff_clean_intra_table_entries(MpegEncContext *s);
+
+int ff_update_duplicate_context(MpegEncContext *dst, const MpegEncContext *src);
+void ff_set_qscale(MpegEncContext * s, int qscale);
+
+void ff_mpv_idct_init(MpegEncContext *s);
+void ff_init_scantable(const uint8_t *permutation, ScanTable *st,
+                       const uint8_t *src_scantable);
+void ff_init_block_index(MpegEncContext *s);
+
+void ff_mpv_motion(MpegEncContext *s,
+                   uint8_t *dest_y, uint8_t *dest_cb,
+                   uint8_t *dest_cr, int dir,
+                   uint8_t *const *ref_picture,
+                   op_pixels_func (*pix_op)[4],
+                   qpel_mc_func (*qpix_op)[16]);
+
+static inline void ff_update_block_index(MpegEncContext *s, int bits_per_raw_sample,
+                                         int lowres, int chroma_x_shift)
+{
+    const int bytes_per_pixel = 1 + (bits_per_raw_sample > 8); // 2 when samples exceed 8 bits, else 1
+    const int block_size = (8 * bytes_per_pixel) >> lowres;    // 8 samples in bytes, halved per lowres level
+
+    s->block_index[0]+=2; // entries 0..3 advance by two, entries 4..5 by one
+    s->block_index[1]+=2;
+    s->block_index[2]+=2;
+    s->block_index[3]+=2;
+    s->block_index[4]++;
+    s->block_index[5]++;
+    s->dest[0]+= 2*block_size; // dest[0] always moves by two blocks
+    s->dest[1] += (2 >> chroma_x_shift) * block_size; // two blocks if chroma_x_shift == 0, one if it is 1
+    s->dest[2] += (2 >> chroma_x_shift) * block_size;
+}
+
+#endif /* AVCODEC_MPEGVIDEO_H */
diff --git a/media/ffvpx/libavcodec/mpegvideodata.h b/media/ffvpx/libavcodec/mpegvideodata.h
new file mode 100644
index 0000000000..42c9d6c293
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegvideodata.h
@@ -0,0 +1,39 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGVIDEODATA_H
+#define AVCODEC_MPEGVIDEODATA_H
+
+#include <stdint.h>
+
+#include "libavutil/attributes_internal.h"
+
+FF_VISIBILITY_PUSH_HIDDEN
+/* encoding scans */
+extern const uint8_t ff_alternate_horizontal_scan[64];
+extern const uint8_t ff_alternate_vertical_scan[64];
+
+extern const uint8_t ff_mpeg12_dc_scale_table[4][32];
+static const uint8_t *const ff_mpeg1_dc_scale_table = ff_mpeg12_dc_scale_table[0];
+
+extern const uint8_t ff_mpeg2_non_linear_qscale[32];
+
+extern const uint8_t ff_default_chroma_qscale_table[32];
+FF_VISIBILITY_POP_HIDDEN
+
+#endif /* AVCODEC_MPEGVIDEODATA_H */
diff --git a/media/ffvpx/libavcodec/mpegvideodsp.h b/media/ffvpx/libavcodec/mpegvideodsp.h
new file mode 100644
index 0000000000..293e2548d3
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegvideodsp.h
@@ -0,0 +1,47 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGVIDEODSP_H
+#define AVCODEC_MPEGVIDEODSP_H
+
+#include <stdint.h>
+
+void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+              int dxx, int dxy, int dyx, int dyy, int shift, int r,
+              int width, int height);
+
+typedef struct MpegVideoDSPContext {
+    /**
+     * translational global motion compensation.
+     */
+    void (*gmc1)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */,
+                 int srcStride, int h, int x16, int y16, int rounder);
+    /**
+     * global motion compensation.
+     */
+    void (*gmc)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */,
+                int stride, int h, int ox, int oy,
+                int dxx, int dxy, int dyx, int dyy,
+                int shift, int r, int width, int height);
+} MpegVideoDSPContext;
+
+void ff_mpegvideodsp_init(MpegVideoDSPContext *c);
+void ff_mpegvideodsp_init_ppc(MpegVideoDSPContext *c);
+void ff_mpegvideodsp_init_x86(MpegVideoDSPContext *c);
+
+#endif /* AVCODEC_MPEGVIDEODSP_H */
diff --git a/media/ffvpx/libavcodec/mpegvideoencdsp.h b/media/ffvpx/libavcodec/mpegvideoencdsp.h
new file mode 100644
index 0000000000..95084679d9
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegvideoencdsp.h
@@ -0,0 +1,58 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGVIDEOENCDSP_H
+#define AVCODEC_MPEGVIDEOENCDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+
+#define BASIS_SHIFT 16
+#define RECON_SHIFT 6
+
+#define EDGE_TOP    1
+#define EDGE_BOTTOM 2
+
+typedef struct MpegvideoEncDSPContext {
+    int (*try_8x8basis)(const int16_t rem[64], const int16_t weight[64],
+                        const int16_t basis[64], int scale);
+    void (*add_8x8basis)(int16_t rem[64], const int16_t basis[64], int scale);
+
+    int (*pix_sum)(const uint8_t *pix, int line_size);
+    int (*pix_norm1)(const uint8_t *pix, int line_size);
+
+    void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
+                      int src_wrap, int width, int height);
+
+    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height,
+                       int w, int h, int sides);
+} MpegvideoEncDSPContext;
+
+void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
+                             AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_arm(MpegvideoEncDSPContext *c,
+                                 AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c,
+                                 AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
+                                 AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c,
+                                  AVCodecContext *avctx);
+
+#endif /* AVCODEC_MPEGVIDEOENCDSP_H */
diff --git a/media/ffvpx/libavcodec/null_bsf.c b/media/ffvpx/libavcodec/null_bsf.c
new file mode 100644
index 0000000000..28237076fb
--- /dev/null
+++ b/media/ffvpx/libavcodec/null_bsf.c
@@ -0,0 +1,29 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Null bitstream filter -- pass the input through unchanged.
+ */
+
+#include "bsf_internal.h"
+
+/* Filter descriptor named "null"; its filter callback is ff_bsf_get_packet_ref. */
+const FFBitStreamFilter ff_null_bsf = {
+    .p.name = "null", .filter = ff_bsf_get_packet_ref,
+};
diff --git a/media/ffvpx/libavcodec/options.c b/media/ffvpx/libavcodec/options.c
new file mode 100644
index 0000000000..a9b35ee1c3
--- /dev/null
+++ b/media/ffvpx/libavcodec/options.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Options definition for AVCodecContext.
+ */
+
+#include "config_components.h"
+
+#include "avcodec.h"
+#include "codec_internal.h"
+#include "libavutil/avassert.h"
+#include "libavutil/internal.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include <string.h>
+
+FF_DISABLE_DEPRECATION_WARNINGS
+#include "options_table.h"
+FF_ENABLE_DEPRECATION_WARNINGS
+
+/* AVClass.item_name callback: name of the attached codec, or the string "NULL". */
+static const char *context_to_name(void *ptr)
+{
+    const AVCodecContext *ctx = ptr;
+
+    /* A NULL context and a context without a codec are reported the same way. */
+    return (ctx && ctx->codec) ? ctx->codec->name : "NULL";
+}
+
+/* AVClass.child_next callback: the only child is priv_data, and only if the codec has a priv_class. */
+static void *codec_child_next(void *obj, void *prev)
+{
+    AVCodecContext *avctx = obj;
+    /* Only a NULL prev can yield the child; any other prev returns NULL. */
+    return (prev || !avctx->codec || !avctx->codec->priv_class) ? NULL : avctx->priv_data;
+}
+
+/* AVClass.child_class_iterate callback: priv_class of the next codec that has one, NULL at the end. */
+static const AVClass *codec_child_class_iterate(void **iter)
+{
+    const AVCodec *codec;
+    while ((codec = av_codec_iterate(iter)) != NULL)
+        if (codec->priv_class)
+            return codec->priv_class;
+    return NULL;
+}
+
+/* AVClass.get_category callback: DECODER for decoder codecs, ENCODER otherwise (also without a codec). */
+static AVClassCategory get_category(void *ptr)
+{
+    const AVCodecContext *ctx = ptr;
+    int is_decoder = ctx->codec && av_codec_is_decoder(ctx->codec);
+
+    return is_decoder ? AV_CLASS_CATEGORY_DECODER : AV_CLASS_CATEGORY_ENCODER;
+}
+
+static const AVClass av_codec_context_class = { /* AVClass describing AVCodecContext */
+    .class_name = "AVCodecContext",
+    .version = LIBAVUTIL_VERSION_INT,
+    .option = avcodec_options,
+    .item_name = context_to_name,
+    .get_category = get_category,
+    .category = AV_CLASS_CATEGORY_ENCODER,
+    .child_next = codec_child_next,
+    .child_class_iterate = codec_child_class_iterate,
+    .log_level_offset_offset = offsetof(AVCodecContext, log_level_offset),
+};
+
+/* Reset *s to the defaults for codec (may be NULL) and allocate the codec private data.
+ * Returns 0 on success, AVERROR(ENOMEM) if the private data cannot be allocated. */
+static int init_context_defaults(AVCodecContext *s, const AVCodec *codec)
+{
+    const FFCodec *const fcodec = ffcodec(codec);
+    int opt_flags;
+
+    memset(s, 0, sizeof(AVCodecContext));
+    s->av_class   = &av_codec_context_class;
+    s->codec_type = codec ? codec->type : AVMEDIA_TYPE_UNKNOWN;
+    if (codec) {
+        s->codec    = codec;
+        s->codec_id = codec->id;
+    }
+
+    /* Option flag matching the media type; stays 0 for every other type. */
+    switch (s->codec_type) {
+    case AVMEDIA_TYPE_AUDIO:    opt_flags = AV_OPT_FLAG_AUDIO_PARAM;    break;
+    case AVMEDIA_TYPE_VIDEO:    opt_flags = AV_OPT_FLAG_VIDEO_PARAM;    break;
+    case AVMEDIA_TYPE_SUBTITLE: opt_flags = AV_OPT_FLAG_SUBTITLE_PARAM; break;
+    default:                    opt_flags = 0;                          break;
+    }
+    av_opt_set_defaults2(s, opt_flags, opt_flags);
+    av_channel_layout_uninit(&s->ch_layout);
+
+    /* Explicit defaults assigned after the option defaults above. */
+    s->time_base           = (AVRational){ 0, 1 };
+    s->framerate           = (AVRational){ 0, 1 };
+    s->pkt_timebase        = (AVRational){ 0, 1 };
+    s->sample_aspect_ratio = (AVRational){ 0, 1 };
+    s->ch_layout.order     = AV_CHANNEL_ORDER_UNSPEC;
+    s->pix_fmt             = AV_PIX_FMT_NONE;
+    s->sw_pix_fmt          = AV_PIX_FMT_NONE;
+    s->sample_fmt          = AV_SAMPLE_FMT_NONE;
+    s->get_buffer2         = avcodec_default_get_buffer2;
+    s->get_format          = avcodec_default_get_format;
+    s->get_encode_buffer   = avcodec_default_get_encode_buffer;
+    s->execute             = avcodec_default_execute;
+    s->execute2            = avcodec_default_execute2;
+#if FF_API_REORDERED_OPAQUE
+FF_DISABLE_DEPRECATION_WARNINGS
+    s->reordered_opaque    = AV_NOPTS_VALUE;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    if (!codec)
+        return 0;
+    if (fcodec->priv_data_size) {
+        s->priv_data = av_mallocz(fcodec->priv_data_size);
+        if (!s->priv_data)
+            return AVERROR(ENOMEM);
+        if (codec->priv_class) {
+            /* The AVClass pointer is stored at the start of the private data. */
+            *(const AVClass **)s->priv_data = codec->priv_class;
+            av_opt_set_defaults(s->priv_data);
+        }
+    }
+    /* Per-codec overrides of the defaults; the list ends at the entry with a NULL key. */
+    for (const FFCodecDefault *d = fcodec->defaults; d && d->key; d++) {
+        int ret = av_opt_set(s, d->key, d->value, 0);
+        av_assert0(ret >= 0);
+    }
+    return 0;
+}
+
+/* Allocate an AVCodecContext initialised with the defaults for codec (may be NULL).
+ * Returns NULL when the allocation or the initialisation fails. */
+AVCodecContext *avcodec_alloc_context3(const AVCodec *codec)
+{
+    AVCodecContext *ctx = av_malloc(sizeof(*ctx));
+
+    if (!ctx)
+        return NULL;
+    if (init_context_defaults(ctx, codec) >= 0)
+        return ctx;
+    /* Initialisation failed: release the context again. */
+    av_free(ctx);
+    return NULL;
+}
+
+/* Close *pavctx, free the buffers listed below and the context itself via av_freep(pavctx). */
+void avcodec_free_context(AVCodecContext **pavctx)
+{
+    AVCodecContext *ctx = *pavctx;
+
+    /* Nothing to do when the caller's pointer is already NULL. */
+    if (ctx) {
+        avcodec_close(ctx);
+
+        av_freep(&ctx->extradata);
+        av_freep(&ctx->subtitle_header);
+        av_freep(&ctx->intra_matrix);
+        av_freep(&ctx->inter_matrix);
+        av_freep(&ctx->rc_override);
+        av_channel_layout_uninit(&ctx->ch_layout);
+        av_freep(pavctx);
+    }
+}
+
+const AVClass *avcodec_get_class(void)
+{
+    return &av_codec_context_class; /* the file-local AVClass describing AVCodecContext */
+}
+
+#define SROFFSET(x) offsetof(AVSubtitleRect,x)
+/* AVOption table for AVSubtitleRect; SROFFSET(f) is the byte offset of field f in that struct. */
+static const AVOption subtitle_rect_options[]={
+{"x", "", SROFFSET(x), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"y", "", SROFFSET(y), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"w", "", SROFFSET(w), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"h", "", SROFFSET(h), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"type", "", SROFFSET(type), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"flags", "", SROFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, 0, 1, 0, "flags"},
+{"forced", "", SROFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, 0, 1, 0}, /* NOTE(review): second FLAGS option on the same field, not a CONST in unit "flags" -- confirm intended */
+{NULL},
+};
+
+static const AVClass av_subtitle_rect_class = { /* AVClass describing AVSubtitleRect */
+    .class_name = "AVSubtitleRect",
+    .version    = LIBAVUTIL_VERSION_INT,
+    .option     = subtitle_rect_options,
+    .item_name  = NULL, /* no item_name callback is installed */
+};
+
+const AVClass *avcodec_get_subtitle_rect_class(void)
+{
+    return &av_subtitle_rect_class; /* the file-local AVClass describing AVSubtitleRect */
+}
diff --git a/media/ffvpx/libavcodec/options_table.h b/media/ffvpx/libavcodec/options_table.h
new file mode 100644
index 0000000000..4fea57673a
--- /dev/null
+++ b/media/ffvpx/libavcodec/options_table.h
@@ -0,0 +1,412 @@
+/*
+ * Copyright (c) 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_OPTIONS_TABLE_H
+#define AVCODEC_OPTIONS_TABLE_H
+
+#include "config_components.h"
+
+#include <float.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "version_major.h"
+
+#define OFFSET(x) offsetof(AVCodecContext,x) /* byte offset of field x inside AVCodecContext */
+#define DEFAULT 0 //should be NAN but it does not work as it is not a constant in glibc as required by ANSI/ISO C
+//short aliases for the AV_OPT_FLAG_* names, which are too long to be readable in the table
+#define V AV_OPT_FLAG_VIDEO_PARAM
+#define A AV_OPT_FLAG_AUDIO_PARAM
+#define S AV_OPT_FLAG_SUBTITLE_PARAM
+#define E AV_OPT_FLAG_ENCODING_PARAM
+#define D AV_OPT_FLAG_DECODING_PARAM
+#define CC AV_OPT_FLAG_CHILD_CONSTS
+/* Default of the "b" option (bits/s); parenthesised so it expands safely next to /, % or unary operators. */
+#define AV_CODEC_DEFAULT_BITRATE (200*1000)
+
+static const AVOption avcodec_options[] = {
+{"b", "set bitrate (in bits/s)", OFFSET(bit_rate), AV_OPT_TYPE_INT64, {.i64 = AV_CODEC_DEFAULT_BITRATE }, 0, INT64_MAX, A|V|E},
+{"ab", "set bitrate (in bits/s)", OFFSET(bit_rate), AV_OPT_TYPE_INT64, {.i64 = 128*1000 }, 0, INT_MAX, A|E},
+{"bt", "Set video bitrate tolerance (in bits/s). In 1-pass mode, bitrate tolerance specifies how far "
+       "ratecontrol is willing to deviate from the target average bitrate value. This is not related "
+       "to minimum/maximum bitrate. Lowering tolerance too much has an adverse effect on quality.",
+       OFFSET(bit_rate_tolerance), AV_OPT_TYPE_INT, {.i64 = AV_CODEC_DEFAULT_BITRATE*20 }, 1, INT_MAX, V|E},
+{"flags", NULL, OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 0, UINT_MAX, V|A|S|E|D, "flags"},
+{"unaligned", "allow decoders to produce unaligned output", 0, AV_OPT_TYPE_CONST, { .i64 = AV_CODEC_FLAG_UNALIGNED }, INT_MIN, INT_MAX, V | D, "flags" },
+{"mv4", "use four motion vectors per macroblock (MPEG-4)", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_4MV }, INT_MIN, INT_MAX, V|E, "flags"},
+{"qpel", "use 1/4-pel motion compensation", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_QPEL }, INT_MIN, INT_MAX, V|E, "flags"},
+{"loop", "use loop filter", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_LOOP_FILTER }, INT_MIN, INT_MAX, V|E, "flags"},
+{"qscale", "use fixed qscale", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_QSCALE }, INT_MIN, INT_MAX, 0, "flags"},
+{"recon_frame", "export reconstructed frames", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_RECON_FRAME}, .unit = "flags"},
+{"copy_opaque", "propagate opaque values", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_COPY_OPAQUE}, .unit = "flags"},
+{"frame_duration", "use frame durations", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_FRAME_DURATION}, .unit = "flags"},
+{"pass1", "use internal 2-pass ratecontrol in first  pass mode", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PASS1 }, INT_MIN, INT_MAX, 0, "flags"},
+{"pass2", "use internal 2-pass ratecontrol in second pass mode", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PASS2 }, INT_MIN, INT_MAX, 0, "flags"},
+{"gray", "only decode/encode grayscale", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_GRAY }, INT_MIN, INT_MAX, V|E|D, "flags"},
+{"psnr", "error[?] variables will be set during encoding", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PSNR }, INT_MIN, INT_MAX, V|E, "flags"},
+{"ildct", "use interlaced DCT", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_INTERLACED_DCT }, INT_MIN, INT_MAX, V|E, "flags"},
+{"low_delay", "force low delay", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_LOW_DELAY }, INT_MIN, INT_MAX, V|D|E, "flags"},
+{"global_header", "place global headers in extradata instead of every keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_GLOBAL_HEADER }, INT_MIN, INT_MAX, V|A|E, "flags"},
+{"bitexact", "use only bitexact functions (except (I)DCT)", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_BITEXACT }, INT_MIN, INT_MAX, A|V|S|D|E, "flags"},
+{"aic", "H.263 advanced intra coding / MPEG-4 AC prediction", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_AC_PRED }, INT_MIN, INT_MAX, V|E, "flags"},
+{"ilme", "interlaced motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_INTERLACED_ME }, INT_MIN, INT_MAX, V|E, "flags"},
+{"cgop", "closed GOP", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_CLOSED_GOP }, INT_MIN, INT_MAX, V|E, "flags"},
+{"output_corrupt", "Output even potentially corrupted frames", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_OUTPUT_CORRUPT }, INT_MIN, INT_MAX, V|D, "flags"},
+{"drop_changed", "Drop frames whose parameters differ from first decoded frame", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_DROPCHANGED }, INT_MIN, INT_MAX, A|V|D, "flags"},
+{"flags2", NULL, OFFSET(flags2), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT}, 0, UINT_MAX, V|A|E|D|S, "flags2"},
+{"fast", "allow non-spec-compliant speedup tricks", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_FAST }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"noout", "skip bitstream encoding", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_NO_OUTPUT }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"ignorecrop", "ignore cropping information from sps", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_IGNORE_CROP }, INT_MIN, INT_MAX, V|D, "flags2"},
+{"local_header", "place global headers at every keyframe instead of in extradata", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_LOCAL_HEADER }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"chunks", "Frame data might be split into multiple chunks", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_CHUNKS }, INT_MIN, INT_MAX, V|D, "flags2"},
+{"showall", "Show all frames before the first keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_SHOW_ALL }, INT_MIN, INT_MAX, V|D, "flags2"},
+{"export_mvs", "export motion vectors through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_EXPORT_MVS}, INT_MIN, INT_MAX, V|D, "flags2"},
+{"skip_manual", "do not skip samples and export skip information as frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_SKIP_MANUAL}, INT_MIN, INT_MAX, A|D, "flags2"},
+{"ass_ro_flush_noop", "do not reset ASS ReadOrder field on flush", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_RO_FLUSH_NOOP}, INT_MIN, INT_MAX, S|D, "flags2"},
+{"icc_profiles", "generate/parse embedded ICC profiles from/to colorimetry tags", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_ICC_PROFILES}, INT_MIN, INT_MAX, S|D, "flags2"},
+{"export_side_data", "Export metadata as side data", OFFSET(export_side_data), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT}, 0, UINT_MAX, A|V|S|D|E, "export_side_data"},
+{"mvs", "export motion vectors through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_EXPORT_DATA_MVS}, INT_MIN, INT_MAX, V|D, "export_side_data"},
+{"prft", "export Producer Reference Time through packet side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_EXPORT_DATA_PRFT}, INT_MIN, INT_MAX, A|V|S|E, "export_side_data"},
+{"venc_params", "export video encoding parameters through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS}, INT_MIN, INT_MAX, V|D, "export_side_data"},
+{"film_grain", "export film grain parameters through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_EXPORT_DATA_FILM_GRAIN}, INT_MIN, INT_MAX, V|D, "export_side_data"},
+{"time_base", NULL, OFFSET(time_base), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, INT_MAX},
+{"g", "set the group of picture (GOP) size", OFFSET(gop_size), AV_OPT_TYPE_INT, {.i64 = 12 }, INT_MIN, INT_MAX, V|E},
+{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, A|D|E},
+#if FF_API_OLD_CHANNEL_LAYOUT
+{"ac", "set number of audio channels", OFFSET(channels), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, A|D|E},
+#endif
+{"cutoff", "set cutoff bandwidth", OFFSET(cutoff), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, A|E},
+{"frame_size", NULL, OFFSET(frame_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, A|E},
+{"frame_number", NULL, OFFSET(frame_num), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
+{"delay", NULL, OFFSET(delay), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
+{"qcomp", "video quantizer scale compression (VBR). Constant of ratecontrol equation. "
+          "Recommended range for default rc_eq: 0.0-1.0",
+          OFFSET(qcompress), AV_OPT_TYPE_FLOAT, {.dbl = 0.5 }, -FLT_MAX, FLT_MAX, V|E},
+{"qblur", "video quantizer scale blur (VBR)", OFFSET(qblur), AV_OPT_TYPE_FLOAT, {.dbl = 0.5 }, -1, FLT_MAX, V|E},
+{"qmin", "minimum video quantizer scale (VBR)", OFFSET(qmin), AV_OPT_TYPE_INT, {.i64 = 2 }, -1, 69, V|E},
+{"qmax", "maximum video quantizer scale (VBR)", OFFSET(qmax), AV_OPT_TYPE_INT, {.i64 = 31 }, -1, 1024, V|E},
+{"qdiff", "maximum difference between the quantizer scales (VBR)", OFFSET(max_qdiff), AV_OPT_TYPE_INT, {.i64 = 3 }, INT_MIN, INT_MAX, V|E},
+{"bf", "set maximum number of B-frames between non-B-frames", OFFSET(max_b_frames), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, -1, INT_MAX, V|E},
+{"b_qfactor", "QP factor between P- and B-frames", OFFSET(b_quant_factor), AV_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E},
+{"codec_tag", NULL, OFFSET(codec_tag), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
+{"bug", "work around not autodetected encoder bugs", OFFSET(workaround_bugs), AV_OPT_TYPE_FLAGS, {.i64 = FF_BUG_AUTODETECT }, INT_MIN, INT_MAX, V|D, "bug"},
+{"autodetect", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_AUTODETECT }, INT_MIN, INT_MAX, V|D, "bug"},
+{"xvid_ilace", "Xvid interlacing bug (autodetected if FOURCC == XVIX)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_XVID_ILACE }, INT_MIN, INT_MAX, V|D, "bug"},
+{"ump4", "(autodetected if FOURCC == UMP4)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_UMP4 }, INT_MIN, INT_MAX, V|D, "bug"},
+{"no_padding", "padding bug (autodetected)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_NO_PADDING }, INT_MIN, INT_MAX, V|D, "bug"},
+{"amv", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_AMV }, INT_MIN, INT_MAX, V|D, "bug"},
+{"qpel_chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_QPEL_CHROMA }, INT_MIN, INT_MAX, V|D, "bug"},
+{"std_qpel", "old standard qpel (autodetected per FOURCC/version)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_STD_QPEL }, INT_MIN, INT_MAX, V|D, "bug"},
+{"qpel_chroma2", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_QPEL_CHROMA2 }, INT_MIN, INT_MAX, V|D, "bug"},
+{"direct_blocksize", "direct-qpel-blocksize bug (autodetected per FOURCC/version)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_DIRECT_BLOCKSIZE }, INT_MIN, INT_MAX, V|D, "bug"},
+{"edge", "edge padding bug (autodetected per FOURCC/version)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_EDGE }, INT_MIN, INT_MAX, V|D, "bug"},
+{"hpel_chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_HPEL_CHROMA }, INT_MIN, INT_MAX, V|D, "bug"},
+{"dc_clip", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_DC_CLIP }, INT_MIN, INT_MAX, V|D, "bug"},
+{"ms", "work around various bugs in Microsoft's broken decoders", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_MS }, INT_MIN, INT_MAX, V|D, "bug"},
+{"trunc", "truncated frames", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_TRUNCATED}, INT_MIN, INT_MAX, V|D, "bug"},
+{"iedge", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_IEDGE }, INT_MIN, INT_MAX, V|D, "bug"},
+{"strict", "how strictly to follow the standards", OFFSET(strict_std_compliance), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, A|V|D|E, "strict"},
+{"very", "strictly conform to a older more strict version of the spec or reference software", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_VERY_STRICT }, INT_MIN, INT_MAX, A|V|D|E, "strict"},
+{"strict", "strictly conform to all the things in the spec no matter what the consequences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_STRICT }, INT_MIN, INT_MAX, A|V|D|E, "strict"},
+{"normal", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_NORMAL }, INT_MIN, INT_MAX, A|V|D|E, "strict"},
+{"unofficial", "allow unofficial extensions", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_UNOFFICIAL }, INT_MIN, INT_MAX, A|V|D|E, "strict"},
+{"experimental", "allow non-standardized experimental things", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_EXPERIMENTAL }, INT_MIN, INT_MAX, A|V|D|E, "strict"},
+{"b_qoffset", "QP offset between P- and B-frames", OFFSET(b_quant_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E},
+{"err_detect", "set error detection flags", OFFSET(err_recognition), AV_OPT_TYPE_FLAGS, {.i64 = 0 }, INT_MIN, INT_MAX, A|V|S|D|E, "err_detect"},
+{"crccheck", "verify embedded CRCs", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_CRCCHECK }, INT_MIN, INT_MAX, A|V|S|D|E, "err_detect"},
+{"bitstream", "detect bitstream specification deviations", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_BITSTREAM }, INT_MIN, INT_MAX, A|V|S|D|E, "err_detect"},
+{"buffer", "detect improper bitstream length", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_BUFFER }, INT_MIN, INT_MAX, A|V|S|D|E, "err_detect"},
+{"explode", "abort decoding on minor error detection", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_EXPLODE }, INT_MIN, INT_MAX, A|V|S|D|E, "err_detect"},
+{"ignore_err", "ignore errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_IGNORE_ERR }, INT_MIN, INT_MAX, A|V|S|D|E, "err_detect"},
+{"careful",    "consider things that violate the spec, are fast to check and have not been seen in the wild as errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_CAREFUL }, INT_MIN, INT_MAX, A|V|S|D|E, "err_detect"},
+{"compliant",  "consider all spec non compliancies as errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_COMPLIANT | AV_EF_CAREFUL }, INT_MIN, INT_MAX, A|V|S|D|E, "err_detect"},
+{"aggressive", "consider things that a sane encoder should not do as an error", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_AGGRESSIVE | AV_EF_COMPLIANT | AV_EF_CAREFUL}, INT_MIN, INT_MAX, A|V|S|D|E, "err_detect"},
+{"has_b_frames", NULL, OFFSET(has_b_frames), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX},
+{"block_align", NULL, OFFSET(block_align), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX},
+{"rc_override_count", NULL, OFFSET(rc_override_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
+{"maxrate", "maximum bitrate (in bits/s). Used for VBV together with bufsize.", OFFSET(rc_max_rate), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, 0, INT_MAX, V|A|E},
+{"minrate", "minimum bitrate (in bits/s). Most useful in setting up a CBR encode. It is of little use otherwise.",
+            OFFSET(rc_min_rate), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
+{"bufsize", "set ratecontrol buffer size (in bits)", OFFSET(rc_buffer_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, A|V|E},
+{"i_qfactor", "QP factor between P- and I-frames", OFFSET(i_quant_factor), AV_OPT_TYPE_FLOAT, {.dbl = -0.8 }, -FLT_MAX, FLT_MAX, V|E},
+{"i_qoffset", "QP offset between P- and I-frames", OFFSET(i_quant_offset), AV_OPT_TYPE_FLOAT, {.dbl = 0.0 }, -FLT_MAX, FLT_MAX, V|E},
+{"dct", "DCT algorithm", OFFSET(dct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E, "dct"},
+{"auto", "autoselect a good one", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_AUTO }, INT_MIN, INT_MAX, V|E, "dct"},
+{"fastint", "fast integer", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FASTINT }, INT_MIN, INT_MAX, V|E, "dct"},
+{"int", "accurate integer", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_INT }, INT_MIN, INT_MAX, V|E, "dct"},
+{"mmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_MMX }, INT_MIN, INT_MAX, V|E, "dct"},
+{"altivec", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_ALTIVEC }, INT_MIN, INT_MAX, V|E, "dct"},
+{"faan", "floating point AAN DCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FAAN }, INT_MIN, INT_MAX, V|E, "dct"},
+{"lumi_mask", "compresses bright areas stronger than medium ones", OFFSET(lumi_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
+{"tcplx_mask", "temporal complexity masking", OFFSET(temporal_cplx_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
+{"scplx_mask", "spatial complexity masking", OFFSET(spatial_cplx_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
+{"p_mask", "inter masking", OFFSET(p_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
+{"dark_mask", "compresses dark areas stronger than medium ones", OFFSET(dark_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
+{"idct", "select IDCT implementation", OFFSET(idct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E|D, "idct"},
+{"auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_AUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"int", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_INT }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simple", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLE }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplemmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"arm", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"altivec", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ALTIVEC }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearm", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearmv5te", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV5TE }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearmv6", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV6 }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simpleneon", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLENEON }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"xvid", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"xvidmmx", "deprecated, for compatibility only", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"faani", "floating point AAN IDCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"},
+{"simpleauto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEAUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"slice_count", NULL, OFFSET(slice_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
+{"ec", "set error concealment strategy", OFFSET(error_concealment), AV_OPT_TYPE_FLAGS, {.i64 = 3 }, INT_MIN, INT_MAX, V|D, "ec"},
+{"guess_mvs", "iterative motion vector (MV) search (slow)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_EC_GUESS_MVS }, INT_MIN, INT_MAX, V|D, "ec"},
+{"deblock", "use strong deblock filter for damaged MBs", 0, AV_OPT_TYPE_CONST, {.i64 = FF_EC_DEBLOCK }, INT_MIN, INT_MAX, V|D, "ec"},
+{"favor_inter", "favor predicting from the previous frame", 0, AV_OPT_TYPE_CONST, {.i64 = FF_EC_FAVOR_INTER }, INT_MIN, INT_MAX, V|D, "ec"},
+{"bits_per_coded_sample", NULL, OFFSET(bits_per_coded_sample), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX},
+{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, 10, V|E},
+{"sar",    "sample aspect ratio", OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, 10, V|E},
+{"debug", "print specific debug info", OFFSET(debug), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 0, INT_MAX, V|A|S|E|D, "debug"},
+{"pict", "picture info", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_PICT_INFO }, INT_MIN, INT_MAX, V|D, "debug"},
+{"rc", "rate control", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_RC }, INT_MIN, INT_MAX, V|E, "debug"},
+{"bitstream", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_BITSTREAM }, INT_MIN, INT_MAX, V|D, "debug"},
+{"mb_type", "macroblock (MB) type", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_MB_TYPE }, INT_MIN, INT_MAX, V|D, "debug"},
+{"qp", "per-block quantization parameter (QP)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_QP }, INT_MIN, INT_MAX, V|D, "debug"},
+{"dct_coeff", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_DCT_COEFF }, INT_MIN, INT_MAX, V|D, "debug"},
+{"green_metadata", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_GREEN_MD }, INT_MIN, INT_MAX, V|D, "debug"},
+{"skip", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_SKIP }, INT_MIN, INT_MAX, V|D, "debug"},
+{"startcode", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_STARTCODE }, INT_MIN, INT_MAX, V|D, "debug"},
+{"er", "error recognition", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_ER }, INT_MIN, INT_MAX, V|D, "debug"},
+{"mmco", "memory management control operations (H.264)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_MMCO }, INT_MIN, INT_MAX, V|D, "debug"},
+{"bugs", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_BUGS }, INT_MIN, INT_MAX, V|D, "debug"},
+{"buffers", "picture buffer allocations", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_BUFFERS }, INT_MIN, INT_MAX, V|D, "debug"},
+{"thread_ops", "threading operations", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_THREADS }, INT_MIN, INT_MAX, V|A|D, "debug"},
+{"nomc", "skip motion compensation", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_NOMC }, INT_MIN, INT_MAX, V|A|D, "debug"},
+{"dia_size", "diamond type & size for motion estimation", OFFSET(dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"last_pred", "amount of motion predictors from the previous frame", OFFSET(last_predictor_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"pre_dia_size", "diamond type & size for motion estimation pre-pass", OFFSET(pre_dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"subq", "sub-pel motion estimation quality", OFFSET(me_subpel_quality), AV_OPT_TYPE_INT, {.i64 = 8 }, INT_MIN, INT_MAX, V|E},
+{"me_range", "limit motion vectors range (1023 for DivX player)", OFFSET(me_range), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"global_quality", NULL, OFFSET(global_quality), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
+{"slice_flags", NULL, OFFSET(slice_flags), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
+{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, 2, V|E, "mbd"},
+{"simple", "use mbcmp", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_SIMPLE }, INT_MIN, INT_MAX, V|E, "mbd"},
+{"bits", "use fewest bits", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_BITS }, INT_MIN, INT_MAX, V|E, "mbd"},
+{"rd", "use best rate distortion", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_RD }, INT_MIN, INT_MAX, V|E, "mbd"},
+{"rc_init_occupancy", "number of bits which should be loaded into the rc buffer before decoding starts", OFFSET(rc_initial_buffer_occupancy), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"threads", "set the number of threads", OFFSET(thread_count), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, INT_MAX, V|A|E|D, "threads"},
+{"auto", "autodetect a suitable number of threads to use", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, V|E|D, "threads"},
+{"dc", "intra_dc_precision", OFFSET(intra_dc_precision), AV_OPT_TYPE_INT, {.i64 = 0 }, -8, 16, V|E},
+{"nssew", "nsse weight", OFFSET(nsse_weight), AV_OPT_TYPE_INT, {.i64 = 8 }, INT_MIN, INT_MAX, V|E},
+{"skip_top", "number of macroblock rows at the top which are skipped", OFFSET(skip_top), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|D},
+{"skip_bottom", "number of macroblock rows at the bottom which are skipped", OFFSET(skip_bottom), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|D},
+{"profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT, {.i64 = FF_PROFILE_UNKNOWN }, INT_MIN, INT_MAX, V|A|E|CC, "avctx.profile"},
+{"unknown", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "avctx.profile"},
+{"main10",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_HEVC_MAIN_10 }, INT_MIN, INT_MAX, V|E, "avctx.profile"},
+{"level", NULL, OFFSET(level), AV_OPT_TYPE_INT, {.i64 = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E|CC, "avctx.level"},
+{"unknown", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "avctx.level"},
+{"lowres", "decode at 1= 1/2, 2=1/4, 3=1/8 resolutions", OFFSET(lowres), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, V|A|D},
+{"cmp", "full-pel ME compare function", OFFSET(me_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"subcmp", "sub-pel ME compare function", OFFSET(me_sub_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"mbcmp", "macroblock compare function", OFFSET(mb_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"ildctcmp", "interlaced DCT compare function", OFFSET(ildct_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"precmp", "pre motion estimation compare function", OFFSET(me_pre_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"sad", "sum of absolute differences, fast", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"sse", "sum of squared errors", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"satd", "sum of absolute Hadamard transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SATD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"dct", "sum of absolute DCT transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"psnr", "sum of squared quantization errors (avoid, low quality)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_PSNR }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"bit", "number of bits needed for the block", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_BIT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"rd", "rate distortion optimal, slow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_RD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"zero", "0", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_ZERO }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"vsad", "sum of absolute vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"vsse", "sum of squared vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"nsse", "noise preserving sum of squared differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_NSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+#if CONFIG_SNOW_ENCODER
+{"w53", "5/3 wavelet, only used in snow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_W53 }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"w97", "9/7 wavelet, only used in snow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_W97 }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+#endif
+{"dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"msad", "sum of absolute differences, median predicted", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_MEDIAN_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"mblmin", "minimum macroblock Lagrange factor (VBR)", OFFSET(mb_lmin), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 2 }, 1, FF_LAMBDA_MAX, V|E},
+{"mblmax", "maximum macroblock Lagrange factor (VBR)", OFFSET(mb_lmax), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 31 }, 1, FF_LAMBDA_MAX, V|E},
+{"skip_loop_filter", "skip loop filtering process for the selected frames", OFFSET(skip_loop_filter), AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"skip_idct"       , "skip IDCT/dequantization for the selected frames",    OFFSET(skip_idct),        AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"skip_frame"      , "skip decoding for the selected frames",               OFFSET(skip_frame),       AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"none"            , "discard no frame",                    0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONE    }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"default"         , "discard useless frames",              0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"noref"           , "discard all non-reference frames",    0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONREF  }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"bidir"           , "discard all bidirectional frames",    0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_BIDIR   }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"nokey"           , "discard all frames except keyframes", 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONKEY  }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"nointra"         , "discard all frames except I frames",  0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONINTRA}, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"all"             , "discard all frames",                  0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_ALL     }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"bidir_refine", "refine the two motion vectors used in bidirectional macroblocks", OFFSET(bidir_refine), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, 4, V|E},
+{"keyint_min", "minimum interval between IDR-frames", OFFSET(keyint_min), AV_OPT_TYPE_INT, {.i64 = 25 }, INT_MIN, INT_MAX, V|E},
+{"refs", "reference frames to consider for motion compensation", OFFSET(refs), AV_OPT_TYPE_INT, {.i64 = 1 }, INT_MIN, INT_MAX, V|E},
+{"trellis", "rate-distortion optimal quantization", OFFSET(trellis), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
+{"mv0_threshold", NULL, OFFSET(mv0_threshold), AV_OPT_TYPE_INT, {.i64 = 256 }, 0, INT_MAX, V|E},
+{"compression_level", NULL, OFFSET(compression_level), AV_OPT_TYPE_INT, {.i64 = FF_COMPRESSION_DEFAULT }, INT_MIN, INT_MAX, V|A|E},
+{"bits_per_raw_sample", NULL, OFFSET(bits_per_raw_sample), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX},
+{"ch_layout", NULL, OFFSET(ch_layout), AV_OPT_TYPE_CHLAYOUT, {.str = NULL }, 0, 0, A|E|D, "ch_layout"},
+#if FF_API_OLD_CHANNEL_LAYOUT
+{"channel_layout", NULL, OFFSET(channel_layout), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64 = DEFAULT }, 0, UINT64_MAX, A|E|D, "channel_layout"},
+{"request_channel_layout", NULL, OFFSET(request_channel_layout), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64 = DEFAULT }, 0, UINT64_MAX, A|D, "request_channel_layout"},
+#endif
+{"rc_max_vbv_use", NULL, OFFSET(rc_max_available_vbv_use), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, 0.0, FLT_MAX, V|E},
+{"rc_min_vbv_use", NULL, OFFSET(rc_min_vbv_overflow_use),  AV_OPT_TYPE_FLOAT, {.dbl = 3 },     0.0, FLT_MAX, V|E},
+{"ticks_per_frame", NULL, OFFSET(ticks_per_frame), AV_OPT_TYPE_INT, {.i64 = 1 }, 1, INT_MAX, A|V|E|D},
+{"color_primaries", "color primaries", OFFSET(color_primaries), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_UNSPECIFIED }, 1, INT_MAX, V|E|D, "color_primaries_type"},
+{"bt709",       "BT.709",         0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709 },        INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"unknown",     "Unspecified",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_UNSPECIFIED },  INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"bt470m",      "BT.470 M",       0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT470M },       INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"bt470bg",     "BT.470 BG",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT470BG },      INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"smpte170m",   "SMPTE 170 M",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE170M },    INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"smpte240m",   "SMPTE 240 M",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE240M },    INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"film",        "Film",           0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_FILM },         INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"bt2020",      "BT.2020",        0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020 },       INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"smpte428",    "SMPTE 428-1",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE428 },     INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"smpte428_1",  "SMPTE 428-1",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE428 },     INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"smpte431",    "SMPTE 431-2",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE431 },     INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"smpte432",    "SMPTE 422-1",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE432 },     INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"jedec-p22",   "JEDEC P22",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_JEDEC_P22 },    INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"ebu3213",     "EBU 3213-E",     0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_EBU3213 },      INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"unspecified", "Unspecified",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_UNSPECIFIED },  INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"color_trc", "color transfer characteristics", OFFSET(color_trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_UNSPECIFIED }, 1, INT_MAX, V|E|D, "color_trc_type"},
+{"bt709",        "BT.709",           0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709 },        INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"unknown",      "Unspecified",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_UNSPECIFIED },  INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"gamma22",      "BT.470 M",         0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_GAMMA22 },      INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"gamma28",      "BT.470 BG",        0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_GAMMA28 },      INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"smpte170m",    "SMPTE 170 M",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE170M },    INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"smpte240m",    "SMPTE 240 M",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE240M },    INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"linear",       "Linear",           0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LINEAR },       INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"log100",       "Log",              0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LOG },          INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"log316",       "Log square root",  0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LOG_SQRT },     INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"iec61966-2-4", "IEC 61966-2-4",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_IEC61966_2_4 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"bt1361e",      "BT.1361",          0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT1361_ECG },   INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"iec61966-2-1", "IEC 61966-2-1",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_IEC61966_2_1 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"bt2020-10",    "BT.2020 - 10 bit", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10 },    INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"bt2020-12",    "BT.2020 - 12 bit", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_12 },    INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"smpte2084",    "SMPTE 2084",       0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE2084 },    INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"smpte428",     "SMPTE 428-1",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE428 },     INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"arib-std-b67", "ARIB STD-B67",     0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_ARIB_STD_B67 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"unspecified",  "Unspecified",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_UNSPECIFIED },  INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"log",          "Log",              0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LOG },          INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"log_sqrt",     "Log square root",  0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LOG_SQRT },     INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"iec61966_2_4", "IEC 61966-2-4",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_IEC61966_2_4 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"bt1361",       "BT.1361",          0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT1361_ECG },   INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"iec61966_2_1", "IEC 61966-2-1",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_IEC61966_2_1 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"bt2020_10bit", "BT.2020 - 10 bit", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10 },    INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"bt2020_12bit", "BT.2020 - 12 bit", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_12 },    INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"smpte428_1",   "SMPTE 428-1",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE428 },     INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"colorspace", "color space", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_UNSPECIFIED }, 0, INT_MAX, V|E|D, "colorspace_type"},
+{"rgb",               "RGB",                0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_RGB },                INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"bt709",             "BT.709",             0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709 },              INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"unknown",           "Unspecified",        0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_UNSPECIFIED },        INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"fcc",               "FCC",                0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_FCC },                INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"bt470bg",           "BT.470 BG",          0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT470BG },            INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"smpte170m",         "SMPTE 170 M",        0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_SMPTE170M },          INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"smpte240m",         "SMPTE 240 M",        0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_SMPTE240M },          INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"ycgco",             "YCGCO",              0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_YCGCO },              INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"bt2020nc",          "BT.2020 NCL",        0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL },         INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"bt2020c",           "BT.2020 CL",         0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_CL },          INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"smpte2085",         "SMPTE 2085",         0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_SMPTE2085 },          INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"chroma-derived-nc", "Chroma-derived NCL", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_CHROMA_DERIVED_NCL }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"chroma-derived-c",  "Chroma-derived CL",  0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_CHROMA_DERIVED_CL },  INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"ictcp",             "ICtCp",              0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_ICTCP },              INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"unspecified",       "Unspecified",        0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_UNSPECIFIED },        INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"ycocg",             "YCGCO",              0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_YCGCO },              INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"bt2020_ncl",        "BT.2020 NCL",        0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL },         INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"bt2020_cl",         "BT.2020 CL",         0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_CL },          INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
+{"color_range", "color range", OFFSET(color_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED }, 0, INT_MAX, V|E|D, "color_range_type"},
+{"unknown", "Unspecified",     0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "color_range_type"},
+{"tv", "MPEG (219*2^(n-8))",   0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG },        INT_MIN, INT_MAX, V|E|D, "color_range_type"},
+{"pc", "JPEG (2^n-1)",         0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG },        INT_MIN, INT_MAX, V|E|D, "color_range_type"},
+{"unspecified", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "color_range_type"},
+{"mpeg", "MPEG (219*2^(n-8))", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG },        INT_MIN, INT_MAX, V|E|D, "color_range_type"},
+{"jpeg", "JPEG (2^n-1)",       0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG },        INT_MIN, INT_MAX, V|E|D, "color_range_type"},
+{"chroma_sample_location", "chroma sample location", OFFSET(chroma_sample_location), AV_OPT_TYPE_INT, {.i64 = AVCHROMA_LOC_UNSPECIFIED }, 0, INT_MAX, V|E|D, "chroma_sample_location_type"},
+{"unknown",     "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"},
+{"left",        "Left",        0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_LEFT },        INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"},
+{"center",      "Center",      0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_CENTER },      INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"},
+{"topleft",     "Top-left",    0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_TOPLEFT },     INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"},
+{"top",         "Top",         0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_TOP },         INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"},
+{"bottomleft",  "Bottom-left", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_BOTTOMLEFT },  INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"},
+{"bottom",      "Bottom",      0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_BOTTOM },      INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"},
+{"unspecified", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"},
+{"log_level_offset", "set the log level offset", OFFSET(log_level_offset), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX },
+{"slices", "set the number of slices, used in parallelized encoding", OFFSET(slices), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, V|E},
+{"thread_type", "select multithreading type", OFFSET(thread_type), AV_OPT_TYPE_FLAGS, {.i64 = FF_THREAD_SLICE|FF_THREAD_FRAME }, 0, INT_MAX, V|A|E|D, "thread_type"},
+{"slice", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_THREAD_SLICE }, INT_MIN, INT_MAX, V|E|D, "thread_type"},
+{"frame", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_THREAD_FRAME }, INT_MIN, INT_MAX, V|E|D, "thread_type"},
+{"audio_service_type", "audio service type", OFFSET(audio_service_type), AV_OPT_TYPE_INT, {.i64 = AV_AUDIO_SERVICE_TYPE_MAIN }, 0, AV_AUDIO_SERVICE_TYPE_NB-1, A|E, "audio_service_type"},
+{"ma", "Main Audio Service", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_MAIN },              INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"ef", "Effects",            0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_EFFECTS },           INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"vi", "Visually Impaired",  0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_VISUALLY_IMPAIRED }, INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"hi", "Hearing Impaired",   0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_HEARING_IMPAIRED },  INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"di", "Dialogue",           0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_DIALOGUE },          INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"co", "Commentary",         0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_COMMENTARY },        INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"em", "Emergency",          0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_EMERGENCY },         INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"vo", "Voice Over",         0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_VOICE_OVER },        INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"ka", "Karaoke",            0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_KARAOKE },           INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT, {.i64=AV_SAMPLE_FMT_NONE}, -1, INT_MAX, A|D, "request_sample_fmt"},
+{"pkt_timebase", NULL, OFFSET(pkt_timebase), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0},
+{"sub_charenc", "set input text subtitles character encoding", OFFSET(sub_charenc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, S|D},
+{"sub_charenc_mode", "set input text subtitles character encoding mode", OFFSET(sub_charenc_mode), AV_OPT_TYPE_FLAGS, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, -1, INT_MAX, S|D, "sub_charenc_mode"},
+{"do_nothing",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_DO_NOTHING},  INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
+{"auto",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC},   INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
+{"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
+{"ignore",      NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_IGNORE},      INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
+{"apply_cropping", NULL, OFFSET(apply_cropping), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, V | D },
+{"skip_alpha", "Skip processing alpha", OFFSET(skip_alpha), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, V|D },
+{"field_order", "Field order", OFFSET(field_order), AV_OPT_TYPE_INT, {.i64 = AV_FIELD_UNKNOWN }, 0, 5, V|D|E, "field_order" },
+{"progressive", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_PROGRESSIVE }, 0, 0, V|D|E, "field_order" },
+{"tt", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_TT }, 0, 0, V|D|E, "field_order" },
+{"bb", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_BB }, 0, 0, V|D|E, "field_order" },
+{"tb", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_TB }, 0, 0, V|D|E, "field_order" },
+{"bt", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_BT }, 0, 0, V|D|E, "field_order" },
+{"dump_separator", "set information dump field separator", OFFSET(dump_separator), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, A|V|S|D|E},
+{"codec_whitelist", "List of decoders that are allowed to be used", OFFSET(codec_whitelist), AV_OPT_TYPE_STRING, { .str = NULL },  0, 0, A|V|S|D },
+{"pixel_format", "set pixel format", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64=AV_PIX_FMT_NONE}, -1, INT_MAX, 0 },
+{"video_size", "set video size", OFFSET(width), AV_OPT_TYPE_IMAGE_SIZE, {.str=NULL}, 0, INT_MAX, 0 },
+{"max_pixels", "Maximum number of pixels", OFFSET(max_pixels), AV_OPT_TYPE_INT64, {.i64 = INT_MAX }, 0, INT_MAX, A|V|S|D|E },
+{"max_samples", "Maximum number of samples", OFFSET(max_samples), AV_OPT_TYPE_INT64, {.i64 = INT_MAX }, 0, INT_MAX, A|D|E },
+{"hwaccel_flags", NULL, OFFSET(hwaccel_flags), AV_OPT_TYPE_FLAGS, {.i64 = AV_HWACCEL_FLAG_IGNORE_LEVEL }, 0, UINT_MAX, V|D, "hwaccel_flags"},
+{"ignore_level", "ignore level even if the codec level used is unknown or higher than the maximum supported level reported by the hardware driver", 0, AV_OPT_TYPE_CONST, { .i64 = AV_HWACCEL_FLAG_IGNORE_LEVEL }, INT_MIN, INT_MAX, V | D, "hwaccel_flags" },
+{"allow_high_depth", "allow to output YUV pixel formats with a different chroma sampling than 4:2:0 and/or other than 8 bits per component", 0, AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_HIGH_DEPTH }, INT_MIN, INT_MAX, V | D, "hwaccel_flags"},
+{"allow_profile_mismatch", "attempt to decode anyway if HW accelerated decoder's supported profiles do not exactly match the stream", 0, AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH }, INT_MIN, INT_MAX, V | D, "hwaccel_flags"},
+{"unsafe_output", "allow potentially unsafe hwaccel frame output that might require special care to process successfully", 0, AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_UNSAFE_OUTPUT }, INT_MIN, INT_MAX, V | D, "hwaccel_flags"},
+{"extra_hw_frames", "Number of extra hardware frames to allocate for the user", OFFSET(extra_hw_frames), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, V|D },
+{"discard_damaged_percentage", "Percentage of damaged samples to discard a frame", OFFSET(discard_damaged_percentage), AV_OPT_TYPE_INT, {.i64 = 95 }, 0, 100, V|D },
+{NULL},
+};
+
+#undef A
+#undef V
+#undef S
+#undef E
+#undef D
+#undef CC
+#undef DEFAULT
+#undef OFFSET
+
+#endif /* AVCODEC_OPTIONS_TABLE_H */
diff --git a/media/ffvpx/libavcodec/packet.h b/media/ffvpx/libavcodec/packet.h
new file mode 100644
index 0000000000..f28e7e7011
--- /dev/null
+++ b/media/ffvpx/libavcodec/packet.h
@@ -0,0 +1,731 @@
+/*
+ * AVPacket public API
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PACKET_H
+#define AVCODEC_PACKET_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/buffer.h"
+#include "libavutil/dict.h"
+#include "libavutil/rational.h"
+#include "libavutil/version.h"
+
+#include "libavcodec/version_major.h"
+
+/**
+ * @defgroup lavc_packet AVPacket
+ *
+ * Types and functions for working with AVPacket.
+ * @{
+ */
+enum AVPacketSideDataType {
+    /**
+     * An AV_PKT_DATA_PALETTE side data packet contains exactly AVPALETTE_SIZE
+     * bytes worth of palette. This side data signals that a new palette is
+     * present.
+     */
+    AV_PKT_DATA_PALETTE,
+
+    /**
+     * The AV_PKT_DATA_NEW_EXTRADATA is used to notify the codec or the format
+     * that the extradata buffer was changed and the receiving side should
+     * act upon it appropriately. The new extradata is embedded in the side
+     * data buffer and should be immediately used for processing the current
+     * frame or packet.
+     */
+    AV_PKT_DATA_NEW_EXTRADATA,
+
+    /**
+     * An AV_PKT_DATA_PARAM_CHANGE side data packet is laid out as follows:
+     * @code
+     * u32le param_flags
+     * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT)
+     *     s32le channel_count
+     * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT)
+     *     u64le channel_layout
+     * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE)
+     *     s32le sample_rate
+     * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS)
+     *     s32le width
+     *     s32le height
+     * @endcode
+     */
+    AV_PKT_DATA_PARAM_CHANGE,
+
+    /**
+     * An AV_PKT_DATA_H263_MB_INFO side data packet contains a number of
+     * structures with info about macroblocks relevant to splitting the
+     * packet into smaller packets on macroblock edges (e.g. as for RFC 2190).
+     * That is, it does not necessarily contain info about all macroblocks,
+     * as long as the distance between macroblocks in the info is smaller
+     * than the target payload size.
+     * Each MB info structure is 12 bytes, and is laid out as follows:
+     * @code
+     * u32le bit offset from the start of the packet
+     * u8    current quantizer at the start of the macroblock
+     * u8    GOB number
+     * u16le macroblock address within the GOB
+     * u8    horizontal MV predictor
+     * u8    vertical MV predictor
+     * u8    horizontal MV predictor for block number 3
+     * u8    vertical MV predictor for block number 3
+     * @endcode
+     */
+    AV_PKT_DATA_H263_MB_INFO,
+
+    /**
+     * This side data should be associated with an audio stream and contains
+     * ReplayGain information in form of the AVReplayGain struct.
+     */
+    AV_PKT_DATA_REPLAYGAIN,
+
+    /**
+     * This side data contains a 3x3 transformation matrix describing an affine
+     * transformation that needs to be applied to the decoded video frames for
+     * correct presentation.
+     *
+     * See libavutil/display.h for a detailed description of the data.
+     */
+    AV_PKT_DATA_DISPLAYMATRIX,
+
+    /**
+     * This side data should be associated with a video stream and contains
+     * Stereoscopic 3D information in form of the AVStereo3D struct.
+     */
+    AV_PKT_DATA_STEREO3D,
+
+    /**
+     * This side data should be associated with an audio stream and corresponds
+     * to enum AVAudioServiceType.
+     */
+    AV_PKT_DATA_AUDIO_SERVICE_TYPE,
+
+    /**
+     * This side data contains quality related information from the encoder.
+     * @code
+     * u32le quality factor of the compressed frame. Allowed range is between 1 (good) and FF_LAMBDA_MAX (bad).
+     * u8    picture type
+     * u8    error count
+     * u16   reserved
+     * u64le[error count] sum of squared differences between encoder in and output
+     * @endcode
+     */
+    AV_PKT_DATA_QUALITY_STATS,
+
+    /**
+     * This side data contains an integer value representing the stream index
+     * of a "fallback" track.  A fallback track indicates an alternate
+     * track to use when the current track can not be decoded for some reason.
+     * e.g. no decoder available for codec.
+     */
+    AV_PKT_DATA_FALLBACK_TRACK,
+
+    /**
+     * This side data corresponds to the AVCPBProperties struct.
+     */
+    AV_PKT_DATA_CPB_PROPERTIES,
+
+    /**
+     * Recommends skipping the specified number of samples
+     * @code
+     * u32le number of samples to skip from start of this packet
+     * u32le number of samples to skip from end of this packet
+     * u8    reason for start skip
+     * u8    reason for end   skip (0=padding silence, 1=convergence)
+     * @endcode
+     */
+    AV_PKT_DATA_SKIP_SAMPLES,
+
+    /**
+     * An AV_PKT_DATA_JP_DUALMONO side data packet indicates that
+     * the packet may contain "dual mono" audio specific to Japanese DTV
+     * and if it is true, recommends only the selected channel to be used.
+     * @code
+     * u8    selected channels (0=main/left, 1=sub/right, 2=both)
+     * @endcode
+     */
+    AV_PKT_DATA_JP_DUALMONO,
+
+    /**
+     * A list of zero terminated key/value strings. There is no end marker for
+     * the list, so it is required to rely on the side data size to stop.
+     */
+    AV_PKT_DATA_STRINGS_METADATA,
+
+    /**
+     * Subtitle event position
+     * @code
+     * u32le x1
+     * u32le y1
+     * u32le x2
+     * u32le y2
+     * @endcode
+     */
+    AV_PKT_DATA_SUBTITLE_POSITION,
+
+    /**
+     * Data found in BlockAdditional element of matroska container. There is
+     * no end marker for the data, so it is required to rely on the side data
+     * size to recognize the end. 8 byte id (as found in BlockAddId) followed
+     * by data.
+     */
+    AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,
+
+    /**
+     * The optional first identifier line of a WebVTT cue.
+     */
+    AV_PKT_DATA_WEBVTT_IDENTIFIER,
+
+    /**
+     * The optional settings (rendering instructions) that immediately
+     * follow the timestamp specifier of a WebVTT cue.
+     */
+    AV_PKT_DATA_WEBVTT_SETTINGS,
+
+    /**
+     * A list of zero terminated key/value strings. There is no end marker for
+     * the list, so it is required to rely on the side data size to stop. This
+     * side data includes updated metadata which appeared in the stream.
+     */
+    AV_PKT_DATA_METADATA_UPDATE,
+
+    /**
+     * MPEGTS stream ID as uint8_t, this is required to pass the stream ID
+     * information from the demuxer to the corresponding muxer.
+     */
+    AV_PKT_DATA_MPEGTS_STREAM_ID,
+
+    /**
+     * Mastering display metadata (based on SMPTE-2086:2014). This metadata
+     * should be associated with a video stream and contains data in the form
+     * of the AVMasteringDisplayMetadata struct.
+     */
+    AV_PKT_DATA_MASTERING_DISPLAY_METADATA,
+
+    /**
+     * This side data should be associated with a video stream and corresponds
+     * to the AVSphericalMapping structure.
+     */
+    AV_PKT_DATA_SPHERICAL,
+
+    /**
+     * Content light level (based on CTA-861.3). This metadata should be
+     * associated with a video stream and contains data in the form of the
+     * AVContentLightMetadata struct.
+     */
+    AV_PKT_DATA_CONTENT_LIGHT_LEVEL,
+
+    /**
+     * ATSC A53 Part 4 Closed Captions. This metadata should be associated with
+     * a video stream. A53 CC bitstream is stored as uint8_t in AVPacketSideData.data.
+     * The number of bytes of CC data is AVPacketSideData.size.
+     */
+    AV_PKT_DATA_A53_CC,
+
+    /**
+     * This side data is encryption initialization data.
+     * The format is not part of ABI, use av_encryption_init_info_* methods to
+     * access.
+     */
+    AV_PKT_DATA_ENCRYPTION_INIT_INFO,
+
+    /**
+     * This side data contains encryption info for how to decrypt the packet.
+     * The format is not part of ABI, use av_encryption_info_* methods to access.
+     */
+    AV_PKT_DATA_ENCRYPTION_INFO,
+
+    /**
+     * Active Format Description data consisting of a single byte as specified
+     * in ETSI TS 101 154 using AVActiveFormatDescription enum.
+     */
+    AV_PKT_DATA_AFD,
+
+    /**
+     * Producer Reference Time data corresponding to the AVProducerReferenceTime struct,
+     * usually exported by some encoders (on demand through the prft flag set in the
+     * AVCodecContext export_side_data field).
+     */
+    AV_PKT_DATA_PRFT,
+
+    /**
+     * ICC profile data consisting of an opaque octet buffer following the
+     * format described by ISO 15076-1.
+     */
+    AV_PKT_DATA_ICC_PROFILE,
+
+    /**
+     * DOVI configuration
+     * ref:
+     * dolby-vision-bitstreams-within-the-iso-base-media-file-format-v2.1.2, section 2.2
+     * dolby-vision-bitstreams-in-mpeg-2-transport-stream-multiplex-v1.2, section 3.3
+     * Tags are stored in struct AVDOVIDecoderConfigurationRecord.
+     */
+    AV_PKT_DATA_DOVI_CONF,
+
+    /**
+     * Timecode which conforms to SMPTE ST 12-1:2014. The data is an array of 4 uint32_t
+     * where the first uint32_t describes how many (1-3) of the other timecodes are used.
+     * The timecode format is described in the documentation of av_timecode_get_smpte_from_framenum()
+     * function in libavutil/timecode.h.
+     */
+    AV_PKT_DATA_S12M_TIMECODE,
+
+    /**
+     * HDR10+ dynamic metadata associated with a video frame. The metadata is in
+     * the form of the AVDynamicHDRPlus struct and contains
+     * information for color volume transform - application 4 of
+     * SMPTE 2094-40:2016 standard.
+     */
+    AV_PKT_DATA_DYNAMIC_HDR10_PLUS,
+
+    /**
+     * The number of side data types.
+     * This is not part of the public API/ABI in the sense that it may
+     * change when new side data types are added.
+     * This must stay the last enum value.
+     * If its value becomes huge, some code using it
+     * needs to be updated as it assumes it to be smaller than other limits.
+     */
+    AV_PKT_DATA_NB
+};
+
+#define AV_PKT_DATA_QUALITY_FACTOR AV_PKT_DATA_QUALITY_STATS //DEPRECATED
+
+typedef struct AVPacketSideData {
+    uint8_t *data;                   ///< side data payload
+    size_t   size;                   ///< number of bytes in data
+    enum AVPacketSideDataType type;  ///< which AV_PKT_DATA_* kind the payload is
+} AVPacketSideData;
+
+/**
+ * This structure stores compressed data. It is typically exported by demuxers
+ * and then passed as input to decoders, or received as output from encoders and
+ * then passed to muxers.
+ *
+ * For video, it should typically contain one compressed frame. For audio it may
+ * contain several compressed frames. Encoders are allowed to output empty
+ * packets, with no compressed data, containing only side data
+ * (e.g. to update some stream parameters at the end of encoding).
+ *
+ * The semantics of data ownership depends on the buf field.
+ * If it is set, the packet data is dynamically allocated and is
+ * valid indefinitely until a call to av_packet_unref() reduces the
+ * reference count to 0.
+ *
+ * If the buf field is not set av_packet_ref() would make a copy instead
+ * of increasing the reference count.
+ *
+ * The side data is always allocated with av_malloc(), copied by
+ * av_packet_ref() and freed by av_packet_unref().
+ *
+ * sizeof(AVPacket) being a part of the public ABI is deprecated. Once
+ * av_init_packet() is removed, new packets will only be able to be allocated
+ * with av_packet_alloc(), and new fields may be added to the end of the struct
+ * with a minor bump.
+ *
+ * @see av_packet_alloc
+ * @see av_packet_ref
+ * @see av_packet_unref
+ */
+typedef struct AVPacket {
+    /**
+     * A reference to the reference-counted buffer where the packet data is
+     * stored.
+     * May be NULL, then the packet data is not reference-counted.
+     */
+    AVBufferRef *buf;
+    /**
+     * Presentation timestamp in AVStream->time_base units; the time at which
+     * the decompressed packet will be presented to the user.
+     * Can be AV_NOPTS_VALUE if it is not stored in the file.
+     * pts MUST be larger than or equal to dts as presentation cannot happen before
+     * decompression, unless one wants to view hex dumps. Some formats misuse
+     * the terms dts and pts/cts to mean something different. Such timestamps
+     * must be converted to true pts/dts before they are stored in AVPacket.
+     */
+    int64_t pts;
+    /**
+     * Decompression timestamp in AVStream->time_base units; the time at which
+     * the packet is decompressed.
+     * Can be AV_NOPTS_VALUE if it is not stored in the file.
+     */
+    int64_t dts;
+    uint8_t *data;                          ///< packet payload; ownership depends on buf
+    int   size;                             ///< size of data in bytes, not counting padding
+    int   stream_index;
+    /**
+     * A combination of AV_PKT_FLAG values
+     */
+    int   flags;
+    /**
+     * Additional packet data that can be provided by the container.
+     * Packet can contain several types of side information.
+     */
+    AVPacketSideData *side_data;
+    int side_data_elems;
+
+    /**
+     * Duration of this packet in AVStream->time_base units, 0 if unknown.
+     * Equals next_pts - this_pts in presentation order.
+     */
+    int64_t duration;
+
+    int64_t pos;                            ///< byte position in stream, -1 if unknown
+
+    /**
+     * for some private data of the user
+     */
+    void *opaque;
+
+    /**
+     * AVBufferRef for free use by the API user. FFmpeg will never check the
+     * contents of the buffer ref. FFmpeg calls av_buffer_unref() on it when
+     * the packet is unreferenced. av_packet_copy_props() calls create a new
+     * reference with av_buffer_ref() for the target packet's opaque_ref field.
+     *
+     * This is unrelated to the opaque field, although it serves a similar
+     * purpose.
+     */
+    AVBufferRef *opaque_ref;
+
+    /**
+     * Time base of the packet's timestamps.
+     * In the future, this field may be set on packets output by encoders or
+     * demuxers, but its value will be by default ignored on input to decoders
+     * or muxers.
+     */
+    AVRational time_base;
+} AVPacket;
+
+#if FF_API_INIT_PACKET
+attribute_deprecated
+typedef struct AVPacketList {
+    AVPacket pkt;
+    struct AVPacketList *next;
+} AVPacketList;
+#endif
+
+#define AV_PKT_FLAG_KEY     0x0001 ///< The packet contains a keyframe
+#define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted
+/**
+ * Flag is used to discard packets which are required to maintain valid
+ * decoder state but are not required for output and should be dropped
+ * after decoding.
+ **/
+#define AV_PKT_FLAG_DISCARD   0x0004
+/**
+ * The packet comes from a trusted source.
+ *
+ * Otherwise-unsafe constructs such as arbitrary pointers to data
+ * outside the packet may be followed.
+ */
+#define AV_PKT_FLAG_TRUSTED   0x0008
+/**
+ * Flag is used to indicate packets that contain frames that can
+ * be discarded by the decoder, i.e. non-reference frames.
+ */
+#define AV_PKT_FLAG_DISPOSABLE 0x0010
+
+enum AVSideDataParamChangeFlags {
+#if FF_API_OLD_CHANNEL_LAYOUT
+    /**
+     * @deprecated those are not used by any decoder
+     */
+    AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT  = 0x0001,
+    AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT = 0x0002,
+#endif
+    AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE    = 0x0004,
+    AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS     = 0x0008,
+};
+
+/**
+ * Allocate an AVPacket and set its fields to default values.  The resulting
+ * struct must be freed using av_packet_free().
+ *
+ * @return An AVPacket filled with default values or NULL on failure.
+ *
+ * @note this only allocates the AVPacket itself, not the data buffers. Those
+ * must be allocated through other means such as av_new_packet.
+ *
+ * @see av_new_packet
+ */
+AVPacket *av_packet_alloc(void);
+
+/**
+ * Create a new packet that references the same data as src.
+ *
+ * This is a shortcut for av_packet_alloc()+av_packet_ref().
+ *
+ * @return newly created AVPacket on success, NULL on error.
+ *
+ * @see av_packet_alloc
+ * @see av_packet_ref
+ */
+AVPacket *av_packet_clone(const AVPacket *src);
+
+/**
+ * Free the packet, if the packet is reference counted, it will be
+ * unreferenced first.
+ *
+ * @param pkt packet to be freed. The pointer will be set to NULL.
+ * @note passing NULL is a no-op.
+ */
+void av_packet_free(AVPacket **pkt);
+
+#if FF_API_INIT_PACKET
+/**
+ * Initialize optional fields of a packet with default values.
+ *
+ * Note, this does not touch the data and size members, which have to be
+ * initialized separately.
+ *
+ * @param pkt packet
+ *
+ * @see av_packet_alloc
+ * @see av_packet_unref
+ *
+ * @deprecated This function is deprecated. Once it's removed,
+               sizeof(AVPacket) will not be a part of the ABI anymore.
+ */
+attribute_deprecated
+void av_init_packet(AVPacket *pkt);
+#endif
+
+/**
+ * Allocate the payload of a packet and initialize its fields with
+ * default values.
+ *
+ * @param pkt packet
+ * @param size wanted payload size
+ * @return 0 if OK, AVERROR_xxx otherwise
+ */
+int av_new_packet(AVPacket *pkt, int size);
+
+/**
+ * Reduce packet size, correctly zeroing padding
+ *
+ * @param pkt packet
+ * @param size new size
+ */
+void av_shrink_packet(AVPacket *pkt, int size);
+
+/**
+ * Increase packet size, correctly zeroing padding
+ *
+ * @param pkt packet
+ * @param grow_by number of bytes by which to increase the size of the packet
+ */
+int av_grow_packet(AVPacket *pkt, int grow_by);
+
+/**
+ * Initialize a reference-counted packet from av_malloc()ed data.
+ *
+ * @param pkt packet to be initialized. This function will set the data, size,
+ *        and buf fields, all others are left untouched.
+ * @param data Data allocated by av_malloc() to be used as packet data. If this
+ *        function returns successfully, the data is owned by the underlying AVBuffer.
+ *        The caller may not access the data through other means.
+ * @param size size of data in bytes, without the padding. I.e. the full buffer
+ *        size is assumed to be size + AV_INPUT_BUFFER_PADDING_SIZE.
+ *
+ * @return 0 on success, a negative AVERROR on error
+ */
+int av_packet_from_data(AVPacket *pkt, uint8_t *data, int size);
+
+/**
+ * Allocate new information of a packet.
+ *
+ * @param pkt packet
+ * @param type side information type
+ * @param size side information size
+ * @return pointer to freshly allocated data or NULL otherwise
+ */
+uint8_t* av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+                                 size_t size);
+
+/**
+ * Wrap an existing array as a packet side data.
+ *
+ * @param pkt packet
+ * @param type side information type
+ * @param data the side data array. It must be allocated with the av_malloc()
+ *             family of functions. The ownership of the data is transferred to
+ *             pkt.
+ * @param size side information size
+ * @return a non-negative number on success, a negative AVERROR code on
+ *         failure. On failure, the packet is unchanged and the data remains
+ *         owned by the caller.
+ */
+int av_packet_add_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+                            uint8_t *data, size_t size);
+
+/**
+ * Shrink the already allocated side data buffer
+ *
+ * @param pkt packet
+ * @param type side information type
+ * @param size new side information size
+ * @return 0 on success, < 0 on failure
+ */
+int av_packet_shrink_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+                               size_t size);
+
+/**
+ * Get side information from packet.
+ *
+ * @param pkt packet
+ * @param type desired side information type
+ * @param size If supplied, *size will be set to the size of the side data
+ *             or to zero if the desired side data is not present.
+ * @return pointer to data if present or NULL otherwise
+ */
+uint8_t* av_packet_get_side_data(const AVPacket *pkt, enum AVPacketSideDataType type,
+                                 size_t *size);
+
+const char *av_packet_side_data_name(enum AVPacketSideDataType type);
+
+/**
+ * Pack a dictionary for use in side_data.
+ *
+ * @param dict The dictionary to pack.
+ * @param size pointer to store the size of the returned data
+ * @return pointer to data if successful, NULL otherwise
+ */
+uint8_t *av_packet_pack_dictionary(AVDictionary *dict, size_t *size);
+/**
+ * Unpack a dictionary from side_data.
+ *
+ * @param data data from side_data
+ * @param size size of the data
+ * @param dict the metadata storage dictionary
+ * @return 0 on success, < 0 on failure
+ */
+int av_packet_unpack_dictionary(const uint8_t *data, size_t size,
+                                AVDictionary **dict);
+
+/**
+ * Convenience function to free all the side data stored.
+ * All the other fields stay untouched.
+ *
+ * @param pkt packet
+ */
+void av_packet_free_side_data(AVPacket *pkt);
+
+/**
+ * Setup a new reference to the data described by a given packet
+ *
+ * If src is reference-counted, setup dst as a new reference to the
+ * buffer in src. Otherwise allocate a new buffer in dst and copy the
+ * data from src into it.
+ *
+ * All the other fields are copied from src.
+ *
+ * @see av_packet_unref
+ *
+ * @param dst Destination packet. Will be completely overwritten.
+ * @param src Source packet
+ *
+ * @return 0 on success, a negative AVERROR on error. On error, dst
+ *         will be blank (as if returned by av_packet_alloc()).
+ */
+int av_packet_ref(AVPacket *dst, const AVPacket *src);
+
+/**
+ * Wipe the packet.
+ *
+ * Unreference the buffer referenced by the packet and reset the
+ * remaining packet fields to their default values.
+ *
+ * @param pkt The packet to be unreferenced.
+ */
+void av_packet_unref(AVPacket *pkt);
+
+/**
+ * Move every field in src to dst and reset src.
+ *
+ * @see av_packet_unref
+ *
+ * @param src Source packet, will be reset
+ * @param dst Destination packet
+ */
+void av_packet_move_ref(AVPacket *dst, AVPacket *src);
+
+/**
+ * Copy only "properties" fields from src to dst.
+ *
+ * Properties for the purpose of this function are all the fields
+ * beside those related to the packet data (buf, data, size)
+ *
+ * @param dst Destination packet
+ * @param src Source packet
+ *
+ * @return 0 on success, a negative AVERROR on failure.
+ */
+int av_packet_copy_props(AVPacket *dst, const AVPacket *src);
+
+/**
+ * Ensure the data described by a given packet is reference counted.
+ *
+ * @note This function does not ensure that the reference will be writable.
+ *       Use av_packet_make_writable instead for that purpose.
+ *
+ * @see av_packet_ref
+ * @see av_packet_make_writable
+ *
+ * @param pkt packet whose data should be made reference counted.
+ *
+ * @return 0 on success, a negative AVERROR on error. On failure, the
+ *         packet is unchanged.
+ */
+int av_packet_make_refcounted(AVPacket *pkt);
+
+/**
+ * Create a writable reference for the data described by a given packet,
+ * avoiding data copy if possible.
+ *
+ * @param pkt Packet whose data should be made writable.
+ *
+ * @return 0 on success, a negative AVERROR on failure. On failure, the
+ *         packet is unchanged.
+ */
+int av_packet_make_writable(AVPacket *pkt);
+
+/**
+ * Convert valid timing fields (timestamps / durations) in a packet from one
+ * timebase to another. Timestamps with unknown values (AV_NOPTS_VALUE) will be
+ * ignored.
+ *
+ * @param pkt packet on which the conversion will be performed
+ * @param tb_src source timebase, in which the timing fields in pkt are
+ *               expressed
+ * @param tb_dst destination timebase, to which the timing fields will be
+ *               converted
+ */
+void av_packet_rescale_ts(AVPacket *pkt, AVRational tb_src, AVRational tb_dst);
+
+/**
+ * @}
+ */
+
+#endif // AVCODEC_PACKET_H
diff --git a/media/ffvpx/libavcodec/packet_internal.h b/media/ffvpx/libavcodec/packet_internal.h
new file mode 100644
index 0000000000..92a0d4e6d5
--- /dev/null
+++ b/media/ffvpx/libavcodec/packet_internal.h
@@ -0,0 +1,73 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PACKET_INTERNAL_H
+#define AVCODEC_PACKET_INTERNAL_H
+
+#include <stdint.h>
+
+#include "packet.h"
+
+typedef struct PacketListEntry {
+    struct PacketListEntry *next;
+    AVPacket pkt;
+} PacketListEntry;
+
+typedef struct PacketList {
+    PacketListEntry *head, *tail;
+} PacketList;
+
+/**
+ * Append an AVPacket to the list.
+ *
+ * @param list  A PacketList
+ * @param pkt   The packet being appended. The data described in it will
+ *              be made reference counted if it isn't already.
+ * @param copy  A callback to copy the contents of the packet to the list.
+ *              May be null, in which case the packet's reference will be
+ *              moved to the list.
+ * @return 0 on success, negative AVERROR value on failure. On failure,
+ *         the packet and the list are unchanged.
+ */
+int avpriv_packet_list_put(PacketList *list, AVPacket *pkt,
+                           int (*copy)(AVPacket *dst, const AVPacket *src),
+                           int flags);
+
+/**
+ * Remove the oldest AVPacket in the list and return it.
+ *
+ * @note The pkt will be overwritten completely on success. The caller
+ *       owns the packet and must unref it by itself.
+ *
+ * @param list A pointer to a PacketList struct
+ * @param pkt  Pointer to an AVPacket struct
+ * @return 0 on success, and a packet is returned. AVERROR(EAGAIN) if
+ *         the list was empty.
+ */
+int avpriv_packet_list_get(PacketList *list, AVPacket *pkt);
+
+/**
+ * Wipe the list and unref all the packets in it.
+ */
+void avpriv_packet_list_free(PacketList *list);
+
+int ff_side_data_set_encoder_stats(AVPacket *pkt, int quality, int64_t *error, int error_count, int pict_type);
+
+int ff_side_data_set_prft(AVPacket *pkt, int64_t timestamp);
+
+#endif // AVCODEC_PACKET_INTERNAL_H
diff --git a/media/ffvpx/libavcodec/parser.c b/media/ffvpx/libavcodec/parser.c
new file mode 100644
index 0000000000..49de7e6a57
--- /dev/null
+++ b/media/ffvpx/libavcodec/parser.c
@@ -0,0 +1,289 @@
+/*
+ * Audio and Video frame extraction
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2003 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/mem.h"
+
+#include "parser.h"
+
+AVCodecParserContext *av_parser_init(int codec_id)
+{
+    AVCodecParserContext *s = NULL;
+    const AVCodecParser *parser;
+    void *i = 0;                     /* opaque cursor handed to av_parser_iterate() */
+    int ret;
+
+    if (codec_id == AV_CODEC_ID_NONE)
+        return NULL;
+
+    while ((parser = av_parser_iterate(&i))) { /* stop at the first parser listing codec_id */
+        if (parser->codec_ids[0] == codec_id ||
+            parser->codec_ids[1] == codec_id ||
+            parser->codec_ids[2] == codec_id ||
+            parser->codec_ids[3] == codec_id ||
+            parser->codec_ids[4] == codec_id ||
+            parser->codec_ids[5] == codec_id ||
+            parser->codec_ids[6] == codec_id)
+            goto found;
+    }
+    return NULL;                     /* iteration ended without a match */
+
+found:
+    s = av_mallocz(sizeof(AVCodecParserContext));
+    if (!s)
+        goto err_out;
+    s->parser = parser;
+    s->priv_data = av_mallocz(parser->priv_data_size); /* zeroed parser-private state */
+    if (!s->priv_data)
+        goto err_out;
+    s->fetch_timestamp=1;
+    s->pict_type = AV_PICTURE_TYPE_I;
+    if (parser->parser_init) {       /* the init callback is optional */
+        ret = parser->parser_init(s);
+        if (ret != 0)                /* any non-zero result is treated as failure */
+            goto err_out;
+    }
+    s->key_frame            = -1;
+    s->dts_sync_point       = INT_MIN;
+    s->dts_ref_dts_delta    = INT_MIN;
+    s->pts_dts_delta        = INT_MIN;
+    s->format               = -1;
+
+    return s;
+
+err_out:
+    if (s)                           /* s is still NULL if the context allocation failed */
+        av_freep(&s->priv_data);
+    av_free(s);
+    return NULL;
+}
+
+void ff_fetch_timestamp(AVCodecParserContext *s, int off, int remove, int fuzzy)
+{
+    int i;
+
+    if (!fuzzy) {                    /* non-fuzzy: reset to "unknown" before searching */
+        s->dts    =
+        s->pts    = AV_NOPTS_VALUE;
+        s->pos    = -1;
+        s->offset = 0;
+    }
+    for (i = 0; i < AV_PARSER_PTS_NB; i++) { /* scan every stored packet descriptor */
+        if (s->cur_offset + off >= s->cur_frame_offset[i] &&
+            (s->frame_offset < s->cur_frame_offset[i] ||
+             (!s->frame_offset && !s->next_frame_offset)) && // first field/frame
+            // check disabled since MPEG-TS does not send complete PES packets
+            /*s->next_frame_offset + off <*/  s->cur_frame_end[i]){
+            /* in fuzzy mode only descriptors that carry a dts are taken */
+            if (!fuzzy || s->cur_frame_dts[i] != AV_NOPTS_VALUE) {
+                s->dts    = s->cur_frame_dts[i];
+                s->pts    = s->cur_frame_pts[i];
+                s->pos    = s->cur_frame_pos[i];
+                s->offset = s->next_frame_offset - s->cur_frame_offset[i];
+            }
+            if (remove)
+                s->cur_frame_offset[i] = INT64_MAX; /* so the offset test above stops matching this slot */
+            if (s->cur_offset + off < s->cur_frame_end[i]) /* byte lies before this descriptor's end */
+                break;
+        }
+    }
+}
+
+int av_parser_parse2(AVCodecParserContext *s, AVCodecContext *avctx,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size,
+                     int64_t pts, int64_t dts, int64_t pos)
+{
+    int index, i;
+    uint8_t dummy_buf[AV_INPUT_BUFFER_PADDING_SIZE];
+
+    av_assert1(avctx->codec_id != AV_CODEC_ID_NONE);
+
+    /* Parsers only work for the specified codec ids. */
+    av_assert1(avctx->codec_id == s->parser->codec_ids[0] ||
+               avctx->codec_id == s->parser->codec_ids[1] ||
+               avctx->codec_id == s->parser->codec_ids[2] ||
+               avctx->codec_id == s->parser->codec_ids[3] ||
+               avctx->codec_id == s->parser->codec_ids[4] ||
+               avctx->codec_id == s->parser->codec_ids[5] ||
+               avctx->codec_id == s->parser->codec_ids[6]);
+
+    if (!(s->flags & PARSER_FLAG_FETCHED_OFFSET)) { /* runs once: seed the offsets from pos */
+        s->next_frame_offset =
+        s->cur_offset        = pos;
+        s->flags            |= PARSER_FLAG_FETCHED_OFFSET;
+    }
+
+    if (buf_size == 0) {
+        /* padding is always necessary even if EOF, so we add it here */
+        memset(dummy_buf, 0, sizeof(dummy_buf));
+        buf = dummy_buf;
+    } else if (s->cur_offset + buf_size != s->cur_frame_end[s->cur_frame_start_index]) { /* skip remainder packets */
+        /* add a new packet descriptor */
+        i = (s->cur_frame_start_index + 1) & (AV_PARSER_PTS_NB - 1); /* ring advance; mask assumes AV_PARSER_PTS_NB is a power of two */
+        s->cur_frame_start_index = i;
+        s->cur_frame_offset[i]   = s->cur_offset;
+        s->cur_frame_end[i]      = s->cur_offset + buf_size;
+        s->cur_frame_pts[i]      = pts;
+        s->cur_frame_dts[i]      = dts;
+        s->cur_frame_pos[i]      = pos;
+    }
+
+    if (s->fetch_timestamp) {        /* set by av_parser_init() and after every output frame */
+        s->fetch_timestamp = 0;
+        s->last_pts        = s->pts;
+        s->last_dts        = s->dts;
+        s->last_pos        = s->pos;
+        ff_fetch_timestamp(s, 0, 0, 0);
+    }
+    /* WARNING: the returned index can be negative */
+    index = s->parser->parser_parse(s, avctx, (const uint8_t **) poutbuf,
+                                    poutbuf_size, buf, buf_size);
+    av_assert0(index > -0x20000000); // The API does not allow returning AVERROR codes
+#define FILL(name) if(s->name > 0 && avctx->name <= 0) avctx->name = s->name
+    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+        FILL(field_order);
+    }
+
+    /* update the file pointer */
+    if (*poutbuf_size) {
+        /* fill the data for the current frame */
+        s->frame_offset = s->next_frame_offset;
+
+        /* offset of the next frame */
+        s->next_frame_offset = s->cur_offset + index;
+        s->fetch_timestamp   = 1;
+    } else {
+        /* Don't return a pointer to dummy_buf. */
+        *poutbuf = NULL;
+    }
+    if (index < 0)                   /* never move cur_offset backwards or return a negative count */
+        index = 0;
+    s->cur_offset += index;
+    return index;
+}
+
+void av_parser_close(AVCodecParserContext *s)
+{
+    if (s) {                                 /* a NULL context is ignored */
+        if (s->parser->parser_close)         /* the close callback is optional */
+            s->parser->parser_close(s);
+        av_freep(&s->priv_data);
+        av_free(s);
+    }
+}
+
+int ff_combine_frame(ParseContext *pc, int next,
+                     const uint8_t **buf, int *buf_size)
+{
+    if (pc->overread) {
+        ff_dlog(NULL, "overread %d, state:%"PRIX32" next:%d index:%d o_index:%d\n",
+                pc->overread, pc->state, next, pc->index, pc->overread_index);
+        ff_dlog(NULL, "%X %X %X %X\n",
+                (*buf)[0], (*buf)[1], (*buf)[2], (*buf)[3]);
+    }
+
+    /* Copy overread bytes from last frame into buffer. */
+    for (; pc->overread > 0; pc->overread--)
+        pc->buffer[pc->index++] = pc->buffer[pc->overread_index++];
+
+    if (next > *buf_size)            /* frame end cannot lie beyond the supplied input */
+        return AVERROR(EINVAL);
+
+    /* flush remaining if EOF */
+    if (!*buf_size && next == END_NOT_FOUND)
+        next = 0;
+
+    pc->last_index = pc->index;
+
+    /* copy into buffer and return */
+    if (next == END_NOT_FOUND) {
+        void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size,
+                                           *buf_size + pc->index +
+                                           AV_INPUT_BUFFER_PADDING_SIZE);
+
+        if (!new_buffer) {
+            av_log(NULL, AV_LOG_ERROR, "Failed to reallocate parser buffer to %d\n", *buf_size + pc->index + AV_INPUT_BUFFER_PADDING_SIZE);
+            pc->index = 0;
+            return AVERROR(ENOMEM);
+        }
+        pc->buffer = new_buffer;
+        memcpy(&pc->buffer[pc->index], *buf, *buf_size);
+        pc->index += *buf_size;
+        return -1;                   /* frame still incomplete; the input has been buffered */
+    }
+
+    av_assert0(next >= 0 || pc->buffer);
+
+    *buf_size          =
+    pc->overread_index = pc->index + next;
+
+    /* append to buffer */
+    if (pc->index) {
+        void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size,
+                                           next + pc->index +
+                                           AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!new_buffer) {
+            av_log(NULL, AV_LOG_ERROR, "Failed to reallocate parser buffer to %d\n", next + pc->index + AV_INPUT_BUFFER_PADDING_SIZE);
+            pc->overread_index =
+            pc->index = 0;
+            return AVERROR(ENOMEM);
+        }
+        pc->buffer = new_buffer;
+        if (next > -AV_INPUT_BUFFER_PADDING_SIZE)
+            memcpy(&pc->buffer[pc->index], *buf,
+                   next + AV_INPUT_BUFFER_PADDING_SIZE);
+        pc->index = 0;
+        *buf      = pc->buffer;      /* caller now reads the frame from the internal buffer */
+    }
+
+    if (next < -8) {                 /* bytes beyond the last 8 are counted but not shifted into state */
+        pc->overread += -8 - next;
+        next = -8;
+    }
+    /* store overread bytes */
+    for (; next < 0; next++) {
+        pc->state   = pc->state   << 8 | pc->buffer[pc->last_index + next];
+        pc->state64 = pc->state64 << 8 | pc->buffer[pc->last_index + next];
+        pc->overread++;
+    }
+
+    if (pc->overread) {
+        ff_dlog(NULL, "overread %d, state:%"PRIX32" next:%d index:%d o_index:%d\n",
+                pc->overread, pc->state, next, pc->index, pc->overread_index);
+        ff_dlog(NULL, "%X %X %X %X\n",
+                (*buf)[0], (*buf)[1], (*buf)[2], (*buf)[3]);
+    }
+
+    return 0;
+}
+
+void ff_parse_close(AVCodecParserContext *s)
+{
+    ParseContext *pc = s->priv_data; /* priv_data is interpreted as a ParseContext here */
+
+    av_freep(&pc->buffer);           /* only the buffer is released, not priv_data itself */
+}
diff --git a/media/ffvpx/libavcodec/parser.h b/media/ffvpx/libavcodec/parser.h
new file mode 100644
index 0000000000..2cee5ae4ff
--- /dev/null
+++ b/media/ffvpx/libavcodec/parser.h
@@ -0,0 +1,58 @@
+/*
+ * AVCodecParser prototypes and definitions
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2003 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PARSER_H
+#define AVCODEC_PARSER_H
+
+#include "avcodec.h"
+
+typedef struct ParseContext{
+    uint8_t *buffer;            ///< accumulation buffer, grown by ff_combine_frame()
+    int index;                  ///< number of bytes currently held in buffer
+    int last_index;             ///< value of index saved by ff_combine_frame() after re-appending overread bytes
+    unsigned int buffer_size;   ///< allocated size of buffer, maintained via av_fast_realloc()
+    uint32_t state;             ///< contains the last few bytes in MSB order
+    int frame_start_found;
+    int overread;               ///< the number of bytes which were irreversibly read from the next frame
+    int overread_index;         ///< the index into ParseContext.buffer of the overread bytes
+    uint64_t state64;           ///< contains the last 8 bytes in MSB order
+} ParseContext;
+
+#define END_NOT_FOUND (-100)
+
+/**
+ * Combine the (truncated) bitstream to a complete frame.
+ * @return 0 on success, -1 if no complete frame could be created,
+ *         AVERROR(EINVAL) if next > *buf_size, AVERROR(ENOMEM) on allocation failure
+ */
+int ff_combine_frame(ParseContext *pc, int next, const uint8_t **buf, int *buf_size);
+void ff_parse_close(AVCodecParserContext *s);
+
+/**
+ * Fetch timestamps for a specific byte within the current access unit.
+ * @param off byte position within the access unit
+ * @param remove Found timestamps will be removed if set to 1, kept if set to 0.
+ * @param fuzzy Only use found value if it is more informative than what we already have
+ */
+void ff_fetch_timestamp(AVCodecParserContext *s, int off, int remove, int fuzzy);
+
+#endif /* AVCODEC_PARSER_H */
diff --git a/media/ffvpx/libavcodec/parser_list.c b/media/ffvpx/libavcodec/parser_list.c
new file mode 100644
index 0000000000..f16ecd05c5
--- /dev/null
+++ b/media/ffvpx/libavcodec/parser_list.c
@@ -0,0 +1,10 @@
+#include "config_components.h"
+
+static const AVCodecParser * const parser_list[] = {
+#if CONFIG_VP8_PARSER
+    &ff_vp8_parser,
+#endif
+#if CONFIG_VP9_PARSER
+    &ff_vp9_parser,
+#endif
+    NULL };
diff --git a/media/ffvpx/libavcodec/parsers.c b/media/ffvpx/libavcodec/parsers.c
new file mode 100644
index 0000000000..d355808018
--- /dev/null
+++ b/media/ffvpx/libavcodec/parsers.c
@@ -0,0 +1,93 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "avcodec.h"
+
+extern const AVCodecParser ff_aac_parser;
+extern const AVCodecParser ff_aac_latm_parser;
+extern const AVCodecParser ff_ac3_parser;
+extern const AVCodecParser ff_adx_parser;
+extern const AVCodecParser ff_amr_parser;
+extern const AVCodecParser ff_av1_parser;
+extern const AVCodecParser ff_avs2_parser;
+extern const AVCodecParser ff_avs3_parser;
+extern const AVCodecParser ff_bmp_parser;
+extern const AVCodecParser ff_cavsvideo_parser;
+extern const AVCodecParser ff_cook_parser;
+extern const AVCodecParser ff_cri_parser;
+extern const AVCodecParser ff_dca_parser;
+extern const AVCodecParser ff_dirac_parser;
+extern const AVCodecParser ff_dnxhd_parser;
+extern const AVCodecParser ff_dolby_e_parser;
+extern const AVCodecParser ff_dpx_parser;
+extern const AVCodecParser ff_dvaudio_parser;
+extern const AVCodecParser ff_dvbsub_parser;
+extern const AVCodecParser ff_dvdsub_parser;
+extern const AVCodecParser ff_dvd_nav_parser;
+extern const AVCodecParser ff_flac_parser;
+extern const AVCodecParser ff_ftr_parser;
+extern const AVCodecParser ff_g723_1_parser;
+extern const AVCodecParser ff_g729_parser;
+extern const AVCodecParser ff_gif_parser;
+extern const AVCodecParser ff_gsm_parser;
+extern const AVCodecParser ff_h261_parser;
+extern const AVCodecParser ff_h263_parser;
+extern const AVCodecParser ff_h264_parser;
+extern const AVCodecParser ff_hevc_parser;
+extern const AVCodecParser ff_hdr_parser;
+extern const AVCodecParser ff_ipu_parser;
+extern const AVCodecParser ff_jpeg2000_parser;
+extern const AVCodecParser ff_misc4_parser;
+extern const AVCodecParser ff_mjpeg_parser;
+extern const AVCodecParser ff_mlp_parser;
+extern const AVCodecParser ff_mpeg4video_parser;
+extern const AVCodecParser ff_mpegaudio_parser;
+extern const AVCodecParser ff_mpegvideo_parser;
+extern const AVCodecParser ff_opus_parser;
+extern const AVCodecParser ff_png_parser;
+extern const AVCodecParser ff_pnm_parser;
+extern const AVCodecParser ff_qoi_parser;
+extern const AVCodecParser ff_rv30_parser;
+extern const AVCodecParser ff_rv40_parser;
+extern const AVCodecParser ff_sbc_parser;
+extern const AVCodecParser ff_sipr_parser;
+extern const AVCodecParser ff_tak_parser;
+extern const AVCodecParser ff_vc1_parser;
+extern const AVCodecParser ff_vorbis_parser;
+extern const AVCodecParser ff_vp3_parser;
+extern const AVCodecParser ff_vp8_parser;
+extern const AVCodecParser ff_vp9_parser;
+extern const AVCodecParser ff_webp_parser;
+extern const AVCodecParser ff_xbm_parser;
+extern const AVCodecParser ff_xma_parser;
+extern const AVCodecParser ff_xwd_parser;
+
+#include "libavcodec/parser_list.c"
+
+const AVCodecParser *av_parser_iterate(void **opaque)
+{
+    /* The opaque cookie holds the index of the next parser_list entry to return. */
+    const uintptr_t idx = (uintptr_t)*opaque;
+    const AVCodecParser *const parser = parser_list[idx];
+
+    if (parser != NULL) /* advance only while not at the NULL terminator */
+        *opaque = (void *)(idx + 1);
+    return parser;
+}
diff --git a/media/ffvpx/libavcodec/pixblockdsp.h b/media/ffvpx/libavcodec/pixblockdsp.h
new file mode 100644
index 0000000000..9b002aa3d6
--- /dev/null
+++ b/media/ffvpx/libavcodec/pixblockdsp.h
@@ -0,0 +1,62 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PIXBLOCKDSP_H
+#define AVCODEC_PIXBLOCKDSP_H
+
+#include <stdint.h>
+
+#include "config.h"
+
+#include "avcodec.h"
+
+typedef struct PixblockDSPContext {
+    void (*get_pixels)(int16_t *av_restrict block /* align 16 */,
+                       const uint8_t *pixels /* align 8 */,
+                       ptrdiff_t stride);
+    void (*get_pixels_unaligned)(int16_t *av_restrict block /* align 16 */,
+                       const uint8_t *pixels,
+                       ptrdiff_t stride);
+    void (*diff_pixels)(int16_t *av_restrict block /* align 16 */,
+                        const uint8_t *s1 /* align 8 */,
+                        const uint8_t *s2 /* align 8 */,
+                        ptrdiff_t stride);
+    void (*diff_pixels_unaligned)(int16_t *av_restrict block /* align 16 */,
+                        const uint8_t *s1,
+                        const uint8_t *s2,
+                        ptrdiff_t stride);
+
+} PixblockDSPContext;
+
+void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
+void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c, AVCodecContext *avctx,
+                                 unsigned high_bit_depth);
+void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
+                               unsigned high_bit_depth);
+void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx,
+                             unsigned high_bit_depth);
+void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
+                             unsigned high_bit_depth);
+void ff_pixblockdsp_init_riscv(PixblockDSPContext *c, AVCodecContext *avctx,
+                               unsigned high_bit_depth);
+void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx,
+                             unsigned high_bit_depth);
+void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx,
+                              unsigned high_bit_depth);
+
+#endif /* AVCODEC_PIXBLOCKDSP_H */
diff --git a/media/ffvpx/libavcodec/profiles.c b/media/ffvpx/libavcodec/profiles.c
new file mode 100644
index 0000000000..2230fc5415
--- /dev/null
+++ b/media/ffvpx/libavcodec/profiles.c
@@ -0,0 +1,185 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "avcodec.h"
+#include "profiles.h"
+
+#if !CONFIG_SMALL
+
+const AVProfile ff_aac_profiles[] = {
+    { FF_PROFILE_AAC_LOW,   "LC"       },
+    { FF_PROFILE_AAC_HE,    "HE-AAC"   },
+    { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
+    { FF_PROFILE_AAC_LD,    "LD"       },
+    { FF_PROFILE_AAC_ELD,   "ELD"      },
+    { FF_PROFILE_AAC_MAIN,  "Main" },
+    { FF_PROFILE_AAC_SSR,   "SSR"  },
+    { FF_PROFILE_AAC_LTP,   "LTP"  },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_dca_profiles[] = {
+    { FF_PROFILE_DTS,         "DTS"         },
+    { FF_PROFILE_DTS_ES,      "DTS-ES"      },
+    { FF_PROFILE_DTS_96_24,   "DTS 96/24"   },
+    { FF_PROFILE_DTS_HD_HRA,  "DTS-HD HRA"  },
+    { FF_PROFILE_DTS_HD_MA,   "DTS-HD MA"   },
+    { FF_PROFILE_DTS_EXPRESS, "DTS Express" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_dnxhd_profiles[] = {
+  { FF_PROFILE_DNXHD,      "DNXHD"},
+  { FF_PROFILE_DNXHR_LB,   "DNXHR LB"},
+  { FF_PROFILE_DNXHR_SQ,   "DNXHR SQ"},
+  { FF_PROFILE_DNXHR_HQ,   "DNXHR HQ" },
+  { FF_PROFILE_DNXHR_HQX,  "DNXHR HQX"},
+  { FF_PROFILE_DNXHR_444,  "DNXHR 444"},
+  { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_h264_profiles[] = {
+    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
+    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
+    { FF_PROFILE_H264_MAIN,                 "Main"                  },
+    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
+    { FF_PROFILE_H264_HIGH,                 "High"                  },
+    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
+    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
+    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
+    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
+    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
+    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
+    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
+    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
+    { FF_PROFILE_H264_MULTIVIEW_HIGH,       "Multiview High"        },
+    { FF_PROFILE_H264_STEREO_HIGH,          "Stereo High"           },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_vvc_profiles[] = {
+    { FF_PROFILE_VVC_MAIN_10,                   "Main 10" },
+    { FF_PROFILE_VVC_MAIN_10_444,               "Main 10 4:4:4" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_hevc_profiles[] = {
+    { FF_PROFILE_HEVC_MAIN,                 "Main"                },
+    { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
+    { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
+    { FF_PROFILE_HEVC_REXT,                 "Rext"                },
+    { FF_PROFILE_HEVC_SCC,                  "Scc"                 },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_jpeg2000_profiles[] = {
+    { FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_0,  "JPEG 2000 codestream restriction 0"   },
+    { FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_1,  "JPEG 2000 codestream restriction 1"   },
+    { FF_PROFILE_JPEG2000_CSTREAM_NO_RESTRICTION, "JPEG 2000 no codestream restrictions" },
+    { FF_PROFILE_JPEG2000_DCINEMA_2K,             "JPEG 2000 digital cinema 2K"          },
+    { FF_PROFILE_JPEG2000_DCINEMA_4K,             "JPEG 2000 digital cinema 4K"          },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_mpeg2_video_profiles[] = {
+    { FF_PROFILE_MPEG2_422,          "4:2:2"              },
+    { FF_PROFILE_MPEG2_HIGH,         "High"               },
+    { FF_PROFILE_MPEG2_SS,           "Spatially Scalable" },
+    { FF_PROFILE_MPEG2_SNR_SCALABLE, "SNR Scalable"       },
+    { FF_PROFILE_MPEG2_MAIN,         "Main"               },
+    { FF_PROFILE_MPEG2_SIMPLE,       "Simple"             },
+    { FF_PROFILE_RESERVED,           "Reserved"           },
+    { FF_PROFILE_UNKNOWN                                  },
+};
+
+const AVProfile ff_mpeg4_video_profiles[] = { /* profile id -> display name, ends with FF_PROFILE_UNKNOWN */
+    { FF_PROFILE_MPEG4_SIMPLE,                    "Simple Profile" },
+    { FF_PROFILE_MPEG4_SIMPLE_SCALABLE,           "Simple Scalable Profile" },
+    { FF_PROFILE_MPEG4_CORE,                      "Core Profile" },
+    { FF_PROFILE_MPEG4_MAIN,                      "Main Profile" },
+    { FF_PROFILE_MPEG4_N_BIT,                     "N-bit Profile" },
+    { FF_PROFILE_MPEG4_SCALABLE_TEXTURE,          "Scalable Texture Profile" },
+    { FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION,     "Simple Face Animation Profile" },
+    { FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE,    "Basic Animated Texture Profile" },
+    { FF_PROFILE_MPEG4_HYBRID,                    "Hybrid Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_REAL_TIME,        "Advanced Real Time Simple Profile" },
+    { FF_PROFILE_MPEG4_CORE_SCALABLE,             "Core Scalable Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_CODING,           "Advanced Coding Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_CORE,             "Advanced Core Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE, "Advanced Scalable Texture Profile" },
+    { FF_PROFILE_MPEG4_SIMPLE_STUDIO,             "Simple Studio Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_SIMPLE,           "Advanced Simple Profile" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_vc1_profiles[] = {
+    { FF_PROFILE_VC1_SIMPLE,   "Simple"   },
+    { FF_PROFILE_VC1_MAIN,     "Main"     },
+    { FF_PROFILE_VC1_COMPLEX,  "Complex"  },
+    { FF_PROFILE_VC1_ADVANCED, "Advanced" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_vp9_profiles[] = {
+    { FF_PROFILE_VP9_0, "Profile 0" },
+    { FF_PROFILE_VP9_1, "Profile 1" },
+    { FF_PROFILE_VP9_2, "Profile 2" },
+    { FF_PROFILE_VP9_3, "Profile 3" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_av1_profiles[] = {
+    { FF_PROFILE_AV1_MAIN,         "Main" },
+    { FF_PROFILE_AV1_HIGH,         "High" },
+    { FF_PROFILE_AV1_PROFESSIONAL, "Professional" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_sbc_profiles[] = {
+    { FF_PROFILE_SBC_MSBC, "mSBC" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_prores_profiles[] = {
+    { FF_PROFILE_PRORES_PROXY,    "Proxy"    },
+    { FF_PROFILE_PRORES_LT,       "LT"       },
+    { FF_PROFILE_PRORES_STANDARD, "Standard" },
+    { FF_PROFILE_PRORES_HQ,       "HQ"       },
+    { FF_PROFILE_PRORES_4444,     "4444"     },
+    { FF_PROFILE_PRORES_XQ,       "XQ"       },
+    { FF_PROFILE_UNKNOWN }
+};
+
+const AVProfile ff_mjpeg_profiles[] = {
+    { FF_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT,            "Baseline"    },
+    { FF_PROFILE_MJPEG_HUFFMAN_EXTENDED_SEQUENTIAL_DCT, "Sequential"  },
+    { FF_PROFILE_MJPEG_HUFFMAN_PROGRESSIVE_DCT,         "Progressive" },
+    { FF_PROFILE_MJPEG_HUFFMAN_LOSSLESS,                "Lossless"    },
+    { FF_PROFILE_MJPEG_JPEG_LS,                         "JPEG LS"     },
+    { FF_PROFILE_UNKNOWN }
+};
+
+const AVProfile ff_arib_caption_profiles[] = {
+    { FF_PROFILE_ARIB_PROFILE_A, "Profile A" },
+    { FF_PROFILE_ARIB_PROFILE_C, "Profile C" },
+    { FF_PROFILE_UNKNOWN }
+};
+
+#endif /* !CONFIG_SMALL */
diff --git a/media/ffvpx/libavcodec/profiles.h b/media/ffvpx/libavcodec/profiles.h
new file mode 100644
index 0000000000..41a19aa9ad
--- /dev/null
+++ b/media/ffvpx/libavcodec/profiles.h
@@ -0,0 +1,76 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PROFILES_H
+#define AVCODEC_PROFILES_H
+
+#include "avcodec.h"
+#include "libavutil/opt.h"
+
+#define FF_AVCTX_PROFILE_OPTION(name, description, type, value) \
+    {name, description, 0, AV_OPT_TYPE_CONST, {.i64 = value }, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_## type ##_PARAM, "avctx.profile"},
+
+#define FF_AAC_PROFILE_OPTS \
+    FF_AVCTX_PROFILE_OPTION("aac_main",      NULL, AUDIO, FF_PROFILE_AAC_MAIN)\
+    FF_AVCTX_PROFILE_OPTION("aac_low",       NULL, AUDIO, FF_PROFILE_AAC_LOW)\
+    FF_AVCTX_PROFILE_OPTION("aac_ssr",       NULL, AUDIO, FF_PROFILE_AAC_SSR)\
+    FF_AVCTX_PROFILE_OPTION("aac_ltp",       NULL, AUDIO, FF_PROFILE_AAC_LTP)\
+    FF_AVCTX_PROFILE_OPTION("aac_he",        NULL, AUDIO, FF_PROFILE_AAC_HE)\
+    FF_AVCTX_PROFILE_OPTION("aac_he_v2",     NULL, AUDIO, FF_PROFILE_AAC_HE_V2)\
+    FF_AVCTX_PROFILE_OPTION("aac_ld",        NULL, AUDIO, FF_PROFILE_AAC_LD)\
+    FF_AVCTX_PROFILE_OPTION("aac_eld",       NULL, AUDIO, FF_PROFILE_AAC_ELD)\
+    FF_AVCTX_PROFILE_OPTION("mpeg2_aac_low", NULL, AUDIO, FF_PROFILE_MPEG2_AAC_LOW)\
+    FF_AVCTX_PROFILE_OPTION("mpeg2_aac_he",  NULL, AUDIO, FF_PROFILE_MPEG2_AAC_HE)\
+
+#define FF_MPEG4_PROFILE_OPTS \
+    FF_AVCTX_PROFILE_OPTION("mpeg4_sp",      NULL, VIDEO, FF_PROFILE_MPEG4_SIMPLE)\
+    FF_AVCTX_PROFILE_OPTION("mpeg4_core",    NULL, VIDEO, FF_PROFILE_MPEG4_CORE)\
+    FF_AVCTX_PROFILE_OPTION("mpeg4_main",    NULL, VIDEO, FF_PROFILE_MPEG4_MAIN)\
+    FF_AVCTX_PROFILE_OPTION("mpeg4_asp",     NULL, VIDEO, FF_PROFILE_MPEG4_ADVANCED_SIMPLE)\
+
+#define FF_MPEG2_PROFILE_OPTS \
+    FF_AVCTX_PROFILE_OPTION("422",           NULL, VIDEO, FF_PROFILE_MPEG2_422)\
+    FF_AVCTX_PROFILE_OPTION("high",          NULL, VIDEO, FF_PROFILE_MPEG2_HIGH)\
+    FF_AVCTX_PROFILE_OPTION("ss",            NULL, VIDEO, FF_PROFILE_MPEG2_SS)\
+    FF_AVCTX_PROFILE_OPTION("snr",           NULL, VIDEO, FF_PROFILE_MPEG2_SNR_SCALABLE)\
+    FF_AVCTX_PROFILE_OPTION("main",          NULL, VIDEO, FF_PROFILE_MPEG2_MAIN)\
+    FF_AVCTX_PROFILE_OPTION("simple",        NULL, VIDEO, FF_PROFILE_MPEG2_SIMPLE)\
+
+#define FF_AV1_PROFILE_OPTS \
+    FF_AVCTX_PROFILE_OPTION("main",          NULL, VIDEO, FF_PROFILE_AV1_MAIN)\
+    FF_AVCTX_PROFILE_OPTION("high",          NULL, VIDEO, FF_PROFILE_AV1_HIGH)\
+    FF_AVCTX_PROFILE_OPTION("professional",  NULL, VIDEO, FF_PROFILE_AV1_PROFESSIONAL)\
+
+extern const AVProfile ff_aac_profiles[];
+extern const AVProfile ff_dca_profiles[];
+extern const AVProfile ff_dnxhd_profiles[];
+extern const AVProfile ff_h264_profiles[];
+extern const AVProfile ff_hevc_profiles[];
+extern const AVProfile ff_vvc_profiles[];
+extern const AVProfile ff_jpeg2000_profiles[];
+extern const AVProfile ff_mpeg2_video_profiles[];
+extern const AVProfile ff_mpeg4_video_profiles[];
+extern const AVProfile ff_vc1_profiles[];
+extern const AVProfile ff_vp9_profiles[];
+extern const AVProfile ff_av1_profiles[];
+extern const AVProfile ff_sbc_profiles[];
+extern const AVProfile ff_prores_profiles[];
+extern const AVProfile ff_mjpeg_profiles[];
+extern const AVProfile ff_arib_caption_profiles[];
+
+#endif /* AVCODEC_PROFILES_H */
diff --git a/media/ffvpx/libavcodec/pthread.c b/media/ffvpx/libavcodec/pthread.c
new file mode 100644
index 0000000000..ca84b81391
--- /dev/null
+++ b/media/ffvpx/libavcodec/pthread.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2004 Roman Shaposhnik
+ * Copyright (c) 2008 Alexander Strange (astrange@ithinksw.com)
+ *
+ * Many thanks to Steven M. Schultz for providing clever ideas and
+ * to Michael Niedermayer <michaelni@gmx.at> for writing initial
+ * implementation.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Multithreading support functions
+ * @see doc/multithreading.txt
+ */
+
+#include "libavutil/thread.h"
+
+#include "avcodec.h"
+#include "codec_internal.h"
+#include "pthread_internal.h"
+#include "thread.h"
+
+/**
+ * Set the threading algorithms used.
+ *
+ * Threading requires more than one thread.
+ * Frame threading requires entire frames to be passed to the codec,
+ * and introduces extra decoding delay, so is incompatible with low_delay.
+ *
+ * @param avctx The context.
+ */
+static void validate_thread_parameters(AVCodecContext *avctx)
+{
+    /* Frame threading: the codec must support it; the LOW_DELAY and CHUNKS flags rule it out. */
+    const int can_frame_thread = (avctx->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS) &&
+        !(avctx->flags & AV_CODEC_FLAG_LOW_DELAY) && !(avctx->flags2 & AV_CODEC_FLAG2_CHUNKS);
+
+    if (avctx->thread_count == 1) {
+        avctx->active_thread_type = 0;
+    } else if (can_frame_thread && (avctx->thread_type & FF_THREAD_FRAME)) {
+        avctx->active_thread_type = FF_THREAD_FRAME;
+    } else if ((avctx->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS) &&
+               (avctx->thread_type & FF_THREAD_SLICE)) {
+        avctx->active_thread_type = FF_THREAD_SLICE;
+    } else if (!(ffcodec(avctx->codec)->caps_internal & FF_CODEC_CAP_AUTO_THREADS)) {
+        avctx->thread_count       = 1;
+        avctx->active_thread_type = 0;
+    }
+    if (avctx->thread_count > MAX_AUTO_THREADS)
+        av_log(avctx, AV_LOG_WARNING,
+               "Application has requested %d threads. Using a thread count greater than %d is not recommended.\n",
+               avctx->thread_count, MAX_AUTO_THREADS);
+}
+
+int ff_thread_init(AVCodecContext *avctx)
+{
+    /* Settle active_thread_type and thread_count before picking a backend. */
+    validate_thread_parameters(avctx);
+
+    if (avctx->active_thread_type & FF_THREAD_SLICE)
+        return ff_slice_thread_init(avctx);
+    if (avctx->active_thread_type & FF_THREAD_FRAME)
+        return ff_frame_thread_init(avctx);
+    return 0; /* neither slice nor frame threading is active */
+}
+
+void ff_thread_free(AVCodecContext *avctx)
+{
+    if (!(avctx->active_thread_type & FF_THREAD_FRAME))
+        ff_slice_thread_free(avctx);   /* taken for every non-frame thread type */
+    else
+        ff_frame_thread_free(avctx, avctx->thread_count);
+}
+
+av_cold void ff_pthread_free(void *obj, const unsigned offsets[])
+{
+    unsigned cnt = *(unsigned*)((char*)obj + offsets[0]);
+    const unsigned *cur_offset = offsets;
+    /* Zero the stored count up front, so calling this again on obj destroys nothing. */
+    *(unsigned*)((char*)obj + offsets[0]) = 0;
+    /* Walk the mutex offsets, then the cond offsets, destroying at most cnt objects overall. */
+    for (; *(++cur_offset) != THREAD_SENTINEL && cnt; cnt--)
+        pthread_mutex_destroy((pthread_mutex_t*)((char*)obj + *cur_offset));
+    for (; *(++cur_offset) != THREAD_SENTINEL && cnt; cnt--)
+        pthread_cond_destroy ((pthread_cond_t *)((char*)obj + *cur_offset));
+}
+
+av_cold int ff_pthread_init(void *obj, const unsigned offsets[])
+{
+    const unsigned *cur_offset = offsets;
+    unsigned cnt = 0;
+    int err = 0; /* stays 0 (success) even when offsets[] lists no mutexes and no conds */
+    /* offsets[]: [0] = counter slot in obj, mutex offsets, THREAD_SENTINEL, cond offsets, THREAD_SENTINEL */
+#define PTHREAD_INIT_LOOP(type)                                               \
+    for (; *(++cur_offset) != THREAD_SENTINEL; cnt++) {                       \
+        pthread_ ## type ## _t *dst = (void*)((char*)obj + *cur_offset);      \
+        err = pthread_ ## type ## _init(dst, NULL);                           \
+        if (err) {                                                            \
+            err = AVERROR(err);                                               \
+            goto fail;                                                        \
+        }                                                                     \
+    }
+    PTHREAD_INIT_LOOP(mutex)
+    PTHREAD_INIT_LOOP(cond)
+    /* Also reached on success: store how many objects were initialized; ff_pthread_free() reads it. */
+fail:
+    *(unsigned*)((char*)obj + offsets[0]) = cnt;
+    return err;
+}
diff --git a/media/ffvpx/libavcodec/pthread_frame.c b/media/ffvpx/libavcodec/pthread_frame.c
new file mode 100644
index 0000000000..d9d5afaa82
--- /dev/null
+++ b/media/ffvpx/libavcodec/pthread_frame.c
@@ -0,0 +1,977 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Frame multithreading support functions
+ * @see doc/multithreading.txt
+ */
+
+#include "config.h"
+
+#include <stdatomic.h>
+#include <stdint.h>
+
+#include "avcodec.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "hwconfig.h"
+#include "internal.h"
+#include "pthread_internal.h"
+#include "thread.h"
+#include "threadframe.h"
+#include "version_major.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/buffer.h"
+#include "libavutil/common.h"
+#include "libavutil/cpu.h"
+#include "libavutil/frame.h"
+#include "libavutil/internal.h"
+#include "libavutil/log.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/thread.h"
+
+enum {
+    /// Set when the thread is awaiting a packet.
+    STATE_INPUT_READY,
+    /// Set before the codec has called ff_thread_finish_setup().
+    STATE_SETTING_UP,
+    /**
+     * Set when the codec calls get_buffer().
+     * State is returned to STATE_SETTING_UP afterwards.
+     */
+    STATE_GET_BUFFER,
+     /**
+      * Set when the codec calls get_format().
+      * State is returned to STATE_SETTING_UP afterwards.
+      */
+    STATE_GET_FORMAT,
+    /// Set after the codec has called ff_thread_finish_setup().
+    STATE_SETUP_FINISHED,
+};
+
+enum {
+    UNINITIALIZED,  ///< Thread has not been created, AVCodec->close mustn't be called
+    NEEDS_CLOSE,    ///< FFCodec->close needs to be called
+    INITIALIZED,    ///< Thread has been properly set up
+};
+
+/**
+ * Context used by codec threads and stored in their AVCodecInternal thread_ctx.
+ */
+typedef struct PerThreadContext {
+    struct FrameThreadContext *parent;
+
+    pthread_t      thread;
+    int            thread_init;
+    unsigned       pthread_init_cnt;///< Number of successfully initialized mutexes/conditions
+    pthread_cond_t input_cond;      ///< Used to wait for a new packet from the main thread.
+    pthread_cond_t progress_cond;   ///< Used by child threads to wait for progress to change.
+    pthread_cond_t output_cond;     ///< Used by the main thread to wait for frames to finish.
+
+    pthread_mutex_t mutex;          ///< Mutex used to protect the contents of the PerThreadContext.
+    pthread_mutex_t progress_mutex; ///< Mutex used to protect frame progress values and progress_cond.
+
+    AVCodecContext *avctx;          ///< Context used to decode packets passed to this thread.
+
+    AVPacket       *avpkt;          ///< Input packet (for decoding) or output (for encoding).
+
+    AVFrame *frame;                 ///< Output frame (for decoding) or input (for encoding).
+    int     got_frame;              ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
+    int     result;                 ///< The result of the last codec decode/encode() call.
+
+    atomic_int state;               ///< One of the STATE_* values; the worker sleeps while it is STATE_INPUT_READY.
+
+    int die;                        ///< Set when the thread should exit.
+
+    int hwaccel_serializing;        ///< Set by the worker when it takes parent->hwaccel_mutex, cleared when it releases it.
+    int async_serializing;          ///< If set after decoding, the worker clears it and calls async_unlock() on the parent.
+
+    atomic_int debug_threads;       ///< Set if the FF_DEBUG_THREADS option is set.
+} PerThreadContext;
+
+/**
+ * Context stored in the client AVCodecInternal thread_ctx.
+ */
+typedef struct FrameThreadContext {
+    PerThreadContext *threads;     ///< The contexts for each thread.
+    PerThreadContext *prev_thread; ///< The last thread submit_packet() was called on.
+
+    unsigned    pthread_init_cnt;  ///< Number of successfully initialized mutexes/conditions
+    pthread_mutex_t buffer_mutex;  ///< Mutex used to protect get/release_buffer().
+    /**
+     * This lock is used for ensuring threads run in serial when hwaccel
+     * is used.
+     */
+    pthread_mutex_t hwaccel_mutex;
+    pthread_mutex_t async_mutex;   ///< Protects async_lock.
+    pthread_cond_t async_cond;     ///< Broadcast by async_unlock() when async_lock is cleared.
+    int async_lock;                ///< Flag set by async_lock() and cleared by async_unlock().
+
+    int next_decoding;             ///< The next context to submit a packet to.
+    int next_finished;             ///< The next context to return output from.
+
+    int delaying;                  /**<
+                                    * Set for the first N packets, where N is the number of threads.
+                                    * While it is set, ff_thread_en/decode_frame won't return any results.
+                                    */
+
+    /* hwaccel state is temporarily stored here in order to transfer its ownership
+     * to the next decoding thread without the need for extra synchronization */
+    const AVHWAccel *stash_hwaccel;
+    void            *stash_hwaccel_context;
+    void            *stash_hwaccel_priv;
+} FrameThreadContext;
+
+static void async_lock(FrameThreadContext *fctx)
+{
+    pthread_mutex_lock(&fctx->async_mutex);   /* async_mutex guards the async_lock flag */
+    while (fctx->async_lock)                  /* sleep until async_unlock() clears the flag */
+        pthread_cond_wait(&fctx->async_cond, &fctx->async_mutex);
+    fctx->async_lock = 1;                     /* claim the flag for the caller */
+    pthread_mutex_unlock(&fctx->async_mutex);
+}
+
+static void async_unlock(FrameThreadContext *fctx)
+{
+    pthread_mutex_lock(&fctx->async_mutex);
+    av_assert0(fctx->async_lock);              /* the flag must currently be set */
+    fctx->async_lock = 0;
+    pthread_cond_broadcast(&fctx->async_cond); /* wake every thread blocked in async_lock() */
+    pthread_mutex_unlock(&fctx->async_mutex);
+}
+
+static void thread_set_name(PerThreadContext *p)
+{
+    /* Label: "av:" + at most 7 chars of the codec name + ":df" + worker index. */
+    char label[16];
+    const int worker_idx = p - p->parent->threads;
+    const char *codec_name = p->avctx->codec->name;
+
+    snprintf(label, sizeof(label), "av:%.7s:df%d", codec_name, worker_idx);
+    ff_thread_setname(label);
+}
+
+/**
+ * Codec worker thread.
+ *
+ * Automatically calls ff_thread_finish_setup() if the codec does
+ * not provide an update_thread_context method, or if the codec returns
+ * before calling it.
+ */
+static attribute_align_arg void *frame_worker_thread(void *arg)
+{
+    PerThreadContext *p = arg;
+    AVCodecContext *avctx = p->avctx;
+    const FFCodec *codec = ffcodec(avctx->codec);
+
+    thread_set_name(p);
+    /* p->mutex is taken here and only dropped after the loop (and inside pthread_cond_wait). */
+    pthread_mutex_lock(&p->mutex);
+    while (1) {
+        while (atomic_load(&p->state) == STATE_INPUT_READY && !p->die)
+            pthread_cond_wait(&p->input_cond, &p->mutex);
+
+        if (p->die) break;
+
+        if (!codec->update_thread_context)
+            ff_thread_finish_setup(avctx);
+
+        /* If a decoder supports hwaccel, then it must call ff_get_format().
+         * Since that call must happen before ff_thread_finish_setup(), the
+         * decoder is required to implement update_thread_context() and call
+         * ff_thread_finish_setup() manually. Therefore the above
+         * ff_thread_finish_setup() call did not happen and hwaccel_serializing
+         * cannot be true here. */
+        av_assert0(!p->hwaccel_serializing);
+
+        /* if the previous thread uses hwaccel then we take the lock to ensure
+         * the threads don't run concurrently */
+        if (avctx->hwaccel) {
+            pthread_mutex_lock(&p->parent->hwaccel_mutex);
+            p->hwaccel_serializing = 1;
+        }
+        /* Reset the output frame, then run the codec's decode callback on p->avpkt. */
+        av_frame_unref(p->frame);
+        p->got_frame = 0;
+        p->result = codec->cb.decode(avctx, p->frame, &p->got_frame, p->avpkt);
+
+        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0]) /* error or no frame, but a buffer is attached */
+            ff_thread_release_buffer(avctx, p->frame);
+
+        if (atomic_load(&p->state) == STATE_SETTING_UP) /* codec returned without finishing setup itself */
+            ff_thread_finish_setup(avctx);
+
+        if (p->hwaccel_serializing) {
+            /* wipe hwaccel state to avoid stale pointers lying around;
+             * the state was transferred to FrameThreadContext in
+             * ff_thread_finish_setup(), so nothing is leaked */
+            avctx->hwaccel                     = NULL;
+            avctx->hwaccel_context             = NULL;
+            avctx->internal->hwaccel_priv_data = NULL;
+
+            p->hwaccel_serializing = 0;
+            pthread_mutex_unlock(&p->parent->hwaccel_mutex);
+        }
+        av_assert0(!avctx->hwaccel);
+
+        if (p->async_serializing) {
+            p->async_serializing = 0;
+
+            async_unlock(p->parent);
+        }
+        /* Publish STATE_INPUT_READY under progress_mutex and wake progress/output waiters. */
+        pthread_mutex_lock(&p->progress_mutex);
+
+        atomic_store(&p->state, STATE_INPUT_READY);
+
+        pthread_cond_broadcast(&p->progress_cond);
+        pthread_cond_signal(&p->output_cond);
+        pthread_mutex_unlock(&p->progress_mutex);
+    }
+    pthread_mutex_unlock(&p->mutex);
+
+    return NULL;
+}
+
+/**
+ * Update a destination AVCodecContext (the next thread's or the user's) with values from the reference thread's context.
+ *
+ * @param dst The destination context.
+ * @param src The source context.
+ * @param for_user 0 if the destination is a codec thread, 1 if the destination is the user's thread
+ * @return 0 on success, negative error code on failure
+ */
+static int update_context_from_thread(AVCodecContext *dst, AVCodecContext *src, int for_user)
+{
+    const FFCodec *const codec = ffcodec(dst->codec);
+    int err = 0;
+    /* generic fields: copied for the user context, or when the codec has update_thread_context */
+    if (dst != src && (for_user || codec->update_thread_context)) {
+        dst->time_base = src->time_base;
+        dst->framerate = src->framerate;
+        dst->width     = src->width;
+        dst->height    = src->height;
+        dst->pix_fmt   = src->pix_fmt;
+        dst->sw_pix_fmt = src->sw_pix_fmt;
+
+        dst->coded_width  = src->coded_width;
+        dst->coded_height = src->coded_height;
+
+        dst->has_b_frames = src->has_b_frames;
+        dst->idct_algo    = src->idct_algo;
+        dst->properties   = src->properties;
+
+        dst->bits_per_coded_sample = src->bits_per_coded_sample;
+        dst->sample_aspect_ratio   = src->sample_aspect_ratio;
+
+        dst->profile = src->profile;
+        dst->level   = src->level;
+
+        dst->bits_per_raw_sample = src->bits_per_raw_sample;
+        dst->ticks_per_frame     = src->ticks_per_frame;
+        dst->color_primaries     = src->color_primaries;
+
+        dst->color_trc   = src->color_trc;
+        dst->colorspace  = src->colorspace;
+        dst->color_range = src->color_range;
+        dst->chroma_sample_location = src->chroma_sample_location;
+
+        dst->sample_rate    = src->sample_rate;
+        dst->sample_fmt     = src->sample_fmt;
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+        dst->channels       = src->channels;
+        dst->channel_layout = src->channel_layout;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        err = av_channel_layout_copy(&dst->ch_layout, &src->ch_layout);
+        if (err < 0)
+            return err;
+        /* re-reference hw_frames_ctx only when dst and src do not share the same data */
+        if (!!dst->hw_frames_ctx != !!src->hw_frames_ctx ||
+            (dst->hw_frames_ctx && dst->hw_frames_ctx->data != src->hw_frames_ctx->data)) {
+            av_buffer_unref(&dst->hw_frames_ctx);
+
+            if (src->hw_frames_ctx) {
+                dst->hw_frames_ctx = av_buffer_ref(src->hw_frames_ctx);
+                if (!dst->hw_frames_ctx)
+                    return AVERROR(ENOMEM);
+            }
+        }
+
+        dst->hwaccel_flags = src->hwaccel_flags;
+
+        err = av_buffer_replace(&dst->internal->pool, src->internal->pool);
+        if (err < 0)
+            return err;
+    }
+    /* finally run the codec hook matching the direction of the update, if any */
+    if (for_user) {
+        if (codec->update_thread_context_for_user)
+            err = codec->update_thread_context_for_user(dst, src);
+    } else {
+        if (codec->update_thread_context)
+            err = codec->update_thread_context(dst, src);
+    }
+
+    return err;
+}
+
+/**
+ * Update the next thread's AVCodecContext with values set by the user.
+ *
+ * @param dst The destination context.
+ * @param src The source context.
+ * @return 0 on success, negative error code on failure
+ */
+static int update_context_from_user(AVCodecContext *dst, AVCodecContext *src)
+{
+    int err;
+
+    dst->flags          = src->flags;
+
+    dst->draw_horiz_band= src->draw_horiz_band;
+    dst->get_buffer2    = src->get_buffer2;
+
+    dst->opaque   = src->opaque;
+    dst->debug    = src->debug;
+
+    dst->slice_flags = src->slice_flags;
+    dst->flags2      = src->flags2;
+    dst->export_side_data = src->export_side_data;
+
+    dst->skip_loop_filter = src->skip_loop_filter;
+    dst->skip_idct        = src->skip_idct;
+    dst->skip_frame       = src->skip_frame;
+
+    dst->frame_num        = src->frame_num;
+#if FF_API_AVCTX_FRAME_NUMBER
+FF_DISABLE_DEPRECATION_WARNINGS
+    dst->frame_number     = src->frame_number;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+#if FF_API_REORDERED_OPAQUE
+FF_DISABLE_DEPRECATION_WARNINGS
+    dst->reordered_opaque = src->reordered_opaque;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    /* grow dst->slice_offset only when it is too small, then copy the offsets */
+    if (src->slice_count && src->slice_offset) {
+        if (dst->slice_count < src->slice_count) {
+            err = av_reallocp_array(&dst->slice_offset, src->slice_count,
+                                    sizeof(*dst->slice_offset));
+            if (err < 0)
+                return err;
+        }
+        memcpy(dst->slice_offset, src->slice_offset,
+               src->slice_count * sizeof(*dst->slice_offset));
+    }
+    dst->slice_count = src->slice_count;
+    /* replace the stored packet properties with those of the user context */
+    av_packet_unref(dst->internal->last_pkt_props);
+    err = av_packet_copy_props(dst->internal->last_pkt_props, src->internal->last_pkt_props);
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+
+static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
+                         AVPacket *avpkt)
+{   /* Hand avpkt to worker p: sync its context, reference the packet, wake it. */
+    FrameThreadContext *fctx = p->parent;
+    PerThreadContext *prev_thread = fctx->prev_thread;
+    const AVCodec *codec = p->avctx->codec;
+    int ret;
+    /* an empty packet is only forwarded to codecs with AV_CODEC_CAP_DELAY */
+    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
+        return 0;
+
+    pthread_mutex_lock(&p->mutex);
+
+    ret = update_context_from_user(p->avctx, user_avctx);
+    if (ret) {
+        pthread_mutex_unlock(&p->mutex);
+        return ret;
+    }
+    atomic_store_explicit(&p->debug_threads,
+                          (p->avctx->debug & FF_DEBUG_THREADS) != 0,
+                          memory_order_relaxed);
+    /* wait until the previous worker has left STATE_SETTING_UP, then copy its context */
+    if (prev_thread) {
+        int err;
+        if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) {
+            pthread_mutex_lock(&prev_thread->progress_mutex);
+            while (atomic_load(&prev_thread->state) == STATE_SETTING_UP)
+                pthread_cond_wait(&prev_thread->progress_cond, &prev_thread->progress_mutex);
+            pthread_mutex_unlock(&prev_thread->progress_mutex);
+        }
+
+        err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
+        if (err) {
+            pthread_mutex_unlock(&p->mutex);
+            return err;
+        }
+    }
+
+    /* transfer the stashed hwaccel state, if any */
+    av_assert0(!p->avctx->hwaccel);
+    FFSWAP(const AVHWAccel*, p->avctx->hwaccel,                     fctx->stash_hwaccel);
+    FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
+    FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
+
+    av_packet_unref(p->avpkt);
+    ret = av_packet_ref(p->avpkt, avpkt);
+    if (ret < 0) {
+        pthread_mutex_unlock(&p->mutex);
+        av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n");
+        return ret;
+    }
+    /* mark the worker busy and wake it; it waits on input_cond while STATE_INPUT_READY */
+    atomic_store(&p->state, STATE_SETTING_UP);
+    pthread_cond_signal(&p->input_cond);
+    pthread_mutex_unlock(&p->mutex);
+
+    fctx->prev_thread = p;
+    fctx->next_decoding++;
+
+    return 0;
+}
+
+int ff_thread_decode_frame(AVCodecContext *avctx,
+                           AVFrame *picture, int *got_picture_ptr,
+                           AVPacket *avpkt)
+{   /* Submit avpkt to the next worker and return the oldest worker's output, if any. */
+    FrameThreadContext *fctx = avctx->internal->thread_ctx;
+    int finished = fctx->next_finished;
+    PerThreadContext *p;
+    int err;
+
+    /* release the async lock, permitting blocked hwaccel threads to
+     * go forward while we are in this function */
+    async_unlock(fctx);
+
+    /*
+     * Submit a packet to the next decoding thread.
+     */
+
+    p = &fctx->threads[fctx->next_decoding];
+    err = submit_packet(p, avctx, avpkt);
+    if (err)
+        goto finish;
+
+    /*
+     * If we're still receiving the initial packets, don't return a frame.
+     */
+
+    if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
+        fctx->delaying = 0;
+
+    if (fctx->delaying) {
+        *got_picture_ptr=0;
+        if (avpkt->size) {
+            err = avpkt->size;
+            goto finish;
+        }
+    }
+
+    /*
+     * Return the next available frame from the oldest thread.
+     * If we're at the end of the stream, then we have to skip threads that
+     * didn't output a frame/error, because we don't want to accidentally signal
+     * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0).
+     */
+
+    do {
+        p = &fctx->threads[finished++];
+
+        if (atomic_load(&p->state) != STATE_INPUT_READY) {
+            pthread_mutex_lock(&p->progress_mutex);
+            while (atomic_load_explicit(&p->state, memory_order_relaxed) != STATE_INPUT_READY)
+                pthread_cond_wait(&p->output_cond, &p->progress_mutex);
+            pthread_mutex_unlock(&p->progress_mutex);
+        }
+
+        av_frame_move_ref(picture, p->frame);
+        *got_picture_ptr = p->got_frame;
+        picture->pkt_dts = p->avpkt->dts;
+        err = p->result;
+
+        /*
+         * A later call with avpkt->size == 0 may loop over all threads,
+         * including this one, searching for a frame/error to return before being
+         * stopped by the "finished != fctx->next_finished" condition.
+         * Make sure we don't mistakenly return the same frame/error again.
+         */
+        p->got_frame = 0;
+        p->result = 0;
+
+        if (finished >= avctx->thread_count) finished = 0;
+    } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished);
+    /* copy the last examined worker's context to the user context; the result is not checked */
+    update_context_from_thread(avctx, p->avctx, 1);
+
+    if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
+
+    fctx->next_finished = finished;
+
+    /* return the size of the consumed packet if no error occurred */
+    if (err >= 0)
+        err = avpkt->size;
+finish:
+    async_lock(fctx);
+    return err;
+}
+
+void ff_thread_report_progress(ThreadFrame *f, int n, int field)
+{   /* Raise the progress counter of f for `field` to n and wake threads waiting on it. */
+    PerThreadContext *p;
+    atomic_int *progress = f->progress ? (atomic_int*)f->progress->data : NULL;
+    /* nothing to do without a progress buffer, or when n does not exceed the stored value */
+    if (!progress ||
+        atomic_load_explicit(&progress[field], memory_order_relaxed) >= n)
+        return;
+
+    p = f->owner[field]->internal->thread_ctx;
+
+    if (atomic_load_explicit(&p->debug_threads, memory_order_relaxed))
+        av_log(f->owner[field], AV_LOG_DEBUG,
+               "%p finished %d field %d\n", progress, n, field);
+
+    pthread_mutex_lock(&p->progress_mutex);
+    /* store under the owner's progress_mutex so waiters cannot miss the broadcast */
+    atomic_store_explicit(&progress[field], n, memory_order_release);
+
+    pthread_cond_broadcast(&p->progress_cond);
+    pthread_mutex_unlock(&p->progress_mutex);
+}
+
+void ff_thread_await_progress(const ThreadFrame *f, int n, int field)
+{   /* Block until the progress counter of f for `field` has reached at least n. */
+    PerThreadContext *p;
+    atomic_int *progress = f->progress ? (atomic_int*)f->progress->data : NULL;
+    /* return at once without a progress buffer, or when the counter is already >= n */
+    if (!progress ||
+        atomic_load_explicit(&progress[field], memory_order_acquire) >= n)
+        return;
+
+    p = f->owner[field]->internal->thread_ctx;
+
+    if (atomic_load_explicit(&p->debug_threads, memory_order_relaxed))
+        av_log(f->owner[field], AV_LOG_DEBUG,
+               "thread awaiting %d field %d from %p\n", n, field, progress);
+
+    pthread_mutex_lock(&p->progress_mutex);
+    while (atomic_load_explicit(&progress[field], memory_order_relaxed) < n)
+        pthread_cond_wait(&p->progress_cond, &p->progress_mutex); /* signalled by ff_thread_report_progress() */
+    pthread_mutex_unlock(&p->progress_mutex);
+}
+
+void ff_thread_finish_setup(AVCodecContext *avctx) { /* mark this worker's setup phase as finished */
+    PerThreadContext *p = avctx->internal->thread_ctx;
+    /* no-op unless frame threading is active */
+    if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return;
+    /* take the hwaccel serialization lock unless this worker already holds it */
+    if (avctx->hwaccel && !p->hwaccel_serializing) {
+        pthread_mutex_lock(&p->parent->hwaccel_mutex);
+        p->hwaccel_serializing = 1;
+    }
+
+    /* this assumes that no hwaccel calls happen before ff_thread_finish_setup() */
+    if (avctx->hwaccel &&
+        !(avctx->hwaccel->caps_internal & HWACCEL_CAP_ASYNC_SAFE)) {
+        p->async_serializing = 1;
+
+        async_lock(p->parent);
+    }
+
+    /* save hwaccel state for passing to the next thread;
+     * this is done here so that this worker thread can wipe its own hwaccel
+     * state after decoding, without requiring synchronization */
+    av_assert0(!p->parent->stash_hwaccel);
+    p->parent->stash_hwaccel         = avctx->hwaccel;
+    p->parent->stash_hwaccel_context = avctx->hwaccel_context;
+    p->parent->stash_hwaccel_priv    = avctx->internal->hwaccel_priv_data;
+
+    pthread_mutex_lock(&p->progress_mutex);
+    if(atomic_load(&p->state) == STATE_SETUP_FINISHED){
+        av_log(avctx, AV_LOG_WARNING, "Multiple ff_thread_finish_setup() calls\n");
+    }
+    /* publish the new state and wake threads waiting on progress_cond (e.g. submit_packet()) */
+    atomic_store(&p->state, STATE_SETUP_FINISHED);
+
+    pthread_cond_broadcast(&p->progress_cond);
+    pthread_mutex_unlock(&p->progress_mutex);
+}
+
+/// Waits for all threads to finish, i.e. to return to STATE_INPUT_READY.
+static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count)
+{
+    int i;
+    /* the async lock is released while waiting and re-taken before returning */
+    async_unlock(fctx);
+
+    for (i = 0; i < thread_count; i++) {
+        PerThreadContext *p = &fctx->threads[i];
+
+        if (atomic_load(&p->state) != STATE_INPUT_READY) {
+            pthread_mutex_lock(&p->progress_mutex);
+            while (atomic_load(&p->state) != STATE_INPUT_READY)
+                pthread_cond_wait(&p->output_cond, &p->progress_mutex);
+            pthread_mutex_unlock(&p->progress_mutex);
+        }
+        p->got_frame = 0; /* clear the worker's pending-output flag */
+    }
+
+    async_lock(fctx);
+}
+
+#define OFF(member) offsetof(FrameThreadContext, member) /* offsets passed to ff_pthread_init()/ff_pthread_free() */
+DEFINE_OFFSET_ARRAY(FrameThreadContext, thread_ctx, pthread_init_cnt,
+                    (OFF(buffer_mutex), OFF(hwaccel_mutex), OFF(async_mutex)),
+                    (OFF(async_cond)));
+#undef OFF
+/* Same kind of table for the mutexes and condition variables of PerThreadContext. */
+#define OFF(member) offsetof(PerThreadContext, member)
+DEFINE_OFFSET_ARRAY(PerThreadContext, per_thread, pthread_init_cnt,
+                    (OFF(progress_mutex), OFF(mutex)),
+                    (OFF(input_cond), OFF(progress_cond), OFF(output_cond)));
+#undef OFF
+
+void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
+{   /* Stop and free the first thread_count workers, then the FrameThreadContext itself. */
+    FrameThreadContext *fctx = avctx->internal->thread_ctx;
+    const FFCodec *codec = ffcodec(avctx->codec);
+    int i;
+    /* wait until every worker is back in STATE_INPUT_READY before tearing down */
+    park_frame_worker_threads(fctx, thread_count);
+
+    for (i = 0; i < thread_count; i++) {
+        PerThreadContext *p = &fctx->threads[i];
+        AVCodecContext *ctx = p->avctx;
+
+        if (ctx->internal) {
+            if (p->thread_init == INITIALIZED) { /* the thread exists: ask it to exit, then join it */
+                pthread_mutex_lock(&p->mutex);
+                p->die = 1;
+                pthread_cond_signal(&p->input_cond);
+                pthread_mutex_unlock(&p->mutex);
+
+                pthread_join(p->thread, NULL);
+            }
+            if (codec->close && p->thread_init != UNINITIALIZED)
+                codec->close(ctx);
+            /* free the per-thread private data, including its options */
+            if (ctx->priv_data) {
+                if (codec->p.priv_class)
+                    av_opt_free(ctx->priv_data);
+                av_freep(&ctx->priv_data);
+            }
+
+            av_freep(&ctx->slice_offset);
+
+            av_buffer_unref(&ctx->internal->pool);
+            av_packet_free(&ctx->internal->last_pkt_props);
+            av_freep(&ctx->internal);
+            av_buffer_unref(&ctx->hw_frames_ctx);
+        }
+
+        av_frame_free(&p->frame);
+
+        ff_pthread_free(p, per_thread_offsets);
+        av_packet_free(&p->avpkt);
+
+        av_freep(&p->avctx);
+    }
+
+    av_freep(&fctx->threads);
+    ff_pthread_free(fctx, thread_ctx_offsets);
+
+    /* if we have stashed hwaccel state, move it to the user-facing context,
+     * so it will be freed in avcodec_close() */
+    av_assert0(!avctx->hwaccel);
+    FFSWAP(const AVHWAccel*, avctx->hwaccel,                     fctx->stash_hwaccel);
+    FFSWAP(void*,            avctx->hwaccel_context,             fctx->stash_hwaccel_context);
+    FFSWAP(void*,            avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
+
+    av_freep(&avctx->internal->thread_ctx);
+}
+
+static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
+                               FrameThreadContext *fctx, AVCodecContext *avctx,
+                               const FFCodec *codec, int first)
+{   /* Set up worker p: copy avctx, init the codec on the copy, start the thread. */
+    AVCodecContext *copy;
+    int err;
+
+    atomic_init(&p->state, STATE_INPUT_READY);
+    /* start from a shallow copy of the user context; priv_data is allocated separately below */
+    copy = av_memdup(avctx, sizeof(*avctx));
+    if (!copy)
+        return AVERROR(ENOMEM);
+    copy->priv_data = NULL;
+
+    /* From now on, this PerThreadContext will be cleaned up by
+     * ff_frame_thread_free in case of errors. */
+    (*threads_to_free)++;
+
+    p->parent = fctx;
+    p->avctx  = copy;
+
+    copy->internal = av_mallocz(sizeof(*copy->internal));
+    if (!copy->internal)
+        return AVERROR(ENOMEM);
+    copy->internal->thread_ctx = p;
+
+    copy->delay = avctx->delay;
+
+    if (codec->priv_data_size) {
+        copy->priv_data = av_mallocz(codec->priv_data_size);
+        if (!copy->priv_data)
+            return AVERROR(ENOMEM);
+
+        if (codec->p.priv_class) {
+            *(const AVClass **)copy->priv_data = codec->p.priv_class;
+            err = av_opt_copy(copy->priv_data, avctx->priv_data);
+            if (err < 0)
+                return err;
+        }
+    }
+
+    err = ff_pthread_init(p, per_thread_offsets);
+    if (err < 0)
+        return err;
+
+    if (!(p->frame = av_frame_alloc()) ||
+        !(p->avpkt = av_packet_alloc()))
+        return AVERROR(ENOMEM);
+    /* every context except the first one is flagged as a copy */
+    if (!first)
+        copy->internal->is_copy = 1;
+
+    copy->internal->last_pkt_props = av_packet_alloc();
+    if (!copy->internal->last_pkt_props)
+        return AVERROR(ENOMEM);
+
+    if (codec->init) {
+        err = codec->init(copy);
+        if (err < 0) {
+            if (codec->caps_internal & FF_CODEC_CAP_INIT_CLEANUP)
+                p->thread_init = NEEDS_CLOSE; /* makes ff_frame_thread_free() call codec->close() */
+            return err;
+        }
+    }
+    p->thread_init = NEEDS_CLOSE;
+    /* the first worker's initialized context is propagated back to the user context */
+    if (first)
+        update_context_from_thread(avctx, copy, 1);
+
+    atomic_init(&p->debug_threads, (copy->debug & FF_DEBUG_THREADS) != 0);
+    /* thread_init becomes INITIALIZED only once the thread actually exists */
+    err = AVERROR(pthread_create(&p->thread, NULL, frame_worker_thread, p));
+    if (err < 0)
+        return err;
+    p->thread_init = INITIALIZED;
+
+    return 0;
+}
+
+int ff_frame_thread_init(AVCodecContext *avctx)
+{   /* Create the FrameThreadContext and its workers; returns 0 or a negative error code. */
+    int thread_count = avctx->thread_count;
+    const FFCodec *codec = ffcodec(avctx->codec);
+    FrameThreadContext *fctx;
+    int err, i = 0;
+    /* thread_count == 0 requests automatic selection from the CPU count */
+    if (!thread_count) {
+        int nb_cpus = av_cpu_count();
+        // use number of cores + 1 as thread count if there is more than one
+        if (nb_cpus > 1)
+            thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS);
+        else
+            thread_count = avctx->thread_count = 1;
+    }
+    /* with at most one thread, threading is switched off and nothing is allocated */
+    if (thread_count <= 1) {
+        avctx->active_thread_type = 0;
+        return 0;
+    }
+
+    avctx->internal->thread_ctx = fctx = av_mallocz(sizeof(FrameThreadContext));
+    if (!fctx)
+        return AVERROR(ENOMEM);
+
+    err = ff_pthread_init(fctx, thread_ctx_offsets);
+    if (err < 0) {
+        ff_pthread_free(fctx, thread_ctx_offsets);
+        av_freep(&avctx->internal->thread_ctx);
+        return err;
+    }
+    /* the async lock starts out held; ff_thread_decode_frame() releases it while it runs */
+    fctx->async_lock = 1;
+    fctx->delaying = 1;
+
+    if (codec->p.type == AVMEDIA_TYPE_VIDEO)
+        avctx->delay = avctx->thread_count - 1;
+
+    fctx->threads = av_calloc(thread_count, sizeof(*fctx->threads));
+    if (!fctx->threads) {
+        err = AVERROR(ENOMEM);
+        goto error;
+    }
+
+    for (; i < thread_count; ) { /* i is advanced inside init_thread() */
+        PerThreadContext *p  = &fctx->threads[i];
+        int first = !i;
+
+        err = init_thread(p, &i, fctx, avctx, codec, first);
+        if (err < 0)
+            goto error;
+    }
+
+    return 0;
+
+error:
+    ff_frame_thread_free(avctx, i); /* i counts the workers that need cleanup */
+    return err;
+}
+
+void ff_thread_flush(AVCodecContext *avctx)
+{   /* Park all workers, reset the submit/output bookkeeping and flush every worker. */
+    int i;
+    FrameThreadContext *fctx = avctx->internal->thread_ctx;
+
+    if (!fctx) return;
+
+    park_frame_worker_threads(fctx, avctx->thread_count);
+    if (fctx->prev_thread) { /* carry the most recently submitted context over to thread 0 */
+        if (fctx->prev_thread != &fctx->threads[0])
+            update_context_from_thread(fctx->threads[0].avctx, fctx->prev_thread->avctx, 0);
+    }
+    /* restart submission and output at thread 0 and re-enter the delaying phase */
+    fctx->next_decoding = fctx->next_finished = 0;
+    fctx->delaying = 1;
+    fctx->prev_thread = NULL;
+    for (i = 0; i < avctx->thread_count; i++) {
+        PerThreadContext *p = &fctx->threads[i];
+        // Make sure decode flush calls with size=0 won't return old frames
+        p->got_frame = 0;
+        av_frame_unref(p->frame);
+        p->result = 0;
+
+        if (ffcodec(avctx->codec)->flush)
+            ffcodec(avctx->codec)->flush(p->avctx);
+    }
+}
+
+/* 0 if frame-threaded, not in STATE_SETTING_UP and the codec has update_thread_context; else 1. */
+int ff_thread_can_start_frame(AVCodecContext *avctx)
+{
+    PerThreadContext *worker = avctx->internal->thread_ctx;
+
+    if (!(avctx->active_thread_type & FF_THREAD_FRAME))
+        return 1;
+    if (atomic_load(&worker->state) == STATE_SETTING_UP)
+        return 1;
+    return !ffcodec(avctx->codec)->update_thread_context;
+}
+
+static int thread_get_buffer_internal(AVCodecContext *avctx, AVFrame *f, int flags)
+{   /* ff_get_buffer() for frame-threaded decoders, serialized by the parent's buffer_mutex. */
+    PerThreadContext *p;
+    int err;
+    /* without frame threading this is a plain ff_get_buffer() call */
+    if (!(avctx->active_thread_type & FF_THREAD_FRAME))
+        return ff_get_buffer(avctx, f, flags);
+
+    p = avctx->internal->thread_ctx;
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (atomic_load(&p->state) != STATE_SETTING_UP &&
+        ffcodec(avctx->codec)->update_thread_context) {
+FF_ENABLE_DEPRECATION_WARNINGS
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() cannot be called after ff_thread_finish_setup()\n");
+        return -1;
+    }
+    /* only one worker at a time may be inside ff_get_buffer() */
+    pthread_mutex_lock(&p->parent->buffer_mutex);
+    err = ff_get_buffer(avctx, f, flags);
+
+    pthread_mutex_unlock(&p->parent->buffer_mutex);
+
+    return err;
+}
+
+int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags)
+{   /* Get a buffer through thread_get_buffer_internal(), logging any failure. */
+    const int status = thread_get_buffer_internal(avctx, f, flags);
+    if (status < 0)
+        av_log(avctx, AV_LOG_ERROR, "thread_get_buffer() failed\n");
+    return status;
+}
+
+int ff_thread_get_ext_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags)
+{   /* Get a buffer for a ThreadFrame, allocating its progress counters when the codec asks for them. */
+    int ret;
+
+    f->owner[0] = f->owner[1] = avctx;
+    /* Hint: It is possible for this function to be called with codecs
+     * that don't support frame threading at all, namely in case
+     * a frame-threaded decoder shares code with codecs that are not.
+     * This currently affects non-MPEG-4 mpegvideo codecs and VP7.
+     * The following check will always be true for them. */
+    if (!(avctx->active_thread_type & FF_THREAD_FRAME))
+        return ff_get_buffer(avctx, f->f, flags);
+    /* two counters, indexed by field in ff_thread_report_progress(); both start at -1 */
+    if (ffcodec(avctx->codec)->caps_internal & FF_CODEC_CAP_ALLOCATE_PROGRESS) {
+        atomic_int *progress;
+        f->progress = av_buffer_alloc(2 * sizeof(*progress));
+        if (!f->progress) {
+            return AVERROR(ENOMEM);
+        }
+        progress = (atomic_int*)f->progress->data;
+
+        atomic_init(&progress[0], -1);
+        atomic_init(&progress[1], -1);
+    }
+    /* get the frame buffer; on failure the progress buffer is dropped again */
+    ret = ff_thread_get_buffer(avctx, f->f, flags);
+    if (ret)
+        av_buffer_unref(&f->progress);
+    return ret;
+}
+
+/* Unreference frame f (no-op for NULL), logging the call under FF_DEBUG_BUFFERS. */
+void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f)
+{
+    if (f) {
+        if (avctx->debug & FF_DEBUG_BUFFERS)
+            av_log(avctx, AV_LOG_DEBUG,
+                   "thread_release_buffer called on pic %p\n", f);
+        av_frame_unref(f);
+    }
+}
+
+void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f)
+{   /* Drop the progress buffer, clear both owners, then release the wrapped frame. */
+    av_buffer_unref(&f->progress);
+    f->owner[0] = f->owner[1] = NULL;
+    ff_thread_release_buffer(avctx, f->f);
+}
diff --git a/media/ffvpx/libavcodec/pthread_internal.h b/media/ffvpx/libavcodec/pthread_internal.h
new file mode 100644
index 0000000000..d0b6a7a673
--- /dev/null
+++ b/media/ffvpx/libavcodec/pthread_internal.h
@@ -0,0 +1,66 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PTHREAD_INTERNAL_H
+#define AVCODEC_PTHREAD_INTERNAL_H
+
+#include "avcodec.h"
+
+/* H.264 slice threading seems to be buggy with more than 16 threads,
+ * limit the number of threads to 16 for automatic detection */
+#define MAX_AUTO_THREADS 16
+
+int ff_slice_thread_init(AVCodecContext *avctx);
+void ff_slice_thread_free(AVCodecContext *avctx);
+/* Frame threading: init returns 0 or a negative error; free cleans up the first thread_count workers. */
+int ff_frame_thread_init(AVCodecContext *avctx);
+void ff_frame_thread_free(AVCodecContext *avctx, int thread_count);
+
+#define THREAD_SENTINEL 0 // This forbids putting a mutex/condition variable at the front.
+/**
+ * Initialize/destroy a list of mutexes/conditions contained in a structure.
+ * The positions of these mutexes/conditions in the structure are given by
+ * their offsets. Because it is undefined behaviour to destroy
+ * an uninitialized mutex/condition, ff_pthread_init() stores the number
+ * of successfully initialized mutexes and conditions in the object itself
+ * and ff_pthread_free() uses this number to destroy exactly the mutexes and
+ * condition variables that have been successfully initialized.
+ *
+ * @param     obj     The object containing the mutexes/conditions.
+ * @param[in] offsets An array of offsets. Its first member gives the offset
+ *                    of the variable that contains the count of successfully
+ *                    initialized mutexes/condition variables; said variable
+ *                    must be an unsigned int. Two arrays of offsets, each
+ *                    delimited by a THREAD_SENTINEL follow. The first
+ *                    contains the offsets of all the mutexes, the second
+ *                    contains the offsets of all the condition variables.
+ */
+int  ff_pthread_init(void *obj, const unsigned offsets[]); /* the frame-threading callers treat a negative return as failure */
+void ff_pthread_free(void *obj, const unsigned offsets[]);
+
+/**
+ * Macros to help creating the above lists. The mutexes and conds arguments
+ * must be parentheses-enclosed lists of offsets in the containing structure.
+ */
+#define OFFSET_ARRAY(...) __VA_ARGS__, THREAD_SENTINEL /* appends the terminating sentinel to a list */
+#define DEFINE_OFFSET_ARRAY(type, name, cnt_variable, mutexes, conds)         \
+static const unsigned name ## _offsets[] = { offsetof(type, cnt_variable),    \
+                                             OFFSET_ARRAY mutexes,            \
+                                             OFFSET_ARRAY conds }
+
+#endif // AVCODEC_PTHREAD_INTERNAL_H
diff --git a/media/ffvpx/libavcodec/pthread_slice.c b/media/ffvpx/libavcodec/pthread_slice.c
new file mode 100644
index 0000000000..a4d31c6f4d
--- /dev/null
+++ b/media/ffvpx/libavcodec/pthread_slice.c
@@ -0,0 +1,260 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Slice multithreading support functions
+ * @see doc/multithreading.txt
+ */
+
+#include "config.h"
+
+#include "avcodec.h"
+#include "codec_internal.h"
+#include "internal.h"
+#include "pthread_internal.h"
+#include "thread.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/thread.h"
+#include "libavutil/slicethread.h"
+
+typedef int (action_func)(AVCodecContext *c, void *arg); // job callback: worker_func() passes it the address of that job's own argument
+typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr); // job callback: gets the shared argument plus job and thread numbers
+typedef int (main_func)(AVCodecContext *c); // callback invoked through main_function()
+
+typedef struct Progress {
+    pthread_cond_t  cond;  ///< signalled by ff_thread_report_progress2(), waited on by ff_thread_await_progress2()
+    pthread_mutex_t mutex; ///< held while the entries counters are updated or compared
+} Progress;
+
+typedef struct SliceThreadContext {
+    AVSliceThread *thread;  ///< worker pool handle from avpriv_slicethread_create()
+    action_func *func;      ///< job callback; when NULL, worker_func() uses func2 instead
+    action_func2 *func2;    ///< job callback used when func is NULL
+    main_func *mainfunc;    ///< called by main_function(); set by ff_slice_thread_execute_with_mainfunc()
+    void *args;             ///< argument pointer of the current execute call
+    int *rets;              ///< optional array receiving one result per job; may be NULL
+    int job_size;           ///< byte distance between consecutive job arguments for func
+
+    int *entries;           ///< progress counters, allocated by ff_slice_thread_allocz_entries()
+    int entries_count;      ///< number of elements in entries
+    int thread_count;       ///< number of fully initialized elements in progress
+    Progress *progress;     ///< array allocated by ff_slice_thread_init_progress()
+} SliceThreadContext;
+
+static void main_function(void *priv) { /* adapter: hands the codec context to the codec's main function */
+    AVCodecContext *const ctx = priv;
+    SliceThreadContext *const slice = ctx->internal->thread_ctx;
+    slice->mainfunc(ctx); /* the int result is discarded */
+}
+
+static void worker_func(void *priv, int jobnr, int threadnr, int nb_jobs, int nb_threads)
+{
+    AVCodecContext *const ctx = priv;
+    SliceThreadContext *const slice = ctx->internal->thread_ctx;
+    int result;
+    if (slice->func) /* func receives its own argument slot; func2 the shared argument plus job/thread numbers */
+        result = slice->func(ctx, (char *)slice->args + slice->job_size * jobnr);
+    else
+        result = slice->func2(ctx, slice->args, jobnr, threadnr);
+    if (slice->rets) slice->rets[jobnr] = result; /* the result array is optional */
+}
+
+void ff_slice_thread_free(AVCodecContext *avctx)
+{
+    SliceThreadContext *const slice = avctx->internal->thread_ctx;
+    int idx = 0;
+
+    /* Free the worker pool first, then destroy only the Progress slots counted in thread_count. */
+    avpriv_slicethread_free(&slice->thread);
+    while (idx < slice->thread_count) {
+        Progress *const slot = &slice->progress[idx++];
+        pthread_mutex_destroy(&slot->mutex);
+        pthread_cond_destroy(&slot->cond);
+    }
+
+    av_freep(&slice->entries);
+    av_freep(&slice->progress);
+    av_freep(&avctx->internal->thread_ctx);
+}
+
+static int thread_execute(AVCodecContext *avctx, action_func* func, void *arg, int *ret, int job_count, int job_size)
+{
+    SliceThreadContext *const slice = avctx->internal->thread_ctx;
+    const int slice_threaded = (avctx->active_thread_type & FF_THREAD_SLICE) && avctx->thread_count > 1;
+
+    /* Without active slice threading, run the jobs through the default executor. */
+    if (!slice_threaded)
+        return avcodec_default_execute(avctx, func, arg, ret, job_count, job_size);
+    if (job_count <= 0)
+        return 0;
+    /* Publish the job description that worker_func() reads, then dispatch. */
+    slice->func     = func;
+    slice->args     = arg;
+    slice->rets     = ret;
+    slice->job_size = job_size;
+    avpriv_slicethread_execute(slice->thread, job_count, slice->mainfunc != NULL);
+    return 0;
+}
+
+static int thread_execute2(AVCodecContext *avctx, action_func2* func2, void *arg, int *ret, int job_count)
+{   /* Passing func == NULL to thread_execute() makes worker_func() dispatch to func2. */
+    SliceThreadContext *const slice = avctx->internal->thread_ctx;
+    slice->func2 = func2;
+    return thread_execute(avctx, NULL, arg, ret, job_count, 0);
+}
+
+int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx, action_func2* func2, main_func *mainfunc, void *arg, int *ret, int job_count)
+{   /* Like thread_execute2(), but also registers the function that main_function() will call. */
+    SliceThreadContext *const slice = avctx->internal->thread_ctx;
+    slice->mainfunc = mainfunc;
+    slice->func2    = func2;
+    return thread_execute(avctx, NULL, arg, ret, job_count, 0);
+}
+
+int ff_slice_thread_init(AVCodecContext *avctx)
+{
+    SliceThreadContext *slice;
+    void (*entry)(void *) = NULL;
+    int nb_threads = avctx->thread_count;
+
+    // We cannot do this in the encoder init as the threads are created before
+    if (av_codec_is_encoder(avctx->codec) &&
+        avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO && avctx->height > 2800)
+        nb_threads = avctx->thread_count = 1;
+
+    if (nb_threads == 0) { /* 0 requests automatic selection */
+        int nb_cpus = av_cpu_count();
+        if (avctx->height)
+            nb_cpus = FFMIN(nb_cpus, (avctx->height + 15) / 16);
+        // use number of cores + 1 as thread count if there is more than one
+        nb_threads = nb_cpus > 1 ? FFMIN(nb_cpus + 1, MAX_AUTO_THREADS) : 1;
+        avctx->thread_count = nb_threads;
+    }
+
+    if (nb_threads <= 1) {
+        avctx->active_thread_type = 0;
+        return 0;
+    }
+
+    avctx->internal->thread_ctx = slice = av_mallocz(sizeof(*slice));
+    if (ffcodec(avctx->codec)->caps_internal & FF_CODEC_CAP_SLICE_THREAD_HAS_MF)
+        entry = &main_function;
+    if (slice)
+        nb_threads = avpriv_slicethread_create(&slice->thread, avctx, worker_func, entry, nb_threads);
+    if (!slice || nb_threads <= 1) {
+        /* No context or no usable worker pool: fall back to single-threaded operation (still returns 0). */
+        if (slice)
+            avpriv_slicethread_free(&slice->thread);
+        av_freep(&avctx->internal->thread_ctx);
+        avctx->thread_count = 1;
+        avctx->active_thread_type = 0;
+        return 0;
+    }
+    avctx->thread_count = nb_threads;
+    avctx->execute      = thread_execute;
+    avctx->execute2     = thread_execute2;
+    return 0;
+}
+
+int av_cold ff_slice_thread_init_progress(AVCodecContext *avctx)
+{
+    SliceThreadContext *const slice = avctx->internal->thread_ctx;
+    const int wanted = avctx->thread_count;
+    int ready = 0, status = 0;
+
+    /* One Progress slot (mutex + condition variable) per thread. */
+    slice->progress = av_calloc(wanted, sizeof(*slice->progress));
+    if (!slice->progress)
+        status = AVERROR(ENOMEM);
+
+    while (!status && ready < wanted) {
+        Progress *const slot = &slice->progress[ready];
+        int rc = pthread_mutex_init(&slot->mutex, NULL);
+        if (!rc) {
+            rc = pthread_cond_init(&slot->cond, NULL);
+            if (rc) /* never leave a slot half-initialized */
+                pthread_mutex_destroy(&slot->mutex);
+        }
+        if (rc)
+            status = AVERROR(rc);
+        else
+            ready++;
+    }
+
+    /* Record how many slots are fully initialized so that
+     * ff_slice_thread_free() destroys exactly those. */
+    slice->thread_count = ready;
+    return status;
+}
+
+void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n)
+{
+    SliceThreadContext *const slice = avctx->internal->thread_ctx;
+    Progress *const slot = slice->progress + thread;
+    int *const counters = slice->entries;
+
+    pthread_mutex_lock(&slot->mutex);
+    counters[field] += n; /* advance the counter while holding the slot's mutex */
+    pthread_cond_signal(&slot->cond); /* wake a thread blocked in ff_thread_await_progress2() */
+    pthread_mutex_unlock(&slot->mutex);
+}
+
+void ff_thread_await_progress2(AVCodecContext *avctx, int field, int thread, int shift)
+{
+    SliceThreadContext *const slice = avctx->internal->thread_ctx;
+    int *const counters = slice->entries;
+    Progress *slot;
+
+    /* Nothing to wait for without counters, and field 0 has no predecessor. */
+    if (counters == NULL || field == 0)
+        return;
+
+    /* Wait on the previous thread's slot; thread 0 wraps around to the last slot. */
+    slot = &slice->progress[(thread ? thread : slice->thread_count) - 1];
+    pthread_mutex_lock(&slot->mutex);
+    while (counters[field - 1] - counters[field] < shift)
+        pthread_cond_wait(&slot->cond, &slot->mutex);
+    pthread_mutex_unlock(&slot->mutex);
+}
+
+int ff_slice_thread_allocz_entries(AVCodecContext *avctx, int count)
+{   // (Re)allocate count zeroed progress counters; a no-op unless slice threading is active. Returns 0 or AVERROR(ENOMEM).
+    if (avctx->active_thread_type & FF_THREAD_SLICE)  {
+        SliceThreadContext *p = avctx->internal->thread_ctx;
+
+        if (p->entries_count == count) { // same size: keep the array and just clear it
+            if (p->entries) // still NULL while both counts are 0; memset() on NULL is undefined even for size 0
+                memset(p->entries, 0, p->entries_count * sizeof(*p->entries));
+            return 0;
+        }
+        av_freep(&p->entries);
+        p->entries       = av_calloc(count, sizeof(*p->entries));
+        if (!p->entries) {
+            p->entries_count = 0; // keep the count consistent with the freed array
+            return AVERROR(ENOMEM);
+        }
+        p->entries_count  = count;
+    }
+
+    return 0;
+}
diff --git a/media/ffvpx/libavcodec/put_bits.h b/media/ffvpx/libavcodec/put_bits.h
new file mode 100644
index 0000000000..4561dc131a
--- /dev/null
+++ b/media/ffvpx/libavcodec/put_bits.h
@@ -0,0 +1,428 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * bitstream writer API
+ */
+
+#ifndef AVCODEC_PUT_BITS_H
+#define AVCODEC_PUT_BITS_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "config.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+
+#if ARCH_X86_64
+// TODO: Benchmark and optionally enable on other 64-bit architectures.
+typedef uint64_t BitBuf;    // x86-64: the bit accumulator is 64 bits wide
+#define AV_WBBUF AV_WB64    // store used by the default (non-BITSTREAM_WRITER_LE) writer paths
+#define AV_WLBUF AV_WL64    // store used by the little-endian writer paths
+#else
+typedef uint32_t BitBuf;    // elsewhere: the bit accumulator is 32 bits wide
+#define AV_WBBUF AV_WB32
+#define AV_WLBUF AV_WL32
+#endif
+// Width of the bit accumulator, computed as 8 * sizeof(BitBuf).
+static const int BUF_BITS = 8 * sizeof(BitBuf);
+
+typedef struct PutBitContext {
+    BitBuf bit_buf;                   ///< bits accepted by the writer but not yet stored to the buffer
+    int bit_left;                     ///< free bits remaining in bit_buf; BUF_BITS when it is empty
+    uint8_t *buf, *buf_ptr, *buf_end; ///< buffer start, next store position, and buf + buffer size
+} PutBitContext;
+
+/**
+ * Set up s to write into buffer, starting with an empty bit accumulator.
+ * A negative buffer_size is treated as an empty buffer (NULL pointer, size 0).
+ *
+ * @param buffer the buffer where to put bits
+ * @param buffer_size the size in bytes of buffer
+ */
+static inline void init_put_bits(PutBitContext *s, uint8_t *buffer,
+                                 int buffer_size)
+{
+    const int usable = buffer_size >= 0;
+    uint8_t *const start = usable ? buffer : NULL;
+
+    /* The accumulator starts empty: all BUF_BITS bits are free. */
+    s->bit_buf  = 0;
+    s->bit_left = BUF_BITS;
+    s->buf      = start;
+    s->buf_ptr  = start;
+    s->buf_end  = start + (usable ? buffer_size : 0);
+}
+
+/** @return the total number of bits written to the bitstream (stored bytes plus bits pending in bit_buf). */
+static inline int put_bits_count(PutBitContext *s)
+{
+    const ptrdiff_t stored_bytes = s->buf_ptr - s->buf;
+    const int pending_bits = BUF_BITS - s->bit_left;
+    return stored_bytes * 8 + pending_bits;
+}
+
+/**
+ * @return the number of bytes stored in the buffer so far; may only be called while
+ *         no bits are pending, i.e. when the PutBitContext is freshly initialized or flushed.
+ */
+static inline int put_bytes_output(const PutBitContext *s)
+{
+    av_assert2(s->bit_left == BUF_BITS);
+    return (int)(s->buf_ptr - s->buf);
+}
+
+/**
+ * @param  round_up  When set, a partially filled last byte counts as a whole byte.
+ * @return the number of bytes output so far, including bits still pending in bit_buf.
+ */
+static inline int put_bytes_count(const PutBitContext *s, int round_up)
+{
+    const int pending_bits = BUF_BITS - s->bit_left + (round_up ? 7 : 0);
+    return s->buf_ptr - s->buf + (pending_bits >> 3);
+}
+
+/**
+ * Rebase the bit writer onto a reallocated buffer, keeping the current write offset.
+ *
+ * @param buffer the buffer where to put bits
+ * @param buffer_size the size in bytes of buffer,
+ *                    must be large enough to hold everything written so far
+ */
+static inline void rebase_put_bits(PutBitContext *s, uint8_t *buffer,
+                                   int buffer_size)
+{
+    av_assert0(8*buffer_size >= put_bits_count(s));
+    /* Move buf_ptr first: its offset is measured against the old s->buf. */
+    s->buf_ptr = buffer + (s->buf_ptr - s->buf);
+    s->buf     = buffer;
+    s->buf_end = buffer + buffer_size;
+}
+
+/** @return the number of bits available in the bitstream. */
+static inline int put_bits_left(PutBitContext* s)
+{
+    const ptrdiff_t free_bytes = s->buf_end - s->buf_ptr;
+    const int pending_bits = BUF_BITS - s->bit_left;
+    return free_bytes * 8 - pending_bits;
+}
+
+/**
+ * @param  round_up  When set, a partially filled last byte counts as a whole written byte.
+ * @return the number of bytes left, after accounting for bits still pending in bit_buf.
+ */
+static inline int put_bytes_left(const PutBitContext *s, int round_up)
+{
+    const int pending_bits = BUF_BITS - s->bit_left + (round_up ? 7 : 0);
+    return s->buf_end - s->buf_ptr - (pending_bits >> 3);
+}
+
+/**
+ * Store the bits still pending in bit_buf to the buffer, padding the last byte with zeros, and reset the accumulator.
+ */
+static inline void flush_put_bits(PutBitContext *s)
+{
+#ifndef BITSTREAM_WRITER_LE
+    if (s->bit_left < BUF_BITS)
+        s->bit_buf <<= s->bit_left; /* move the pending bits to the top of the accumulator */
+#endif
+    while (s->bit_left < BUF_BITS) { /* one byte per iteration until no pending bits remain */
+        av_assert0(s->buf_ptr < s->buf_end);
+#ifdef BITSTREAM_WRITER_LE
+        *s->buf_ptr++ = s->bit_buf; /* lowest byte first */
+        s->bit_buf  >>= 8;
+#else
+        *s->buf_ptr++ = s->bit_buf >> (BUF_BITS - 8); /* highest byte first */
+        s->bit_buf  <<= 8;
+#endif
+        s->bit_left  += 8;
+    }
+    s->bit_left = BUF_BITS; /* bit_left may have overshot BUF_BITS by up to 7 padding bits */
+    s->bit_buf  = 0;
+}
+
+static inline void flush_put_bits_le(PutBitContext *s)
+{   /* Store the pending bits one byte at a time, lowest byte first, then reset the accumulator. */
+    for (; s->bit_left < BUF_BITS; s->bit_left += 8) {
+        av_assert0(s->buf_ptr < s->buf_end);
+        *s->buf_ptr = (uint8_t)s->bit_buf;
+        s->buf_ptr++;
+        s->bit_buf >>= 8;
+    }
+    s->bit_buf  = 0;
+    s->bit_left = BUF_BITS;
+}
+
+#ifdef BITSTREAM_WRITER_LE /* in little-endian writer mode both helpers are renamed to *_unsupported_here identifiers */
+#define ff_put_string ff_put_string_unsupported_here
+#define ff_copy_bits ff_copy_bits_unsupported_here
+#else
+
+/**
+ * Write the given string into the bitstream.
+ *
+ * @param terminate_string 0-terminates the written string if value is 1
+ */
+void ff_put_string(PutBitContext *pb, const char *string,
+                       int terminate_string);
+
+/**
+ * Copy the content of src to the bitstream.
+ *
+ * @param length the number of bits of src to copy
+ */
+void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length);
+#endif
+
+static inline void put_bits_no_assert(PutBitContext *s, int n, BitBuf value) /* append n bits from value; nothing here checks that value fits in n bits */
+{
+    BitBuf bit_buf;
+    int bit_left;
+
+    bit_buf  = s->bit_buf;
+    bit_left = s->bit_left;
+
+    /* XXX: optimize */
+#ifdef BITSTREAM_WRITER_LE
+    bit_buf |= value << (BUF_BITS - bit_left); /* place value above the bits already pending */
+    if (n >= bit_left) { /* accumulator is full: store it, then keep the bits of value that did not fit */
+        if (s->buf_end - s->buf_ptr >= sizeof(BitBuf)) {
+            AV_WLBUF(s->buf_ptr, bit_buf);
+            s->buf_ptr += sizeof(BitBuf);
+        } else { /* no room for a whole BitBuf: nothing is stored, only an error is logged */
+            av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n");
+            av_assert2(0);
+        }
+        bit_buf     = value >> bit_left;
+        bit_left   += BUF_BITS;
+    }
+    bit_left -= n;
+#else
+    if (n < bit_left) { /* value fits: shift it in below the pending bits */
+        bit_buf     = (bit_buf << n) | value;
+        bit_left   -= n;
+    } else { /* fill the accumulator with the top bit_left bits of value and store it */
+        bit_buf   <<= bit_left;
+        bit_buf    |= value >> (n - bit_left);
+        if (s->buf_end - s->buf_ptr >= sizeof(BitBuf)) {
+            AV_WBBUF(s->buf_ptr, bit_buf);
+            s->buf_ptr += sizeof(BitBuf);
+        } else { /* no room for a whole BitBuf: nothing is stored, only an error is logged */
+            av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n");
+            av_assert2(0);
+        }
+        bit_left   += BUF_BITS - n; /* n - bit_left bits of value remain pending */
+        bit_buf     = value;
+    }
+#endif
+
+    s->bit_buf  = bit_buf;
+    s->bit_left = bit_left;
+}
+
+/**
+ * Write up to 31 bits into a bitstream; value must be smaller than 1 << n.
+ * Use put_bits32 to write 32 bits.
+ */
+static inline void put_bits(PutBitContext *s, int n, BitBuf value)
+{
+    av_assert2(n <= 31 && value < (1UL << n)); /* the range check is the only difference from put_bits_no_assert() */
+    put_bits_no_assert(s, n, value);
+}
+
+static inline void put_bits_le(PutBitContext *s, int n, BitBuf value) /* little-endian write of up to 31 bits, independent of BITSTREAM_WRITER_LE */
+{
+    BitBuf bit_buf;
+    int bit_left;
+
+    av_assert2(n <= 31 && value < (1UL << n));
+
+    bit_buf  = s->bit_buf;
+    bit_left = s->bit_left;
+
+    bit_buf |= value << (BUF_BITS - bit_left); /* place value above the bits already pending */
+    if (n >= bit_left) { /* accumulator is full: store it, then keep the bits of value that did not fit */
+        if (s->buf_end - s->buf_ptr >= sizeof(BitBuf)) {
+            AV_WLBUF(s->buf_ptr, bit_buf);
+            s->buf_ptr += sizeof(BitBuf);
+        } else { /* no room for a whole BitBuf: nothing is stored, only an error is logged */
+            av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n");
+            av_assert2(0);
+        }
+        bit_buf     = value >> bit_left;
+        bit_left   += BUF_BITS;
+    }
+    bit_left -= n;
+
+    s->bit_buf  = bit_buf;
+    s->bit_left = bit_left;
+}
+
+static inline void put_sbits(PutBitContext *pb, int n, int32_t value) /* write a signed value using n bits (0..31) */
+{
+    av_assert2(n >= 0 && n <= 31);
+    /* NOTE(review): av_mod_uintp2() presumably keeps only the low n bits of value - confirm in libavutil */
+    put_bits(pb, n, av_mod_uintp2(value, n));
+}
+
+/**
+ * Write exactly 32 bits into a bitstream.
+ */
+static void av_unused put_bits32(PutBitContext *s, uint32_t value)
+{
+    BitBuf bit_buf;
+    int bit_left;
+
+    if (BUF_BITS > 32) { /* a wider accumulator can take 32 bits through the generic path */
+        put_bits_no_assert(s, 32, value);
+        return;
+    }
+    /* From here on BUF_BITS is at most 32: one full accumulator is stored and bit_left stays unchanged. */
+    bit_buf  = s->bit_buf;
+    bit_left = s->bit_left;
+
+#ifdef BITSTREAM_WRITER_LE
+    bit_buf |= (BitBuf)value << (BUF_BITS - bit_left);
+    if (s->buf_end - s->buf_ptr >= sizeof(BitBuf)) {
+        AV_WLBUF(s->buf_ptr, bit_buf);
+        s->buf_ptr += sizeof(BitBuf);
+    } else { /* no room for a whole BitBuf: nothing is stored, only an error is logged */
+        av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n");
+        av_assert2(0);
+    }
+    bit_buf     = (uint64_t)value >> bit_left; /* presumably widened so a shift by bit_left == 32 stays defined */
+#else
+    bit_buf     = (uint64_t)bit_buf << bit_left; /* presumably widened so a shift by bit_left == 32 stays defined */
+    bit_buf    |= (BitBuf)value >> (BUF_BITS - bit_left);
+    if (s->buf_end - s->buf_ptr >= sizeof(BitBuf)) {
+        AV_WBBUF(s->buf_ptr, bit_buf);
+        s->buf_ptr += sizeof(BitBuf);
+    } else { /* no room for a whole BitBuf: nothing is stored, only an error is logged */
+        av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n");
+        av_assert2(0);
+    }
+    bit_buf     = value;
+#endif
+
+    s->bit_buf  = bit_buf;
+    s->bit_left = bit_left;
+}
+
+/**
+ * Write up to 64 bits into a bitstream. Unless n is 64, value must be smaller than 1 << n.
+ */
+static inline void put_bits64(PutBitContext *s, int n, uint64_t value)
+{
+    av_assert2((n == 64) || (n < 64 && value < (UINT64_C(1) << n)));
+
+    if (n < 32)
+        put_bits(s, n, value);
+    else if (n == 32)
+        put_bits32(s, value);
+    else if (n < 64) { /* 33..63 bits: a full 32-bit half plus the n - 32 remaining high bits */
+        uint32_t lo = value & 0xffffffff;
+        uint32_t hi = value >> 32;
+#ifdef BITSTREAM_WRITER_LE
+        put_bits32(s, lo);
+        put_bits(s, n - 32, hi);
+#else
+        put_bits(s, n - 32, hi);
+        put_bits32(s, lo);
+#endif
+    } else { /* exactly 64 bits: two full 32-bit halves */
+        uint32_t lo = value & 0xffffffff;
+        uint32_t hi = value >> 32;
+#ifdef BITSTREAM_WRITER_LE
+        put_bits32(s, lo);
+        put_bits32(s, hi);
+#else
+        put_bits32(s, hi);
+        put_bits32(s, lo);
+#endif
+
+    }
+}
+
+static inline void put_sbits63(PutBitContext *pb, int n, int64_t value) /* write a signed value using n bits (0..63) */
+{
+    av_assert2(n >= 0 && n < 64);
+    /* Keep only the low n bits of value so that it satisfies put_bits64()'s range requirement. */
+    put_bits64(pb, n, (uint64_t)(value) & (~(UINT64_MAX << n)));
+}
+
+/**
+ * Return the pointer to the byte where the bitstream writer will store its next output (s->buf_ptr).
+ */
+static inline uint8_t *put_bits_ptr(PutBitContext *s)
+{
+    uint8_t *const next_byte = s->buf_ptr;
+    return next_byte;
+}
+
+/**
+ * Skip the given number of bytes by advancing the write position; the skipped bytes are not written.
+ * PutBitContext must be flushed & aligned to a byte boundary before calling this.
+ */
+static inline void skip_put_bytes(PutBitContext *s, int n)
+{
+    av_assert2((put_bits_count(s) & 7) == 0);
+    av_assert2(s->bit_left == BUF_BITS);
+    av_assert0(n <= s->buf_end - s->buf_ptr); /* the skip must stay inside the buffer */
+    s->buf_ptr += n;
+}
+
+/**
+ * Skip the given number of bits. Must only be used if the actual values in the
+ * bitstream do not matter. If n is < 0 the behavior is undefined.
+ */
+static inline void skip_put_bits(PutBitContext *s, int n)
+{
+    const unsigned pending = BUF_BITS - s->bit_left + n;
+    const unsigned whole_words = pending / BUF_BITS;
+    s->buf_ptr  += sizeof(BitBuf) * whole_words;
+    s->bit_left  = BUF_BITS - (pending % BUF_BITS);
+}
+
+/**
+ * Change the end of the buffer; the start and the current write position are left untouched.
+ *
+ * @param size the new size in bytes of the buffer where to put bits
+ */
+static inline void set_put_bits_buffer_size(PutBitContext *s, int size)
+{
+    av_assert0(size <= INT_MAX/8 - BUF_BITS); /* presumably keeps bit counts derived from size within int range */
+    s->buf_end = s->buf + size;
+}
+
+/** Pad the bitstream with zero bits up to the next byte boundary. */
+static inline void align_put_bits(PutBitContext *s)
+{
+    /* BUF_BITS is a multiple of 8, so the low 3 bits of bit_left give the bits missing in the current byte. */
+    const int padding = s->bit_left & 7;
+    put_bits(s, padding, 0);
+}
+
+#undef AV_WBBUF
+#undef AV_WLBUF
+
+#endif /* AVCODEC_PUT_BITS_H */
diff --git a/media/ffvpx/libavcodec/qpeldsp.h b/media/ffvpx/libavcodec/qpeldsp.h
new file mode 100644
index 0000000000..91019eda9c
--- /dev/null
+++ b/media/ffvpx/libavcodec/qpeldsp.h
@@ -0,0 +1,83 @@
+/*
+ * quarterpel DSP functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * quarterpel DSP functions
+ */
+
+#ifndef AVCODEC_QPELDSP_H
+#define AVCODEC_QPELDSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_put_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); /* NOTE(review): implementations are not in this header; behaviour presumably follows the put/avg and block-size naming - confirm */
+void ff_avg_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+/* Unlike the functions above, this one takes separate int strides for dst and both sources, plus a height h. */
+void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
+                         int dst_stride, int src_stride1, int src_stride2,
+                         int h);
+
+#define DEF_OLD_QPEL(name)                                              \
+void ff_put_        ## name(uint8_t *dst /* align width (8 or 16) */,   \
+                            const uint8_t *src /* align 1 */,           \
+                            ptrdiff_t stride);                          \
+void ff_put_no_rnd_ ## name(uint8_t *dst /* align width (8 or 16) */,   \
+                            const uint8_t *src /* align 1 */,           \
+                            ptrdiff_t stride);                          \
+void ff_avg_        ## name(uint8_t *dst /* align width (8 or 16) */,   \
+                            const uint8_t *src /* align 1 */,           \
+                            ptrdiff_t stride);
+/* Each DEF_OLD_QPEL(name) line below declares ff_put_<name>, ff_put_no_rnd_<name> and ff_avg_<name>. */
+DEF_OLD_QPEL(qpel16_mc11_old_c)
+DEF_OLD_QPEL(qpel16_mc31_old_c)
+DEF_OLD_QPEL(qpel16_mc12_old_c)
+DEF_OLD_QPEL(qpel16_mc32_old_c)
+DEF_OLD_QPEL(qpel16_mc13_old_c)
+DEF_OLD_QPEL(qpel16_mc33_old_c)
+DEF_OLD_QPEL(qpel8_mc11_old_c)
+DEF_OLD_QPEL(qpel8_mc31_old_c)
+DEF_OLD_QPEL(qpel8_mc12_old_c)
+DEF_OLD_QPEL(qpel8_mc32_old_c)
+DEF_OLD_QPEL(qpel8_mc13_old_c)
+DEF_OLD_QPEL(qpel8_mc33_old_c)
+
+typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */,
+                             const uint8_t *src /* align 1 */,
+                             ptrdiff_t stride);
+/* qpel_mc_func has the same parameter list as the functions declared by DEF_OLD_QPEL. */
+/**
+ * quarterpel DSP context: three tables of qpel_mc_func pointers (put, avg, put without rounding).
+ */
+typedef struct QpelDSPContext {
+    qpel_mc_func put_qpel_pixels_tab[2][16]; /* NOTE(review): presumably [block size][sub-pel position] - confirm in qpeldsp.c */
+    qpel_mc_func avg_qpel_pixels_tab[2][16];
+    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
+} QpelDSPContext;
+
+void ff_qpeldsp_init(QpelDSPContext *c); /* NOTE(review): presumably fills the tables of c - confirm */
+/* Architecture-specific variants; NOTE(review): presumably called from ff_qpeldsp_init() - confirm. */
+void ff_qpeldsp_init_x86(QpelDSPContext *c);
+void ff_qpeldsp_init_mips(QpelDSPContext *c);
+
+#endif /* AVCODEC_QPELDSP_H */
diff --git a/media/ffvpx/libavcodec/qsv_api.c b/media/ffvpx/libavcodec/qsv_api.c
new file mode 100644
index 0000000000..327ff7d813
--- /dev/null
+++ b/media/ffvpx/libavcodec/qsv_api.c
@@ -0,0 +1,42 @@
+/*
+ * Intel MediaSDK QSV public API functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include <stddef.h>
+
+#include "libavutil/mem.h"
+
+#if CONFIG_QSV
+#include "qsv.h"
+/* QSV enabled: return a zero-initialized AVQSVContext, or whatever av_mallocz() returns on failure. */
+AVQSVContext *av_qsv_alloc_context(void)
+{
+    return av_mallocz(sizeof(AVQSVContext));
+}
+#else
+/* QSV disabled: the symbol is still defined, but it always returns NULL. */
+struct AVQSVContext *av_qsv_alloc_context(void);
+/* The prototype above is declared locally because qsv.h is not included in this branch. */
+struct AVQSVContext *av_qsv_alloc_context(void)
+{
+    return NULL;
+}
+#endif
diff --git a/media/ffvpx/libavcodec/ratecontrol.h b/media/ffvpx/libavcodec/ratecontrol.h
new file mode 100644
index 0000000000..4de80fad90
--- /dev/null
+++ b/media/ffvpx/libavcodec/ratecontrol.h
@@ -0,0 +1,96 @@
+/*
+ * Ratecontrol
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RATECONTROL_H
+#define AVCODEC_RATECONTROL_H
+
+/**
+ * @file
+ * ratecontrol header.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include "libavutil/eval.h"
+
+typedef struct Predictor{ // NOTE(review): field meanings are not visible in this header - confirm in ratecontrol.c
+    double coeff;
+    double count;
+    double decay;
+} Predictor;
+
+typedef struct RateControlEntry{ // NOTE(review): presumably one record of per-picture statistics; field meanings are inferred from names only - confirm in ratecontrol.c
+    int pict_type;
+    float qscale;
+    int mv_bits;
+    int i_tex_bits;
+    int p_tex_bits;
+    int misc_bits;
+    int header_bits;
+    uint64_t expected_bits;
+    int new_pict_type;
+    float new_qscale;
+    int64_t mc_mb_var_sum;
+    int64_t mb_var_sum;
+    int i_count;
+    int skip_count;
+    int f_code;
+    int b_code;
+}RateControlEntry;
+
+/**
+ * rate control context.
+ */
+typedef struct RateControlContext{
+    int num_entries;              ///< number of RateControlEntries
+    RateControlEntry *entry;      ///< NOTE(review): presumably an array of num_entries elements - confirm
+    double buffer_index;          ///< amount of bits in the video/audio buffer
+    Predictor pred[5];
+    double short_term_qsum;       ///< sum of recent qscales
+    double short_term_qcount;     ///< count of recent qscales
+    double pass1_rc_eq_output_sum;///< sum of the output of the rc equation, this is used for normalization
+    double pass1_wanted_bits;     ///< bits which should have been output by the pass1 code (including complexity init)
+    double last_qscale;
+    double last_qscale_for[5];    ///< last qscale for a specific pict type, used for max_diff & ipb factor stuff
+    int64_t last_mc_mb_var_sum;
+    int64_t last_mb_var_sum;
+    uint64_t i_cplx_sum[5];       ///< NOTE(review): the [5] arrays are presumably indexed by pict type like last_qscale_for - confirm
+    uint64_t p_cplx_sum[5];
+    uint64_t mv_bits_sum[5];
+    uint64_t qscale_sum[5];
+    int frame_count[5];
+    int last_non_b_pict_type;
+
+    AVExpr * rc_eq_eval;          ///< AVExpr comes from libavutil/eval.h; NOTE(review): presumably the parsed rc equation - confirm
+}RateControlContext;
+
+struct MpegEncContext; /* forward declaration: the prototypes below only take pointers to it */
+
+/* rate control */
+int ff_rate_control_init(struct MpegEncContext *s);
+float ff_rate_estimate_qscale(struct MpegEncContext *s, int dry_run);
+void ff_write_pass1_stats(struct MpegEncContext *s);
+void ff_rate_control_uninit(struct MpegEncContext *s);
+int ff_vbv_update(struct MpegEncContext *s, int frame_size);
+void ff_get_2pass_fcode(struct MpegEncContext *s);
+
+#endif /* AVCODEC_RATECONTROL_H */
diff --git a/media/ffvpx/libavcodec/raw.c b/media/ffvpx/libavcodec/raw.c
new file mode 100644
index 0000000000..1e5b48d1e0
--- /dev/null
+++ b/media/ffvpx/libavcodec/raw.c
@@ -0,0 +1,370 @@
+/*
+ * Raw Video Codec
+ * Copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Raw Video Codec
+ */
+
+#include "libavutil/macros.h"
+#include "avcodec.h"
+#include "raw.h"
+
+static const PixelFormatTag raw_pix_fmt_tags[] = { /* scanned linearly by the lookups in this file, so the first matching entry wins */
+    { AV_PIX_FMT_YUV420P, MKTAG('I', '4', '2', '0') }, /* Planar formats */
+    { AV_PIX_FMT_YUV420P, MKTAG('I', 'Y', 'U', 'V') },
+    { AV_PIX_FMT_YUV420P, MKTAG('y', 'v', '1', '2') },
+    { AV_PIX_FMT_YUV420P, MKTAG('Y', 'V', '1', '2') },
+    { AV_PIX_FMT_YUV410P, MKTAG('Y', 'U', 'V', '9') },
+    { AV_PIX_FMT_YUV410P, MKTAG('Y', 'V', 'U', '9') },
+    { AV_PIX_FMT_YUV411P, MKTAG('Y', '4', '1', 'B') },
+    { AV_PIX_FMT_YUV422P, MKTAG('Y', '4', '2', 'B') },
+    { AV_PIX_FMT_YUV422P, MKTAG('P', '4', '2', '2') },
+    { AV_PIX_FMT_YUV422P, MKTAG('Y', 'V', '1', '6') },
+    /* yuvjXXX formats are deprecated hacks specific to libav*,
+       they are identical to yuvXXX  */
+    { AV_PIX_FMT_YUVJ420P, MKTAG('I', '4', '2', '0') }, /* Planar formats */
+    { AV_PIX_FMT_YUVJ420P, MKTAG('I', 'Y', 'U', 'V') },
+    { AV_PIX_FMT_YUVJ420P, MKTAG('Y', 'V', '1', '2') },
+    { AV_PIX_FMT_YUVJ422P, MKTAG('Y', '4', '2', 'B') },
+    { AV_PIX_FMT_YUVJ422P, MKTAG('P', '4', '2', '2') },
+    { AV_PIX_FMT_GRAY8,    MKTAG('Y', '8', '0', '0') },
+    { AV_PIX_FMT_GRAY8,    MKTAG('Y', '8', ' ', ' ') },
+
+    { AV_PIX_FMT_YUYV422, MKTAG('Y', 'U', 'Y', '2') }, /* Packed formats */
+    { AV_PIX_FMT_YUYV422, MKTAG('Y', '4', '2', '2') },
+    { AV_PIX_FMT_YUYV422, MKTAG('V', '4', '2', '2') },
+    { AV_PIX_FMT_YUYV422, MKTAG('V', 'Y', 'U', 'Y') },
+    { AV_PIX_FMT_YUYV422, MKTAG('Y', 'U', 'N', 'V') },
+    { AV_PIX_FMT_YUYV422, MKTAG('Y', 'U', 'Y', 'V') },
+    { AV_PIX_FMT_YVYU422, MKTAG('Y', 'V', 'Y', 'U') }, /* Philips */
+    { AV_PIX_FMT_UYVY422, MKTAG('U', 'Y', 'V', 'Y') },
+    { AV_PIX_FMT_UYVY422, MKTAG('H', 'D', 'Y', 'C') },
+    { AV_PIX_FMT_UYVY422, MKTAG('U', 'Y', 'N', 'V') },
+    { AV_PIX_FMT_UYVY422, MKTAG('U', 'Y', 'N', 'Y') },
+    { AV_PIX_FMT_UYVY422, MKTAG('u', 'y', 'v', '1') },
+    { AV_PIX_FMT_UYVY422, MKTAG('2', 'V', 'u', '1') },
+    { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', 'R', 'n') }, /* Avid AVI Codec 1:1 */
+    { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', '1', 'x') }, /* Avid 1:1x */
+    { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', 'u', 'p') },
+    { AV_PIX_FMT_UYVY422, MKTAG('V', 'D', 'T', 'Z') }, /* SoftLab-NSK VideoTizer */
+    { AV_PIX_FMT_UYVY422, MKTAG('a', 'u', 'v', '2') },
+    { AV_PIX_FMT_UYVY422, MKTAG('c', 'y', 'u', 'v') }, /* CYUV is also Creative YUV */
+    { AV_PIX_FMT_UYYVYY411, MKTAG('Y', '4', '1', '1') },
+    { AV_PIX_FMT_GRAY8,   MKTAG('G', 'R', 'E', 'Y') },
+    { AV_PIX_FMT_NV12,    MKTAG('N', 'V', '1', '2') },
+    { AV_PIX_FMT_NV21,    MKTAG('N', 'V', '2', '1') },
+    { AV_PIX_FMT_VUYA,    MKTAG('A', 'Y', 'U', 'V') }, /* MS 4:4:4:4 */
+
+    /* nut */
+    { AV_PIX_FMT_RGB555LE, MKTAG('R', 'G', 'B', 15) },
+    { AV_PIX_FMT_BGR555LE, MKTAG('B', 'G', 'R', 15) },
+    { AV_PIX_FMT_RGB565LE, MKTAG('R', 'G', 'B', 16) },
+    { AV_PIX_FMT_BGR565LE, MKTAG('B', 'G', 'R', 16) },
+    { AV_PIX_FMT_RGB555BE, MKTAG(15 , 'B', 'G', 'R') },
+    { AV_PIX_FMT_BGR555BE, MKTAG(15 , 'R', 'G', 'B') },
+    { AV_PIX_FMT_RGB565BE, MKTAG(16 , 'B', 'G', 'R') },
+    { AV_PIX_FMT_BGR565BE, MKTAG(16 , 'R', 'G', 'B') },
+    { AV_PIX_FMT_RGB444LE, MKTAG('R', 'G', 'B', 12) },
+    { AV_PIX_FMT_BGR444LE, MKTAG('B', 'G', 'R', 12) },
+    { AV_PIX_FMT_RGB444BE, MKTAG(12 , 'B', 'G', 'R') },
+    { AV_PIX_FMT_BGR444BE, MKTAG(12 , 'R', 'G', 'B') },
+    { AV_PIX_FMT_RGBA64LE, MKTAG('R', 'B', 'A', 64 ) },
+    { AV_PIX_FMT_BGRA64LE, MKTAG('B', 'R', 'A', 64 ) },
+    { AV_PIX_FMT_RGBA64BE, MKTAG(64 , 'R', 'B', 'A') },
+    { AV_PIX_FMT_BGRA64BE, MKTAG(64 , 'B', 'R', 'A') },
+    { AV_PIX_FMT_RGBA,     MKTAG('R', 'G', 'B', 'A') },
+    { AV_PIX_FMT_RGB0,     MKTAG('R', 'G', 'B',  0 ) },
+    { AV_PIX_FMT_BGRA,     MKTAG('B', 'G', 'R', 'A') },
+    { AV_PIX_FMT_BGR0,     MKTAG('B', 'G', 'R',  0 ) },
+    { AV_PIX_FMT_ABGR,     MKTAG('A', 'B', 'G', 'R') },
+    { AV_PIX_FMT_0BGR,     MKTAG( 0 , 'B', 'G', 'R') },
+    { AV_PIX_FMT_ARGB,     MKTAG('A', 'R', 'G', 'B') },
+    { AV_PIX_FMT_0RGB,     MKTAG( 0 , 'R', 'G', 'B') },
+    { AV_PIX_FMT_RGB24,    MKTAG('R', 'G', 'B', 24 ) },
+    { AV_PIX_FMT_BGR24,    MKTAG('B', 'G', 'R', 24 ) },
+    { AV_PIX_FMT_YUV411P,  MKTAG('4', '1', '1', 'P') },
+    { AV_PIX_FMT_YUV422P,  MKTAG('4', '2', '2', 'P') },
+    { AV_PIX_FMT_YUVJ422P, MKTAG('4', '2', '2', 'P') },
+    { AV_PIX_FMT_YUV440P,  MKTAG('4', '4', '0', 'P') },
+    { AV_PIX_FMT_YUVJ440P, MKTAG('4', '4', '0', 'P') },
+    { AV_PIX_FMT_YUV444P,  MKTAG('4', '4', '4', 'P') },
+    { AV_PIX_FMT_YUVJ444P, MKTAG('4', '4', '4', 'P') },
+    { AV_PIX_FMT_MONOWHITE,MKTAG('B', '1', 'W', '0') },
+    { AV_PIX_FMT_MONOBLACK,MKTAG('B', '0', 'W', '1') },
+    { AV_PIX_FMT_BGR8,     MKTAG('B', 'G', 'R',  8 ) },
+    { AV_PIX_FMT_RGB8,     MKTAG('R', 'G', 'B',  8 ) },
+    { AV_PIX_FMT_BGR4,     MKTAG('B', 'G', 'R',  4 ) },
+    { AV_PIX_FMT_RGB4,     MKTAG('R', 'G', 'B',  4 ) },
+    { AV_PIX_FMT_RGB4_BYTE,MKTAG('B', '4', 'B', 'Y') },
+    { AV_PIX_FMT_BGR4_BYTE,MKTAG('R', '4', 'B', 'Y') },
+    { AV_PIX_FMT_RGB48LE,  MKTAG('R', 'G', 'B', 48 ) },
+    { AV_PIX_FMT_RGB48BE,  MKTAG( 48, 'R', 'G', 'B') },
+    { AV_PIX_FMT_BGR48LE,  MKTAG('B', 'G', 'R', 48 ) },
+    { AV_PIX_FMT_BGR48BE,  MKTAG( 48, 'B', 'G', 'R') },
+    { AV_PIX_FMT_GRAY9LE,     MKTAG('Y', '1',  0 ,  9 ) },
+    { AV_PIX_FMT_GRAY9BE,     MKTAG( 9 ,  0 , '1', 'Y') },
+    { AV_PIX_FMT_GRAY10LE,    MKTAG('Y', '1',  0 , 10 ) },
+    { AV_PIX_FMT_GRAY10BE,    MKTAG(10 ,  0 , '1', 'Y') },
+    { AV_PIX_FMT_GRAY12LE,    MKTAG('Y', '1',  0 , 12 ) },
+    { AV_PIX_FMT_GRAY12BE,    MKTAG(12 ,  0 , '1', 'Y') },
+    { AV_PIX_FMT_GRAY14LE,    MKTAG('Y', '1',  0 , 14 ) },
+    { AV_PIX_FMT_GRAY14BE,    MKTAG(14 ,  0 , '1', 'Y') },
+    { AV_PIX_FMT_GRAY16LE,    MKTAG('Y', '1',  0 , 16 ) },
+    { AV_PIX_FMT_GRAY16BE,    MKTAG(16 ,  0 , '1', 'Y') },
+    { AV_PIX_FMT_YUV420P9LE,  MKTAG('Y', '3', 11 ,  9 ) },
+    { AV_PIX_FMT_YUV420P9BE,  MKTAG( 9 , 11 , '3', 'Y') },
+    { AV_PIX_FMT_YUV422P9LE,  MKTAG('Y', '3', 10 ,  9 ) },
+    { AV_PIX_FMT_YUV422P9BE,  MKTAG( 9 , 10 , '3', 'Y') },
+    { AV_PIX_FMT_YUV444P9LE,  MKTAG('Y', '3',  0 ,  9 ) },
+    { AV_PIX_FMT_YUV444P9BE,  MKTAG( 9 ,  0 , '3', 'Y') },
+    { AV_PIX_FMT_YUV420P10LE, MKTAG('Y', '3', 11 , 10 ) },
+    { AV_PIX_FMT_YUV420P10BE, MKTAG(10 , 11 , '3', 'Y') },
+    { AV_PIX_FMT_YUV422P10LE, MKTAG('Y', '3', 10 , 10 ) },
+    { AV_PIX_FMT_YUV422P10BE, MKTAG(10 , 10 , '3', 'Y') },
+    { AV_PIX_FMT_YUV444P10LE, MKTAG('Y', '3',  0 , 10 ) },
+    { AV_PIX_FMT_YUV444P10BE, MKTAG(10 ,  0 , '3', 'Y') },
+    { AV_PIX_FMT_YUV420P12LE, MKTAG('Y', '3', 11 , 12 ) },
+    { AV_PIX_FMT_YUV420P12BE, MKTAG(12 , 11 , '3', 'Y') },
+    { AV_PIX_FMT_YUV422P12LE, MKTAG('Y', '3', 10 , 12 ) },
+    { AV_PIX_FMT_YUV422P12BE, MKTAG(12 , 10 , '3', 'Y') },
+    { AV_PIX_FMT_YUV444P12LE, MKTAG('Y', '3',  0 , 12 ) },
+    { AV_PIX_FMT_YUV444P12BE, MKTAG(12 ,  0 , '3', 'Y') },
+    { AV_PIX_FMT_YUV420P14LE, MKTAG('Y', '3', 11 , 14 ) },
+    { AV_PIX_FMT_YUV420P14BE, MKTAG(14 , 11 , '3', 'Y') },
+    { AV_PIX_FMT_YUV422P14LE, MKTAG('Y', '3', 10 , 14 ) },
+    { AV_PIX_FMT_YUV422P14BE, MKTAG(14 , 10 , '3', 'Y') },
+    { AV_PIX_FMT_YUV444P14LE, MKTAG('Y', '3',  0 , 14 ) },
+    { AV_PIX_FMT_YUV444P14BE, MKTAG(14 ,  0 , '3', 'Y') },
+    { AV_PIX_FMT_YUV420P16LE, MKTAG('Y', '3', 11 , 16 ) },
+    { AV_PIX_FMT_YUV420P16BE, MKTAG(16 , 11 , '3', 'Y') },
+    { AV_PIX_FMT_YUV422P16LE, MKTAG('Y', '3', 10 , 16 ) },
+    { AV_PIX_FMT_YUV422P16BE, MKTAG(16 , 10 , '3', 'Y') },
+    { AV_PIX_FMT_YUV444P16LE, MKTAG('Y', '3',  0 , 16 ) },
+    { AV_PIX_FMT_YUV444P16BE, MKTAG(16 ,  0 , '3', 'Y') },
+    { AV_PIX_FMT_YUVA420P,    MKTAG('Y', '4', 11 ,  8 ) },
+    { AV_PIX_FMT_YUVA422P,    MKTAG('Y', '4', 10 ,  8 ) },
+    { AV_PIX_FMT_YUVA444P,    MKTAG('Y', '4',  0 ,  8 ) },
+    { AV_PIX_FMT_YA8,         MKTAG('Y', '2',  0 ,  8 ) },
+    { AV_PIX_FMT_PAL8,        MKTAG('P', 'A', 'L',  8 ) },
+
+    { AV_PIX_FMT_YUVA420P9LE,  MKTAG('Y', '4', 11 ,  9 ) },
+    { AV_PIX_FMT_YUVA420P9BE,  MKTAG( 9 , 11 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA422P9LE,  MKTAG('Y', '4', 10 ,  9 ) },
+    { AV_PIX_FMT_YUVA422P9BE,  MKTAG( 9 , 10 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA444P9LE,  MKTAG('Y', '4',  0 ,  9 ) },
+    { AV_PIX_FMT_YUVA444P9BE,  MKTAG( 9 ,  0 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA420P10LE, MKTAG('Y', '4', 11 , 10 ) },
+    { AV_PIX_FMT_YUVA420P10BE, MKTAG(10 , 11 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA422P10LE, MKTAG('Y', '4', 10 , 10 ) },
+    { AV_PIX_FMT_YUVA422P10BE, MKTAG(10 , 10 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA444P10LE, MKTAG('Y', '4',  0 , 10 ) },
+    { AV_PIX_FMT_YUVA444P10BE, MKTAG(10 ,  0 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA422P12LE, MKTAG('Y', '4', 10 , 12 ) },
+    { AV_PIX_FMT_YUVA422P12BE, MKTAG(12 , 10 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA444P12LE, MKTAG('Y', '4',  0 , 12 ) },
+    { AV_PIX_FMT_YUVA444P12BE, MKTAG(12 ,  0 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA420P16LE, MKTAG('Y', '4', 11 , 16 ) },
+    { AV_PIX_FMT_YUVA420P16BE, MKTAG(16 , 11 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA422P16LE, MKTAG('Y', '4', 10 , 16 ) },
+    { AV_PIX_FMT_YUVA422P16BE, MKTAG(16 , 10 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA444P16LE, MKTAG('Y', '4',  0 , 16 ) },
+    { AV_PIX_FMT_YUVA444P16BE, MKTAG(16 ,  0 , '4', 'Y') },
+
+    { AV_PIX_FMT_GBRP,         MKTAG('G', '3', 00 ,  8 ) },
+    { AV_PIX_FMT_GBRP9LE,      MKTAG('G', '3', 00 ,  9 ) },
+    { AV_PIX_FMT_GBRP9BE,      MKTAG( 9 , 00 , '3', 'G') },
+    { AV_PIX_FMT_GBRP10LE,     MKTAG('G', '3', 00 , 10 ) },
+    { AV_PIX_FMT_GBRP10BE,     MKTAG(10 , 00 , '3', 'G') },
+    { AV_PIX_FMT_GBRP12LE,     MKTAG('G', '3', 00 , 12 ) },
+    { AV_PIX_FMT_GBRP12BE,     MKTAG(12 , 00 , '3', 'G') },
+    { AV_PIX_FMT_GBRP14LE,     MKTAG('G', '3', 00 , 14 ) },
+    { AV_PIX_FMT_GBRP14BE,     MKTAG(14 , 00 , '3', 'G') },
+    { AV_PIX_FMT_GBRP16LE,     MKTAG('G', '3', 00 , 16 ) },
+    { AV_PIX_FMT_GBRP16BE,     MKTAG(16 , 00 , '3', 'G') },
+
+    { AV_PIX_FMT_GBRAP,        MKTAG('G', '4', 00 ,  8 ) },
+    { AV_PIX_FMT_GBRAP10LE,    MKTAG('G', '4', 00 , 10 ) },
+    { AV_PIX_FMT_GBRAP10BE,    MKTAG(10 , 00 , '4', 'G') },
+    { AV_PIX_FMT_GBRAP12LE,    MKTAG('G', '4', 00 , 12 ) },
+    { AV_PIX_FMT_GBRAP12BE,    MKTAG(12 , 00 , '4', 'G') },
+    { AV_PIX_FMT_GBRAP16LE,    MKTAG('G', '4', 00 , 16 ) },
+    { AV_PIX_FMT_GBRAP16BE,    MKTAG(16 , 00 , '4', 'G') },
+
+    { AV_PIX_FMT_XYZ12LE,      MKTAG('X', 'Y', 'Z' , 36 ) },
+    { AV_PIX_FMT_XYZ12BE,      MKTAG(36 , 'Z' , 'Y', 'X') },
+
+    { AV_PIX_FMT_BAYER_BGGR8,    MKTAG(0xBA, 'B', 'G', 8   ) },
+    { AV_PIX_FMT_BAYER_BGGR16LE, MKTAG(0xBA, 'B', 'G', 16  ) },
+    { AV_PIX_FMT_BAYER_BGGR16BE, MKTAG(16  , 'G', 'B', 0xBA) },
+    { AV_PIX_FMT_BAYER_RGGB8,    MKTAG(0xBA, 'R', 'G', 8   ) },
+    { AV_PIX_FMT_BAYER_RGGB16LE, MKTAG(0xBA, 'R', 'G', 16  ) },
+    { AV_PIX_FMT_BAYER_RGGB16BE, MKTAG(16  , 'G', 'R', 0xBA) },
+    { AV_PIX_FMT_BAYER_GBRG8,    MKTAG(0xBA, 'G', 'B', 8   ) },
+    { AV_PIX_FMT_BAYER_GBRG16LE, MKTAG(0xBA, 'G', 'B', 16  ) },
+    { AV_PIX_FMT_BAYER_GBRG16BE, MKTAG(16,   'B', 'G', 0xBA) },
+    { AV_PIX_FMT_BAYER_GRBG8,    MKTAG(0xBA, 'G', 'R', 8   ) },
+    { AV_PIX_FMT_BAYER_GRBG16LE, MKTAG(0xBA, 'G', 'R', 16  ) },
+    { AV_PIX_FMT_BAYER_GRBG16BE, MKTAG(16,   'R', 'G', 0xBA) },
+
+    /* quicktime */
+    { AV_PIX_FMT_YUV420P, MKTAG('R', '4', '2', '0') }, /* Radius DV YUV PAL */
+    { AV_PIX_FMT_YUV411P, MKTAG('R', '4', '1', '1') }, /* Radius DV YUV NTSC */
+    { AV_PIX_FMT_UYVY422, MKTAG('2', 'v', 'u', 'y') },
+    { AV_PIX_FMT_UYVY422, MKTAG('2', 'V', 'u', 'y') },
+    { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', 'U', 'I') }, /* FIXME merge both fields */
+    { AV_PIX_FMT_UYVY422, MKTAG('b', 'x', 'y', 'v') },
+    { AV_PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', '2') },
+    { AV_PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', 's') },
+    { AV_PIX_FMT_YUYV422, MKTAG('D', 'V', 'O', 'O') }, /* Digital Voodoo SD 8 Bit */
+    { AV_PIX_FMT_RGB555LE,MKTAG('L', '5', '5', '5') },
+    { AV_PIX_FMT_RGB565LE,MKTAG('L', '5', '6', '5') },
+    { AV_PIX_FMT_RGB565BE,MKTAG('B', '5', '6', '5') },
+    { AV_PIX_FMT_BGR24,   MKTAG('2', '4', 'B', 'G') },
+    { AV_PIX_FMT_BGR24,   MKTAG('b', 'x', 'b', 'g') },
+    { AV_PIX_FMT_BGRA,    MKTAG('B', 'G', 'R', 'A') },
+    { AV_PIX_FMT_RGBA,    MKTAG('R', 'G', 'B', 'A') },
+    { AV_PIX_FMT_RGB24,   MKTAG('b', 'x', 'r', 'g') },
+    { AV_PIX_FMT_ABGR,    MKTAG('A', 'B', 'G', 'R') },
+    { AV_PIX_FMT_GRAY16BE,MKTAG('b', '1', '6', 'g') },
+    { AV_PIX_FMT_RGB48BE, MKTAG('b', '4', '8', 'r') },
+    { AV_PIX_FMT_RGBA64BE,MKTAG('b', '6', '4', 'a') },
+    { AV_PIX_FMT_BAYER_RGGB16BE, MKTAG('B', 'G', 'G', 'R') },
+
+    /* vlc */
+    { AV_PIX_FMT_YUV410P,     MKTAG('I', '4', '1', '0') },
+    { AV_PIX_FMT_YUV411P,     MKTAG('I', '4', '1', '1') },
+    { AV_PIX_FMT_YUV422P,     MKTAG('I', '4', '2', '2') },
+    { AV_PIX_FMT_YUV440P,     MKTAG('I', '4', '4', '0') },
+    { AV_PIX_FMT_YUV444P,     MKTAG('I', '4', '4', '4') },
+    { AV_PIX_FMT_YUVJ420P,    MKTAG('J', '4', '2', '0') },
+    { AV_PIX_FMT_YUVJ422P,    MKTAG('J', '4', '2', '2') },
+    { AV_PIX_FMT_YUVJ440P,    MKTAG('J', '4', '4', '0') },
+    { AV_PIX_FMT_YUVJ444P,    MKTAG('J', '4', '4', '4') },
+    { AV_PIX_FMT_YUVA444P,    MKTAG('Y', 'U', 'V', 'A') },
+    { AV_PIX_FMT_YUVA420P,    MKTAG('I', '4', '0', 'A') },
+    { AV_PIX_FMT_YUVA422P,    MKTAG('I', '4', '2', 'A') },
+    { AV_PIX_FMT_RGB8,        MKTAG('R', 'G', 'B', '2') },
+    { AV_PIX_FMT_RGB555LE,    MKTAG('R', 'V', '1', '5') },
+    { AV_PIX_FMT_RGB565LE,    MKTAG('R', 'V', '1', '6') },
+    { AV_PIX_FMT_BGR24,       MKTAG('R', 'V', '2', '4') },
+    { AV_PIX_FMT_BGR0,        MKTAG('R', 'V', '3', '2') },
+    { AV_PIX_FMT_RGBA,        MKTAG('A', 'V', '3', '2') },
+    { AV_PIX_FMT_YUV420P9LE,  MKTAG('I', '0', '9', 'L') },
+    { AV_PIX_FMT_YUV420P9BE,  MKTAG('I', '0', '9', 'B') },
+    { AV_PIX_FMT_YUV422P9LE,  MKTAG('I', '2', '9', 'L') },
+    { AV_PIX_FMT_YUV422P9BE,  MKTAG('I', '2', '9', 'B') },
+    { AV_PIX_FMT_YUV444P9LE,  MKTAG('I', '4', '9', 'L') },
+    { AV_PIX_FMT_YUV444P9BE,  MKTAG('I', '4', '9', 'B') },
+    { AV_PIX_FMT_YUV420P10LE, MKTAG('I', '0', 'A', 'L') },
+    { AV_PIX_FMT_YUV420P10BE, MKTAG('I', '0', 'A', 'B') },
+    { AV_PIX_FMT_YUV422P10LE, MKTAG('I', '2', 'A', 'L') },
+    { AV_PIX_FMT_YUV422P10BE, MKTAG('I', '2', 'A', 'B') },
+    { AV_PIX_FMT_YUV444P10LE, MKTAG('I', '4', 'A', 'L') },
+    { AV_PIX_FMT_YUV444P10BE, MKTAG('I', '4', 'A', 'B') },
+    { AV_PIX_FMT_YUV420P12LE, MKTAG('I', '0', 'C', 'L') },
+    { AV_PIX_FMT_YUV420P12BE, MKTAG('I', '0', 'C', 'B') },
+    { AV_PIX_FMT_YUV422P12LE, MKTAG('I', '2', 'C', 'L') },
+    { AV_PIX_FMT_YUV422P12BE, MKTAG('I', '2', 'C', 'B') },
+    { AV_PIX_FMT_YUV444P12LE, MKTAG('I', '4', 'C', 'L') },
+    { AV_PIX_FMT_YUV444P12BE, MKTAG('I', '4', 'C', 'B') },
+    { AV_PIX_FMT_YUV420P16LE, MKTAG('I', '0', 'F', 'L') },
+    { AV_PIX_FMT_YUV420P16BE, MKTAG('I', '0', 'F', 'B') },
+    { AV_PIX_FMT_YUV444P16LE, MKTAG('I', '4', 'F', 'L') },
+    { AV_PIX_FMT_YUV444P16BE, MKTAG('I', '4', 'F', 'B') },
+
+    /* special */
+    { AV_PIX_FMT_RGB565LE,MKTAG( 3 ,  0 ,  0 ,  0 ) }, /* flipped RGB565LE */
+    { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */
+
+    { AV_PIX_FMT_NONE, 0 }, /* sentinel: the lookup loops in this file stop at this entry */
+};
+
+const struct PixelFormatTag *avpriv_get_raw_pix_fmt_tags(void)
+{   /* Return a pointer to the first entry of the file-local raw tag table. */
+    return &raw_pix_fmt_tags[0];
+}
+
+unsigned int avcodec_pix_fmt_to_codec_tag(enum AVPixelFormat fmt)
+{
+    const PixelFormatTag *entry;
+    /* Walk the raw table until an entry with a negative pix_fmt; first hit wins. */
+    for (entry = raw_pix_fmt_tags; entry->pix_fmt >= 0; entry++) {
+        if (entry->pix_fmt == fmt)
+            return entry->fourcc;
+    }
+    return 0; /* no entry for fmt */
+}
+
+static const PixelFormatTag pix_fmt_bps_avi[] = { /* NOTE(review): the fourcc slot holds a small integer here, presumably bits per sample (cf. "bps") -- confirm */
+    { AV_PIX_FMT_PAL8,    1 },
+    { AV_PIX_FMT_PAL8,    2 },
+    { AV_PIX_FMT_PAL8,    4 },
+    { AV_PIX_FMT_PAL8,    8 },
+    { AV_PIX_FMT_RGB444LE, 12 },
+    { AV_PIX_FMT_RGB555LE, 15 },
+    { AV_PIX_FMT_RGB555LE, 16 },
+    { AV_PIX_FMT_BGR24,  24 },
+    { AV_PIX_FMT_BGRA,   32 },
+    { AV_PIX_FMT_NONE,    0 }, /* sentinel: find_pix_fmt() stops here */
+};
+
+static const PixelFormatTag pix_fmt_bps_mov[] = { /* NOTE(review): the fourcc slot holds a small integer here, presumably bits per sample (cf. "bps") -- confirm */
+    { AV_PIX_FMT_PAL8,      1 },
+    { AV_PIX_FMT_PAL8,      2 },
+    { AV_PIX_FMT_PAL8,      4 },
+    { AV_PIX_FMT_PAL8,      8 },
+    { AV_PIX_FMT_RGB555BE, 16 },
+    { AV_PIX_FMT_RGB24,    24 },
+    { AV_PIX_FMT_ARGB,     32 },
+    { AV_PIX_FMT_PAL8,     33 },
+    { AV_PIX_FMT_NONE,      0 }, /* sentinel: find_pix_fmt() stops here */
+};
+
+static enum AVPixelFormat find_pix_fmt(const PixelFormatTag *tags,
+                                       unsigned int fourcc)
+{
+    const PixelFormatTag *cur = tags;
+    /* Linear scan up to the AV_PIX_FMT_NONE terminator; first match wins. */
+    for (; cur->pix_fmt != AV_PIX_FMT_NONE; cur++)
+        if (cur->fourcc == fourcc)
+            break;
+    return cur->pix_fmt; /* on a miss cur is the terminator, i.e. AV_PIX_FMT_NONE */
+}
+
+/** Look up fourcc in the tag table selected by list; AV_PIX_FMT_NONE if absent. */
+enum AVPixelFormat avpriv_pix_fmt_find(enum PixelFormatTagLists list,
+                                       unsigned fourcc)
+{
+    switch (list) {
+    case PIX_FMT_LIST_RAW:
+        return find_pix_fmt(raw_pix_fmt_tags, fourcc);
+    case PIX_FMT_LIST_AVI:
+        return find_pix_fmt(pix_fmt_bps_avi, fourcc);
+    case PIX_FMT_LIST_MOV:
+        return find_pix_fmt(pix_fmt_bps_mov, fourcc);
+    default:
+        /* Guard against an out-of-range list value: with no table
+         * selected there is nothing valid to scan, so report "not
+         * found" rather than reading an uninitialized table pointer. */
+        return AV_PIX_FMT_NONE;
+    }
+}
diff --git a/media/ffvpx/libavcodec/raw.h b/media/ffvpx/libavcodec/raw.h
new file mode 100644
index 0000000000..9a4ddef8fc
--- /dev/null
+++ b/media/ffvpx/libavcodec/raw.h
@@ -0,0 +1,48 @@
+/*
+ * Raw Video Codec
+ * Copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Raw Video Codec
+ */
+
+#ifndef AVCODEC_RAW_H
+#define AVCODEC_RAW_H
+
+#include "libavutil/pixfmt.h"
+
+typedef struct PixelFormatTag { /* one pix_fmt <-> tag pair of a lookup table */
+    enum AVPixelFormat pix_fmt; /* AV_PIX_FMT_NONE marks the end of a table (see raw.c) */
+    unsigned int fourcc;        /* MKTAG() code in the raw list; NOTE(review): the AVI/MOV lists in raw.c store small integers here, presumably bits per sample -- confirm */
+} PixelFormatTag;
+
+const struct PixelFormatTag *avpriv_get_raw_pix_fmt_tags(void);
+
+enum PixelFormatTagLists { /* selects the table searched by avpriv_pix_fmt_find() */
+    PIX_FMT_LIST_RAW, /* raw_pix_fmt_tags in raw.c */
+    PIX_FMT_LIST_AVI, /* pix_fmt_bps_avi in raw.c */
+    PIX_FMT_LIST_MOV, /* pix_fmt_bps_mov in raw.c */
+};
+
+enum AVPixelFormat avpriv_pix_fmt_find(enum PixelFormatTagLists list,
+                                       unsigned fourcc);
+
+#endif /* AVCODEC_RAW_H */
diff --git a/media/ffvpx/libavcodec/rdft.c b/media/ffvpx/libavcodec/rdft.c
new file mode 100644
index 0000000000..ac6f5d6781
--- /dev/null
+++ b/media/ffvpx/libavcodec/rdft.c
@@ -0,0 +1,120 @@
+/*
+ * (I)RDFT transforms
+ * Copyright (c) 2009 Alex Converse <alex dot converse at gmail dot com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <stdlib.h>
+#include <math.h>
+#include "libavutil/error.h"
+#include "libavutil/mathematics.h"
+#include "rdft.h"
+
+/**
+ * @file
+ * (Inverse) Real Discrete Fourier Transforms.
+ */
+
+/** Map one real FFT into two parallel real even and odd FFTs. Then interleave
+ * the two real FFTs into one complex FFT. Unmangle the results.
+ * ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM
+ */
+static void rdft_calc_c(RDFTContext *s, FFTSample *data)
+{
+    int i, i1, i2;
+    FFTComplex ev, od, odsum;
+    const int n = 1 << s->nbits;          /* length of data in FFTSamples */
+    const float k1 = 0.5;
+    const float k2 = 0.5 - s->inverse;    /* +0.5 when s->inverse is 0, -0.5 when it is 1 */
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+
+    if (!s->inverse) { /* forward: run the complex FFT in place first, then unmangle */
+        s->fft.fft_permute(&s->fft, (FFTComplex*)data);
+        s->fft.fft_calc(&s->fft, (FFTComplex*)data);
+    }
+    /* i=0 is a special case because of packing, the DC term is real, so we
+       are going to throw the N/2 term (also real) in with it. */
+    ev.re = data[0];
+    data[0] = ev.re+data[1];
+    data[1] = ev.re-data[1];
+
+#define RDFT_UNMANGLE(sign0, sign1)                                         \
+    for (i = 1; i < (n>>2); i++) {                                          \
+        i1 = 2*i;                                                           \
+        i2 = n-i1;                                                          \
+        /* Separate even and odd FFTs */                                    \
+        ev.re =  k1*(data[i1  ]+data[i2  ]);                                \
+        od.im =  k2*(data[i2  ]-data[i1  ]);                                \
+        ev.im =  k1*(data[i1+1]-data[i2+1]);                                \
+        od.re =  k2*(data[i1+1]+data[i2+1]);                                \
+        /* Apply twiddle factors to the odd FFT and add to the even FFT */  \
+        odsum.re = od.re*tcos[i] sign0 od.im*tsin[i];                       \
+        odsum.im = od.im*tcos[i] sign1 od.re*tsin[i];                       \
+        data[i1  ] =  ev.re + odsum.re;                                     \
+        data[i1+1] =  ev.im + odsum.im;                                     \
+        data[i2  ] =  ev.re - odsum.re;                                     \
+        data[i2+1] =  odsum.im - ev.im;                                     \
+    }
+
+    if (s->negative_sin) { /* the two expansions differ only in the twiddle signs */
+        RDFT_UNMANGLE(+,-)
+    } else {
+        RDFT_UNMANGLE(-,+)
+    }
+
+    data[2*i+1]=s->sign_convention*data[2*i+1]; /* the loop leaves i == n>>2 (n >= 16 per ff_rdft_init), so this is data[n/2+1] */
+    if (s->inverse) { /* inverse: unmangle first, then run the complex FFT in place */
+        data[0] *= k1;
+        data[1] *= k1;
+        s->fft.fft_permute(&s->fft, (FFTComplex*)data);
+        s->fft.fft_calc(&s->fft, (FFTComplex*)data);
+    }
+}
+
+/** Set up s for a real transform of 1 << nbits samples; returns 0, AVERROR(EINVAL) for nbits outside 4..16, or ff_fft_init()'s negative result. */
+av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
+{
+    int n, ret;
+
+    s->nbits           = nbits;
+    s->inverse         = trans == IDFT_C2R || trans == DFT_C2R;
+    s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1;
+    s->negative_sin    = trans == DFT_C2R || trans == DFT_R2C;
+
+    if (nbits < 4 || nbits > 16)
+        return AVERROR(EINVAL);
+    n = 1 << nbits; /* computed only after the range check: shifting by an out-of-range count is undefined */
+
+    /* the embedded complex FFT is initialized with one bit less than the real transform */
+    if ((ret = ff_fft_init(&s->fft, nbits-1, trans == IDFT_C2R || trans == IDFT_R2C)) < 0)
+        return ret;
+
+    ff_init_ff_cos_tabs(nbits);
+    s->tcos = ff_cos_tabs[nbits];
+    s->tsin = ff_cos_tabs[nbits] + (n >> 2); /* same table, n/4 entries further in */
+    s->rdft_calc   = rdft_calc_c;
+#if ARCH_ARM
+    ff_rdft_init_arm(s);
+#endif
+    return 0;
+}
+
+av_cold void ff_rdft_end(RDFTContext *s)
+{   /* Only the embedded FFT context is torn down; tcos/tsin point into ff_cos_tabs (see ff_rdft_init) and are not freed here. */
+    ff_fft_end(&s->fft);
+}
diff --git a/media/ffvpx/libavcodec/rdft.h b/media/ffvpx/libavcodec/rdft.h
new file mode 100644
index 0000000000..ffafca7f24
--- /dev/null
+++ b/media/ffvpx/libavcodec/rdft.h
@@ -0,0 +1,52 @@
+/*
+ * (I)RDFT transforms
+ * Copyright (c) 2009 Alex Converse <alex dot converse at gmail dot com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if !defined(AVCODEC_RDFT_H) && (!defined(FFT_FLOAT) || FFT_FLOAT)
+#define AVCODEC_RDFT_H
+
+#include "config.h"
+#include "fft.h"
+
+struct RDFTContext {
+    int nbits;           /* log2 of the transform length, as passed to ff_rdft_init() */
+    int inverse;         /* 1 for IDFT_C2R and DFT_C2R, 0 otherwise (set in ff_rdft_init()) */
+    int sign_convention; /* +1 for IDFT_R2C and DFT_C2R, -1 otherwise */
+
+    /* pre/post rotation tables */
+    const FFTSample *tcos;
+    const FFTSample *tsin;
+    int negative_sin;    /* 1 for DFT_C2R and DFT_R2C, 0 otherwise; selects the twiddle signs in rdft_calc_c() */
+    FFTContext fft;      /* complex FFT, initialized with nbits-1 by ff_rdft_init() */
+    void (*rdft_calc)(struct RDFTContext *s, FFTSample *z); /* transform entry point; ff_rdft_init() sets it to rdft_calc_c */
+};
+
+/**
+ * Set up a real FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
+void ff_rdft_end(RDFTContext *s);
+
+void ff_rdft_init_arm(RDFTContext *s);
+
+
+#endif /* AVCODEC_RDFT_H */
diff --git a/media/ffvpx/libavcodec/rectangle.h b/media/ffvpx/libavcodec/rectangle.h
new file mode 100644
index 0000000000..df7c18a4e2
--- /dev/null
+++ b/media/ffvpx/libavcodec/rectangle.h
@@ -0,0 +1,124 @@
+/*
+ * rectangle filling function
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * useful rectangle filling function
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef AVCODEC_RECTANGLE_H
+#define AVCODEC_RECTANGLE_H
+
+#include "config.h"
+#include "libavutil/common.h"
+#include "libavutil/avassert.h"
+
+/**
+ * fill a rectangle of w x h elements at vp with val; stride is the row pitch, also in elements of size bytes.
+ * @param h height of the rectangle, should be a constant
+ * @param w width of the rectangle, should be a constant
+ * @param size the size of val (1, 2 or 4), should be a constant
+ */
+static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
+    uint8_t *p= (uint8_t*)vp;
+    av_assert2(size==1 || size==2 || size==4);
+    av_assert2(w<=4);
+
+    w      *= size;   /* from here on w and stride are byte counts */
+    stride *= size;
+
+    av_assert2((((long)vp)&(FFMIN(w, 8<<(HAVE_NEON|ARCH_PPC|HAVE_MMX))-1)) == 0);
+    av_assert2((stride&(w-1))==0);
+    if(w==2){
+        const uint16_t v= size==4 ? val : val*0x0101; /* NOTE(review): w==2 bytes means size is 1 or 2, never 4, so size==2 also takes the val*0x0101 branch -- confirm intent */
+        *(uint16_t*)(p + 0*stride)= v;
+        if(h==1) return;
+        *(uint16_t*)(p + 1*stride)= v;
+        if(h==2) return;
+        *(uint16_t*)(p + 2*stride)= v;
+        *(uint16_t*)(p + 3*stride)= v;
+    }else if(w==4){
+        const uint32_t v= size==4 ? val : size==2 ? val*0x00010001 : val*0x01010101; /* replicate val across 4 bytes */
+        *(uint32_t*)(p + 0*stride)= v;
+        if(h==1) return;
+        *(uint32_t*)(p + 1*stride)= v;
+        if(h==2) return;
+        *(uint32_t*)(p + 2*stride)= v;
+        *(uint32_t*)(p + 3*stride)= v;
+    }else if(w==8){
+    // gcc cannot optimize 64-bit math on x86_32
+#if HAVE_FAST_64BIT
+        const uint64_t v=  size==2 ? val*0x0001000100010001ULL : val*0x0100000001ULL;
+        *(uint64_t*)(p + 0*stride)= v;
+        if(h==1) return;
+        *(uint64_t*)(p + 1*stride)= v;
+        if(h==2) return;
+        *(uint64_t*)(p + 2*stride)= v;
+        *(uint64_t*)(p + 3*stride)= v;
+    }else if(w==16){ /* NOTE(review): no h==1 exit here, unlike the narrower cases -- confirm callers never pass h==1 with 16 bytes per row */
+        const uint64_t v= val*0x0100000001ULL;
+        *(uint64_t*)(p + 0+0*stride)= v;
+        *(uint64_t*)(p + 8+0*stride)= v;
+        *(uint64_t*)(p + 0+1*stride)= v;
+        *(uint64_t*)(p + 8+1*stride)= v;
+        if(h==2) return;
+        *(uint64_t*)(p + 0+2*stride)= v;
+        *(uint64_t*)(p + 8+2*stride)= v;
+        *(uint64_t*)(p + 0+3*stride)= v;
+        *(uint64_t*)(p + 8+3*stride)= v;
+#else /* same w==8 and w==16 cases, written with 32-bit stores */
+        const uint32_t v= size==2 ? val*0x00010001 : val;
+        *(uint32_t*)(p + 0+0*stride)= v;
+        *(uint32_t*)(p + 4+0*stride)= v;
+        if(h==1) return;
+        *(uint32_t*)(p + 0+1*stride)= v;
+        *(uint32_t*)(p + 4+1*stride)= v;
+        if(h==2) return;
+        *(uint32_t*)(p + 0+2*stride)= v;
+        *(uint32_t*)(p + 4+2*stride)= v;
+        *(uint32_t*)(p + 0+3*stride)= v;
+        *(uint32_t*)(p + 4+3*stride)= v;
+    }else if(w==16){ /* NOTE(review): no h==1 exit here either -- see the 64-bit variant */
+        *(uint32_t*)(p + 0+0*stride)= val;
+        *(uint32_t*)(p + 4+0*stride)= val;
+        *(uint32_t*)(p + 8+0*stride)= val;
+        *(uint32_t*)(p +12+0*stride)= val;
+        *(uint32_t*)(p + 0+1*stride)= val;
+        *(uint32_t*)(p + 4+1*stride)= val;
+        *(uint32_t*)(p + 8+1*stride)= val;
+        *(uint32_t*)(p +12+1*stride)= val;
+        if(h==2) return;
+        *(uint32_t*)(p + 0+2*stride)= val;
+        *(uint32_t*)(p + 4+2*stride)= val;
+        *(uint32_t*)(p + 8+2*stride)= val;
+        *(uint32_t*)(p +12+2*stride)= val;
+        *(uint32_t*)(p + 0+3*stride)= val;
+        *(uint32_t*)(p + 4+3*stride)= val;
+        *(uint32_t*)(p + 8+3*stride)= val;
+        *(uint32_t*)(p +12+3*stride)= val;
+#endif /* HAVE_FAST_64BIT */
+    }else
+        av_assert2(0); /* byte width other than 2, 4, 8 or 16 */
+    av_assert2(h==4); /* reached only when no early return fired, i.e. after four rows were written */
+}
+
+#endif /* AVCODEC_RECTANGLE_H */
diff --git a/media/ffvpx/libavcodec/reverse.c b/media/ffvpx/libavcodec/reverse.c
new file mode 100644
index 0000000000..440badaf34
--- /dev/null
+++ b/media/ffvpx/libavcodec/reverse.c
@@ -0,0 +1 @@
+#include "libavutil/reverse.c"
diff --git a/media/ffvpx/libavcodec/rl.h b/media/ffvpx/libavcodec/rl.h
new file mode 100644
index 0000000000..4380fda272
--- /dev/null
+++ b/media/ffvpx/libavcodec/rl.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2000-2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * rl header.
+ */
+
+#ifndef AVCODEC_RL_H
+#define AVCODEC_RL_H
+
+#include <stdint.h>
+
+#include "vlc.h"
+
+/* run length table */
+#define MAX_RUN    64
+#define MAX_LEVEL  64
+
+/** Run/level table: n, last and table_* are inputs; ff_rl_init()/ff_rl_init_vlc() fill the rest. */
+typedef struct RLTable {
+    int n;                         ///< number of entries of table_vlc minus 1
+    int last;                      ///< number of values for last = 0
+    const uint16_t (*table_vlc)[2]; ///< two uint16_t per entry (presumably code and length -- TODO confirm)
+    const int8_t *table_run;       ///< input to ff_rl_init(); presumably the run of each entry
+    const int8_t *table_level;     ///< input to ff_rl_init(); presumably the level of each entry
+    uint8_t *index_run[2];         ///< encoding only
+    int8_t *max_level[2];          ///< encoding & decoding
+    int8_t *max_run[2];            ///< encoding & decoding
+    RL_VLC_ELEM *rl_vlc[32];       ///< decoding only
+} RLTable;
+
+/**
+ * Initialize max_level and index_run from table_run and table_level;
+ * this is equivalent to initializing RLTable.max_level[0] and
+ * RLTable.index_run[0] with ff_rl_init().
+ */
+void ff_rl_init_level_run(uint8_t max_level[MAX_LEVEL + 1],
+                          uint8_t index_run[MAX_RUN + 1],
+                          const uint8_t table_run[/* n */],
+                          const uint8_t table_level[/* n*/], int n);
+
+/**
+ * Initialize index_run, max_level and max_run from n, last, table_vlc,
+ * table_run and table_level.
+ * @param static_store static uint8_t array[2][2*MAX_RUN + MAX_LEVEL + 3]
+ *                     to hold the level and run tables.
+ * @note  This function does not touch rl_vlc at all, hence there is no need
+ *        to synchronize calls to ff_rl_init() and ff_rl_init_vlc() using the
+ *        same RLTable.
+ */
+void ff_rl_init(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3]);
+
+/**
+ * Initialize rl_vlc from n, last, table_vlc, table_run and table_level.
+ * All rl_vlc pointers to be initialized must already point to a static
+ * buffer of `static_size` RL_VLC_ELEM elements; if a pointer is NULL,
+ * initializing further VLCs stops.
+ * @note  This function does not touch what ff_rl_init() initializes at all,
+ *        hence there is no need to synchronize calls to ff_rl_init() and
+ *        ff_rl_init_vlc() using the same RLTable.
+ */
+void ff_rl_init_vlc(RLTable *rl, unsigned static_size);
+
+#define INIT_VLC_RL(rl, static_size)\
+do {\
+    static RL_VLC_ELEM rl_vlc_table[32][static_size];\
+    /* give each of the 32 rl_vlc slots its own static row */\
+    for (int q = 0; q < 32; q++) \
+        (rl).rl_vlc[q] = rl_vlc_table[q]; \
+    /* do/while (0) keeps "INIT_VLC_RL(x, n);" a single statement */\
+    ff_rl_init_vlc(&(rl), static_size); \
+} while (0)
+
+#define INIT_FIRST_VLC_RL(rl, static_size)              \
+do {                                                    \
+    static RL_VLC_ELEM rl_vlc_table[static_size];       \
+    /* only slot 0 is set; ff_rl_init_vlc() stops at the first NULL slot */ \
+    rl.rl_vlc[0] = rl_vlc_table;                        \
+    ff_rl_init_vlc(&rl, static_size);                   \
+} while (0)
+
+static inline int get_rl_index(const RLTable *rl, int last, int run, int level)
+{
+    /* index_run[last][run] is the table index that level 1 maps to */
+    const int first = rl->index_run[last][run];
+
+    /* rl->n is returned when that index is out of range or level is too large */
+    if (first >= rl->n || level > rl->max_level[last][run])
+        return rl->n;
+    return first + level - 1;
+}
+
+#endif /* AVCODEC_RL_H */
diff --git a/media/ffvpx/libavcodec/rnd_avg.h b/media/ffvpx/libavcodec/rnd_avg.h
new file mode 100644
index 0000000000..344775e31f
--- /dev/null
+++ b/media/ffvpx/libavcodec/rnd_avg.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
+ * Copyright (c) 2011 Oskar Arvidsson
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RND_AVG_H
+#define AVCODEC_RND_AVG_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define BYTE_VEC32(c) ((c) * 0x01010101UL)          /* c copied into each of 4 bytes */
+#define BYTE_VEC64(c) ((c) * 0x0001000100010001UL)  /* c copied into each of 4 16-bit lanes */
+/* Four packed 8-bit lanes: per-lane (a + b + 1) >> 1, i.e. average rounded up. */
+static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+/* Four packed 8-bit lanes: per-lane (a + b) >> 1, i.e. average rounded down. */
+static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+/* Four packed 16-bit lanes (see BYTE_VEC64): per-lane (a + b + 1) >> 1, rounded up. */
+static inline uint64_t rnd_avg64(uint64_t a, uint64_t b)
+{
+    return (a | b) - (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1);
+}
+/* Four packed 16-bit lanes (see BYTE_VEC64): per-lane (a + b) >> 1, rounded down. */
+static inline uint64_t no_rnd_avg64(uint64_t a, uint64_t b)
+{
+    return (a & b) + (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1);
+}
+
+#endif /* AVCODEC_RND_AVG_H */
diff --git a/media/ffvpx/libavcodec/simple_idct.c b/media/ffvpx/libavcodec/simple_idct.c
new file mode 100644
index 0000000000..eb13cff146
--- /dev/null
+++ b/media/ffvpx/libavcodec/simple_idct.c
@@ -0,0 +1,269 @@
+/*
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * simpleidct in C.
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "mathops.h"
+#include "simple_idct.h"
+
+#define IN_IDCT_DEPTH 16
+
+#define BIT_DEPTH 8
+#include "simple_idct_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "simple_idct_template.c"
+
+#define EXTRA_SHIFT  2
+#include "simple_idct_template.c"
+
+#undef EXTRA_SHIFT
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 12
+#include "simple_idct_template.c"
+#undef BIT_DEPTH
+#undef IN_IDCT_DEPTH
+
+#define IN_IDCT_DEPTH 32
+#define BIT_DEPTH 10
+#include "simple_idct_template.c"
+#undef BIT_DEPTH
+#undef IN_IDCT_DEPTH
+
+/* 2x4x8 idct */
+
+#define CN_SHIFT 12
+#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
+#define C1 C_FIX(0.6532814824)
+#define C2 C_FIX(0.2705980501)
+
+/* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized,
+   and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
+#define C_SHIFT (4+1+12)
+
+static inline void idct4col_put(uint8_t *dest, ptrdiff_t line_size, const int16_t *col)
+{
+    /* 4-point column transform of col[0], col[16], col[32], col[48];
+     * the four clipped results overwrite dest at multiples of line_size. */
+    const int in0 = col[8 * 0], in1 = col[8 * 2];
+    const int in2 = col[8 * 4], in3 = col[8 * 6];
+    const int half  = 1 << (CN_SHIFT - 1);  /* 0.5 in CN_SHIFT fixed point */
+    const int round = 1 << (C_SHIFT - 1);   /* rounding bias for >> C_SHIFT */
+
+    /* sum/difference terms from in0, in2; weighted terms from in1, in3 */
+    const int even0 = (in0 + in2) * half + round;
+    const int even1 = (in0 - in2) * half + round;
+    const int odd0  = in1 * C1 + in3 * C2;
+    const int odd1  = in1 * C2 - in3 * C1;
+
+    dest[0 * line_size] = av_clip_uint8((even0 + odd0) >> C_SHIFT);
+    dest[1 * line_size] = av_clip_uint8((even1 + odd1) >> C_SHIFT);
+    dest[2 * line_size] = av_clip_uint8((even1 - odd1) >> C_SHIFT);
+    dest[3 * line_size] = av_clip_uint8((even0 - odd0) >> C_SHIFT);
+}
+
+#define BF(k) \
+{\
+    /* in place: ptr[k] becomes the sum, ptr[8 + k] the difference */\
+    const int upper = ptr[k];\
+    const int lower = ptr[8 + k];\
+    ptr[k]     = upper + lower;\
+    ptr[8 + k] = upper - lower;\
+}
+
+/* only used by DV codec. The input must be interlaced. 128 is added
+   to the pixels before clamping to avoid systematic error
+   (1024*sqrt(2)) offset would be needed otherwise. */
+/* XXX: I think a 1.0/sqrt(2) normalization should be needed to
+   compensate the extra butterfly stage - I don't have the full DV
+   specification */
+void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    int16_t *ptr;
+    int pair, row, x, k;
+
+    /* Stage 1: in-place butterfly of block rows 0/1, 2/3, 4/5 and 6/7.
+     * Each row of a pair is replaced by the sum resp. the difference of
+     * the two rows, one column at a time. */
+    ptr = block;   /* BF() reads and writes through ptr */
+    for (pair = 0; pair < 4; pair++) {
+        for (k = 0; k < 8; k++)
+            BF(k);
+        ptr += 2 * 8;
+    }
+
+    /* Stage 2: 8-point row transform on each of the eight rows. */
+    for (row = 0; row < 8; row++)
+        idctRowCondDC_int16_8bit(block + 8 * row, 0);
+
+    /* Stage 3: two 4-point column transforms per column, stored with a
+     * doubled stride.  Block rows 0, 2, 4, 6 go to output lines
+     * 0, 2, 4, 6; block rows 1, 3, 5, 7 go to lines 1, 3, 5, 7. */
+    for (x = 0; x < 8; x++) {
+        uint8_t       *out = dest + x;
+        const int16_t *in  = block + x;
+
+        idct4col_put(out,             2 * line_size, in);
+        idct4col_put(out + line_size, 2 * line_size, in + 8);
+    }
+}
+
+/* 8x4 & 4x8 WMV2 IDCT */
+#undef CN_SHIFT
+#undef C_SHIFT
+#undef C_FIX
+#undef C1
+#undef C2
+#define CN_SHIFT 12
+#define C_FIX(x) ((int)((x) * M_SQRT2 * (1 << CN_SHIFT) + 0.5))
+#define C1 C_FIX(0.6532814824)
+#define C2 C_FIX(0.2705980501)
+#define C3 C_FIX(0.5)
+#define C_SHIFT (4+1+12)
+static inline void idct4col_add(uint8_t *dest, ptrdiff_t line_size, const int16_t *col)
+{
+    /* 4-point column transform of col[0], col[8], col[16], col[24];
+     * results land in dest at multiples of line_size. */
+    const int in0 = col[8 * 0], in1 = col[8 * 1];
+    const int in2 = col[8 * 2], in3 = col[8 * 3];
+    const int round = 1 << (C_SHIFT - 1);   /* rounding bias for >> C_SHIFT */
+
+    /* sum/difference terms from in0, in2; weighted terms from in1, in3 */
+    const int even0 = (in0 + in2) * C3 + round;
+    const int even1 = (in0 - in2) * C3 + round;
+    const int odd0  = in1 * C1 + in3 * C2;
+    const int odd1  = in1 * C2 - in3 * C1;
+
+    /* each result is added to the pixel already in dest, then clipped */
+    dest[0 * line_size] = av_clip_uint8(dest[0 * line_size] + ((even0 + odd0) >> C_SHIFT));
+    dest[1 * line_size] = av_clip_uint8(dest[1 * line_size] + ((even1 + odd1) >> C_SHIFT));
+    dest[2 * line_size] = av_clip_uint8(dest[2 * line_size] + ((even1 - odd1) >> C_SHIFT));
+    dest[3 * line_size] = av_clip_uint8(dest[3 * line_size] + ((even0 - odd0) >> C_SHIFT));
+}
+
+#define RN_SHIFT 15
+#define R_FIX(x) ((int)((x) * M_SQRT2 * (1 << RN_SHIFT) + 0.5))
+#define R1 R_FIX(0.6532814824)
+#define R2 R_FIX(0.2705980501)
+#define R3 R_FIX(0.5)
+#define R_SHIFT 11
+static inline void idct4row(int16_t *row)
+{
+    /* 4-point transform of row[0..3], in place.  The products are formed
+     * in int; the final sums and differences are done in unsigned. */
+    const int in0 = row[0], in1 = row[1];
+    const int in2 = row[2], in3 = row[3];
+    const int round = 1 << (R_SHIFT - 1);   /* rounding bias for >> R_SHIFT */
+
+    const unsigned even0 = (in0 + in2) * R3 + round;
+    const unsigned even1 = (in0 - in2) * R3 + round;
+    const unsigned odd0  = in1 * R1 + in3 * R2;
+    const unsigned odd1  = in1 * R2 - in3 * R1;
+
+    row[0] = (even0 + odd0) >> R_SHIFT;
+    row[1] = (even1 + odd1) >> R_SHIFT;
+    row[2] = (even1 - odd1) >> R_SHIFT;
+    row[3] = (even0 - odd0) >> R_SHIFT;
+}
+
+void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    int16_t *row = block;
+    int x;
+
+    /* 8-point row transform, applied to the first four rows only */
+    for (; row < block + 4 * 8; row += 8)
+        idctRowCondDC_int16_8bit(row, 0);
+
+    /* 4-point column transform on each of the eight columns; the
+     * results are added to the pixels already in dest */
+    for (x = 0; x < 8; x++)
+        idct4col_add(&dest[x], line_size, &block[x]);
+}
+
+void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    int16_t *row = block;
+    int x;
+
+    /* 4-point row transform on the first four entries of all eight rows */
+    for (; row < block + 8 * 8; row += 8)
+        idct4row(row);
+
+    /* 8-point column transform on columns 0..3; the results are
+     * added to the pixels already in dest */
+    for (x = 0; x < 4; x++)
+        idctSparseColAdd_int16_8bit(&dest[x], line_size, &block[x]);
+}
+
+void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    int16_t *row = block;
+    int x;
+
+    /* 4-point row transform on the first four rows */
+    for (; row < block + 4 * 8; row += 8)
+        idct4row(row);
+
+    /* 4-point column transform on columns 0..3; the results are
+     * added to the pixels already in dest */
+    for (x = 0; x < 4; x++)
+        idct4col_add(&dest[x], line_size, &block[x]);
+}
+
+void ff_prores_idct_10(int16_t *block, const int16_t *qmat)
+{
+    int k, row, x;
+    /* dequantize: scale every coefficient by its qmat entry */
+    for (k = 0; k < 64; k++)
+        block[k] = block[k] * qmat[k];
+    /* row pass with an extra shift of 2 */
+    for (row = 0; row < 8; row++)
+        idctRowCondDC_extrashift_10(&block[8 * row], 2);
+    /* column pass; 8192 is first added to the top entry of each column */
+    for (x = 0; x < 8; x++) {
+        block[x] = block[x] + 8192;
+        idctSparseCol_extrashift_10(&block[x]);
+    }
+}
+
+void ff_prores_idct_12(int16_t *block, const int16_t *qmat)
+{
+    int k, row, x;
+    /* dequantize: scale every coefficient by its qmat entry */
+    for (k = 0; k < 64; k++)
+        block[k] = block[k] * qmat[k];
+    /* row pass, no extra shift */
+    for (row = 0; row < 8; row++)
+        idctRowCondDC_int16_12bit(&block[8 * row], 0);
+    /* column pass; 8192 is first added to the top entry of each column */
+    for (x = 0; x < 8; x++) {
+        block[x] = block[x] + 8192;
+        idctSparseCol_int16_12bit(&block[x]);
+    }
+}
diff --git a/media/ffvpx/libavcodec/simple_idct.h b/media/ffvpx/libavcodec/simple_idct.h
new file mode 100644
index 0000000000..20578b3347
--- /dev/null
+++ b/media/ffvpx/libavcodec/simple_idct.h
@@ -0,0 +1,64 @@
+/*
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * simple idct header.
+ */
+
+#ifndef AVCODEC_SIMPLE_IDCT_H
+#define AVCODEC_SIMPLE_IDCT_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_simple_idct_put_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_add_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_int16_8bit(int16_t *block);
+
+void ff_simple_idct_put_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_add_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_int16_10bit(int16_t *block);
+
+void ff_simple_idct_put_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_add_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_int32_10bit(int16_t *block);
+
+void ff_simple_idct_put_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_add_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_int16_12bit(int16_t *block);
+
+/**
+ * Special version of ff_simple_idct_int16_10bit() which does dequantization
+ * and scales by a factor of 2 more between the two IDCTs to account
+ * for larger scale of input coefficients.
+ */
+void ff_prores_idct_10(int16_t *block, const int16_t *qmat);
+void ff_prores_idct_12(int16_t *block, const int16_t *qmat);
+
+void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+
+void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+
+#endif /* AVCODEC_SIMPLE_IDCT_H */
diff --git a/media/ffvpx/libavcodec/simple_idct_template.c b/media/ffvpx/libavcodec/simple_idct_template.c
new file mode 100644
index 0000000000..5ddd0b45a2
--- /dev/null
+++ b/media/ffvpx/libavcodec/simple_idct_template.c
@@ -0,0 +1,371 @@
+/*
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * simpleidct in C.
+ */
+
+/* Based upon some commented-out C code from mpeg2dec (idct_mmx.c
+ * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>). */
+
+#include "simple_idct.h"
+
+#include "bit_depth_template.c"
+
+#undef W1
+#undef W2
+#undef W3
+#undef W4
+#undef W5
+#undef W6
+#undef W7
+#undef ROW_SHIFT
+#undef COL_SHIFT
+#undef DC_SHIFT
+#undef MUL
+#undef MAC
+
+#if BIT_DEPTH == 8
+
+#define W1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+
+#define ROW_SHIFT 11
+#define COL_SHIFT 20
+#define DC_SHIFT 3
+
+#define MUL(a, b)    MUL16(a, b)
+#define MAC(a, b, c) MAC16(a, b, c)
+
+#elif BIT_DEPTH == 10 || BIT_DEPTH == 12
+
+# if BIT_DEPTH == 10
+#define W1 22725 // 90901
+#define W2 21407 //  85627
+#define W3 19265 //  77062
+#define W4 16384 //  65535
+#define W5 12873 //  51491
+#define W6  8867 //  35468
+#define W7  4520 //  18081
+
+#   ifdef EXTRA_SHIFT
+#define ROW_SHIFT 13
+#define COL_SHIFT 18
+#define DC_SHIFT  1
+#   elif IN_IDCT_DEPTH == 32
+#define ROW_SHIFT 13
+#define COL_SHIFT 21
+#define DC_SHIFT  2
+#   else
+#define ROW_SHIFT 12
+#define COL_SHIFT 19
+#define DC_SHIFT  2
+#   endif
+
+# else
+#define W1 45451
+#define W2 42813
+#define W3 38531
+#define W4 32767
+#define W5 25746
+#define W6 17734
+#define W7 9041
+
+#define ROW_SHIFT 16
+#define COL_SHIFT 17
+#define DC_SHIFT -1
+# endif
+
+#define MUL(a, b)    ((int)((SUINT)(a) * (b)))
+#define MAC(a, b, c) ((a) += (SUINT)(b) * (c))
+
+#else
+
+#error "Unsupported bitdepth"
+
+#endif
+
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
+#else
+static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
+#endif
+{
+    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
+    /* Shortcut: if row[1..7] are all zero, fill the whole row from row[0] alone. */
+// TODO: Add DC-only support for int32_t input
+#if IN_IDCT_DEPTH == 16
+#if HAVE_FAST_64BIT
+#define ROW0_MASK (0xffffULL << 48 * HAVE_BIGENDIAN) /* bits holding row[0] in the first 64-bit word */
+    if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
+        uint64_t temp;
+        if (DC_SHIFT - extra_shift >= 0) {
+            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
+        } else {
+            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
+        }
+        temp += temp * (1 << 16);
+        temp += temp * ((uint64_t) 1 << 32);
+        AV_WN64A(row, temp);
+        AV_WN64A(row + 4, temp);
+        return;
+    }
+#else
+    if (!(AV_RN32A(row+2) |
+          AV_RN32A(row+4) |
+          AV_RN32A(row+6) |
+          row[1])) {
+        uint32_t temp;
+        if (DC_SHIFT - extra_shift >= 0) {
+            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
+        } else {
+            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
+        }
+        temp += temp * (1 << 16);
+        AV_WN32A(row, temp);
+        AV_WN32A(row+2, temp);
+        AV_WN32A(row+4, temp);
+        AV_WN32A(row+6, temp);
+        return;
+    }
+#endif
+#endif
+    /* General case: a0..a3 accumulate the even inputs, b0..b3 the odd ones. */
+    a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
+    a1 = a0;
+    a2 = a0;
+    a3 = a0;
+
+    a0 += (SUINT)W2 * row[2];
+    a1 += (SUINT)W6 * row[2];
+    a2 -= (SUINT)W6 * row[2];
+    a3 -= (SUINT)W2 * row[2];
+
+    b0 = MUL(W1, row[1]);
+    MAC(b0, W3, row[3]);
+    b1 = MUL(W3, row[1]);
+    MAC(b1, -W7, row[3]);
+    b2 = MUL(W5, row[1]);
+    MAC(b2, -W1, row[3]);
+    b3 = MUL(W7, row[1]);
+    MAC(b3, -W5, row[3]);
+    /* row[4..7] are only added when the 64-bit read(s) below find a set bit */
+#if IN_IDCT_DEPTH == 32
+    if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
+#else
+    if (AV_RN64A(row + 4)) {
+#endif
+        a0 += (SUINT)  W4*row[4] + (SUINT)W6*row[6];
+        a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6];
+        a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6];
+        a3 += (SUINT)  W4*row[4] - (SUINT)W6*row[6];
+
+        MAC(b0,  W5, row[5]);
+        MAC(b0,  W7, row[7]);
+
+        MAC(b1, -W1, row[5]);
+        MAC(b1, -W5, row[7]);
+
+        MAC(b2,  W7, row[5]);
+        MAC(b2,  W3, row[7]);
+
+        MAC(b3,  W3, row[5]);
+        MAC(b3, -W1, row[7]);
+    }
+    /* outputs k and 7 - k are (a_k + b_k) and (a_k - b_k), shifted down */
+    row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
+    row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
+    row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
+    row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
+    row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
+    row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
+    row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
+    row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
+}
+
+#define IDCT_COLS do { /* needs col, a0..a3, b0..b3 in scope */ \
+        a0 = (SUINT)W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \
+        a1 = a0;                                        \
+        a2 = a0;                                        \
+        a3 = a0;                                        \
+        /* a0..a3: even rows (0, 2, 4, 6) */            \
+        a0 += (SUINT) W2*col[8*2];                             \
+        a1 += (SUINT) W6*col[8*2];                             \
+        a2 += (SUINT)-W6*col[8*2];                             \
+        a3 += (SUINT)-W2*col[8*2];                             \
+        /* b0..b3: odd rows (1, 3, 5, 7) */             \
+        b0 = MUL(W1, col[8*1]);                         \
+        b1 = MUL(W3, col[8*1]);                         \
+        b2 = MUL(W5, col[8*1]);                         \
+        b3 = MUL(W7, col[8*1]);                         \
+                                                        \
+        MAC(b0,  W3, col[8*3]);                         \
+        MAC(b1, -W7, col[8*3]);                         \
+        MAC(b2, -W1, col[8*3]);                         \
+        MAC(b3, -W5, col[8*3]);                         \
+        /* rows 4..7 are only added when nonzero */     \
+        if (col[8*4]) {                                 \
+            a0 += (SUINT) W4*col[8*4];                         \
+            a1 += (SUINT)-W4*col[8*4];                         \
+            a2 += (SUINT)-W4*col[8*4];                         \
+            a3 += (SUINT) W4*col[8*4];                         \
+        }                                               \
+                                                        \
+        if (col[8*5]) {                                 \
+            MAC(b0,  W5, col[8*5]);                     \
+            MAC(b1, -W1, col[8*5]);                     \
+            MAC(b2,  W7, col[8*5]);                     \
+            MAC(b3,  W3, col[8*5]);                     \
+        }                                               \
+                                                        \
+        if (col[8*6]) {                                 \
+            a0 += (SUINT) W6*col[8*6];                         \
+            a1 += (SUINT)-W2*col[8*6];                         \
+            a2 += (SUINT) W2*col[8*6];                         \
+            a3 += (SUINT)-W6*col[8*6];                         \
+        }                                               \
+                                                        \
+        if (col[8*7]) {                                 \
+            MAC(b0,  W7, col[8*7]);                     \
+            MAC(b1, -W5, col[8*7]);                     \
+            MAC(b2,  W3, col[8*7]);                     \
+            MAC(b3, -W1, col[8*7]);                     \
+        }                                               \
+    } while (0)
+
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
+#else
+static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
+                                          idctin *col)
+{
+    /* Column pass that stores its output: IDCT_COLS computes the partial
+     * sums a0..a3 and b0..b3 from col[0], col[8], ..., col[56]; the eight
+     * combinations are shifted down by COL_SHIFT, passed through
+     * av_clip_pixel() and written to dest at multiples of line_size. */
+    SUINT a0, a1, a2, a3, b0, b1, b2, b3;   /* names are fixed by IDCT_COLS */
+
+    IDCT_COLS;
+
+    /* outputs 0..3: a_k + b_k */
+    dest[0 * line_size] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT);
+    dest[1 * line_size] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT);
+    dest[2 * line_size] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT);
+    dest[3 * line_size] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT);
+
+    /* outputs 4..7: a_k - b_k, with k running back from 3 to 0 */
+    dest[4 * line_size] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT);
+    dest[5 * line_size] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT);
+    dest[6 * line_size] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT);
+    dest[7 * line_size] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
+}
+
+static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
+                                          idctin *col)
+{
+    /* Column pass that accumulates: IDCT_COLS computes the partial sums
+     * a0..a3 and b0..b3 from col[0], col[8], ..., col[56]; each shifted
+     * combination is added to the pixel already stored in dest and the
+     * sum is passed through av_clip_pixel() before being written back. */
+    unsigned a0, a1, a2, a3, b0, b1, b2, b3;   /* names are fixed by IDCT_COLS */
+
+    IDCT_COLS;
+
+    /* outputs 0..3: a_k + b_k */
+    dest[0 * line_size] = av_clip_pixel(dest[0 * line_size] + ((int)(a0 + b0) >> COL_SHIFT));
+    dest[1 * line_size] = av_clip_pixel(dest[1 * line_size] + ((int)(a1 + b1) >> COL_SHIFT));
+    dest[2 * line_size] = av_clip_pixel(dest[2 * line_size] + ((int)(a2 + b2) >> COL_SHIFT));
+    dest[3 * line_size] = av_clip_pixel(dest[3 * line_size] + ((int)(a3 + b3) >> COL_SHIFT));
+
+    /* outputs 4..7: a_k - b_k, with k running back from 3 to 0 */
+    dest[4 * line_size] = av_clip_pixel(dest[4 * line_size] + ((int)(a3 - b3) >> COL_SHIFT));
+    dest[5 * line_size] = av_clip_pixel(dest[5 * line_size] + ((int)(a2 - b2) >> COL_SHIFT));
+    dest[6 * line_size] = av_clip_pixel(dest[6 * line_size] + ((int)(a1 - b1) >> COL_SHIFT));
+    dest[7 * line_size] = av_clip_pixel(dest[7 * line_size] + ((int)(a0 - b0) >> COL_SHIFT));
+}
+
+static inline void FUNC6(idctSparseCol)(idctin *col)
+#endif /* EXTRA_SHIFT */
+{   /* this body also serves the idctSparseCol_extrashift signature above */
+    unsigned a0, a1, a2, a3, b0, b1, b2, b3;   /* names are fixed by IDCT_COLS */
+
+    IDCT_COLS;
+    /* write the eight shifted results back over the column (stride 8) */
+    col[0 ] = ((int)(a0 + b0) >> COL_SHIFT);
+    col[8 ] = ((int)(a1 + b1) >> COL_SHIFT);
+    col[16] = ((int)(a2 + b2) >> COL_SHIFT);
+    col[24] = ((int)(a3 + b3) >> COL_SHIFT);
+    col[32] = ((int)(a3 - b3) >> COL_SHIFT);
+    col[40] = ((int)(a2 - b2) >> COL_SHIFT);
+    col[48] = ((int)(a1 - b1) >> COL_SHIFT);
+    col[56] = ((int)(a0 - b0) >> COL_SHIFT);
+}
+
+#ifndef EXTRA_SHIFT
+void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_)
+{
+    idctin *block = (idctin *)block_;
+    pixel *dest = (pixel *)dest_;
+    int i;
+    /* bytes -> pixels; the signed divisor keeps a negative line_size negative */
+    line_size /= (ptrdiff_t)sizeof(pixel);
+    /* row pass in place over the 8 rows, then column pass storing into dest */
+    for (i = 0; i < 8; i++)
+        FUNC6(idctRowCondDC)(block + i*8, 0);
+
+    for (i = 0; i < 8; i++)
+        FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
+}
+
+#if IN_IDCT_DEPTH == 16
+void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
+{
+    pixel *dest = (pixel *)dest_;
+    int i;
+    /* bytes -> pixels; the signed divisor keeps a negative line_size negative */
+    line_size /= (ptrdiff_t)sizeof(pixel);
+    /* row pass in place over the 8 rows, then column pass adding into dest */
+    for (i = 0; i < 8; i++)
+        FUNC6(idctRowCondDC)(block + i*8, 0);
+
+    for (i = 0; i < 8; i++)
+        FUNC6(idctSparseColAdd)(dest + i, line_size, block + i);
+}
+
+void FUNC6(ff_simple_idct)(int16_t *block)
+{
+    int row, x;
+    /* in-place transform: row pass over all 8 rows, then column pass over all 8 columns */
+    for (row = 0; row < 8; row++)
+        FUNC6(idctRowCondDC)(&block[8 * row], 0);
+
+    for (x = 0; x < 8; x++)
+        FUNC6(idctSparseCol)(&block[x]);
+}
+#endif
+#endif
diff --git a/media/ffvpx/libavcodec/startcode.h b/media/ffvpx/libavcodec/startcode.h
new file mode 100644
index 0000000000..8b75832aaf
--- /dev/null
+++ b/media/ffvpx/libavcodec/startcode.h
@@ -0,0 +1,36 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Accelerated start code search function for start codes common to
+ * MPEG-1/2/4 video, VC-1, H.264/5
+ */
+
+#ifndef AVCODEC_STARTCODE_H
+#define AVCODEC_STARTCODE_H
+
+#include <stdint.h>
+
+const uint8_t *avpriv_find_start_code(const uint8_t *p,
+                                      const uint8_t *end,
+                                      uint32_t *state);
+
+int ff_startcode_find_candidate_c(const uint8_t *buf, int size);
+
+#endif /* AVCODEC_STARTCODE_H */
diff --git a/media/ffvpx/libavcodec/thread.h b/media/ffvpx/libavcodec/thread.h
new file mode 100644
index 0000000000..88a14cfeb1
--- /dev/null
+++ b/media/ffvpx/libavcodec/thread.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2008 Alexander Strange <astrange@ithinksw.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Multithreading support functions
+ * @author Alexander Strange <astrange@ithinksw.com>
+ */
+
+#ifndef AVCODEC_THREAD_H
+#define AVCODEC_THREAD_H
+
+#include "libavutil/buffer.h"
+
+#include "avcodec.h"
+
+/**
+ * Wait for decoding threads to finish and reset internal state.
+ * Called by avcodec_flush_buffers().
+ *
+ * @param avctx The context.
+ */
+void ff_thread_flush(AVCodecContext *avctx);
+
+/**
+ * Submit a new frame to a decoding thread.
+ * Returns the next available frame in picture. *got_picture_ptr
+ * will be 0 if none is available.
+ * The return value on success is the size of the consumed packet for
+ * compatibility with FFCodec.decode. This means the decoder
+ * has to consume the full packet.
+ *
+ * Parameters are the same as FFCodec.decode.
+ */
+int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
+                           int *got_picture_ptr, AVPacket *avpkt);
+
+/**
+ * If the codec defines update_thread_context(), call this
+ * when it is ready for the next thread to start decoding
+ * the next frame. After calling it, do not change any variables
+ * read by the update_thread_context() method, or call ff_thread_get_buffer().
+ *
+ * @param avctx The context.
+ */
+void ff_thread_finish_setup(AVCodecContext *avctx);
+
+#define ff_thread_get_format ff_get_format
+
+/**
+ * Wrapper around get_buffer() for frame-multithreaded codecs.
+ * Call this function instead of ff_get_buffer(f).
+ * Cannot be called after the codec has called ff_thread_finish_setup().
+ *
+ * @param avctx The current context.
+ * @param f The frame to write into.
+ */
+int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags);
+
+/**
+ * Wrapper around release_buffer() for frame-multithreaded codecs.
+ * Call this function instead of avctx->release_buffer(f).
+ * The AVFrame will be copied and the actual release_buffer() call
+ * will be performed later. The contents of data pointed to by the
+ * AVFrame should not be changed until ff_thread_get_buffer() is called
+ * on it.
+ *
+ * @param avctx The current context.
+ * @param f The picture being released.
+ */
+void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f);
+
+int ff_thread_init(AVCodecContext *s);
+int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx,
+        int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr),
+        int (*main_func)(AVCodecContext *c), void *arg, int *ret, int job_count);
+void ff_thread_free(AVCodecContext *s);
+int ff_slice_thread_allocz_entries(AVCodecContext *avctx, int count);
+int ff_slice_thread_init_progress(AVCodecContext *avctx);
+void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n);
+void ff_thread_await_progress2(AVCodecContext *avctx,  int field, int thread, int shift);
+
+#endif /* AVCODEC_THREAD_H */
diff --git a/media/ffvpx/libavcodec/threadframe.h b/media/ffvpx/libavcodec/threadframe.h
new file mode 100644
index 0000000000..d2f93c5cd0
--- /dev/null
+++ b/media/ffvpx/libavcodec/threadframe.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_THREADFRAME_H
+#define AVCODEC_THREADFRAME_H
+
+#include "libavutil/frame.h"
+#include "avcodec.h"
+
+typedef struct ThreadFrame {
+    AVFrame *f; // NOTE(review): presumably the picture the progress below refers to -- confirm
+    AVCodecContext *owner[2]; // NOTE(review): two slots like progress; possibly one per field -- confirm
+    // progress->data is an array of 2 ints holding progress for top/bottom
+    // fields
+    AVBufferRef *progress;
+} ThreadFrame;
+
+/**
+ * Notify later decoding threads when part of their reference picture is ready.
+ * Call this when some part of the picture is finished decoding.
+ * Later calls with lower values of progress have no effect.
+ *
+ * @param f The picture being decoded.
+ * @param progress Value, in arbitrary units, of how much of the picture has been decoded.
+ * @param field The field being decoded, for field-picture codecs.
+ * 0 for top field or frame pictures, 1 for bottom field.
+ */
+void ff_thread_report_progress(ThreadFrame *f, int progress, int field);
+
+/**
+ * Wait for earlier decoding threads to finish reference pictures.
+ * Call this before accessing some part of a picture, with a given
+ * value for progress, and it will return after the responsible decoding
+ * thread calls ff_thread_report_progress() with the same or
+ * higher value for progress.
+ *
+ * @param f The picture being referenced.
+ * @param progress Value, in arbitrary units, to wait for.
+ * @param field The field being referenced, for field-picture codecs.
+ * 0 for top field or frame pictures, 1 for bottom field.
+ */
+void ff_thread_await_progress(const ThreadFrame *f, int progress, int field);
+
+/**
+ * Wrapper around ff_get_buffer() for frame-multithreaded codecs.
+ * Call this function instead of ff_get_buffer() if you might need
+ * to wait for progress on this frame.
+ * Cannot be called after the codec has called ff_thread_finish_setup().
+ *
+ * @param avctx The current context.
+ * @param f The frame to write into.
+ * @note: It is fine to call this with codecs that do not support
+ *        frame threading.
+ */
+int ff_thread_get_ext_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags);
+
+/**
+ * Unref a ThreadFrame.
+ *
+ * This is basically a wrapper around av_frame_unref() and should
+ * be called instead of it.
+ *
+ * @param avctx The current context.
+ * @param f The picture being released.
+ */
+void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f);
+
+int ff_thread_ref_frame(ThreadFrame *dst, const ThreadFrame *src);
+
+int ff_thread_can_start_frame(AVCodecContext *avctx);
+
+#endif
diff --git a/media/ffvpx/libavcodec/unary.h b/media/ffvpx/libavcodec/unary.h
new file mode 100644
index 0000000000..d57f9f70c5
--- /dev/null
+++ b/media/ffvpx/libavcodec/unary.h
@@ -0,0 +1,69 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_UNARY_H
+#define AVCODEC_UNARY_H
+
+#include "get_bits.h"
+
+/**
+ * Get unary code of limited length
+ * @param gb GetBitContext
+ * @param[in] stop The bitstop value (unary code of 1's or 0's)
+ * @param[in] len Maximum length
+ * @return unary 0 based code index. This is also the length in bits of the
+ * code excluding the stop bit.
+ * (in case len=1)
+ * 1            0
+ * 0            1
+ * (in case len=2)
+ * 1            0
+ * 01           1
+ * 00           2
+ * (in case len=3)
+ * 1            0
+ * 01           1
+ * 001          2
+ * 000          3
+ */
+static inline int get_unary(GetBitContext *gb, int stop, int len)
+{
+    int count = 0; /* bits consumed so far that were not the stop bit */
+    while (count < len && get_bits1(gb) != stop) /* never reads more than len bits */
+        count++;
+    return count;
+}
+
+/**
+ * Get unary code terminated by a 0 with a maximum length of 33
+ * @param gb GetBitContext
+ * @return Unary length/index
+ */
+static inline int get_unary_0_33(GetBitContext *gb)
+{
+    return get_unary(gb, 0, 33); /* stop bit is 0; at most 33 bits are read */
+}
+
+static inline int get_unary_0_9(GetBitContext *gb) /* unary code terminated by a 0, maximum length 9 */
+{
+    return get_unary(gb, 0, 9); /* stop bit is 0; at most 9 bits are read */
+}
+
+#endif /* AVCODEC_UNARY_H */
diff --git a/media/ffvpx/libavcodec/utils.c b/media/ffvpx/libavcodec/utils.c
new file mode 100644
index 0000000000..599da21dba
--- /dev/null
+++ b/media/ffvpx/libavcodec/utils.c
@@ -0,0 +1,1170 @@
+/*
+ * utils for libavcodec
+ * Copyright (c) 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * utils.
+ */
+
+#include "config.h"
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/pixfmt.h"
+#include "avcodec.h"
+#include "codec.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "hwconfig.h"
+#include "thread.h"
+#include "threadframe.h"
+#include "internal.h"
+#include "put_bits.h"
+#include "startcode.h"
+#include <stdlib.h>
+#include <limits.h>
+
+void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size)
+{
+    uint8_t **buf = ptr;
+    if (min_size <= SIZE_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
+        av_fast_mallocz(buf, size, min_size + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (*buf) /* clear the padding that follows the min_size payload bytes */
+            memset(*buf + min_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+    } else { /* min_size + padding would wrap around size_t: drop the buffer */
+        av_freep(buf);
+        *size = 0;
+    }
+}
+
+void av_fast_padded_mallocz(void *ptr, unsigned int *size, size_t min_size)
+{
+    uint8_t **buf = ptr;
+    if (min_size <= SIZE_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
+        av_fast_malloc(buf, size, min_size + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (*buf) /* zero the payload and the padding alike */
+            memset(*buf, 0, min_size + AV_INPUT_BUFFER_PADDING_SIZE);
+    } else { /* min_size + padding would wrap around size_t: drop the buffer */
+        av_freep(buf);
+        *size = 0;
+    }
+}
+
+int av_codec_is_encoder(const AVCodec *avcodec)
+{
+    const FFCodec *const c = ffcodec(avcodec); /* may be NULL, in which case the answer is 0 */
+    return !c ? 0 : c->cb_type == FF_CODEC_CB_TYPE_ENCODE     ||
+                    c->cb_type == FF_CODEC_CB_TYPE_ENCODE_SUB ||
+                    c->cb_type == FF_CODEC_CB_TYPE_RECEIVE_PACKET;
+}
+
+int av_codec_is_decoder(const AVCodec *avcodec)
+{
+    const FFCodec *const c = ffcodec(avcodec); /* may be NULL, in which case the answer is 0 */
+    return !c ? 0 : c->cb_type == FF_CODEC_CB_TYPE_DECODE     ||
+                    c->cb_type == FF_CODEC_CB_TYPE_DECODE_SUB ||
+                    c->cb_type == FF_CODEC_CB_TYPE_RECEIVE_FRAME;
+}
+
+int ff_set_dimensions(AVCodecContext *s, int width, int height)
+{
+    const int err = av_image_check_size2(width, height, s->max_pixels, AV_PIX_FMT_NONE, 0, s);
+
+    if (err < 0) { /* size rejected: store 0x0 and still report the error below */
+        width  = 0;
+        height = 0;
+    }
+    s->coded_width  = width;
+    s->coded_height = height;
+    s->width        = AV_CEIL_RSHIFT(width,  s->lowres); /* displayed size is scaled down by lowres */
+    s->height       = AV_CEIL_RSHIFT(height, s->lowres);
+    return err;
+}
+
+int ff_set_sar(AVCodecContext *avctx, AVRational sar)
+{
+    const int err = av_image_check_sar(avctx->width, avctx->height, sar);
+
+    if (err >= 0) { /* accepted: store it unchanged */
+        avctx->sample_aspect_ratio = sar;
+        return 0;
+    }
+    /* Rejected: warn, reset the stored ratio to 0/1 and pass the error on. */
+    av_log(avctx, AV_LOG_WARNING, "ignoring invalid SAR: %d/%d\n",
+           sar.num, sar.den);
+    avctx->sample_aspect_ratio = (AVRational){ 0, 1 };
+    return err;
+}
+
+int ff_side_data_update_matrix_encoding(AVFrame *frame,
+                                        enum AVMatrixEncoding matrix_encoding)
+{
+    /* Reuse the frame's matrix-encoding side data if it already has one. */
+    AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MATRIXENCODING);
+
+    if (!sd) {
+        /* None attached yet: add an entry sized for a single enum value. */
+        sd = av_frame_new_side_data(frame, AV_FRAME_DATA_MATRIXENCODING,
+                                    sizeof(enum AVMatrixEncoding));
+        if (!sd)
+            return AVERROR(ENOMEM);
+    }
+
+    /* The payload is the enum value itself. */
+    *(enum AVMatrixEncoding *)sd->data = matrix_encoding;
+
+    return 0;
+}
+
+void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
+                               int linesize_align[AV_NUM_DATA_POINTERS])
+{   /* Aligns *width / *height (FFALIGN) per s->pix_fmt and s->codec_id, and fills linesize_align[0..3]. */
+    int i;
+    int w_align = 1; /* defaults, kept when the pixel format has no descriptor */
+    int h_align = 1;
+    AVPixFmtDescriptor const *desc = av_pix_fmt_desc_get(s->pix_fmt);
+
+    if (desc) { /* start from the chroma subsampling factors */
+        w_align = 1 << desc->log2_chroma_w;
+        h_align = 1 << desc->log2_chroma_h;
+    }
+
+    switch (s->pix_fmt) { /* per-format (and sometimes per-codec) overrides */
+    case AV_PIX_FMT_YUV420P:
+    case AV_PIX_FMT_YUYV422:
+    case AV_PIX_FMT_YVYU422:
+    case AV_PIX_FMT_UYVY422:
+    case AV_PIX_FMT_YUV422P:
+    case AV_PIX_FMT_YUV440P:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_GBRP:
+    case AV_PIX_FMT_GBRAP:
+    case AV_PIX_FMT_GRAY8:
+    case AV_PIX_FMT_GRAY16BE:
+    case AV_PIX_FMT_GRAY16LE:
+    case AV_PIX_FMT_YUVJ420P:
+    case AV_PIX_FMT_YUVJ422P:
+    case AV_PIX_FMT_YUVJ440P:
+    case AV_PIX_FMT_YUVJ444P:
+    case AV_PIX_FMT_YUVA420P:
+    case AV_PIX_FMT_YUVA422P:
+    case AV_PIX_FMT_YUVA444P:
+    case AV_PIX_FMT_YUV420P9LE:
+    case AV_PIX_FMT_YUV420P9BE:
+    case AV_PIX_FMT_YUV420P10LE:
+    case AV_PIX_FMT_YUV420P10BE:
+    case AV_PIX_FMT_YUV420P12LE:
+    case AV_PIX_FMT_YUV420P12BE:
+    case AV_PIX_FMT_YUV420P14LE:
+    case AV_PIX_FMT_YUV420P14BE:
+    case AV_PIX_FMT_YUV420P16LE:
+    case AV_PIX_FMT_YUV420P16BE:
+    case AV_PIX_FMT_YUVA420P9LE:
+    case AV_PIX_FMT_YUVA420P9BE:
+    case AV_PIX_FMT_YUVA420P10LE:
+    case AV_PIX_FMT_YUVA420P10BE:
+    case AV_PIX_FMT_YUVA420P16LE:
+    case AV_PIX_FMT_YUVA420P16BE:
+    case AV_PIX_FMT_YUV422P9LE:
+    case AV_PIX_FMT_YUV422P9BE:
+    case AV_PIX_FMT_YUV422P10LE:
+    case AV_PIX_FMT_YUV422P10BE:
+    case AV_PIX_FMT_YUV422P12LE:
+    case AV_PIX_FMT_YUV422P12BE:
+    case AV_PIX_FMT_YUV422P14LE:
+    case AV_PIX_FMT_YUV422P14BE:
+    case AV_PIX_FMT_YUV422P16LE:
+    case AV_PIX_FMT_YUV422P16BE:
+    case AV_PIX_FMT_YUVA422P9LE:
+    case AV_PIX_FMT_YUVA422P9BE:
+    case AV_PIX_FMT_YUVA422P10LE:
+    case AV_PIX_FMT_YUVA422P10BE:
+    case AV_PIX_FMT_YUVA422P12LE:
+    case AV_PIX_FMT_YUVA422P12BE:
+    case AV_PIX_FMT_YUVA422P16LE:
+    case AV_PIX_FMT_YUVA422P16BE:
+    case AV_PIX_FMT_YUV440P10LE:
+    case AV_PIX_FMT_YUV440P10BE:
+    case AV_PIX_FMT_YUV440P12LE:
+    case AV_PIX_FMT_YUV440P12BE:
+    case AV_PIX_FMT_YUV444P9LE:
+    case AV_PIX_FMT_YUV444P9BE:
+    case AV_PIX_FMT_YUV444P10LE:
+    case AV_PIX_FMT_YUV444P10BE:
+    case AV_PIX_FMT_YUV444P12LE:
+    case AV_PIX_FMT_YUV444P12BE:
+    case AV_PIX_FMT_YUV444P14LE:
+    case AV_PIX_FMT_YUV444P14BE:
+    case AV_PIX_FMT_YUV444P16LE:
+    case AV_PIX_FMT_YUV444P16BE:
+    case AV_PIX_FMT_YUVA444P9LE:
+    case AV_PIX_FMT_YUVA444P9BE:
+    case AV_PIX_FMT_YUVA444P10LE:
+    case AV_PIX_FMT_YUVA444P10BE:
+    case AV_PIX_FMT_YUVA444P12LE:
+    case AV_PIX_FMT_YUVA444P12BE:
+    case AV_PIX_FMT_YUVA444P16LE:
+    case AV_PIX_FMT_YUVA444P16BE:
+    case AV_PIX_FMT_GBRP9LE:
+    case AV_PIX_FMT_GBRP9BE:
+    case AV_PIX_FMT_GBRP10LE:
+    case AV_PIX_FMT_GBRP10BE:
+    case AV_PIX_FMT_GBRP12LE:
+    case AV_PIX_FMT_GBRP12BE:
+    case AV_PIX_FMT_GBRP14LE:
+    case AV_PIX_FMT_GBRP14BE:
+    case AV_PIX_FMT_GBRP16LE:
+    case AV_PIX_FMT_GBRP16BE:
+    case AV_PIX_FMT_GBRAP12LE:
+    case AV_PIX_FMT_GBRAP12BE:
+    case AV_PIX_FMT_GBRAP16LE:
+    case AV_PIX_FMT_GBRAP16BE:
+        w_align = 16; //FIXME assume 16 pixel per macroblock
+        h_align = 16 * 2; // interlaced needs 2 macroblocks height
+        if (s->codec_id == AV_CODEC_ID_BINKVIDEO)
+            w_align = 16*2;
+        break;
+    case AV_PIX_FMT_YUV411P:
+    case AV_PIX_FMT_YUVJ411P:
+    case AV_PIX_FMT_UYYVYY411:
+        w_align = 32;
+        h_align = 16 * 2;
+        break;
+    case AV_PIX_FMT_YUV410P:
+        if (s->codec_id == AV_CODEC_ID_SVQ1) {
+            w_align = 64;
+            h_align = 64;
+        }
+        break;
+    case AV_PIX_FMT_RGB555:
+        if (s->codec_id == AV_CODEC_ID_RPZA) {
+            w_align = 4;
+            h_align = 4;
+        }
+        if (s->codec_id == AV_CODEC_ID_INTERPLAY_VIDEO) {
+            w_align = 8;
+            h_align = 8;
+        }
+        break;
+    case AV_PIX_FMT_PAL8:
+    case AV_PIX_FMT_BGR8:
+    case AV_PIX_FMT_RGB8:
+        if (s->codec_id == AV_CODEC_ID_SMC ||
+            s->codec_id == AV_CODEC_ID_CINEPAK) {
+            w_align = 4;
+            h_align = 4;
+        }
+        if (s->codec_id == AV_CODEC_ID_JV ||
+            s->codec_id == AV_CODEC_ID_ARGO ||
+            s->codec_id == AV_CODEC_ID_INTERPLAY_VIDEO) {
+            w_align = 8;
+            h_align = 8;
+        }
+        if (s->codec_id == AV_CODEC_ID_MJPEG   ||
+            s->codec_id == AV_CODEC_ID_MJPEGB  ||
+            s->codec_id == AV_CODEC_ID_LJPEG   ||
+            s->codec_id == AV_CODEC_ID_SMVJPEG ||
+            s->codec_id == AV_CODEC_ID_AMV     ||
+            s->codec_id == AV_CODEC_ID_SP5X    ||
+            s->codec_id == AV_CODEC_ID_JPEGLS) {
+            w_align =   8;
+            h_align = 2*8;
+        }
+        break;
+    case AV_PIX_FMT_BGR24:
+        if ((s->codec_id == AV_CODEC_ID_MSZH) ||
+            (s->codec_id == AV_CODEC_ID_ZLIB)) {
+            w_align = 4;
+            h_align = 4;
+        }
+        break;
+    case AV_PIX_FMT_RGB24:
+        if (s->codec_id == AV_CODEC_ID_CINEPAK) {
+            w_align = 4;
+            h_align = 4;
+        }
+        break;
+    case AV_PIX_FMT_BGR0:
+        if (s->codec_id == AV_CODEC_ID_ARGO) {
+            w_align = 8;
+            h_align = 8;
+        }
+        break;
+    default: /* any other format keeps the descriptor-derived (or default 1) alignment */
+        break;
+    }
+
+    if (s->codec_id == AV_CODEC_ID_IFF_ILBM) {
+        w_align = FFMAX(w_align, 8);
+    }
+
+    *width  = FFALIGN(*width, w_align);
+    *height = FFALIGN(*height, h_align);
+    if (s->codec_id == AV_CODEC_ID_H264 || s->lowres ||
+        s->codec_id == AV_CODEC_ID_VC1  || s->codec_id == AV_CODEC_ID_WMV3 ||
+        s->codec_id == AV_CODEC_ID_VP5  || s->codec_id == AV_CODEC_ID_VP6 ||
+        s->codec_id == AV_CODEC_ID_VP6F || s->codec_id == AV_CODEC_ID_VP6A
+    ) {
+        // some of the optimized chroma MC reads one line too much
+        // which is also done in mpeg decoders with lowres > 0
+        *height += 2;
+
+        // H.264 uses edge emulation for out of frame motion vectors, for this
+        // it requires a temporary area large enough to hold a 21x21 block,
+        // increasing the width ensures that the temporary area is large enough,
+        // the next rounded up width is 32
+        *width = FFMAX(*width, 32);
+    }
+    if (s->codec_id == AV_CODEC_ID_SVQ3) {
+        *width = FFMAX(*width, 32);
+    }
+
+    for (i = 0; i < 4; i++) /* only entries 0..3 are written, not all AV_NUM_DATA_POINTERS */
+        linesize_align[i] = STRIDE_ALIGN;
+}
+
+void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height)
+{   /* Pads *width / *height via avcodec_align_dimensions2(), then aligns *width to the largest stride alignment. */
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->pix_fmt);
+    int chroma_shift = desc ? desc->log2_chroma_w : 0; /* no descriptor (unknown pix_fmt): assume no subsampling instead of dereferencing NULL */
+    int linesize_align[AV_NUM_DATA_POINTERS];
+    int align;
+
+    avcodec_align_dimensions2(s, width, height, linesize_align);
+    align               = FFMAX(linesize_align[0], linesize_align[3]);
+    linesize_align[1] <<= chroma_shift; /* scale the alignments of planes 1 and 2 by the horizontal chroma shift */
+    linesize_align[2] <<= chroma_shift;
+    align               = FFMAX3(align, linesize_align[1], linesize_align[2]);
+    *width              = FFALIGN(*width, align);
+}
+#if FF_API_AVCODEC_CHROMA_POS
+int avcodec_enum_to_chroma_pos(int *xpos, int *ypos, enum AVChromaLocation pos)
+{
+    return av_chroma_location_enum_to_pos(xpos, ypos, pos); /* pure forwarder; result is returned unchanged */
+}
+
+enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos)
+{
+    return av_chroma_location_pos_to_enum(xpos, ypos); /* pure forwarder; result is returned unchanged */
+}
+#endif
+
+int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels,
+                             enum AVSampleFormat sample_fmt, const uint8_t *buf,
+                             int buf_size, int align)
+{
+    int i, err;
+    /* Size needed for frame->nb_samples samples of every channel. */
+    const int required = av_samples_get_buffer_size(NULL, nb_channels,
+                                                    frame->nb_samples, sample_fmt,
+                                                    align);
+    if (buf_size < required)
+        return AVERROR(EINVAL);
+
+    /* Planar audio with more channels than data[] slots gets its own pointer table. */
+    if (!av_sample_fmt_is_planar(sample_fmt) || nb_channels <= AV_NUM_DATA_POINTERS) {
+        frame->extended_data = frame->data;
+    } else if (!FF_ALLOCZ_TYPED_ARRAY(frame->extended_data, nb_channels)) {
+        return AVERROR(ENOMEM);
+    }
+
+    err = av_samples_fill_arrays(frame->extended_data, &frame->linesize[0],
+                                 (uint8_t *)(intptr_t)buf, nb_channels, frame->nb_samples,
+                                 sample_fmt, align);
+    if (err < 0) {
+        if (frame->extended_data != frame->data)
+            av_freep(&frame->extended_data);
+        return err;
+    }
+    /* Mirror the leading pointers of a separate table into data[]. */
+    if (frame->extended_data != frame->data)
+        for (i = 0; i < AV_NUM_DATA_POINTERS; i++)
+            frame->data[i] = frame->extended_data[i];
+
+    return err;
+}
+
+void ff_color_frame(AVFrame *frame, const int c[4])
+{   /* Fills each plane p of a planar frame with the constant sample value c[p]. */
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
+    int p, y;
+
+    av_assert0(desc->flags & AV_PIX_FMT_FLAG_PLANAR);
+
+    for (p = 0; p<desc->nb_components; p++) {
+        uint8_t *dst = frame->data[p];
+        int is_chroma = p == 1 || p == 2;
+        int bytes  = is_chroma ? AV_CEIL_RSHIFT(frame->width,  desc->log2_chroma_w) : frame->width; /* samples per row, despite the name */
+        int height = is_chroma ? AV_CEIL_RSHIFT(frame->height, desc->log2_chroma_h) : frame->height;
+        if (desc->comp[0].depth >= 9) { /* samples are written as uint16_t, so a row is 2*bytes bytes */
+            ((uint16_t*)dst)[0] = c[p];
+            av_memcpy_backptr(dst + 2, 2, 2*bytes - 2); /* replicate sample 0 over the rest of the row; the old count (bytes - 2) covered half a row and was -1 for a 1-sample row */
+            dst += frame->linesize[p];
+            for (y = 1; y < height; y++) { /* remaining rows are copies of the first one */
+                memcpy(dst, frame->data[p], 2*bytes);
+                dst += frame->linesize[p];
+            }
+        } else {
+            for (y = 0; y < height; y++) { /* one byte per sample: plain memset per row */
+                memset(dst, c[p], bytes);
+                dst += frame->linesize[p];
+            }
+        }
+    }
+}
+
+int avpriv_codec_get_cap_skip_frame_fill_param(const AVCodec *codec){ /* 1 if FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM is set in caps_internal, else 0 */
+    return !!(ffcodec(codec)->caps_internal & FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM); /* the ffcodec() result is dereferenced unchecked */
+}
+
+const char *avcodec_get_name(enum AVCodecID id)
+{
+    const AVCodecDescriptor *desc;
+    const AVCodec *impl;
+
+    if (id == AV_CODEC_ID_NONE)
+        return "none";
+
+    /* Preferred source: the codec descriptor. */
+    desc = avcodec_descriptor_get(id);
+    if (desc)
+        return desc->name;
+    /* No descriptor: warn, then try a decoder and finally an encoder. */
+    av_log(NULL, AV_LOG_WARNING, "Codec 0x%x is not in the full list.\n", id);
+    impl = avcodec_find_decoder(id);
+    if (!impl)
+        impl = avcodec_find_encoder(id);
+    return impl ? impl->name : "unknown_codec";
+}
+
+const char *av_get_profile_name(const AVCodec *codec, int profile)
+{
+    const AVProfile *entry;
+
+    if (profile == FF_PROFILE_UNKNOWN || !codec->profiles)
+        return NULL;
+    entry = codec->profiles;
+    while (entry->profile != FF_PROFILE_UNKNOWN && entry->profile != profile)
+        entry++;
+    /* Stopped on the match or on the FF_PROFILE_UNKNOWN terminator (profile is not UNKNOWN here). */
+    return entry->profile == profile ? entry->name : NULL;
+}
+
+const char *avcodec_profile_name(enum AVCodecID codec_id, int profile)
+{
+    const AVCodecDescriptor *desc = avcodec_descriptor_get(codec_id);
+    const AVProfile *entry;
+
+    if (profile == FF_PROFILE_UNKNOWN || !desc || !desc->profiles)
+        return NULL;
+
+    entry = desc->profiles;
+    while (entry->profile != FF_PROFILE_UNKNOWN && entry->profile != profile)
+        entry++;
+    /* Stopped on the match or on the FF_PROFILE_UNKNOWN terminator (profile is not UNKNOWN here). */
+    return entry->profile == profile ? entry->name : NULL;
+}
+
+int av_get_exact_bits_per_sample(enum AVCodecID codec_id)
+{   /* Fixed bits-per-sample value for the codecs listed below; 0 for every other codec. */
+    switch (codec_id) {
+    case AV_CODEC_ID_DFPWM:
+        return 1;
+    case AV_CODEC_ID_8SVX_EXP:
+    case AV_CODEC_ID_8SVX_FIB:
+    case AV_CODEC_ID_ADPCM_ARGO:
+    case AV_CODEC_ID_ADPCM_CT:
+    case AV_CODEC_ID_ADPCM_IMA_ALP:
+    case AV_CODEC_ID_ADPCM_IMA_AMV:
+    case AV_CODEC_ID_ADPCM_IMA_APC:
+    case AV_CODEC_ID_ADPCM_IMA_APM:
+    case AV_CODEC_ID_ADPCM_IMA_EA_SEAD:
+    case AV_CODEC_ID_ADPCM_IMA_OKI:
+    case AV_CODEC_ID_ADPCM_IMA_WS:
+    case AV_CODEC_ID_ADPCM_IMA_SSI:
+    case AV_CODEC_ID_ADPCM_G722:
+    case AV_CODEC_ID_ADPCM_YAMAHA:
+    case AV_CODEC_ID_ADPCM_AICA:
+        return 4;
+    case AV_CODEC_ID_DSD_LSBF:
+    case AV_CODEC_ID_DSD_MSBF:
+    case AV_CODEC_ID_DSD_LSBF_PLANAR:
+    case AV_CODEC_ID_DSD_MSBF_PLANAR:
+    case AV_CODEC_ID_PCM_ALAW:
+    case AV_CODEC_ID_PCM_MULAW:
+    case AV_CODEC_ID_PCM_VIDC:
+    case AV_CODEC_ID_PCM_S8:
+    case AV_CODEC_ID_PCM_S8_PLANAR:
+    case AV_CODEC_ID_PCM_SGA:
+    case AV_CODEC_ID_PCM_U8:
+    case AV_CODEC_ID_SDX2_DPCM:
+    case AV_CODEC_ID_CBD2_DPCM:
+    case AV_CODEC_ID_DERF_DPCM:
+    case AV_CODEC_ID_WADY_DPCM:
+        return 8;
+    case AV_CODEC_ID_PCM_S16BE:
+    case AV_CODEC_ID_PCM_S16BE_PLANAR:
+    case AV_CODEC_ID_PCM_S16LE:
+    case AV_CODEC_ID_PCM_S16LE_PLANAR:
+    case AV_CODEC_ID_PCM_U16BE:
+    case AV_CODEC_ID_PCM_U16LE:
+        return 16;
+    case AV_CODEC_ID_PCM_S24DAUD:
+    case AV_CODEC_ID_PCM_S24BE:
+    case AV_CODEC_ID_PCM_S24LE:
+    case AV_CODEC_ID_PCM_S24LE_PLANAR:
+    case AV_CODEC_ID_PCM_U24BE:
+    case AV_CODEC_ID_PCM_U24LE:
+        return 24;
+    case AV_CODEC_ID_PCM_S32BE:
+    case AV_CODEC_ID_PCM_S32LE:
+    case AV_CODEC_ID_PCM_S32LE_PLANAR:
+    case AV_CODEC_ID_PCM_U32BE:
+    case AV_CODEC_ID_PCM_U32LE:
+    case AV_CODEC_ID_PCM_F32BE:
+    case AV_CODEC_ID_PCM_F32LE:
+    case AV_CODEC_ID_PCM_F24LE: /* F24LE and F16LE are reported as 32 as well */
+    case AV_CODEC_ID_PCM_F16LE:
+        return 32;
+    case AV_CODEC_ID_PCM_F64BE:
+    case AV_CODEC_ID_PCM_F64LE:
+    case AV_CODEC_ID_PCM_S64BE:
+    case AV_CODEC_ID_PCM_S64LE:
+        return 64;
+    default: /* not a codec with a known constant sample size */
+        return 0;
+    }
+}
+
+enum AVCodecID av_get_pcm_codec(enum AVSampleFormat fmt, int be)
+{
+    static const enum AVCodecID map[][2] = {
+        [AV_SAMPLE_FMT_U8  ] = { AV_CODEC_ID_PCM_U8,    AV_CODEC_ID_PCM_U8    },
+        [AV_SAMPLE_FMT_S16 ] = { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE },
+        [AV_SAMPLE_FMT_S32 ] = { AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE },
+        [AV_SAMPLE_FMT_FLT ] = { AV_CODEC_ID_PCM_F32LE, AV_CODEC_ID_PCM_F32BE },
+        [AV_SAMPLE_FMT_DBL ] = { AV_CODEC_ID_PCM_F64LE, AV_CODEC_ID_PCM_F64BE },
+        [AV_SAMPLE_FMT_U8P ] = { AV_CODEC_ID_PCM_U8,    AV_CODEC_ID_PCM_U8    },
+        [AV_SAMPLE_FMT_S16P] = { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE },
+        [AV_SAMPLE_FMT_S32P] = { AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE },
+        [AV_SAMPLE_FMT_S64P] = { AV_CODEC_ID_PCM_S64LE, AV_CODEC_ID_PCM_S64BE },
+        [AV_SAMPLE_FMT_FLTP] = { AV_CODEC_ID_PCM_F32LE, AV_CODEC_ID_PCM_F32BE },
+        [AV_SAMPLE_FMT_DBLP] = { AV_CODEC_ID_PCM_F64LE, AV_CODEC_ID_PCM_F64BE },
+    };
+    /* Column 0 is little-endian, 1 big-endian; any other be value selects AV_NE(1, 0). */
+    const int col = (be == 0 || be == 1) ? be : AV_NE(1, 0);
+    if (fmt < 0 || fmt >= FF_ARRAY_ELEMS(map))
+        return AV_CODEC_ID_NONE;
+    return map[fmt][col];
+}
+
+int av_get_bits_per_sample(enum AVCodecID codec_id)
+{
+    /* Codecs answered here directly, before consulting the exact table. */
+    if (codec_id == AV_CODEC_ID_DFPWM)
+        return 1;
+    if (codec_id == AV_CODEC_ID_ADPCM_SBPRO_2)
+        return 2;
+    if (codec_id == AV_CODEC_ID_ADPCM_SBPRO_3)
+        return 3;
+    if (codec_id == AV_CODEC_ID_ADPCM_SBPRO_4 ||
+        codec_id == AV_CODEC_ID_ADPCM_IMA_WAV ||
+        codec_id == AV_CODEC_ID_ADPCM_IMA_QT  ||
+        codec_id == AV_CODEC_ID_ADPCM_SWF     ||
+        codec_id == AV_CODEC_ID_ADPCM_MS)
+        return 4;
+
+    /* Everything else is answered by the exact-size table. */
+    return av_get_exact_bits_per_sample(codec_id);
+}
+
+static int get_audio_frame_duration(enum AVCodecID id, int sr, int ch, int ba,
+                                    uint32_t tag, int bits_per_coded_sample, int64_t bitrate,
+                                    uint8_t * extradata, int frame_size, int frame_bytes)
+{   /* Derives a frame duration from the codec parameters by trying the rules below in order; returns 0 when none applies. */
+    int bps = av_get_exact_bits_per_sample(id);
+    int framecount = (ba > 0 && frame_bytes / ba > 0) ? frame_bytes / ba : 1; /* whole ba-byte blocks in the frame, at least 1 */
+
+    /* codecs with an exact constant bits per sample */
+    if (bps > 0 && ch > 0 && frame_bytes > 0 && ch < 32768 && bps < 32768)
+        return (frame_bytes * 8LL) / (bps * ch);
+    bps = bits_per_coded_sample; /* from here on bps is the caller-supplied value */
+
+    /* codecs with a fixed packet duration */
+    switch (id) {
+    case AV_CODEC_ID_ADPCM_ADX:    return   32;
+    case AV_CODEC_ID_ADPCM_IMA_QT: return   64;
+    case AV_CODEC_ID_ADPCM_EA_XAS: return  128;
+    case AV_CODEC_ID_AMR_NB:
+    case AV_CODEC_ID_EVRC:
+    case AV_CODEC_ID_GSM:
+    case AV_CODEC_ID_QCELP:
+    case AV_CODEC_ID_RA_288:       return  160;
+    case AV_CODEC_ID_AMR_WB:
+    case AV_CODEC_ID_GSM_MS:       return  320;
+    case AV_CODEC_ID_MP1:          return  384;
+    case AV_CODEC_ID_ATRAC1:       return  512;
+    case AV_CODEC_ID_ATRAC9:
+    case AV_CODEC_ID_ATRAC3:
+        if (framecount > INT_MAX/1024) /* 1024 * framecount would overflow int */
+            return 0;
+        return 1024 * framecount;
+    case AV_CODEC_ID_ATRAC3P:      return 2048;
+    case AV_CODEC_ID_MP2:
+    case AV_CODEC_ID_MUSEPACK7:    return 1152;
+    case AV_CODEC_ID_AC3:          return 1536;
+    case AV_CODEC_ID_FTR:          return 1024;
+    }
+
+    if (sr > 0) {
+        /* calc from sample rate */
+        if (id == AV_CODEC_ID_TTA)
+            return 256 * sr / 245;
+        else if (id == AV_CODEC_ID_DST)
+            return 588 * sr / 44100;
+        else if (id == AV_CODEC_ID_BINKAUDIO_DCT) {
+            if (sr / 22050 > 22) /* keeps the shift count below small enough */
+                return 0;
+            return (480 << (sr / 22050));
+        }
+
+        if (id == AV_CODEC_ID_MP3)
+            return sr <= 24000 ? 576 : 1152;
+    }
+
+    if (ba > 0) {
+        /* calc from block_align */
+        if (id == AV_CODEC_ID_SIPR) {
+            switch (ba) {
+            case 20: return 160;
+            case 19: return 144;
+            case 29: return 288;
+            case 37: return 480;
+            }
+        } else if (id == AV_CODEC_ID_ILBC) {
+            switch (ba) {
+            case 38: return 160;
+            case 50: return 240;
+            }
+        }
+    }
+
+    if (frame_bytes > 0) {
+        /* calc from frame_bytes only */
+        if (id == AV_CODEC_ID_TRUESPEECH)
+            return 240 * (frame_bytes / 32);
+        if (id == AV_CODEC_ID_NELLYMOSER)
+            return 256 * (frame_bytes / 64);
+        if (id == AV_CODEC_ID_RA_144)
+            return 160 * (frame_bytes / 20);
+        if (id == AV_CODEC_ID_APTX)
+            return 4 * (frame_bytes / 4);
+        if (id == AV_CODEC_ID_APTX_HD)
+            return 4 * (frame_bytes / 6);
+
+        if (bps > 0) {
+            /* calc from frame_bytes and bits_per_coded_sample */
+            if (id == AV_CODEC_ID_ADPCM_G726 || id == AV_CODEC_ID_ADPCM_G726LE)
+                return frame_bytes * 8 / bps;
+        }
+
+        if (ch > 0 && ch < INT_MAX/16) {
+            /* calc from frame_bytes and channels */
+            switch (id) {
+            case AV_CODEC_ID_FASTAUDIO:
+                return frame_bytes / (40 * ch) * 256;
+            case AV_CODEC_ID_ADPCM_IMA_MOFLEX:
+                return (frame_bytes - 4 * ch) / (128 * ch) * 256;
+            case AV_CODEC_ID_ADPCM_AFC:
+                return frame_bytes / (9 * ch) * 16;
+            case AV_CODEC_ID_ADPCM_PSX:
+            case AV_CODEC_ID_ADPCM_DTK:
+                frame_bytes /= 16 * ch;
+                if (frame_bytes > INT_MAX / 28) /* frame_bytes * 28 would overflow int */
+                    return 0;
+                return frame_bytes * 28;
+            case AV_CODEC_ID_ADPCM_4XM:
+            case AV_CODEC_ID_ADPCM_IMA_ACORN:
+            case AV_CODEC_ID_ADPCM_IMA_DAT4:
+            case AV_CODEC_ID_ADPCM_IMA_ISS:
+                return (frame_bytes - 4 * ch) * 2 / ch;
+            case AV_CODEC_ID_ADPCM_IMA_SMJPEG:
+                return (frame_bytes - 4) * 2 / ch;
+            case AV_CODEC_ID_ADPCM_IMA_AMV:
+                return (frame_bytes - 8) * 2;
+            case AV_CODEC_ID_ADPCM_THP:
+            case AV_CODEC_ID_ADPCM_THP_LE:
+                if (extradata)
+                    return frame_bytes * 14LL / (8 * ch);
+                break;
+            case AV_CODEC_ID_ADPCM_XA:
+                return (frame_bytes / 128) * 224 / ch;
+            case AV_CODEC_ID_INTERPLAY_DPCM:
+                return (frame_bytes - 6 - ch) / ch;
+            case AV_CODEC_ID_ROQ_DPCM:
+                return (frame_bytes - 8) / ch;
+            case AV_CODEC_ID_XAN_DPCM:
+                return (frame_bytes - 2 * ch) / ch;
+            case AV_CODEC_ID_MACE3:
+                return 3 * frame_bytes / ch;
+            case AV_CODEC_ID_MACE6:
+                return 6 * frame_bytes / ch;
+            case AV_CODEC_ID_PCM_LXF:
+                return 2 * (frame_bytes / (5 * ch));
+            case AV_CODEC_ID_IAC:
+            case AV_CODEC_ID_IMC:
+                return 4 * frame_bytes / ch;
+            }
+
+            if (tag) {
+                /* calc from frame_bytes, channels, and codec_tag */
+                if (id == AV_CODEC_ID_SOL_DPCM) {
+                    if (tag == 3)
+                        return frame_bytes / ch;
+                    else
+                        return frame_bytes * 2 / ch;
+                }
+            }
+
+            if (ba > 0) {
+                /* calc from frame_bytes, channels, and block_align */
+                int blocks = frame_bytes / ba;
+                int64_t tmp = 0; /* computed in 64 bits, range-checked before returning */
+                switch (id) {
+                case AV_CODEC_ID_ADPCM_IMA_WAV:
+                    if (bps < 2 || bps > 5)
+                        return 0;
+                    tmp = blocks * (1LL + (ba - 4 * ch) / (bps * ch) * 8);
+                    break;
+                case AV_CODEC_ID_ADPCM_IMA_DK3:
+                    tmp = blocks * (((ba - 16LL) * 2 / 3 * 4) / ch);
+                    break;
+                case AV_CODEC_ID_ADPCM_IMA_DK4:
+                    tmp = blocks * (1 + (ba - 4LL * ch) * 2 / ch);
+                    break;
+                case AV_CODEC_ID_ADPCM_IMA_RAD:
+                    tmp = blocks * ((ba - 4LL * ch) * 2 / ch);
+                    break;
+                case AV_CODEC_ID_ADPCM_MS:
+                    tmp = blocks * (2 + (ba - 7LL * ch) * 2LL / ch);
+                    break;
+                case AV_CODEC_ID_ADPCM_MTAF:
+                    tmp = blocks * (ba - 16LL) * 2 / ch;
+                    break;
+                case AV_CODEC_ID_ADPCM_XMD:
+                    tmp = blocks * 32;
+                    break;
+                }
+                if (tmp) { /* a zero result falls through to the later rules */
+                    if (tmp != (int)tmp) /* does not fit in int */
+                        return 0;
+                    return tmp;
+                }
+            }
+
+            if (bps > 0) {
+                /* calc from frame_bytes, channels, and bits_per_coded_sample */
+                switch (id) {
+                case AV_CODEC_ID_PCM_DVD:
+                    if(bps<4 || frame_bytes<3)
+                        return 0;
+                    return 2 * ((frame_bytes - 3) / ((bps * 2 / 8) * ch));
+                case AV_CODEC_ID_PCM_BLURAY:
+                    if(bps<4 || frame_bytes<4)
+                        return 0;
+                    return (frame_bytes - 4) / ((FFALIGN(ch, 2) * bps) / 8);
+                case AV_CODEC_ID_S302M:
+                    return 2 * (frame_bytes / ((bps + 4) / 4)) / ch;
+                }
+            }
+        }
+    }
+
+    /* Fall back on using frame_size */
+    if (frame_size > 1 && frame_bytes)
+        return frame_size;
+
+    //For WMA we currently have no other means to calculate duration thus we
+    //do it here by assuming CBR, which is true for all known cases.
+    if (bitrate > 0 && frame_bytes > 0 && sr > 0 && ba > 1) {
+        if (id == AV_CODEC_ID_WMAV1 || id == AV_CODEC_ID_WMAV2)
+            return  (frame_bytes * 8LL * sr) / bitrate;
+    }
+
+    return 0; /* no rule matched */
+}
+
+int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
+{
+    int nb_ch = avctx->ch_layout.nb_channels;
+    int dur; /* raw result of get_audio_frame_duration() for this context */
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (nb_ch == 0) /* ch_layout carries no count: use the old channels field */
+        nb_ch = avctx->channels;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    dur = get_audio_frame_duration(avctx->codec_id, avctx->sample_rate, nb_ch,
+                                   avctx->block_align, avctx->codec_tag,
+                                   avctx->bits_per_coded_sample, avctx->bit_rate,
+                                   avctx->extradata, avctx->frame_size,
+                                   frame_bytes);
+    return dur < 0 ? 0 : dur; /* negative results are reported as 0 */
+}
+
+int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes)
+{
+    int nb_ch = par->ch_layout.nb_channels;
+    int dur; /* raw result of get_audio_frame_duration() for these parameters */
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (nb_ch == 0) /* ch_layout carries no count: use the old channels field */
+        nb_ch = par->channels;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    dur = get_audio_frame_duration(par->codec_id, par->sample_rate, nb_ch,
+                                   par->block_align, par->codec_tag,
+                                   par->bits_per_coded_sample, par->bit_rate,
+                                   par->extradata, par->frame_size,
+                                   frame_bytes);
+    return dur < 0 ? 0 : dur; /* negative results are reported as 0 */
+}
+
+#if !HAVE_THREADS
+int ff_thread_init(AVCodecContext *s)
+{
+    return -1; /* compiled only when HAVE_THREADS is 0: thread setup always fails */
+}
+
+#endif /* !HAVE_THREADS */
+
+unsigned int av_xiphlacing(unsigned char *s, unsigned int v)
+{
+    /* v is written as a run of 0xff bytes followed by one byte below 0xff;
+     * the return value is the number of bytes stored at s. */
+    const unsigned int run = v / 0xff;
+    unsigned int pos;
+
+    for (pos = 0; pos < run; pos++)
+        s[pos] = 0xff;
+    s[run] = v - run * 0xff;
+
+    return run + 1;
+}
+
+int ff_match_2uint16(const uint16_t(*tab)[2], int size, int a, int b)
+{
+    int idx = 0;
+    while (idx < size && (tab[idx][0] != a || tab[idx][1] != b)) idx++;
+    return idx; /* index of the first row equal to (a, b); where the scan stopped if none */
+}
+
+const AVCodecHWConfig *avcodec_get_hw_config(const AVCodec *avcodec, int index)
+{
+    const FFCodec *const codec = ffcodec(avcodec);
+    int pos = 0;
+    if (index < 0 || !codec->hw_configs)
+        return NULL;
+    /* Walk the list up to index; a NULL entry on the way means there is no such config. */
+    while (pos <= index && codec->hw_configs[pos])
+        pos++;
+    return pos > index ? &codec->hw_configs[index]->public : NULL;
+}
+
+int ff_thread_ref_frame(ThreadFrame *dst, const ThreadFrame *src)
+{
+    int err;
+
+    /* Owners are copied first: the failure path below releases via dst->owner[0]. */
+    dst->owner[0] = src->owner[0];
+    dst->owner[1] = src->owner[1];
+
+    if ((err = av_frame_ref(dst->f, src->f)) < 0)
+        return err;
+
+    av_assert0(!dst->progress);
+    if (!src->progress)
+        return 0; /* source has no progress buffer to share */
+
+    dst->progress = av_buffer_ref(src->progress);
+    if (dst->progress)
+        return 0;
+    ff_thread_release_ext_buffer(dst->owner[0], dst); /* roll back on failure */
+    return AVERROR(ENOMEM);
+}
+
+#if !HAVE_THREADS /* replacements used when the build has no thread support */
+
+int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags)
+{
+    return ff_get_buffer(avctx, f, flags); /* plain pass-through */
+}
+
+int ff_thread_get_ext_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags)
+{
+    f->owner[0] = f->owner[1] = avctx; /* both owner slots refer to the one context */
+    return ff_get_buffer(avctx, f->f, flags);
+}
+
+void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f)
+{
+    if (f) /* a NULL frame is accepted and ignored */
+        av_frame_unref(f);
+}
+
+void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f)
+{
+    f->owner[0] = f->owner[1] = NULL; /* f itself must be non-NULL; only f->f is checked */
+    if (f->f)
+        av_frame_unref(f->f);
+}
+
+void ff_thread_finish_setup(AVCodecContext *avctx)
+{ /* intentionally empty in this configuration */
+}
+
+void ff_thread_report_progress(ThreadFrame *f, int progress, int field)
+{ /* intentionally empty in this configuration */
+}
+
+void ff_thread_await_progress(const ThreadFrame *f, int progress, int field)
+{ /* intentionally empty: returns immediately */
+}
+
+int ff_thread_can_start_frame(AVCodecContext *avctx)
+{
+    return 1; /* always answers yes */
+}
+
+int ff_slice_thread_init_progress(AVCodecContext *avctx)
+{
+    return 0; /* reports success without doing anything */
+}
+
+int ff_slice_thread_allocz_entries(AVCodecContext *avctx, int count)
+{
+    return 0; /* reports success without allocating anything */
+}
+
+void ff_thread_await_progress2(AVCodecContext *avctx, int field, int thread, int shift)
+{ /* intentionally empty: returns immediately */
+}
+
+void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n)
+{ /* intentionally empty in this configuration */
+}
+
+#endif /* !HAVE_THREADS */
+
+const uint8_t *avpriv_find_start_code(const uint8_t *av_restrict p,
+                                      const uint8_t *end,
+                                      uint32_t *av_restrict state)
+{
+    int i;
+    /* Scans [p, end) for the bytes 00 00 01, using *state as the history of bytes already seen. */
+    av_assert0(p <= end);
+    if (p >= end)
+        return end; /* empty input: *state is left untouched */
+
+    for (i = 0; i < 3; i++) { /* shift up to three input bytes into *state, one at a time */
+        uint32_t tmp = *state << 8;
+        *state = tmp + *(p++);
+        if (tmp == 0x100 || p == end) /* low 24 bits of the old state were 00 00 01, or input ran out */
+            return p;
+    }
+
+    while (p < end) { /* p[-3..-1] are the three most recently consumed bytes */
+        if      (p[-1] > 1      ) p += 3; /* rules out a 00 00 01 ending at p[-1], p[0] or p[1] */
+        else if (p[-2]          ) p += 2; /* rules out a 00 00 01 ending at p[-1] or p[0] */
+        else if (p[-3]|(p[-1]-1)) p++;    /* p[-3..-1] is not 00 00 01 */
+        else {                            /* p[-3..-1] == 00 00 01: take the byte after it too */
+            p++;
+            break;
+        }
+    }
+
+    p = FFMIN(p, end) - 4; /* the skips above may have stepped past end */
+    *state = AV_RB32(p);   /* NOTE(review): presumably a big-endian 32-bit load of the last four bytes */
+
+    return p + 4;
+}
+
+AVCPBProperties *av_cpb_properties_alloc(size_t *size)
+{
+    AVCPBProperties *cpb = av_mallocz(sizeof(*cpb));
+
+    if (cpb) {
+        /* vbv_delay is the only field given an explicit default here */
+        cpb->vbv_delay = UINT64_MAX;
+        if (size)
+            *size = sizeof(*cpb);
+    }
+    /* NULL when the allocation failed; *size is not written in that case */
+    return cpb;
+}
+
+AVCPBProperties *ff_add_cpb_side_data(AVCodecContext *avctx)
+{
+    AVPacketSideData *grown, *slot;
+    AVCPBProperties  *cpb;
+    size_t cpb_size;
+
+    /* Hand back the existing CPB entry when the context already carries one. */
+    for (int n = 0; n < avctx->nb_coded_side_data; n++) {
+        if (avctx->coded_side_data[n].type == AV_PKT_DATA_CPB_PROPERTIES)
+            return (AVCPBProperties *)avctx->coded_side_data[n].data;
+    }
+    cpb = av_cpb_properties_alloc(&cpb_size);
+    if (!cpb)
+        return NULL;
+
+    grown = av_realloc_array(avctx->coded_side_data, avctx->nb_coded_side_data + 1, sizeof(*grown));
+    if (!grown) {
+        av_freep(&cpb);
+        return NULL;
+    }
+    avctx->coded_side_data = grown;
+
+    /* Claim the new last element, then fill it in. */
+    slot       = &grown[avctx->nb_coded_side_data++];
+    slot->type = AV_PKT_DATA_CPB_PROPERTIES;
+    slot->data = (uint8_t*)cpb;
+    slot->size = cpb_size;
+
+    return cpb;
+}
+
+static unsigned bcd2uint(uint8_t bcd)
+{
+    /* Two packed decimal digits; a nibble above 9 makes the whole byte read as 0. */
+    const unsigned ones = bcd & 0xf;
+    const unsigned tens = bcd >> 4;
+    const int valid     = ones <= 9 && tens <= 9;
+    return valid ? tens * 10 + ones : 0;
+}
+
+int ff_alloc_timecode_sei(const AVFrame *frame, AVRational rate, size_t prefix_len,
+                     void **data, size_t *sei_size)
+{
+    AVFrameSideData *sd = NULL;
+    uint8_t *sei_data;
+    PutBitContext pb;
+    uint32_t *tc;
+    int m;
+
+    if (frame) /* a NULL frame is treated like a frame without timecode */
+        sd = av_frame_get_side_data(frame, AV_FRAME_DATA_S12M_TIMECODE);
+
+    if (!sd) { /* nothing to write: success with *data = NULL, *sei_size is not touched */
+        *data = NULL;
+        return 0;
+    }
+    tc =  (uint32_t*)sd->data;
+    m  = tc[0] & 3; /* count of timecode words used below, read from tc[1..m] */
+
+    *sei_size = sizeof(uint32_t) * 4; /* payload size is fixed at 16 bytes */
+    *data = av_mallocz(*sei_size + prefix_len); /* handed to the caller; payload follows prefix_len bytes */
+    if (!*data)
+        return AVERROR(ENOMEM);
+    sei_data = (uint8_t*)*data + prefix_len;
+
+    init_put_bits(&pb, sei_data, *sei_size);
+    put_bits(&pb, 2, m); // num_clock_ts
+
+    for (int j = 1; j <= m; j++) {
+        uint32_t tcsmpte = tc[j];
+        unsigned hh   = bcd2uint(tcsmpte     & 0x3f);    // 6-bit hours
+        unsigned mm   = bcd2uint(tcsmpte>>8  & 0x7f);    // 7-bit minutes
+        unsigned ss   = bcd2uint(tcsmpte>>16 & 0x7f);    // 7-bit seconds
+        unsigned ff   = bcd2uint(tcsmpte>>24 & 0x3f);    // 6-bit frames
+        unsigned drop = tcsmpte & 1<<30 && !0;  // 1-bit drop if not arbitrary bit
+
+        /* Calculate frame number of HEVC by SMPTE ST 12-1:2014 Sec 12.2 if rate > 30FPS */
+        if (av_cmp_q(rate, (AVRational) {30, 1}) == 1) {
+            unsigned pc; /* extra low bit added to the doubled frame count */
+            ff *= 2;
+            if (av_cmp_q(rate, (AVRational) {50, 1}) == 0)
+                pc = !!(tcsmpte & 1 << 7);  /* exactly 50 fps: taken from bit 7 */
+            else
+                pc = !!(tcsmpte & 1 << 23); /* any other rate above 30: taken from bit 23 */
+            ff = (ff + pc) & 0x7f;
+        }
+
+        put_bits(&pb, 1, 1); // clock_timestamp_flag
+        put_bits(&pb, 1, 1); // units_field_based_flag
+        put_bits(&pb, 5, 0); // counting_type
+        put_bits(&pb, 1, 1); // full_timestamp_flag
+        put_bits(&pb, 1, 0); // discontinuity_flag
+        put_bits(&pb, 1, drop); /* drop-frame bit computed above */
+        put_bits(&pb, 9, ff);   /* frame number */
+        put_bits(&pb, 6, ss);   /* seconds */
+        put_bits(&pb, 6, mm);   /* minutes */
+        put_bits(&pb, 5, hh);   /* hours */
+        put_bits(&pb, 5, 0);
+    }
+    flush_put_bits(&pb);
+
+    return 0;
+}
+
+int64_t ff_guess_coded_bitrate(AVCodecContext *avctx)
+{
+    AVRational framerate = avctx->framerate;
+    int bits_per_coded_sample = avctx->bits_per_coded_sample;
+
+    /* Guess = bits per sample * width * height * frame rate; 0 when it cannot be formed. */
+    if (!(framerate.num && framerate.den)) /* no frame rate: invert the time base */
+        framerate = av_inv_q(avctx->time_base);
+    if (!(framerate.num && framerate.den))
+        return 0;
+
+    if (!bits_per_coded_sample) {
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
+        if (!desc) /* unknown pixel format: do not hand NULL to av_get_bits_per_pixel() */
+            return 0;
+        bits_per_coded_sample = av_get_bits_per_pixel(desc);
+    }
+    return (int64_t)bits_per_coded_sample * avctx->width * avctx->height *
+           framerate.num / framerate.den;
+}
+
+int ff_int_from_list_or_default(void *ctx, const char * val_name, int val,
+                                const int * array_valid_values, int default_value)
+{
+    const int *candidate;
+
+    /* array_valid_values ends with an INT_MAX sentinel. The sentinel is
+     * tested before the comparison, so val == INT_MAX is never accepted
+     * as a list member. */
+    for (candidate = array_valid_values; *candidate != INT_MAX; candidate++) {
+        if (*candidate == val)
+            return val;
+    }
+
+    /* val is not a valid value: log it and hand back the default instead */
+    av_log(ctx, AV_LOG_DEBUG,
+           "%s %d are not supported. Set to default value : %d\n", val_name, val, default_value);
+    return default_value;
+}
diff --git a/media/ffvpx/libavcodec/vaapi.h b/media/ffvpx/libavcodec/vaapi.h
new file mode 100644
index 0000000000..2cf7da5889
--- /dev/null
+++ b/media/ffvpx/libavcodec/vaapi.h
@@ -0,0 +1,86 @@
+/*
+ * Video Acceleration API (shared data between FFmpeg and the video player)
+ * HW decode acceleration for MPEG-2, MPEG-4, H.264 and VC-1
+ *
+ * Copyright (C) 2008-2009 Splitted-Desktop Systems
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VAAPI_H
+#define AVCODEC_VAAPI_H
+
+/**
+ * @file
+ * @ingroup lavc_codec_hwaccel_vaapi
+ * Public libavcodec VA API header.
+ */
+
+#include <stdint.h>
+#include "libavutil/attributes.h"
+#include "version.h"
+
+#if FF_API_STRUCT_VAAPI_CONTEXT
+
+/**
+ * @defgroup lavc_codec_hwaccel_vaapi VA API Decoding
+ * @ingroup lavc_codec_hwaccel
+ * @{
+ */
+
+/**
+ * This structure is used to share data between the FFmpeg library and
+ * the client video application.
+ * This shall be zero-allocated and available as
+ * AVCodecContext.hwaccel_context. All user members can be set once
+ * during initialization or through each AVCodecContext.get_buffer()
+ * function call. In any case, they must be valid prior to calling
+ * decoding functions.
+ *
+ * Deprecated: use AVCodecContext.hw_frames_ctx instead.
+ */
+struct attribute_deprecated vaapi_context {
+    /**
+     * Window system dependent data
+     *
+     * - encoding: unused
+     * - decoding: Set by user
+     */
+    void *display; /* opaque in this header; NOTE(review): presumably a VADisplay, confirm */
+
+    /**
+     * Configuration ID
+     *
+     * - encoding: unused
+     * - decoding: Set by user
+     */
+    uint32_t config_id; /* NOTE(review): presumably a VAConfigID value, confirm */
+
+    /**
+     * Context ID (video decode pipeline)
+     *
+     * - encoding: unused
+     * - decoding: Set by user
+     */
+    uint32_t context_id; /* NOTE(review): presumably a VAContextID value, confirm */
+};
+
+/** @} */
+
+#endif /* FF_API_STRUCT_VAAPI_CONTEXT */
+
+#endif /* AVCODEC_VAAPI_H */
diff --git a/media/ffvpx/libavcodec/vaapi_av1.c b/media/ffvpx/libavcodec/vaapi_av1.c
new file mode 100644
index 0000000000..d0339b2705
--- /dev/null
+++ b/media/ffvpx/libavcodec/vaapi_av1.c
@@ -0,0 +1,451 @@
+/*
+ * AV1 HW decode acceleration through VA API
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/frame.h"
+#include "libavutil/pixdesc.h"
+#include "hwconfig.h"
+#include "vaapi_decode.h"
+#include "internal.h"
+#include "av1dec.h"
+#include "thread.h"
+
+typedef struct VAAPIAV1FrameRef {
+    AVFrame *frame; /* holder allocated at init; references tmp_frame once end_frame stores a copy */
+    int valid;      /* 1 when frame should be used as the reference surface for this slot, else 0 */
+} VAAPIAV1FrameRef;
+
+typedef struct VAAPIAV1DecContext {
+    VAAPIDecodeContext base;
+
+    /**
+     * In the film grain case VAAPI generates two outputs for each frame:
+     * current_frame does not have film grain applied and is used as the
+     * reference for following frames, so the reference list is maintained
+     * here without film grain. current_display_picture has film grain
+     * applied and is what gets pushed downstream.
+     */
+    VAAPIAV1FrameRef ref_tab[AV1_NUM_REF_FRAMES];
+    AVFrame *tmp_frame; /* output surface of the current picture when grain is applied */
+} VAAPIAV1DecContext;
+
+static VASurfaceID vaapi_av1_surface_id(AV1Frame *vf)
+{
+    /* A missing frame maps to VA_INVALID_SURFACE instead of being dereferenced. */
+    if (!vf)
+        return VA_INVALID_SURFACE;
+    return ff_vaapi_get_surface_id(vf->f);
+}
+
+static int8_t vaapi_av1_get_bit_depth_idx(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    const AV1RawSequenceHeader *seq = s->raw_seq;
+
+    /* Result encoding: 0 = 8 bit, 1 = 10 bit, 2 = 12 bit; -1 for a profile above 2. */
+    if (seq->seq_profile > 2) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Couldn't get bit depth from profile:%d.\n", seq->seq_profile);
+        return -1;
+    }
+    if (!seq->color_config.high_bitdepth)
+        return 0;
+    /* High bit depth means 10 bit, except that profile 2 may additionally
+     * select 12 bit through twelve_bit. */
+    return (seq->seq_profile == 2 && seq->color_config.twelve_bit) ? 2 : 1;
+}
+
+static int vaapi_av1_decode_init(AVCodecContext *avctx)
+{
+    VAAPIAV1DecContext *ctx = avctx->internal->hwaccel_priv_data;
+
+    /* One scratch frame plus one holder per reference slot, then the common VAAPI setup. */
+    ctx->tmp_frame = av_frame_alloc();
+    if (ctx->tmp_frame == NULL) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to allocate frame.\n");
+        return AVERROR(ENOMEM);
+    }
+    for (int slot = 0; slot < FF_ARRAY_ELEMS(ctx->ref_tab); slot++) {
+        VAAPIAV1FrameRef *ref = &ctx->ref_tab[slot];
+        ref->frame = av_frame_alloc();
+        if (ref->frame == NULL) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Failed to allocate reference table frame %d.\n", slot);
+            return AVERROR(ENOMEM);
+        }
+        ref->valid = 0;
+    }
+    return ff_vaapi_decode_init(avctx);
+}
+
+static int vaapi_av1_decode_uninit(AVCodecContext *avctx)
+{
+    VAAPIAV1DecContext *ctx = avctx->internal->hwaccel_priv_data;
+
+    /* Each frame is released only if it still holds a buffer, then freed. */
+    if (ctx->tmp_frame->buf[0] != NULL)
+        ff_thread_release_buffer(avctx, ctx->tmp_frame);
+    av_frame_free(&ctx->tmp_frame);
+    for (int slot = 0; slot < FF_ARRAY_ELEMS(ctx->ref_tab); slot++) {
+        AVFrame **held = &ctx->ref_tab[slot].frame;
+        if ((*held)->buf[0] != NULL)
+            ff_thread_release_buffer(avctx, *held);
+        av_frame_free(held);
+    }
+    return ff_vaapi_decode_uninit(avctx);
+}
+
+
+static int vaapi_av1_start_frame(AVCodecContext *avctx,
+                                 av_unused const uint8_t *buffer,
+                                 av_unused uint32_t size)
+{
+    /* Fill the VA-API picture parameter buffer for the current frame from the
+     * parsed sequence/frame headers and attach it to the frame's picture.
+     * Returns 0 on success; on failure the picture is cancelled and a
+     * negative error code is returned. */
+    AV1DecContext *s = avctx->priv_data;
+    const AV1RawSequenceHeader *seq = s->raw_seq;
+    const AV1RawFrameHeader *frame_header = s->raw_frame_header;
+    const AV1RawFilmGrainParams *film_grain = &s->cur_frame.film_grain;
+    VAAPIDecodePicture *pic = s->cur_frame.hwaccel_picture_private;
+    VAAPIAV1DecContext *ctx = avctx->internal->hwaccel_priv_data;
+    VADecPictureParameterBufferAV1 pic_param;
+    int8_t bit_depth_idx;
+    int err = 0;
+    int apply_grain = !(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) && film_grain->apply_grain;
+    uint8_t remap_lr_type[4] = {AV1_RESTORE_NONE, AV1_RESTORE_SWITCHABLE, AV1_RESTORE_WIENER, AV1_RESTORE_SGRPROJ};
+    uint8_t segmentation_feature_signed[AV1_SEG_LVL_MAX] = {1, 1, 1, 1, 1, 0, 0, 0};
+    uint8_t segmentation_feature_max[AV1_SEG_LVL_MAX] = {255, AV1_MAX_LOOP_FILTER,
+        AV1_MAX_LOOP_FILTER, AV1_MAX_LOOP_FILTER, AV1_MAX_LOOP_FILTER, 7 , 0 , 0 };
+
+    bit_depth_idx = vaapi_av1_get_bit_depth_idx(avctx);
+    if (bit_depth_idx < 0) {
+        err = AVERROR_INVALIDDATA; /* err is still 0 here; fail must not report success */
+        goto fail;
+    }
+    /* Hardware grain: decode into tmp_frame so the un-grained picture stays usable as reference. */
+    if (apply_grain) {
+        if (ctx->tmp_frame->buf[0])
+            ff_thread_release_buffer(avctx, ctx->tmp_frame);
+        err = ff_thread_get_buffer(avctx, ctx->tmp_frame, AV_GET_BUFFER_FLAG_REF);
+        if (err < 0)
+            goto fail;
+        pic->output_surface = ff_vaapi_get_surface_id(ctx->tmp_frame);
+    } else {
+        pic->output_surface = vaapi_av1_surface_id(&s->cur_frame);
+    }
+
+    memset(&pic_param, 0, sizeof(VADecPictureParameterBufferAV1));
+    pic_param = (VADecPictureParameterBufferAV1) {
+        .profile                    = seq->seq_profile,
+        .order_hint_bits_minus_1    = seq->order_hint_bits_minus_1,
+        .bit_depth_idx              = bit_depth_idx,
+        .matrix_coefficients        = seq->color_config.matrix_coefficients,
+        .current_frame              = pic->output_surface,
+        .current_display_picture    = vaapi_av1_surface_id(&s->cur_frame),
+        .frame_width_minus1         = frame_header->frame_width_minus_1,
+        .frame_height_minus1        = frame_header->frame_height_minus_1,
+        .primary_ref_frame          = frame_header->primary_ref_frame,
+        .order_hint                 = frame_header->order_hint,
+        .tile_cols                  = frame_header->tile_cols,
+        .tile_rows                  = frame_header->tile_rows,
+        .context_update_tile_id     = frame_header->context_update_tile_id,
+        .superres_scale_denominator = frame_header->use_superres ?
+                                        frame_header->coded_denom + AV1_SUPERRES_DENOM_MIN :
+                                        AV1_SUPERRES_NUM,
+        .interp_filter              = frame_header->interpolation_filter,
+        .filter_level[0]            = frame_header->loop_filter_level[0],
+        .filter_level[1]            = frame_header->loop_filter_level[1],
+        .filter_level_u             = frame_header->loop_filter_level[2],
+        .filter_level_v             = frame_header->loop_filter_level[3],
+        .base_qindex                = frame_header->base_q_idx,
+        .y_dc_delta_q               = frame_header->delta_q_y_dc,
+        .u_dc_delta_q               = frame_header->delta_q_u_dc,
+        .u_ac_delta_q               = frame_header->delta_q_u_ac,
+        .v_dc_delta_q               = frame_header->delta_q_v_dc,
+        .v_ac_delta_q               = frame_header->delta_q_v_ac,
+        .cdef_damping_minus_3       = frame_header->cdef_damping_minus_3,
+        .cdef_bits                  = frame_header->cdef_bits,
+        .seq_info_fields.fields = {
+            .still_picture              = seq->still_picture,
+            .use_128x128_superblock     = seq->use_128x128_superblock,
+            .enable_filter_intra        = seq->enable_filter_intra,
+            .enable_intra_edge_filter   = seq->enable_intra_edge_filter,
+            .enable_interintra_compound = seq->enable_interintra_compound,
+            .enable_masked_compound     = seq->enable_masked_compound,
+            .enable_dual_filter         = seq->enable_dual_filter,
+            .enable_order_hint          = seq->enable_order_hint,
+            .enable_jnt_comp            = seq->enable_jnt_comp,
+            .enable_cdef                = seq->enable_cdef,
+            .mono_chrome                = seq->color_config.mono_chrome,
+            .color_range                = seq->color_config.color_range,
+            .subsampling_x              = seq->color_config.subsampling_x,
+            .subsampling_y              = seq->color_config.subsampling_y,
+            .chroma_sample_position     = seq->color_config.chroma_sample_position,
+            .film_grain_params_present  = seq->film_grain_params_present &&
+                                          !(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN),
+        },
+        .seg_info.segment_info_fields.bits = {
+            .enabled         = frame_header->segmentation_enabled,
+            .update_map      = frame_header->segmentation_update_map,
+            .temporal_update = frame_header->segmentation_temporal_update,
+            .update_data     = frame_header->segmentation_update_data,
+        },
+        .film_grain_info = {
+            .film_grain_info_fields.bits = {
+                .apply_grain              = apply_grain,
+                .chroma_scaling_from_luma = film_grain->chroma_scaling_from_luma,
+                .grain_scaling_minus_8    = film_grain->grain_scaling_minus_8,
+                .ar_coeff_lag             = film_grain->ar_coeff_lag,
+                .ar_coeff_shift_minus_6   = film_grain->ar_coeff_shift_minus_6,
+                .grain_scale_shift        = film_grain->grain_scale_shift,
+                .overlap_flag             = film_grain->overlap_flag,
+                .clip_to_restricted_range = film_grain->clip_to_restricted_range,
+            },
+            .grain_seed    = film_grain->grain_seed,
+            .num_y_points  = film_grain->num_y_points,
+            .num_cb_points = film_grain->num_cb_points,
+            .num_cr_points = film_grain->num_cr_points,
+            .cb_mult       = film_grain->cb_mult,
+            .cb_luma_mult  = film_grain->cb_luma_mult,
+            .cb_offset     = film_grain->cb_offset,
+            .cr_mult       = film_grain->cr_mult,
+            .cr_luma_mult  = film_grain->cr_luma_mult,
+            .cr_offset     = film_grain->cr_offset,
+        },
+        .pic_info_fields.bits = {
+            .frame_type                   = frame_header->frame_type,
+            .show_frame                   = frame_header->show_frame,
+            .showable_frame               = frame_header->showable_frame,
+            .error_resilient_mode         = frame_header->error_resilient_mode,
+            .disable_cdf_update           = frame_header->disable_cdf_update,
+            .allow_screen_content_tools   = frame_header->allow_screen_content_tools,
+            .force_integer_mv             = frame_header->force_integer_mv,
+            .allow_intrabc                = frame_header->allow_intrabc,
+            .use_superres                 = frame_header->use_superres,
+            .allow_high_precision_mv      = frame_header->allow_high_precision_mv,
+            .is_motion_mode_switchable    = frame_header->is_motion_mode_switchable,
+            .use_ref_frame_mvs            = frame_header->use_ref_frame_mvs,
+            .disable_frame_end_update_cdf = frame_header->disable_frame_end_update_cdf,
+            .uniform_tile_spacing_flag    = frame_header->uniform_tile_spacing_flag,
+            .allow_warped_motion          = frame_header->allow_warped_motion,
+        },
+        .loop_filter_info_fields.bits = {
+            .sharpness_level        = frame_header->loop_filter_sharpness,
+            .mode_ref_delta_enabled = frame_header->loop_filter_delta_enabled,
+            .mode_ref_delta_update  = frame_header->loop_filter_delta_update,
+        },
+        .mode_control_fields.bits = {
+            .delta_q_present_flag  = frame_header->delta_q_present,
+            .log2_delta_q_res      = frame_header->delta_q_res,
+            .delta_lf_present_flag = frame_header->delta_lf_present,
+            .log2_delta_lf_res     = frame_header->delta_lf_res,
+            .delta_lf_multi        = frame_header->delta_lf_multi,
+            .tx_mode               = frame_header->tx_mode,
+            .reference_select      = frame_header->reference_select,
+            .reduced_tx_set_used   = frame_header->reduced_tx_set,
+            .skip_mode_present     = frame_header->skip_mode_present,
+        },
+        .loop_restoration_fields.bits = {
+            .yframe_restoration_type  = remap_lr_type[frame_header->lr_type[0]],
+            .cbframe_restoration_type = remap_lr_type[frame_header->lr_type[1]],
+            .crframe_restoration_type = remap_lr_type[frame_header->lr_type[2]],
+            .lr_unit_shift            = frame_header->lr_unit_shift,
+            .lr_uv_shift              = frame_header->lr_uv_shift,
+        },
+        .qmatrix_fields.bits = {
+            .using_qmatrix = frame_header->using_qmatrix,
+            .qm_y          = frame_header->qm_y,
+            .qm_u          = frame_header->qm_u,
+            .qm_v          = frame_header->qm_v,
+        }
+    };
+    /* A shown key frame takes no references; otherwise a valid ref_tab entry overrides s->ref[]. */
+    for (int i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+        if (pic_param.pic_info_fields.bits.frame_type == AV1_FRAME_KEY && frame_header->show_frame)
+            pic_param.ref_frame_map[i] = VA_INVALID_ID;
+        else
+            pic_param.ref_frame_map[i] = ctx->ref_tab[i].valid ?
+                                         ff_vaapi_get_surface_id(ctx->ref_tab[i].frame) :
+                                         vaapi_av1_surface_id(&s->ref[i]);
+    }
+    for (int i = 0; i < AV1_REFS_PER_FRAME; i++)
+        pic_param.ref_frame_idx[i] = frame_header->ref_frame_idx[i];
+    for (int i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++)
+        pic_param.ref_deltas[i] = frame_header->loop_filter_ref_deltas[i];
+    for (int i = 0; i < 2; i++)
+        pic_param.mode_deltas[i] = frame_header->loop_filter_mode_deltas[i];
+    for (int i = 0; i < (1 << frame_header->cdef_bits); i++) {
+        pic_param.cdef_y_strengths[i] =
+            (frame_header->cdef_y_pri_strength[i] << 2) +
+                frame_header->cdef_y_sec_strength[i];
+        pic_param.cdef_uv_strengths[i] =
+            (frame_header->cdef_uv_pri_strength[i] << 2) +
+                frame_header->cdef_uv_sec_strength[i];
+    }
+    for (int i = 0; i < frame_header->tile_cols; i++)
+        pic_param.width_in_sbs_minus_1[i] =
+            frame_header->width_in_sbs_minus_1[i];
+    for (int i = 0; i < frame_header->tile_rows; i++)
+        pic_param.height_in_sbs_minus_1[i] =
+            frame_header->height_in_sbs_minus_1[i];
+    for (int i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
+        pic_param.wm[i - 1].invalid = s->cur_frame.gm_invalid[i];
+        pic_param.wm[i - 1].wmtype  = s->cur_frame.gm_type[i];
+        for (int j = 0; j < 6; j++)
+            pic_param.wm[i - 1].wmmat[j] = s->cur_frame.gm_params[i][j];
+    }
+    for (int i = 0; i < AV1_MAX_SEGMENTS; i++) {
+        for (int j = 0; j < AV1_SEG_LVL_MAX; j++) {
+            pic_param.seg_info.feature_mask[i] |= (frame_header->feature_enabled[i][j] << j);
+            if (segmentation_feature_signed[j])
+                pic_param.seg_info.feature_data[i][j] = av_clip(frame_header->feature_value[i][j],
+                    -segmentation_feature_max[j], segmentation_feature_max[j]);
+            else
+                pic_param.seg_info.feature_data[i][j] = av_clip(frame_header->feature_value[i][j],
+                    0, segmentation_feature_max[j]);
+        }
+    }
+    if (apply_grain) {
+        for (int i = 0; i < film_grain->num_y_points; i++) {
+            pic_param.film_grain_info.point_y_value[i] =
+                film_grain->point_y_value[i];
+            pic_param.film_grain_info.point_y_scaling[i] =
+                film_grain->point_y_scaling[i];
+        }
+        for (int i = 0; i < film_grain->num_cb_points; i++) {
+            pic_param.film_grain_info.point_cb_value[i] =
+                film_grain->point_cb_value[i];
+            pic_param.film_grain_info.point_cb_scaling[i] =
+                film_grain->point_cb_scaling[i];
+        }
+        for (int i = 0; i < film_grain->num_cr_points; i++) {
+            pic_param.film_grain_info.point_cr_value[i] =
+                film_grain->point_cr_value[i];
+            pic_param.film_grain_info.point_cr_scaling[i] =
+                film_grain->point_cr_scaling[i];
+        }
+        for (int i = 0; i < 24; i++)
+            pic_param.film_grain_info.ar_coeffs_y[i] =
+                film_grain->ar_coeffs_y_plus_128[i] - 128;
+        for (int i = 0; i < 25; i++) {
+            pic_param.film_grain_info.ar_coeffs_cb[i] =
+                film_grain->ar_coeffs_cb_plus_128[i] - 128;
+            pic_param.film_grain_info.ar_coeffs_cr[i] =
+                film_grain->ar_coeffs_cr_plus_128[i] - 128;
+        }
+    }
+    err = ff_vaapi_decode_make_param_buffer(avctx, pic,
+                                            VAPictureParameterBufferType,
+                                            &pic_param, sizeof(pic_param));
+    if (err < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    ff_vaapi_decode_cancel(avctx, pic);
+    return err;
+}
+
+static int vaapi_av1_end_frame(AVCodecContext *avctx)
+{
+    const AV1DecContext *s = avctx->priv_data;
+    const AV1RawFrameHeader *header = s->raw_frame_header;
+    const AV1RawFilmGrainParams *film_grain = &s->cur_frame.film_grain;
+    VAAPIDecodePicture *pic = s->cur_frame.hwaccel_picture_private;
+    VAAPIAV1DecContext *ctx = avctx->internal->hwaccel_priv_data;
+    int apply_grain = !(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) && film_grain->apply_grain;
+    int ret;
+    /* Submit the picture; on failure the reference table is left as it was. */
+    ret = ff_vaapi_decode_issue(avctx, pic);
+    if (ret < 0)
+        return ret;
+
+    /* Update every slot this frame refreshes. With grain, the slot keeps a reference
+     * to tmp_frame (the un-grained output); otherwise it is marked unused. */
+    for (int i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+        if (!(header->refresh_frame_flags & (1 << i)))
+            continue;
+        if (ctx->ref_tab[i].frame->buf[0])
+            ff_thread_release_buffer(avctx, ctx->ref_tab[i].frame);
+        /* Invalidate first so a failed av_frame_ref() cannot leave an emptied slot flagged valid. */
+        ctx->ref_tab[i].valid = 0;
+        if (apply_grain) {
+            ret = av_frame_ref(ctx->ref_tab[i].frame, ctx->tmp_frame);
+            if (ret < 0)
+                return ret;
+            ctx->ref_tab[i].valid = 1;
+        }
+    }
+    return 0;
+}
+
+static int vaapi_av1_decode_slice(AVCodecContext *avctx,
+                                  const uint8_t *buffer,
+                                  uint32_t size)
+{
+    const AV1DecContext *dec = avctx->priv_data;
+    VAAPIDecodePicture *pic  = dec->cur_frame.hwaccel_picture_private;
+    VASliceParameterBufferAV1 tile_param;
+    int tile, ret;
+
+    /* One slice parameter buffer per tile of the current tile group; every
+     * call passes the same data buffer, the tile is selected by offset/size. */
+    for (tile = dec->tg_start; tile <= dec->tg_end; tile++) {
+        memset(&tile_param, 0, sizeof(tile_param));
+        tile_param = (VASliceParameterBufferAV1) {
+            .slice_data_size   = dec->tile_group_info[tile].tile_size,
+            .slice_data_offset = dec->tile_group_info[tile].tile_offset,
+            .slice_data_flag   = VA_SLICE_DATA_FLAG_ALL,
+            .tile_row          = dec->tile_group_info[tile].tile_row,
+            .tile_column       = dec->tile_group_info[tile].tile_column,
+            .tg_start          = dec->tg_start,
+            .tg_end            = dec->tg_end,
+        };
+
+        ret = ff_vaapi_decode_make_slice_buffer(avctx, pic, &tile_param,
+                                                sizeof(tile_param),
+                                                buffer, size);
+        if (ret == 0)
+            continue;
+        /* Any non-zero status cancels the whole picture. */
+        ff_vaapi_decode_cancel(avctx, pic);
+        return ret;
+    }
+    return 0;
+}
+
+const AVHWAccel ff_av1_vaapi_hwaccel = {
+    .name                 = "av1_vaapi",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_AV1,
+    .pix_fmt              = AV_PIX_FMT_VAAPI,
+    .start_frame          = vaapi_av1_start_frame,  /* builds the picture parameter buffer */
+    .end_frame            = vaapi_av1_end_frame,    /* issues the picture, then updates ref_tab */
+    .decode_slice         = vaapi_av1_decode_slice, /* one slice parameter buffer per tile */
+    .frame_priv_data_size = sizeof(VAAPIDecodePicture),
+    .init                 = vaapi_av1_decode_init,   /* allocates tmp_frame and the ref_tab frames */
+    .uninit               = vaapi_av1_decode_uninit, /* releases and frees those frames */
+    .frame_params         = ff_vaapi_common_frame_params,
+    .priv_data_size       = sizeof(VAAPIAV1DecContext),
+    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
+};
diff --git a/media/ffvpx/libavcodec/vaapi_decode.c b/media/ffvpx/libavcodec/vaapi_decode.c
new file mode 100644
index 0000000000..ab8c12e364
--- /dev/null
+++ b/media/ffvpx/libavcodec/vaapi_decode.c
@@ -0,0 +1,726 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/pixdesc.h"
+
+#include "avcodec.h"
+#include "decode.h"
+#include "internal.h"
+#include "vaapi_decode.h"
+#include "vaapi_hevc.h"
+
+
+/**
+ * Create a single-element VA buffer of the given type from caller data and
+ * append its ID to the picture's parameter-buffer list.
+ *
+ * @param avctx  codec context; its hwaccel private data supplies the VA
+ *               display and context
+ * @param pic    picture whose param_buffers array receives the new ID
+ * @param type   VA buffer type handed to vaCreateBuffer()
+ * @param data   initial buffer contents
+ * @param size   size of data in bytes
+ * @return 0 on success, AVERROR(EIO) if vaCreateBuffer() fails
+ */
+int ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx,
+                                      VAAPIDecodePicture *pic,
+                                      int type,
+                                      const void *data,
+                                      size_t size)
+{
+    VAAPIDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    VABufferID new_id;
+    VAStatus status;
+
+    /* The fixed-size ID array must still have a free slot. */
+    av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS);
+
+    status = vaCreateBuffer(dec->hwctx->display, dec->va_context,
+                            type, size, 1, (void*)data, &new_id);
+    if (status == VA_STATUS_SUCCESS) {
+        pic->param_buffers[pic->nb_param_buffers] = new_id;
+        pic->nb_param_buffers++;
+
+        av_log(avctx, AV_LOG_DEBUG, "Param buffer (type %d, %zu bytes) "
+               "is %#x.\n", type, size, new_id);
+        return 0;
+    }
+
+    av_log(avctx, AV_LOG_ERROR, "Failed to create parameter "
+           "buffer (type %d): %d (%s).\n",
+           type, status, vaErrorStr(status));
+    return AVERROR(EIO);
+}
+
+
+/**
+ * Create the parameter buffer and the data buffer for one slice and append
+ * both IDs to the picture's slice-buffer list.
+ *
+ * slice_buffers stores two IDs per slice: the parameter buffer at index
+ * 2 * n and the data buffer at index 2 * n + 1.
+ *
+ * @param avctx        codec context; its hwaccel private data supplies the
+ *                     VA display and context
+ * @param pic          picture receiving the two new buffers
+ * @param params_data  contents of the slice parameter buffer
+ * @param params_size  size of params_data in bytes
+ * @param slice_data   contents of the slice data buffer
+ * @param slice_size   size of slice_data in bytes
+ * @return 0 on success, AVERROR(ENOMEM) if the ID array cannot be grown,
+ *         AVERROR(EIO) if either vaCreateBuffer() call fails. On any
+ *         failure the slices already recorded in pic stay intact.
+ */
+int ff_vaapi_decode_make_slice_buffer(AVCodecContext *avctx,
+                                      VAAPIDecodePicture *pic,
+                                      const void *params_data,
+                                      size_t params_size,
+                                      const void *slice_data,
+                                      size_t slice_size)
+{
+    VAAPIDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    VAStatus vas;
+    int index;
+
+    av_assert0(pic->nb_slices <= pic->slices_allocated);
+    if (pic->nb_slices == pic->slices_allocated) {
+        /* Start with room for 64 slices, then double on each growth. */
+        int new_allocated = pic->slices_allocated > 0 ?
+                            pic->slices_allocated * 2 : 64;
+        VABufferID *new_buffers;
+
+        /*
+         * Grow through a temporary and commit only on success.
+         * av_realloc_array() does not free the old block when it fails, so
+         * assigning its result directly to pic->slice_buffers would leak
+         * the old array and leave nb_slices/slices_allocated describing an
+         * array that no longer exists.
+         */
+        new_buffers = av_realloc_array(pic->slice_buffers,
+                                       new_allocated,
+                                       2 * sizeof(*pic->slice_buffers));
+        if (!new_buffers)
+            return AVERROR(ENOMEM);
+
+        pic->slice_buffers    = new_buffers;
+        pic->slices_allocated = new_allocated;
+    }
+    av_assert0(pic->nb_slices + 1 <= pic->slices_allocated);
+
+    /* Two IDs per slice: parameters first, data second. */
+    index = 2 * pic->nb_slices;
+
+    vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
+                         VASliceParameterBufferType,
+                         params_size, 1, (void*)params_data,
+                         &pic->slice_buffers[index]);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create slice "
+               "parameter buffer: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR(EIO);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Slice %d param buffer (%zu bytes) "
+           "is %#x.\n", pic->nb_slices, params_size,
+           pic->slice_buffers[index]);
+
+    vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
+                         VASliceDataBufferType,
+                         slice_size, 1, (void*)slice_data,
+                         &pic->slice_buffers[index + 1]);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create slice "
+               "data buffer (size %zu): %d (%s).\n",
+               slice_size, vas, vaErrorStr(vas));
+        /*
+         * nb_slices has not been incremented yet, so the parameter buffer
+         * created above is not tracked anywhere else; release it here.
+         */
+        vaDestroyBuffer(ctx->hwctx->display,
+                        pic->slice_buffers[index]);
+        return AVERROR(EIO);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Slice %d data buffer (%zu bytes) "
+           "is %#x.\n", pic->nb_slices, slice_size,
+           pic->slice_buffers[index + 1]);
+
+    ++pic->nb_slices;
+    return 0;
+}
+
+/*
+ * Destroy every VA buffer currently recorded in pic: all parameter buffers
+ * and both buffers (parameters and data) of every slice.
+ *
+ * Failures are logged and otherwise ignored. The counters and the
+ * slice_buffers array itself are not reset or freed here; the callers do
+ * that afterwards.
+ */
+static void ff_vaapi_decode_destroy_buffers(AVCodecContext *avctx,
+                                            VAAPIDecodePicture *pic)
+{
+    VAAPIDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    VAStatus vas;
+    int i;
+
+    for (i = 0; i < pic->nb_param_buffers; i++) {
+        vas = vaDestroyBuffer(ctx->hwctx->display,
+                              pic->param_buffers[i]);
+        if (vas != VA_STATUS_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to destroy "
+                   "parameter buffer %#x: %d (%s).\n",
+                   pic->param_buffers[i], vas, vaErrorStr(vas));
+        }
+    }
+
+    /*
+     * Without the ID array there is nothing that can be destroyed; bail
+     * out rather than dereference NULL if nb_slices is non-zero while the
+     * array is missing (e.g. after a failed allocation).
+     */
+    if (!pic->slice_buffers)
+        return;
+
+    /* Each slice owns two consecutive IDs, hence 2 * nb_slices entries. */
+    for (i = 0; i < 2 * pic->nb_slices; i++) {
+        vas = vaDestroyBuffer(ctx->hwctx->display,
+                              pic->slice_buffers[i]);
+        if (vas != VA_STATUS_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to destroy "
+                   "slice buffer %#x: %d (%s).\n",
+                   pic->slice_buffers[i], vas, vaErrorStr(vas));
+        }
+    }
+}
+
+/**
+ * Submit a fully prepared picture to the VA driver.
+ *
+ * The sequence is vaBeginPicture() on pic->output_surface, one
+ * vaRenderPicture() for the parameter buffers, one for the slice buffers,
+ * then vaEndPicture(). Whatever the outcome, the picture's buffer counters
+ * are reset to zero and the slice_buffers array is freed before returning.
+ *
+ * @return 0 on success, AVERROR(EIO) if any VA call fails
+ */
+int ff_vaapi_decode_issue(AVCodecContext *avctx,
+                          VAAPIDecodePicture *pic)
+{
+    VAAPIDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    VAStatus vas;
+    int err;
+
+    av_log(avctx, AV_LOG_DEBUG, "Decode to surface %#x.\n",
+           pic->output_surface);
+
+    vas = vaBeginPicture(ctx->hwctx->display, ctx->va_context,
+                         pic->output_surface);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to begin picture decode "
+               "issue: %d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        goto fail_with_picture;
+    }
+
+    /* Picture-level parameter buffers go first ... */
+    vas = vaRenderPicture(ctx->hwctx->display, ctx->va_context,
+                          pic->param_buffers, pic->nb_param_buffers);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to upload decode "
+               "parameters: %d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        goto fail_with_picture;
+    }
+
+    /* ... followed by all slice buffers (two IDs per slice). */
+    vas = vaRenderPicture(ctx->hwctx->display, ctx->va_context,
+                          pic->slice_buffers, 2 * pic->nb_slices);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to upload slices: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        goto fail_with_picture;
+    }
+
+    vas = vaEndPicture(ctx->hwctx->display, ctx->va_context);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to end picture decode "
+               "issue: %d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        /*
+         * The buffers are destroyed explicitly only under the same
+         * condition as on the success path below; otherwise they are
+         * left alone and only the bookkeeping is reset.
+         */
+        if (CONFIG_VAAPI_1 || ctx->hwctx->driver_quirks &
+            AV_VAAPI_DRIVER_QUIRK_RENDER_PARAM_BUFFERS)
+            goto fail;
+        else
+            goto fail_at_end;
+    }
+
+    /*
+     * With a VAAPI 1 build, or a driver flagged with the
+     * RENDER_PARAM_BUFFERS quirk, this code destroys the buffers itself.
+     * NOTE(review): presumably other drivers release rendered buffers on
+     * their own -- confirm against the libva documentation.
+     */
+    if (CONFIG_VAAPI_1 || ctx->hwctx->driver_quirks &
+        AV_VAAPI_DRIVER_QUIRK_RENDER_PARAM_BUFFERS)
+        ff_vaapi_decode_destroy_buffers(avctx, pic);
+
+    err = 0;
+    goto exit;
+
+    /*
+     * Failure before vaEndPicture() was reached: call it now (its own
+     * failure is only logged), then fall through to destroy the buffers.
+     */
+fail_with_picture:
+    vas = vaEndPicture(ctx->hwctx->display, ctx->va_context);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to end picture decode "
+               "after error: %d (%s).\n", vas, vaErrorStr(vas));
+    }
+fail:
+    ff_vaapi_decode_destroy_buffers(avctx, pic);
+    /* Common tail: reset the bookkeeping on every path, success included. */
+fail_at_end:
+exit:
+    pic->nb_param_buffers = 0;
+    pic->nb_slices        = 0;
+    pic->slices_allocated = 0;
+    av_freep(&pic->slice_buffers);
+
+    return err;
+}
+
+/**
+ * Abandon a picture that will not be issued: destroy the VA buffers created
+ * for it so far and reset its buffer bookkeeping to the empty state.
+ *
+ * @param avctx  codec context owning the VA display/context
+ * @param pic    picture whose buffers are discarded
+ * @return always 0
+ */
+int ff_vaapi_decode_cancel(AVCodecContext *avctx,
+                           VAAPIDecodePicture *pic)
+{
+    /* Must run first: it still needs the counters and the ID array. */
+    ff_vaapi_decode_destroy_buffers(avctx, pic);
+
+    av_freep(&pic->slice_buffers);
+    pic->slices_allocated = 0;
+    pic->nb_slices        = 0;
+    pic->nb_param_buffers = 0;
+
+    return 0;
+}
+
+/*
+ * Table translating VA surface fourcc codes into libav pixel formats.
+ *
+ * vaapi_decode_find_best_format() searches it linearly by fourcc; a fourcc
+ * that is not listed here is ignored as a candidate. Several fourccs may
+ * map to the same pixel format. Entries wrapped in #ifdef are compiled in
+ * only when the corresponding VA_FOURCC_* macro is defined.
+ */
+static const struct {
+    uint32_t fourcc;
+    enum AVPixelFormat pix_fmt;
+} vaapi_format_map[] = {
+#define MAP(va, av) { VA_FOURCC_ ## va, AV_PIX_FMT_ ## av }
+    // 4:0:0
+    MAP(Y800, GRAY8),
+    // 4:2:0
+    MAP(NV12, NV12),
+    MAP(YV12, YUV420P),
+    MAP(IYUV, YUV420P),
+#ifdef VA_FOURCC_I420
+    MAP(I420, YUV420P),
+#endif
+    MAP(IMC3, YUV420P),
+    // 4:1:1
+    MAP(411P, YUV411P),
+    // 4:2:2
+    MAP(422H, YUV422P),
+#ifdef VA_FOURCC_YV16
+    MAP(YV16, YUV422P),
+#endif
+    MAP(YUY2, YUYV422),
+#ifdef VA_FOURCC_Y210
+    MAP(Y210,    Y210),
+#endif
+#ifdef VA_FOURCC_Y212
+    MAP(Y212,    Y212),
+#endif
+    // 4:4:0
+    MAP(422V, YUV440P),
+    // 4:4:4
+    MAP(444P, YUV444P),
+#ifdef VA_FOURCC_XYUV
+    MAP(XYUV, VUYX),
+#endif
+#ifdef VA_FOURCC_Y410
+    MAP(Y410,    XV30),
+#endif
+#ifdef VA_FOURCC_Y412
+    MAP(Y412,    XV36),
+#endif
+    // 4:2:0 10-bit
+#ifdef VA_FOURCC_P010
+    MAP(P010, P010),
+#endif
+#ifdef VA_FOURCC_P012
+    MAP(P012, P012),
+#endif
+#ifdef VA_FOURCC_I010
+    MAP(I010, YUV420P10),
+#endif
+#undef MAP
+};
+
+/*
+ * Choose the surface pixel format to decode into for the given VA config.
+ *
+ * The pixel-format surface attributes of config_id are translated through
+ * vaapi_format_map, and av_find_best_pix_fmt_of_2() picks the candidate
+ * that best matches avctx->sw_pix_fmt. The winner is stored in
+ * frames->sw_format; when hwaccel private data exists, its fourcc is also
+ * recorded as the single surface attribute of the frames context.
+ *
+ * Returns 0 on success, AVERROR(ENOSYS) if the attribute query fails,
+ * AVERROR(ENOMEM) on allocation failure, AVERROR(EINVAL) if no reported
+ * format is usable.
+ */
+static int vaapi_decode_find_best_format(AVCodecContext *avctx,
+                                         AVHWDeviceContext *device,
+                                         VAConfigID config_id,
+                                         AVHWFramesContext *frames)
+{
+    AVVAAPIDeviceContext *va_dev = device->hwctx;
+    const enum AVPixelFormat source_format = avctx->sw_pix_fmt;
+    enum AVPixelFormat chosen = AV_PIX_FMT_NONE;
+    uint32_t chosen_fourcc;
+    VASurfaceAttrib *attrs;
+    VAStatus status;
+    int count, a;
+
+    av_assert0(source_format != AV_PIX_FMT_NONE);
+
+    /* Query with a NULL list to learn the count, then again with storage. */
+    status = vaQuerySurfaceAttributes(va_dev->display, config_id,
+                                      NULL, &count);
+    if (status != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query surface attributes: "
+               "%d (%s).\n", status, vaErrorStr(status));
+        return AVERROR(ENOSYS);
+    }
+
+    attrs = av_malloc_array(count, sizeof(*attrs));
+    if (!attrs)
+        return AVERROR(ENOMEM);
+
+    status = vaQuerySurfaceAttributes(va_dev->display, config_id,
+                                      attrs, &count);
+    if (status != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query surface attributes: "
+               "%d (%s).\n", status, vaErrorStr(status));
+        av_freep(&attrs);
+        return AVERROR(ENOSYS);
+    }
+
+    for (a = 0; a < count; a++) {
+        enum AVPixelFormat candidate;
+        uint32_t code;
+        int m = 0;
+
+        /* Only pixel-format attributes describe candidate formats. */
+        if (attrs[a].type != VASurfaceAttribPixelFormat)
+            continue;
+
+        code = attrs[a].value.value.i;
+        while (m < FF_ARRAY_ELEMS(vaapi_format_map) &&
+               vaapi_format_map[m].fourcc != code)
+            m++;
+        if (m >= FF_ARRAY_ELEMS(vaapi_format_map)) {
+            av_log(avctx, AV_LOG_DEBUG, "Ignoring unknown format %#x.\n",
+                   code);
+            continue;
+        }
+
+        candidate = vaapi_format_map[m].pix_fmt;
+        av_log(avctx, AV_LOG_DEBUG, "Considering format %#x -> %s.\n",
+               code, av_get_pix_fmt_name(candidate));
+
+        /* Remember the fourcc whenever this candidate is the current best. */
+        chosen = av_find_best_pix_fmt_of_2(candidate, chosen,
+                                           source_format, 0, NULL);
+        if (chosen == candidate)
+            chosen_fourcc = code;
+    }
+
+    av_freep(&attrs);
+
+    if (chosen == AV_PIX_FMT_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "No usable formats for decoding!\n");
+        return AVERROR(EINVAL);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Picked %s (%#x) as best match for %s.\n",
+           av_get_pix_fmt_name(chosen), chosen_fourcc,
+           av_get_pix_fmt_name(source_format));
+
+    frames->sw_format = chosen;
+    if (avctx->internal->hwaccel_priv_data) {
+        VAAPIDecodeContext   *dec      = avctx->internal->hwaccel_priv_data;
+        AVVAAPIFramesContext *va_frames = frames->hwctx;
+
+        /*
+         * The attribute lives in the decode context because the frames
+         * context only stores a pointer to it.
+         */
+        dec->pixel_format_attribute = (VASurfaceAttrib) {
+            .type          = VASurfaceAttribPixelFormat,
+            .flags         = VA_SURFACE_ATTRIB_SETTABLE,
+            .value.type    = VAGenericValueTypeInteger,
+            .value.value.i = chosen_fourcc,
+        };
+
+        va_frames->nb_attributes = 1;
+        va_frames->attributes    = &dec->pixel_format_attribute;
+    }
+
+    return 0;
+}
+
+/*
+ * Table relating (codec ID, codec profile) pairs to VA profiles.
+ *
+ * vaapi_decode_make_config() scans it in order, looking only at entries
+ * whose codec_id equals avctx->codec_id:
+ *  - codec_profile: profile value compared with avctx->profile; an entry
+ *    with FF_PROFILE_UNKNOWN counts as a match for any stream profile.
+ *  - va_profile: VA profile used for the entry when profile_parser is NULL.
+ *  - profile_parser: optional callback; when set it is called with avctx
+ *    and its return value is used instead of va_profile.
+ *
+ * Entries inside #if blocks are compiled in only when the stated
+ * VA_CHECK_VERSION()/CONFIG_* condition holds.
+ */
+static const struct {
+    enum AVCodecID codec_id;
+    int codec_profile;
+    VAProfile va_profile;
+    VAProfile (*profile_parser)(AVCodecContext *avctx);
+} vaapi_profile_map[] = {
+#define MAP(c, p, v, ...) { AV_CODEC_ID_ ## c, FF_PROFILE_ ## p, VAProfile ## v, __VA_ARGS__ }
+    MAP(MPEG2VIDEO,  MPEG2_SIMPLE,    MPEG2Simple ),
+    MAP(MPEG2VIDEO,  MPEG2_MAIN,      MPEG2Main   ),
+    MAP(H263,        UNKNOWN,         H263Baseline),
+    MAP(MPEG4,       MPEG4_SIMPLE,    MPEG4Simple ),
+    MAP(MPEG4,       MPEG4_ADVANCED_SIMPLE,
+                               MPEG4AdvancedSimple),
+    MAP(MPEG4,       MPEG4_MAIN,      MPEG4Main   ),
+    MAP(H264,        H264_CONSTRAINED_BASELINE,
+                           H264ConstrainedBaseline),
+    MAP(H264,        H264_MAIN,       H264Main    ),
+    MAP(H264,        H264_HIGH,       H264High    ),
+#if VA_CHECK_VERSION(0, 37, 0)
+    MAP(HEVC,        HEVC_MAIN,       HEVCMain    ),
+    MAP(HEVC,        HEVC_MAIN_10,    HEVCMain10  ),
+    MAP(HEVC,        HEVC_MAIN_STILL_PICTURE,
+                                      HEVCMain    ),
+#endif
+#if VA_CHECK_VERSION(1, 2, 0) && CONFIG_HEVC_VAAPI_HWACCEL
+    MAP(HEVC,        HEVC_REXT,       None,
+                 ff_vaapi_parse_hevc_rext_scc_profile ),
+    MAP(HEVC,        HEVC_SCC,        None,
+                 ff_vaapi_parse_hevc_rext_scc_profile ),
+#endif
+    MAP(MJPEG,       MJPEG_HUFFMAN_BASELINE_DCT,
+                                      JPEGBaseline),
+    MAP(WMV3,        VC1_SIMPLE,      VC1Simple   ),
+    MAP(WMV3,        VC1_MAIN,        VC1Main     ),
+    MAP(WMV3,        VC1_COMPLEX,     VC1Advanced ),
+    MAP(WMV3,        VC1_ADVANCED,    VC1Advanced ),
+    MAP(VC1,         VC1_SIMPLE,      VC1Simple   ),
+    MAP(VC1,         VC1_MAIN,        VC1Main     ),
+    MAP(VC1,         VC1_COMPLEX,     VC1Advanced ),
+    MAP(VC1,         VC1_ADVANCED,    VC1Advanced ),
+    MAP(VP8,         UNKNOWN,       VP8Version0_3 ),
+#if VA_CHECK_VERSION(0, 38, 0)
+    MAP(VP9,         VP9_0,           VP9Profile0 ),
+#endif
+#if VA_CHECK_VERSION(0, 39, 0)
+    MAP(VP9,         VP9_1,           VP9Profile1 ),
+    MAP(VP9,         VP9_2,           VP9Profile2 ),
+    MAP(VP9,         VP9_3,           VP9Profile3 ),
+#endif
+#if VA_CHECK_VERSION(1, 8, 0)
+    MAP(AV1,         AV1_MAIN,        AV1Profile0),
+    MAP(AV1,         AV1_HIGH,        AV1Profile1),
+#endif
+
+#undef MAP
+};
+
+/*
+ * Set *va_config and the frames_ref fields from the current codec parameters
+ * in avctx.
+ *
+ * Steps: pick a VA profile for the codec/profile in avctx from
+ * vaapi_profile_map, create a VLD decode configuration for it, check the
+ * coded picture size against the hardware frame constraints and, if
+ * frames_ref is not NULL, fill in its format, dimensions, software format
+ * and initial pool size.
+ *
+ * device_ref: VAAPI device the configuration is created on.
+ * va_config:  receives the new config ID. It is compared against
+ *             VA_INVALID_ID on the failure path, so the caller must have
+ *             initialised it to VA_INVALID_ID beforehand.
+ * frames_ref: frames context to fill in; may be NULL to only create the
+ *             configuration.
+ *
+ * Returns 0 on success or a negative AVERROR code (EINVAL, ENOMEM, ENOSYS
+ * or EIO). On failure any configuration already created is destroyed and
+ * *va_config is reset to VA_INVALID_ID.
+ */
+static int vaapi_decode_make_config(AVCodecContext *avctx,
+                                    AVBufferRef *device_ref,
+                                    VAConfigID *va_config,
+                                    AVBufferRef *frames_ref)
+{
+    AVVAAPIHWConfig       *hwconfig    = NULL;
+    AVHWFramesConstraints *constraints = NULL;
+    VAStatus vas;
+    int err, i, j;
+    const AVCodecDescriptor *codec_desc;
+    VAProfile *profile_list = NULL, matched_va_profile, va_profile;
+    int profile_count, exact_match, matched_ff_profile, codec_profile;
+
+    AVHWDeviceContext    *device = (AVHWDeviceContext*)device_ref->data;
+    AVVAAPIDeviceContext *hwctx = device->hwctx;
+
+    codec_desc = avcodec_descriptor_get(avctx->codec_id);
+    if (!codec_desc) {
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    /* Fetch the list of VA profiles the driver reports. */
+    profile_count = vaMaxNumProfiles(hwctx->display);
+    profile_list  = av_malloc_array(profile_count,
+                                    sizeof(VAProfile));
+    if (!profile_list) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    vas = vaQueryConfigProfiles(hwctx->display,
+                                profile_list, &profile_count);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query profiles: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR(ENOSYS);
+        goto fail;
+    }
+
+    matched_va_profile = VAProfileNone;
+    exact_match = 0;
+
+    /*
+     * Walk the map entries for this codec. Every entry whose VA profile is
+     * reported by the driver becomes the current match; the walk stops at
+     * the first such entry that also matches the stream profile, otherwise
+     * the last driver-supported entry is kept as an inexact match.
+     */
+    for (i = 0; i < FF_ARRAY_ELEMS(vaapi_profile_map); i++) {
+        int profile_match = 0;
+        if (avctx->codec_id != vaapi_profile_map[i].codec_id)
+            continue;
+        if (avctx->profile == vaapi_profile_map[i].codec_profile ||
+            vaapi_profile_map[i].codec_profile == FF_PROFILE_UNKNOWN)
+            profile_match = 1;
+
+        /* A per-entry parser, when present, overrides the static profile. */
+        va_profile = vaapi_profile_map[i].profile_parser ?
+                     vaapi_profile_map[i].profile_parser(avctx) :
+                     vaapi_profile_map[i].va_profile;
+        codec_profile = vaapi_profile_map[i].codec_profile;
+
+        for (j = 0; j < profile_count; j++) {
+            if (va_profile == profile_list[j]) {
+                exact_match = profile_match;
+                break;
+            }
+        }
+        /* j < profile_count means the inner loop found va_profile. */
+        if (j < profile_count) {
+            matched_va_profile = va_profile;
+            matched_ff_profile = codec_profile;
+            if (exact_match)
+                break;
+        }
+    }
+    av_freep(&profile_list);
+
+    if (matched_va_profile == VAProfileNone) {
+        av_log(avctx, AV_LOG_ERROR, "No support for codec %s "
+               "profile %d.\n", codec_desc->name, avctx->profile);
+        err = AVERROR(ENOSYS);
+        goto fail;
+    }
+    /*
+     * An inexact match is only accepted when the user opted in with
+     * AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH.
+     */
+    if (!exact_match) {
+        if (avctx->hwaccel_flags &
+            AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH) {
+            av_log(avctx, AV_LOG_VERBOSE, "Codec %s profile %d not "
+                   "supported for hardware decode.\n",
+                   codec_desc->name, avctx->profile);
+            av_log(avctx, AV_LOG_WARNING, "Using possibly-"
+                   "incompatible profile %d instead.\n",
+                   matched_ff_profile);
+        } else {
+            av_log(avctx, AV_LOG_VERBOSE, "Codec %s profile %d not "
+                   "supported for hardware decode.\n",
+                   codec_desc->name, avctx->profile);
+            err = AVERROR(EINVAL);
+            goto fail;
+        }
+    }
+
+    vas = vaCreateConfig(hwctx->display, matched_va_profile,
+                         VAEntrypointVLD, NULL, 0,
+                         va_config);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create decode "
+               "configuration: %d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    /* Query the frame constraints that apply to this configuration. */
+    hwconfig = av_hwdevice_hwconfig_alloc(device_ref);
+    if (!hwconfig) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    hwconfig->config_id = *va_config;
+
+    constraints =
+        av_hwdevice_get_hwframe_constraints(device_ref, hwconfig);
+    if (!constraints) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    if (avctx->coded_width  < constraints->min_width  ||
+        avctx->coded_height < constraints->min_height ||
+        avctx->coded_width  > constraints->max_width  ||
+        avctx->coded_height > constraints->max_height) {
+        av_log(avctx, AV_LOG_ERROR, "Hardware does not support image "
+               "size %dx%d (constraints: width %d-%d height %d-%d).\n",
+               avctx->coded_width, avctx->coded_height,
+               constraints->min_width,  constraints->max_width,
+               constraints->min_height, constraints->max_height);
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+    if (!constraints->valid_sw_formats ||
+        constraints->valid_sw_formats[0] == AV_PIX_FMT_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "Hardware does not offer any "
+               "usable surface formats.\n");
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    if (frames_ref) {
+        AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
+
+        frames->format = AV_PIX_FMT_VAAPI;
+        frames->width = avctx->coded_width;
+        frames->height = avctx->coded_height;
+
+        err = vaapi_decode_find_best_format(avctx, device,
+                                            *va_config, frames);
+        if (err < 0)
+            goto fail;
+
+        /* One surface, plus the per-codec reference count added below. */
+        frames->initial_pool_size = 1;
+        // Add per-codec number of surfaces used for storing reference frames.
+        switch (avctx->codec_id) {
+        case AV_CODEC_ID_H264:
+        case AV_CODEC_ID_HEVC:
+        case AV_CODEC_ID_AV1:
+            frames->initial_pool_size += 16;
+            break;
+        case AV_CODEC_ID_VP9:
+            frames->initial_pool_size += 8;
+            break;
+        case AV_CODEC_ID_VP8:
+            frames->initial_pool_size += 3;
+            break;
+        default:
+            frames->initial_pool_size += 2;
+        }
+    }
+
+    /* Success: the temporaries go away, the configuration stays alive. */
+    av_hwframe_constraints_free(&constraints);
+    av_freep(&hwconfig);
+
+    return 0;
+
+fail:
+    av_hwframe_constraints_free(&constraints);
+    av_freep(&hwconfig);
+    /* Undo vaCreateConfig() if it had already succeeded. */
+    if (*va_config != VA_INVALID_ID) {
+        vaDestroyConfig(hwctx->display, *va_config);
+        *va_config = VA_INVALID_ID;
+    }
+    av_freep(&profile_list);
+    return err;
+}
+
+/**
+ * Fill in the parameters of a VAAPI frames context for the stream
+ * described by avctx.
+ *
+ * A temporary decode configuration is created by
+ * vaapi_decode_make_config(), which also writes the frames-context fields;
+ * the configuration is destroyed again before returning.
+ *
+ * @param avctx          codec context describing the stream
+ * @param hw_frames_ctx  reference to the AVHWFramesContext to fill in
+ * @return 0 on success, AVERROR(EINVAL) if the frames context does not
+ *         belong to a VAAPI device, otherwise the error returned by
+ *         vaapi_decode_make_config()
+ */
+int ff_vaapi_common_frame_params(AVCodecContext *avctx,
+                                 AVBufferRef *hw_frames_ctx)
+{
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext *)hw_frames_ctx->data;
+    AVHWDeviceContext *dev = frames_ctx->device_ctx;
+    VAConfigID tmp_config = VA_INVALID_ID;
+    int ret;
+
+    if (dev->type != AV_HWDEVICE_TYPE_VAAPI)
+        return AVERROR(EINVAL);
+
+    ret = vaapi_decode_make_config(avctx, frames_ctx->device_ref,
+                                   &tmp_config, hw_frames_ctx);
+    if (ret)
+        return ret;
+
+    /* The configuration was only needed to derive the frame parameters. */
+    if (tmp_config != VA_INVALID_ID) {
+        AVVAAPIDeviceContext *va_dev = dev->hwctx;
+
+        vaDestroyConfig(va_dev->display, tmp_config);
+    }
+
+    return 0;
+}
+
+/**
+ * Set up VAAPI decoding for avctx: obtain the hardware frames context,
+ * create the VA decode configuration and then the VA context bound to the
+ * frames context's surfaces.
+ *
+ * On any failure ff_vaapi_decode_uninit() is called to release whatever
+ * was created so far.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vaapi_decode_init(AVCodecContext *avctx)
+{
+    VAAPIDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    VAStatus status;
+    int ret;
+
+    /* Mark both handles as absent so that uninit knows what to destroy. */
+    dec->va_context = VA_INVALID_ID;
+    dec->va_config  = VA_INVALID_ID;
+
+    ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VAAPI);
+    if (ret < 0)
+        goto rollback;
+
+    /* Cache the frames/device contexts and their VAAPI-specific parts. */
+    dec->frames = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+    dec->device = dec->frames->device_ctx;
+    dec->hwfc   = dec->frames->hwctx;
+    dec->hwctx  = dec->device->hwctx;
+
+    ret = vaapi_decode_make_config(avctx, dec->frames->device_ref,
+                                   &dec->va_config, NULL);
+    if (ret)
+        goto rollback;
+
+    status = vaCreateContext(dec->hwctx->display, dec->va_config,
+                             avctx->coded_width, avctx->coded_height,
+                             VA_PROGRESSIVE,
+                             dec->hwfc->surface_ids,
+                             dec->hwfc->nb_surfaces,
+                             &dec->va_context);
+    if (status == VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_DEBUG, "Decode context initialised: "
+               "%#x/%#x.\n", dec->va_config, dec->va_context);
+        return 0;
+    }
+
+    av_log(avctx, AV_LOG_ERROR, "Failed to create decode "
+           "context: %d (%s).\n", status, vaErrorStr(status));
+    ret = AVERROR(EIO);
+
+rollback:
+    ff_vaapi_decode_uninit(avctx);
+    return ret;
+}
+
+/**
+ * Release the VA context and the VA configuration held by the decoder, in
+ * that order. Handles equal to VA_INVALID_ID are skipped; destroy failures
+ * are logged but do not change the result.
+ *
+ * @return always 0
+ */
+int ff_vaapi_decode_uninit(AVCodecContext *avctx)
+{
+    VAAPIDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+
+    if (dec->va_context != VA_INVALID_ID) {
+        VAStatus status = vaDestroyContext(dec->hwctx->display,
+                                           dec->va_context);
+
+        if (status != VA_STATUS_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to destroy decode "
+                   "context %#x: %d (%s).\n",
+                   dec->va_context, status, vaErrorStr(status));
+    }
+
+    if (dec->va_config != VA_INVALID_ID) {
+        VAStatus status = vaDestroyConfig(dec->hwctx->display,
+                                          dec->va_config);
+
+        if (status != VA_STATUS_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to destroy decode "
+                   "configuration %#x: %d (%s).\n",
+                   dec->va_config, status, vaErrorStr(status));
+    }
+
+    return 0;
+}
diff --git a/media/ffvpx/libavcodec/vaapi_decode.h b/media/ffvpx/libavcodec/vaapi_decode.h
new file mode 100644
index 0000000000..6beda14e52
--- /dev/null
+++ b/media/ffvpx/libavcodec/vaapi_decode.h
@@ -0,0 +1,91 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VAAPI_DECODE_H
+#define AVCODEC_VAAPI_DECODE_H
+
+#include <va/va.h>
+
+#include "libavutil/frame.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_vaapi.h"
+
+#include "avcodec.h"
+
+/**
+ * Return the VA surface ID of a frame, read from the pointer-sized value
+ * stored in pic->data[3].
+ */
+static inline VASurfaceID ff_vaapi_get_surface_id(AVFrame *pic)
+{
+    const uintptr_t handle = (uintptr_t)pic->data[3];
+
+    return handle;
+}
+
+/* Capacity of the fixed-size VAAPIDecodePicture.param_buffers array. */
+enum {
+    MAX_PARAM_BUFFERS = 16,
+};
+
+/**
+ * Per-picture state: the target surface plus the VA buffers created for
+ * the picture until it is issued or cancelled.
+ */
+typedef struct VAAPIDecodePicture {
+    /* Surface the picture is decoded into (passed to vaBeginPicture()). */
+    VASurfaceID           output_surface;
+
+    /* Number of valid entries in param_buffers. */
+    int                nb_param_buffers;
+    VABufferID            param_buffers[MAX_PARAM_BUFFERS];
+
+    /* Number of slices recorded so far. */
+    int                nb_slices;
+    /*
+     * Heap array holding two buffer IDs per slice: the slice parameter
+     * buffer followed by the slice data buffer.
+     */
+    VABufferID           *slice_buffers;
+    /* Capacity of slice_buffers, counted in slices (not in IDs). */
+    int                   slices_allocated;
+} VAAPIDecodePicture;
+
+/**
+ * Per-decoder VAAPI state, stored as the hwaccel private data of the
+ * codec context.
+ */
+typedef struct VAAPIDecodeContext {
+    /* Decode configuration and context; VA_INVALID_ID while not created. */
+    VAConfigID            va_config;
+    VAContextID           va_context;
+
+    /* Device context of the frames context and its VAAPI-specific part. */
+    AVHWDeviceContext    *device;
+    AVVAAPIDeviceContext *hwctx;
+
+    /* Frames context taken from avctx->hw_frames_ctx and its VAAPI part. */
+    AVHWFramesContext    *frames;
+    AVVAAPIFramesContext *hwfc;
+
+    /* NOTE(review): not referenced by the vaapi_decode.c code shown here. */
+    enum AVPixelFormat    surface_format;
+    int                   surface_count;
+
+    /*
+     * Pixel-format surface attribute chosen for the frames context; the
+     * frames context's attributes pointer refers to this member.
+     */
+    VASurfaceAttrib       pixel_format_attribute;
+} VAAPIDecodeContext;
+
+
+/**
+ * Create a VA buffer of the given type from data/size and append it to the
+ * picture's parameter buffers.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx,
+                                      VAAPIDecodePicture *pic,
+                                      int type,
+                                      const void *data,
+                                      size_t size);
+
+/**
+ * Create the parameter and data buffers for one slice and append both to
+ * the picture's slice buffers.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vaapi_decode_make_slice_buffer(AVCodecContext *avctx,
+                                      VAAPIDecodePicture *pic,
+                                      const void *params_data,
+                                      size_t params_size,
+                                      const void *slice_data,
+                                      size_t slice_size);
+
+/**
+ * Submit the picture's buffers for decoding into pic->output_surface and
+ * reset the picture's buffer bookkeeping.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vaapi_decode_issue(AVCodecContext *avctx,
+                          VAAPIDecodePicture *pic);
+/**
+ * Discard the buffers created for a picture without issuing it.
+ *
+ * @return always 0
+ */
+int ff_vaapi_decode_cancel(AVCodecContext *avctx,
+                           VAAPIDecodePicture *pic);
+
+/** Create the VA configuration and context for avctx. */
+int ff_vaapi_decode_init(AVCodecContext *avctx);
+/** Destroy the VA context and configuration created by the init call. */
+int ff_vaapi_decode_uninit(AVCodecContext *avctx);
+
+/**
+ * Fill in the parameters of the VAAPI frames context hw_frames_ctx for the
+ * stream described by avctx.
+ */
+int ff_vaapi_common_frame_params(AVCodecContext *avctx,
+                                 AVBufferRef *hw_frames_ctx);
+
+#endif /* AVCODEC_VAAPI_DECODE_H */
diff --git a/media/ffvpx/libavcodec/vaapi_hevc.h b/media/ffvpx/libavcodec/vaapi_hevc.h
new file mode 100644
index 0000000000..449635d0d7
--- /dev/null
+++ b/media/ffvpx/libavcodec/vaapi_hevc.h
@@ -0,0 +1,27 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VAAPI_HEVC_H
+#define AVCODEC_VAAPI_HEVC_H
+
+#include <va/va.h>
+#include "avcodec.h"
+
+/**
+ * Return the VA profile to use for the HEVC stream described by avctx.
+ *
+ * vaapi_decode.c installs this as the profile_parser callback of its HEVC
+ * REXT and SCC profile-map entries. The implementation is not part of
+ * this header.
+ */
+VAProfile ff_vaapi_parse_hevc_rext_scc_profile(AVCodecContext *avctx);
+
+#endif /* AVCODEC_VAAPI_HEVC_H */
diff --git a/media/ffvpx/libavcodec/vaapi_vp8.c b/media/ffvpx/libavcodec/vaapi_vp8.c
new file mode 100644
index 0000000000..5b18bf8f34
--- /dev/null
+++ b/media/ffvpx/libavcodec/vaapi_vp8.c
@@ -0,0 +1,237 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <va/va.h>
+#include <va/va_dec_vp8.h>
+
+#include "hwconfig.h"
+#include "vaapi_decode.h"
+#include "vp8.h"
+
+static VASurfaceID vaapi_vp8_surface_id(VP8Frame *vf)
+{
+    /* A NULL frame slot is reported as VA_INVALID_SURFACE. */
+    if (!vf)
+        return VA_INVALID_SURFACE;
+    return ff_vaapi_get_surface_id(vf->tf.f);
+}
+
+static int vaapi_vp8_start_frame(AVCodecContext          *avctx,
+                                 av_unused const uint8_t *buffer,
+                                 av_unused uint32_t       size)
+{
+    const VP8Context *s = avctx->priv_data;
+    VAAPIDecodePicture *pic = s->framep[VP8_FRAME_CURRENT]->hwaccel_picture_private;
+    VAPictureParameterBufferVP8 pp;
+    VAProbabilityDataBufferVP8 prob;
+    VAIQMatrixBufferVP8 quant;
+    int err, i, j, k;
+    /* Builds three parameter buffers for the current frame: picture, probability, IQ matrix. */
+    pic->output_surface = vaapi_vp8_surface_id(s->framep[VP8_FRAME_CURRENT]);
+
+    pp = (VAPictureParameterBufferVP8) {
+        .frame_width                     = avctx->width,
+        .frame_height                    = avctx->height,
+
+        .last_ref_frame                  = vaapi_vp8_surface_id(s->framep[VP8_FRAME_PREVIOUS]),
+        .golden_ref_frame                = vaapi_vp8_surface_id(s->framep[VP8_FRAME_GOLDEN]),
+        .alt_ref_frame                   = vaapi_vp8_surface_id(s->framep[VP8_FRAME_ALTREF]),
+        .out_of_loop_frame               = VA_INVALID_SURFACE,
+
+        .pic_fields.bits = {
+            .key_frame                   = !s->keyframe, /* inverted: libva uses 0 for a key frame */
+            .version                     = s->profile,
+
+            .segmentation_enabled        = s->segmentation.enabled,
+            .update_mb_segmentation_map  = s->segmentation.update_map,
+            .update_segment_feature_data = s->segmentation.update_feature_data,
+
+            .filter_type                 = s->filter.simple,
+            .sharpness_level             = s->filter.sharpness,
+
+            .loop_filter_adj_enable      = s->lf_delta.enabled,
+            .mode_ref_lf_delta_update    = s->lf_delta.update,
+
+            .sign_bias_golden            = s->sign_bias[VP8_FRAME_GOLDEN],
+            .sign_bias_alternate         = s->sign_bias[VP8_FRAME_ALTREF],
+
+            .mb_no_coeff_skip            = s->mbskip_enabled,
+            .loop_filter_disable         = s->filter.level == 0,
+        },
+
+        .prob_skip_false                 = s->prob->mbskip,
+        .prob_intra                      = s->prob->intra,
+        .prob_last                       = s->prob->last,
+        .prob_gf                         = s->prob->golden,
+    };
+    /* Array members cannot be set in the initializer above; fill them element by element. */
+    for (i = 0; i < 3; i++)
+        pp.mb_segment_tree_probs[i] = s->prob->segmentid[i];
+    /* Per-segment filter level (the frame level if segmentation is off), clipped to 0..63. */
+    for (i = 0; i < 4; i++) {
+        if (s->segmentation.enabled) {
+            pp.loop_filter_level[i] = s->segmentation.filter_level[i];
+            if (!s->segmentation.absolute_vals)
+                pp.loop_filter_level[i] += s->filter.level;
+        } else {
+            pp.loop_filter_level[i] = s->filter.level;
+        }
+        pp.loop_filter_level[i] = av_clip_uintp2(pp.loop_filter_level[i], 6);
+    }
+    /* Loop filter deltas; the four mode deltas are read from lf_delta.mode[4..7]. */
+    for (i = 0; i < 4; i++) {
+        pp.loop_filter_deltas_ref_frame[i] = s->lf_delta.ref[i];
+        pp.loop_filter_deltas_mode[i] = s->lf_delta.mode[i + 4];
+    }
+    /* Key frames use the fixed tables below; other frames use the decoder's current values. */
+    if (s->keyframe) {
+        static const uint8_t keyframe_y_mode_probs[4] = {
+            145, 156, 163, 128
+        };
+        static const uint8_t keyframe_uv_mode_probs[3] = {
+            142, 114, 183
+        };
+        memcpy(pp.y_mode_probs,  keyframe_y_mode_probs,  4);
+        memcpy(pp.uv_mode_probs, keyframe_uv_mode_probs, 3);
+    } else {
+        for (i = 0; i < 4; i++)
+            pp.y_mode_probs[i] = s->prob->pred16x16[i];
+        for (i = 0; i < 3; i++)
+            pp.uv_mode_probs[i] = s->prob->pred8x8c[i];
+    }
+    for (i = 0; i < 2; i++)
+        for (j = 0; j < 19; j++)
+            pp.mv_probs[i][j] = s->prob->mvc[i][j];
+    /* Bool decoder state as saved in coder_state_at_header_end. */
+    pp.bool_coder_ctx.range = s->coder_state_at_header_end.range;
+    pp.bool_coder_ctx.value = s->coder_state_at_header_end.value;
+    pp.bool_coder_ctx.count = s->coder_state_at_header_end.bit_count;
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, pic,
+                                            VAPictureParameterBufferType,
+                                            &pp, sizeof(pp));
+    if (err < 0)
+        goto fail;
+    /* Token probabilities: output row j is read from decoder index coeff_bands_inverse[j]. */
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 8; j++) {
+            static const int coeff_bands_inverse[8] = {
+                0, 1, 2, 3, 5, 6, 4, 15
+            };
+            int coeff_pos = coeff_bands_inverse[j];
+
+            for (k = 0; k < 3; k++) {
+                memcpy(prob.dct_coeff_probs[i][j][k],
+                       s->prob->token[i][coeff_pos][k], 11);
+            }
+        }
+    }
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, pic,
+                                            VAProbabilityBufferType,
+                                            &prob, sizeof(prob));
+    if (err < 0)
+        goto fail;
+    /* Six quantizer indices per segment, each clipped to 0..127. */
+    for (i = 0; i < 4; i++) {
+        int base_qi = s->segmentation.base_quant[i];
+        if (!s->segmentation.absolute_vals)
+            base_qi += s->quant.yac_qi;
+
+        quant.quantization_index[i][0] = av_clip_uintp2(base_qi,                       7);
+        quant.quantization_index[i][1] = av_clip_uintp2(base_qi + s->quant.ydc_delta,  7);
+        quant.quantization_index[i][2] = av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7);
+        quant.quantization_index[i][3] = av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7);
+        quant.quantization_index[i][4] = av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7);
+        quant.quantization_index[i][5] = av_clip_uintp2(base_qi + s->quant.uvac_delta, 7);
+    }
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, pic,
+                                            VAIQMatrixBufferType,
+                                            &quant, sizeof(quant));
+    if (err < 0)
+        goto fail;
+
+    return 0;
+
+fail: /* any buffer failure ends here: cancel the picture and return the error code */
+    ff_vaapi_decode_cancel(avctx, pic);
+    return err;
+}
+
+static int vaapi_vp8_end_frame(AVCodecContext *avctx)
+{
+    /* Pass the current frame's VAAPI picture to ff_vaapi_decode_issue(). */
+    const VP8Context *ctx = avctx->priv_data;
+    const VP8Frame *cur   = ctx->framep[VP8_FRAME_CURRENT];
+    return ff_vaapi_decode_issue(avctx, cur->hwaccel_picture_private);
+}
+
+static int vaapi_vp8_decode_slice(AVCodecContext *avctx,
+                                  const uint8_t  *buffer,
+                                  uint32_t        size)
+{
+    const VP8Context *s = avctx->priv_data;
+    VAAPIDecodePicture *pic = s->framep[VP8_FRAME_CURRENT]->hwaccel_picture_private;
+    VASliceParameterBufferVP8 sp;
+    int err, i;
+    /* The slice data starts after a header of 3 bytes, or 10 bytes on key frames. */
+    unsigned int header_size = 3 + 7 * s->keyframe;
+    const uint8_t *data = buffer + header_size;
+    unsigned int data_size = size - header_size; /* NOTE(review): assumes size >= header_size */
+
+    sp = (VASliceParameterBufferVP8) {
+        .slice_data_size   = data_size,
+        .slice_data_offset = 0,
+        .slice_data_flag   = VA_SLICE_DATA_FLAG_ALL,
+        /* Bit offset relative to data, computed from the bool decoder state saved at header end. */
+        .macroblock_offset = (8 * (s->coder_state_at_header_end.input - data) -
+                              s->coder_state_at_header_end.bit_count - 8),
+        .num_of_partitions = s->num_coeff_partitions + 1,
+    };
+    /* Partition 0: header partition size minus the bytes covered by macroblock_offset (rounded up). */
+    sp.partition_size[0] = s->header_partition_size - ((sp.macroblock_offset + 7) / 8);
+    for (i = 0; i < 8; i++) /* all 8 entries are copied, whatever num_coeff_partitions is */
+        sp.partition_size[i+1] = s->coeff_partition_size[i];
+
+    err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &sp, sizeof(sp), data, data_size);
+    if (err)
+        goto fail;
+
+    return 0;
+
+fail: /* cancel the picture and return the error code */
+    ff_vaapi_decode_cancel(avctx, pic);
+    return err;
+}
+
+const AVHWAccel ff_vp8_vaapi_hwaccel = { /* VP8 hwaccel descriptor for AV_PIX_FMT_VAAPI */
+    .name                 = "vp8_vaapi",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_VP8,
+    .pix_fmt              = AV_PIX_FMT_VAAPI,
+    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
+    .priv_data_size       = sizeof(VAAPIDecodeContext),
+    .frame_priv_data_size = sizeof(VAAPIDecodePicture),
+    .init                 = ff_vaapi_decode_init,
+    .frame_params         = ff_vaapi_common_frame_params,
+    .start_frame          = vaapi_vp8_start_frame,
+    .decode_slice         = vaapi_vp8_decode_slice,
+    .end_frame            = vaapi_vp8_end_frame,
+    .uninit               = ff_vaapi_decode_uninit,
+};
diff --git a/media/ffvpx/libavcodec/vaapi_vp9.c b/media/ffvpx/libavcodec/vaapi_vp9.c
new file mode 100644
index 0000000000..776382f683
--- /dev/null
+++ b/media/ffvpx/libavcodec/vaapi_vp9.c
@@ -0,0 +1,185 @@
+/*
+ * VP9 HW decode acceleration through VA API
+ *
+ * Copyright (C) 2015 Timo Rothenpieler <timo@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/pixdesc.h"
+
+#include "hwconfig.h"
+#include "vaapi_decode.h"
+#include "vp9shared.h"
+
+static VASurfaceID vaapi_vp9_surface_id(const VP9Frame *vf)
+{
+    /* Resolve a frame to its VA surface ID; a NULL frame yields the
+     * VA_INVALID_SURFACE sentinel instead. */
+    return vf ? ff_vaapi_get_surface_id(vf->tf.f)
+              : VA_INVALID_SURFACE;
+}
+
+static int vaapi_vp9_start_frame(AVCodecContext          *avctx,
+                                 av_unused const uint8_t *buffer,
+                                 av_unused uint32_t       size)
+{
+    const VP9SharedContext *h = avctx->priv_data;
+    VAAPIDecodePicture *pic = h->frames[CUR_FRAME].hwaccel_picture_private;
+    VADecPictureParameterBufferVP9 pic_param;
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+    int err, i;
+    /* Fills the VP9 picture parameters and passes them to ff_vaapi_decode_make_param_buffer(). */
+    pic->output_surface = vaapi_vp9_surface_id(&h->frames[CUR_FRAME]);
+    /* NOTE(review): pixdesc is dereferenced below without a NULL check. */
+    pic_param = (VADecPictureParameterBufferVP9) {
+        .frame_width                      = avctx->width,
+        .frame_height                     = avctx->height,
+
+        .pic_fields.bits = {
+            .subsampling_x                = pixdesc->log2_chroma_w,
+            .subsampling_y                = pixdesc->log2_chroma_h,
+            .frame_type                   = !h->h.keyframe, /* 0 for a key frame, 1 otherwise */
+            .show_frame                   = !h->h.invisible,
+            .error_resilient_mode         = h->h.errorres,
+            .intra_only                   = h->h.intraonly,
+            .allow_high_precision_mv      = h->h.keyframe ? 0 : h->h.highprecisionmvs,
+            .mcomp_filter_type            = h->h.filtermode ^ (h->h.filtermode <= 1), /* swaps 0 and 1 */
+            .frame_parallel_decoding_mode = h->h.parallelmode,
+            .reset_frame_context          = h->h.resetctx,
+            .refresh_frame_context        = h->h.refreshctx,
+            .frame_context_idx            = h->h.framectxid,
+
+            .segmentation_enabled          = h->h.segmentation.enabled,
+            .segmentation_temporal_update  = h->h.segmentation.temporal,
+            .segmentation_update_map       = h->h.segmentation.update_map,
+
+            .last_ref_frame                = h->h.refidx[0],
+            .last_ref_frame_sign_bias      = h->h.signbias[0],
+            .golden_ref_frame              = h->h.refidx[1],
+            .golden_ref_frame_sign_bias    = h->h.signbias[1],
+            .alt_ref_frame                 = h->h.refidx[2],
+            .alt_ref_frame_sign_bias       = h->h.signbias[2],
+            .lossless_flag                 = h->h.lossless,
+        },
+
+        .filter_level                      = h->h.filter.level,
+        .sharpness_level                   = h->h.filter.sharpness,
+        .log2_tile_rows                    = h->h.tiling.log2_tile_rows,
+        .log2_tile_columns                 = h->h.tiling.log2_tile_cols,
+
+        .frame_header_length_in_bytes      = h->h.uncompressed_header_size,
+        .first_partition_size              = h->h.compressed_header_size,
+
+        .profile                           = h->h.profile,
+        .bit_depth                         = h->h.bpp,
+    };
+    /* The seven segment tree probabilities are always copied. */
+    for (i = 0; i < 7; i++)
+        pic_param.mb_segment_tree_probs[i] = h->h.segmentation.prob[i];
+    /* Prediction probabilities are copied only with temporal segmentation; else each byte is 255. */
+    if (h->h.segmentation.temporal) {
+        for (i = 0; i < 3; i++)
+            pic_param.segment_pred_probs[i] = h->h.segmentation.pred_prob[i];
+    } else {
+        memset(pic_param.segment_pred_probs, 255, sizeof(pic_param.segment_pred_probs));
+    }
+    /* Reference slots whose frame has no buf[0] are reported as VA_INVALID_ID. */
+    for (i = 0; i < 8; i++) {
+        if (h->refs[i].f->buf[0])
+            pic_param.reference_frames[i] = ff_vaapi_get_surface_id(h->refs[i].f);
+        else
+            pic_param.reference_frames[i] = VA_INVALID_ID;
+    }
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, pic,
+                                            VAPictureParameterBufferType,
+                                            &pic_param, sizeof(pic_param));
+    if (err < 0) {
+        ff_vaapi_decode_cancel(avctx, pic); /* on failure the picture is cancelled */
+        return err;
+    }
+
+    return 0;
+}
+
+static int vaapi_vp9_end_frame(AVCodecContext *avctx)
+{
+    /* Pass the current frame's VAAPI picture to ff_vaapi_decode_issue(). */
+    const VP9SharedContext *shared = avctx->priv_data;
+    const VP9Frame *cur            = &shared->frames[CUR_FRAME];
+    return ff_vaapi_decode_issue(avctx, cur->hwaccel_picture_private);
+}
+
+static int vaapi_vp9_decode_slice(AVCodecContext *avctx,
+                                  const uint8_t  *buffer,
+                                  uint32_t        size)
+{
+    const VP9SharedContext *h = avctx->priv_data;
+    VAAPIDecodePicture *pic = h->frames[CUR_FRAME].hwaccel_picture_private;
+    VASliceParameterBufferVP9 slice_param;
+    int err, i;
+    /* The whole buffer is submitted as one slice; members not named here are zero-initialized. */
+    slice_param = (VASliceParameterBufferVP9) {
+        .slice_data_size   = size,
+        .slice_data_offset = 0,
+        .slice_data_flag   = VA_SLICE_DATA_FLAG_ALL,
+    };
+    /* Per-segment parameters are filled for all 8 segments from the header's feature table. */
+    for (i = 0; i < 8; i++) {
+        slice_param.seg_param[i] = (VASegmentParameterVP9) {
+            .segment_flags.fields = {
+                .segment_reference_enabled = h->h.segmentation.feat[i].ref_enabled,
+                .segment_reference         = h->h.segmentation.feat[i].ref_val,
+                .segment_reference_skipped = h->h.segmentation.feat[i].skip_enabled,
+            },
+
+            .luma_dc_quant_scale           = h->h.segmentation.feat[i].qmul[0][0],
+            .luma_ac_quant_scale           = h->h.segmentation.feat[i].qmul[0][1],
+            .chroma_dc_quant_scale         = h->h.segmentation.feat[i].qmul[1][0],
+            .chroma_ac_quant_scale         = h->h.segmentation.feat[i].qmul[1][1],
+        };
+        /* Byte copy sized by the destination; presumably lflvl has the same size and layout. */
+        memcpy(slice_param.seg_param[i].filter_level, h->h.segmentation.feat[i].lflvl, sizeof(slice_param.seg_param[i].filter_level));
+    }
+
+    err = ff_vaapi_decode_make_slice_buffer(avctx, pic,
+                                            &slice_param, sizeof(slice_param),
+                                            buffer, size);
+    if (err) {
+        ff_vaapi_decode_cancel(avctx, pic); /* on failure the picture is cancelled */
+        return err;
+    }
+
+    return 0;
+}
+
+const AVHWAccel ff_vp9_vaapi_hwaccel = { /* VP9 hwaccel descriptor for AV_PIX_FMT_VAAPI */
+    .name                 = "vp9_vaapi",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_VP9,
+    .pix_fmt              = AV_PIX_FMT_VAAPI,
+    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
+    .priv_data_size       = sizeof(VAAPIDecodeContext),
+    .frame_priv_data_size = sizeof(VAAPIDecodePicture),
+    .init                 = ff_vaapi_decode_init,
+    .frame_params         = ff_vaapi_common_frame_params,
+    .start_frame          = vaapi_vp9_start_frame,
+    .decode_slice         = vaapi_vp9_decode_slice,
+    .end_frame            = vaapi_vp9_end_frame,
+    .uninit               = ff_vaapi_decode_uninit,
+};
diff --git a/media/ffvpx/libavcodec/version.c b/media/ffvpx/libavcodec/version.c
new file mode 100644
index 0000000000..d7966b2015
--- /dev/null
+++ b/media/ffvpx/libavcodec/version.c
@@ -0,0 +1,50 @@
+/*
+ * Version functions.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/avassert.h"
+#include "avcodec.h"
+#include "codec_id.h"
+#include "version.h"
+
+#include "libavutil/ffversion.h"
+const char av_codec_ffversion[] = "FFmpeg version " FFMPEG_VERSION; /* fixed prefix + FFMPEG_VERSION literal */
+
+unsigned avcodec_version(void)
+{
+    av_assert0(AV_CODEC_ID_PCM_S8_PLANAR==65563); /* pin the numeric value of three codec IDs */
+    av_assert0(AV_CODEC_ID_ADPCM_G722==69660);
+    av_assert0(AV_CODEC_ID_SRT==94216);
+    av_assert0(LIBAVCODEC_VERSION_MICRO >= 100);  /* the micro version must be at least 100 */
+    /* Report the version as composed by AV_VERSION_INT(major, minor, micro). */
+    return LIBAVCODEC_VERSION_INT;
+}
+
+const char *avcodec_configuration(void)
+{
+    return FFMPEG_CONFIGURATION; /* not defined in this file; presumably supplied by config.h */
+}
+
+const char *avcodec_license(void)
+{
+#define LICENSE_PREFIX "libavcodec license: " /* part of the stored literal, skipped in the result */
+    return &LICENSE_PREFIX FFMPEG_LICENSE[sizeof(LICENSE_PREFIX) - 1]; /* points just past the prefix */
+}
diff --git a/media/ffvpx/libavcodec/version.h b/media/ffvpx/libavcodec/version.h
new file mode 100644
index 0000000000..7aa95fc3f1
--- /dev/null
+++ b/media/ffvpx/libavcodec/version.h
@@ -0,0 +1,45 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VERSION_H
+#define AVCODEC_VERSION_H
+
+/**
+ * @file
+ * @ingroup libavc
+ * Libavcodec version macros.
+ */
+
+#include "libavutil/version.h"
+
+#include "version_major.h"
+
+#define LIBAVCODEC_VERSION_MINOR   5
+#define LIBAVCODEC_VERSION_MICRO 100
+
+#define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
+                                               LIBAVCODEC_VERSION_MINOR, \
+                                               LIBAVCODEC_VERSION_MICRO)
+#define LIBAVCODEC_VERSION      AV_VERSION(LIBAVCODEC_VERSION_MAJOR,    \
+                                           LIBAVCODEC_VERSION_MINOR,    \
+                                           LIBAVCODEC_VERSION_MICRO)
+#define LIBAVCODEC_BUILD        LIBAVCODEC_VERSION_INT
+
+#define LIBAVCODEC_IDENT        "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
+
+#endif /* AVCODEC_VERSION_H */
diff --git a/media/ffvpx/libavcodec/version_major.h b/media/ffvpx/libavcodec/version_major.h
new file mode 100644
index 0000000000..c2f118b262
--- /dev/null
+++ b/media/ffvpx/libavcodec/version_major.h
@@ -0,0 +1,52 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VERSION_MAJOR_H
+#define AVCODEC_VERSION_MAJOR_H
+
+/**
+ * @file
+ * @ingroup libavc
+ * Libavcodec version macros.
+ */
+
+#define LIBAVCODEC_VERSION_MAJOR  60
+
+/**
+ * FF_API_* defines may be placed below to indicate public API that will be
+ * dropped at a future version bump. The defines themselves are not part of
+ * the public API and may change, break or disappear at any time.
+ *
+ * @note, when bumping the major version it is recommended to manually
+ * disable each FF_API_* in its own commit instead of disabling them all
+ * at once through the bump. This improves the git bisect-ability of the change.
+ */
+
+#define FF_API_INIT_PACKET         (LIBAVCODEC_VERSION_MAJOR < 61)
+#define FF_API_IDCT_NONE           (LIBAVCODEC_VERSION_MAJOR < 61)
+#define FF_API_SVTAV1_OPTS         (LIBAVCODEC_VERSION_MAJOR < 61)
+#define FF_API_AYUV_CODECID        (LIBAVCODEC_VERSION_MAJOR < 61)
+#define FF_API_VT_OUTPUT_CALLBACK  (LIBAVCODEC_VERSION_MAJOR < 61)
+#define FF_API_AVCODEC_CHROMA_POS  (LIBAVCODEC_VERSION_MAJOR < 61)
+#define FF_API_VT_HWACCEL_CONTEXT  (LIBAVCODEC_VERSION_MAJOR < 61)
+#define FF_API_AVCTX_FRAME_NUMBER  (LIBAVCODEC_VERSION_MAJOR < 61)
+
+// reminder to remove CrystalHD decoders on next major bump
+#define FF_CODEC_CRYSTAL_HD        (LIBAVCODEC_VERSION_MAJOR < 61)
+
+#endif /* AVCODEC_VERSION_MAJOR_H */
diff --git a/media/ffvpx/libavcodec/videodsp.c b/media/ffvpx/libavcodec/videodsp.c
new file mode 100644
index 0000000000..bdff2e76f5
--- /dev/null
+++ b/media/ffvpx/libavcodec/videodsp.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2012 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/macros.h"
+#include "videodsp.h"
+
+#define BIT_DEPTH 8
+#include "videodsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 16
+#include "videodsp_template.c"
+#undef BIT_DEPTH
+
+static void just_return(const uint8_t *buf, ptrdiff_t stride, int h)
+{   /* Intentionally empty: ff_videodsp_init() installs this as the default prefetch. */
+}
+
+av_cold void ff_videodsp_init(VideoDSPContext *ctx, int bpc)
+{
+    /* Generic defaults: prefetch does nothing, and edge emulation uses the
+     * 8-bit routine for bpc <= 8 and the 16-bit one otherwise.  The
+     * per-architecture init below (at most one) runs after these are set. */
+    ctx->emulated_edge_mc = bpc <= 8 ? ff_emulated_edge_mc_8
+                                     : ff_emulated_edge_mc_16;
+    ctx->prefetch         = just_return;
+
+#if ARCH_AARCH64
+    ff_videodsp_init_aarch64(ctx, bpc);
+#elif ARCH_ARM
+    ff_videodsp_init_arm(ctx, bpc);
+#elif ARCH_PPC
+    ff_videodsp_init_ppc(ctx, bpc);
+#elif ARCH_X86
+    ff_videodsp_init_x86(ctx, bpc);
+#elif ARCH_MIPS
+    ff_videodsp_init_mips(ctx, bpc);
+#elif ARCH_LOONGARCH64
+    ff_videodsp_init_loongarch(ctx, bpc);
+#endif
+}
diff --git a/media/ffvpx/libavcodec/videodsp.h b/media/ffvpx/libavcodec/videodsp.h
new file mode 100644
index 0000000000..e8960b609d
--- /dev/null
+++ b/media/ffvpx/libavcodec/videodsp.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2012 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Core video DSP helper functions
+ */
+
+#ifndef AVCODEC_VIDEODSP_H
+#define AVCODEC_VIDEODSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define EMULATED_EDGE(depth) \
+void ff_emulated_edge_mc_ ## depth(uint8_t *dst, const uint8_t *src, \
+                                   ptrdiff_t dst_stride, ptrdiff_t src_stride, \
+                                   int block_w, int block_h,\
+                                   int src_x, int src_y, int w, int h);
+
+EMULATED_EDGE(8)
+
+typedef struct VideoDSPContext {
+    /**
+     * Copy a rectangular area of samples to a temporary buffer and replicate
+     * the border samples.
+     *
+     * @param dst destination buffer
+     * @param src source buffer
+     * @param dst_linesize number of bytes between 2 vertically adjacent
+     *                     samples in the destination buffer
+     * @param src_linesize number of bytes between 2 vertically adjacent
+     *                     samples in the source buffer
+     * @param block_w width of block
+     * @param block_h height of block
+     * @param src_x x coordinate of the top left sample of the block in the
+     *                source buffer
+     * @param src_y y coordinate of the top left sample of the block in the
+     *                source buffer
+     * @param w width of the source buffer
+     * @param h height of the source buffer
+     *
+     * @note the block given by src_x/src_y may extend outside the source.
+     */
+    void (*emulated_edge_mc)(uint8_t *dst, const uint8_t *src,
+                             ptrdiff_t dst_linesize,
+                             ptrdiff_t src_linesize,
+                             int block_w, int block_h,
+                             int src_x, int src_y, int w, int h);
+
+    /**
+     * Prefetch memory into cache (if supported by hardware).
+     *
+     * @param buf    pointer to buffer to prefetch memory from
+     * @param stride distance between two lines of buf (in bytes)
+     * @param h      number of lines to prefetch
+     */
+    void (*prefetch)(const uint8_t *buf, ptrdiff_t stride, int h);
+} VideoDSPContext;
+
+void ff_videodsp_init(VideoDSPContext *ctx, int bpc);
+
+/* for internal use only (i.e. called by ff_videodsp_init()) */
+void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc);
+void ff_videodsp_init_arm(VideoDSPContext *ctx, int bpc);
+void ff_videodsp_init_ppc(VideoDSPContext *ctx, int bpc);
+void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc);
+void ff_videodsp_init_mips(VideoDSPContext *ctx, int bpc);
+void ff_videodsp_init_loongarch(VideoDSPContext *ctx, int bpc);
+
+#endif /* AVCODEC_VIDEODSP_H */
diff --git a/media/ffvpx/libavcodec/videodsp_template.c b/media/ffvpx/libavcodec/videodsp_template.c
new file mode 100644
index 0000000000..d653f4d524
--- /dev/null
+++ b/media/ffvpx/libavcodec/videodsp_template.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2002-2012 Michael Niedermayer
+ * Copyright (C) 2012 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "bit_depth_template.c"
+#if BIT_DEPTH != 8
+// Only the 8-bit variant is non-static: ff_emulated_edge_mc_8 is used by the x86 MpegVideoDSP API.
+static
+#endif
+void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src,
+                               ptrdiff_t buf_linesize,
+                               ptrdiff_t src_linesize,
+                               int block_w, int block_h,
+                               int src_x, int src_y, int w, int h)
+{
+    int x, y;
+    int start_y, start_x, end_y, end_x;
+    /* An empty source area (w or h is 0) leaves buf untouched. */
+    if (!w || !h)
+        return;
+
+    av_assert2(block_w * sizeof(pixel) <= FFABS(buf_linesize));
+    /* A block entirely outside the source is moved until exactly one row/column overlaps. */
+    if (src_y >= h) {
+        src -= src_y * src_linesize;
+        src += (h - 1) * src_linesize;
+        src_y = h - 1;
+    } else if (src_y <= -block_h) {
+        src -= src_y * src_linesize;
+        src += (1 - block_h) * src_linesize;
+        src_y = 1 - block_h;
+    }
+    if (src_x >= w) {
+        // The subtracted expression has an unsigned type and must thus not be negative
+        src  -= (1 + src_x - w) * sizeof(pixel);
+        src_x = w - 1;
+    } else if (src_x <= -block_w) {
+        src  += (1 - block_w - src_x) * sizeof(pixel);
+        src_x = 1 - block_w;
+    }
+    /* [start, end) is the part of the block, in block coordinates, that overlaps the source. */
+    start_y = FFMAX(0, -src_y);
+    start_x = FFMAX(0, -src_x);
+    end_y = FFMIN(block_h, h-src_y);
+    end_x = FFMIN(block_w, w-src_x);
+    av_assert2(start_y < end_y && block_h);
+    av_assert2(start_x < end_x && block_w);
+    /* w becomes the overlap width; src moves to the first overlapping sample, buf to column start_x. */
+    w    = end_x - start_x;
+    src += start_y * src_linesize + start_x * (ptrdiff_t)sizeof(pixel);
+    buf += start_x * sizeof(pixel);
+
+    // top: the first overlapping source row is repeated for rows 0..start_y-1
+    for (y = 0; y < start_y; y++) {
+        memcpy(buf, src, w * sizeof(pixel));
+        buf += buf_linesize;
+    }
+
+    // copy existing part: rows start_y..end_y-1 come straight from the source
+    for (; y < end_y; y++) {
+        memcpy(buf, src, w * sizeof(pixel));
+        src += src_linesize;
+        buf += buf_linesize;
+    }
+
+    // bottom: step back to the last copied source row and repeat it
+    src -= src_linesize;
+    for (; y < block_h; y++) {
+        memcpy(buf, src, w * sizeof(pixel));
+        buf += buf_linesize;
+    }
+    /* Rewind buf to the block's top-left sample, then extend every row sideways. */
+    buf -= block_h * buf_linesize + start_x * (ptrdiff_t)sizeof(pixel);
+    while (block_h--) {
+        pixel *bufp = (pixel *) buf;
+
+        // left: replicate the first overlapping column
+        for(x = 0; x < start_x; x++) {
+            bufp[x] = bufp[start_x];
+        }
+
+        // right: replicate the last overlapping column
+        for (x = end_x; x < block_w; x++) {
+            bufp[x] = bufp[end_x - 1];
+        }
+        buf += buf_linesize;
+    }
+}
diff --git a/media/ffvpx/libavcodec/vlc.c b/media/ffvpx/libavcodec/vlc.c
new file mode 100644
index 0000000000..96f2b28c7e
--- /dev/null
+++ b/media/ffvpx/libavcodec/vlc.c
@@ -0,0 +1,378 @@
+/*
+ * API for creating VLC trees
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2010 Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/error.h"
+#include "libavutil/internal.h"
+#include "libavutil/log.h"
+#include "libavutil/macros.h"
+#include "libavutil/mem.h"
+#include "libavutil/qsort.h"
+#include "libavutil/reverse.h"
+#include "vlc.h"
+
+/**
+ * Read entry i of a strided lookup table into v.
+ *
+ * The entry starts i * wrap bytes after table and is loaded as an unsigned
+ * integer that is size bytes wide (1, 2 or 4; any other value takes the
+ * 4-byte path, which is guarded by av_assert1()).
+ *
+ * The expansion is a brace-enclosed block, not an expression, so it needs no
+ * trailing semicolon at the call site.
+ */
+#define GET_DATA(v, table, i, wrap, size)                   \
+{                                                           \
+    const uint8_t *ptr = (const uint8_t *)table + i * wrap; \
+    switch(size) {                                          \
+    case 1:                                                 \
+        v = *(const uint8_t *)ptr;                          \
+        break;                                              \
+    case 2:                                                 \
+        v = *(const uint16_t *)ptr;                         \
+        break;                                              \
+    case 4:                                                 \
+    default:                                                \
+        av_assert1(size == 4);                              \
+        v = *(const uint32_t *)ptr;                         \
+        break;                                              \
+    }                                                       \
+}
+
+
+/**
+ * Reserve size consecutive entries at the end of vlc->table.
+ *
+ * @param vlc        table owner; its table_size is advanced by size
+ * @param size       number of entries to reserve
+ * @param use_static nonzero if the table must not be reallocated; running out
+ *                   of room then aborts the process
+ * @return index of the first reserved entry, or AVERROR(ENOMEM) if growing
+ *         the table failed (table_size and table_allocated are then reset to 0)
+ */
+static int alloc_table(VLC *vlc, int size, int use_static)
+{
+    const int first_entry = vlc->table_size;
+    int chunk;
+
+    vlc->table_size = first_entry + size;
+    if (vlc->table_size <= vlc->table_allocated)
+        return first_entry;
+
+    if (use_static)
+        abort(); // cannot do anything, init_vlc() is used with too little memory
+
+    /* Grow by one chunk of 1 << vlc->bits entries and zero only that chunk. */
+    chunk = 1 << vlc->bits;
+    vlc->table_allocated += chunk;
+    vlc->table = av_realloc_f(vlc->table, vlc->table_allocated, sizeof(*vlc->table));
+    if (!vlc->table) {
+        vlc->table_size      = 0;
+        vlc->table_allocated = 0;
+        return AVERROR(ENOMEM);
+    }
+    memset(vlc->table + vlc->table_allocated - chunk, 0, sizeof(*vlc->table) << vlc->bits);
+    return first_entry;
+}
+
+/* Number of VLCcode entries in the on-stack scratch buffer of the init
+ * functions; vlc_common_init() heap-allocates when more codes are given. */
+#define LOCALBUF_ELEMS 1500 // the maximum currently needed is 1296 by rv34
+
+/**
+ * Mirror a 32-bit word: each byte is mapped through ff_reverse[] and the
+ * four bytes are emitted in the opposite order.
+ */
+static av_always_inline uint32_t bitswap_32(uint32_t x)
+{
+    const uint32_t from_byte0 = ff_reverse[ x        & 0xFF];
+    const uint32_t from_byte1 = ff_reverse[(x >> 8)  & 0xFF];
+    const uint32_t from_byte2 = ff_reverse[(x >> 16) & 0xFF];
+    const uint32_t from_byte3 = ff_reverse[ x >> 24];
+
+    /* the lowest input byte ends up as the highest output byte */
+    return from_byte0 << 24 | from_byte1 << 16 | from_byte2 << 8 | from_byte3;
+}
+
+/** Description of a single code while the lookup tables are being built. */
+typedef struct VLCcode {
+    /** length of the code in bits; build_table() lowers it to the part that
+     * still has to be resolved when it descends into a subtable */
+    uint8_t bits;
+    /** value written to the sym field of the table entries for this code */
+    VLCBaseType symbol;
+    /** codeword, with the first bit-to-be-read in the msb
+     * (even if intended for a little-endian bitstream reader) */
+    uint32_t code;
+} VLCcode;
+
+/**
+ * Shared first step of the VLC init functions.
+ *
+ * Stores nb_bits in the context, resets the used table size and, unless
+ * INIT_VLC_USE_NEW_STATIC is set, clears the table pointer and capacity.
+ * If nb_codes does not fit into LOCALBUF_ELEMS entries, *buf is replaced
+ * by a heap array of nb_codes VLCcode elements; otherwise *buf is left alone.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the heap array cannot be allocated
+ */
+static int vlc_common_init(VLC *vlc, int nb_bits, int nb_codes,
+                           VLCcode **buf, int flags)
+{
+    const int is_static = flags & INIT_VLC_USE_NEW_STATIC;
+
+    vlc->bits       = nb_bits;
+    vlc->table_size = 0;
+
+    if (!is_static) {
+        vlc->table           = NULL;
+        vlc->table_allocated = 0;
+    } else {
+        /* static tables are only built from the on-stack scratch buffer */
+        av_assert0(nb_codes <= LOCALBUF_ELEMS);
+    }
+
+    if (nb_codes <= LOCALBUF_ELEMS)
+        return 0;
+
+    *buf = av_malloc_array(nb_codes, sizeof(VLCcode));
+    return *buf ? 0 : AVERROR(ENOMEM);
+}
+
+/**
+ * Ordering callback for VLCcode entries, comparing them by codeword.
+ *
+ * Both codewords are shifted right by one bit before being subtracted, so
+ * each operand is below 2^31 and the difference keeps the right sign once
+ * it is converted to int.
+ */
+static int compare_vlcspec(const void *a, const void *b)
+{
+    const VLCcode *lhs = a;
+    const VLCcode *rhs = b;
+
+    return (lhs->code >> 1) - (rhs->code >> 1);
+}
+
+/**
+ * Build VLC decoding tables suitable for use with get_vlc().
+ *
+ * Reserves a table of 1 << table_nb_bits entries in vlc->table, fills it
+ * from codes[] and calls itself recursively for every group of codes that
+ * is longer than table_nb_bits.
+ *
+ * @param vlc            the context to be initialized
+ *
+ * @param table_nb_bits  max length of vlc codes to store directly in this table
+ *                       (Longer codes are delegated to subtables.)
+ *
+ * @param nb_codes       number of elements in codes[]
+ *
+ * @param codes          descriptions of the vlc codes
+ *                       These must be ordered such that codes going into the same subtable are contiguous.
+ *                       Sorting by VLCcode.code is sufficient, though not necessary.
+ *                       Entries delegated to a subtable are modified in place
+ *                       (prefix stripped from code, bits reduced).
+ *
+ * @param flags          INIT_VLC_* flags; INIT_VLC_OUTPUT_LE and
+ *                       INIT_VLC_USE_NEW_STATIC are evaluated here
+ *
+ * @return index of the new table's first entry inside vlc->table,
+ *         or a negative AVERROR code on failure
+ */
+static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
+                       VLCcode *codes, int flags)
+{
+    int table_size, table_index;
+    VLCElem *table;
+
+    /* 1 << table_nb_bits below has to fit into an int */
+    if (table_nb_bits > 30)
+       return AVERROR(EINVAL);
+    table_size = 1 << table_nb_bits;
+    table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
+    ff_dlog(NULL, "new table index=%d size=%d\n", table_index, table_size);
+    if (table_index < 0)
+        return table_index;
+    table = &vlc->table[table_index];
+
+    /* first pass: map codes and compute auxiliary table sizes */
+    for (int i = 0; i < nb_codes; i++) {
+        int         n = codes[i].bits;
+        uint32_t code = codes[i].code;
+        int    symbol = codes[i].symbol;
+        ff_dlog(NULL, "i=%d n=%d code=0x%"PRIx32"\n", i, n, code);
+        if (n <= table_nb_bits) {
+            /* no need to add another table */
+            /* The code covers nb entries: by default a consecutive run that
+             * starts at the top table_nb_bits bits of the codeword. */
+            int   j = code >> (32 - table_nb_bits);
+            int  nb = 1 << (table_nb_bits - n);
+            int inc = 1;
+
+            /* For little-endian output the entries start at the bit-reversed
+             * codeword and are spaced 1 << n apart. */
+            if (flags & INIT_VLC_OUTPUT_LE) {
+                j = bitswap_32(code);
+                inc = 1 << n;
+            }
+            for (int k = 0; k < nb; k++) {
+                int   bits = table[j].len;
+                int oldsym = table[j].sym;
+                ff_dlog(NULL, "%4x: code=%d n=%d\n", j, i, n);
+                /* an entry that is already in use must agree with this code */
+                if ((bits || oldsym) && (bits != n || oldsym != symbol)) {
+                    av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
+                    return AVERROR_INVALIDDATA;
+                }
+                table[j].len = n;
+                table[j].sym = symbol;
+                j += inc;
+            }
+        } else {
+            /* fill auxiliary table recursively */
+            uint32_t code_prefix;
+            int index, subtable_bits, j, k;
+
+            /* strip the table_nb_bits prefix that this table resolves */
+            n -= table_nb_bits;
+            code_prefix = code >> (32 - table_nb_bits);
+            subtable_bits = n;
+            codes[i].bits = n;
+            codes[i].code = code << table_nb_bits;
+            /* collect the following codes that share the same prefix and
+             * strip the prefix from them as well */
+            for (k = i + 1; k < nb_codes; k++) {
+                n = codes[k].bits - table_nb_bits;
+                if (n <= 0)
+                    break;
+                code = codes[k].code;
+                if (code >> (32 - table_nb_bits) != code_prefix)
+                    break;
+                codes[k].bits = n;
+                codes[k].code = code << table_nb_bits;
+                subtable_bits = FFMAX(subtable_bits, n);
+            }
+            /* a subtable is never wider than the table that links to it */
+            subtable_bits = FFMIN(subtable_bits, table_nb_bits);
+            j = (flags & INIT_VLC_OUTPUT_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) : code_prefix;
+            /* a negative len marks the entry as a link to a subtable */
+            table[j].len = -subtable_bits;
+            ff_dlog(NULL, "%4x: n=%d (subtable)\n",
+                    j, codes[i].bits + table_nb_bits);
+            index = build_table(vlc, subtable_bits, k-i, codes+i, flags);
+            if (index < 0)
+                return index;
+            /* note: realloc has been done, so reload tables */
+            table = &vlc->table[table_index];
+            table[j].sym = index;
+            /* the subtable index has to survive the store into VLCBaseType */
+            if (table[j].sym != index) {
+                avpriv_request_sample(NULL, "strange codes");
+                return AVERROR_PATCHWELCOME;
+            }
+            /* continue after the codes consumed by the subtable */
+            i = k-1;
+        }
+    }
+
+    /* entries that no code maps to (len still 0) get the symbol -1 */
+    for (int i = 0; i < table_size; i++) {
+        if (table[i].len == 0)
+            table[i].sym = -1;
+    }
+
+    return table_index;
+}
+
+/**
+ * Shared last step of the VLC init functions: build the tables and clean up.
+ *
+ * For INIT_VLC_USE_NEW_STATIC tables a failing build_table() trips
+ * av_assert0(); a size mismatch between used and provided entries is logged
+ * unless the extra bit of INIT_VLC_STATIC_OVERLONG is set. Otherwise the
+ * codes buffer is freed when it is not localbuf, and vlc->table is freed if
+ * building failed.
+ *
+ * @return 0 on success, the negative error from build_table() otherwise
+ */
+static int vlc_common_end(VLC *vlc, int nb_bits, int nb_codes, VLCcode *codes,
+                          int flags, VLCcode localbuf[LOCALBUF_ELEMS])
+{
+    const int status      = build_table(vlc, nb_bits, nb_codes, codes, flags);
+    const int overlong_ok = flags & (INIT_VLC_STATIC_OVERLONG & ~INIT_VLC_USE_NEW_STATIC);
+
+    if (!(flags & INIT_VLC_USE_NEW_STATIC)) {
+        if (codes != localbuf)
+            av_free(codes);
+        if (status >= 0)
+            return 0;
+        av_freep(&vlc->table);
+        return status;
+    }
+
+    if (!overlong_ok && vlc->table_size != vlc->table_allocated)
+        av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated);
+    av_assert0(status >= 0);
+    return 0;
+}
+
+/* Build VLC decoding tables suitable for use with get_vlc().
+
+   'nb_bits' sets the decoding table size (2^nb_bits) entries. The
+   bigger it is, the faster is the decoding. But it should not be too
+   big to save memory and L1 cache. '9' is a good compromise.
+
+   'nb_codes' : number of vlc codes
+
+   'bits' : table which gives the size (in bits) of each vlc code.
+
+   'codes' : table which gives the bit pattern of each vlc code.
+
+   'symbols' : table which gives the values to be returned from get_vlc().
+   May be NULL, in which case the index of the code is used as its symbol.
+
+   'xxx_wrap' : give the number of bytes between each entry of the
+   'bits' or 'codes' tables.
+
+   'xxx_size' : gives the number of bytes of each entry of the 'bits'
+   or 'codes' tables. Currently 1,2 and 4 are supported
+   (symbols_size must not exceed 2 when 'symbols' is given).
+
+   'wrap' and 'size' make it possible to use any memory configuration and types
+   (byte/word/long) to store the 'bits', 'codes', and 'symbols' tables.
+
+   'flags' : combination of the INIT_VLC_* flags.
+
+   Returns 0 on success and a negative AVERROR code on failure
+   (AVERROR(EINVAL) for a code that is too long or does not fit its length).
+*/
+int ff_init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+                       const void *bits, int bits_wrap, int bits_size,
+                       const void *codes, int codes_wrap, int codes_size,
+                       const void *symbols, int symbols_wrap, int symbols_size,
+                       int flags)
+{
+    VLCcode localbuf[LOCALBUF_ELEMS], *buf = localbuf;
+    int j, ret;
+
+    ret = vlc_common_init(vlc, nb_bits, nb_codes, &buf, flags);
+    if (ret < 0)
+        return ret;
+
+    av_assert0(symbols_size <= 2 || !symbols);
+    j = 0;
+/* COPY(condition) appends to buf[] (at index j) every input code whose length
+ * 'len' satisfies 'condition'. Each code is validated and stored with its
+ * first bit in the msb; on invalid input buf is released and the enclosing
+ * function returns AVERROR(EINVAL). */
+#define COPY(condition)\
+    for (int i = 0; i < nb_codes; i++) {                                    \
+        unsigned len;                                                       \
+        GET_DATA(len, bits, i, bits_wrap, bits_size);                       \
+        if (!(condition))                                                   \
+            continue;                                                       \
+        if (len > 3*nb_bits || len > 32) {                                  \
+            av_log(NULL, AV_LOG_ERROR, "Too long VLC (%u) in init_vlc\n", len);\
+            if (buf != localbuf)                                            \
+                av_free(buf);                                               \
+            return AVERROR(EINVAL);                                         \
+        }                                                                   \
+        buf[j].bits = len;                                                  \
+        GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);            \
+        if (buf[j].code >= (1LL<<buf[j].bits)) {                            \
+            av_log(NULL, AV_LOG_ERROR, "Invalid code %"PRIx32" for %d in "  \
+                   "init_vlc\n", buf[j].code, i);                           \
+            if (buf != localbuf)                                            \
+                av_free(buf);                                               \
+            return AVERROR(EINVAL);                                         \
+        }                                                                   \
+        if (flags & INIT_VLC_INPUT_LE)                                      \
+            buf[j].code = bitswap_32(buf[j].code);                          \
+        else                                                                \
+            buf[j].code <<= 32 - buf[j].bits;                               \
+        if (symbols)                                                        \
+            GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size) \
+        else                                                                \
+            buf[j].symbol = i;                                              \
+        j++;                                                                \
+    }
+    /* Codes longer than nb_bits go first and are sorted by codeword; the
+     * remaining non-empty codes are appended afterwards in input order. */
+    COPY(len > nb_bits);
+    // qsort is the slowest part of init_vlc, and could probably be improved or avoided
+    AV_QSORT(buf, j, struct VLCcode, compare_vlcspec);
+    COPY(len && len <= nb_bits);
+    /* zero-length entries were skipped, so j is the number of codes kept */
+    nb_codes = j;
+
+    return vlc_common_end(vlc, nb_bits, nb_codes, buf,
+                          flags, localbuf);
+}
+
+/**
+ * Build VLC tables from code lengths only; the codewords are assigned here.
+ *
+ * The lengths are walked in order while a running codeword (kept msb-aligned
+ * in 32 bits) is advanced by the span each length covers. Positive lengths
+ * produce a code whose symbol is taken from symbols (or is the index i when
+ * symbols is NULL) plus offset; negative lengths only advance the running
+ * codeword by the span of -len; zero lengths are skipped.
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA for an invalid length or an
+ *         overdetermined tree, or another negative AVERROR code from the
+ *         common init/end helpers
+ */
+int ff_init_vlc_from_lengths(VLC *vlc, int nb_bits, int nb_codes,
+                             const int8_t *lens, int lens_wrap,
+                             const void *symbols, int symbols_wrap, int symbols_size,
+                             int offset, int flags, void *logctx)
+{
+    VLCcode localbuf[LOCALBUF_ELEMS], *buf = localbuf;
+    const int len_max = FFMIN(32, 3 * nb_bits);
+    const int8_t *cur_len = lens;
+    uint64_t next_code = 0;
+    int nb_used = 0;
+    int ret;
+
+    ret = vlc_common_init(vlc, nb_bits, nb_codes, &buf, flags);
+    if (ret < 0)
+        return ret;
+
+    for (int i = 0; i < nb_codes; i++, cur_len += lens_wrap) {
+        int len = *cur_len;
+
+        if (!len)
+            continue;
+
+        if (len < 0) {
+            /* open end of the tree: occupies code space, yields no code */
+            len = -len;
+        } else {
+            VLCcode *entry = &buf[nb_used++];
+            unsigned sym = i;
+
+            if (symbols)
+                GET_DATA(sym, symbols, i, symbols_wrap, symbols_size)
+
+            entry->bits   = len;
+            entry->symbol = sym + offset;
+            entry->code   = next_code;
+        }
+
+        /* the running codeword must be aligned to the span of this length */
+        if (len > len_max || next_code & ((1U << (32 - len)) - 1)) {
+            av_log(logctx, AV_LOG_ERROR, "Invalid VLC (length %u)\n", len);
+            goto fail;
+        }
+        next_code += 1U << (32 - len);
+        if (next_code > UINT32_MAX + 1ULL) {
+            av_log(logctx, AV_LOG_ERROR, "Overdetermined VLC tree\n");
+            goto fail;
+        }
+    }
+    return vlc_common_end(vlc, nb_bits, nb_used, buf, flags, localbuf);
+
+fail:
+    if (buf != localbuf)
+        av_free(buf);
+    return AVERROR_INVALIDDATA;
+}
+
+/**
+ * Release the lookup table of a VLC by passing the address of vlc->table
+ * to av_freep().
+ */
+void ff_free_vlc(VLC *vlc)
+{
+    av_freep(&vlc->table);
+}
diff --git a/media/ffvpx/libavcodec/vlc.h b/media/ffvpx/libavcodec/vlc.h
new file mode 100644
index 0000000000..e63c484755
--- /dev/null
+++ b/media/ffvpx/libavcodec/vlc.h
@@ -0,0 +1,144 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VLC_H
+#define AVCODEC_VLC_H
+
+#include <stdint.h>
+
+// When changing this, be sure to also update tableprint_vlc.h accordingly.
+typedef int16_t VLCBaseType;
+
+/** One entry of a VLC lookup table, as filled in by the builder in vlc.c. */
+typedef struct VLCElem {
+    /* sym: symbol of the code; for an entry that links to a subtable, the
+     *      index of that subtable inside VLC.table; -1 if no code maps here.
+     * len: length of the code in bits; negative (minus the index width of
+     *      the subtable) for a subtable link; 0 if no code maps here. */
+    VLCBaseType sym, len;
+} VLCElem;
+
+/** A VLC decoding context: the root table width and the table storage. */
+typedef struct VLC {
+    int bits;           ///< nb_bits passed to the init function
+    VLCElem *table;     ///< root table followed by all subtables
+    int table_size, table_allocated; ///< entries in use / entries available
+} VLC;
+
+/** Table entry holding a level, a length and a run; not filled by vlc.c. */
+typedef struct RL_VLC_ELEM {
+    int16_t level;
+    int8_t len;
+    uint8_t run;
+} RL_VLC_ELEM;
+
+/**
+ * Convenience wrapper around ff_init_vlc_sparse() that passes no symbols
+ * table (NULL, 0, 0).
+ */
+#define init_vlc(vlc, nb_bits, nb_codes,                \
+                 bits, bits_wrap, bits_size,            \
+                 codes, codes_wrap, codes_size,         \
+                 flags)                                 \
+    ff_init_vlc_sparse(vlc, nb_bits, nb_codes,          \
+                       bits, bits_wrap, bits_size,      \
+                       codes, codes_wrap, codes_size,   \
+                       NULL, 0, 0, flags)
+
+/**
+ * Build VLC decoding tables from explicit code lengths and codewords.
+ *
+ * The *_wrap arguments are the strides in bytes between consecutive entries
+ * of the bits, codes and symbols tables; the *_size arguments are the sizes
+ * in bytes of one entry. symbols may be NULL.
+ */
+int ff_init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+                       const void *bits, int bits_wrap, int bits_size,
+                       const void *codes, int codes_wrap, int codes_size,
+                       const void *symbols, int symbols_wrap, int symbols_size,
+                       int flags);
+
+/**
+ * Build VLC decoding tables suitable for use with get_vlc2()
+ *
+ * This function takes lengths and symbols and calculates the codes from them.
+ * For this the input lengths and symbols have to be sorted according to "left
+ * nodes in the corresponding tree first".
+ *
+ * @param[in,out] vlc      The VLC to be initialized; table and table_allocated
+ *                         must have been set when initializing a static VLC,
+ *                         otherwise this will be treated as uninitialized.
+ * @param[in] nb_bits      The number of bits to use for the VLC table;
+ *                         higher values take up more memory and cache, but
+ *                         allow to read codes with fewer reads.
+ * @param[in] nb_codes     The number of provided length and (if supplied) symbol
+ *                         entries.
+ * @param[in] lens         The lengths of the codes. Entries > 0 correspond to
+ *                         valid codes; entries == 0 will be skipped and entries
+ *                         with len < 0 indicate that the tree is incomplete and
+ *                         has an open end of length -len at this position.
+ * @param[in] lens_wrap    Stride (in bytes) of the lengths.
+ * @param[in] symbols      The symbols, i.e. what is returned from get_vlc2()
+ *                         when the corresponding code is encountered.
+ *                         May be NULL, then 0, 1, 2, 3, 4,... will be used.
+ * @param[in] symbols_wrap Stride (in bytes) of the symbols.
+ * @param[in] symbols_size Size of the symbols. 1 and 2 are supported.
+ * @param[in] offset       An offset to apply to all the valid symbols.
+ * @param[in] flags        A combination of the INIT_VLC_* flags; notice that
+ *                         INIT_VLC_INPUT_LE is pointless and ignored.
+ * @param[in] logctx       Context handed to av_log() when an error is reported.
+ *
+ * @return 0 on success, a negative AVERROR code on failure.
+ */
+int ff_init_vlc_from_lengths(VLC *vlc, int nb_bits, int nb_codes,
+                             const int8_t *lens, int lens_wrap,
+                             const void *symbols, int symbols_wrap, int symbols_size,
+                             int offset, int flags, void *logctx);
+
+/**
+ * Free the lookup table owned by vlc.
+ */
+void ff_free_vlc(VLC *vlc);
+
+/* If INIT_VLC_INPUT_LE is set, the LSB bit of the codes used to
+ * initialize the VLC table is the first bit to be read. */
+#define INIT_VLC_INPUT_LE       2
+/* If set the VLC is intended for a little endian bitstream reader. */
+#define INIT_VLC_OUTPUT_LE      8
+/* Both of the above: little-endian codes for a little-endian reader. */
+#define INIT_VLC_LE             (INIT_VLC_INPUT_LE | INIT_VLC_OUTPUT_LE)
+/* If set, vlc->table and vlc->table_allocated were provided by the caller
+ * and the table is never reallocated; running out of entries aborts. */
+#define INIT_VLC_USE_NEW_STATIC 4
+/* Static table whose provided size may differ from the size actually
+ * needed without an error being logged. */
+#define INIT_VLC_STATIC_OVERLONG (1 | INIT_VLC_USE_NEW_STATIC)
+
+/**
+ * Initialize vlc with ff_init_vlc_sparse() on top of a function-local static
+ * array of static_size entries. The arguments a..j are forwarded unchanged
+ * as nb_codes, the bits/codes/symbols tables and their wrap/size values;
+ * INIT_VLC_USE_NEW_STATIC is added to flags. The return value is discarded.
+ */
+#define INIT_CUSTOM_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g,      \
+                                      h, i, j, flags, static_size)         \
+    do {                                                                   \
+        static VLCElem table[static_size];                                 \
+        (vlc)->table           = table;                                    \
+        (vlc)->table_allocated = static_size;                              \
+        ff_init_vlc_sparse(vlc, bits, a, b, c, d, e, f, g, h, i, j,        \
+                           flags | INIT_VLC_USE_NEW_STATIC);               \
+    } while (0)
+
+/** INIT_CUSTOM_VLC_SPARSE_STATIC() without additional flags. */
+#define INIT_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g, h, i, j, static_size) \
+    INIT_CUSTOM_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g,          \
+                                  h, i, j, 0, static_size)
+
+/** INIT_CUSTOM_VLC_SPARSE_STATIC() with INIT_VLC_LE. */
+#define INIT_LE_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g, h, i, j, static_size) \
+    INIT_CUSTOM_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g,          \
+                                  h, i, j, INIT_VLC_LE, static_size)
+
+/** INIT_CUSTOM_VLC_SPARSE_STATIC() without a symbols table. */
+#define INIT_CUSTOM_VLC_STATIC(vlc, bits, a, b, c, d, e, f, g, flags, static_size) \
+    INIT_CUSTOM_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g,          \
+                                  NULL, 0, 0, flags, static_size)
+
+/** INIT_VLC_SPARSE_STATIC() without a symbols table. */
+#define INIT_VLC_STATIC(vlc, bits, a, b, c, d, e, f, g, static_size)       \
+    INIT_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g, NULL, 0, 0, static_size)
+
+/** INIT_LE_VLC_SPARSE_STATIC() without a symbols table. */
+#define INIT_LE_VLC_STATIC(vlc, bits, a, b, c, d, e, f, g, static_size) \
+    INIT_LE_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g, NULL, 0, 0, static_size)
+
+/**
+ * Initialize vlc with ff_init_vlc_from_lengths() on top of a function-local
+ * static array of static_size entries. INIT_VLC_USE_NEW_STATIC is added to
+ * flags, NULL is passed as log context and the return value is discarded.
+ */
+#define INIT_VLC_STATIC_FROM_LENGTHS(vlc, bits, nb_codes, lens, len_wrap,  \
+                                     symbols, symbols_wrap, symbols_size,  \
+                                     offset, flags, static_size)           \
+    do {                                                                   \
+        static VLCElem table[static_size];                                 \
+        (vlc)->table           = table;                                    \
+        (vlc)->table_allocated = static_size;                              \
+        ff_init_vlc_from_lengths(vlc, bits, nb_codes, lens, len_wrap,      \
+                                 symbols, symbols_wrap, symbols_size,      \
+                                 offset, flags | INIT_VLC_USE_NEW_STATIC,  \
+                                 NULL);                                    \
+    } while (0)
+
+#endif /* AVCODEC_VLC_H */
diff --git a/media/ffvpx/libavcodec/vorbis_parser.c b/media/ffvpx/libavcodec/vorbis_parser.c
new file mode 100644
index 0000000000..a7d15d4ce9
--- /dev/null
+++ b/media/ffvpx/libavcodec/vorbis_parser.c
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2012 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Vorbis audio parser
+ *
+ * Determines the duration for each packet.
+ */
+
+#include "config_components.h"
+
+#include "libavutil/log.h"
+
+#include "get_bits.h"
+#include "parser.h"
+#include "xiph.h"
+#include "vorbis_parser_internal.h"
+
+/* AVClass installed into every AVVorbisParseContext by vorbis_parse_init(),
+ * so that the context can be passed to av_log(). */
+static const AVClass vorbis_parser_class = {
+    .class_name = "Vorbis parser",
+    .item_name  = av_default_item_name,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+/**
+ * Validate the Id header and extract the two block sizes from it.
+ *
+ * The header must be at least 30 bytes long, start with packet type 1 and
+ * the "vorbis" signature, and have the lowest bit of byte 29 set. The two
+ * nibbles of byte 28 are the base-2 exponents of blocksize[0] (low nibble)
+ * and blocksize[1] (high nibble).
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA if any check fails
+ */
+static int parse_id_header(AVVorbisParseContext *s,
+                           const uint8_t *buf, int buf_size)
+{
+    if (buf_size < 30) {
+        /* Id header should be 30 bytes */
+        av_log(s, AV_LOG_ERROR, "Id header is too short\n");
+    } else if (buf[0] != 1) {
+        /* make sure this is the Id header */
+        av_log(s, AV_LOG_ERROR, "Wrong packet type in Id header\n");
+    } else if (memcmp(buf + 1, "vorbis", 6) != 0) {
+        /* check for header signature */
+        av_log(s, AV_LOG_ERROR, "Invalid packet signature in Id header\n");
+    } else if ((buf[29] & 0x1) == 0) {
+        av_log(s, AV_LOG_ERROR, "Invalid framing bit in Id header\n");
+    } else {
+        const int exponents = buf[28];
+
+        s->blocksize[0] = 1 << (exponents & 0xF);
+        s->blocksize[1] = 1 << (exponents >>  4);
+        return 0;
+    }
+
+    return AVERROR_INVALIDDATA;
+}
+
+/**
+ * Extract the mode count and the per-mode block flags from the Setup header.
+ *
+ * The header is not parsed from the front. Instead a byte-reversed copy is
+ * read with the bit reader, which walks the header from its end: first the
+ * framing bit is located, then fixed-size mode entries are matched until a
+ * 6-bit count agrees with the number of entries seen.
+ *
+ * On success s->mode_count, s->mode_mask, s->prev_mask and
+ * s->mode_blocksize[] are set.
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA for a malformed header,
+ *         AVERROR(ENOMEM) if the reversed copy cannot be allocated
+ */
+static int parse_setup_header(AVVorbisParseContext *s,
+                              const uint8_t *buf, int buf_size)
+{
+    GetBitContext gb, gb0;
+    uint8_t *rev_buf;
+    int i, ret = 0;
+    int got_framing_bit, mode_count, got_mode_header, last_mode_count = 0;
+
+    /* avoid overread */
+    if (buf_size < 7) {
+        av_log(s, AV_LOG_ERROR, "Setup header is too short\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* make sure this is the Setup header */
+    if (buf[0] != 5) {
+        av_log(s, AV_LOG_ERROR, "Wrong packet type in Setup header\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* check for header signature */
+    if (memcmp(&buf[1], "vorbis", 6)) {
+        av_log(s, AV_LOG_ERROR, "Invalid packet signature in Setup header\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* reverse bytes so we can easily read backwards with get_bits() */
+    if (!(rev_buf = av_malloc(buf_size))) {
+        av_log(s, AV_LOG_ERROR, "Out of memory\n");
+        return AVERROR(ENOMEM);
+    }
+    for (i = 0; i < buf_size; i++)
+        rev_buf[i] = buf[buf_size - 1 - i];
+    init_get_bits(&gb, rev_buf, buf_size * 8);
+
+    /* Skip bits until the first set bit, which is taken as the framing bit;
+     * got_framing_bit remembers the read position right after it. */
+    got_framing_bit = 0;
+    while (get_bits_left(&gb) > 97) {
+        if (get_bits1(&gb)) {
+            got_framing_bit = get_bits_count(&gb);
+            break;
+        }
+    }
+    if (!got_framing_bit) {
+        av_log(s, AV_LOG_ERROR, "Invalid Setup header\n");
+        ret = AVERROR_INVALIDDATA;
+        goto bad_header;
+    }
+
+    /* Now we search backwards to find possible valid mode counts. This is not
+     * fool-proof because we could have false positive matches and read too
+     * far, but there isn't really any way to be sure without parsing through
+     * all the many variable-sized fields before the modes. This approach seems
+     * to work well in testing, and it is similar to how it is handled in
+     * liboggz. */
+    mode_count = 0;
+    got_mode_header = 0;
+    while (get_bits_left(&gb) >= 97) {
+        /* One candidate mode entry is 41 bits: an 8-bit field that must not
+         * exceed 63, two 16-bit fields that must be zero and one flag bit.
+         * NOTE(review): presumably mapping, transform type, window type and
+         * block flag of a Vorbis mode - confirm against the specification. */
+        if (get_bits(&gb, 8) > 63 || get_bits(&gb, 16) || get_bits(&gb, 16))
+            break;
+        skip_bits(&gb, 1);
+        mode_count++;
+        if (mode_count > 64)
+            break;
+        /* Peek (on a copy of the reader) at the next 6 bits: if they encode
+         * mode_count - 1, this is a plausible end of the mode list. The last
+         * such match wins. */
+        gb0 = gb;
+        if (get_bits(&gb0, 6) + 1 == mode_count) {
+            got_mode_header = 1;
+            last_mode_count = mode_count;
+        }
+    }
+    if (!got_mode_header) {
+        av_log(s, AV_LOG_ERROR, "Invalid Setup header\n");
+        ret = AVERROR_INVALIDDATA;
+        goto bad_header;
+    }
+    /* All samples I've seen use <= 2 modes, so ask for a sample if we find
+     * more than that, as it is most likely a false positive. If we get any
+     * we may need to approach this the long way and parse the whole Setup
+     * header, but I hope very much that it never comes to that. */
+    if (last_mode_count > 2) {
+        avpriv_request_sample(s,
+                              "%d modes (either a false positive or a "
+                              "sample from an unknown encoder)",
+                              last_mode_count);
+    }
+    /* We're limiting the mode count to 63 so that we know that the previous
+     * block flag will be in the first packet byte. */
+    if (last_mode_count > 63) {
+        av_log(s, AV_LOG_ERROR, "Unsupported mode count: %d\n",
+               last_mode_count);
+        ret = AVERROR_INVALIDDATA;
+        goto bad_header;
+    }
+    s->mode_count = mode_count = last_mode_count;
+    /* Determine the number of bits required to code the mode and turn that
+     * into a bitmask to directly access the mode from the first frame byte. */
+    s->mode_mask = ((1 << (av_log2(mode_count - 1) + 1)) - 1) << 1;
+    /* The previous window flag is the next bit after the mode */
+    s->prev_mask = (s->mode_mask | 0x1) + 1;
+
+    /* Second pass: restart right after the framing bit and pick up the flag
+     * bit of each mode entry. The entries are met last mode first, hence the
+     * descending index. */
+    init_get_bits(&gb, rev_buf, buf_size * 8);
+    skip_bits_long(&gb, got_framing_bit);
+    for (i = mode_count - 1; i >= 0; i--) {
+        skip_bits_long(&gb, 40);
+        s->mode_blocksize[i] = get_bits1(&gb);
+    }
+
+bad_header:
+    /* shared exit path, also taken on success */
+    av_free(rev_buf);
+    return ret;
+}
+
+/**
+ * Fill a zeroed parser context from Vorbis extradata.
+ *
+ * Splits the extradata into its three Xiph headers, then parses the first
+ * (Id) and the third (Setup) one. extradata_parsed is set regardless of the
+ * outcome; valid_extradata only when everything succeeded.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int vorbis_parse_init(AVVorbisParseContext *s,
+                             const uint8_t *extradata, int extradata_size)
+{
+    const uint8_t *header_start[3];
+    int header_len[3];
+    int ret;
+
+    s->class = &vorbis_parser_class;
+    s->extradata_parsed = 1;
+
+    ret = avpriv_split_xiph_headers(extradata, extradata_size, 30,
+                                    header_start, header_len);
+    if (ret < 0) {
+        av_log(s, AV_LOG_ERROR, "Extradata corrupt.\n");
+        return ret;
+    }
+
+    ret = parse_id_header(s, header_start[0], header_len[0]);
+    if (ret < 0)
+        return ret;
+
+    ret = parse_setup_header(s, header_start[2], header_len[2]);
+    if (ret < 0)
+        return ret;
+
+    s->valid_extradata = 1;
+    /* start out with the block size selected by mode 0 */
+    s->previous_blocksize = s->blocksize[s->mode_blocksize[0]];
+
+    return 0;
+}
+
+/**
+ * Compute the duration of one Vorbis packet from its first byte.
+ *
+ * Returns 0 without touching the state when the context holds no valid
+ * extradata or the packet is empty. Packets with the lowest bit of the
+ * first byte set are special packets: they are reported through *flags and
+ * have duration 0, or are rejected when flags is NULL or the type is not
+ * 1, 3 or 5. For audio packets the mode is read from the first byte, the
+ * duration is (previous block size + current block size) / 4 and the
+ * current block size is remembered for the next call.
+ *
+ * @return duration, 0 for special packets, or AVERROR_INVALIDDATA
+ */
+int av_vorbis_parse_frame_flags(AVVorbisParseContext *s, const uint8_t *buf,
+                                int buf_size, int *flags)
+{
+    int mode, previous_blocksize, current_blocksize;
+
+    if (!s->valid_extradata || buf_size <= 0)
+        return 0;
+
+    if (buf[0] & 1) {
+        int special = 0;
+
+        /* If the user doesn't care about special packets, it's a bad one. */
+        if (flags) {
+            switch (buf[0]) {
+            case 1:
+                special = VORBIS_FLAG_HEADER;
+                break;
+            case 3:
+                special = VORBIS_FLAG_COMMENT;
+                break;
+            case 5:
+                special = VORBIS_FLAG_SETUP;
+                break;
+            default:
+                break;
+            }
+        }
+        if (!special) {
+            av_log(s, AV_LOG_ERROR, "Invalid packet\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        /* Special packets have no duration. */
+        *flags |= special;
+        return 0;
+    }
+
+    mode = s->mode_count == 1 ? 0 : (buf[0] & s->mode_mask) >> 1;
+    if (mode >= s->mode_count) {
+        av_log(s, AV_LOG_ERROR, "Invalid mode in packet\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    previous_blocksize = s->previous_blocksize;
+    if (s->mode_blocksize[mode]) {
+        /* the packet itself says which block size preceded it */
+        previous_blocksize = s->blocksize[!!(buf[0] & s->prev_mask)];
+    }
+    current_blocksize     = s->blocksize[s->mode_blocksize[mode]];
+    s->previous_blocksize = current_blocksize;
+
+    return (previous_blocksize + current_blocksize) >> 2;
+}
+
+/**
+ * Same as av_vorbis_parse_frame_flags() with flags == NULL, i.e. special
+ * (header, comment, setup) packets are treated as invalid.
+ */
+int av_vorbis_parse_frame(AVVorbisParseContext *s, const uint8_t *buf,
+                          int buf_size)
+{
+    return av_vorbis_parse_frame_flags(s, buf, buf_size, NULL);
+}
+
+/**
+ * Reset the remembered previous block size to blocksize[0].
+ * Does nothing for a context without valid extradata.
+ */
+void av_vorbis_parse_reset(AVVorbisParseContext *s)
+{
+    if (!s->valid_extradata)
+        return;
+
+    s->previous_blocksize = s->blocksize[0];
+}
+
+/**
+ * Free a parser context by handing the caller's pointer variable to
+ * av_freep(). The context owns no further allocations in this file.
+ */
+void av_vorbis_parse_free(AVVorbisParseContext **s)
+{
+    av_freep(s);
+}
+
+/**
+ * Allocate a zeroed parser context and initialize it from extradata.
+ *
+ * @return the new context, or NULL if the allocation or the parsing of the
+ *         extradata failed (the context is freed again in the latter case)
+ */
+AVVorbisParseContext *av_vorbis_parse_init(const uint8_t *extradata,
+                                           int extradata_size)
+{
+    AVVorbisParseContext *ctx = av_mallocz(sizeof(*ctx));
+
+    if (!ctx)
+        return NULL;
+
+    if (vorbis_parse_init(ctx, extradata, extradata_size) >= 0)
+        return ctx;
+
+    av_vorbis_parse_free(&ctx);
+    return NULL;
+}
+
+#if CONFIG_VORBIS_PARSER
+
+/** Private data of the AVCodecParser wrapper around AVVorbisParseContext. */
+typedef struct VorbisParseContext {
+    AVVorbisParseContext *vp; ///< created lazily by vorbis_parse(), NULL until then
+} VorbisParseContext;
+
+/**
+ * Parser callback: annotate the packet with its duration and pass it on.
+ *
+ * The Vorbis parse context is created from the codec extradata on the first
+ * call where extradata is available. If there is no context, or the duration
+ * cannot be determined, s1->duration is left untouched.
+ *
+ * @return buf_size; *poutbuf / *poutbuf_size always describe the input packet
+ */
+static int vorbis_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    VorbisParseContext *priv = s1->priv_data;
+
+    if (!priv->vp && avctx->extradata && avctx->extradata_size)
+        priv->vp = av_vorbis_parse_init(avctx->extradata, avctx->extradata_size);
+
+    if (priv->vp) {
+        const int duration = av_vorbis_parse_frame(priv->vp, buf, buf_size);
+
+        if (duration >= 0)
+            s1->duration = duration;
+    }
+
+    /* always return the full packet. this parser isn't doing any splitting or
+       combining, only packet analysis */
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    return buf_size;
+}
+
+/** Parser close callback: release the lazily created Vorbis parse context. */
+static void vorbis_parser_close(AVCodecParserContext *ctx)
+{
+    VorbisParseContext *priv = ctx->priv_data;
+
+    av_vorbis_parse_free(&priv->vp);
+}
+
+/* Parser descriptor for AV_CODEC_ID_VORBIS, wiring up vorbis_parse() and
+ * vorbis_parser_close() with VorbisParseContext as private data. */
+const AVCodecParser ff_vorbis_parser = {
+    .codec_ids      = { AV_CODEC_ID_VORBIS },
+    .priv_data_size = sizeof(VorbisParseContext),
+    .parser_parse   = vorbis_parse,
+    .parser_close   = vorbis_parser_close,
+};
+#endif /* CONFIG_VORBIS_PARSER */
diff --git a/media/ffvpx/libavcodec/vorbis_parser.h b/media/ffvpx/libavcodec/vorbis_parser.h
new file mode 100644
index 0000000000..789932ac49
--- /dev/null
+++ b/media/ffvpx/libavcodec/vorbis_parser.h
@@ -0,0 +1,74 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * A public API for Vorbis parsing
+ *
+ * Determines the duration for each packet.
+ */
+
+#ifndef AVCODEC_VORBIS_PARSER_H
+#define AVCODEC_VORBIS_PARSER_H
+
+#include <stdint.h>
+
+typedef struct AVVorbisParseContext AVVorbisParseContext;
+
+/**
+ * Allocate and initialize the Vorbis parser using headers in the extradata.
+ */
+AVVorbisParseContext *av_vorbis_parse_init(const uint8_t *extradata,
+                                           int extradata_size);
+
+/**
+ * Free the parser and everything associated with it.
+ */
+void av_vorbis_parse_free(AVVorbisParseContext **s);
+
+#define VORBIS_FLAG_HEADER  0x00000001
+#define VORBIS_FLAG_COMMENT 0x00000002
+#define VORBIS_FLAG_SETUP   0x00000004
+
+/**
+ * Get the duration for a Vorbis packet.
+ *
+ * If @p flags is @c NULL,
+ * special frames are considered invalid.
+ *
+ * @param s        Vorbis parser context
+ * @param buf      buffer containing a Vorbis frame
+ * @param buf_size size of the buffer
+ * @param flags    flags for special frames
+ */
+int av_vorbis_parse_frame_flags(AVVorbisParseContext *s, const uint8_t *buf,
+                                int buf_size, int *flags);
+
+/**
+ * Get the duration for a Vorbis packet.
+ *
+ * @param s        Vorbis parser context
+ * @param buf      buffer containing a Vorbis frame
+ * @param buf_size size of the buffer
+ */
+int av_vorbis_parse_frame(AVVorbisParseContext *s, const uint8_t *buf,
+                          int buf_size);
+
+void av_vorbis_parse_reset(AVVorbisParseContext *s);
+
+#endif /* AVCODEC_VORBIS_PARSER_H */
diff --git a/media/ffvpx/libavcodec/vorbis_parser_internal.h b/media/ffvpx/libavcodec/vorbis_parser_internal.h
new file mode 100644
index 0000000000..691a842385
--- /dev/null
+++ b/media/ffvpx/libavcodec/vorbis_parser_internal.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2012 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Vorbis audio parser
+ *
+ * Determines the duration for each packet.
+ */
+
+#ifndef AVCODEC_VORBIS_PARSER_INTERNAL_H
+#define AVCODEC_VORBIS_PARSER_INTERNAL_H
+
+#include "avcodec.h"
+#include "vorbis_parser.h"
+
+struct AVVorbisParseContext {
+    const AVClass *class;       ///< NOTE(review): presumably the class used for av_log() -- confirm
+    int extradata_parsed;       ///< we have attempted to parse extradata
+    int valid_extradata;        ///< extradata is valid, so we can calculate duration
+    int blocksize[2];           ///< short and long window sizes
+    int previous_blocksize;     ///< previous window size
+    int mode_blocksize[64];     ///< window size mapping for each mode
+    int mode_count;             ///< number of modes
+    int mode_mask;              ///< bitmask used to get the mode in each packet
+    int prev_mask;              ///< bitmask used to get the previous mode flag in each packet
+};
+
+#endif /* AVCODEC_VORBIS_PARSER_INTERNAL_H */
diff --git a/media/ffvpx/libavcodec/vp3dsp.h b/media/ffvpx/libavcodec/vp3dsp.h
new file mode 100644
index 0000000000..3b849ec05d
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp3dsp.h
@@ -0,0 +1,64 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP3DSP_H
+#define AVCODEC_VP3DSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct VP3DSPContext {
+    /**
+     * Copy 8xH pixels from source to destination buffer using a bilinear
+     * filter with no rounding (i.e. *dst = (*a + *b) >> 1).
+     *
+     * @param dst destination buffer, aligned by 8
+     * @param a first source buffer, no alignment
+     * @param b second source buffer, no alignment
+     * @param stride distance between two lines in source/dest buffers
+     * @param h height
+     */
+    void (*put_no_rnd_pixels_l2)(uint8_t *dst,
+                                 const uint8_t *a,
+                                 const uint8_t *b,
+                                 ptrdiff_t stride, int h);
+    /* NOTE(review): the hooks below carry no documentation here; by name they are IDCT and loop-filter kernels -- confirm */
+    void (*idct_put)(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+    void (*idct_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+    void (*idct_dc_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+    void (*v_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values);
+    void (*h_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values);
+    void (*v_loop_filter_unaligned)(uint8_t *src, ptrdiff_t stride, int *bounding_values);
+    void (*h_loop_filter_unaligned)(uint8_t *src, ptrdiff_t stride, int *bounding_values);
+} VP3DSPContext;
+
+void ff_vp3dsp_v_loop_filter_12(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values);
+void ff_vp3dsp_h_loop_filter_12(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values);
+
+void ff_vp3dsp_idct10_put(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+void ff_vp3dsp_idct10_add(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+
+void ff_vp3dsp_init(VP3DSPContext *c, int flags);
+void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags);
+void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags);
+void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags);
+void ff_vp3dsp_init_mips(VP3DSPContext *c, int flags);
+
+void ff_vp3dsp_set_bounding_values(int * bound_values_array, int filter_limit);
+
+#endif /* AVCODEC_VP3DSP_H */
diff --git a/media/ffvpx/libavcodec/vp56.h b/media/ffvpx/libavcodec/vp56.h
new file mode 100644
index 0000000000..9dc0b9c7ad
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp56.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * VP5 and VP6 compatible video decoder (common features)
+ */
+
+#ifndef AVCODEC_VP56_H
+#define AVCODEC_VP56_H
+
+#include "libavutil/mem_internal.h"
+
+#include "avcodec.h"
+#include "get_bits.h"
+#include "hpeldsp.h"
+#include "h264chroma.h"
+#include "videodsp.h"
+#include "vp3dsp.h"
+#include "vp56dsp.h"
+#include "vpx_rac.h"
+
+typedef struct vp56_context VP56Context;
+
+typedef enum { /* names a frame slot; stored e.g. in VP56RefDc.ref_frame */
+    VP56_FRAME_NONE     =-1, /* sentinel: no frame */
+    VP56_FRAME_CURRENT  = 0,
+    VP56_FRAME_PREVIOUS = 1,
+    VP56_FRAME_GOLDEN   = 2,
+} VP56Frame;
+
+typedef enum { /* macroblock coding types; the _PF / _GF suffix is the reference: previous or golden frame */
+    VP56_MB_INTER_NOVEC_PF = 0,  /**< Inter MB, no vector, from previous frame */
+    VP56_MB_INTRA          = 1,  /**< Intra MB */
+    VP56_MB_INTER_DELTA_PF = 2,  /**< Inter MB, above/left vector + delta, from previous frame */
+    VP56_MB_INTER_V1_PF    = 3,  /**< Inter MB, first vector, from previous frame */
+    VP56_MB_INTER_V2_PF    = 4,  /**< Inter MB, second vector, from previous frame */
+    VP56_MB_INTER_NOVEC_GF = 5,  /**< Inter MB, no vector, from golden frame */
+    VP56_MB_INTER_DELTA_GF = 6,  /**< Inter MB, above/left vector + delta, from golden frame */
+    VP56_MB_INTER_4V       = 7,  /**< Inter MB, 4 vectors, from previous frame */
+    VP56_MB_INTER_V1_GF    = 8,  /**< Inter MB, first vector, from golden frame */
+    VP56_MB_INTER_V2_GF    = 9,  /**< Inter MB, second vector, from golden frame */
+} VP56mb;
+
+typedef struct VP56Tree {
+  int8_t val;      /* > 0: offset to the node taken on a set bit; <= 0: leaf holding the negated symbol */
+  int8_t prob_idx; /* index into the probs[] array given to vp56_rac_get_tree() */
+} VP56Tree;
+
+typedef struct VP56mv {
+    DECLARE_ALIGNED(4, int16_t, x); /* NOTE(review): 4-byte alignment presumably lets x/y be handled as one 32-bit word -- confirm */
+    int16_t y;
+} VP56mv;
+
+#define VP56_SIZE_CHANGE 1
+
+typedef void (*VP56ParseVectorAdjustment)(VP56Context *s,
+                                          VP56mv *vect);
+typedef void (*VP56Filter)(VP56Context *s, uint8_t *dst, uint8_t *src,
+                           int offset1, int offset2, ptrdiff_t stride,
+                           VP56mv mv, int mask, int select, int luma);
+typedef int  (*VP56ParseCoeff)(VP56Context *s);
+typedef void (*VP56DefaultModelsInit)(VP56Context *s);
+typedef void (*VP56ParseVectorModels)(VP56Context *s);
+typedef int  (*VP56ParseCoeffModels)(VP56Context *s);
+typedef int  (*VP56ParseHeader)(VP56Context *s, const uint8_t *buf,
+                                int buf_size);
+
+typedef struct VP56RefDc { /* element type of the above_blocks / left_block DC predictor arrays in vp56_context */
+    uint8_t not_null_dc; /* NOTE(review): presumably non-zero when dc_coeff is usable -- confirm */
+    VP56Frame ref_frame; /* frame slot associated with this entry */
+    int16_t dc_coeff;
+} VP56RefDc;
+
+typedef struct VP56Macroblock {
+    uint8_t type; /* NOTE(review): presumably a VP56mb value stored in 8 bits -- confirm */
+    VP56mv mv;
+} VP56Macroblock;
+
+typedef struct VP56Model { /* coding model tables; vp56_context embeds one (model) and also keeps a pointer (modelp) */
+    uint8_t coeff_reorder[64];       /* used in vp6 only */
+    uint8_t coeff_index_to_pos[64];  /* used in vp6 only */
+    uint8_t coeff_index_to_idct_selector[64]; /* used in vp6 only */
+    uint8_t vector_sig[2];           /* delta sign */
+    uint8_t vector_dct[2];           /* delta coding types */
+    uint8_t vector_pdi[2][2];        /* predefined delta init */
+    uint8_t vector_pdv[2][7];        /* predefined delta values */
+    uint8_t vector_fdv[2][8];        /* 8 bit delta value definition */
+    uint8_t coeff_dccv[2][11];       /* DC coeff value */
+    uint8_t coeff_ract[2][3][6][11]; /* Run/AC coding type and AC coeff value */
+    uint8_t coeff_acct[2][3][3][6][5];/* vp5 only AC coding type for coding group < 3 */
+    uint8_t coeff_dcct[2][36][5];    /* DC coeff coding type */
+    uint8_t coeff_runv[2][14];       /* run value (vp6 only) */
+    uint8_t mb_type[3][10][10];      /* model for decoding MB type */
+    uint8_t mb_types_stats[3][10][2];/* contextual, next MB type stats */
+} VP56Model;
+
+struct vp56_context { /* decoder state common to VP5 and VP6 (typedef'd as VP56Context above) */
+    AVCodecContext *avctx;
+    H264ChromaContext h264chroma;
+    HpelDSPContext hdsp;
+    VideoDSPContext vdsp;
+    VP3DSPContext vp3dsp;
+    VP56DSPContext vp56dsp;
+    uint8_t idct_scantable[64];
+    AVFrame *frames[4];
+    uint8_t *edge_emu_buffer_alloc;
+    uint8_t *edge_emu_buffer;
+    VPXRangeCoder c;
+    VPXRangeCoder cc;
+    VPXRangeCoder *ccp;
+    int sub_version;
+
+    /* frame info */
+    int golden_frame;
+    int plane_width[4];
+    int plane_height[4];
+    int mb_width;   /* number of horizontal MB */
+    int mb_height;  /* number of vertical MB */
+    int block_offset[6];
+
+    int quantizer;
+    uint16_t dequant_dc;
+    uint16_t dequant_ac;
+
+    /* DC predictors management */
+    VP56RefDc *above_blocks;
+    VP56RefDc left_block[4];
+    int above_block_idx[6];
+    int16_t prev_dc[3][3];    /* [plane][ref_frame] */
+
+    /* blocks / macroblock */
+    VP56mb mb_type;
+    VP56Macroblock *macroblocks;
+    DECLARE_ALIGNED(16, int16_t, block_coeff)[6][64];
+    int idct_selector[6];
+
+    /* motion vectors */
+    VP56mv mv[6];  /* vectors for each block in MB */
+    VP56mv vector_candidate[2];
+    int vector_candidate_pos;
+
+    /* filtering hints */
+    int filter_header;               /* used in vp6 only */
+    int deblock_filtering;
+    int filter_selection;
+    int filter_mode;
+    int max_vector_length;
+    int sample_variance_threshold;
+    DECLARE_ALIGNED(8, int, bounding_values_array)[256];
+
+    uint8_t coeff_ctx[4][64];              /* used in vp5 only */
+    uint8_t coeff_ctx_last[4];             /* used in vp5 only */
+
+    int has_alpha;
+
+    /* upside-down flipping hints */
+    int flip;  /* are we flipping ? */
+    int frbi;  /* first row block index in MB */
+    int srbi;  /* second row block index in MB */
+    ptrdiff_t stride[4];  /* stride for each plane */
+
+    const uint8_t *vp56_coord_div;
+    VP56ParseVectorAdjustment parse_vector_adjustment;
+    VP56Filter filter;
+    VP56ParseCoeff parse_coeff;
+    VP56DefaultModelsInit default_models_init;
+    VP56ParseVectorModels parse_vector_models;
+    VP56ParseCoeffModels parse_coeff_models;
+    VP56ParseHeader parse_header;
+
+    /* for "slice" parallelism between YUV and A */
+    VP56Context *alpha_context;
+
+    VP56Model *modelp;
+    VP56Model model;
+
+    /* huffman decoding */
+    int use_huffman;
+    GetBitContext gb;
+    VLC dccv_vlc[2];
+    VLC runv_vlc[2];
+    VLC ract_vlc[2][3][6];
+    unsigned int nb_null[2][2];       /* number of consecutive NULL DC/AC */
+
+    int have_undamaged_frame;
+    int discard_frame;
+};
+
+
+/**
+ * Initializes a VP56Context. Expects its caller to clean up
+ * in case of error.
+ */
+int ff_vp56_init_context(AVCodecContext *avctx, VP56Context *s,
+                          int flip, int has_alpha);
+int ff_vp56_free_context(VP56Context *s);
+void ff_vp56_init_dequant(VP56Context *s, int quantizer);
+int ff_vp56_decode_frame(AVCodecContext *avctx, AVFrame *frame,
+                         int *got_frame, AVPacket *avpkt);
+
+
+/**
+ * vp56 specific range coder implementation
+ */
+
+static int vp56_rac_gets(VPXRangeCoder *c, int bits)
+{
+    int acc = 0;
+    int remaining;
+
+    /* pull `bits` decisions from the coder, earliest one ends up most significant */
+    for (remaining = bits; remaining != 0; remaining--)
+        acc = (acc << 1) | vpx_rac_get(c);
+    return acc;
+}
+
+// P(7): non-zero value built from a fixed 7-decision read. NOTE(review): the 'bits' argument is never used.
+static av_unused int vp56_rac_gets_nn(VPXRangeCoder *c, int bits)
+{
+    int v = vp56_rac_gets(c, 7) << 1; /* always 7 decisions, result doubled */
+    return v + !v;                    /* 0 becomes 1, any other value is returned as is */
+}
+
+static av_always_inline
+int vp56_rac_get_tree(VPXRangeCoder *c,
+                      const VP56Tree *tree,
+                      const uint8_t *probs)
+{
+    /* val > 0 marks an inner node: val is the offset taken when the bit is set */
+    while (tree->val > 0) {
+        int set = vpx_rac_get_prob_branchy(c, probs[tree->prob_idx]);
+        tree += set ? tree->val : 1;
+    }
+    /* val <= 0 marks a leaf: the symbol is stored negated */
+    return -tree->val;
+}
+
+#endif /* AVCODEC_VP56_H */
diff --git a/media/ffvpx/libavcodec/vp56dsp.h b/media/ffvpx/libavcodec/vp56dsp.h
new file mode 100644
index 0000000000..e35e232ea3
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp56dsp.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP56DSP_H
+#define AVCODEC_VP56DSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct VP56DSPContext {
+    void (*edge_filter_hor)(uint8_t *yuv, ptrdiff_t stride, int t);
+    void (*edge_filter_ver)(uint8_t *yuv, ptrdiff_t stride, int t);
+    /* same signature as the C routine ff_vp6_filter_diag4_c() declared in this header */
+    void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                             const int16_t *h_weights,const int16_t *v_weights);
+} VP56DSPContext;
+
+void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                           const int16_t *h_weights, const int16_t *v_weights);
+
+void ff_vp5dsp_init(VP56DSPContext *s);
+void ff_vp6dsp_init(VP56DSPContext *s);
+
+void ff_vp6dsp_init_arm(VP56DSPContext *s);
+void ff_vp6dsp_init_x86(VP56DSPContext *s);
+
+#endif /* AVCODEC_VP56DSP_H */
diff --git a/media/ffvpx/libavcodec/vp8.c b/media/ffvpx/libavcodec/vp8.c
new file mode 100644
index 0000000000..db2419deaf
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8.c
@@ -0,0 +1,3014 @@
+/*
+ * VP7/VP8 compatible video decoder
+ *
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ * Copyright (C) 2010 Fiona Glaser
+ * Copyright (C) 2012 Daniel Kang
+ * Copyright (C) 2014 Peter Ross
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+
+#include "libavutil/mem_internal.h"
+
+#include "avcodec.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "hwconfig.h"
+#include "mathops.h"
+#include "thread.h"
+#include "threadframe.h"
+#include "vp8.h"
+#include "vp89_rac.h"
+#include "vp8data.h"
+#include "vpx_rac.h"
+
+#if ARCH_ARM
+#   include "arm/vp8.h"
+#endif
+
+// fixme: add 1 bit to all the calls to this?
+static int vp8_rac_get_sint(VPXRangeCoder *c, int bits)
+{
+    int magnitude;
+
+    /* presence flag: when clear nothing else is read and the value is 0 */
+    if (!vp89_rac_get(c))
+        return 0;
+
+    /* magnitude of width `bits`, followed by one sign flag (set = negative) */
+    magnitude = vp89_rac_get_uint(c, bits);
+    if (vp89_rac_get(c))
+        return -magnitude;
+    return magnitude;
+}
+
+static int vp8_rac_get_nn(VPXRangeCoder *c)
+{
+    const int doubled = vp89_rac_get_uint(c, 7) << 1; /* width-7 read, times two */
+    return doubled ? doubled : 1;                     /* zero is replaced by one  */
+}
+
+// DCTextra
+static int vp8_rac_get_coeff(VPXRangeCoder *c, const uint8_t *prob)
+{
+    const uint8_t *p = prob;
+    int acc = 0;
+    /* one decision per table entry, first one most significant; a 0 entry ends the table */
+    do {
+        acc = (acc << 1) + vpx_rac_get_prob(c, *p++);
+    } while (*p != 0);
+    return acc;
+}
+
+static void free_buffers(VP8Context *s)
+{
+    int i;
+    if (s->thread_data) /* NOTE(review): when update_dimensions() fails mid-loop, entries whose mutex/cond were never initialised are destroyed here too */
+        for (i = 0; i < MAX_THREADS; i++) {
+#if HAVE_THREADS
+            pthread_cond_destroy(&s->thread_data[i].cond);
+            pthread_mutex_destroy(&s->thread_data[i].lock);
+#endif
+            av_freep(&s->thread_data[i].filter_strength);
+        }
+    av_freep(&s->thread_data);
+    av_freep(&s->macroblocks_base);
+    av_freep(&s->intra4x4_pred_mode_top);
+    av_freep(&s->top_nnz);
+    av_freep(&s->top_border);
+    /* macroblocks pointed into macroblocks_base (base + 1), which has just been freed */
+    s->macroblocks = NULL;
+}
+
+static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
+{
+    int ret; /* the picture buffer is obtained first, then the per-frame side buffers */
+    if ((ret = ff_thread_get_ext_buffer(s->avctx, &f->tf,
+                                        ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
+        return ret;
+    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) /* one zeroed byte per macroblock */
+        goto fail;
+    if (s->avctx->hwaccel) {
+        const AVHWAccel *hwaccel = s->avctx->hwaccel;
+        if (hwaccel->frame_priv_data_size) { /* zeroed private area of the size the hwaccel declares */
+            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+            if (!f->hwaccel_priv_buf)
+                goto fail;
+            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
+        }
+    }
+    return 0;
+    /* both jumps to fail come from failed allocations, hence the fixed ENOMEM */
+fail:
+    av_buffer_unref(&f->seg_map);
+    ff_thread_release_ext_buffer(s->avctx, &f->tf);
+    return AVERROR(ENOMEM);
+}
+
+static void vp8_release_frame(VP8Context *s, VP8Frame *f)
+{
+    av_buffer_unref(&f->seg_map);
+    av_buffer_unref(&f->hwaccel_priv_buf);
+    f->hwaccel_picture_private = NULL; /* it pointed into hwaccel_priv_buf->data, unreferenced just above */
+    ff_thread_release_ext_buffer(s->avctx, &f->tf);
+}
+
+#if CONFIG_VP8_DECODER
+static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, const VP8Frame *src)
+{
+    int ret;
+    /* Make dst a new reference to src. On ENOMEM dst is released again. */
+    vp8_release_frame(s, dst);
+
+    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
+        return ret;
+    if (src->seg_map &&
+        !(dst->seg_map = av_buffer_ref(src->seg_map)))
+        goto fail;
+    if (src->hwaccel_picture_private) {
+        if (!(dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf)))
+            goto fail;
+        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
+    }
+    return 0;
+
+fail:
+    /* drop what was taken so far; the hwaccel path used to return with dst half referenced */
+    vp8_release_frame(s, dst);
+    return AVERROR(ENOMEM);
+}
+#endif /* CONFIG_VP8_DECODER */
+
+static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
+{
+    VP8Context *const ctx = avctx->priv_data;
+    VP8Frame *f = ctx->frames;
+    VP8Frame *const end = f + FF_ARRAY_ELEMS(ctx->frames);
+    /* release every frame slot, then clear the role pointers in framep[] */
+    while (f < end)
+        vp8_release_frame(ctx, f++);
+    memset(ctx->framep, 0, sizeof(ctx->framep));
+    if (free_mem) /* optionally drop the buffers owned by free_buffers() as well */
+        free_buffers(ctx);
+}
+
+static void vp8_decode_flush(AVCodecContext *avctx)
+{
+    vp8_decode_flush_impl(avctx, 0); /* free_mem = 0: frames are released, free_buffers() is not called */
+}
+
+static VP8Frame *vp8_find_free_buffer(VP8Context *s)
+{
+    VP8Frame *frame = NULL;
+    int i;
+
+    // find a slot of s->frames that none of the four framep[] roles points at
+    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
+        if (&s->frames[i] != s->framep[VP8_FRAME_CURRENT]  &&
+            &s->frames[i] != s->framep[VP8_FRAME_PREVIOUS] &&
+            &s->frames[i] != s->framep[VP8_FRAME_GOLDEN]   &&
+            &s->frames[i] != s->framep[VP8_FRAME_ALTREF]) {
+            frame = &s->frames[i];
+            break;
+        }
+    if (!frame) { // every slot is in use: treated as fatal, the process is aborted
+        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
+        abort();
+    }
+    if (frame->tf.f->buf[0]) // the slot still holds an old picture: release it first
+        vp8_release_frame(s, frame);
+
+    return frame;
+}
+
+static enum AVPixelFormat get_pixel_format(VP8Context *s)
+{
+    enum AVPixelFormat pix_fmts[] = { /* candidates: compiled-in hwaccel formats first, then YUV420P */
+#if CONFIG_VP8_VAAPI_HWACCEL
+        AV_PIX_FMT_VAAPI,
+#endif
+#if CONFIG_VP8_NVDEC_HWACCEL
+        AV_PIX_FMT_CUDA,
+#endif
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_NONE,
+    };
+    /* the choice among the candidates is delegated to ff_get_format() */
+    return ff_get_format(s->avctx, pix_fmts);
+}
+
+static av_always_inline
+int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
+{
+    AVCodecContext *avctx = s->avctx;
+    int i, ret, dim_reset = 0;
+    /* size change: release all frames and buffers, then adopt the new dimensions */
+    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
+        height != s->avctx->height) {
+        vp8_decode_flush_impl(s->avctx, 1);
+
+        ret = ff_set_dimensions(s->avctx, width, height);
+        if (ret < 0)
+            return ret;
+        /* NOTE(review): the flush above freed macroblocks_base through av_freep(), which is documented to NULL it, so this looks always 0 -- confirm */
+        dim_reset = (s->macroblocks_base != NULL);
+    }
+
+    if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
+         !s->actually_webp && !is_vp7) {
+        s->pix_fmt = get_pixel_format(s);
+        if (s->pix_fmt < 0)
+            return AVERROR(EINVAL);
+        avctx->pix_fmt = s->pix_fmt;
+    }
+    /* size in 16x16 macroblocks, rounded up */
+    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
+    s->mb_height = (s->avctx->coded_height + 15) / 16;
+    /* layout 1 is used for VP7 and for slice threading with more than one thread */
+    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
+                   avctx->thread_count > 1;
+    if (!s->mb_layout) { // Frame threading and one thread
+        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
+                                               sizeof(*s->macroblocks));
+        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
+    } else // Sliced threading
+        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
+                                         sizeof(*s->macroblocks));
+    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
+    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
+    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
+    /* intra4x4_pred_mode_top is only allocated (and only required) when mb_layout is 0 */
+    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
+        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
+        free_buffers(s);
+        return AVERROR(ENOMEM);
+    }
+
+    for (i = 0; i < MAX_THREADS; i++) {
+        s->thread_data[i].filter_strength =
+            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
+        if (!s->thread_data[i].filter_strength) {
+            free_buffers(s);
+            return AVERROR(ENOMEM);
+        }
+#if HAVE_THREADS
+        pthread_mutex_init(&s->thread_data[i].lock, NULL);
+        pthread_cond_init(&s->thread_data[i].cond, NULL);
+#endif
+    }
+    /* the first element of macroblocks_base stays in front of macroblocks[0] */
+    s->macroblocks = s->macroblocks_base + 1;
+
+    return 0;
+}
+
+static int vp7_update_dimensions(VP8Context *s, int width, int height)
+{
+    return update_dimensions(s, width, height, IS_VP7); /* fixed is_vp7 argument for the always-inline worker */
+}
+
+static int vp8_update_dimensions(VP8Context *s, int width, int height)
+{
+    return update_dimensions(s, width, height, IS_VP8); /* fixed is_vp7 argument for the always-inline worker */
+}
+
+
+static void parse_segment_info(VP8Context *s)
+{
+    VPXRangeCoder *c = &s->c;
+    int i;
+    /* two flags come first; the order of all reads below follows the bitstream and must not change */
+    s->segmentation.update_map = vp89_rac_get(c);
+    s->segmentation.update_feature_data = vp89_rac_get(c);
+
+    if (s->segmentation.update_feature_data) {
+        s->segmentation.absolute_vals = vp89_rac_get(c);
+        /* four quantizer values (width 7), then four filter levels (width 6), all signed */
+        for (i = 0; i < 4; i++)
+            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
+
+        for (i = 0; i < 4; i++)
+            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
+    }
+    if (s->segmentation.update_map) /* three probabilities; 255 is stored when the per-entry flag is clear */
+        for (i = 0; i < 3; i++)
+            s->prob->segmentid[i] = vp89_rac_get(c) ? vp89_rac_get_uint(c, 8) : 255;
+}
+
+static void update_lf_deltas(VP8Context *s)
+{
+    VPXRangeCoder *c = &s->c;
+    int i;
+
+    /* per-reference deltas: update flag, width-6 magnitude, sign flag */
+    for (i = 0; i < 4; i++) {
+        int delta;
+        if (!vp89_rac_get(c))
+            continue;
+        delta = vp89_rac_get_uint(c, 6);
+        s->lf_delta.ref[i] = vp89_rac_get(c) ? -delta : delta;
+    }
+
+    /* per-mode deltas are coded with the same three fields */
+    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
+        int delta;
+        if (!vp89_rac_get(c))
+            continue;
+        delta = vp89_rac_get_uint(c, 6);
+        s->lf_delta.mode[i] = vp89_rac_get(c) ? -delta : delta;
+    }
+}
+
+static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
+{
+    const uint8_t *sizes = buf; /* start of the table of 3-byte partition sizes (read with AV_RL24) */
+    int i;
+    int ret;
+    /* the partition count is a power of two taken from the header coder s->c */
+    s->num_coeff_partitions = 1 << vp89_rac_get_uint(&s->c, 2);
+    /* step over the size table: one 3-byte entry per partition except the last */
+    buf      += 3 * (s->num_coeff_partitions - 1);
+    buf_size -= 3 * (s->num_coeff_partitions - 1);
+    if (buf_size < 0)
+        return -1;
+
+    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
+        int size = AV_RL24(sizes + 3 * i);
+        if (buf_size - size < 0) /* declared size runs past the remaining data */
+            return -1;
+        s->coeff_partition_size[i] = size;
+
+        ret = ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, size);
+        if (ret < 0)
+            return ret;
+        buf      += size;
+        buf_size -= size;
+    }
+    /* the last partition has no size entry and takes whatever data is left */
+    s->coeff_partition_size[i] = buf_size;
+    ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, buf_size); /* NOTE(review): result ignored here, unlike in the loop */
+
+    return 0;
+}
+
+static void vp7_get_quants(VP8Context *s)
+{
+    VPXRangeCoder *c = &s->c;
+    /* yac_qi is always coded; every other index is coded only when its flag is set, otherwise it takes yac_qi */
+    int yac_qi  = vp89_rac_get_uint(c, 7);
+    int ydc_qi  = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
+    int y2dc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
+    int y2ac_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
+    int uvdc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
+    int uvac_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
+    /* only qmat[0] is filled; chroma reuses the luma lookup tables, with the DC multiplier capped at 132 */
+    s->qmat[0].luma_qmul[0]    =       vp7_ydc_qlookup[ydc_qi];
+    s->qmat[0].luma_qmul[1]    =       vp7_yac_qlookup[yac_qi];
+    s->qmat[0].luma_dc_qmul[0] =       vp7_y2dc_qlookup[y2dc_qi];
+    s->qmat[0].luma_dc_qmul[1] =       vp7_y2ac_qlookup[y2ac_qi];
+    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
+    s->qmat[0].chroma_qmul[1]  =       vp7_yac_qlookup[uvac_qi];
+}
+
+static void vp8_get_quants(VP8Context *s)
+{
+    VPXRangeCoder *c = &s->c;
+    int i, base_qi;
+    /* base index (width 7) followed by five signed deltas (width 4), read in bitstream order */
+    s->quant.yac_qi     = vp89_rac_get_uint(c, 7);
+    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
+    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
+    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
+    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
+    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
+    /* one quantizer set per segment; with segmentation off all four sets use yac_qi as base */
+    for (i = 0; i < 4; i++) {
+        if (s->segmentation.enabled) {
+            base_qi = s->segmentation.base_quant[i];
+            if (!s->segmentation.absolute_vals)
+                base_qi += s->quant.yac_qi;
+        } else
+            base_qi = s->quant.yac_qi;
+        /* every table index is clipped to 7 bits by av_clip_uintp2(..., 7) before the lookup */
+        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
+        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
+        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
+        /* 101581>>16 is equivalent to 155/100 */
+        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
+        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
+        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
+        /* final range limits: luma DC second multiplier at least 8, chroma DC multiplier at most 132 */
+        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
+        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
+    }
+}
+
+/**
+ * Determine which buffers golden and altref should be updated with after this frame.
+ * The spec isn't clear here, so I'm going by my understanding of what libvpx does
+ *
+ * Intra frames update all 3 references
+ * Inter frames update VP8_FRAME_PREVIOUS if the update_last flag is set
+ * If the update (golden|altref) flag is set, it's updated with the current frame
+ *      if update_last is set, and VP8_FRAME_PREVIOUS otherwise.
+ * If the flag is not set, the number read means:
+ *      0: no update
+ *      1: VP8_FRAME_PREVIOUS
+ *      2: update golden with altref, or update altref with golden
+ */
+static VP8FrameType ref_to_update(VP8Context *s, int update, VP8FrameType ref)
+{
+    int copy_mode;
+
+    /* explicit update flag set: nothing more is read from the bitstream */
+    if (update)
+        return VP8_FRAME_CURRENT;
+
+    copy_mode = vp89_rac_get_uint(&s->c, 2);
+    if (copy_mode == 1)
+        return VP8_FRAME_PREVIOUS;
+    if (copy_mode == 2) /* cross copy: golden takes altref, altref takes golden */
+        return ref == VP8_FRAME_GOLDEN ? VP8_FRAME_ALTREF : VP8_FRAME_GOLDEN;
+    return VP8_FRAME_NONE;
+}
+
+/* Reset all 4 x 16 token probability rows to the defaults of the band vp8_coeff_band[] assigns to each index. */
+static void vp78_reset_probability_tables(VP8Context *s)
+{
+    for (int type = 0; type < 4; type++)
+        for (int pos = 0; pos < 16; pos++)
+            memcpy(s->prob->token[type][pos], vp8_token_default_probs[type][vp8_coeff_band[pos]],
+                   sizeof(s->prob->token[type][pos]));
+}
+
+/* Read the coded token probability updates; each one is stored at every coefficient index of its band. */
+static void vp78_update_probability_tables(VP8Context *s)
+{
+    VPXRangeCoder *rc = &s->c;
+
+    for (int type = 0; type < 4; type++)
+        for (int band = 0; band < 8; band++)
+            for (int ctx = 0; ctx < 3; ctx++)
+                for (int tok = 0; tok < NUM_DCT_TOKENS - 1; tok++)
+                    if (vpx_rac_get_prob_branchy(rc, vp8_token_update_probs[type][band][ctx][tok])) {
+                        const int new_prob = vp89_rac_get_uint(rc, 8);
+                        for (int m = 0; vp8_coeff_band_indexes[band][m] >= 0; m++)
+                            s->prob->token[type][vp8_coeff_band_indexes[band][m]][ctx][tok] = new_prob;
+                    }
+}
+
+#define VP7_MVC_SIZE 17 /* mvc_size passed by vp7_decode_frame_header() */
+#define VP8_MVC_SIZE 19 /* mvc_size passed by vp8_decode_frame_header() */
+
+/* Read the optional pred16x16 and pred8x8c probability replacements, then the MV probability updates. */
+static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
+                                                            int mvc_size)
+{
+    VPXRangeCoder *rc = &s->c;
+
+    if (vp89_rac_get(rc))                 /* four new pred16x16 probabilities follow */
+        for (int n = 0; n < 4; n++)
+            s->prob->pred16x16[n] = vp89_rac_get_uint(rc, 8);
+    if (vp89_rac_get(rc))                 /* three new pred8x8c probabilities follow */
+        for (int n = 0; n < 3; n++)
+            s->prob->pred8x8c[n] = vp89_rac_get_uint(rc, 8);
+
+    // 17.2 MV probability update: mvc_size entries for each of the two mvc[] rows
+    for (int comp = 0; comp < 2; comp++)
+        for (int n = 0; n < mvc_size; n++)
+            if (vpx_rac_get_prob_branchy(rc, vp8_mv_update_prob[comp][n]))
+                s->prob->mvc[comp][n] = vp8_rac_get_nn(rc);
+}
+
+/* Read both refresh flags, then resolve the golden and the altref update source, in that order. */
+static void update_refs(VP8Context *s)
+{
+    /* Both flag bits are read before any 2-bit code that ref_to_update() may consume. */
+    const int golden_flag = vp89_rac_get(&s->c);
+    const int altref_flag = vp89_rac_get(&s->c);
+
+    s->update_golden = ref_to_update(s, golden_flag, VP8_FRAME_GOLDEN);
+    s->update_altref = ref_to_update(s, altref_flag, VP8_FRAME_ALTREF);
+}
+
+/* Copy planes 1 and 2 row by row: (height / 2) rows of (width / 2) bytes each. */
+static void copy_chroma(AVFrame *dst, const AVFrame *src, int width, int height)
+{
+    const int rows = height / 2, row_bytes = width / 2;
+
+    for (int plane = 1; plane < 3; plane++)
+        for (int row = 0; row < rows; row++)
+            memcpy(dst->data[plane] + row * dst->linesize[plane],
+                   src->data[plane] + row * src->linesize[plane], row_bytes);
+}
+
+/* Per pixel: dst = clip_uint8(y + ((y * beta) >> 8) + alpha); vp7_fade_frame() may pass dst == src. */
+static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
+                 const uint8_t *src, ptrdiff_t src_linesize,
+                 int width, int height,
+                 int alpha, int beta)
+{
+    for (int row = 0; row < height; row++) {
+        const uint8_t *in  = src + row * src_linesize;
+        uint8_t       *out = dst + row * dst_linesize;
+        for (int col = 0; col < width; col++) {
+            const int luma = in[col];
+            out[col] = av_clip_uint8(luma + ((luma * beta) >> 8) + alpha);
+        }
+    }
+}
+
+/**
+ * Apply the VP7 fade to plane 0 (luma) of the previous frame; does nothing on
+ * keyframes or when alpha and beta are both zero.
+ * @return 0 on success, AVERROR_INVALIDDATA if a reference frame is missing,
+ *         or the negative error from vp8_alloc_frame()
+ */
+static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
+{
+    const int width  = s->mb_width  * 16;
+    const int height = s->mb_height * 16;
+    const AVFrame *src;
+    AVFrame *dst;
+    int ret;
+
+    if (s->keyframe || (!alpha && !beta))
+        return 0;
+
+    if (!s->framep[VP8_FRAME_PREVIOUS] || !s->framep[VP8_FRAME_GOLDEN]) {
+        av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    src = dst = s->framep[VP8_FRAME_PREVIOUS]->tf.f;
+
+    /* Golden and previous share a buffer: fade into a new previous frame so golden is preserved. */
+    if (s->framep[VP8_FRAME_GOLDEN] == s->framep[VP8_FRAME_PREVIOUS]) {
+        s->framep[VP8_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
+        if ((ret = vp8_alloc_frame(s, s->framep[VP8_FRAME_PREVIOUS], 1)) < 0)
+            return ret;
+        dst = s->framep[VP8_FRAME_PREVIOUS]->tf.f;
+        copy_chroma(dst, src, width, height); /* fade() below only writes plane 0 */
+    }
+
+    fade(dst->data[0], dst->linesize[0], src->data[0], src->linesize[0],
+         width, height, alpha, beta);
+    return 0;
+}
+
+static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
+{ /* Parse the VP7 frame header in buf into s; returns 0 or a negative AVERROR code. */
+    VPXRangeCoder *c = &s->c;
+    int part1_size, hscale, vscale, i, j, ret;
+    int width  = s->avctx->width;  /* current size; only keyframes overwrite it below */
+    int height = s->avctx->height;
+    int alpha = 0;
+    int beta  = 0;
+    int fade_present = 1; /* stays 1 unless a profile > 0 interframe codes the flag */
+
+    if (buf_size < 4) {
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->profile = (buf[0] >> 1) & 7; /* bits 1-3 of the first byte */
+    if (s->profile > 1) {
+        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->keyframe  = !(buf[0] & 1); /* bit 0 clear marks a keyframe */
+    s->invisible = 0;
+    part1_size   = AV_RL24(buf) >> 4; /* byte size of the first partition (the header data) */
+
+    if (buf_size < 4 - s->profile + part1_size) {
+        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    buf      += 4 - s->profile; /* skip the fixed part: 4 bytes for profile 0, 3 for profile 1 */
+    buf_size -= 4 - s->profile;
+
+    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
+
+    ret = ff_vpx_init_range_decoder(c, buf, part1_size);
+    if (ret < 0)
+        return ret;
+    buf      += part1_size; /* the remainder is handed to coeff_partition[0] further down */
+    buf_size -= part1_size;
+
+    /* A. Dimension information (keyframes only) */
+    if (s->keyframe) {
+        width  = vp89_rac_get_uint(c, 12);
+        height = vp89_rac_get_uint(c, 12);
+        hscale = vp89_rac_get_uint(c, 2);
+        vscale = vp89_rac_get_uint(c, 2);
+        if (hscale || vscale)
+            avpriv_request_sample(s->avctx, "Upscaling"); /* hscale/vscale are not used beyond this report */
+
+        s->update_golden = s->update_altref = VP8_FRAME_CURRENT;
+        vp78_reset_probability_tables(s);
+        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
+               sizeof(s->prob->pred16x16));
+        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
+               sizeof(s->prob->pred8x8c));
+        for (i = 0; i < 2; i++)
+            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
+                   sizeof(vp7_mv_default_prob[i]));
+        memset(&s->segmentation, 0, sizeof(s->segmentation));
+        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
+        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan)); /* default scan order; section G may replace it */
+    }
+
+    if (s->keyframe || s->profile > 0)
+        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
+
+    /* B. Decoding information for all four macroblock-level features */
+    for (i = 0; i < 4; i++) {
+        s->feature_enabled[i] = vp89_rac_get(c);
+        if (s->feature_enabled[i]) {
+             s->feature_present_prob[i] = vp89_rac_get_uint(c, 8);
+
+             for (j = 0; j < 3; j++) /* each index probability is either coded or defaults to 255 */
+                 s->feature_index_prob[i][j] =
+                     vp89_rac_get(c) ? vp89_rac_get_uint(c, 8) : 255;
+
+             if (vp7_feature_value_size[s->profile][i]) /* a zero size means this feature carries no values */
+                 for (j = 0; j < 4; j++)
+                     s->feature_value[i][j] =
+                        vp89_rac_get(c) ? vp89_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
+        }
+    }
+
+    s->segmentation.enabled    = 0;
+    s->segmentation.update_map = 0;
+    s->lf_delta.enabled        = 0;
+
+    s->num_coeff_partitions = 1; /* everything after the first partition is one coefficient partition */
+    ret = ff_vpx_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
+    if (ret < 0)
+        return ret;
+
+    if (!s->macroblocks_base || /* first frame */
+        width != s->avctx->width || height != s->avctx->height ||
+        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
+        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
+            return ret;
+    }
+
+    /* C. Dequantization indices */
+    vp7_get_quants(s);
+
+    /* D. Golden frame update flag (a Flag) for interframes only */
+    if (!s->keyframe) {
+        s->update_golden = vp89_rac_get(c) ? VP8_FRAME_CURRENT : VP8_FRAME_NONE;
+        s->sign_bias[VP8_FRAME_GOLDEN] = 0;
+    }
+
+    s->update_last          = 1;
+    s->update_probabilities = 1;
+
+    if (s->profile > 0) {
+        s->update_probabilities = vp89_rac_get(c);
+        if (!s->update_probabilities)
+            s->prob[1] = s->prob[0]; /* keep a copy of the probabilities as they are before this frame's updates */
+
+        if (!s->keyframe)
+            fade_present = vp89_rac_get(c);
+    }
+
+    if (vpx_rac_is_end(c))
+        return AVERROR_INVALIDDATA;
+    /* E. Fading information for previous frame */
+    if (fade_present && vp89_rac_get(c)) {
+        alpha = (int8_t) vp89_rac_get_uint(c, 8); /* both values are reinterpreted as signed 8-bit */
+        beta  = (int8_t) vp89_rac_get_uint(c, 8);
+    }
+
+    /* F. Loop filter type */
+    if (!s->profile)
+        s->filter.simple = vp89_rac_get(c);
+
+    /* G. DCT coefficient ordering specification */
+    if (vp89_rac_get(c))
+        for (i = 1; i < 16; i++) /* entry 0 is never re-coded */
+            s->prob[0].scan[i] = ff_zigzag_scan[vp89_rac_get_uint(c, 4)];
+
+    /* H. Loop filter levels  */
+    if (s->profile > 0)
+        s->filter.simple = vp89_rac_get(c); /* profile 0 already read this flag in section F */
+    s->filter.level     = vp89_rac_get_uint(c, 6);
+    s->filter.sharpness = vp89_rac_get_uint(c, 3);
+
+    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
+    vp78_update_probability_tables(s);
+
+    s->mbskip_enabled = 0;
+
+    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
+    if (!s->keyframe) {
+        s->prob->intra  = vp89_rac_get_uint(c, 8);
+        s->prob->last   = vp89_rac_get_uint(c, 8);
+        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
+    }
+
+    if (vpx_rac_is_end(c))
+        return AVERROR_INVALIDDATA;
+
+    if ((ret = vp7_fade_frame(s, alpha, beta)) < 0) /* no-op unless alpha or beta is nonzero on an interframe */
+        return ret;
+
+    return 0;
+}
+
+static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
+{ /* Parse the VP8 frame header in buf into s; returns 0 or a negative AVERROR code. */
+    VPXRangeCoder *c = &s->c;
+    int header_size, hscale, vscale, ret;
+    int width  = s->avctx->width;  /* current size; only keyframes overwrite it below */
+    int height = s->avctx->height;
+
+    if (buf_size < 3) {
+        av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->keyframe  = !(buf[0] & 1);     /* bit 0 clear marks a keyframe */
+    s->profile   =  (buf[0]>>1) & 7;  /* bits 1-3 of the first byte */
+    s->invisible = !(buf[0] & 0x10);  /* bit 4 clear marks the frame invisible */
+    header_size  = AV_RL24(buf) >> 5; /* byte size of the header partition */
+    buf      += 3;
+    buf_size -= 3;
+
+    s->header_partition_size = header_size;
+
+    if (s->profile > 3)
+        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); /* warning only, decoding continues */
+
+    if (!s->profile)
+        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
+               sizeof(s->put_pixels_tab));
+    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
+        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
+               sizeof(s->put_pixels_tab));
+
+    if (header_size > buf_size - 7 * s->keyframe) { /* keyframes carry 7 more bytes before the header partition */
+        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (s->keyframe) {
+        if (AV_RL24(buf) != 0x2a019d) { /* 3-byte keyframe start code */
+            av_log(s->avctx, AV_LOG_ERROR,
+                   "Invalid start code 0x%x\n", AV_RL24(buf));
+            return AVERROR_INVALIDDATA;
+        }
+        width     = AV_RL16(buf + 3) & 0x3fff; /* low 14 bits of each 16-bit field are the dimension */
+        height    = AV_RL16(buf + 5) & 0x3fff;
+        hscale    = buf[4] >> 6;               /* top 2 bits of bytes 4 and 6 are the scale codes */
+        vscale    = buf[6] >> 6;
+        buf      += 7;
+        buf_size -= 7;
+
+        if (hscale || vscale)
+            avpriv_request_sample(s->avctx, "Upscaling"); /* hscale/vscale are not used beyond this report */
+
+        s->update_golden = s->update_altref = VP8_FRAME_CURRENT;
+        vp78_reset_probability_tables(s);
+        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
+               sizeof(s->prob->pred16x16));
+        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
+               sizeof(s->prob->pred8x8c));
+        memcpy(s->prob->mvc, vp8_mv_default_prob,
+               sizeof(s->prob->mvc));
+        memset(&s->segmentation, 0, sizeof(s->segmentation));
+        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
+    }
+
+    ret = ff_vpx_init_range_decoder(c, buf, header_size);
+    if (ret < 0)
+        return ret;
+    buf      += header_size; /* the remainder is passed to setup_partitions() below */
+    buf_size -= header_size;
+
+    if (s->keyframe) {
+        s->colorspace = vp89_rac_get(c);
+        if (s->colorspace)
+            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
+        s->fullrange = vp89_rac_get(c);
+    }
+
+    if ((s->segmentation.enabled = vp89_rac_get(c)))
+        parse_segment_info(s);
+    else
+        s->segmentation.update_map = 0; // FIXME: move this to some init function?
+
+    s->filter.simple    = vp89_rac_get(c);
+    s->filter.level     = vp89_rac_get_uint(c, 6);
+    s->filter.sharpness = vp89_rac_get_uint(c, 3);
+
+    if ((s->lf_delta.enabled = vp89_rac_get(c))) {
+        s->lf_delta.update = vp89_rac_get(c);
+        if (s->lf_delta.update)
+            update_lf_deltas(s);
+    }
+
+    if (setup_partitions(s, buf, buf_size)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (!s->macroblocks_base || /* first frame */
+        width != s->avctx->width || height != s->avctx->height ||
+        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
+        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
+            return ret;
+
+    vp8_get_quants(s);
+
+    if (!s->keyframe) { /* keyframes already set both update targets to VP8_FRAME_CURRENT above */
+        update_refs(s);
+        s->sign_bias[VP8_FRAME_GOLDEN] = vp89_rac_get(c);
+        s->sign_bias[VP8_FRAME_ALTREF] = vp89_rac_get(c);
+    }
+
+    // if we aren't saving this frame's probabilities for future frames,
+    // make a copy of the current probabilities
+    if (!(s->update_probabilities = vp89_rac_get(c)))
+        s->prob[1] = s->prob[0];
+
+    s->update_last = s->keyframe || vp89_rac_get(c); /* the bit is only read on interframes (short-circuit) */
+
+    vp78_update_probability_tables(s);
+
+    if ((s->mbskip_enabled = vp89_rac_get(c)))
+        s->prob->mbskip = vp89_rac_get_uint(c, 8);
+
+    if (!s->keyframe) {
+        s->prob->intra  = vp89_rac_get_uint(c, 8);
+        s->prob->last   = vp89_rac_get_uint(c, 8);
+        s->prob->golden = vp89_rac_get_uint(c, 8);
+        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
+    }
+
+    // Record the entropy coder state here so that hwaccels can use it.
+    s->c.code_word = vpx_rac_renorm(&s->c);
+    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
+    s->coder_state_at_header_end.range     = s->c.high;
+    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
+    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
+
+    return 0;
+}
+
+/* dst = src clamped to [mv_min, mv_max] of s, after those bounds are themselves clipped to the int16 range. */
+static av_always_inline void clamp_mv(const VP8mvbounds *s, VP8mv *dst, const VP8mv *src)
+{
+    const int x_lo = av_clip(s->mv_min.x, INT16_MIN, INT16_MAX), x_hi = av_clip(s->mv_max.x, INT16_MIN, INT16_MAX);
+    const int y_lo = av_clip(s->mv_min.y, INT16_MIN, INT16_MAX), y_hi = av_clip(s->mv_max.y, INT16_MIN, INT16_MAX);
+    dst->x = av_clip(src->x, x_lo, x_hi);
+    dst->y = av_clip(src->y, y_lo, y_hi);
+}
+
+/**
+ * Motion vector coding, 17.1: read one signed motion vector component.
+ *
+ * @param c   range coder to read from
+ * @param p   probabilities: p[0] long/short form, p[1] sign, p[2..8] short-form tree, p[9..] long-form bits
+ * @param vp7 nonzero: long-form magnitudes use bits 0..7; zero: bits 0..9
+ */
+static av_always_inline int read_mv_component(VPXRangeCoder *c, const uint8_t *p, int vp7)
+{
+    int mag = 0;
+
+    if (vpx_rac_get_prob_branchy(c, p[0])) {
+        /* long form: bits 0..2 come first, then the top bit down to bit 4 */
+        for (int b = 0; b < 3; b++)
+            mag += vpx_rac_get_prob(c, p[9 + b]) << b;
+        for (int b = (vp7 ? 7 : 9); b > 3; b--)
+            mag += vpx_rac_get_prob(c, p[9 + b]) << b;
+        /* bit 3 is set without reading when bits 4 and up are all clear, else it is read with p[12] */
+        if (!(mag & (vp7 ? 0xF0 : 0xFFF0)) || vpx_rac_get_prob(c, p[12]))
+            mag += 8;
+    } else {
+        // small_mvtree: three binary decisions, most significant first
+        const int hi  = vpx_rac_get_prob(c, p[2]);
+        const int mid = vpx_rac_get_prob(c, p[3 + 3 * hi]);
+        const int lo  = vpx_rac_get_prob(c, p[4 + 3 * hi + mid]);
+        mag = 4 * hi + 2 * mid + lo;
+    }
+
+    return (mag && vpx_rac_get_prob(c, p[1])) ? -mag : mag;
+}
+
+static int vp7_read_mv_component(VPXRangeCoder *c, const uint8_t *p)
+{ /* VP7 variant: read_mv_component() with its vp7 argument fixed to 1. */
+    return read_mv_component(c, p, 1);
+}
+
+static int vp8_read_mv_component(VPXRangeCoder *c, const uint8_t *p)
+{ /* VP8 variant: read_mv_component() with its vp7 argument fixed to 0. */
+    return read_mv_component(c, p, 0);
+}
+
+/* Pick the sub-MV probability set from the packed left/above MVs: one set for VP7, sets 0..4 for VP8. */
+static av_always_inline
+const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
+{
+    int set;
+
+    if (is_vp7)
+        return vp7_submv_prob;
+    /* equal: 4 if both are zero, else 3; different: 2 if above is zero, else 1 if left is zero, else 0 */
+    set = left == top ? (left ? 3 : 4) : !top ? 2 : (left ? 0 : 1);
+    return vp8_submv_prob[set];
+}
+
+/**
+ * Split motion vector prediction, 16.4: read the partitioning of mb, then one
+ * motion vector per partition into mb->bmv[], predicted from the left/above sub-block MVs.
+ * @returns the number of motion vectors parsed (2, 4 or 16)
+ */
+static av_always_inline
+int decode_splitmvs(const VP8Context *s, VPXRangeCoder *c, VP8Macroblock *mb,
+                    int layout, int is_vp7)
+{
+    int part_idx, n, num;
+    const VP8Macroblock *top_mb;
+    const VP8Macroblock *left_mb = &mb[-1];
+    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
+    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
+    const VP8mv *top_mv;
+    const VP8mv *left_mv = left_mb->bmv;
+    const VP8mv *cur_mv  = mb->bmv;
+
+    if (!layout) // layout is inlined, s->mb_layout is not
+        top_mb = &mb[2];
+    else
+        top_mb = &mb[-s->mb_width - 1];
+    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
+    top_mv       = top_mb->bmv;
+
+    if (vpx_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
+        if (vpx_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
+            part_idx = VP8_SPLITMVMODE_16x8 + vpx_rac_get_prob(c, vp8_mbsplit_prob[2]);
+        else
+            part_idx = VP8_SPLITMVMODE_8x8;
+    } else {
+        part_idx = VP8_SPLITMVMODE_4x4;
+    }
+
+    num              = vp8_mbsplit_count[part_idx];
+    mbsplits_cur     = vp8_mbsplits[part_idx]; /* was terminated by a stray comma operator */
+    firstidx         = vp8_mbfirstidx[part_idx];
+    mb->partitioning = part_idx;
+
+    for (n = 0; n < num; n++) {
+        int k = firstidx[n]; /* first 4x4 sub-block index (0..15) of partition n */
+        uint32_t left, above;
+        const uint8_t *submv_prob;
+
+        if (!(k & 3)) /* leftmost column: neighbour is sub-block k + 3 of the left macroblock */
+            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
+        else
+            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
+        if (k <= 3)   /* top row: neighbour is sub-block k + 12 of the macroblock above */
+            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
+        else
+            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
+
+        submv_prob = get_submv_prob(left, above, is_vp7);
+
+        /* sub-MV tree: reuse left, reuse above, zero, or mb->mv plus a coded delta */
+        if (vpx_rac_get_prob_branchy(c, submv_prob[0])) {
+            if (vpx_rac_get_prob_branchy(c, submv_prob[1])) {
+                if (vpx_rac_get_prob_branchy(c, submv_prob[2])) {
+                    mb->bmv[n].y = mb->mv.y +
+                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
+                    mb->bmv[n].x = mb->mv.x +
+                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
+                } else {
+                    AV_ZERO32(&mb->bmv[n]);
+                }
+            } else {
+                AV_WN32A(&mb->bmv[n], above);
+            }
+        } else {
+            AV_WN32A(&mb->bmv[n], left);
+        }
+    }
+
+    return num;
+}
+
+/**
+ * The vp7 reference decoder uses a padding macroblock column (added to the
+ * right edge of the frame) to guard against illegal macroblock offsets, but
+ * its algorithm has bugs that let offsets straddle the padding column.
+ * This function replicates those bugs by working on a linear index into the padded grid.
+ *
+ * @param[out] edge_x macroblock x address, written only when the offset is legal
+ * @param[out] edge_y macroblock y address, written only when the offset is legal
+ * @return macroblock offset legal (boolean)
+ */
+static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
+                                   int xoffset, int yoffset, int boundary,
+                                   int *edge_x, int *edge_y)
+{
+    const int stride = mb_width + 1; /* row length including the padding column */
+    const int target = (mb_y + yoffset) * stride + mb_x + xoffset;
+    if (target >= boundary && target % stride != stride - 1) {
+        *edge_y = target / stride;
+        *edge_x = target % stride;
+        return 1;
+    }
+    return 0;
+}
+
+static const VP8mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
+{ /* split macroblocks map subblock through their partitioning table; all other modes use bmv[0] */
+    return mb->bmv + (mb->mode != VP8_MVMODE_SPLIT ? 0 : vp8_mbsplits[mb->partitioning][subblock]);
+}
+
+static av_always_inline
+void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
+                    int mb_x, int mb_y, int layout)
+{ /* Decode the inter mode and motion vector(s) of one VP7 macroblock into mb->mode, mb->mv and mb->bmv[]. */
+    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
+    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
+    int idx = CNT_ZERO;
+    VP8mv near_mv[3];
+    uint8_t cnt[3] = { 0 }; /* summed predictor scores, indexed by the CNT_* values */
+    VPXRangeCoder *c = &s->c;
+    int i;
+
+    AV_ZERO32(&near_mv[0]);
+    AV_ZERO32(&near_mv[1]);
+    AV_ZERO32(&near_mv[2]);
+
+    for (i = 0; i < VP7_MV_PRED_COUNT; i++) { /* visit every predictor position listed in vp7_mv_pred[] */
+        const VP7MVPred * pred = &vp7_mv_pred[i];
+        int edge_x, edge_y;
+
+        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
+                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
+            const VP8Macroblock *edge = (s->mb_layout == 1)
+                                      ? s->macroblocks_base + 1 + edge_x +
+                                        (s->mb_width + 1) * (edge_y + 1)
+                                      : s->macroblocks + edge_x +
+                                        (s->mb_height - edge_y - 1) * 2;
+            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
+            if (mv) {
+                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
+                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
+                        idx = CNT_NEAREST;
+                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
+                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
+                            continue; /* matches neither stored candidate: its score is not counted */
+                        idx = CNT_NEAR;
+                    } else {
+                        AV_WN32A(&near_mv[CNT_NEAR], mv); /* second distinct nonzero MV seen */
+                        idx = CNT_NEAR;
+                    }
+                } else {
+                    AV_WN32A(&near_mv[CNT_NEAREST], mv); /* first nonzero MV seen */
+                    idx = CNT_NEAREST;
+                }
+            } else {
+                idx = CNT_ZERO;
+            }
+        } else {
+            idx = CNT_ZERO; /* illegal positions are scored like a zero MV */
+        }
+        cnt[idx] += vp7_mv_pred[i].score;
+    }
+
+    mb->partitioning = VP8_SPLITMVMODE_NONE;
+
+    if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) { /* mode tree: zero / nearest / near / new or split */
+        mb->mode = VP8_MVMODE_MV;
+
+        if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
+
+            if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
+
+                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR]) /* base MV: the higher-scored candidate, or zero if zero scores higher */
+                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
+                else
+                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
+
+                if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
+                    mb->mode = VP8_MVMODE_SPLIT;
+                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1]; /* mb->mv becomes the last split MV */
+                } else {
+                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]); /* y delta is read before x */
+                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
+                    mb->bmv[0] = mb->mv;
+                }
+            } else {
+                mb->mv = near_mv[CNT_NEAR];
+                mb->bmv[0] = mb->mv;
+            }
+        } else {
+            mb->mv = near_mv[CNT_NEAREST];
+            mb->bmv[0] = mb->mv;
+        }
+    } else {
+        mb->mode = VP8_MVMODE_ZERO;
+        AV_ZERO32(&mb->mv);
+        mb->bmv[0] = mb->mv;
+    }
+}
+
+static av_always_inline
+void vp8_decode_mvs(VP8Context *s, const VP8mvbounds *mv_bounds, VP8Macroblock *mb,
+                    int mb_x, int mb_y, int layout)
+{ /* Decode the inter mode and motion vector(s) of one VP8 macroblock into mb->mode, mb->mv and mb->bmv[]. */
+    VP8Macroblock *mb_edge[3] = { 0      /* top */,
+                                  mb - 1 /* left */,
+                                  0      /* top-left */ };
+    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
+    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
+    int idx = CNT_ZERO; /* index of the most recently stored candidate in near_mv[] */
+    int cur_sign_bias = s->sign_bias[mb->ref_frame];
+    const int8_t *sign_bias = s->sign_bias;
+    VP8mv near_mv[4];
+    uint8_t cnt[4] = { 0 }; /* weights per candidate; cnt[CNT_SPLITMV] is reused as the split context below */
+    VPXRangeCoder *c = &s->c;
+
+    if (!layout) { // layout is inlined (s->mb_layout is not)
+        mb_edge[0] = mb + 2;
+        mb_edge[2] = mb + 1;
+    } else {
+        mb_edge[0] = mb - s->mb_width - 1;
+        mb_edge[2] = mb - s->mb_width - 2;
+    }
+
+    AV_ZERO32(&near_mv[0]);
+    AV_ZERO32(&near_mv[1]);
+    AV_ZERO32(&near_mv[2]);
+
+    /* Process MB on top, left and top-left; top and left weigh 2, top-left weighs 1 */
+#define MV_EDGE_CHECK(n)                                                      \
+    {                                                                         \
+        const VP8Macroblock *edge = mb_edge[n];                               \
+        int edge_ref = edge->ref_frame;                                       \
+        if (edge_ref != VP8_FRAME_CURRENT) {                                 \
+            uint32_t mv = AV_RN32A(&edge->mv);                                \
+            if (mv) {                                                         \
+                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
+                    /* SWAR negate of the values in mv. */                    \
+                    mv = ~mv;                                                 \
+                    mv = ((mv & 0x7fff7fff) +                                 \
+                          0x00010001) ^ (mv & 0x80008000);                    \
+                }                                                             \
+                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
+                    AV_WN32A(&near_mv[++idx], mv);                            \
+                cnt[idx] += 1 + (n != 2);                                     \
+            } else                                                            \
+                cnt[CNT_ZERO] += 1 + (n != 2);                                \
+        }                                                                     \
+    }
+
+    MV_EDGE_CHECK(0)
+    MV_EDGE_CHECK(1)
+    MV_EDGE_CHECK(2)
+
+    mb->partitioning = VP8_SPLITMVMODE_NONE;
+    if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) { /* mode tree: zero / nearest / near / new or split */
+        mb->mode = VP8_MVMODE_MV;
+
+        /* If we have three distinct MVs, merge first and last if they're the same */
+        if (cnt[CNT_SPLITMV] &&
+            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
+            cnt[CNT_NEAREST] += 1;
+
+        /* Swap near and nearest if necessary */
+        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
+            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
+            FFSWAP(VP8mv,   near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
+        }
+
+        if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
+            if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
+                /* Choose the best mv out of 0,0 and the nearest mv */
+                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
+                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
+                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
+                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
+
+                if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
+                    mb->mode = VP8_MVMODE_SPLIT;
+                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1]; /* mb->mv becomes the last split MV */
+                } else {
+                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]); /* y delta is read before x */
+                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
+                    mb->bmv[0] = mb->mv;
+                }
+            } else {
+                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
+                mb->bmv[0] = mb->mv;
+            }
+        } else {
+            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
+            mb->bmv[0] = mb->mv;
+        }
+    } else {
+        mb->mode = VP8_MVMODE_ZERO;
+        AV_ZERO32(&mb->mv);
+        mb->bmv[0] = mb->mv;
+    }
+}
+
+/**
+ * Read the 16 intra 4x4 sub-block prediction modes of mb into
+ * mb->intra4x4_pred_mode_mb. On keyframes every mode is coded with
+ * probabilities selected by the modes above and to the left of it, and those
+ * two context arrays are updated as decoding proceeds.
+ */
+static av_always_inline
+void decode_intra4x4_modes(VP8Context *s, VPXRangeCoder *c, VP8Macroblock *mb,
+                           int mb_x, int keyframe, int layout)
+{
+    uint8_t *modes = mb->intra4x4_pred_mode_mb;
+    uint8_t *above, *left;
+
+    if (layout) /* this layout keeps the top modes per macroblock: inherit them from the one above */
+        memcpy(mb->intra4x4_pred_mode_top, (mb - s->mb_width - 1)->intra4x4_pred_mode_top, 4);
+
+    if (!keyframe) {
+        /* interframes: all 16 modes are read with the same fixed probabilities */
+        for (int n = 0; n < 16; n++)
+            modes[n] = vp89_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
+        return;
+    }
+
+    left  = s->intra4x4_pred_mode_left;
+    above = layout ? mb->intra4x4_pred_mode_top : s->intra4x4_pred_mode_top + 4 * mb_x;
+    for (int row = 0; row < 4; row++)
+        for (int col = 0; col < 4; col++) {
+            const uint8_t *probs = vp8_pred4x4_prob_intra[above[col]][left[row]];
+            const uint8_t mode   = vp89_rac_get_tree(c, vp8_pred4x4_tree, probs);
+            *modes++   = mode;
+            above[col] = mode; /* context for the sub-block below */
+            left[row]  = mode; /* context for the sub-block to the right */
+        }
+}
+
+static av_always_inline
+void decode_mb_mode(VP8Context *s, const VP8mvbounds *mv_bounds,
+                    VP8Macroblock *mb, int mb_x, int mb_y,
+                    uint8_t *segment, const uint8_t *ref, int layout, int is_vp7)
+{ /* Decode the per-macroblock header: segment, skip flag, prediction mode, reference frame and MVs. */
+    VPXRangeCoder *c = &s->c;
+    static const char * const vp7_feature_name[] = { "q-index",
+                                                     "lf-delta",
+                                                     "partial-golden-update",
+                                                     "blit-pitch" };
+    if (is_vp7) {
+        int i;
+        *segment = 0;
+        for (i = 0; i < 4; i++) { /* VP7 features are parsed here but only reported through av_log */
+            if (s->feature_enabled[i]) {
+                if (vpx_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
+                      int index = vp89_rac_get_tree(c, vp7_feature_index_tree,
+                                                    s->feature_index_prob[i]);
+                      av_log(s->avctx, AV_LOG_WARNING,
+                             "Feature %s present in macroblock (value 0x%x)\n",
+                             vp7_feature_name[i], s->feature_value[i][index]);
+                }
+           }
+        }
+    } else if (s->segmentation.update_map) {
+        int bit  = vpx_rac_get_prob(c, s->prob->segmentid[0]); /* two-level tree giving a segment id 0..3 */
+        *segment = vpx_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
+    } else if (s->segmentation.enabled)
+        *segment = ref ? *ref : *segment; /* no map update: take the id from ref when one is given */
+    mb->segment = *segment;
+
+    mb->skip = s->mbskip_enabled ? vpx_rac_get_prob(c, s->prob->mbskip) : 0;
+
+    if (s->keyframe) {
+        mb->mode = vp89_rac_get_tree(c, vp8_pred16x16_tree_intra,
+                                     vp8_pred16x16_prob_intra);
+
+        if (mb->mode == MODE_I4x4) {
+            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
+        } else {
+            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
+                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u; /* one mode byte replicated 4 times */
+            if (s->mb_layout)
+                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
+            else
+                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
+            AV_WN32A(s->intra4x4_pred_mode_left, modes);
+        }
+
+        mb->chroma_pred_mode = vp89_rac_get_tree(c, vp8_pred8x8c_tree,
+                                                 vp8_pred8x8c_prob_intra);
+        mb->ref_frame        = VP8_FRAME_CURRENT;
+    } else if (vpx_rac_get_prob_branchy(c, s->prob->intra)) {
+        // inter MB, 16.2
+        if (vpx_rac_get_prob_branchy(c, s->prob->last))
+            mb->ref_frame =
+                (!is_vp7 && vpx_rac_get_prob(c, s->prob->golden)) ? VP8_FRAME_ALTREF
+                                                                  : VP8_FRAME_GOLDEN; /* VP7 never reads the golden/altref bit */
+        else
+            mb->ref_frame = VP8_FRAME_PREVIOUS;
+        s->ref_count[mb->ref_frame - 1]++; /* per-reference usage counter */
+
+        // motion vectors, 16.3
+        if (is_vp7)
+            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
+        else
+            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
+    } else {
+        // intra MB, 16.1
+        mb->mode = vp89_rac_get_tree(c, vp8_pred16x16_tree_inter,
+                                     s->prob->pred16x16);
+
+        if (mb->mode == MODE_I4x4)
+            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
+
+        mb->chroma_pred_mode = vp89_rac_get_tree(c, vp8_pred8x8c_tree,
+                                                 s->prob->pred8x8c);
+        mb->ref_frame        = VP8_FRAME_CURRENT;
+        mb->partitioning     = VP8_SPLITMVMODE_NONE;
+        AV_ZERO32(&mb->bmv[0]);
+    }
+}
+
+/**
+ * @param r          arithmetic bitstream reader context
+ * @param block      destination for block coefficients
+ * @param probs      probabilities to use when reading trees from the bitstream
+ * @param i          initial coeff index, 0 unless a separate DC block is coded
+ * @param token_prob probabilities for the first token, which is read without an EOB check
+ * @param qmul       array holding the dc/ac dequant factor at position 0/1
+ * @param scan       maps each coeff index to its position in block
+ * @return the index of the last coeff decoded plus one
+ */
+static av_always_inline
+int decode_block_coeffs_internal(VPXRangeCoder *r, int16_t block[16],
+                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
+                                 int i, const uint8_t *token_prob, const int16_t qmul[2],
+                                 const uint8_t scan[16], int vp7)
+{
+    VPXRangeCoder c = *r; // decode on a local copy of the coder state
+    goto skip_eob;        // the first token is read without an EOB check
+    do {
+        int coeff;
+restart:
+        if (!vpx_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
+            break;
+
+skip_eob:
+        if (!vpx_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
+            if (++i == 16)
+                break; // invalid input; blocks should end with EOB
+            token_prob = probs[i][0];
+            if (vp7) // with vp7 set, EOB is checked again after a zero token
+                goto restart;
+            goto skip_eob;
+        }
+
+        if (!vpx_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
+            coeff = 1;
+            token_prob = probs[i + 1][1]; // next context: this coeff was 1
+        } else {
+            if (!vpx_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
+                coeff = vpx_rac_get_prob_branchy(&c, token_prob[4]);
+                if (coeff)
+                    coeff += vpx_rac_get_prob(&c, token_prob[5]);
+                coeff += 2;
+            } else {
+                // DCT_CAT*
+                if (!vpx_rac_get_prob_branchy(&c, token_prob[6])) {
+                    if (!vpx_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
+                        coeff = 5 + vpx_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
+                    } else {                                    // DCT_CAT2
+                        coeff  = 7;
+                        coeff += vpx_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
+                        coeff += vpx_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
+                    }
+                } else {    // DCT_CAT3 and up
+                    int a   = vpx_rac_get_prob(&c, token_prob[8]);
+                    int b   = vpx_rac_get_prob(&c, token_prob[9 + a]);
+                    int cat = (a << 1) + b;
+                    coeff  = 3 + (8 << cat);
+                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
+                }
+            }
+            token_prob = probs[i + 1][2]; // next context: this coeff was larger than 1
+        }
+        block[scan[i]] = (vp89_rac_get(&c) ? -coeff : coeff) * qmul[!!i]; // qmul[0] for i == 0, else qmul[1]
+    } while (++i < 16);
+
+    *r = c; // write the advanced coder state back to the caller
+    return i;
+}
+
+static av_always_inline // adds the running DC predictor to block[0] when active, then updates pred
+int inter_predict_dc(int16_t block[16], int16_t pred[2])
+{
+    int16_t dc = block[0];
+    int ret = 0;
+
+    if (pred[1] > 3) { // pred[1] counts how often pred[0] repeated; predict once it exceeds 3
+        dc += pred[0];
+        ret = 1;
+    }
+
+    if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) { // a zero value or differing signs
+        block[0] = pred[0] = dc;
+        pred[1] = 0; // restart the repeat count
+    } else {
+        if (pred[0] == dc)
+            pred[1]++;
+        block[0] = pred[0] = dc;
+    }
+
+    return ret; // 1 if pred[0] was added to the DC, 0 otherwise
+}
+
+static int vp7_decode_block_coeffs_internal(VPXRangeCoder *r, // VP7 entry point for the shared decoder
+                                            int16_t block[16],
+                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
+                                            int i, const uint8_t *token_prob,
+                                            const int16_t qmul[2],
+                                            const uint8_t scan[16])
+{
+    return decode_block_coeffs_internal(r, block, probs, i, // forwards the caller's scan table
+                                        token_prob, qmul, scan, IS_VP7);
+}
+
+#ifndef vp8_decode_block_coeffs_internal // skipped when a macro of this name is already defined
+static int vp8_decode_block_coeffs_internal(VPXRangeCoder *r, // VP8 entry point for the shared decoder
+                                            int16_t block[16],
+                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
+                                            int i, const uint8_t *token_prob,
+                                            const int16_t qmul[2])
+{
+    return decode_block_coeffs_internal(r, block, probs, i, // always uses ff_zigzag_scan
+                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
+}
+#endif /* vp8_decode_block_coeffs_internal */
+
+/**
+ * @param c          arithmetic bitstream reader context
+ * @param block      destination for block coefficients
+ * @param probs      probabilities to use when reading trees from the bitstream
+ * @param i          initial coeff index, 0 unless a separate DC block is coded
+ * @param zero_nhood the initial prediction context for number of surrounding
+ *                   all-zero blocks (only left/top, so 0-2)
+ * @param qmul       array holding the dc/ac dequant factor at position 0/1
+ * @param scan       scan pattern (VP7 only)
+ * @param vp7        nonzero selects the VP7 decoder, zero the VP8 one
+ * @return 0 if no coeffs were decoded
+ *         otherwise, the index of the last coeff decoded plus one
+ */
+static av_always_inline
+int decode_block_coeffs(VPXRangeCoder *c, int16_t block[16],
+                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
+                        int i, int zero_nhood, const int16_t qmul[2],
+                        const uint8_t scan[16], int vp7)
+{
+    const uint8_t *token_prob = probs[i][zero_nhood];
+    if (!vpx_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
+        return 0;
+    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i, // EOB already consumed above
+                                                  token_prob, qmul, scan)
+               : vp8_decode_block_coeffs_internal(c, block, probs, i,
+                                                  token_prob, qmul);
+}
+
+static av_always_inline // decodes the optional DC block, 16 luma and 8 chroma blocks of one macroblock
+void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VPXRangeCoder *c,
+                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
+                      int is_vp7)
+{
+    int i, x, y, luma_start = 0, luma_ctx = 3; // values used when no separate DC block is coded
+    int nnz_pred, nnz, nnz_total = 0;
+    int segment = mb->segment;
+    int block_dc = 0;
+
+    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
+        nnz_pred = t_nnz[8] + l_nnz[8]; // slot 8 of both arrays is the DC block's context
+
+        // decode DC values and do hadamard
+        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
+                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
+                                  ff_zigzag_scan, is_vp7);
+        l_nnz[8] = t_nnz[8] = !!nnz;
+
+        if (is_vp7 && mb->mode > MODE_I4x4) {
+            nnz |=  inter_predict_dc(td->block_dc,
+                                     s->inter_dc_pred[mb->ref_frame - 1]);
+        }
+
+        if (nnz) {
+            nnz_total += nnz;
+            block_dc   = 1;
+            if (nnz == 1) // only coefficient 0 can be set: use the DC-only transform
+                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
+            else
+                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
+        }
+        luma_start = 1; // luma blocks then start decoding at coeff index 1
+        luma_ctx   = 0;
+    }
+
+    // luma blocks
+    for (y = 0; y < 4; y++)
+        for (x = 0; x < 4; x++) {
+            nnz_pred = l_nnz[y] + t_nnz[x];
+            nnz = decode_block_coeffs(c, td->block[y][x],
+                                      s->prob->token[luma_ctx],
+                                      luma_start, nnz_pred,
+                                      s->qmat[segment].luma_qmul,
+                                      s->prob[0].scan, is_vp7);
+            /* nnz+block_dc may be one more than the actual last index,
+             * but we don't care */
+            td->non_zero_count_cache[y][x] = nnz + block_dc;
+            t_nnz[x] = l_nnz[y] = !!nnz;
+            nnz_total += nnz;
+        }
+
+    // chroma blocks
+    // TODO: what to do about dimensions? 2nd dim for luma is x,
+    // but for chroma it's (y<<1)|x
+    for (i = 4; i < 6; i++) // rows 4 and 5 of td->block hold the two chroma planes
+        for (y = 0; y < 2; y++)
+            for (x = 0; x < 2; x++) {
+                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
+                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
+                                          s->prob->token[2], 0, nnz_pred,
+                                          s->qmat[segment].chroma_qmul,
+                                          s->prob[0].scan, is_vp7);
+                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
+                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
+                nnz_total += nnz;
+            }
+
+    // if there were no coded coeffs despite the macroblock not being marked skip,
+    // we MUST not do the inner loop filter and should not do IDCT
+    // Since skip isn't used for bitstream prediction, just manually set it.
+    if (!nnz_total)
+        mb->skip = 1;
+}
+
+static av_always_inline // copies luma row 15 and, unless simple, chroma row 7 of the MB into top_border
+void backup_mb_border(uint8_t *top_border, const uint8_t *src_y,
+                      const uint8_t *src_cb, const uint8_t *src_cr,
+                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
+{
+    AV_COPY128(top_border, src_y + 15 * linesize); // bytes 0..15: luma
+    if (!simple) {
+        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize); // bytes 16..23: cb
+        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize); // bytes 24..31: cr
+    }
+}
+
+static av_always_inline // exchanges data between top_border and the pixel row above the macroblock
+void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
+                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
+                    int mb_y, int mb_width, int simple, int xchg)
+{
+    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
+    src_y  -= linesize; // all three pointers now address the row one line above
+    src_cb -= uvlinesize;
+    src_cr -= uvlinesize;
+
+#define XCHG(a, b, xchg)                                                      \
+    do {                                                                      \
+        if (xchg)                                                             \
+            AV_SWAP64(b, a);                                                  \
+        else                                                                  \
+            AV_COPY64(b, a);                                                  \
+    } while (0)
+
+    XCHG(top_border_m1 + 8, src_y - 8, xchg);
+    XCHG(top_border, src_y, xchg);
+    XCHG(top_border + 8, src_y + 8, 1); // always swapped, whatever xchg is
+    if (mb_x < mb_width - 1) // the last column has no entry to its right
+        XCHG(top_border + 32, src_y + 16, 1);
+
+    // only copy chroma for normal loop filter
+    // or to initialize the top row to 127
+    if (!simple || !mb_y) {
+        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
+        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
+        XCHG(top_border + 16, src_cb, 1);
+        XCHG(top_border + 24, src_cr, 1);
+    }
+}
+
+static av_always_inline // substitutes the DC mode when mb_x == 0 and/or mb_y == 0
+int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
+{
+    if (!mb_x) // column 0
+        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
+    else
+        return mb_y ? mode : LEFT_DC_PRED8x8; // mode is kept only away from row 0 and column 0
+}
+
+static av_always_inline // substitutes the TM mode when mb_x == 0 and/or mb_y == 0
+int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
+{
+    if (!mb_x) // column 0; at (0, 0) the constant depends on vp7
+        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
+    else
+        return mb_y ? mode : HOR_PRED8x8; // mode is kept only away from row 0 and column 0
+}
+
+static av_always_inline // remaps a 16x16 or 8x8 intra mode for macroblocks in row 0 / column 0
+int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
+{
+    switch (mode) {
+    case DC_PRED8x8:
+        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
+    case VERT_PRED8x8: // replaced by a constant-DC mode in row 0
+        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
+    case HOR_PRED8x8: // replaced by a constant-DC mode in column 0
+        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
+    case PLANE_PRED8x8: /* TM */
+        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
+    }
+    return mode; // any other mode is returned unchanged
+}
+
+static av_always_inline // 4x4 counterpart of the TM substitution for row 0 / column 0
+int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
+{
+    if (!mb_x) { // column 0; at (0, 0) the constant depends on vp7
+        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
+    } else {
+        return mb_y ? mode : HOR_VP8_PRED; // mode is kept only away from row 0 and column 0
+    }
+}
+
+static av_always_inline // remaps a 4x4 intra mode at row 0 / column 0, or sets *copy_buf instead
+int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
+                                     int *copy_buf, int vp7)
+{
+    switch (mode) {
+    case VERT_PRED:
+        if (!mb_x && mb_y) {
+            *copy_buf = 1; // *copy_buf is only ever set to 1 here; the caller must pre-clear it
+            return mode;
+        }
+        /* fall-through */
+    case DIAG_DOWN_LEFT_PRED:
+    case VERT_LEFT_PRED:
+        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
+    case HOR_PRED:
+        if (!mb_y) {
+            *copy_buf = 1;
+            return mode;
+        }
+        /* fall-through */
+    case HOR_UP_PRED:
+        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
+    case TM_VP8_PRED:
+        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
+    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
+                   * as 16x16/8x8 DC */
+    case DIAG_DOWN_RIGHT_PRED:
+    case VERT_RIGHT_PRED:
+    case HOR_DOWN_PRED:
+        if (!mb_y || !mb_x)
+            *copy_buf = 1;
+        return mode;
+    }
+    return mode; // any other mode is returned unchanged
+}
+
+static av_always_inline // intra-predicts luma and both chroma planes of one MB; I4x4 blocks also get their IDCT
+void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
+                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
+{
+    int x, y, mode, nnz;
+    uint32_t tr;
+
+    /* for the first row, we need to run xchg_mb_border to init the top edge
+     * to 127; otherwise, skip it if we aren't going to deblock */
+    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0) // NOTE(review): "|| !mb_y" is never true once mb_y != 0
+        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
+                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
+                       s->filter.simple, 1);
+
+    if (mb->mode < MODE_I4x4) {
+        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
+        s->hpc.pred16x16[mode](dst[0], s->linesize);
+    } else {
+        uint8_t *ptr = dst[0];
+        const uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
+        const uint8_t lo = is_vp7 ? 128 : 127; // value used for the row above the frame
+        const uint8_t hi = is_vp7 ? 128 : 129; // value used for the column left of the frame
+        const uint8_t tr_top[4] = { lo, lo, lo, lo };
+
+        // all blocks on the right edge of the macroblock use the bottom edge of
+        // the top macroblock for their topright edge
+        const uint8_t *tr_right = ptr - s->linesize + 16;
+
+        // if we're on the right edge of the frame, said edge is extended
+        // from the top macroblock
+        if (mb_y && mb_x == s->mb_width - 1) {
+            tr       = tr_right[-1] * 0x01010101u; // replicate the last pixel into 4 bytes
+            tr_right = (uint8_t *) &tr;
+        }
+
+        if (mb->skip)
+            AV_ZERO128(td->non_zero_count_cache);
+
+        for (y = 0; y < 4; y++) {
+            const uint8_t *topright = ptr + 4 - s->linesize;
+            for (x = 0; x < 4; x++) {
+                int copy = 0;
+                ptrdiff_t linesize = s->linesize;
+                uint8_t *dst = ptr + 4 * x;
+                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]); // 5 rows of 8 bytes: edge row plus 4 pixel rows
+
+                if ((y == 0 || x == 3) && mb_y == 0) {
+                    topright = tr_top;
+                } else if (x == 3)
+                    topright = tr_right;
+
+                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
+                                                        mb_y + y, &copy, is_vp7);
+                if (copy) {
+                    dst      = copy_dst + 12; // predict into the scratch buffer instead of the frame
+                    linesize = 8;
+                    if (!(mb_y + y)) {
+                        copy_dst[3] = lo; // top-left neighbour
+                        AV_WN32A(copy_dst + 4, lo * 0x01010101U); // top neighbours
+                    } else {
+                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
+                        if (!(mb_x + x)) {
+                            copy_dst[3] = hi;
+                        } else {
+                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
+                        }
+                    }
+                    if (!(mb_x + x)) {
+                        copy_dst[11] = // left neighbours of the four rows
+                        copy_dst[19] =
+                        copy_dst[27] =
+                        copy_dst[35] = hi;
+                    } else {
+                        copy_dst[11] = ptr[4 * x                   - 1];
+                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
+                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
+                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
+                    }
+                }
+                s->hpc.pred4x4[mode](dst, topright, linesize);
+                if (copy) { // move the predicted 4x4 block from the scratch buffer to the frame
+                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
+                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
+                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
+                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
+                }
+
+                nnz = td->non_zero_count_cache[y][x];
+                if (nnz) { // add the residual right away so later blocks predict from final pixels
+                    if (nnz == 1)
+                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
+                                                  td->block[y][x], s->linesize);
+                    else
+                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
+                                               td->block[y][x], s->linesize);
+                }
+                topright += 4;
+            }
+
+            ptr      += 4 * s->linesize;
+            intra4x4 += 4;
+        }
+    }
+
+    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
+                                            mb_x, mb_y, is_vp7);
+    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
+    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
+
+    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0) // same condition as above; last argument (xchg) is 0 here
+        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
+                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
+                       s->filter.simple, 0);
+}
+
+static const uint8_t subpel_idx[3][8] = { // second index: the masked (& 7) subpel part of one mv component
+    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
+                                // also function pointer index
+    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
+    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
+};
+
+/**
+ * luma MC function
+ *
+ * @param s        VP8 decoding context
+ * @param dst      target buffer for block data at block position
+ * @param ref      reference picture buffer at origin (0, 0)
+ * @param mv       motion vector (relative to block position) to get pixel data from
+ * @param x_off    horizontal position of block from origin (0, 0)
+ * @param y_off    vertical position of block from origin (0, 0)
+ * @param block_w  width of block (16, 8 or 4)
+ * @param block_h  height of block (16, 8 or 4; may differ from block_w)
+ * @param width    width of src/dst plane data
+ * @param height   height of src/dst plane data
+ * @param linesize size of a single line of plane data, including padding
+ * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
+ */
+static av_always_inline
+void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst, // td: supplies edge_emu_buffer
+                 const ThreadFrame *ref, const VP8mv *mv,
+                 int x_off, int y_off, int block_w, int block_h,
+                 int width, int height, ptrdiff_t linesize,
+                 vp8_mc_func mc_func[3][3])
+{
+    const uint8_t *src = ref->f->data[0];
+
+    if (AV_RN32A(mv)) { // the mv, read as one 32-bit word, has a bit set
+        ptrdiff_t src_linesize = linesize;
+
+        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
+        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
+
+        x_off += mv->x >> 2;
+        y_off += mv->y >> 2;
+
+        // edge emulation
+        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
+        src += y_off * linesize + x_off;
+        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
+            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
+            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
+                                     src - my_idx * linesize - mx_idx,
+                                     EDGE_EMU_LINESIZE, linesize,
+                                     block_w + subpel_idx[1][mx],
+                                     block_h + subpel_idx[1][my],
+                                     x_off - mx_idx, y_off - my_idx,
+                                     width, height);
+            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; // skip the extra left/top pixels
+            src_linesize = EDGE_EMU_LINESIZE;
+        }
+        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
+    } else { // all-zero mv: plain copy through mc_func[0][0]
+        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
+        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
+                      linesize, block_h, 0, 0);
+    }
+}
+
+/**
+ * chroma MC function
+ *
+ * @param s        VP8 decoding context
+ * @param dst1     target buffer for block data at block position (U plane)
+ * @param dst2     target buffer for block data at block position (V plane)
+ * @param ref      reference picture buffer at origin (0, 0)
+ * @param mv       motion vector (relative to block position) to get pixel data from
+ * @param x_off    horizontal position of block from origin (0, 0)
+ * @param y_off    vertical position of block from origin (0, 0)
+ * @param block_w  width of block (16, 8 or 4)
+ * @param block_h  height of block (may differ from block_w)
+ * @param width    width of src/dst plane data
+ * @param height   height of src/dst plane data
+ * @param linesize size of a single line of plane data, including padding
+ * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
+ */
+static av_always_inline
+void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, // td: supplies edge_emu_buffer
+                   uint8_t *dst2, const ThreadFrame *ref, const VP8mv *mv,
+                   int x_off, int y_off, int block_w, int block_h,
+                   int width, int height, ptrdiff_t linesize,
+                   vp8_mc_func mc_func[3][3])
+{
+    const uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
+
+    if (AV_RN32A(mv)) { // the mv, read as one 32-bit word, has a bit set
+        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
+        int my = mv->y & 7, my_idx = subpel_idx[0][my];
+
+        x_off += mv->x >> 3;
+        y_off += mv->y >> 3;
+
+        // edge emulation
+        src1 += y_off * linesize + x_off;
+        src2 += y_off * linesize + x_off;
+        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
+        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
+            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
+            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, // the buffer is reused, so U is finished before V
+                                     src1 - my_idx * linesize - mx_idx,
+                                     EDGE_EMU_LINESIZE, linesize,
+                                     block_w + subpel_idx[1][mx],
+                                     block_h + subpel_idx[1][my],
+                                     x_off - mx_idx, y_off - my_idx, width, height);
+            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
+            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
+
+            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
+                                     src2 - my_idx * linesize - mx_idx,
+                                     EDGE_EMU_LINESIZE, linesize,
+                                     block_w + subpel_idx[1][mx],
+                                     block_h + subpel_idx[1][my],
+                                     x_off - mx_idx, y_off - my_idx, width, height);
+            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
+            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
+        } else {
+            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
+            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
+        }
+    } else { // all-zero mv: plain copy of both planes through mc_func[0][0]
+        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
+        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
+        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
+    }
+}
+
+static av_always_inline // motion-compensates one partition: luma first, then both chroma planes at half size
+void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
+                 const ThreadFrame *ref_frame, int x_off, int y_off,
+                 int bx_off, int by_off, int block_w, int block_h,
+                 int width, int height, const VP8mv *mv)
+{
+    VP8mv uvmv = *mv; // local copy, so the profile-3 masking below leaves *mv untouched
+
+    /* Y */
+    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
+                ref_frame, mv, x_off + bx_off, y_off + by_off,
+                block_w, block_h, width, height, s->linesize,
+                s->put_pixels_tab[block_w == 8]);
+
+    /* U/V */
+    if (s->profile == 3) {
+        /* this block only applies to VP8; it is safe to check
+         * only the profile, as VP7 profile <= 1 */
+        uvmv.x &= ~7; // clear the low three bits of each component
+        uvmv.y &= ~7;
+    }
+    x_off   >>= 1; // every luma-space quantity is halved for the chroma planes
+    y_off   >>= 1;
+    bx_off  >>= 1;
+    by_off  >>= 1;
+    width   >>= 1;
+    height  >>= 1;
+    block_w >>= 1;
+    block_h >>= 1;
+    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
+                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
+                  &uvmv, x_off + bx_off, y_off + by_off,
+                  block_w, block_h, width, height, s->uvlinesize,
+                  s->put_pixels_tab[1 + (block_w == 4)]);
+}
+
+/* Fetch pixels for estimated mv 4 macroblocks ahead.
+ * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
+static av_always_inline
+void prefetch_motion(const VP8Context *s, const VP8Macroblock *mb,
+                     int mb_x, int mb_y, int mb_xy, int ref)
+{
+    /* Don't prefetch refs that haven't been used very often this frame. */
+    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
+        int x_off = mb_x << 4, y_off = mb_y << 4;
+        int mx = (mb->mv.x >> 2) + x_off + 8;
+        int my = (mb->mv.y >> 2) + y_off;
+        uint8_t **src = s->framep[ref]->tf.f->data;
+        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64; // + 64 bytes: the "4 macroblocks ahead" of 16 pixels each
+        /* For threading, a ff_thread_await_progress here might be useful, but
+         * it actually slows down the decoder. Since a bad prefetch doesn't
+         * generate bad decoder output, we don't run it here. */
+        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
+        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
+        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); // stride is the distance between the two chroma planes
+    }
+}
+
+/**
+ * Apply motion vectors to prediction buffer, chapter 18.
+ */
+static av_always_inline
+void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
+                   VP8Macroblock *mb, int mb_x, int mb_y)
+{
+    int x_off = mb_x << 4, y_off = mb_y << 4; // pixel position of the macroblock
+    int width = 16 * s->mb_width, height = 16 * s->mb_height;
+    const ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
+    const VP8mv *bmv = mb->bmv;
+
+    switch (mb->partitioning) {
+    case VP8_SPLITMVMODE_NONE: // one mv for the whole 16x16 macroblock
+        vp8_mc_part(s, td, dst, ref, x_off, y_off,
+                    0, 0, 16, 16, width, height, &mb->mv);
+        break;
+    case VP8_SPLITMVMODE_4x4: {
+        int x, y;
+        VP8mv uvmv;
+
+        /* Y */
+        for (y = 0; y < 4; y++) {
+            for (x = 0; x < 4; x++) {
+                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
+                            ref, &bmv[4 * y + x],
+                            4 * x + x_off, 4 * y + y_off, 4, 4,
+                            width, height, s->linesize,
+                            s->put_pixels_tab[2]);
+            }
+        }
+
+        /* U/V */
+        x_off  >>= 1;
+        y_off  >>= 1;
+        width  >>= 1;
+        height >>= 1;
+        for (y = 0; y < 2; y++) {
+            for (x = 0; x < 2; x++) {
+                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x + // sum over the 2x2 group of luma mvs
+                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
+                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
+                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
+                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
+                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
+                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
+                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
+                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2; // divide the sum by 4 with a rounding offset
+                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
+                if (s->profile == 3) {
+                    uvmv.x &= ~7; // clear the low three bits of each component
+                    uvmv.y &= ~7;
+                }
+                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
+                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
+                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
+                              width, height, s->uvlinesize,
+                              s->put_pixels_tab[2]);
+            }
+        }
+        break;
+    }
+    case VP8_SPLITMVMODE_16x8: // two 16x8 partitions, top then bottom
+        vp8_mc_part(s, td, dst, ref, x_off, y_off,
+                    0, 0, 16, 8, width, height, &bmv[0]);
+        vp8_mc_part(s, td, dst, ref, x_off, y_off,
+                    0, 8, 16, 8, width, height, &bmv[1]);
+        break;
+    case VP8_SPLITMVMODE_8x16: // two 8x16 partitions, left then right
+        vp8_mc_part(s, td, dst, ref, x_off, y_off,
+                    0, 0, 8, 16, width, height, &bmv[0]);
+        vp8_mc_part(s, td, dst, ref, x_off, y_off,
+                    8, 0, 8, 16, width, height, &bmv[1]);
+        break;
+    case VP8_SPLITMVMODE_8x8: // four 8x8 partitions in raster order
+        vp8_mc_part(s, td, dst, ref, x_off, y_off,
+                    0, 0, 8, 8, width, height, &bmv[0]);
+        vp8_mc_part(s, td, dst, ref, x_off, y_off,
+                    8, 0, 8, 8, width, height, &bmv[1]);
+        vp8_mc_part(s, td, dst, ref, x_off, y_off,
+                    0, 8, 8, 8, width, height, &bmv[2]);
+        vp8_mc_part(s, td, dst, ref, x_off, y_off,
+                    8, 8, 8, 8, width, height, &bmv[3]);
+        break;
+    }
+}
+/**
+ * Apply the inverse transform to the coded blocks of one macroblock through
+ * the vp8dsp idct "add" hooks, which are handed the destination planes.
+ *
+ * td->non_zero_count_cache holds one byte per 4x4 block; each group of four
+ * is loaded as a single 32-bit word. A count of 0 skips the block, 1 selects
+ * the DC-only hook and anything larger the full IDCT hook. Cache rows 0-3
+ * cover luma (not processed here when mb->mode is MODE_I4x4), rows 4 and 5
+ * the two chroma planes.
+ *
+ * @param s   decoder context; supplies the vp8dsp hooks and the line sizes
+ * @param td  per-thread data holding the coefficient blocks and the nnz cache
+ * @param dst plane pointers for this macroblock: dst[0] luma, dst[1..2] chroma
+ * @param mb  macroblock being reconstructed; only mb->mode is read
+ */
+static av_always_inline
+void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
+             const VP8Macroblock *mb)
+{
+    int x, y, ch;
+    /* Luma: four rows of four 4x4 blocks (not done here for MODE_I4x4). */
+    if (mb->mode != MODE_I4x4) {
+        uint8_t *y_dst = dst[0];
+        for (y = 0; y < 4; y++) {
+            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
+            if (nnz4) {
+                if (nnz4 & ~0x01010101) { /* some block has a count above 1 */
+                    for (x = 0; x < 4; x++) {
+                        if ((uint8_t) nnz4 == 1)
+                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
+                                                      td->block[y][x],
+                                                      s->linesize);
+                        else if ((uint8_t) nnz4 > 1)
+                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
+                                                   td->block[y][x],
+                                                   s->linesize);
+                        nnz4 >>= 8; /* next block's count into the low byte */
+                        if (!nnz4) /* remaining blocks of the row are empty */
+                            break;
+                    }
+                } else { /* all counts are 0 or 1: one call for the row */
+                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
+                }
+            }
+            y_dst += 4 * s->linesize;
+        }
+    }
+    /* Chroma: one 2x2 group of 4x4 blocks per plane (cache rows 4 and 5). */
+    for (ch = 0; ch < 2; ch++) {
+        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
+        if (nnz4) {
+            uint8_t *ch_dst = dst[1 + ch];
+            if (nnz4 & ~0x01010101) { /* some block has a count above 1 */
+                for (y = 0; y < 2; y++) {
+                    for (x = 0; x < 2; x++) {
+                        if ((uint8_t) nnz4 == 1)
+                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
+                                                      td->block[4 + ch][(y << 1) + x],
+                                                      s->uvlinesize);
+                        else if ((uint8_t) nnz4 > 1)
+                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
+                                                   td->block[4 + ch][(y << 1) + x],
+                                                   s->uvlinesize);
+                        nnz4 >>= 8;
+                        if (!nnz4) /* rest of this plane is empty */
+                            goto chroma_idct_end;
+                    }
+                    ch_dst += 4 * s->uvlinesize;
+                }
+            } else { /* all counts are 0 or 1: one call for the plane */
+                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
+            }
+        }
+chroma_idct_end:
+        ; /* empty statement: the label needs something to attach to */
+    }
+}
+
+/**
+ * Work out the loop-filter strength of one macroblock and store it in *f.
+ *
+ * The level starts from the frame level or the macroblock's segment level
+ * (absolute, or added to the frame level), is adjusted by the per-reference
+ * and per-mode deltas when those are enabled, and is clipped to 6 bits.
+ * The interior limit is derived from the level and the sharpness setting
+ * and is never below 1.
+ *
+ * @param s      decoder context (segmentation, lf_delta and filter settings)
+ * @param mb     macroblock whose segment, ref_frame, mode and skip are read
+ * @param f      receives filter_level, inner_limit and inner_filter
+ * @param is_vp7 nonzero for VP7, which always enables the inner filter
+ */
+static av_always_inline
+void filter_level_for_mb(const VP8Context *s, const VP8Macroblock *mb,
+                         VP8FilterStrength *f, int is_vp7)
+{
+    int level, limit;
+
+    /* Base level: frame-wide, or per segment (absolute or relative). */
+    if (!s->segmentation.enabled) {
+        level = s->filter.level;
+    } else {
+        level = s->segmentation.filter_level[mb->segment];
+        if (!s->segmentation.absolute_vals)
+            level += s->filter.level;
+    }
+
+    /* Optional adjustments by reference frame and prediction mode. */
+    if (s->lf_delta.enabled) {
+        level += s->lf_delta.ref[mb->ref_frame];
+        level += s->lf_delta.mode[mb->mode];
+    }
+
+    level = av_clip_uintp2(level, 6);
+
+    /* Interior limit: reduced with rising sharpness, floor of 1. */
+    limit = level;
+    if (s->filter.sharpness) {
+        limit >>= (s->filter.sharpness + 3) >> 2;
+        limit = FFMIN(limit, 9 - s->filter.sharpness);
+    }
+    limit = FFMAX(limit, 1);
+
+    f->filter_level = level;
+    f->inner_limit  = limit;
+    /* Inner edges are skipped only for skipped VP8 macroblocks that are
+     * neither MODE_I4x4 nor split-MV. */
+    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
+                      mb->mode == VP8_MVMODE_SPLIT;
+}
+/**
+ * Run the normal (non-simple) loop-filter hooks over one macroblock.
+ *
+ * Nothing is done when f->filter_level is zero. Otherwise the edge limits are
+ * derived from the level (with different formulas for VP7 and VP8), the
+ * high-edge-variance threshold is looked up by keyframe flag and level, and
+ * the vp8dsp hooks are called for the left edge (mb_x != 0), the top edge
+ * (mb_y != 0) and, if f->inner_filter is set, the inner edges. The inner
+ * edges at x = 4/8/12 are filtered before the top edge for VP8 and after
+ * everything else for VP7.
+ *
+ * @param s      decoder context (line sizes, keyframe flag, vp8dsp hooks)
+ * @param dst    plane pointers of the macroblock: dst[0] luma, dst[1..2] chroma
+ * @param f      filter strength computed for this macroblock
+ * @param mb_x   macroblock column; column 0 has no left edge to filter
+ * @param mb_y   macroblock row; row 0 has no top edge to filter
+ * @param is_vp7 nonzero for VP7, zero for VP8
+ */
+static av_always_inline
+void filter_mb(const VP8Context *s, uint8_t *const dst[3], const VP8FilterStrength *f,
+               int mb_x, int mb_y, int is_vp7)
+{
+    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
+    int filter_level = f->filter_level;
+    int inner_limit = f->inner_limit;
+    int inner_filter = f->inner_filter;
+    ptrdiff_t linesize   = s->linesize;
+    ptrdiff_t uvlinesize = s->uvlinesize;
+    static const uint8_t hev_thresh_lut[2][64] = { /* [s->keyframe][filter_level] */
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          3, 3, 3, 3 },
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+          2, 2, 2, 2 }
+    };
+    /* A level of zero disables filtering for this macroblock. */
+    if (!filter_level)
+        return;
+    /* Edge limits; VP7 and VP8 derive them differently from the level. */
+    if (is_vp7) {
+        bedge_lim_y  = filter_level;
+        bedge_lim_uv = filter_level * 2;
+        mbedge_lim   = filter_level + 2;
+    } else {
+        bedge_lim_y  =
+        bedge_lim_uv = filter_level * 2 + inner_limit;
+        mbedge_lim   = bedge_lim_y + 4;
+    }
+
+    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
+    /* Left macroblock edge; skipped in the first column. */
+    if (mb_x) {
+        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
+                                       mbedge_lim, inner_limit, hev_thresh);
+        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
+                                       mbedge_lim, inner_limit, hev_thresh);
+    }
+    /* Inner edges at x = 4, 8, 12 (luma) and x = 4 (chroma); run only when both cond and inner_filter hold. */
+#define H_LOOP_FILTER_16Y_INNER(cond)                                         \
+    if (cond && inner_filter) {                                               \
+        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize,           \
+                                             bedge_lim_y, inner_limit,        \
+                                             hev_thresh);                     \
+        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize,           \
+                                             bedge_lim_y, inner_limit,        \
+                                             hev_thresh);                     \
+        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,           \
+                                             bedge_lim_y, inner_limit,        \
+                                             hev_thresh);                     \
+        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] +  4, dst[2] + 4,         \
+                                             uvlinesize,  bedge_lim_uv,       \
+                                             inner_limit, hev_thresh);        \
+    }
+    /* VP8 filters those inner edges here, before the top edge. */
+    H_LOOP_FILTER_16Y_INNER(!is_vp7)
+    /* Top macroblock edge; skipped in the first row. */
+    if (mb_y) {
+        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
+                                       mbedge_lim, inner_limit, hev_thresh);
+        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
+                                       mbedge_lim, inner_limit, hev_thresh);
+    }
+    /* Inner edges at y = 4, 8, 12 (luma) and y = 4 (chroma). */
+    if (inner_filter) {
+        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
+                                             linesize, bedge_lim_y,
+                                             inner_limit, hev_thresh);
+        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
+                                             linesize, bedge_lim_y,
+                                             inner_limit, hev_thresh);
+        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
+                                             linesize, bedge_lim_y,
+                                             inner_limit, hev_thresh);
+        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] +  4 * uvlinesize,
+                                             dst[2] +  4 * uvlinesize,
+                                             uvlinesize, bedge_lim_uv,
+                                             inner_limit, hev_thresh);
+    }
+    /* VP7 filters the x = 4/8/12 inner edges last. */
+    H_LOOP_FILTER_16Y_INNER(is_vp7)
+}
+
+/**
+ * Run the simple loop-filter hooks over the luma plane of one macroblock.
+ *
+ * Nothing is done when f->filter_level is zero. The left edge is filtered
+ * unless mb_x is 0 and the top edge unless mb_y is 0; the inner edges at
+ * offsets 4, 8 and 12 are filtered in both directions when f->inner_filter
+ * is set. Horizontal filtering is completed before vertical filtering.
+ *
+ * @param s    decoder context (luma line size and vp8dsp hooks)
+ * @param dst  top-left luma sample of the macroblock
+ * @param f    filter strength computed for this macroblock
+ * @param mb_x macroblock column
+ * @param mb_y macroblock row
+ */
+static av_always_inline
+void filter_mb_simple(const VP8Context *s, uint8_t *dst, const VP8FilterStrength *f,
+                      int mb_x, int mb_y)
+{
+    const ptrdiff_t stride = s->linesize;
+    const int level        = f->filter_level;
+    const int limit        = f->inner_limit;
+    const int do_inner     = f->inner_filter;
+    int inner_lim, edge_lim, off;
+
+    if (level == 0)
+        return;
+
+    inner_lim = 2 * level + limit;
+    edge_lim  = inner_lim + 4;
+
+    /* Horizontal pass: macroblock edge first, then the inner edges. */
+    if (mb_x)
+        s->vp8dsp.vp8_h_loop_filter_simple(dst, stride, edge_lim);
+    if (do_inner) {
+        for (off = 4; off <= 12; off += 4)
+            s->vp8dsp.vp8_h_loop_filter_simple(dst + off, stride, inner_lim);
+    }
+
+    /* Vertical pass, same order. */
+    if (mb_y)
+        s->vp8dsp.vp8_v_loop_filter_simple(dst, stride, edge_lim);
+    if (do_inner) {
+        for (off = 4; off <= 12; off += 4)
+            s->vp8dsp.vp8_v_loop_filter_simple(dst + off * stride, stride, inner_lim);
+    }
+}
+
+#define MARGIN (16 << 2)
+/**
+ * Decode the prediction mode and motion vectors of every macroblock of the
+ * frame in a single pass over s->c.
+ *
+ * The motion-vector bounds in s->mv_bounds start at -MARGIN and
+ * ((size - 1) << 6) + MARGIN and are moved by 64 per macroblock column/row,
+ * so they are always relative to the macroblock being decoded.
+ *
+ * @param avctx      codec context; avctx->priv_data is the VP8Context
+ * @param curframe   frame being decoded; its seg_map receives segment ids
+ * @param prev_frame previous frame, or NULL; its seg_map (if any) is passed
+ *                   on as the reference segmentation map
+ * @param is_vp7     nonzero for VP7, zero for VP8
+ * @return 0 on success, AVERROR_INVALIDDATA when vpx_rac_is_end() reports
+ *         that s->c is exhausted
+ */
+static av_always_inline
+int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
+                            const VP8Frame *prev_frame, int is_vp7)
+{
+    VP8Context *s = avctx->priv_data;
+    int row, col;
+
+    s->mv_bounds.mv_min.y = -MARGIN;
+    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
+
+    for (row = 0; row < s->mb_height; row++) {
+        /* Rows of the macroblock array are mb_width + 1 entries apart;
+         * the frame's first macroblock sits at row 1, column 1. */
+        VP8Macroblock *cur = s->macroblocks_base +
+                             ((s->mb_width + 1) * (row + 1) + 1);
+        int idx = row * s->mb_width;
+
+        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
+
+        s->mv_bounds.mv_min.x = -MARGIN;
+        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
+
+        for (col = 0; col < s->mb_width; col++) {
+            if (vpx_rac_is_end(&s->c))
+                return AVERROR_INVALIDDATA;
+
+            /* First row: give the entry one row up a DC_PRED top context. */
+            if (row == 0)
+                AV_WN32A((cur - s->mb_width - 1)->intra4x4_pred_mode_top,
+                         DC_PRED * 0x01010101);
+
+            decode_mb_mode(s, &s->mv_bounds, cur, col, row,
+                           curframe->seg_map->data + idx,
+                           prev_frame && prev_frame->seg_map ?
+                           prev_frame->seg_map->data + idx : NULL, 1, is_vp7);
+
+            s->mv_bounds.mv_min.x -= 64;
+            s->mv_bounds.mv_max.x -= 64;
+            idx++;
+            cur++;
+        }
+
+        s->mv_bounds.mv_min.y -= 64;
+        s->mv_bounds.mv_max.y -= 64;
+    }
+
+    return 0;
+}
+/** VP7 entry point: forwards to vp78_decode_mv_mb_modes() with IS_VP7. */
+static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
+                                  const VP8Frame *prev_frame)
+{
+    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
+}
+/** VP8 entry point: forwards to vp78_decode_mv_mb_modes() with IS_VP8. */
+static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
+                                  const VP8Frame *prev_frame)
+{
+    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
+}
+
+#if HAVE_THREADS
+/*
+ * Row-progress handshake between the threads of sliced decoding.
+ *
+ * A position packs the macroblock row into the upper bits and the low
+ * 16 bits of the column into the lower bits: (row << 16) | (col & 0xFFFF).
+ *
+ * check_thread_pos(td, otd, x, y): block until otd->thread_mb_pos has reached
+ * position (y, x). While waiting, the wanted position is published in
+ * td->wait_mb_pos; it is reset to INT_MAX afterwards.
+ *
+ * update_pos(td, y, x): publish position (y, x) in td->thread_mb_pos and,
+ * under slice threading with more than one job, wake the waiters on td->cond
+ * when a neighbour is waiting for a position that has now been reached (or
+ * unconditionally when next_td or prev_td is NULL). This macro also uses
+ * avctx, num_jobs, next_td and prev_td from the calling scope.
+ *
+ * Every macro argument is parenthesized in the expansion so that arbitrary
+ * expressions can be passed safely.
+ */
+#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
+    do {                                                                      \
+        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);             \
+        if (atomic_load(&(otd)->thread_mb_pos) < tmp) {                       \
+            pthread_mutex_lock(&(otd)->lock);                                 \
+            atomic_store(&(td)->wait_mb_pos, tmp);                            \
+            do {                                                              \
+                if (atomic_load(&(otd)->thread_mb_pos) >= tmp)                \
+                    break;                                                    \
+                pthread_cond_wait(&(otd)->cond, &(otd)->lock);                \
+            } while (1);                                                      \
+            atomic_store(&(td)->wait_mb_pos, INT_MAX);                        \
+            pthread_mutex_unlock(&(otd)->lock);                               \
+        }                                                                     \
+    } while (0)
+
+#define update_pos(td, mb_y, mb_x)                                            \
+    do {                                                                      \
+        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);            \
+        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
+                               (num_jobs > 1);                                \
+        int is_null          = !next_td || !prev_td;                          \
+        int pos_check        = (is_null) ? 1 :                                \
+            (next_td != (td) && pos >= atomic_load(&next_td->wait_mb_pos)) || \
+            (prev_td != (td) && pos >= atomic_load(&prev_td->wait_mb_pos));   \
+        atomic_store(&(td)->thread_mb_pos, pos);                              \
+        if (sliced_threading && pos_check) {                                  \
+            pthread_mutex_lock(&(td)->lock);                                  \
+            pthread_cond_broadcast(&(td)->cond);                              \
+            pthread_mutex_unlock(&(td)->lock);                                \
+        }                                                                     \
+    } while (0)
+#else
+/* Without thread support both operations are no-op statements. */
+#define check_thread_pos(td, otd, mb_x_check, mb_y_check) do { } while (0)
+#define update_pos(td, mb_y, mb_x) do { } while (0)
+#endif
+/**
+ * Decode and reconstruct one macroblock row, without loop filtering.
+ *
+ * The row index is taken from the upper bits of td->thread_mb_pos. For each
+ * macroblock this parses the mode (only when s->mb_layout is 0), decodes the
+ * coefficients unless the macroblock is skipped, runs intra or inter
+ * prediction, adds the residual and, when deblocking is enabled, stores the
+ * filter strength for the later filter pass. Progress is published after
+ * every macroblock with update_pos().
+ *
+ * @param avctx    codec context; avctx->priv_data is the VP8Context
+ * @param tdata    not used by this function
+ * @param jobnr    job index, used to locate the neighbouring rows' thread data
+ * @param threadnr index of this thread's entry in s->thread_data
+ * @param is_vp7   nonzero for VP7, zero for VP8
+ * @return 0 on success, AVERROR_INVALIDDATA when vpx_rac_is_end() reports
+ *         that s->c or the row's coefficient partition is exhausted
+ */
+static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
+                                        int jobnr, int threadnr, int is_vp7)
+{
+    VP8Context *s = avctx->priv_data;
+    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
+    int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
+    int mb_x, mb_xy = mb_y * s->mb_width;
+    int num_jobs = s->num_jobs;
+    const VP8Frame *prev_frame = s->prev_frame;
+    VP8Frame *curframe = s->curframe;
+    VPXRangeCoder *coeff_c  = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
+    /* NOTE(review): the mask above presumably relies on num_coeff_partitions being a power of two. */
+    VP8Macroblock *mb;
+    uint8_t *dst[3] = {
+        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
+        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
+        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
+    };
+
+    if (vpx_rac_is_end(&s->c))
+         return AVERROR_INVALIDDATA;
+    /* Thread data of the rows above and below; the first and last row use td itself. */
+    if (mb_y == 0)
+        prev_td = td;
+    else
+        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
+    if (mb_y == s->mb_height - 1)
+        next_td = td;
+    else
+        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
+    if (s->mb_layout == 1)
+        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
+    else {
+        // Make sure the previous frame has read its segmentation map,
+        // if we re-use the same map.
+        if (prev_frame && s->segmentation.enabled &&
+            !s->segmentation.update_map)
+            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
+        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
+        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
+        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
+    }
+    /* VP8 clears the left nnz context on every row, VP7 only on the first row. */
+    if (!is_vp7 || mb_y == 0)
+        memset(td->left_nnz, 0, sizeof(td->left_nnz));
+
+    td->mv_bounds.mv_min.x = -MARGIN;
+    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
+
+    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
+        if (vpx_rac_is_end(&s->c))
+            return AVERROR_INVALIDDATA;
+        /* Wait for the previous row's thread: its published position must
+         * reach column mb_x+1 of row mb_y-1 for VP8, column mb_x+2 of row
+         * mb_y-2 for VP7. For threadnr 0 the column is raised by
+         * s->mb_width+3, the offset filter_mb_row() adds when it publishes. */
+        if (prev_td != td) {
+            if (threadnr != 0) {
+                check_thread_pos(td, prev_td,
+                                 mb_x + (is_vp7 ? 2 : 1),
+                                 mb_y - (is_vp7 ? 2 : 1));
+            } else {
+                check_thread_pos(td, prev_td,
+                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
+                                 mb_y - (is_vp7 ? 2 : 1));
+            }
+        }
+
+        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
+                         s->linesize, 4);
+        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
+                         dst[2] - dst[1], 2);
+        /* With mb_layout 0 the mode is parsed here, interleaved with reconstruction. */
+        if (!s->mb_layout)
+            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
+                           prev_frame && prev_frame->seg_map ?
+                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
+
+        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_PREVIOUS);
+        /* Coefficients are read from this row's partition (coeff_c), not from s->c. */
+        if (!mb->skip) {
+            if (vpx_rac_is_end(coeff_c))
+                return AVERROR_INVALIDDATA;
+            decode_mb_coeffs(s, td, coeff_c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
+        }
+        /* Modes up to MODE_I4x4 are intra; everything above is inter. */
+        if (mb->mode <= MODE_I4x4)
+            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
+        else
+            inter_predict(s, td, dst, mb, mb_x, mb_y);
+
+        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_GOLDEN);
+        /* Add the residual, or clear the nnz context of a skipped macroblock. */
+        if (!mb->skip) {
+            idct_mb(s, td, dst, mb);
+        } else {
+            AV_ZERO64(td->left_nnz);
+            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
+
+            /* Reset DC block predictors if they would exist
+             * if the mb had coefficients */
+            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
+                td->left_nnz[8]     = 0;
+                s->top_nnz[mb_x][8] = 0;
+            }
+        }
+        /* Store the strength now; filter_mb_row() reads td->filter_strength[mb_x] later. */
+        if (s->deblock_filter)
+            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
+        /* With several jobs the last thread saves the border here; with one job filter_mb_row() does it. */
+        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
+            if (s->filter.simple)
+                backup_mb_border(s->top_border[mb_x + 1], dst[0],
+                                 NULL, NULL, s->linesize, 0, 1);
+            else
+                backup_mb_border(s->top_border[mb_x + 1], dst[0],
+                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
+        }
+
+        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_ALTREF);
+
+        dst[0]      += 16;
+        dst[1]      += 8;
+        dst[2]      += 8;
+        td->mv_bounds.mv_min.x -= 64;
+        td->mv_bounds.mv_max.x -= 64;
+        /* NOTE(review): the loop keeps mb_x < s->mb_width, so the first branch below looks unreachable. */
+        if (mb_x == s->mb_width + 1) {
+            update_pos(td, mb_y, s->mb_width + 3);
+        } else {
+            update_pos(td, mb_y, mb_x);
+        }
+    }
+    return 0;
+}
+/** VP7 row decoder: decode_mb_row_no_filter() with is_vp7 = 1. */
+static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
+                                        int jobnr, int threadnr)
+{
+    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
+}
+/** VP8 row decoder: decode_mb_row_no_filter() with is_vp7 = 0. */
+static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
+                                        int jobnr, int threadnr)
+{
+    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
+}
+/**
+ * Loop-filter one macroblock row of the current frame.
+ *
+ * The row index is taken from the upper bits of td->thread_mb_pos. Before
+ * each macroblock the function waits for the threads of the neighbouring
+ * rows, then runs the simple or the normal filter with the strength stored
+ * in td->filter_strength[mb_x], and publishes its progress. Positions
+ * published here have s->mb_width + 3 added to the column.
+ *
+ * @param avctx    codec context; avctx->priv_data is the VP8Context
+ * @param tdata    not used by this function
+ * @param jobnr    job index, used to locate the neighbouring rows' thread data
+ * @param threadnr index of this thread's entry in s->thread_data
+ * @param is_vp7   nonzero for VP7, zero for VP8
+ */
+static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
+                              int jobnr, int threadnr, int is_vp7)
+{
+    VP8Context *s = avctx->priv_data;
+    VP8ThreadData *td = &s->thread_data[threadnr];
+    int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
+    AVFrame *curframe = s->curframe->tf.f;
+    VP8Macroblock *mb; /* NOTE(review): advanced in the loop but never read in this function */
+    VP8ThreadData *prev_td, *next_td;
+    uint8_t *dst[3] = {
+        curframe->data[0] + 16 * mb_y * s->linesize,
+        curframe->data[1] +  8 * mb_y * s->uvlinesize,
+        curframe->data[2] +  8 * mb_y * s->uvlinesize
+    };
+
+    if (s->mb_layout == 1)
+        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
+    else
+        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
+    /* Thread data of the rows above and below; the first and last row use td itself. */
+    if (mb_y == 0)
+        prev_td = td;
+    else
+        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
+    if (mb_y == s->mb_height - 1)
+        next_td = td;
+    else
+        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
+
+    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
+        const VP8FilterStrength *f = &td->filter_strength[mb_x];
+        if (prev_td != td) /* row above must have published filter column mb_x + 1 */
+            check_thread_pos(td, prev_td,
+                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
+        if (next_td != td) /* row below must have reached column mb_x + 1 (not checked against thread_data[0]) */
+            if (next_td != &s->thread_data[0])
+                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
+        /* Single-job decoding saves the border here, before this macroblock is filtered. */
+        if (num_jobs == 1) {
+            if (s->filter.simple)
+                backup_mb_border(s->top_border[mb_x + 1], dst[0],
+                                 NULL, NULL, s->linesize, 0, 1);
+            else
+                backup_mb_border(s->top_border[mb_x + 1], dst[0],
+                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
+        }
+
+        if (s->filter.simple)
+            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
+        else
+            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
+        dst[0] += 16;
+        dst[1] += 8;
+        dst[2] += 8;
+
+        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
+    }
+}
+/** VP7 row filter: filter_mb_row() with is_vp7 = 1. */
+static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
+                              int jobnr, int threadnr)
+{
+    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
+}
+/** VP8 row filter: filter_mb_row() with is_vp7 = 0. */
+static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
+                              int jobnr, int threadnr)
+{
+    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
+}
+/**
+ * Job body for avctx->execute2(): decode, and filter when deblocking is
+ * enabled, every num_jobs-th macroblock row starting at row jobnr.
+ *
+ * Each row is processed through the s->decode_mb_row_no_filter and
+ * s->filter_mb_row callbacks. On a decoding error the thread publishes a
+ * position in row s->mb_height before returning. next_td and prev_td are
+ * NULL here, so update_pos() takes its "is_null" path in this function.
+ *
+ * @param avctx    codec context; avctx->priv_data is the VP8Context
+ * @param tdata    passed through to the row callbacks
+ * @param jobnr    job index; selects the thread data entry and the first row
+ * @param threadnr thread index; stored in td->thread_nr and used for the
+ *                 initial vertical motion-vector bounds
+ * @param is_vp7   not used by this function
+ * @return 0 on success, otherwise the negative value returned by the row
+ *         decoder
+ */
+static av_always_inline
+int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
+                              int threadnr, int is_vp7)
+{
+    const VP8Context *s = avctx->priv_data;
+    VP8ThreadData *td = &s->thread_data[jobnr];
+    VP8ThreadData *next_td = NULL, *prev_td = NULL;
+    VP8Frame *curframe = s->curframe;
+    int mb_y, num_jobs = s->num_jobs;
+    int ret;
+
+    td->thread_nr = threadnr;
+    td->mv_bounds.mv_min.y   = -MARGIN - 64 * threadnr;
+    td->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
+    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
+        atomic_store(&td->thread_mb_pos, mb_y << 16); /* tells the row callbacks which row to work on */
+        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
+        if (ret < 0) {
+            update_pos(td, s->mb_height, INT_MAX & 0xFFFF); /* position past the last row, so waiters are released */
+            return ret;
+        }
+        if (s->deblock_filter)
+            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
+        update_pos(td, mb_y, INT_MAX & 0xFFFF); /* row complete: largest column value */
+
+        td->mv_bounds.mv_min.y -= 64 * num_jobs;
+        td->mv_bounds.mv_max.y -= 64 * num_jobs;
+
+        if (avctx->active_thread_type == FF_THREAD_FRAME)
+            ff_thread_report_progress(&curframe->tf, mb_y, 0);
+    }
+
+    return 0;
+}
+/** VP7 job body: forwards to vp78_decode_mb_row_sliced() with IS_VP7. */
+static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
+                                    int jobnr, int threadnr)
+{
+    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
+}
+/** VP8 job body: forwards to vp78_decode_mb_row_sliced() with IS_VP8. */
+static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
+                                    int jobnr, int threadnr)
+{
+    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
+}
+/**
+ * Decode one packet into a frame (shared by the VP7 and VP8 decoders).
+ *
+ * Steps: parse the frame header, decide whether the frame is skipped
+ * according to avctx->skip_frame, release frames that are no longer
+ * referenced, pick and allocate the output frame, work out the reference
+ * slots for the next frame (s->next_framep), then decode either through the
+ * hwaccel callbacks or with the software row decoders run by
+ * avctx->execute2(). The value returned by execute2() is not checked here.
+ * On success s->framep is replaced by s->next_framep; on error
+ * s->next_framep is reset to s->framep.
+ *
+ * @param avctx     codec context; avctx->priv_data is the VP8Context
+ * @param rframe    receives a reference to the decoded frame unless the
+ *                  frame is invisible
+ * @param got_frame set to 1 when rframe has been filled
+ * @param avpkt     input packet
+ * @param is_vp7    nonzero for VP7, zero for VP8
+ * @return avpkt->size on success, a negative error code on failure
+ */
+static av_always_inline
+int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame,
+                      const AVPacket *avpkt, int is_vp7)
+{
+    VP8Context *s = avctx->priv_data;
+    int ret, i, referenced, num_jobs;
+    enum AVDiscard skip_thresh;
+    VP8Frame *av_uninit(curframe), *prev_frame;
+
+    if (is_vp7)
+        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
+    else
+        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
+
+    if (ret < 0)
+        goto err;
+    /* VP8 picks its pixel format once; it stays AV_PIX_FMT_NONE until then. */
+    if (s->actually_webp) {
+        // avctx->pix_fmt already set in caller.
+    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
+        s->pix_fmt = get_pixel_format(s);
+        if (s->pix_fmt < 0) {
+            ret = AVERROR(EINVAL);
+            goto err;
+        }
+        avctx->pix_fmt = s->pix_fmt;
+    }
+    /* The CURRENT slot still holds the frame from the previous call. */
+    prev_frame = s->framep[VP8_FRAME_CURRENT];
+    /* referenced: this frame will be kept as last, golden or altref. */
+    referenced = s->update_last || s->update_golden == VP8_FRAME_CURRENT ||
+                 s->update_altref == VP8_FRAME_CURRENT;
+    /* Lowest discard level at which this kind of frame is dropped. */
+    skip_thresh = !referenced ? AVDISCARD_NONREF
+                              : !s->keyframe ? AVDISCARD_NONKEY
+                                             : AVDISCARD_ALL;
+
+    if (avctx->skip_frame >= skip_thresh) {
+        s->invisible = 1;
+        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
+        goto skip_decode;
+    }
+    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
+
+    // release no longer referenced frames
+    for (i = 0; i < 5; i++)
+        if (s->frames[i].tf.f->buf[0] &&
+            &s->frames[i] != prev_frame &&
+            &s->frames[i] != s->framep[VP8_FRAME_PREVIOUS] &&
+            &s->frames[i] != s->framep[VP8_FRAME_GOLDEN]   &&
+            &s->frames[i] != s->framep[VP8_FRAME_ALTREF])
+            vp8_release_frame(s, &s->frames[i]);
+
+    curframe = s->framep[VP8_FRAME_CURRENT] = vp8_find_free_buffer(s);
+
+    if (!s->colorspace)
+        avctx->colorspace = AVCOL_SPC_BT470BG;
+    if (s->fullrange)
+        avctx->color_range = AVCOL_RANGE_JPEG;
+    else
+        avctx->color_range = AVCOL_RANGE_MPEG;
+
+    /* Given that arithmetic probabilities are updated every frame, it's quite
+     * likely that the values we have on a random interframe are complete
+     * junk if we didn't start decode on a keyframe. So just don't display
+     * anything rather than junk. */
+    if (!s->keyframe && (!s->framep[VP8_FRAME_PREVIOUS] ||
+                         !s->framep[VP8_FRAME_GOLDEN]   ||
+                         !s->framep[VP8_FRAME_ALTREF])) {
+        av_log(avctx, AV_LOG_WARNING,
+               "Discarding interframe without a prior keyframe!\n");
+        ret = AVERROR_INVALIDDATA;
+        goto err;
+    }
+
+    curframe->tf.f->key_frame = s->keyframe;
+    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
+                                            : AV_PICTURE_TYPE_P;
+    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
+        goto err;
+    /* Reference slots for the next frame: either updated from the slot named in the header or kept. */
+    // check if golden and altref are swapped
+    if (s->update_altref != VP8_FRAME_NONE)
+        s->next_framep[VP8_FRAME_ALTREF] = s->framep[s->update_altref];
+    else
+        s->next_framep[VP8_FRAME_ALTREF] = s->framep[VP8_FRAME_ALTREF];
+
+    if (s->update_golden != VP8_FRAME_NONE)
+        s->next_framep[VP8_FRAME_GOLDEN] = s->framep[s->update_golden];
+    else
+        s->next_framep[VP8_FRAME_GOLDEN] = s->framep[VP8_FRAME_GOLDEN];
+
+    if (s->update_last)
+        s->next_framep[VP8_FRAME_PREVIOUS] = curframe;
+    else
+        s->next_framep[VP8_FRAME_PREVIOUS] = s->framep[VP8_FRAME_PREVIOUS];
+
+    s->next_framep[VP8_FRAME_CURRENT] = curframe;
+    /* Only codecs that provide an update_thread_context callback signal the end of setup. */
+    if (ffcodec(avctx->codec)->update_thread_context)
+        ff_thread_finish_setup(avctx);
+    /* Hardware path: the whole packet is handed to the hwaccel as one slice. */
+    if (avctx->hwaccel) {
+        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
+        if (ret < 0)
+            goto err;
+
+        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
+        if (ret < 0)
+            goto err;
+
+        ret = avctx->hwaccel->end_frame(avctx);
+        if (ret < 0)
+            goto err;
+
+    } else {
+        s->linesize   = curframe->tf.f->linesize[0];
+        s->uvlinesize = curframe->tf.f->linesize[1];
+
+        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
+        /* Zero macroblock structures for top/top-left prediction
+         * from outside the frame. */
+        if (!s->mb_layout)
+            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
+                   (s->mb_width + 1) * sizeof(*s->macroblocks));
+        if (!s->mb_layout && s->keyframe)
+            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
+
+        memset(s->ref_count, 0, sizeof(s->ref_count));
+        /* mb_layout 1: all modes and motion vectors are decoded up front. */
+        if (s->mb_layout == 1) {
+            // Make sure the previous frame has read its segmentation map,
+            // if we re-use the same map.
+            if (prev_frame && s->segmentation.enabled &&
+                !s->segmentation.update_map)
+                ff_thread_await_progress(&prev_frame->tf, 1, 0);
+            if (is_vp7)
+                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
+            else
+                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
+            if (ret < 0)
+                goto err;
+        }
+        /* Frame threading uses a single job; otherwise at most one job per coefficient partition. */
+        if (avctx->active_thread_type == FF_THREAD_FRAME)
+            num_jobs = 1;
+        else
+            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
+        s->num_jobs   = num_jobs;
+        s->curframe   = curframe;
+        s->prev_frame = prev_frame;
+        s->mv_bounds.mv_min.y   = -MARGIN;
+        s->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
+        for (i = 0; i < MAX_THREADS; i++) {
+            VP8ThreadData *td = &s->thread_data[i];
+            atomic_init(&td->thread_mb_pos, 0);
+            atomic_init(&td->wait_mb_pos, INT_MAX);
+        }
+        if (is_vp7)
+            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
+                            num_jobs);
+        else
+            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
+                            num_jobs);
+    }
+    /* Report the frame as complete and make the new reference slots current. */
+    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
+    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
+
+skip_decode:
+    // if future frames don't use the updated probabilities,
+    // reset them to the values we saved
+    if (!s->update_probabilities)
+        s->prob[0] = s->prob[1];
+    /* Invisible frames (including skipped ones) are not output. */
+    if (!s->invisible) {
+        if ((ret = av_frame_ref(rframe, curframe->tf.f)) < 0)
+            return ret;
+        *got_frame = 1;
+    }
+
+    return avpkt->size;
+err:
+    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
+    return ret;
+}
+/** VP8 decode callback: forwards to vp78_decode_frame() with IS_VP8. */
+int ff_vp8_decode_frame(AVCodecContext *avctx, AVFrame *frame,
+                        int *got_frame, AVPacket *avpkt)
+{
+    return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP8);
+}
+/* VP7 decode callback (built only with CONFIG_VP7_DECODER): forwards to vp78_decode_frame() with IS_VP7. */
+#if CONFIG_VP7_DECODER
+static int vp7_decode_frame(AVCodecContext *avctx, AVFrame *frame,
+                            int *got_frame, AVPacket *avpkt)
+{
+    return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP7);
+}
+#endif /* CONFIG_VP7_DECODER */
+
+/**
+ * Close callback: flush the decoder state, then free the AVFrame held by
+ * every entry of s->frames.
+ *
+ * @param avctx codec context; avctx->priv_data is the VP8Context
+ * @return always 0
+ */
+av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
+{
+    VP8Context *s = avctx->priv_data;
+    VP8Frame *frame;
+
+    vp8_decode_flush_impl(avctx, 1);
+
+    for (frame = s->frames; frame < s->frames + FF_ARRAY_ELEMS(s->frames); frame++)
+        av_frame_free(&frame->tf.f);
+
+    return 0;
+}
+
+/**
+ * Allocate an AVFrame for every entry of s->frames.
+ *
+ * Allocation stops at the first failure; entries allocated before it are
+ * left in place for the caller to release.
+ *
+ * @param s decoder context
+ * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
+ */
+static av_cold int vp8_init_frames(VP8Context *s)
+{
+    VP8Frame *frame = s->frames;
+    VP8Frame *const end = s->frames + FF_ARRAY_ELEMS(s->frames);
+
+    while (frame < end) {
+        frame->tf.f = av_frame_alloc();
+        if (frame->tf.f == NULL)
+            return AVERROR(ENOMEM);
+        frame++;
+    }
+
+    return 0;
+}
+/**
+ * Common decoder initialisation for VP7 and VP8.
+ *
+ * Sets the default pixel format, initialises the DSP contexts, installs the
+ * codec-specific row decoding and row filtering callbacks, copies the zigzag
+ * scan table into s->prob[0] and allocates the frame pool. If the frame pool
+ * cannot be allocated, ff_vp8_decode_free() is called before returning.
+ *
+ * @param avctx  codec context; avctx->priv_data is the VP8Context
+ * @param is_vp7 nonzero to set up the VP7 variant, zero for VP8
+ * @return 0 on success, a negative error code from vp8_init_frames() otherwise
+ */
+static av_always_inline
+int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
+{
+    VP8Context *s = avctx->priv_data;
+    int ret;
+
+    s->avctx = avctx;
+    s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
+    s->pix_fmt = AV_PIX_FMT_NONE;
+    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+
+    ff_videodsp_init(&s->vdsp, 8);
+    /* Shared DSP first, then the codec-specific functions and row callbacks. */
+    ff_vp78dsp_init(&s->vp8dsp);
+    if (CONFIG_VP7_DECODER && is_vp7) {
+        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
+        ff_vp7dsp_init(&s->vp8dsp);
+        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
+        s->filter_mb_row           = vp7_filter_mb_row;
+    } else if (CONFIG_VP8_DECODER && !is_vp7) {
+        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
+        ff_vp8dsp_init(&s->vp8dsp);
+        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
+        s->filter_mb_row           = vp8_filter_mb_row;
+    } /* no else: if neither branch applies, the row callbacks are left unset */
+
+    /* does not change for VP8 */
+    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
+
+    if ((ret = vp8_init_frames(s)) < 0) {
+        ff_vp8_decode_free(avctx);
+        return ret;
+    }
+
+    return 0;
+}
+/* VP7 init callback (built only with CONFIG_VP7_DECODER): forwards to vp78_decode_init() with IS_VP7. */
+#if CONFIG_VP7_DECODER
+static int vp7_decode_init(AVCodecContext *avctx)
+{
+    return vp78_decode_init(avctx, IS_VP7);
+}
+#endif /* CONFIG_VP7_DECODER */
+/** VP8 init callback: forwards to vp78_decode_init() with IS_VP8. */
+av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
+{
+    return vp78_decode_init(avctx, IS_VP8);
+}
+
+#if CONFIG_VP8_DECODER
+#if HAVE_THREADS
+/* Translate a frame pointer into s_src->frames to the same index in s->frames. */
+#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
+
+/**
+ * Frame-threading callback: copy the decoder state that the next frame
+ * depends on from the source context into the destination context.
+ *
+ * Buffers are freed first when the macroblock dimensions differ. Then the
+ * pixel format, probabilities, segmentation, loop-filter deltas and sign
+ * biases are copied, every allocated source frame is referenced into the
+ * same slot of the destination, and the destination's frame pointers are
+ * set from the source's next_framep.
+ *
+ * @param dst context to update
+ * @param src context to copy from
+ * @return 0 on success, a negative error code from vp8_ref_frame() otherwise
+ */
+static int vp8_decode_update_thread_context(AVCodecContext *dst,
+                                            const AVCodecContext *src)
+{
+    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
+    int i;
+
+    if (s->macroblocks_base) {
+        if (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height) {
+            free_buffers(s);
+            s->mb_width  = s_src->mb_width;
+            s->mb_height = s_src->mb_height;
+        }
+    }
+
+    s->pix_fmt      = s_src->pix_fmt;
+    /* Take the saved probabilities when the source frame's updates are not kept. */
+    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
+    s->segmentation = s_src->segmentation;
+    s->lf_delta     = s_src->lf_delta;
+    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
+
+    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
+        int ret;
+
+        if (!s_src->frames[i].tf.f->buf[0])
+            continue;
+        ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
+        if (ret < 0)
+            return ret;
+    }
+
+    for (i = 0; i < 4; i++)
+        s->framep[i] = REBASE(s_src->next_framep[i]);
+
+    return 0;
+}
+#endif /* HAVE_THREADS */
+#endif /* CONFIG_VP8_DECODER */
+/* VP7 decoder descriptor: shares VP8Context, the close callback and the flush callback with VP8; only AV_CODEC_CAP_DR1 is advertised. */
+#if CONFIG_VP7_DECODER
+const FFCodec ff_vp7_decoder = {
+    .p.name                = "vp7",
+    CODEC_LONG_NAME("On2 VP7"),
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_VP7,
+    .priv_data_size        = sizeof(VP8Context),
+    .init                  = vp7_decode_init,
+    .close                 = ff_vp8_decode_free,
+    FF_CODEC_DECODE_CB(vp7_decode_frame),
+    .p.capabilities        = AV_CODEC_CAP_DR1,
+    .flush                 = vp8_decode_flush,
+};
+#endif /* CONFIG_VP7_DECODER */
+/* VP8 decoder descriptor: advertises frame and slice threading on top of DR1 and lists the optional VAAPI/NVDEC hwaccel configurations. */
+#if CONFIG_VP8_DECODER
+const FFCodec ff_vp8_decoder = {
+    .p.name                = "vp8",
+    CODEC_LONG_NAME("On2 VP8"),
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_VP8,
+    .priv_data_size        = sizeof(VP8Context),
+    .init                  = ff_vp8_decode_init,
+    .close                 = ff_vp8_decode_free,
+    FF_CODEC_DECODE_CB(ff_vp8_decode_frame),
+    .p.capabilities        = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
+                             AV_CODEC_CAP_SLICE_THREADS,
+    .caps_internal         = FF_CODEC_CAP_ALLOCATE_PROGRESS,
+    .flush                 = vp8_decode_flush,
+    UPDATE_THREAD_CONTEXT(vp8_decode_update_thread_context),
+    .hw_configs            = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_VP8_VAAPI_HWACCEL
+                               HWACCEL_VAAPI(vp8),
+#endif
+#if CONFIG_VP8_NVDEC_HWACCEL
+                               HWACCEL_NVDEC(vp8),
+#endif
+                               NULL
+                           },
+};
+#endif /* CONFIG_VP8_DECODER */
diff --git a/media/ffvpx/libavcodec/vp8.h b/media/ffvpx/libavcodec/vp8.h
new file mode 100644
index 0000000000..6f29156b53
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8.h
@@ -0,0 +1,361 @@
+/*
+ * VP8 compatible video decoder
+ *
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ * Copyright (C) 2010 Fiona Glaser
+ * Copyright (C) 2012 Daniel Kang
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP8_H
+#define AVCODEC_VP8_H
+
+#include <stdatomic.h>
+
+#include "libavutil/buffer.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/thread.h"
+
+#include "h264pred.h"
+#include "threadframe.h"
+#include "videodsp.h"
+#include "vp8dsp.h"
+#include "vpx_rac.h"
+
+#define VP8_MAX_QUANT 127
+
+typedef enum { // identifiers for the frames the decoder keeps track of
+    VP8_FRAME_NONE     = -1, // "no frame"; see the note on VP8Context.update_golden
+    VP8_FRAME_CURRENT  =  0, // 0..3 match the ref[0..3] slots documented in VP8Context.lf_delta
+    VP8_FRAME_PREVIOUS =  1,
+    VP8_FRAME_GOLDEN   =  2,
+    VP8_FRAME_ALTREF   =  3,
+} VP8FrameType;
+
+enum dct_token { // token identifiers, numbered from 0 in the order listed
+    DCT_0,
+    DCT_1,
+    DCT_2,
+    DCT_3,
+    DCT_4,
+    DCT_CAT1,
+    DCT_CAT2,
+    DCT_CAT3,
+    DCT_CAT4,
+    DCT_CAT5,
+    DCT_CAT6,
+    DCT_EOB,
+
+    NUM_DCT_TOKENS // = 12, the count of the tokens above; not a token itself
+};
+
+// used to signal 4x4 intra pred in luma MBs
+#define MODE_I4x4 4
+
+enum inter_mvmode { // continues the mode numbering right after MODE_I4x4
+    VP8_MVMODE_ZERO = MODE_I4x4 + 1, // 5
+    VP8_MVMODE_MV,                   // 6
+    VP8_MVMODE_SPLIT                 // 7; VP8Context.lf_delta.mode[] is sized by this + 1
+};
+
+enum inter_splitmvmode { // NOTE(review): 5 values; vp8_mbsplits[] in vp8data.h has 5 rows, presumably indexed by this
+    VP8_SPLITMVMODE_16x8 = 0,    ///< 2 16x8 blocks (vertical)
+    VP8_SPLITMVMODE_8x16,        ///< 2 8x16 blocks (horizontal)
+    VP8_SPLITMVMODE_8x8,         ///< 2x2 blocks of 8x8px each
+    VP8_SPLITMVMODE_4x4,         ///< 4x4 blocks of 4x4px each
+    VP8_SPLITMVMODE_NONE,        ///< (only used in prediction) no split MVs
+};
+
+typedef struct VP8mv { // motion vector stored as two 16-bit components
+    DECLARE_ALIGNED(4, int16_t, x); // alignment of 4 requested; x and y together are 2 x int16_t
+    int16_t y;
+} VP8mv;
+
+typedef struct VP8FilterStrength { // three byte-sized values; reached through VP8ThreadData.filter_strength
+    uint8_t filter_level;
+    uint8_t inner_limit;
+    uint8_t inner_filter;
+} VP8FilterStrength;
+
+typedef struct VP8Macroblock { // per-macroblock record; VP8Context holds an array of these
+    uint8_t skip;
+    // TODO: make it possible to check for at least (i4x4 or split_mv)
+    // in one op. are others needed?
+    uint8_t mode;
+    uint8_t ref_frame; // presumably a VP8FrameType value (0..3) — TODO confirm
+    uint8_t partitioning; // presumably an inter_splitmvmode value — TODO confirm
+    uint8_t chroma_pred_mode;
+    uint8_t segment; // NOTE(review): VP8Context sizes its per-segment tables at 4 entries (qmat[4], base_quant[4])
+    uint8_t intra4x4_pred_mode_mb[16];
+    DECLARE_ALIGNED(4, uint8_t, intra4x4_pred_mode_top)[4];
+    VP8mv mv;
+    VP8mv bmv[16]; // 16 vectors; vp8_mbsplits/vp8_mbfirstidx in vp8data.h also use 16 sub-block slots
+} VP8Macroblock;
+
+typedef struct VP8intmv { // x/y pair held as int (VP8mv stores the same pair as int16_t)
+    int x;
+    int y;
+} VP8intmv;
+
+typedef struct VP8mvbounds { // a min and a max VP8intmv; kept in VP8Context and in each VP8ThreadData
+    VP8intmv mv_min;
+    VP8intmv mv_max;
+} VP8mvbounds;
+
+typedef struct VP8ThreadData { // working state for one decoding thread (see thread_nr)
+    DECLARE_ALIGNED(16, int16_t, block)[6][4][16]; // same [6][4] layout as non_zero_count_cache, 16 values each
+    DECLARE_ALIGNED(16, int16_t, block_dc)[16];
+    /**
+     * This is the index plus one of the last non-zero coeff
+     * for each of the blocks in the current macroblock.
+     * So, 0 -> no coeffs
+     *     1 -> dc-only (special transform)
+     *     2+-> full transform
+     */
+    DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
+    /**
+     * For coeff decode, we need to know whether the above block had non-zero
+     * coefficients. This means for each macroblock, we need data for 4 luma
+     * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
+     * per macroblock. We keep the last row in top_nnz.
+     */
+    DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
+    int thread_nr;
+#if HAVE_THREADS
+    pthread_mutex_t lock; // lock/cond exist only in builds with HAVE_THREADS
+    pthread_cond_t cond;
+#endif
+    atomic_int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF)
+    atomic_int wait_mb_pos; // What the current thread is waiting on (presumably packed like thread_mb_pos — confirm).
+
+#define EDGE_EMU_LINESIZE 32
+    DECLARE_ALIGNED(16, uint8_t, edge_emu_buffer)[21 * EDGE_EMU_LINESIZE]; // 21 rows of EDGE_EMU_LINESIZE bytes
+    VP8FilterStrength *filter_strength;
+    VP8mvbounds mv_bounds;
+} VP8ThreadData;
+
+typedef struct VP8Frame { // one frame slot: the picture plus buffers attached to it
+    ThreadFrame tf;
+    AVBufferRef *seg_map; // refcounted buffer; presumably per-macroblock segment ids — TODO confirm
+
+    AVBufferRef *hwaccel_priv_buf;
+    void *hwaccel_picture_private; // NOTE(review): likely points into hwaccel_priv_buf — confirm in vp8.c
+} VP8Frame;
+
+#define MAX_THREADS 8
+typedef struct VP8Context { // decoder private context (priv_data_size of ff_vp7_decoder and ff_vp8_decoder)
+    VP8ThreadData *thread_data;
+    AVCodecContext *avctx;
+    enum AVPixelFormat pix_fmt;
+    int actually_webp;
+
+    VP8Frame *framep[4]; // NOTE(review): 4 slots, presumably indexed by VP8FrameType (0..3) — confirm
+    VP8Frame *next_framep[4]; // the thread-context update in vp8.c fills framep[] from the source's next_framep[]
+    VP8Frame *curframe;
+    VP8Frame *prev_frame;
+
+    uint16_t mb_width;   /* number of horizontal MB */
+    uint16_t mb_height;  /* number of vertical MB */
+    ptrdiff_t linesize;
+    ptrdiff_t uvlinesize;
+
+    uint8_t keyframe;
+    uint8_t deblock_filter;
+    uint8_t mbskip_enabled;
+    uint8_t profile;
+    VP8mvbounds mv_bounds;
+
+    int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type
+    int ref_count[3];
+
+    /**
+     * Base parameters for segmentation, i.e. per-macroblock parameters.
+     * These must be kept unchanged even if segmentation is not used for
+     * a frame, since the values persist between interframes.
+     */
+    struct {
+        uint8_t enabled;
+        uint8_t absolute_vals;
+        uint8_t update_map;
+        uint8_t update_feature_data;
+        int8_t base_quant[4];
+        int8_t filter_level[4];     ///< base loop filter level
+    } segmentation;
+
+    struct {
+        uint8_t simple;
+        uint8_t level;
+        uint8_t sharpness;
+    } filter;
+
+    VP8Macroblock *macroblocks; // NOTE(review): presumably points inside the macroblocks_base allocation — confirm
+
+    uint8_t *intra4x4_pred_mode_top;
+    uint8_t intra4x4_pred_mode_left[4];
+
+    /**
+     * Macroblocks can have one of 4 different quants in a frame when
+     * segmentation is enabled.
+     * If segmentation is disabled, only the first segment's values are used.
+     */
+    struct {
+        // [0] - DC qmul  [1] - AC qmul
+        int16_t luma_qmul[2];
+        int16_t luma_dc_qmul[2];    ///< luma dc-only block quant
+        int16_t chroma_qmul[2];
+    } qmat[4];
+
+    // Raw quantisation values, which may be needed by hwaccel decode.
+    struct {
+        int yac_qi;
+        int ydc_delta;
+        int y2dc_delta;
+        int y2ac_delta;
+        int uvdc_delta;
+        int uvac_delta;
+    } quant;
+
+    struct {
+        uint8_t enabled;    ///< whether each mb can have a different strength based on mode/ref
+        uint8_t update;
+
+        /**
+         * filter strength adjustment for the following macroblock modes:
+         * [0-3] - i16x16 (always zero)
+         * [4]   - i4x4
+         * [5]   - zero mv
+         * [6]   - inter modes except for zero or split mv
+         * [7]   - split mv
+         *  i16x16 modes never have any adjustment
+         */
+        int8_t mode[VP8_MVMODE_SPLIT + 1]; // 8 entries: indices 0..7 as listed above
+
+        /**
+         * filter strength adjustment for macroblocks that reference:
+         * [0] - intra / VP8_FRAME_CURRENT
+         * [1] - VP8_FRAME_PREVIOUS
+         * [2] - VP8_FRAME_GOLDEN
+         * [3] - altref / VP8_FRAME_ALTREF
+         */
+        int8_t ref[4];
+    } lf_delta;
+
+    uint8_t (*top_border)[16 + 8 + 8]; // 32 bytes per entry; presumably 16 luma + 8 + 8 chroma — TODO confirm
+    uint8_t (*top_nnz)[9]; // 9 flags per entry, the same count as VP8ThreadData.left_nnz
+
+    VPXRangeCoder c;   ///< header context, includes mb modes and motion vectors
+
+    /* This contains the entropy coder state at the end of the header
+     * block, in the form specified by the standard.  For use by
+     * hwaccels, so that a hardware decoder has the information to
+     * start decoding at the macroblock layer.
+     */
+    struct {
+        const uint8_t *input;
+        uint32_t range;
+        uint32_t value;
+        int bit_count;
+    } coder_state_at_header_end;
+
+    int header_partition_size;
+
+    /**
+     * These are all of the updatable probabilities for binary decisions.
+     * They are only implicitly reset on keyframes, making it quite likely
+     * for an interframe to desync if a prior frame's header was corrupt
+     * or missing outright!
+     */
+    struct {
+        uint8_t segmentid[3];
+        uint8_t mbskip;
+        uint8_t intra;
+        uint8_t last;
+        uint8_t golden;
+        uint8_t pred16x16[4];
+        uint8_t pred8x8c[3];
+        uint8_t token[4][16][3][NUM_DCT_TOKENS - 1]; // NOTE(review): 2nd dim is 16 here but 8 in vp8_token_default_probs — confirm the mapping
+        uint8_t mvc[2][19];
+        uint8_t scan[16];
+    } prob[2]; // NOTE(review): two sets; presumably a working and a saved copy (see update_probabilities) — confirm
+
+    VP8Macroblock *macroblocks_base;
+    int invisible;
+    int update_last;    ///< update VP8_FRAME_PREVIOUS with the current one
+    int update_golden;  ///< VP8_FRAME_NONE if not updated, or which frame to copy if so
+    int update_altref;
+
+    /**
+     * If this flag is not set, all the probability updates
+     * are discarded after this frame is decoded.
+     */
+    int update_probabilities;
+
+    /**
+     * All coefficients are contained in separate arith coding contexts.
+     * There can be 1, 2, 4, or 8 of these after the header context.
+     */
+    int num_coeff_partitions;
+    VPXRangeCoder coeff_partition[8]; // sized for the maximum of 8 partitions
+    int coeff_partition_size[8];
+    VideoDSPContext vdsp;
+    VP8DSPContext vp8dsp;
+    H264PredContext hpc;
+    vp8_mc_func put_pixels_tab[3][3][3];
+    VP8Frame frames[5]; // frame storage; presumably what framep[]/next_framep[]/curframe point into — TODO confirm
+
+    uint8_t colorspace; ///< 0 is the only value allowed (meaning bt601)
+    uint8_t fullrange;  ///< whether we can skip clamping in dsp functions
+
+    int num_jobs;
+    /**
+     * This describes the macroblock memory layout.
+     * 0 -> Only width+height*2+1 macroblocks allocated (frame/single thread).
+     * 1 -> Macroblocks for entire frame allocated (sliced thread).
+     */
+    int mb_layout;
+
+    int (*decode_mb_row_no_filter)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr);
+    void (*filter_mb_row)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr);
+
+    int vp7;
+
+    /**
+     * Interframe DC prediction (VP7)
+     * [0] VP8_FRAME_PREVIOUS
+     * [1] VP8_FRAME_GOLDEN
+     */
+    uint16_t inter_dc_pred[2][2];
+
+    /**
+     * Macroblock features (VP7)
+     */
+    uint8_t feature_enabled[4];
+    uint8_t feature_present_prob[4];
+    uint8_t feature_index_prob[4][3];
+    uint8_t feature_value[4][4];
+} VP8Context;
+
+int ff_vp8_decode_init(AVCodecContext *avctx); // installed as .init of ff_vp8_decoder
+
+int ff_vp8_decode_frame(AVCodecContext *avctx, AVFrame *frame,
+                        int *got_frame, AVPacket *avpkt); // decode callback of ff_vp8_decoder
+
+int ff_vp8_decode_free(AVCodecContext *avctx); // installed as .close of ff_vp7_decoder and ff_vp8_decoder
+
+#endif /* AVCODEC_VP8_H */
diff --git a/media/ffvpx/libavcodec/vp89_rac.h b/media/ffvpx/libavcodec/vp89_rac.h
new file mode 100644
index 0000000000..bc0924c387
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp89_rac.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Range decoder functions common to VP8 and VP9
+ */
+
+#ifndef AVCODEC_VP89_RAC_H
+#define AVCODEC_VP89_RAC_H
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+
+#include "vpx_rac.h"
+
+// rounding is different than vpx_rac_get, is vpx_rac_get wrong?
+static av_always_inline int vp89_rac_get(VPXRangeCoder *c)
+{
+    return vpx_rac_get_prob(c, 128); // one decision, always with the fixed probability argument 128
+}
+
+// Reads `bits` decisions through vp89_rac_get(); the first one read becomes the top bit.
+static av_unused int vp89_rac_get_uint(VPXRangeCoder *c, int bits)
+{
+    int acc = 0;
+
+    for (; bits; bits--)
+        acc = (acc << 1) | vp89_rac_get(c);
+
+    return acc;
+}
+
+// Walks tree from node 0: an entry > 0 is the next node, an entry <= 0 is a negated leaf value.
+// (Original note: probabilities may pair with decisions differently; this saves a branch per decision.)
+static av_always_inline int vp89_rac_get_tree(VPXRangeCoder *c, const int8_t (*tree)[2],
+                                              const uint8_t *probs)
+{
+    int node = 0;
+
+    for (;;) {
+        node = tree[node][vpx_rac_get_prob(c, probs[node])];
+        if (node <= 0)
+            return -node;
+    }
+}
+
+#endif /* AVCODEC_VP89_RAC_H */
diff --git a/media/ffvpx/libavcodec/vp8_parser.c b/media/ffvpx/libavcodec/vp8_parser.c
new file mode 100644
index 0000000000..98b752bfb9
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8_parser.c
@@ -0,0 +1,79 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+
+#include "avcodec.h"
+
+/* Pass-through parser callback: hands buf back unchanged, fills in stream info
+ * read from its first bytes, and returns buf_size on every path. */
+static int parse(AVCodecParserContext *s,
+                 AVCodecContext *avctx,
+                 const uint8_t **poutbuf, int *poutbuf_size,
+                 const uint8_t *buf, int buf_size)
+{
+    unsigned int is_inter, prof, start_code, w, h;
+
+    *poutbuf_size = buf_size;
+    *poutbuf      = buf;
+
+    if (buf_size < 3)
+        return buf_size;
+
+    is_inter = buf[0] & 1;        /* bit 0 is clear on key frames */
+    prof     = (buf[0] >> 1) & 7; /* bits 1-3 */
+    if (prof > 3) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid profile %u.\n", prof);
+        return buf_size;
+    }
+
+    avctx->profile       = prof;
+    s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
+    s->field_order       = AV_FIELD_PROGRESSIVE;
+    s->format            = AV_PIX_FMT_YUV420P;
+    if (is_inter) {
+        s->key_frame = 0;
+        s->pict_type = AV_PICTURE_TYPE_P;
+        return buf_size;
+    }
+    s->key_frame = 1;
+    s->pict_type = AV_PICTURE_TYPE_I;
+
+    /* Key frames only from here on: start code, then 14-bit dimensions. */
+    if (buf_size < 10)
+        return buf_size;
+
+    start_code = AV_RL24(buf + 3);
+    if (start_code != 0x2a019d) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid sync code %06x.\n", start_code);
+        return buf_size;
+    }
+
+    w = AV_RL16(buf + 6) & 0x3fff;
+    h = AV_RL16(buf + 8) & 0x3fff;
+    s->coded_height = FFALIGN(h, 16);
+    s->coded_width  = FFALIGN(w, 16);
+    s->height       = h;
+    s->width        = w;
+    return buf_size;
+}
+
+const AVCodecParser ff_vp8_parser = { // parser table entry; registered for VP8 only
+    .codec_ids    = { AV_CODEC_ID_VP8 },
+    .parser_parse = parse, // parse() returns the input buffer unchanged and only reports stream info
+};
diff --git a/media/ffvpx/libavcodec/vp8data.h b/media/ffvpx/libavcodec/vp8data.h
new file mode 100644
index 0000000000..1fcce134eb
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8data.h
@@ -0,0 +1,824 @@
+/*
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * VP8 compatible video decoder
+ */
+
+#ifndef AVCODEC_VP8DATA_H
+#define AVCODEC_VP8DATA_H
+
+#include "vp8.h"
+#include "h264pred.h"
+
+static const uint8_t vp7_pred4x4_mode[] = { // VP7: 4x4 pred mode for each *_PRED8x8 index
+    [DC_PRED8x8]    = DC_PRED,
+    [VERT_PRED8x8]  = TM_VP8_PRED, // every non-DC index maps to TM_VP8_PRED in this table
+    [HOR_PRED8x8]   = TM_VP8_PRED,
+    [PLANE_PRED8x8] = TM_VP8_PRED,
+};
+
+static const uint8_t vp8_pred4x4_mode[] = { // VP8: 4x4 pred mode for each *_PRED8x8 index
+    [DC_PRED8x8]    = DC_PRED,
+    [VERT_PRED8x8]  = VERT_PRED, // unlike vp7_pred4x4_mode, VERT and HOR keep their direction
+    [HOR_PRED8x8]   = HOR_PRED,
+    [PLANE_PRED8x8] = TM_VP8_PRED,
+};
+
+static const int8_t vp8_pred16x16_tree_intra[4][2] = { // entry > 0: next node; entry <= 0: negated leaf (vp89_rac_get_tree() form)
+    {   -MODE_I4x4,              1 }, // '0'
+    {            2,              3 },
+    {  -DC_PRED8x8,  -VERT_PRED8x8 }, // '100', '101'
+    { -HOR_PRED8x8, -PLANE_PRED8x8 }, // '110', '111'
+};
+
+static const int8_t vp8_pred16x16_tree_inter[4][2] = { // same leaves as the intra tree, in a different order
+    {    -DC_PRED8x8,            1 }, // '0'
+    {              2,            3 },
+    {  -VERT_PRED8x8, -HOR_PRED8x8 }, // '100', '101'
+    { -PLANE_PRED8x8,   -MODE_I4x4 }, // '110', '111'
+};
+
+typedef struct VP7MVPred { // row type of vp7_mv_pred[]
+    int8_t yoffset; // signed offsets; presumably in macroblocks relative to the current one — TODO confirm
+    int8_t xoffset;
+    uint8_t subblock; // values used in vp7_mv_pred[] are 3, 12 and 15
+    uint8_t score; // values used in vp7_mv_pred[] are 8, 2 and 1
+} VP7MVPred;
+
+#define VP7_MV_PRED_COUNT 12
+static const VP7MVPred vp7_mv_pred[VP7_MV_PRED_COUNT] = { // columns: yoffset, xoffset, subblock, score
+    { -1,  0, 12, 8 },
+    {  0, -1,  3, 8 },
+    { -1, -1, 15, 2 },
+    { -1,  1, 12, 2 },
+    { -2,  0, 12, 2 },
+    {  0, -2,  3, 2 },
+    { -1, -2, 15, 1 },
+    { -2, -1, 15, 1 },
+    { -2,  1, 12, 1 },
+    { -1,  2, 12, 1 },
+    { -2, -2, 15, 1 },
+    { -2,  2, 12, 1 }, // the 12 scores add up to 30
+};
+
+static const int vp7_mode_contexts[31][4] = { // NOTE(review): rows 0..30 and the vp7_mv_pred scores sum to 30 — presumably indexed by an accumulated score; confirm
+    {   3,   3,   1, 246 },
+    {   7,  89,  66, 239 },
+    {  10,  90,  78, 238 },
+    {  14, 118,  95, 241 },
+    {  14, 123, 106, 238 },
+    {  20, 140, 109, 240 },
+    {  13, 155, 103, 238 },
+    {  21, 158,  99, 240 },
+    {  27,  82, 108, 232 },
+    {  19,  99, 123, 217 },
+    {  45, 139, 148, 236 },
+    {  50, 117, 144, 235 },
+    {  57, 128, 164, 238 },
+    {  69, 139, 171, 239 },
+    {  74, 154, 179, 238 },
+    { 112, 165, 186, 242 },
+    {  98, 143, 185, 245 },
+    { 105, 153, 190, 250 },
+    { 124, 167, 192, 245 },
+    { 131, 186, 203, 246 },
+    {  59, 184, 222, 224 },
+    { 148, 215, 214, 213 },
+    { 137, 211, 210, 219 },
+    { 190, 227, 128, 228 },
+    { 183, 228, 128, 228 },
+    { 194, 234, 128, 228 },
+    { 202, 236, 128, 228 },
+    { 205, 240, 128, 228 },
+    { 205, 244, 128, 228 },
+    { 225, 246, 128, 228 },
+    { 233, 251, 128, 228 },
+};
+
+static const int vp8_mode_contexts[6][4] = { // NOTE(review): 6 rows x 4 values; index meaning is not visible here — check the user in vp8.c
+    {   7,   1,   1, 143 },
+    {  14,  18,  14, 107 },
+    { 135,  64,  57,  68 },
+    {  60,  56, 128,  65 },
+    { 159, 134, 128,  34 },
+    { 234, 188, 128,  28 },
+};
+
+static const uint8_t vp8_mbsplits[5][16] = { // partition number for each of the 16 sub-block positions; rows match the inter_splitmvmode order
+    { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,  1,  1,  1,  1,  1,  1 },
+    { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,  1,  1,  0,  0,  1,  1 },
+    { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2,  3,  3,  2,  2,  3,  3 },
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0 }
+};
+
+static const uint8_t vp8_mbfirstidx[4][16] = { // first position at which each partition number appears in the matching vp8_mbsplits row
+    { 0, 8 }, // entries not listed are zero-initialised
+    { 0, 2 },
+    { 0, 2, 8, 10 },
+    { 0, 1, 2,  3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }
+};
+
+static const uint8_t vp8_mbsplit_count[4] = { // number of partitions in each of the first four vp8_mbsplits rows
+    2, 2, 4, 16
+};
+static const uint8_t vp8_mbsplit_prob[3] = { // NOTE(review): 3 values, presumably node probabilities for picking among the 4 split modes — confirm
+    110, 111, 150
+};
+
+static const uint8_t vp7_submv_prob[3] = { // VP7 has a single set of 3 values
+    180, 162, 25
+};
+
+static const uint8_t vp8_submv_prob[5][3] = { // VP8 has 5 sets of 3; NOTE(review): row selection is not visible here — check vp8.c
+    { 147, 136,  18 },
+    { 106, 145,   1 },
+    { 179, 121,   1 },
+    { 223,   1,  34 },
+    { 208,   1,   1 }
+};
+
+static const uint8_t vp8_pred16x16_prob_intra[4] = { // 4 values, as many as vp8_pred16x16_tree_intra has nodes
+    145, 156, 163, 128
+};
+static const uint8_t vp8_pred16x16_prob_inter[4] = { // 4 values, as many as vp8_pred16x16_tree_inter has nodes
+    112,  86, 140,  37
+};
+
+static const int8_t vp8_pred4x4_tree[9][2] = { // 9 nodes, 10 leaves; entry > 0: next node, entry <= 0: negated leaf
+    {              -DC_PRED,                1 }, // '0'
+    {          -TM_VP8_PRED,                2 }, // '10'
+    {            -VERT_PRED,                3 }, // '110'
+    {                     4,                6 },
+    {             -HOR_PRED,                5 }, // '11100'
+    { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED }, // '111010', '111011'
+    {  -DIAG_DOWN_LEFT_PRED,                7 }, // '11110'
+    {       -VERT_LEFT_PRED,                8 }, // '111110'
+    {        -HOR_DOWN_PRED,     -HOR_UP_PRED }, // '1111110', '1111111'
+};
+
+static const int8_t vp8_pred8x8c_tree[3][2] = { // 3 nodes, 4 leaves; entry > 0: next node, entry <= 0: negated leaf
+    {   -DC_PRED8x8,              1 },  // '0'
+    { -VERT_PRED8x8,              2 },  // '10'
+    {  -HOR_PRED8x8, -PLANE_PRED8x8 },  // '110', '111'
+};
+
+static const uint8_t vp8_pred8x8c_prob_intra[3] = { // 3 values, as many as vp8_pred8x8c_tree has nodes
+    142, 114, 183
+};
+static const uint8_t vp8_pred8x8c_prob_inter[3] = { // 3 values, as many as vp8_pred8x8c_tree has nodes
+    162, 101, 204
+};
+static const uint8_t vp8_pred4x4_prob_inter[9] = { // 9 values, as many as vp8_pred4x4_tree has nodes
+    120, 90, 79, 133, 87, 85, 80, 111, 151
+};
+
+static const uint8_t vp8_pred4x4_prob_intra[10][10][9] = { // 9 values per set (vp8_pred4x4_tree has 9 nodes); NOTE(review): [10][10] presumably = two neighbouring 4x4 modes — confirm
+    {
+        {  39,  53, 200,  87,  26,  21,  43, 232, 171 },
+        {  56,  34,  51, 104, 114, 102,  29,  93,  77 },
+        {  88,  88, 147, 150,  42,  46,  45, 196, 205 },
+        { 107,  54,  32,  26,  51,   1,  81,  43,  31 },
+        {  39,  28,  85, 171,  58, 165,  90,  98,  64 },
+        {  34,  22, 116, 206,  23,  34,  43, 166,  73 },
+        {  34,  19,  21, 102, 132, 188,  16,  76, 124 },
+        {  68,  25, 106,  22,  64, 171,  36, 225, 114 },
+        {  62,  18,  78,  95,  85,  57,  50,  48,  51 },
+        {  43,  97, 183, 117,  85,  38,  35, 179,  61 },
+    },
+    {
+        { 112, 113,  77,  85, 179, 255,  38, 120, 114 },
+        {  40,  42,   1, 196, 245, 209,  10,  25, 109 },
+        { 193, 101,  35, 159, 215, 111,  89,  46, 111 },
+        { 100,  80,   8,  43, 154,   1,  51,  26,  71 },
+        {  88,  43,  29, 140, 166, 213,  37,  43, 154 },
+        {  61,  63,  30, 155,  67,  45,  68,   1, 209 },
+        {  41,  40,   5, 102, 211, 183,   4,   1, 221 },
+        { 142,  78,  78,  16, 255, 128,  34, 197, 171 },
+        {  51,  50,  17, 168, 209, 192,  23,  25,  82 },
+        {  60, 148,  31, 172, 219, 228,  21,  18, 111 },
+    },
+    {
+        { 175,  69, 143,  80,  85,  82,  72, 155, 103 },
+        {  56,  58,  10, 171, 218, 189,  17,  13, 152 },
+        { 231, 120,  48,  89, 115, 113, 120, 152, 112 },
+        { 144,  71,  10,  38, 171, 213, 144,  34,  26 },
+        { 114,  26,  17, 163,  44, 195,  21,  10, 173 },
+        { 121,  24,  80, 195,  26,  62,  44,  64,  85 },
+        {  63,  20,   8, 114, 114, 208,  12,   9, 226 },
+        { 170,  46,  55,  19, 136, 160,  33, 206,  71 },
+        {  81,  40,  11,  96, 182,  84,  29,  16,  36 },
+        { 152, 179,  64, 126, 170, 118,  46,  70,  95 },
+    },
+    {
+        {  75,  79, 123,  47,  51, 128,  81, 171,   1 },
+        {  57,  17,   5,  71, 102,  57,  53,  41,  49 },
+        { 125,  98,  42,  88, 104,  85, 117, 175,  82 },
+        { 115,  21,   2,  10, 102, 255, 166,  23,   6 },
+        {  38,  33,  13, 121,  57,  73,  26,   1,  85 },
+        {  41,  10,  67, 138,  77, 110,  90,  47, 114 },
+        {  57,  18,  10, 102, 102, 213,  34,  20,  43 },
+        { 101,  29,  16,  10,  85, 128, 101, 196,  26 },
+        { 117,  20,  15,  36, 163, 128,  68,   1,  26 },
+        {  95,  84,  53,  89, 128, 100, 113, 101,  45 },
+    },
+    {
+        {  63,  59,  90, 180,  59, 166,  93,  73, 154 },
+        {  40,  40,  21, 116, 143, 209,  34,  39, 175 },
+        { 138,  31,  36, 171,  27, 166,  38,  44, 229 },
+        {  57,  46,  22,  24, 128,   1,  54,  17,  37 },
+        {  47,  15,  16, 183,  34, 223,  49,  45, 183 },
+        {  46,  17,  33, 183,   6,  98,  15,  32, 183 },
+        {  40,   3,   9, 115,  51, 192,  18,   6, 223 },
+        {  65,  32,  73, 115,  28, 128,  23, 128, 205 },
+        {  87,  37,   9, 115,  59,  77,  64,  21,  47 },
+        {  67,  87,  58, 169,  82, 115,  26,  59, 179 },
+    },
+    {
+        {  54,  57, 112, 184,   5,  41,  38, 166, 213 },
+        {  30,  34,  26, 133, 152, 116,  10,  32, 134 },
+        { 104,  55,  44, 218,   9,  54,  53, 130, 226 },
+        {  75,  32,  12,  51, 192, 255, 160,  43,  51 },
+        {  39,  19,  53, 221,  26, 114,  32,  73, 255 },
+        {  31,   9,  65, 234,   2,  15,   1, 118,  73 },
+        {  56,  21,  23, 111,  59, 205,  45,  37, 192 },
+        {  88,  31,  35,  67, 102,  85,  55, 186,  85 },
+        {  55,  38,  70, 124,  73, 102,   1,  34,  98 },
+        {  64,  90,  70, 205,  40,  41,  23,  26,  57 },
+    },
+    {
+        {  86,  40,  64, 135, 148, 224,  45, 183, 128 },
+        {  22,  26,  17, 131, 240, 154,  14,   1, 209 },
+        { 164,  50,  31, 137, 154, 133,  25,  35, 218 },
+        {  83,  12,  13,  54, 192, 255,  68,  47,  28 },
+        {  45,  16,  21,  91,  64, 222,   7,   1, 197 },
+        {  56,  21,  39, 155,  60, 138,  23, 102, 213 },
+        {  18,  11,   7,  63, 144, 171,   4,   4, 246 },
+        {  85,  26,  85,  85, 128, 128,  32, 146, 171 },
+        {  35,  27,  10, 146, 174, 171,  12,  26, 128 },
+        {  51, 103,  44, 131, 131, 123,  31,   6, 158 },
+    },
+    {
+        {  68,  45, 128,  34,   1,  47,  11, 245, 171 },
+        {  62,  17,  19,  70, 146,  85,  55,  62,  70 },
+        { 102,  61,  71,  37,  34,  53,  31, 243, 192 },
+        {  75,  15,   9,   9,  64, 255, 184, 119,  16 },
+        {  37,  43,  37, 154, 100, 163,  85, 160,   1 },
+        {  63,   9,  92, 136,  28,  64,  32, 201,  85 },
+        {  56,   8,  17, 132, 137, 255,  55, 116, 128 },
+        {  86,   6,  28,   5,  64, 255,  25, 248,   1 },
+        {  58,  15,  20,  82, 135,  57,  26, 121,  40 },
+        {  69,  60,  71,  38,  73, 119,  28, 222,  37 },
+    },
+    {
+        { 101,  75, 128, 139, 118, 146, 116, 128,  85 },
+        {  56,  41,  15, 176, 236,  85,  37,   9,  62 },
+        { 190,  80,  35,  99, 180,  80, 126,  54,  45 },
+        { 146,  36,  19,  30, 171, 255,  97,  27,  20 },
+        {  71,  30,  17, 119, 118, 255,  17,  18, 138 },
+        { 101,  38,  60, 138,  55,  70,  43,  26, 142 },
+        {  32,  41,  20, 117, 151, 142,  20,  21, 163 },
+        { 138,  45,  61,  62, 219,   1,  81, 188,  64 },
+        { 112,  19,  12,  61, 195, 128,  48,   4,  24 },
+        {  85, 126,  47,  87, 176,  51,  41,  20,  32 },
+    },
+    {
+        {  66, 102, 167,  99,  74,  62,  40, 234, 128 },
+        {  41,  53,   9, 178, 241, 141,  26,   8, 107 },
+        { 134, 183,  89, 137,  98, 101, 106, 165, 148 },
+        { 104,  79,  12,  27, 217, 255,  87,  17,   7 },
+        {  74,  43,  26, 146,  73, 166,  49,  23, 157 },
+        {  65,  38, 105, 160,  51,  52,  31, 115, 128 },
+        {  47,  41,  14, 110, 182, 183,  21,  17, 194 },
+        {  87,  68,  71,  44, 114,  51,  15, 186,  23 },
+        {  66,  45,  25, 102, 197, 189,  23,  18,  22 },
+        {  72, 187, 100, 130, 157, 111,  32,  75,  80 },
+    },
+};
+
+static const uint8_t vp8_coeff_band[16] = { // band number (0..7) for each of the 16 coefficient indexes
+    0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7
+};
+
+/* Inverse of vp8_coeff_band: mappings of bands to coefficient indexes.
+ * Each list is -1-terminated. */
+static const int8_t vp8_coeff_band_indexes[8][10] = { // row = band; 10 columns fit band 6 (9 indexes plus the -1)
+    {  0, -1 },
+    {  1, -1 },
+    {  2, -1 },
+    {  3, -1 },
+    {  5, -1 },
+    {  6, -1 },
+    {  4,  7, 8, 9, 10, 11, 12, 13, 14, -1 },
+    { 15, -1 }
+};
+
+static const uint8_t vp8_dct_cat1_prob[] = { // each vp8_dct_catN_prob list ends with a 0 entry
+    159, 0
+};
+static const uint8_t vp8_dct_cat2_prob[] = { // 2 values + terminating 0
+    165, 145, 0
+};
+static const uint8_t vp8_dct_cat3_prob[] = { // 3 values + terminating 0
+    173, 148, 140, 0
+};
+static const uint8_t vp8_dct_cat4_prob[] = { // 4 values + terminating 0
+    176, 155, 140, 135, 0
+};
+static const uint8_t vp8_dct_cat5_prob[] = { // 5 values + terminating 0
+    180, 157, 141, 134, 130, 0
+};
+static const uint8_t vp8_dct_cat6_prob[] = { // 11 values + terminating 0
+    254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0
+};
+
+// only used for cat3 and above; cat 1 and 2 are referenced directly
+const uint8_t *const ff_vp8_dct_cat_prob[] = { // NOTE(review): non-static definition in a header — only safe if exactly one .c file includes it; confirm
+    vp8_dct_cat3_prob, // [0]
+    vp8_dct_cat4_prob, // [1]
+    vp8_dct_cat5_prob, // [2]
+    vp8_dct_cat6_prob, // [3]
+};
+
+static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS - 1] = {
+    {
+        {
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+        {
+            { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+            { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+            { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 },
+        },
+        {
+            {   1,  98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+            { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+            {  78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+        },
+        {
+            {   1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+            { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+            {  77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+            { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+            {  37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 },
+        },
+        {
+            {   1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+            { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+            { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+            { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+            {  80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 246,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+    },
+    {
+        {
+            { 198,  35, 237, 223, 193, 187, 162, 160, 145, 155,  62 },
+            { 131,  45, 198, 221, 172, 176, 220, 157, 252, 221,   1 },
+            {  68,  47, 146, 208, 149, 167, 221, 162, 255, 223, 128 },
+        },
+        {
+            {   1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+            { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+            {  81,  99, 181, 242, 176, 190, 249, 202, 255, 255, 128 },
+        },
+        {
+            {   1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+            {  99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+            {  23,  91, 163, 242, 170, 187, 247, 210, 255, 255, 128 },
+        },
+        {
+            {   1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+            { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+            {  44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 },
+        },
+        {
+            {   1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+            {  94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+            {  22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 },
+        },
+        {
+            {   1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+            { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+            {  35,  77, 181, 251, 193, 211, 255, 205, 128, 128, 128 },
+        },
+        {
+            {   1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+            { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+            {  45,  99, 188, 251, 195, 217, 255, 224, 128, 128, 128 },
+        },
+        {
+            {   1,   1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+            { 203,   1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 137,   1, 177, 255, 224, 255, 128, 128, 128, 128, 128 },
+        },
+    },
+    {
+        {
+            { 253,   9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+            { 175,  13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+            {  73,  17, 171, 221, 161, 179, 236, 167, 255, 234, 128 },
+        },
+        {
+            {   1,  95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+            { 239,  90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+            { 155,  77, 195, 248, 188, 195, 255, 255, 128, 128, 128 },
+        },
+        {
+            {   1,  24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+            { 201,  51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+            {  69,  46, 190, 239, 201, 218, 255, 228, 128, 128, 128 },
+        },
+        {
+            {   1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+            { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1,  16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 190,  36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+            { 149,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 213,  62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+            {  55,  93, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+        {
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+    },
+    {
+        {
+            { 202,  24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
+            { 126,  38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
+            {  61,  46, 138, 219, 151, 178, 240, 170, 255, 216, 128 },
+        },
+        {
+            {   1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
+            { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
+            {  39,  77, 162, 232, 172, 180, 245, 178, 255, 255, 128 },
+        },
+        {
+            {   1,  52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
+            { 124,  74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
+            {  24,  71, 130, 219, 154, 170, 243, 182, 255, 255, 128 },
+        },
+        {
+            {   1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
+            { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
+            {  28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 },
+        },
+        {
+            {   1,  81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
+            { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
+            {  20,  95, 153, 243, 164, 173, 255, 203, 128, 128, 128 },
+        },
+        {
+            {   1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
+            { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
+            {  47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 },
+        },
+        {
+            {   1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
+            { 141,  84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
+            {  42,  80, 160, 240, 162, 185, 255, 205, 128, 128, 128 },
+        },
+        {
+            {   1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 244,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 238,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+    },
+};
+
+static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS - 1] = { // 4 x 8 x 3 rows, 11 byte values per row as written below; NOTE(review): presumably per-token update probabilities - confirm at the use site
+    {
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
+            { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+    },
+    {
+        {
+            { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
+            { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+    },
+    {
+        {
+            { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
+            { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+    },
+    {
+        {
+            { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
+            { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+    },
+};
+
+static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT + 1] = { // 128 non-decreasing entries (4..157), so uint8_t suffices; NOTE(review): presumably indexed by quantizer index - confirm at the use site
+      4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
+     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
+     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
+     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
+     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
+    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
+};
+
+static const uint16_t vp8_ac_qlookup[VP8_MAX_QUANT + 1] = { // 128 increasing entries (4..284); uint16_t because the tail exceeds 255
+      4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
+     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
+     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
+     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
+     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
+    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
+    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
+    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
+};
+
+static const uint8_t vp8_mv_update_prob[2][19] = { // two rows of 19 values; the last two of each row are marked VP8-only, leaving 17 that match the width of vp7_mv_default_prob
+    { 237,
+      246,
+      253, 253, 254, 254, 254, 254, 254,
+      254, 254, 254, 254, 254, 250, 250, 252, /* VP8 only: */ 254, 254 },
+    { 231,
+      243,
+      245, 253, 254, 254, 254, 254, 254,
+      254, 254, 254, 254, 254, 251, 251, 254, /* VP8 only: */ 254, 254 }
+};
+
+static const uint8_t vp7_mv_default_prob[2][17] = { // two rows of 17 values; the first nine of each row equal those of vp8_mv_default_prob
+    { 162,
+      128,
+      225, 146, 172, 147, 214,  39, 156,
+      247, 210, 135,  68, 138, 220, 239, 246 },
+    { 164,
+      128,
+      204, 170, 119, 235, 140, 230, 228,
+      244, 184, 201,  44, 173, 221, 239, 253 }
+};
+
+static const uint8_t vp8_mv_default_prob[2][19] = { // two rows of 19 values; the first nine of each row equal those of vp7_mv_default_prob
+    { 162,
+      128,
+      225, 146, 172, 147, 214, 39, 156,
+      128, 129, 132,  75, 145, 178, 206, 239, 254, 254 },
+    { 164,
+      128,
+      204, 170, 119, 235, 140, 230, 228,
+      128, 130, 130,  74, 148, 180, 203, 236, 254, 254 }
+};
+
+static const uint8_t vp7_feature_value_size[2][4] = { // 2 x 4 small sizes; NOTE(review): presumably bit widths of per-feature values, and the meaning of the first index is not visible here - confirm at the use site
+    { 7, 6, 0, 8 },
+    { 7, 6, 0, 5 },
+};
+
+static const int8_t vp7_feature_index_tree[4][2] = // NOTE(review): declared with 4 rows but only 3 are listed, so row 3 is implicitly { 0, 0 } - confirm nothing reads it
+{
+    {  1,  2 }, // positive entries; NOTE(review): presumably indices of the next row to visit - confirm against the tree reader
+    { -0, -1 }, // '00', '01'  (-0 is simply 0)
+    { -2, -3 }, // '10', '11'
+};
+
+static const uint16_t vp7_ydc_qlookup[] = { // size comes from the initializer: 128 non-decreasing entries, 4..572
+      4,   4,   5,   6,   6,   7,   8,   8,   9,  10,  11,  12,  13,  14,  15,
+     16,  17,  18,  19,  20,  21,  22,  23,  23,  24,  25,  26,  27,  28,  29,
+     30,  31,  32,  33,  33,  34,  35,  36,  36,  37,  38,  39,  39,  40,  41,
+     41,  42,  43,  43,  44,  45,  45,  46,  47,  48,  48,  49,  50,  51,  52,
+     53,  53,  54,  56,  57,  58,  59,  60,  62,  63,  65,  66,  68,  70,  72,
+     74,  76,  79,  81,  84,  87,  90,  93,  96, 100, 104, 108, 112, 116, 121,
+    126, 131, 136, 142, 148, 154, 160, 167, 174, 182, 189, 198, 206, 215, 224,
+    234, 244, 254, 265, 277, 288, 301, 313, 327, 340, 355, 370, 385, 401, 417,
+    434, 452, 470, 489, 509, 529, 550, 572,
+};
+
+static const uint16_t vp7_yac_qlookup[] = { // size comes from the initializer: 128 non-decreasing entries, 4..1127
+       4,    4,   5,   5,   6,   6,   7,   8,   9,  10,  11,  12,   13,   15,
+      16,   17,  19,  20,  22,  23,  25,  26,  28,  29,  31,  32,   34,   35,
+      37,   38,  40,  41,  42,  44,  45,  46,  48,  49,  50,  51,   53,   54,
+      55,   56,  57,  58,  59,  61,  62,  63,  64,  65,  67,  68,   69,   70,
+      72,   73,  75,  76,  78,  80,  82,  84,  86,  88,  91,  93,   96,   99,
+     102,  105, 109, 112, 116, 121, 125, 130, 135, 140, 146, 152,  158,  165,
+     172,  180, 188, 196, 205, 214, 224, 234, 245, 256, 268, 281,  294,  308,
+     322,  337, 353, 369, 386, 404, 423, 443, 463, 484, 506, 529,  553,  578,
+     604,  631, 659, 688, 718, 749, 781, 814, 849, 885, 922, 960, 1000, 1041,
+    1083, 1127,
+};
+
+static const uint16_t vp7_y2dc_qlookup[] = { // size comes from the initializer: 128 increasing entries, 7..1148
+       7,    9,  11,  13,  15,  17,  19,  21,  23,  26,  28,  30,   33,   35,
+      37,   39,  42,  44,  46,  48,  51,  53,  55,  57,  59,  61,   63,   65,
+      67,   69,  70,  72,  74,  75,  77,  78,  80,  81,  83,  84,   85,   87,
+      88,   89,  90,  92,  93,  94,  95,  96,  97,  99, 100, 101,  102,  104,
+     105,  106, 108, 109, 111, 113, 114, 116, 118, 120, 123, 125,  128,  131,
+     134,  137, 140, 144, 148, 152, 156, 161, 166, 171, 176, 182,  188,  195,
+     202,  209, 217, 225, 234, 243, 253, 263, 274, 285, 297, 309,  322,  336,
+     350,  365, 381, 397, 414, 432, 450, 470, 490, 511, 533, 556,  579,  604,
+     630,  656, 684, 713, 742, 773, 805, 838, 873, 908, 945, 983, 1022, 1063,
+    1105, 1148,
+};
+
+static const uint16_t vp7_y2ac_qlookup[] = { // size comes from the initializer: 128 increasing entries, 7..1719
+       7,    9,   11,   13,   16,   18,   21,   24,   26,   29,   32,   35,
+      38,   41,   43,   46,   49,   52,   55,   58,   61,   64,   66,   69,
+      72,   74,   77,   79,   82,   84,   86,   88,   91,   93,   95,   97,
+      98,  100,  102,  104,  105,  107,  109,  110,  112,  113,  115,  116,
+     117,  119,  120,  122,  123,  125,  127,  128,  130,  132,  134,  136,
+     138,  141,  143,  146,  149,  152,  155,  158,  162,  166,  171,  175,
+     180,  185,  191,  197,  204,  210,  218,  226,  234,  243,  252,  262,
+     273,  284,  295,  308,  321,  335,  350,  365,  381,  398,  416,  435,
+     455,  476,  497,  520,  544,  569,  595,  622,  650,  680,  711,  743,
+     776,  811,  848,  885,  925,  965, 1008, 1052, 1097, 1144, 1193, 1244,
+    1297, 1351, 1407, 1466, 1526, 1588, 1652, 1719,
+};
+
+#endif /* AVCODEC_VP8DATA_H */
diff --git a/media/ffvpx/libavcodec/vp8dsp.c b/media/ffvpx/libavcodec/vp8dsp.c
new file mode 100644
index 0000000000..7a85e9f4ca
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8dsp.c
@@ -0,0 +1,753 @@
+/*
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ * Copyright (C) 2014 Peter Ross
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * VP8 compatible video decoder
+ */
+
+#include "config_components.h"
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+
+#include "mathops.h"
+#include "vp8dsp.h"
+
+#define MK_IDCT_DC_ADD4_C(name) /* defines name_idct_dc_add4uv_c and _add4y_c */ \
+static void name ## _idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16],     \
+                                      ptrdiff_t stride)                       \
+{ /* four calls in a 2x2 layout: (row,col) offsets (0,0) (0,4) (4,0) (4,4) */ \
+    name ## _idct_dc_add_c(dst + stride * 0 + 0, block[0], stride);           \
+    name ## _idct_dc_add_c(dst + stride * 0 + 4, block[1], stride);           \
+    name ## _idct_dc_add_c(dst + stride * 4 + 0, block[2], stride);           \
+    name ## _idct_dc_add_c(dst + stride * 4 + 4, block[3], stride);           \
+}                                                                             \
+                                                                              \
+static void name ## _idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16],      \
+                                     ptrdiff_t stride)                        \
+{ /* four calls side by side on one row: column offsets 0, 4, 8, 12 */        \
+    name ## _idct_dc_add_c(dst +  0, block[0], stride);                       \
+    name ## _idct_dc_add_c(dst +  4, block[1], stride);                       \
+    name ## _idct_dc_add_c(dst +  8, block[2], stride);                       \
+    name ## _idct_dc_add_c(dst + 12, block[3], stride);                       \
+}
+
+#if CONFIG_VP7_DECODER
+static void vp7_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) // two-pass 4x4 transform of dc[]; results land in coefficient 0 of each block[][] entry
+{
+    int i;
+    unsigned a1, b1, c1, d1; // unsigned accumulators: the a1 +/- d1 and b1 +/- c1 sums wrap, then are cast back to int before shifting
+    int16_t tmp[16]; // output of the first pass
+
+    for (i = 0; i < 4; i++) { // pass 1: dc[i*4 .. i*4+3] -> tmp[i*4 .. i*4+3], scaled down by >> 14
+        a1 = (dc[i * 4 + 0] + dc[i * 4 + 2]) * 23170;
+        b1 = (dc[i * 4 + 0] - dc[i * 4 + 2]) * 23170;
+        c1 = dc[i * 4 + 1] * 12540 - dc[i * 4 + 3] * 30274;
+        d1 = dc[i * 4 + 1] * 30274 + dc[i * 4 + 3] * 12540;
+        tmp[i * 4 + 0] = (int)(a1 + d1) >> 14;
+        tmp[i * 4 + 3] = (int)(a1 - d1) >> 14;
+        tmp[i * 4 + 1] = (int)(b1 + c1) >> 14;
+        tmp[i * 4 + 2] = (int)(b1 - c1) >> 14;
+    }
+
+    for (i = 0; i < 4; i++) { // pass 2: tmp[i], tmp[i+4], tmp[i+8], tmp[i+12] -> block[0..3][i][0]
+        a1 = (tmp[i + 0] + tmp[i + 8]) * 23170;
+        b1 = (tmp[i + 0] - tmp[i + 8]) * 23170;
+        c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274;
+        d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540;
+        AV_ZERO64(dc + i * 4); // presumably clears the four int16 at dc[i*4 .. i*4+3] (64 bits) - macro defined elsewhere
+        block[0][i][0] = (int)(a1 + d1 + 0x20000) >> 18; // 0x20000 is half of 1 << 18, i.e. round to nearest
+        block[3][i][0] = (int)(a1 - d1 + 0x20000) >> 18;
+        block[1][i][0] = (int)(b1 + c1 + 0x20000) >> 18;
+        block[2][i][0] = (int)(b1 - c1 + 0x20000) >> 18;
+    }
+}
+
+static void vp7_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16]) // shortcut that reads only dc[0]
+{
+    int i, val = (23170 * (23170 * dc[0] >> 14) + 0x20000) >> 18; // two 23170 multiplies: the first shifted by 14, the second rounded and shifted by 18
+    dc[0] = 0; // the input coefficient is cleared after use
+
+    for (i = 0; i < 4; i++) { // store the same value in coefficient 0 of all 16 block[i][j] entries
+        block[i][0][0] = val;
+        block[i][1][0] = val;
+        block[i][2][0] = val;
+        block[i][3][0] = val;
+    }
+}
+
+static void vp7_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) // two-pass 4x4 inverse transform of block[], added onto the 4x4 area at dst; block[] is zeroed
+{
+    int i;
+    unsigned a1, b1, c1, d1; // unsigned accumulators: the sums below wrap, then are cast back to int before shifting
+    int16_t tmp[16]; // output of the first pass
+
+    for (i = 0; i < 4; i++) { // pass 1: block[i*4 .. i*4+3] -> tmp[i*4 .. i*4+3], scaled down by >> 14
+        a1 = (block[i * 4 + 0] + block[i * 4 + 2]) * 23170;
+        b1 = (block[i * 4 + 0] - block[i * 4 + 2]) * 23170;
+        c1 = block[i * 4 + 1] * 12540 - block[i * 4 + 3] * 30274;
+        d1 = block[i * 4 + 1] * 30274 + block[i * 4 + 3] * 12540;
+        AV_ZERO64(block + i * 4); // presumably clears the four int16 just read (64 bits) - macro defined elsewhere
+        tmp[i * 4 + 0] = (int)(a1 + d1) >> 14;
+        tmp[i * 4 + 3] = (int)(a1 - d1) >> 14;
+        tmp[i * 4 + 1] = (int)(b1 + c1) >> 14;
+        tmp[i * 4 + 2] = (int)(b1 - c1) >> 14;
+    }
+
+    for (i = 0; i < 4; i++) { // pass 2: tmp[i], tmp[i+4], tmp[i+8], tmp[i+12] -> column i of dst, rows 0..3
+        a1 = (tmp[i + 0] + tmp[i + 8]) * 23170;
+        b1 = (tmp[i + 0] - tmp[i + 8]) * 23170;
+        c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274;
+        d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540;
+        dst[0 * stride + i] = av_clip_uint8(dst[0 * stride + i] + // residual is rounded (+0x20000) and shifted by 18 before the add
+                                            ((int)(a1 + d1 + 0x20000) >> 18));
+        dst[3 * stride + i] = av_clip_uint8(dst[3 * stride + i] +
+                                            ((int)(a1 - d1 + 0x20000) >> 18));
+        dst[1 * stride + i] = av_clip_uint8(dst[1 * stride + i] +
+                                            ((int)(b1 + c1 + 0x20000) >> 18));
+        dst[2 * stride + i] = av_clip_uint8(dst[2 * stride + i] +
+                                            ((int)(b1 - c1 + 0x20000) >> 18));
+    }
+}
+
+static void vp7_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) // shortcut that reads only block[0] and adds one value to a 4x4 area
+{
+    int i, dc = (23170 * (23170 * block[0] >> 14) + 0x20000) >> 18; // two 23170 multiplies: the first shifted by 14, the second rounded and shifted by 18
+    block[0] = 0; // the input coefficient is cleared after use
+
+    for (i = 0; i < 4; i++) { // four rows, `stride` bytes apart
+        dst[0] = av_clip_uint8(dst[0] + dc);
+        dst[1] = av_clip_uint8(dst[1] + dc);
+        dst[2] = av_clip_uint8(dst[2] + dc);
+        dst[3] = av_clip_uint8(dst[3] + dc);
+        dst   += stride;
+    }
+}
+
+MK_IDCT_DC_ADD4_C(vp7)
+#endif /* CONFIG_VP7_DECODER */
+
+// TODO: Maybe add dequant
+#if CONFIG_VP8_DECODER
+static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) // two-pass add/subtract transform of dc[]; results land in coefficient 0 of each block[][] entry
+{
+    int i, t0, t1, t2, t3;
+
+    for (i = 0; i < 4; i++) { // pass 1, in place: combines dc[i], dc[4+i], dc[8+i], dc[12+i]
+        t0 = dc[0 * 4 + i] + dc[3 * 4 + i];
+        t1 = dc[1 * 4 + i] + dc[2 * 4 + i];
+        t2 = dc[1 * 4 + i] - dc[2 * 4 + i];
+        t3 = dc[0 * 4 + i] - dc[3 * 4 + i];
+
+        dc[0 * 4 + i] = t0 + t1;
+        dc[1 * 4 + i] = t3 + t2;
+        dc[2 * 4 + i] = t0 - t1;
+        dc[3 * 4 + i] = t3 - t2;
+    }
+
+    for (i = 0; i < 4; i++) { // pass 2: combines dc[i*4 .. i*4+3] into block[i][0..3][0]
+        t0 = dc[i * 4 + 0] + dc[i * 4 + 3] + 3; // rounding
+        t1 = dc[i * 4 + 1] + dc[i * 4 + 2];
+        t2 = dc[i * 4 + 1] - dc[i * 4 + 2];
+        t3 = dc[i * 4 + 0] - dc[i * 4 + 3] + 3; // rounding
+        AV_ZERO64(dc + i * 4); // presumably clears the four int16 just read (64 bits) - macro defined elsewhere
+
+        block[i][0][0] = (t0 + t1) >> 3; // the +3 bias is carried in t0 and t3, so every output includes it
+        block[i][1][0] = (t3 + t2) >> 3;
+        block[i][2][0] = (t0 - t1) >> 3;
+        block[i][3][0] = (t3 - t2) >> 3;
+    }
+}
+
+static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16]) // shortcut that reads only dc[0]
+{
+    int i, val = (dc[0] + 3) >> 3; // same +3 bias and >> 3 as the second pass of vp8_luma_dc_wht_c
+    dc[0] = 0; // the input coefficient is cleared after use
+
+    for (i = 0; i < 4; i++) { // store the same value in coefficient 0 of all 16 block[i][j] entries
+        block[i][0][0] = val;
+        block[i][1][0] = val;
+        block[i][2][0] = val;
+        block[i][3][0] = val;
+    }
+}
+
+#define MUL_20091(a) ((((a) * 20091) >> 16) + (a)) // a * (1 + 20091/65536), about a * 1.3066; evaluates (a) twice
+#define MUL_35468(a)  (((a) * 35468) >> 16) // a * 35468/65536, about a * 0.5412
+
+static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) // two-pass 4x4 inverse transform of block[], added onto the 4x4 area at dst; block[] is zeroed
+{
+    int i, t0, t1, t2, t3;
+    int16_t tmp[16]; // output of the first pass
+
+    for (i = 0; i < 4; i++) { // pass 1: block[i], block[4+i], block[8+i], block[12+i] -> tmp[i*4 .. i*4+3]
+        t0 = block[0 * 4 + i] + block[2 * 4 + i];
+        t1 = block[0 * 4 + i] - block[2 * 4 + i];
+        t2 = MUL_35468(block[1 * 4 + i]) - MUL_20091(block[3 * 4 + i]);
+        t3 = MUL_20091(block[1 * 4 + i]) + MUL_35468(block[3 * 4 + i]);
+        block[0 * 4 + i] = 0; // coefficients are cleared as soon as they have been read
+        block[1 * 4 + i] = 0;
+        block[2 * 4 + i] = 0;
+        block[3 * 4 + i] = 0;
+
+        tmp[i * 4 + 0] = t0 + t3;
+        tmp[i * 4 + 1] = t1 + t2;
+        tmp[i * 4 + 2] = t1 - t2;
+        tmp[i * 4 + 3] = t0 - t3;
+    }
+
+    for (i = 0; i < 4; i++) { // pass 2: tmp[i], tmp[4+i], tmp[8+i], tmp[12+i] -> one row of four dst pixels
+        t0 = tmp[0 * 4 + i] + tmp[2 * 4 + i];
+        t1 = tmp[0 * 4 + i] - tmp[2 * 4 + i];
+        t2 = MUL_35468(tmp[1 * 4 + i]) - MUL_20091(tmp[3 * 4 + i]);
+        t3 = MUL_20091(tmp[1 * 4 + i]) + MUL_35468(tmp[3 * 4 + i]);
+
+        dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3)); // +4 is half of 1 << 3, i.e. round to nearest
+        dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
+        dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
+        dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
+        dst   += stride; // next output row
+    }
+}
+
+static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) // shortcut that reads only block[0] and adds one value to a 4x4 area
+{
+    int i, dc = (block[0] + 4) >> 3; // same +4 rounding and >> 3 as the final stage of vp8_idct_add_c
+    block[0] = 0; // the input coefficient is cleared after use
+
+    for (i = 0; i < 4; i++) { // four rows, `stride` bytes apart
+        dst[0] = av_clip_uint8(dst[0] + dc);
+        dst[1] = av_clip_uint8(dst[1] + dc);
+        dst[2] = av_clip_uint8(dst[2] + dc);
+        dst[3] = av_clip_uint8(dst[3] + dc);
+        dst   += stride;
+    }
+}
+
+MK_IDCT_DC_ADD4_C(vp8)
+#endif /* CONFIG_VP8_DECODER */
+
+// Declares p3..p0 and q0..q3 from `p` and `stride` in the enclosing scope, so the filter helpers only need those two parameters.
+#define LOAD_PIXELS                                                           \
+    int av_unused p3 = p[-4 * stride]; /* p3..p0: the four samples before p */ \
+    int av_unused p2 = p[-3 * stride];                                        \
+    int av_unused p1 = p[-2 * stride];                                        \
+    int av_unused p0 = p[-1 * stride];                                        \
+    int av_unused q0 = p[ 0 * stride]; /* q0..q3: p itself and the next three */ \
+    int av_unused q1 = p[ 1 * stride];                                        \
+    int av_unused q2 = p[ 2 * stride];                                        \
+    int av_unused q3 = p[ 3 * stride];
+
+#define clip_int8(n) (cm[(n) + 0x80] - 0x80) // needs a table pointer named `cm` in scope; biases n by 128 for the lookup and removes the bias afterwards
+
+static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride,
+                                           int is4tap, int is_vp7) // shared VP7/VP8 filter step: rewrites p0 and q0, plus p1 and q1 when !is4tap
+{
+    LOAD_PIXELS
+    int a, f1, f2;
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; // table used by clip_int8() and by the final stores
+
+    a = 3 * (q0 - p0);
+
+    if (is4tap)
+        a += clip_int8(p1 - q1); // the p1/q1 difference only contributes in the 4-tap case
+
+    a = clip_int8(a);
+
+    // We deviate from the spec here with c(a+3) >> 3
+    // since that's what libvpx does.
+    f1 = FFMIN(a + 4, 127) >> 3; // amount subtracted from q0
+
+    if (is_vp7)
+        f2 = f1 - ((a & 7) == 4); // VP7: one less than f1 exactly when the low three bits of a are 100
+    else
+        f2 = FFMIN(a + 3, 127) >> 3; // amount added to p0
+
+    // Despite what the spec says, we do need to clamp here to
+    // be bitexact with libvpx.
+    p[-1 * stride] = cm[p0 + f2];
+    p[ 0 * stride] = cm[q0 - f1];
+
+    // only used for _inner on blocks without high edge variance
+    if (!is4tap) {
+        a              = (f1 + 1) >> 1; // rounded half of f1, applied to the outer pair
+        p[-2 * stride] = cm[p1 + a];
+        p[ 1 * stride] = cm[q1 - a];
+    }
+}
+
+static av_always_inline void vp7_filter_common(uint8_t *p, ptrdiff_t stride,
+                                               int is4tap) // thin wrapper: filter_common() with IS_VP7 as the is_vp7 argument
+{
+    filter_common(p, stride, is4tap, IS_VP7);
+}
+
+static av_always_inline void vp8_filter_common(uint8_t *p, ptrdiff_t stride,
+                                               int is4tap) // thin wrapper: filter_common() with IS_VP8 as the is_vp7 argument
+{
+    filter_common(p, stride, is4tap, IS_VP8);
+}
+
+static av_always_inline int vp7_simple_limit(uint8_t *p, ptrdiff_t stride,
+                                             int flim) // nonzero when the step across the edge is within flim
+{
+    LOAD_PIXELS
+    return FFABS(p0 - q0) <= flim; // VP7 looks at the p0/q0 difference only
+}
+
+static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride,
+                                             int flim) // nonzero when the weighted edge difference is within flim
+{
+    LOAD_PIXELS
+    return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim; // twice |p0 - q0| plus half of |p1 - q1|
+}
+
+/**
+ * NORMAL_LIMIT(vpn) defines vp<vpn>_normal_limit(p, stride, E, I).
+ * E - limit at the macroblock edge; I - limit for interior difference
+ */
+#define NORMAL_LIMIT(vpn)                                                     \
+static av_always_inline int vp ## vpn ## _normal_limit(uint8_t *p,            \
+                                                       ptrdiff_t stride,      \
+                                                       int E, int I)          \
+{                                                                             \
+    LOAD_PIXELS                                                               \
+    return vp ## vpn ## _simple_limit(p, stride, E) && /* edge test vs E */   \
+           FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && /* p side vs I */    \
+           FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I &&                      \
+           FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I;   /* q side vs I */    \
+}
+
+NORMAL_LIMIT(7)
+NORMAL_LIMIT(8)
+
+// high edge variance: nonzero when either inner step, |p1 - p0| or |q1 - q0|, exceeds thresh
+static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
+{
+    LOAD_PIXELS
+    return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh;
+}
+
+static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride) // rewrites three samples on each side of p: p2, p1, p0 and q0, q1, q2
+{
+    int a0, a1, a2, w;
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; // table used by clip_int8() and by the final stores
+
+    LOAD_PIXELS
+
+    w = clip_int8(p1 - q1);
+    w = clip_int8(w + 3 * (q0 - p0)); // clipped twice: once for the p1/q1 term, once for the total
+
+    a0 = (27 * w + 63) >> 7; // weights 27, 18, 9 over 128: largest for the pair nearest the edge
+    a1 = (18 * w + 63) >> 7;
+    a2 =  (9 * w + 63) >> 7;
+
+    p[-3 * stride] = cm[p2 + a2]; // p side is increased by the adjustment, q side decreased
+    p[-2 * stride] = cm[p1 + a1];
+    p[-1 * stride] = cm[p0 + a0];
+    p[ 0 * stride] = cm[q0 - a0];
+    p[ 1 * stride] = cm[q1 - a1];
+    p[ 2 * stride] = cm[q2 - a2];
+}
+
+#define LOOP_FILTER(vpn, dir, size, stridea, strideb, maybe_inline)           \
+static maybe_inline /* MB edge: hev ? filter_common : filter_mbedge */        \
+void vpn ## _ ## dir ## _loop_filter ## size ## _c(uint8_t *dst,              \
+                                                   ptrdiff_t stride,          \
+                                                   int flim_E, int flim_I,    \
+                                                   int hev_thresh)            \
+{ /* stride is referenced only via the stridea/strideb arguments */           \
+    int i;                                                                    \
+    for (i = 0; i < size; i++) /* stridea steps along the edge */             \
+        if (vpn ## _normal_limit(dst + i * stridea, strideb,                  \
+                                 flim_E, flim_I)) {                           \
+            if (hev(dst + i * stridea, strideb, hev_thresh))                  \
+                vpn ## _filter_common(dst + i * stridea, strideb, 1);         \
+            else                                                              \
+                filter_mbedge(dst + i * stridea, strideb);                    \
+        }                                                                     \
+}                                                                             \
+                                                                              \
+static maybe_inline /* inner edges: filter_common with flag = hev */          \
+void vpn ## _ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst,        \
+                                                         ptrdiff_t stride,    \
+                                                         int flim_E,          \
+                                                         int flim_I,          \
+                                                         int hev_thresh)      \
+{                                                                             \
+    int i;                                                                    \
+    for (i = 0; i < size; i++)                                                \
+        if (vpn ## _normal_limit(dst + i * stridea, strideb,                  \
+                                 flim_E, flim_I)) {                           \
+            int hv = hev(dst + i * stridea, strideb, hev_thresh);             \
+            if (hv)                                                           \
+                vpn ## _filter_common(dst + i * stridea, strideb, 1);         \
+            else                                                              \
+                vpn ## _filter_common(dst + i * stridea, strideb, 0);         \
+        }                                                                     \
+}
+
+#define UV_LOOP_FILTER(vpn, dir, stridea, strideb) /* U+V pair wrappers */    \
+LOOP_FILTER(vpn, dir, 8, stridea, strideb, av_always_inline)                  \
+static void vpn ## _ ## dir ## _loop_filter8uv_c(uint8_t *dstU,               \
+                                                 uint8_t *dstV,               \
+                                                 ptrdiff_t stride, int fE,    \
+                                                 int fI, int hev_thresh)      \
+{ /* same 8-position filter on dstU, then on dstV */                          \
+    vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);     \
+    vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);     \
+}                                                                             \
+                                                                              \
+static void vpn ## _ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU,         \
+                                                       uint8_t *dstV,         \
+                                                       ptrdiff_t stride,      \
+                                                       int fE, int fI,        \
+                                                       int hev_thresh)        \
+{ /* inner-edge counterpart: dstU first, then dstV */                         \
+    vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI,            \
+                                             hev_thresh);                     \
+    vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI,            \
+                                             hev_thresh);                     \
+}
+
+#define LOOP_FILTER_SIMPLE(vpn) /* simple_limit gate, then filter_common */   \
+static void vpn ## _v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride,    \
+                                           int flim)                          \
+{ /* positions dst+0..15, each filtered with step = stride */                 \
+    int i;                                                                    \
+    for (i = 0; i < 16; i++)                                                  \
+        if (vpn ## _simple_limit(dst + i, stride, flim))                      \
+            vpn ## _filter_common(dst + i, stride, 1);                        \
+}                                                                             \
+                                                                              \
+static void vpn ## _h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride,    \
+                                           int flim)                          \
+{ /* positions dst+i*stride for i = 0..15, step = 1 */                        \
+    int i;                                                                    \
+    for (i = 0; i < 16; i++)                                                  \
+        if (vpn ## _simple_limit(dst + i * stride, 1, flim))                  \
+            vpn ## _filter_common(dst + i * stride, 1, 1);                    \
+}
+
+#define LOOP_FILTERS(vpn)                /* every loop filter for one vpn  */ \
+    LOOP_FILTER(vpn, v, 16, 1, stride, ) /* 16-position, plain static      */ \
+    LOOP_FILTER(vpn, h, 16, stride, 1, ) /* same, stridea/strideb swapped  */ \
+    UV_LOOP_FILTER(vpn, v, 1, stride)    /* 8-position dstU/dstV pair      */ \
+    UV_LOOP_FILTER(vpn, h, stride, 1)    /* same, stridea/strideb swapped  */ \
+    LOOP_FILTER_SIMPLE(vpn)              /* v and h simple filters         */ \
+
+static const uint8_t subpel_filters[7][6] = { // row = mx - 1 or my - 1; magnitudes only
+    { 0,  6, 123,  12,  1, 0 }, // FILTER_nTAP subtracts taps [1] and [4]
+    { 2, 11, 108,  36,  8, 1 }, // rows with nonzero [0]/[5] need the 6-tap form
+    { 0,  9,  93,  50,  6, 0 },
+    { 3, 16,  77,  77, 16, 3 }, // the one symmetric row
+    { 0,  6,  50,  93,  9, 0 },
+    { 1,  8,  36, 108, 11, 2 },
+    { 0,  1,  12, 123,  6, 0 }, // with those signs every row sums to 128 (see >> 7)
+};
+
+#define PUT_PIXELS(WIDTH)                                                     \
+static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride,  \
+                                          const uint8_t *src, ptrdiff_t srcstride, \
+                                          int h, int x, int y)                \
+{                                                                             \
+    /* plain copy of h rows of WIDTH bytes; x and y are not used */           \
+    for (; h > 0; h--, dst += dststride, src += srcstride)                    \
+        memcpy(dst, src, WIDTH);                                              \
+}
+
+PUT_PIXELS(16)
+PUT_PIXELS(8)
+PUT_PIXELS(4)
+
+#define FILTER_6TAP(src, F, stride) /* uses the caller's x and cm */          \
+    cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] +             \
+        F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] -             \
+        F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]
+
+#define FILTER_4TAP(src, F, stride) /* F[1..4] only; caller's x and cm */     \
+    cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] +             \
+        F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]
+
+#define VP8_EPEL_H(SIZE, TAPS)                                                \
+static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst,            \
+                                                     ptrdiff_t dststride,     \
+                                                     const uint8_t *src,      \
+                                                     ptrdiff_t srcstride,     \
+                                                     int h, int mx, int my)   \
+{ /* horizontal taps; mx must be 1..7, my is unused */                        \
+    const uint8_t *filter = subpel_filters[mx - 1];                           \
+    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
+    int x, y; /* FILTER_nTAP reads x and cm by name */                        \
+    for (y = 0; y < h; y++) {                                                 \
+        for (x = 0; x < SIZE; x++)                                            \
+            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1);                  \
+        dst += dststride;                                                     \
+        src += srcstride;                                                     \
+    }                                                                         \
+}
+
+#define VP8_EPEL_V(SIZE, TAPS)                                                \
+static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst,            \
+                                                     ptrdiff_t dststride,     \
+                                                     const uint8_t *src,      \
+                                                     ptrdiff_t srcstride,     \
+                                                     int h, int mx, int my)   \
+{ /* vertical taps; my must be 1..7, mx is unused */                          \
+    const uint8_t *filter = subpel_filters[my - 1];                           \
+    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
+    int x, y; /* FILTER_nTAP reads x and cm by name */                        \
+    for (y = 0; y < h; y++) {                                                 \
+        for (x = 0; x < SIZE; x++)                                            \
+            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride);          \
+        dst += dststride;                                                     \
+        src += srcstride;                                                     \
+    }                                                                         \
+}
+
+#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS)                                       \
+static void                                                                   \
+put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst,         \
+                                                        ptrdiff_t dststride,  \
+                                                        const uint8_t *src,   \
+                                                        ptrdiff_t srcstride,  \
+                                                        int h, int mx,        \
+                                                        int my)               \
+{ /* two passes: h filter into tmp, then v filter to dst */                   \
+    const uint8_t *filter = subpel_filters[mx - 1];                           \
+    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
+    int x, y; /* FILTER_nTAP reads x and cm by name */                        \
+    uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; /* fits h <= 2*SIZE */  \
+    uint8_t *tmp = tmp_array;                                                 \
+    src -= (2 - (VTAPS == 4)) * srcstride; /* rows above: 2, or 1 if 4-tap */ \
+                                                                              \
+    for (y = 0; y < h + VTAPS - 1; y++) { /* output rows plus v-tap margin */ \
+        for (x = 0; x < SIZE; x++)                                            \
+            tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1);                 \
+        tmp += SIZE;                                                          \
+        src += srcstride;                                                     \
+    }                                                                         \
+    tmp    = tmp_array + (2 - (VTAPS == 4)) * SIZE;                           \
+    filter = subpel_filters[my - 1];                                          \
+                                                                              \
+    for (y = 0; y < h; y++) {                                                 \
+        for (x = 0; x < SIZE; x++)                                            \
+            dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE);              \
+        dst += dststride;                                                     \
+        tmp += SIZE;                                                          \
+    }                                                                         \
+}
+
+VP8_EPEL_H(16, 4) // each line defines put_vp8_epel<SIZE>_h<TAPS>_c
+VP8_EPEL_H(8,  4)
+VP8_EPEL_H(4,  4)
+VP8_EPEL_H(16, 6)
+VP8_EPEL_H(8,  6)
+VP8_EPEL_H(4,  6)
+VP8_EPEL_V(16, 4) // each line defines put_vp8_epel<SIZE>_v<TAPS>_c
+VP8_EPEL_V(8,  4)
+VP8_EPEL_V(4,  4)
+VP8_EPEL_V(16, 6)
+VP8_EPEL_V(8,  6)
+VP8_EPEL_V(4,  6)
+
+VP8_EPEL_HV(16, 4, 4) // each line defines put_vp8_epel<SIZE>_h<HTAPS>v<VTAPS>_c
+VP8_EPEL_HV(8,  4, 4)
+VP8_EPEL_HV(4,  4, 4)
+VP8_EPEL_HV(16, 4, 6)
+VP8_EPEL_HV(8,  4, 6)
+VP8_EPEL_HV(4,  4, 6)
+VP8_EPEL_HV(16, 6, 4)
+VP8_EPEL_HV(8,  6, 4)
+VP8_EPEL_HV(4,  6, 4)
+VP8_EPEL_HV(16, 6, 6)
+VP8_EPEL_HV(8,  6, 6)
+VP8_EPEL_HV(4,  6, 6)
+
+#define VP8_BILINEAR(SIZE)                                                    \
+static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \
+                                             const uint8_t *src, ptrdiff_t sstride, \
+                                             int h, int mx, int my)           \
+{                                                                             \
+    /* Horizontal blend: dst[col] mixes src[col] and src[col + 1] with */     \
+    /* weights (8 - mx) and mx, rounded by +4 before >> 3; my is unused. */   \
+    /* Reads SIZE + 1 bytes from each of the h source rows. */                \
+    const int w0 = 8 - mx, w1 = mx;                                           \
+    int row, col;                                                             \
+    for (row = 0; row < h; row++, dst += dstride, src += sstride)             \
+        for (col = 0; col < SIZE; col++)                                      \
+            dst[col] = (w0 * src[col] + w1 * src[col + 1] + 4) >> 3;          \
+}                                                                             \
+                                                                              \
+static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \
+                                             const uint8_t *src, ptrdiff_t sstride, \
+                                             int h, int mx, int my)           \
+{                                                                             \
+    /* Vertical blend: dst[col] mixes src[col] and src[col + sstride] */      \
+    /* with weights (8 - my) and my, rounded by +4 before >> 3; mx is */      \
+    /* unused.  Reads h + 1 source rows of SIZE bytes. */                     \
+    const int w0 = 8 - my, w1 = my;                                           \
+    int row, col;                                                             \
+    for (row = 0; row < h; row++, dst += dstride, src += sstride)             \
+        for (col = 0; col < SIZE; col++)                                      \
+            dst[col] = (w0 * src[col] + w1 * src[col + sstride] + 4) >> 3;    \
+}                                                                             \
+                                                                              \
+static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst,                   \
+                                              ptrdiff_t dstride,              \
+                                              const uint8_t *src,             \
+                                              ptrdiff_t sstride,              \
+                                              int h, int mx, int my)          \
+{                                                                             \
+    /* Separable bilinear: pass 1 filters h + 1 rows horizontally into */     \
+    /* tmp_array, pass 2 blends vertically adjacent tmp rows into dst. */     \
+    const int w0 = 8 - mx, w1 = mx;                                           \
+    const int w2 = 8 - my, w3 = my;                                           \
+    int row, col;                                                             \
+    uint8_t tmp_array[(2 * SIZE + 1) * SIZE];                                 \
+    uint8_t *tmp = tmp_array;                                                 \
+                                                                              \
+    /* pass 1: intermediates are stored as uint8_t, SIZE per row */           \
+    for (row = 0; row < h + 1; row++, tmp += SIZE, src += sstride)            \
+        for (col = 0; col < SIZE; col++)                                      \
+            tmp[col] = (w0 * src[col] + w1 * src[col + 1] + 4) >> 3;          \
+                                                                              \
+    /* pass 2: rewind tmp, then combine rows r and r + 1 */                   \
+    tmp = tmp_array;                                                          \
+    for (row = 0; row < h; row++, dst += dstride, tmp += SIZE)                \
+        for (col = 0; col < SIZE; col++)                                      \
+            dst[col] = (w2 * tmp[col] + w3 * tmp[col + SIZE] + 4) >> 3;       \
+}
+
+VP8_BILINEAR(16)
+VP8_BILINEAR(8)
+VP8_BILINEAR(4)
+
+#define VP78_MC_FUNC(IDX, SIZE) /* [IDX][v][h]: 0 none, 1 4-tap, 2 6-tap */   \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c;   \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c;  \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c;  \
+    dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c;  \
+    dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c;  \
+    dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c
+
+#define VP78_BILINEAR_MC_FUNC(IDX, SIZE) /* slots 1 and 2 are identical */    \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels   ## SIZE ## _c; \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c
+
+av_cold void ff_vp78dsp_init(VP8DSPContext *dsp)
+{
+    /* C motion-compensation tables: index 0/1/2 holds the 16/8/4 wide set */
+    VP78_MC_FUNC(0, 16);
+    VP78_BILINEAR_MC_FUNC(0, 16);
+    VP78_MC_FUNC(1, 8);
+    VP78_BILINEAR_MC_FUNC(1, 8);
+    VP78_MC_FUNC(2, 4);
+    VP78_BILINEAR_MC_FUNC(2, 4);
+    /* at most one arch-specific init runs, after the C entries are set */
+#if ARCH_AARCH64
+    ff_vp78dsp_init_aarch64(dsp);
+#elif ARCH_ARM
+    ff_vp78dsp_init_arm(dsp);
+#elif ARCH_PPC
+    ff_vp78dsp_init_ppc(dsp);
+#elif ARCH_X86
+    ff_vp78dsp_init_x86(dsp);
+#endif
+}
+
+#if CONFIG_VP7_DECODER
+LOOP_FILTERS(vp7)
+
+av_cold void ff_vp7dsp_init(VP8DSPContext *dsp)
+{
+    /* 16-position filters: macroblock-edge and inner variants */
+    dsp->vp8_v_loop_filter16y       = vp7_v_loop_filter16_c;
+    dsp->vp8_v_loop_filter16y_inner = vp7_v_loop_filter16_inner_c;
+    dsp->vp8_h_loop_filter16y       = vp7_h_loop_filter16_c;
+    dsp->vp8_h_loop_filter16y_inner = vp7_h_loop_filter16_inner_c;
+    /* U/V pair filters, then the simple filters */
+    dsp->vp8_v_loop_filter8uv       = vp7_v_loop_filter8uv_c;
+    dsp->vp8_v_loop_filter8uv_inner = vp7_v_loop_filter8uv_inner_c;
+    dsp->vp8_h_loop_filter8uv       = vp7_h_loop_filter8uv_c;
+    dsp->vp8_h_loop_filter8uv_inner = vp7_h_loop_filter8uv_inner_c;
+    dsp->vp8_v_loop_filter_simple   = vp7_v_loop_filter_simple_c;
+    dsp->vp8_h_loop_filter_simple   = vp7_h_loop_filter_simple_c;
+    /* VP7 versions of the transform entry points */
+    dsp->vp8_luma_dc_wht    = vp7_luma_dc_wht_c;
+    dsp->vp8_luma_dc_wht_dc = vp7_luma_dc_wht_dc_c;
+    dsp->vp8_idct_add       = vp7_idct_add_c;
+    dsp->vp8_idct_dc_add    = vp7_idct_dc_add_c;
+    dsp->vp8_idct_dc_add4y  = vp7_idct_dc_add4y_c;
+    dsp->vp8_idct_dc_add4uv = vp7_idct_dc_add4uv_c;
+}
+#endif /* CONFIG_VP7_DECODER */
+
+#if CONFIG_VP8_DECODER
+LOOP_FILTERS(vp8)
+
+av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
+{
+    /* 16-position filters: macroblock-edge and inner variants */
+    dsp->vp8_v_loop_filter16y       = vp8_v_loop_filter16_c;
+    dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c;
+    dsp->vp8_h_loop_filter16y       = vp8_h_loop_filter16_c;
+    dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c;
+    /* U/V pair filters, then the simple filters */
+    dsp->vp8_v_loop_filter8uv       = vp8_v_loop_filter8uv_c;
+    dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c;
+    dsp->vp8_h_loop_filter8uv       = vp8_h_loop_filter8uv_c;
+    dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c;
+    dsp->vp8_v_loop_filter_simple   = vp8_v_loop_filter_simple_c;
+    dsp->vp8_h_loop_filter_simple   = vp8_h_loop_filter_simple_c;
+    /* VP8 versions of the transform entry points */
+    dsp->vp8_luma_dc_wht    = vp8_luma_dc_wht_c;
+    dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c;
+    dsp->vp8_idct_add       = vp8_idct_add_c;
+    dsp->vp8_idct_dc_add    = vp8_idct_dc_add_c;
+    dsp->vp8_idct_dc_add4y  = vp8_idct_dc_add4y_c;
+    dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c;
+    /* at most one arch-specific init runs, after the C entries are set */
+#if ARCH_AARCH64
+    ff_vp8dsp_init_aarch64(dsp);
+#elif ARCH_ARM
+    ff_vp8dsp_init_arm(dsp);
+#elif ARCH_X86
+    ff_vp8dsp_init_x86(dsp);
+#elif ARCH_MIPS
+    ff_vp8dsp_init_mips(dsp);
+#elif ARCH_LOONGARCH
+    ff_vp8dsp_init_loongarch(dsp);
+#endif
+}
+#endif /* CONFIG_VP8_DECODER */
diff --git a/media/ffvpx/libavcodec/vp8dsp.h b/media/ffvpx/libavcodec/vp8dsp.h
new file mode 100644
index 0000000000..16b5e9c35b
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8dsp.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DSP function table shared by the VP7 and VP8 compatible video decoders
+ */
+
+#ifndef AVCODEC_VP8DSP_H
+#define AVCODEC_VP8DSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef void (*vp8_mc_func)(uint8_t *dst /* align 8 */, ptrdiff_t dstStride,
+                            const uint8_t *src /* align 1 */, ptrdiff_t srcStride,
+                            int h, int x, int y); // h: rows written; x, y: the mx, my of the C versions
+
+typedef struct VP8DSPContext { // function pointers, set by the ff_vp*dsp_init() functions
+    void (*vp8_luma_dc_wht)(int16_t block[4][4][16], int16_t dc[16]);
+    void (*vp8_luma_dc_wht_dc)(int16_t block[4][4][16], int16_t dc[16]);
+    void (*vp8_idct_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+    void (*vp8_idct_dc_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+    void (*vp8_idct_dc_add4y)(uint8_t *dst, int16_t block[4][16],
+                              ptrdiff_t stride);
+    void (*vp8_idct_dc_add4uv)(uint8_t *dst, int16_t block[4][16],
+                               ptrdiff_t stride);
+
+    // loop filter applied to edges between macroblocks
+    void (*vp8_v_loop_filter16y)(uint8_t *dst, ptrdiff_t stride,
+                                 int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_h_loop_filter16y)(uint8_t *dst, ptrdiff_t stride,
+                                 int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_v_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
+                                 int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_h_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
+                                 int flim_E, int flim_I, int hev_thresh);
+
+    // loop filter applied to inner macroblock edges
+    void (*vp8_v_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride,
+                                       int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_h_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride,
+                                       int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_v_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV,
+                                       ptrdiff_t stride,
+                                       int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_h_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV,
+                                       ptrdiff_t stride,
+                                       int flim_E, int flim_I, int hev_thresh);
+    // simple loop filter: a single limit (flim) replaces flim_E/flim_I/hev_thresh
+    void (*vp8_v_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim);
+    void (*vp8_h_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim);
+
+    /**
+     * first dimension: 4-log2(width)
+     * second dimension: 0 if no vertical interpolation is needed;
+     *                   1 4-tap vertical interpolation filter (my & 1)
+     *                   2 6-tap vertical interpolation filter (!(my & 1))
+     * third dimension: same as second dimension, for horizontal interpolation
+     * so something like put_vp8_epel_pixels_tab[4-log2(width)][2*!!my-(my&1)][2*!!mx-(mx&1)](..., mx, my)
+     */
+    vp8_mc_func put_vp8_epel_pixels_tab[3][3][3];
+    vp8_mc_func put_vp8_bilinear_pixels_tab[3][3][3]; // same indexing; the C init stores one function in slots 1 and 2
+} VP8DSPContext;
+
+void ff_vp7dsp_init(VP8DSPContext *c); // VP7 C transforms and loop filters; no arch-specific call
+
+void ff_vp78dsp_init(VP8DSPContext *c); // both MC tables from C, then at most one arch init below
+void ff_vp78dsp_init_aarch64(VP8DSPContext *c);
+void ff_vp78dsp_init_arm(VP8DSPContext *c);
+void ff_vp78dsp_init_ppc(VP8DSPContext *c);
+void ff_vp78dsp_init_x86(VP8DSPContext *c);
+
+void ff_vp8dsp_init(VP8DSPContext *c); // VP8 C transforms and loop filters, then at most one arch init below
+void ff_vp8dsp_init_aarch64(VP8DSPContext *c);
+void ff_vp8dsp_init_arm(VP8DSPContext *c);
+void ff_vp8dsp_init_x86(VP8DSPContext *c);
+void ff_vp8dsp_init_mips(VP8DSPContext *c);
+void ff_vp8dsp_init_loongarch(VP8DSPContext *c);
+
+#define IS_VP7 1 /* NOTE(review): no user in this header; presumably a codec selector for shared VP7/VP8 code - confirm */
+#define IS_VP8 0 /* counterpart of IS_VP7 */
+
+#endif /* AVCODEC_VP8DSP_H */
diff --git a/media/ffvpx/libavcodec/vp9.c b/media/ffvpx/libavcodec/vp9.c
new file mode 100644
index 0000000000..7c0a246446
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9.c
@@ -0,0 +1,1914 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+
+#include "avcodec.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "get_bits.h"
+#include "hwconfig.h"
+#include "profiles.h"
+#include "thread.h"
+#include "threadframe.h"
+#include "pthread_internal.h"
+
+#include "videodsp.h"
+#include "vp89_rac.h"
+#include "vp9.h"
+#include "vp9data.h"
+#include "vp9dec.h"
+#include "vpx_rac.h"
+#include "libavutil/avassert.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/video_enc_params.h"
+
+#define VP9_SYNCCODE 0x498342 /* 24-bit marker checked in keyframe and intra-only frame headers */
+
+#if HAVE_THREADS
+DEFINE_OFFSET_ARRAY(VP9Context, vp9_context, pthread_init_cnt,
+                    (offsetof(VP9Context, progress_mutex)),
+                    (offsetof(VP9Context, progress_cond))); /* offsets of the mutex/cond used by the tile-progress helpers; the macro is defined outside this file -- TODO confirm what it generates */
+
+/* Allocate n zeroed atomic progress counters when slice threading is active; returns 0 or AVERROR(ENOMEM). */
+static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
+    VP9Context *s = avctx->priv_data;
+    int i;
+
+    if (avctx->active_thread_type & FF_THREAD_SLICE)  {
+        av_freep(&s->entries); // NULL-safe, so the previous array needs no guard before being replaced
+
+        s->entries = av_malloc_array(n, sizeof(*s->entries));
+        if (!s->entries)
+            return AVERROR(ENOMEM);
+
+        for (i  = 0; i < n; i++)
+            atomic_init(&s->entries[i], 0);
+    }
+    return 0;
+}
+
+static void vp9_report_tile_progress(VP9Context *s, int field, int n) { /* add n to progress counter `field` and wake a waiter */
+    pthread_mutex_lock(&s->progress_mutex); /* update under the same mutex the waiter holds while checking */
+    atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
+    pthread_cond_signal(&s->progress_cond); /* NOTE(review): signals rather than broadcasts -- assumes a single waiting thread; confirm */
+    pthread_mutex_unlock(&s->progress_mutex);
+}
+
+static void vp9_await_tile_progress(VP9Context *s, int field, int n) { /* block until progress counter `field` has reached n */
+    if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
+        return; // fast path: already reached, no locking needed
+    // Slow path: wait while "< n", mirroring the ">= n" test above, so a counter that has moved past n cannot block forever.
+    pthread_mutex_lock(&s->progress_mutex);
+    while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) < n)
+        pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
+    pthread_mutex_unlock(&s->progress_mutex);
+}
+#else
+static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; } /* built without HAVE_THREADS: nothing to allocate */
+#endif
+
+static void vp9_tile_data_free(VP9TileData *td) // release one tile's buffers; av_freep() leaves each pointer NULL
+{
+    av_freep(&td->block_structure);
+    av_freep(&td->block_base);
+    av_freep(&td->b_base);
+}
+
+static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f) // drop the picture and both side buffers of f
+{
+    ff_thread_release_ext_buffer(avctx, &f->tf);
+    f->segmentation_map = NULL;        // was a view into extradata
+    av_buffer_unref(&f->extradata);
+    f->hwaccel_picture_private = NULL; // was a view into hwaccel_priv_buf
+    av_buffer_unref(&f->hwaccel_priv_buf);
+}
+
+static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
+{
+    VP9Context *s = avctx->priv_data;
+    int err, map_bytes;
+
+    // The picture itself comes first; its error code is returned as-is.
+    err = ff_thread_get_ext_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
+    if (err < 0)
+        return err;
+
+    // Side buffer layout: map_bytes bytes of segmentation map followed by
+    // map_bytes VP9mvrefPair entries. The pool is rebuilt when the size changes.
+    map_bytes = 64 * s->sb_cols * s->sb_rows;
+    if (s->frame_extradata_pool_size != map_bytes) {
+        av_buffer_pool_uninit(&s->frame_extradata_pool);
+        s->frame_extradata_pool = av_buffer_pool_init(map_bytes * (1 + sizeof(VP9mvrefPair)), NULL);
+        s->frame_extradata_pool_size = s->frame_extradata_pool ? map_bytes : 0;
+        if (!s->frame_extradata_pool)
+            goto fail;
+    }
+    if (!(f->extradata = av_buffer_pool_get(s->frame_extradata_pool)))
+        goto fail;
+    memset(f->extradata->data, 0, f->extradata->size);
+
+    f->segmentation_map = f->extradata->data;
+    f->mv               = (VP9mvrefPair *) (f->extradata->data + map_bytes);
+
+    // Optional zeroed per-frame private storage, sized by the active hwaccel.
+    if (avctx->hwaccel) {
+        av_assert0(!f->hwaccel_picture_private);
+        if (avctx->hwaccel->frame_priv_data_size) {
+            f->hwaccel_priv_buf = av_buffer_allocz(avctx->hwaccel->frame_priv_data_size);
+            if (!f->hwaccel_priv_buf)
+                goto fail;
+            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
+        }
+    }
+
+    return 0;
+
+fail:
+    // Undo everything acquired so far, including the picture buffer.
+    vp9_frame_unref(avctx, f);
+    return AVERROR(ENOMEM);
+}
+
+static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
+{
+    // Reference the picture first; its error code is passed through
+    // unchanged because nothing else has been acquired yet.
+    int err = ff_thread_ref_frame(&dst->tf, &src->tf);
+    if (err < 0)
+        return err;
+
+    // Share the side buffer that backs the segmentation map and MV pairs.
+    if (!(dst->extradata = av_buffer_ref(src->extradata)))
+        goto fail;
+    dst->uses_2pass       = src->uses_2pass;
+    dst->mv               = src->mv;
+    dst->segmentation_map = src->segmentation_map;
+
+    // The hwaccel private buffer is shared only when the source frame has one.
+    if (src->hwaccel_picture_private) {
+        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
+        if (!dst->hwaccel_priv_buf)
+            goto fail;
+        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
+    }
+
+    return 0;
+
+fail: // release whatever dst acquired before the failure
+    vp9_frame_unref(avctx, dst);
+    return AVERROR(ENOMEM);
+}
+
+static int update_size(AVCodecContext *avctx, int w, int h) /* (re)negotiate the output format and rebuild the per-column context buffers for a w x h frame */
+{
+#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
+                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
+                     CONFIG_VP9_NVDEC_HWACCEL + \
+                     CONFIG_VP9_VAAPI_HWACCEL + \
+                     CONFIG_VP9_VDPAU_HWACCEL + \
+                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
+    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts; /* every hwaccel format (D3D11VA adds two), plus the software format and the NONE terminator */
+    VP9Context *s = avctx->priv_data;
+    uint8_t *p; /* cursor into the single context allocation made below */
+    int bytesperpixel = s->bytesperpixel, ret, cols, rows;
+    int lflvl_len, i;
+/* Returns 0 on success, a negative error from ff_set_dimensions()/ff_thread_get_format(), or AVERROR(ENOMEM). */
+    av_assert0(w > 0 && h > 0);
+
+    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) { /* format or size differs from the last negotiation */
+        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
+            return ret;
+
+        switch (s->pix_fmt) { /* hwaccel candidates are listed ahead of the software format */
+        case AV_PIX_FMT_YUV420P:
+        case AV_PIX_FMT_YUV420P10:
+#if CONFIG_VP9_DXVA2_HWACCEL
+            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
+#endif
+#if CONFIG_VP9_D3D11VA_HWACCEL
+            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
+            *fmtp++ = AV_PIX_FMT_D3D11;
+#endif
+#if CONFIG_VP9_NVDEC_HWACCEL
+            *fmtp++ = AV_PIX_FMT_CUDA;
+#endif
+#if CONFIG_VP9_VAAPI_HWACCEL
+            *fmtp++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_VP9_VDPAU_HWACCEL
+            *fmtp++ = AV_PIX_FMT_VDPAU;
+#endif
+#if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
+            *fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+            break;
+        case AV_PIX_FMT_YUV420P12:
+#if CONFIG_VP9_NVDEC_HWACCEL
+            *fmtp++ = AV_PIX_FMT_CUDA;
+#endif
+#if CONFIG_VP9_VAAPI_HWACCEL
+            *fmtp++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_VP9_VDPAU_HWACCEL
+            *fmtp++ = AV_PIX_FMT_VDPAU;
+#endif
+            break;
+        case AV_PIX_FMT_YUV444P:
+        case AV_PIX_FMT_YUV444P10:
+        case AV_PIX_FMT_YUV444P12:
+#if CONFIG_VP9_VAAPI_HWACCEL
+            *fmtp++ = AV_PIX_FMT_VAAPI;
+#endif
+            break;
+        }
+
+        *fmtp++ = s->pix_fmt; /* the software format is always the last candidate */
+        *fmtp = AV_PIX_FMT_NONE;
+
+        ret = ff_thread_get_format(avctx, pix_fmts);
+        if (ret < 0)
+            return ret;
+
+        avctx->pix_fmt = ret; /* non-negative result becomes the active output format */
+        s->gf_fmt  = s->pix_fmt; /* remember what was negotiated so the test above can skip it next time */
+        s->w = w;
+        s->h = h;
+    }
+/* Frame size in 8-pixel units. */
+    cols = (w + 7) >> 3;
+    rows = (h + 7) >> 3;
+/* Context buffers already exist for this geometry and format: keep them. */
+    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
+        return 0;
+
+    s->last_fmt  = s->pix_fmt;
+    s->sb_cols   = (w + 63) >> 6; /* 64-pixel superblock units */
+    s->sb_rows   = (h + 63) >> 6;
+    s->cols      = (w + 7) >> 3;
+    s->rows      = (h + 7) >> 3;
+    lflvl_len    = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1; /* one row of VP9Filter per superblock row with slice threading, otherwise a single row */
+/* assign(): point var at the cursor p, then advance p by sb_cols * n elements of var's pointee type. */
+#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
+    av_freep(&s->intra_pred_data[0]); /* intra_pred_data[0] is the base of the single allocation carved up below */
+    // FIXME we slightly over-allocate here for subsampled chroma, but a little
+    // bit of padding shouldn't affect performance...
+    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel + /* 128 = 4 * 16 + 8 * 8 one-byte context entries; 192 = 3 planes * 64 samples */
+                                lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
+    if (!p)
+        return AVERROR(ENOMEM);
+    assign(s->intra_pred_data[0],  uint8_t *,             64 * bytesperpixel);
+    assign(s->intra_pred_data[1],  uint8_t *,             64 * bytesperpixel);
+    assign(s->intra_pred_data[2],  uint8_t *,             64 * bytesperpixel);
+    assign(s->above_y_nnz_ctx,     uint8_t *,             16);
+    assign(s->above_mode_ctx,      uint8_t *,             16);
+    assign(s->above_mv_ctx,        VP9mv(*)[2],           16);
+    assign(s->above_uv_nnz_ctx[0], uint8_t *,             16);
+    assign(s->above_uv_nnz_ctx[1], uint8_t *,             16);
+    assign(s->above_partition_ctx, uint8_t *,              8);
+    assign(s->above_skip_ctx,      uint8_t *,              8);
+    assign(s->above_txfm_ctx,      uint8_t *,              8);
+    assign(s->above_segpred_ctx,   uint8_t *,              8);
+    assign(s->above_intra_ctx,     uint8_t *,              8);
+    assign(s->above_comp_ctx,      uint8_t *,              8);
+    assign(s->above_ref_ctx,       uint8_t *,              8);
+    assign(s->above_filter_ctx,    uint8_t *,              8);
+    assign(s->lflvl,               VP9Filter *,            lflvl_len);
+#undef assign
+
+    if (s->td) { /* drop the per-tile buffers so update_block_buffers() allocates them afresh */
+        for (i = 0; i < s->active_tile_cols; i++)
+            vp9_tile_data_free(&s->td[i]);
+    }
+
+    if (s->s.h.bpp != s->last_bpp) { /* the DSP tables are initialised per bit depth; redo them only when it changes */
+        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
+        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
+        s->last_bpp = s->s.h.bpp;
+    }
+
+    return 0;
+}
+
+static int update_block_buffers(AVCodecContext *avctx) /* (re)allocate the per-tile block, coefficient and eob buffers; returns 0 or AVERROR(ENOMEM) */
+{
+    int i;
+    VP9Context *s = avctx->priv_data;
+    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
+    VP9TileData *td = &s->td[0];
+/* Nothing to do when td[0] is already allocated for the current frame's 2-pass mode. */
+    if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
+        return 0;
+
+    vp9_tile_data_free(td);
+    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v); /* entries per chroma plane in one 64x64 superblock, after subsampling */
+    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v); /* presumably one eob per 4x4 block -- TODO confirm */
+    if (s->s.frames[CUR_FRAME].uses_2pass) { /* 2-pass: only td[0] is used, sized for the whole frame */
+        int sbs = s->sb_cols * s->sb_rows;
+
+        td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block)); /* one VP9Block per 8x8 unit of the frame */
+        td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
+                                    16 * 16 + 2 * chroma_eobs) * sbs);
+        if (!td->b_base || !td->block_base)
+            return AVERROR(ENOMEM);
+        td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel; /* layout: first plane, two chroma planes, then the eob arrays in the same order */
+        td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
+        td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
+        td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
+        td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
+
+        if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) { /* only needed when the caller asked for encoder-parameter side data */
+            td->block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
+            if (!td->block_structure)
+                return AVERROR(ENOMEM);
+        }
+    } else { /* single pass: every active tile column gets one VP9Block and one superblock's worth of data */
+        for (i = 1; i < s->active_tile_cols; i++) /* td[0] was already freed above */
+            vp9_tile_data_free(&s->td[i]);
+
+        for (i = 0; i < s->active_tile_cols; i++) {
+            s->td[i].b_base = av_malloc(sizeof(VP9Block));
+            s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
+                                       16 * 16 + 2 * chroma_eobs);
+            if (!s->td[i].b_base || !s->td[i].block_base)
+                return AVERROR(ENOMEM);
+            s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel; /* same layout as the 2-pass case, for a single superblock */
+            s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
+            s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
+            s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
+            s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
+
+            if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
+                s->td[i].block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
+                if (!s->td[i].block_structure)
+                    return AVERROR(ENOMEM);
+            }
+        }
+    }
+    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass; /* remembered for the early-out test at the top */
+
+    return 0;
+}
+
+// Signed value stored as an n-bit magnitude followed by a trailing sign bit
+static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
+{
+    const int magnitude = get_bits(gb, n); // the magnitude is read before the sign
+    return get_bits1(gb) ? -magnitude : magnitude;
+}
+
+static av_always_inline int inv_recenter_nonneg(int v, int m)
+{
+    // Codes above 2*m map to themselves; otherwise odd codes land below m and even codes at or above it.
+    if (v > 2 * m)
+        return v;
+    return (v & 1) ? m - ((v + 1) >> 1)
+                   : m + (v >> 1);
+}
+
+// Differential forward probability update: decodes a delta index from c and returns the new probability derived from p
+static int update_prob(VPXRangeCoder *c, int p)
+{
+    static const uint8_t inv_map_table[255] = {
+          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
+        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
+         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
+         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
+         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
+         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
+         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
+         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
+        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
+        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
+        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
+        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
+        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
+        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
+        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
+        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
+        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
+        252, 253, 253, /* NOTE(review): the last two entries are both 253 -- presumably deliberate for bitstream compatibility; do not "fix" without confirming */
+    };
+    int d; /* index into inv_map_table[] */
+
+    /* This code is trying to do a differential probability update. For a
+     * current probability A in the range [1, 255], the difference to a new
+     * probability of any value can be expressed differentially as 1-A, 255-A
+     * where some part of this (absolute range) exists both in positive as
+     * well as the negative part, whereas another part only exists in one
+     * half. We're trying to code this shared part differentially, i.e.
+     * times two where the value of the lowest bit specifies the sign, and
+     * the single part is then coded on top of this. This absolute difference
+     * then again has a value of [0, 254], but a bigger value in this range
+     * indicates that we're further away from the original value A, so we
+     * can code this as a VLC code, since higher values are increasingly
+     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
+     * updates vs. the 'fine, exact' updates further down the range, which
+     * adds one extra dimension to this differential update model. */
+/* Ranges below assume vp89_rac_get_uint(c, k) yields a k-bit value -- TODO confirm against vp89_rac.h. */
+    if (!vp89_rac_get(c)) {
+        d = vp89_rac_get_uint(c, 4) + 0; /* d in [0, 15] */
+    } else if (!vp89_rac_get(c)) {
+        d = vp89_rac_get_uint(c, 4) + 16; /* d in [16, 31] */
+    } else if (!vp89_rac_get(c)) {
+        d = vp89_rac_get_uint(c, 5) + 32; /* d in [32, 63] */
+    } else {
+        d = vp89_rac_get_uint(c, 7);
+        if (d >= 65) /* the upper codes carry one extra bit, extending the reach to 190 before the +64 */
+            d = (d << 1) - 65 + vp89_rac_get(c);
+        d += 64; /* d in [64, 254], still inside the 255-entry table */
+        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
+    }
+
+    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) : /* lower half: recenter around p - 1 */
+                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p); /* upper half: same, mirrored from 255 */
+}
+
+static int read_colorspace_details(AVCodecContext *avctx) /* parse bit depth, colorspace, range and subsampling from s->gb; returns 0 or AVERROR_INVALIDDATA */
+{
+    static const enum AVColorSpace colorspaces[8] = { /* indexed by the 3-bit colorspace code read below */
+        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
+        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
+    };
+    VP9Context *s = avctx->priv_data;
+    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // bit-depth index: 0 = 8, 1 = 10, 2 = 12 bits; profiles 0 and 1 are fixed at 8
+
+    s->bpp_index = bits;
+    s->s.h.bpp = 8 + bits * 2;
+    s->bytesperpixel = (7 + s->s.h.bpp) >> 3; /* 1 byte for 8-bit, 2 bytes for 10/12-bit */
+    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
+    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB is only accepted in odd profiles (see the profile & 1 test below)
+        static const enum AVPixelFormat pix_fmt_rgb[3] = {
+            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
+        };
+        s->ss_h = s->ss_v = 0; /* no chroma subsampling for RGB */
+        avctx->color_range = AVCOL_RANGE_JPEG;
+        s->pix_fmt = pix_fmt_rgb[bits];
+        if (avctx->profile & 1) {
+            if (get_bits1(&s->gb)) {
+                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
+                return AVERROR_INVALIDDATA;
+            }
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
+                   avctx->profile);
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = { /* [bit-depth index][ss_v][ss_h] */
+            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
+              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
+            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
+              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
+            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
+              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
+        };
+        avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
+        if (avctx->profile & 1) { /* odd profiles signal the subsampling explicitly */
+            s->ss_h = get_bits1(&s->gb);
+            s->ss_v = get_bits1(&s->gb);
+            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
+            if (s->pix_fmt == AV_PIX_FMT_YUV420P) { /* NOTE(review): only 8-bit 4:2:0 is rejected; 10/12-bit 4:2:0 in profile 3 passes -- confirm this is intended */
+                av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
+                       avctx->profile);
+                return AVERROR_INVALIDDATA;
+            } else if (get_bits1(&s->gb)) {
+                av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
+                       avctx->profile);
+                return AVERROR_INVALIDDATA;
+            }
+        } else { /* even profiles read no subsampling bits and are always 4:2:0 */
+            s->ss_h = s->ss_v = 1;
+            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
+        }
+    }
+
+    return 0;
+}
+
+static int decode_frame_header(AVCodecContext *avctx,
+                               const uint8_t *data, int size, int *ref)
+{
+    VP9Context *s = avctx->priv_data;
+    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
+    int last_invisible;
+    const uint8_t *data2;
+
+    /* general header */
+    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
+        return ret;
+    }
+    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
+        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
+        return AVERROR_INVALIDDATA;
+    }
+    avctx->profile  = get_bits1(&s->gb);
+    avctx->profile |= get_bits1(&s->gb) << 1;
+    if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
+    if (avctx->profile > 3) {
+        av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
+        return AVERROR_INVALIDDATA;
+    }
+    s->s.h.profile = avctx->profile;
+    if (get_bits1(&s->gb)) {
+        *ref = get_bits(&s->gb, 3);
+        return 0;
+    }
+
+    s->last_keyframe  = s->s.h.keyframe;
+    s->s.h.keyframe   = !get_bits1(&s->gb);
+
+    last_invisible   = s->s.h.invisible;
+    s->s.h.invisible = !get_bits1(&s->gb);
+    s->s.h.errorres  = get_bits1(&s->gb);
+    s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
+
+    if (s->s.h.keyframe) {
+        if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
+            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if ((ret = read_colorspace_details(avctx)) < 0)
+            return ret;
+        // for profile 1, here follows the subsampling bits
+        s->s.h.refreshrefmask = 0xff;
+        w = get_bits(&s->gb, 16) + 1;
+        h = get_bits(&s->gb, 16) + 1;
+        if (get_bits1(&s->gb)) // display size
+            skip_bits(&s->gb, 32);
+    } else {
+        s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
+        s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
+        if (s->s.h.intraonly) {
+            if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
+                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
+                return AVERROR_INVALIDDATA;
+            }
+            if (avctx->profile >= 1) {
+                if ((ret = read_colorspace_details(avctx)) < 0)
+                    return ret;
+            } else {
+                s->ss_h = s->ss_v = 1;
+                s->s.h.bpp = 8;
+                s->bpp_index = 0;
+                s->bytesperpixel = 1;
+                s->pix_fmt = AV_PIX_FMT_YUV420P;
+                avctx->colorspace = AVCOL_SPC_BT470BG;
+                avctx->color_range = AVCOL_RANGE_MPEG;
+            }
+            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
+            w = get_bits(&s->gb, 16) + 1;
+            h = get_bits(&s->gb, 16) + 1;
+            if (get_bits1(&s->gb)) // display size
+                skip_bits(&s->gb, 32);
+        } else {
+            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
+            s->s.h.refidx[0]      = get_bits(&s->gb, 3);
+            s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
+            s->s.h.refidx[1]      = get_bits(&s->gb, 3);
+            s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
+            s->s.h.refidx[2]      = get_bits(&s->gb, 3);
+            s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
+            if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
+                !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
+                !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
+                av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
+                return AVERROR_INVALIDDATA;
+            }
+            if (get_bits1(&s->gb)) {
+                w = s->s.refs[s->s.h.refidx[0]].f->width;
+                h = s->s.refs[s->s.h.refidx[0]].f->height;
+            } else if (get_bits1(&s->gb)) {
+                w = s->s.refs[s->s.h.refidx[1]].f->width;
+                h = s->s.refs[s->s.h.refidx[1]].f->height;
+            } else if (get_bits1(&s->gb)) {
+                w = s->s.refs[s->s.h.refidx[2]].f->width;
+                h = s->s.refs[s->s.h.refidx[2]].f->height;
+            } else {
+                w = get_bits(&s->gb, 16) + 1;
+                h = get_bits(&s->gb, 16) + 1;
+            }
+            // Note that in this code, "CUR_FRAME" is actually before we
+            // have formally allocated a frame, and thus actually represents
+            // the _last_ frame
+            s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
+                                       s->s.frames[CUR_FRAME].tf.f->height == h;
+            if (get_bits1(&s->gb)) // display size
+                skip_bits(&s->gb, 32);
+            s->s.h.highprecisionmvs = get_bits1(&s->gb);
+            s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
+                                                  get_bits(&s->gb, 2);
+            s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
+                                  s->s.h.signbias[0] != s->s.h.signbias[2];
+            if (s->s.h.allowcompinter) {
+                if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
+                    s->s.h.fixcompref    = 2;
+                    s->s.h.varcompref[0] = 0;
+                    s->s.h.varcompref[1] = 1;
+                } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
+                    s->s.h.fixcompref    = 1;
+                    s->s.h.varcompref[0] = 0;
+                    s->s.h.varcompref[1] = 2;
+                } else {
+                    s->s.h.fixcompref    = 0;
+                    s->s.h.varcompref[0] = 1;
+                    s->s.h.varcompref[1] = 2;
+                }
+            }
+        }
+    }
+    s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
+    s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
+    s->s.h.framectxid   = c = get_bits(&s->gb, 2);
+    if (s->s.h.keyframe || s->s.h.intraonly)
+        s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
+
+    /* loopfilter header data */
+    if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
+        // reset loopfilter defaults
+        s->s.h.lf_delta.ref[0] = 1;
+        s->s.h.lf_delta.ref[1] = 0;
+        s->s.h.lf_delta.ref[2] = -1;
+        s->s.h.lf_delta.ref[3] = -1;
+        s->s.h.lf_delta.mode[0] = 0;
+        s->s.h.lf_delta.mode[1] = 0;
+        memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
+    }
+    s->s.h.filter.level = get_bits(&s->gb, 6);
+    sharp = get_bits(&s->gb, 3);
+    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
+    // the old cache values since they are still valid
+    if (s->s.h.filter.sharpness != sharp) {
+        for (i = 1; i <= 63; i++) {
+            int limit = i;
+
+            if (sharp > 0) {
+                limit >>= (sharp + 3) >> 2;
+                limit = FFMIN(limit, 9 - sharp);
+            }
+            limit = FFMAX(limit, 1);
+
+            s->filter_lut.lim_lut[i] = limit;
+            s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
+        }
+    }
+    s->s.h.filter.sharpness = sharp;
+    if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
+        if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
+            for (i = 0; i < 4; i++)
+                if (get_bits1(&s->gb))
+                    s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
+            for (i = 0; i < 2; i++)
+                if (get_bits1(&s->gb))
+                    s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
+        }
+    }
+
+    /* quantization header data */
+    s->s.h.yac_qi      = get_bits(&s->gb, 8);
+    s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
+    s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
+    s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
+    s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
+                       s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
+    if (s->s.h.lossless)
+        avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
+
+    /* segmentation header info */
+    if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
+        if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
+            for (i = 0; i < 7; i++)
+                s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
+                                 get_bits(&s->gb, 8) : 255;
+            if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
+                for (i = 0; i < 3; i++)
+                    s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
+                                         get_bits(&s->gb, 8) : 255;
+        }
+
+        if (get_bits1(&s->gb)) {
+            s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
+            for (i = 0; i < 8; i++) {
+                if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
+                    s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
+                if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
+                    s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
+                if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
+                    s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
+                s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
+            }
+        }
+    }
+
+    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
+    for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
+        int qyac, qydc, quvac, quvdc, lflvl, sh;
+
+        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
+            if (s->s.h.segmentation.absolute_vals)
+                qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
+            else
+                qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
+        } else {
+            qyac  = s->s.h.yac_qi;
+        }
+        qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
+        quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
+        quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
+        qyac  = av_clip_uintp2(qyac, 8);
+
+        s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
+        s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
+        s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
+        s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
+
+        sh = s->s.h.filter.level >= 32;
+        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
+            if (s->s.h.segmentation.absolute_vals)
+                lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
+            else
+                lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
+        } else {
+            lflvl  = s->s.h.filter.level;
+        }
+        if (s->s.h.lf_delta.enabled) {
+            s->s.h.segmentation.feat[i].lflvl[0][0] =
+            s->s.h.segmentation.feat[i].lflvl[0][1] =
+                av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
+            for (j = 1; j < 4; j++) {
+                s->s.h.segmentation.feat[i].lflvl[j][0] =
+                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
+                                             s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
+                s->s.h.segmentation.feat[i].lflvl[j][1] =
+                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
+                                             s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
+            }
+        } else {
+            memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
+                   sizeof(s->s.h.segmentation.feat[i].lflvl));
+        }
+    }
+
+    /* tiling info */
+    if ((ret = update_size(avctx, w, h)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
+               w, h, s->pix_fmt);
+        return ret;
+    }
+    for (s->s.h.tiling.log2_tile_cols = 0;
+         s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
+         s->s.h.tiling.log2_tile_cols++) ;
+    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
+    max = FFMAX(0, max - 1);
+    while (max > s->s.h.tiling.log2_tile_cols) {
+        if (get_bits1(&s->gb))
+            s->s.h.tiling.log2_tile_cols++;
+        else
+            break;
+    }
+    s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
+    s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
+    if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
+        int n_range_coders;
+        VPXRangeCoder *rc;
+
+        if (s->td) {
+            for (i = 0; i < s->active_tile_cols; i++)
+                vp9_tile_data_free(&s->td[i]);
+            av_freep(&s->td);
+        }
+
+        s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
+        s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
+                              s->s.h.tiling.tile_cols : 1;
+        vp9_alloc_entries(avctx, s->sb_rows);
+        if (avctx->active_thread_type == FF_THREAD_SLICE) {
+            n_range_coders = 4; // max_tile_rows
+        } else {
+            n_range_coders = s->s.h.tiling.tile_cols;
+        }
+        s->td = av_calloc(s->active_tile_cols, sizeof(VP9TileData) +
+                                 n_range_coders * sizeof(VPXRangeCoder));
+        if (!s->td)
+            return AVERROR(ENOMEM);
+        rc = (VPXRangeCoder *) &s->td[s->active_tile_cols];
+        for (i = 0; i < s->active_tile_cols; i++) {
+            s->td[i].s = s;
+            s->td[i].c_b = rc;
+            rc += n_range_coders;
+        }
+    }
+
+    /* check reference frames */
+    if (!s->s.h.keyframe && !s->s.h.intraonly) {
+        int valid_ref_frame = 0;
+        for (i = 0; i < 3; i++) {
+            AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
+            int refw = ref->width, refh = ref->height;
+
+            if (ref->format != avctx->pix_fmt) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "Ref pixfmt (%s) did not match current frame (%s)",
+                       av_get_pix_fmt_name(ref->format),
+                       av_get_pix_fmt_name(avctx->pix_fmt));
+                return AVERROR_INVALIDDATA;
+            } else if (refw == w && refh == h) {
+                s->mvscale[i][0] = s->mvscale[i][1] = 0;
+            } else {
+                /* Check to make sure at least one of frames that */
+                /* this frame references has valid dimensions     */
+                if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
+                    av_log(avctx, AV_LOG_WARNING,
+                           "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
+                           refw, refh, w, h);
+                    s->mvscale[i][0] = s->mvscale[i][1] = REF_INVALID_SCALE;
+                    continue;
+                }
+                s->mvscale[i][0] = (refw << 14) / w;
+                s->mvscale[i][1] = (refh << 14) / h;
+                s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
+                s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
+            }
+            valid_ref_frame++;
+        }
+        if (!valid_ref_frame) {
+            av_log(avctx, AV_LOG_ERROR, "No valid reference frame is found, bitstream not supported\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
+        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
+                           s->prob_ctx[3].p = ff_vp9_default_probs;
+        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
+               sizeof(ff_vp9_default_coef_probs));
+        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
+               sizeof(ff_vp9_default_coef_probs));
+        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
+               sizeof(ff_vp9_default_coef_probs));
+        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
+               sizeof(ff_vp9_default_coef_probs));
+    } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
+        s->prob_ctx[c].p = ff_vp9_default_probs;
+        memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
+               sizeof(ff_vp9_default_coef_probs));
+    }
+
+    // next 16 bits is size of the rest of the header (arith-coded)
+    s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
+    s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
+
+    data2 = align_get_bits(&s->gb);
+    if (size2 > size - (data2 - data)) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
+        return AVERROR_INVALIDDATA;
+    }
+    ret = ff_vpx_init_range_decoder(&s->c, data2, size2);
+    if (ret < 0)
+        return ret;
+
+    if (vpx_rac_get_prob_branchy(&s->c, 128)) { // marker bit
+        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    for (i = 0; i < s->active_tile_cols; i++) {
+        if (s->s.h.keyframe || s->s.h.intraonly) {
+            memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
+            memset(s->td[i].counts.eob,  0, sizeof(s->td[0].counts.eob));
+        } else {
+            memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
+        }
+        s->td[i].nb_block_structure = 0;
+    }
+
+    /* FIXME is it faster to not copy here, but do it down in the fw updates
+     * as explicit copies if the fw update is missing (and skip the copy upon
+     * fw update)? */
+    s->prob.p = s->prob_ctx[c].p;
+
+    // txfm updates
+    if (s->s.h.lossless) {
+        s->s.h.txfmmode = TX_4X4;
+    } else {
+        s->s.h.txfmmode = vp89_rac_get_uint(&s->c, 2);
+        if (s->s.h.txfmmode == 3)
+            s->s.h.txfmmode += vp89_rac_get(&s->c);
+
+        if (s->s.h.txfmmode == TX_SWITCHABLE) {
+            for (i = 0; i < 2; i++)
+                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
+            for (i = 0; i < 2; i++)
+                for (j = 0; j < 2; j++)
+                    if (vpx_rac_get_prob_branchy(&s->c, 252))
+                        s->prob.p.tx16p[i][j] =
+                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
+            for (i = 0; i < 2; i++)
+                for (j = 0; j < 3; j++)
+                    if (vpx_rac_get_prob_branchy(&s->c, 252))
+                        s->prob.p.tx32p[i][j] =
+                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
+        }
+    }
+
+    // coef updates
+    for (i = 0; i < 4; i++) {
+        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
+        if (vp89_rac_get(&s->c)) {
+            for (j = 0; j < 2; j++)
+                for (k = 0; k < 2; k++)
+                    for (l = 0; l < 6; l++)
+                        for (m = 0; m < 6; m++) {
+                            uint8_t *p = s->prob.coef[i][j][k][l][m];
+                            uint8_t *r = ref[j][k][l][m];
+                            if (m >= 3 && l == 0) // dc only has 3 pt
+                                break;
+                            for (n = 0; n < 3; n++) {
+                                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                                    p[n] = update_prob(&s->c, r[n]);
+                                else
+                                    p[n] = r[n];
+                            }
+                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
+                        }
+        } else {
+            for (j = 0; j < 2; j++)
+                for (k = 0; k < 2; k++)
+                    for (l = 0; l < 6; l++)
+                        for (m = 0; m < 6; m++) {
+                            uint8_t *p = s->prob.coef[i][j][k][l][m];
+                            uint8_t *r = ref[j][k][l][m];
+                            if (m > 3 && l == 0) // dc only has 3 pt
+                                break;
+                            memcpy(p, r, 3);
+                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
+                        }
+        }
+        if (s->s.h.txfmmode == i)
+            break;
+    }
+
+    // mode updates
+    for (i = 0; i < 3; i++)
+        if (vpx_rac_get_prob_branchy(&s->c, 252))
+            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
+    if (!s->s.h.keyframe && !s->s.h.intraonly) {
+        for (i = 0; i < 7; i++)
+            for (j = 0; j < 3; j++)
+                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                    s->prob.p.mv_mode[i][j] =
+                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);
+
+        if (s->s.h.filtermode == FILTER_SWITCHABLE)
+            for (i = 0; i < 4; i++)
+                for (j = 0; j < 2; j++)
+                    if (vpx_rac_get_prob_branchy(&s->c, 252))
+                        s->prob.p.filter[i][j] =
+                            update_prob(&s->c, s->prob.p.filter[i][j]);
+
+        for (i = 0; i < 4; i++)
+            if (vpx_rac_get_prob_branchy(&s->c, 252))
+                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
+
+        if (s->s.h.allowcompinter) {
+            s->s.h.comppredmode = vp89_rac_get(&s->c);
+            if (s->s.h.comppredmode)
+                s->s.h.comppredmode += vp89_rac_get(&s->c);
+            if (s->s.h.comppredmode == PRED_SWITCHABLE)
+                for (i = 0; i < 5; i++)
+                    if (vpx_rac_get_prob_branchy(&s->c, 252))
+                        s->prob.p.comp[i] =
+                            update_prob(&s->c, s->prob.p.comp[i]);
+        } else {
+            s->s.h.comppredmode = PRED_SINGLEREF;
+        }
+
+        if (s->s.h.comppredmode != PRED_COMPREF) {
+            for (i = 0; i < 5; i++) {
+                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                    s->prob.p.single_ref[i][0] =
+                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
+                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                    s->prob.p.single_ref[i][1] =
+                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
+            }
+        }
+
+        if (s->s.h.comppredmode != PRED_SINGLEREF) {
+            for (i = 0; i < 5; i++)
+                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                    s->prob.p.comp_ref[i] =
+                        update_prob(&s->c, s->prob.p.comp_ref[i]);
+        }
+
+        for (i = 0; i < 4; i++)
+            for (j = 0; j < 9; j++)
+                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                    s->prob.p.y_mode[i][j] =
+                        update_prob(&s->c, s->prob.p.y_mode[i][j]);
+
+        for (i = 0; i < 4; i++)
+            for (j = 0; j < 4; j++)
+                for (k = 0; k < 3; k++)
+                    if (vpx_rac_get_prob_branchy(&s->c, 252))
+                        s->prob.p.partition[3 - i][j][k] =
+                            update_prob(&s->c,
+                                        s->prob.p.partition[3 - i][j][k]);
+
+        // mv fields don't use the update_prob subexp model for some reason
+        for (i = 0; i < 3; i++)
+            if (vpx_rac_get_prob_branchy(&s->c, 252))
+                s->prob.p.mv_joint[i] = (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
+
+        for (i = 0; i < 2; i++) {
+            if (vpx_rac_get_prob_branchy(&s->c, 252))
+                s->prob.p.mv_comp[i].sign =
+                    (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
+
+            for (j = 0; j < 10; j++)
+                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                    s->prob.p.mv_comp[i].classes[j] =
+                        (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
+
+            if (vpx_rac_get_prob_branchy(&s->c, 252))
+                s->prob.p.mv_comp[i].class0 =
+                    (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
+
+            for (j = 0; j < 10; j++)
+                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                    s->prob.p.mv_comp[i].bits[j] =
+                        (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
+        }
+
+        for (i = 0; i < 2; i++) {
+            for (j = 0; j < 2; j++)
+                for (k = 0; k < 3; k++)
+                    if (vpx_rac_get_prob_branchy(&s->c, 252))
+                        s->prob.p.mv_comp[i].class0_fp[j][k] =
+                            (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
+
+            for (j = 0; j < 3; j++)
+                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                    s->prob.p.mv_comp[i].fp[j] =
+                        (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
+        }
+
+        if (s->s.h.highprecisionmvs) {
+            for (i = 0; i < 2; i++) {
+                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                    s->prob.p.mv_comp[i].class0_hp =
+                        (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
+
+                if (vpx_rac_get_prob_branchy(&s->c, 252))
+                    s->prob.p.mv_comp[i].hp =
+                        (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
+            }
+        }
+    }
+
+    return (data2 - data) + size2;
+}
+
+/**
+ * Read the partitioning of one block from the tile's range coder (td->c) and
+ * decode it, recursing with bl + 1 for PARTITION_SPLIT.
+ *
+ * When the right and/or bottom half of the block is not inside the frame
+ * (col + hbs >= s->cols or row + hbs >= s->rows), the full partition tree is
+ * not read: at most one binary decision is taken from the range coder and
+ * only the sub-blocks that start inside the frame are decoded.
+ *
+ * @param td    per-tile state (range coder, left contexts, counters)
+ * @param row   vertical position of the block; half a block is hbs = 4 >> bl
+ * @param col   horizontal position of the block, same unit as row
+ * @param lflvl loop filter state handed through to ff_vp9_decode_block()
+ * @param yoff  offset of the block inside the luma plane
+ * @param uvoff offset of the block inside each chroma plane
+ * @param bl    block level currently being parsed
+ */
+static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
+                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
+{
+    const VP9Context *s = td->s;
+    /* Partition context: bit 0 comes from the above context, bit 1 from the
+     * left context, both sampled at bit position (3 - bl). */
+    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
+            (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
+    /* Key frames and intra-only frames use the fixed default table instead
+     * of the frame's own partition probabilities. */
+    const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
+                                                     s->prob.p.partition[bl][c];
+    enum BlockPartition bp;
+    /* Half the block size at this level, in row/col units; multiplied by 8
+     * below to obtain pixel offsets. */
+    ptrdiff_t hbs = 4 >> bl;
+    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
+    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
+    int bytesperpixel = s->bytesperpixel;
+
+    if (bl == BL_8X8) {
+        /* Last level: no recursion, the partition is passed on to the block
+         * decoder as is. */
+        bp = vp89_rac_get_tree(td->c, ff_vp9_partition_tree, p);
+        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
+    } else if (col + hbs < s->cols) { // FIXME why not <=?
+        if (row + hbs < s->rows) { // FIXME why not <=?
+            /* Right and bottom halves are both inside the frame: read the
+             * full partition tree. */
+            bp = vp89_rac_get_tree(td->c, ff_vp9_partition_tree, p);
+            switch (bp) {
+            case PARTITION_NONE:
+                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
+                break;
+            case PARTITION_H:
+                /* Top block, then the block hbs rows further down. */
+                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
+                yoff  += hbs * 8 * y_stride;
+                uvoff += hbs * 8 * uv_stride >> s->ss_v;
+                ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
+                break;
+            case PARTITION_V:
+                /* Left block, then the block hbs columns to the right. */
+                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
+                yoff  += hbs * 8 * bytesperpixel;
+                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
+                ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
+                break;
+            case PARTITION_SPLIT:
+                /* Four children in the order top-left, top-right,
+                 * bottom-left, bottom-right. */
+                decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
+                decode_sb(td, row, col + hbs, lflvl,
+                          yoff + 8 * hbs * bytesperpixel,
+                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
+                yoff  += hbs * 8 * y_stride;
+                uvoff += hbs * 8 * uv_stride >> s->ss_v;
+                decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
+                decode_sb(td, row + hbs, col + hbs, lflvl,
+                          yoff + 8 * hbs * bytesperpixel,
+                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
+                break;
+            default:
+                av_assert0(0);
+            }
+        } else if (vpx_rac_get_prob_branchy(td->c, p[1])) {
+            /* Bottom half is outside the frame: one decision with p[1]
+             * selects SPLIT, of which only the two top children are
+             * decoded. */
+            bp = PARTITION_SPLIT;
+            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
+            decode_sb(td, row, col + hbs, lflvl,
+                      yoff + 8 * hbs * bytesperpixel,
+                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
+        } else {
+            /* ... or PARTITION_H, of which only the top block is decoded. */
+            bp = PARTITION_H;
+            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
+        }
+    } else if (row + hbs < s->rows) { // FIXME why not <=?
+        /* Right half is outside the frame: one decision with p[2] selects
+         * between SPLIT (two left children) and PARTITION_V (left block). */
+        if (vpx_rac_get_prob_branchy(td->c, p[2])) {
+            bp = PARTITION_SPLIT;
+            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
+            yoff  += hbs * 8 * y_stride;
+            uvoff += hbs * 8 * uv_stride >> s->ss_v;
+            decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
+        } else {
+            bp = PARTITION_V;
+            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
+        }
+    } else {
+        /* Right and bottom halves are both outside: nothing is read, SPLIT
+         * is implied and only the top-left child is decoded. */
+        bp = PARTITION_SPLIT;
+        decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
+    }
+    /* Count the partition chosen for this level and context. */
+    td->counts.partition[bl][c][bp]++;
+}
+
+/**
+ * Counterpart of decode_sb() that reads nothing from the range coder: the
+ * block level and partition are taken from the VP9Block that td->b points
+ * to. decode_tiles() calls it when s->pass == 2.
+ *
+ * @param td    per-tile state; td->b supplies the stored block level/partition
+ * @param row   vertical position of the block; half a block is hbs = 4 >> bl
+ * @param col   horizontal position of the block, same unit as row
+ * @param lflvl loop filter state handed through to ff_vp9_decode_block()
+ * @param yoff  offset of the block inside the luma plane
+ * @param uvoff offset of the block inside each chroma plane
+ * @param bl    block level currently being visited
+ */
+static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
+                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
+{
+    const VP9Context *s = td->s;
+    /* Snapshot of td->b on entry; both halves of an H/V partition below use
+     * this same pointer for bl and bp.
+     * NOTE(review): ff_vp9_decode_block() presumably advances td->b, which
+     * would be why the pointer is cached here - confirm. */
+    VP9Block *b = td->b;
+    ptrdiff_t hbs = 4 >> bl;
+    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
+    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
+    int bytesperpixel = s->bytesperpixel;
+
+    if (bl == BL_8X8) {
+        av_assert2(b->bl == BL_8X8);
+        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
+    } else if (td->b->bl == bl) {
+        /* The stored block lives at this level: decode it, plus its second
+         * half for H/V partitions when that half starts inside the frame. */
+        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
+        if (b->bp == PARTITION_H && row + hbs < s->rows) {
+            yoff  += hbs * 8 * y_stride;
+            uvoff += hbs * 8 * uv_stride >> s->ss_v;
+            ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
+        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
+            yoff  += hbs * 8 * bytesperpixel;
+            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
+            ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
+        }
+    } else {
+        /* The stored block is at a deeper level: descend into every child
+         * that starts inside the frame, in the order top-left, top-right,
+         * bottom-left, bottom-right. */
+        decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
+        if (col + hbs < s->cols) { // FIXME why not <=?
+            if (row + hbs < s->rows) {
+                decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
+                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
+                yoff  += hbs * 8 * y_stride;
+                uvoff += hbs * 8 * uv_stride >> s->ss_v;
+                decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
+                decode_sb_mem(td, row + hbs, col + hbs, lflvl,
+                              yoff + 8 * hbs * bytesperpixel,
+                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
+            } else {
+                /* Bottom half outside the frame: only the top-right child
+                 * remains. */
+                yoff  += hbs * 8 * bytesperpixel;
+                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
+                decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
+            }
+        } else if (row + hbs < s->rows) {
+            /* Right half outside the frame: only the bottom-left child
+             * remains. */
+            yoff  += hbs * 8 * y_stride;
+            uvoff += hbs * 8 * uv_stride >> s->ss_v;
+            decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
+        }
+    }
+}
+
+/**
+ * Compute the half-open range [*start, *end) covered by tile number idx when
+ * n units are distributed over (1 << log2_n) tiles.
+ *
+ * Both boundaries are clamped to n and then shifted left by 3; the callers in
+ * this file pass a superblock count for n and step through the resulting
+ * range in increments of 8.
+ *
+ * @param start  receives the first position of the tile
+ * @param end    receives the position one past the tile
+ * @param idx    tile index
+ * @param log2_n log2 of the number of tiles
+ * @param n      number of units to distribute
+ */
+static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
+{
+    const int first = (idx * n) >> log2_n;
+    const int past  = ((idx + 1) * n) >> log2_n;
+    const int first_clamped = FFMIN(first, n);
+    const int past_clamped  = FFMIN(past,  n);
+
+    *start = first_clamped << 3;
+    *end   = past_clamped  << 3;
+}
+
+/**
+ * Release the intra prediction buffer and the per-tile data of every active
+ * tile column. The s->td array itself is left allocated; vp9_decode_free()
+ * and decode_frame_header() free it separately.
+ *
+ * @param s decoder context
+ */
+static void free_buffers(VP9Context *s)
+{
+    int i;
+
+    av_freep(&s->intra_pred_data[0]);
+
+    /* s->td may be NULL while active_tile_cols is still non-zero, e.g. when
+     * the tile data allocation in decode_frame_header() failed after
+     * active_tile_cols had already been updated. Indexing a NULL array
+     * would be undefined behavior, so there is nothing to do in that case. */
+    if (!s->td)
+        return;
+
+    for (i = 0; i < s->active_tile_cols; i++)
+        vp9_tile_data_free(&s->td[i]);
+}
+
+/**
+ * Tear down the decoder: drop the three internal frames, the frame extradata
+ * pool, the eight reference slots (current and next), the per-tile buffers
+ * and, when threading support is compiled in, the thread bookkeeping.
+ *
+ * @param avctx codec context whose priv_data is the VP9Context
+ * @return always 0
+ */
+static av_cold int vp9_decode_free(AVCodecContext *avctx)
+{
+    VP9Context *s = avctx->priv_data;
+    int idx;
+
+    /* Internal frames: unreference first, then free the AVFrame. */
+    idx = 0;
+    while (idx < 3) {
+        VP9Frame *frm = &s->s.frames[idx];
+
+        vp9_frame_unref(avctx, frm);
+        av_frame_free(&frm->tf.f);
+        idx++;
+    }
+
+    av_buffer_pool_uninit(&s->frame_extradata_pool);
+
+    /* Reference slots, both the current set and the pending one. */
+    idx = 0;
+    while (idx < 8) {
+        ff_thread_release_ext_buffer(avctx, &s->s.refs[idx]);
+        av_frame_free(&s->s.refs[idx].f);
+        ff_thread_release_ext_buffer(avctx, &s->next_refs[idx]);
+        av_frame_free(&s->next_refs[idx].f);
+        idx++;
+    }
+
+    /* Per-tile contents go before the tile array itself. */
+    free_buffers(s);
+#if HAVE_THREADS
+    av_freep(&s->entries);
+    ff_pthread_free(s, vp9_context_offsets);
+#endif
+    av_freep(&s->td);
+
+    return 0;
+}
+
+/**
+ * Decode all tiles of the current frame using the tile data in s->td[0].
+ *
+ * For every tile row, one range decoder per tile column is first set up from
+ * the packet data. The superblock rows of that tile row are then decoded
+ * across all tile columns, followed (unless s->pass == 1) by the intra
+ * prediction backup, the loop filter and a progress report.
+ *
+ * @param avctx codec context whose priv_data is the VP9Context
+ * @param data  start of the tile data
+ * @param size  number of bytes available at data
+ * @return 0 on success, AVERROR_INVALIDDATA on malformed tile data, or the
+ *         negative value returned by ff_vpx_init_range_decoder()
+ */
+static int decode_tiles(AVCodecContext *avctx,
+                        const uint8_t *data, int size)
+{
+    VP9Context *s = avctx->priv_data;
+    VP9TileData *td = &s->td[0];
+    int row, col, tile_row, tile_col, ret;
+    int bytesperpixel;
+    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
+    AVFrame *f;
+    ptrdiff_t yoff, uvoff, ls_y, ls_uv;
+
+    f = s->s.frames[CUR_FRAME].tf.f;
+    ls_y = f->linesize[0];
+    ls_uv =f->linesize[1];
+    bytesperpixel = s->bytesperpixel;
+
+    yoff = uvoff = 0;
+    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
+        set_tile_offset(&tile_row_start, &tile_row_end,
+                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
+
+        /* Set up one range decoder per tile column of this tile row. */
+        for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
+            int64_t tile_size;
+
+            /* Every tile except the very last one of the frame is preceded
+             * by a 32-bit size read with AV_RB32(); the last tile takes all
+             * remaining bytes. */
+            if (tile_col == s->s.h.tiling.tile_cols - 1 &&
+                tile_row == s->s.h.tiling.tile_rows - 1) {
+                tile_size = size;
+            } else {
+                tile_size = AV_RB32(data);
+                data += 4;
+                size -= 4;
+            }
+            if (tile_size > size)
+                return AVERROR_INVALIDDATA;
+            ret = ff_vpx_init_range_decoder(&td->c_b[tile_col], data, tile_size);
+            if (ret < 0)
+                return ret;
+            if (vpx_rac_get_prob_branchy(&td->c_b[tile_col], 128)) // marker bit
+                return AVERROR_INVALIDDATA;
+            data += tile_size;
+            size -= tile_size;
+        }
+
+        /* One iteration per superblock row: row advances by 8 and the plane
+         * offsets by 64 lines (chroma scaled by ss_v). */
+        for (row = tile_row_start; row < tile_row_end;
+             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
+            VP9Filter *lflvl_ptr = s->lflvl;
+            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
+
+            for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
+                set_tile_offset(&tile_col_start, &tile_col_end,
+                                tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
+                td->tile_col_start = tile_col_start;
+                /* The left contexts are reset and the range coder of this
+                 * tile column is selected at the start of every tile
+                 * column; this is skipped when s->pass == 2. */
+                if (s->pass != 2) {
+                    memset(td->left_partition_ctx, 0, 8);
+                    memset(td->left_skip_ctx, 0, 8);
+                    if (s->s.h.keyframe || s->s.h.intraonly) {
+                        memset(td->left_mode_ctx, DC_PRED, 16);
+                    } else {
+                        memset(td->left_mode_ctx, NEARESTMV, 8);
+                    }
+                    memset(td->left_y_nnz_ctx, 0, 16);
+                    memset(td->left_uv_nnz_ctx, 0, 32);
+                    memset(td->left_segpred_ctx, 0, 8);
+
+                    td->c = &td->c_b[tile_col];
+                }
+
+                /* lflvl_ptr is not reset between tile columns, so it walks
+                 * the whole superblock row from left to right. */
+                for (col = tile_col_start;
+                     col < tile_col_end;
+                     col += 8, yoff2 += 64 * bytesperpixel,
+                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
+                    // FIXME integrate with lf code (i.e. zero after each
+                    // use, similar to invtxfm coefficients, or similar)
+                    if (s->pass != 1) {
+                        memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
+                    }
+
+                    if (s->pass == 2) {
+                        /* Replay the stored partitioning instead of parsing
+                         * it from the bitstream. */
+                        decode_sb_mem(td, row, col, lflvl_ptr,
+                                      yoff2, uvoff2, BL_64X64);
+                    } else {
+                        /* NOTE(review): vpx_rac_is_end() presumably reports
+                         * that the range coder ran out of input - confirm. */
+                        if (vpx_rac_is_end(td->c)) {
+                            return AVERROR_INVALIDDATA;
+                        }
+                        decode_sb(td, row, col, lflvl_ptr,
+                                  yoff2, uvoff2, BL_64X64);
+                    }
+                }
+            }
+
+            /* In pass 1 none of the steps below (backup, loop filter,
+             * progress report) are performed. */
+            if (s->pass == 1)
+                continue;
+
+            // backup pre-loopfilter reconstruction data for intra
+            // prediction of next row of sb64s
+            if (row + 8 < s->rows) {
+                memcpy(s->intra_pred_data[0],
+                       f->data[0] + yoff + 63 * ls_y,
+                       8 * s->cols * bytesperpixel);
+                memcpy(s->intra_pred_data[1],
+                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
+                       8 * s->cols * bytesperpixel >> s->ss_h);
+                memcpy(s->intra_pred_data[2],
+                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
+                       8 * s->cols * bytesperpixel >> s->ss_h);
+            }
+
+            // loopfilter one row
+            if (s->s.h.filter.level) {
+                yoff2 = yoff;
+                uvoff2 = uvoff;
+                lflvl_ptr = s->lflvl;
+                for (col = 0; col < s->cols;
+                     col += 8, yoff2 += 64 * bytesperpixel,
+                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
+                    ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
+                                         yoff2, uvoff2);
+                }
+            }
+
+            // FIXME maybe we can make this more finegrained by running the
+            // loopfilter per-block instead of after each sbrow
+            // In fact that would also make intra pred left preparation easier?
+            ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
+        }
+    }
+    return 0;
+}
+
+#if HAVE_THREADS
+/**
+ * Decode one tile column (selected by jobnr) of the current frame, across
+ * all tile rows, and report per-superblock-row progress.
+ *
+ * Unlike decode_tiles(), no range decoder is initialized here and td->c_b is
+ * indexed by tile row; no loop filtering is done in this function.
+ * NOTE(review): the range decoders in td->c_b are presumably set up by the
+ * caller before the jobs are started - confirm.
+ *
+ * @param avctx    codec context whose priv_data is the VP9Context
+ * @param tdata    unused in this function
+ * @param jobnr    index of the tile column, also the index into s->td
+ * @param threadnr unused in this function
+ * @return always 0
+ */
+static av_always_inline
+int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
+                              int threadnr)
+{
+    VP9Context *s = avctx->priv_data;
+    VP9TileData *td = &s->td[jobnr];
+    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
+    int bytesperpixel = s->bytesperpixel, row, col, tile_row;
+    unsigned tile_cols_len;
+    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
+    VP9Filter *lflvl_ptr_base;
+    AVFrame *f;
+
+    f = s->s.frames[CUR_FRAME].tf.f;
+    ls_y = f->linesize[0];
+    ls_uv =f->linesize[1];
+
+    set_tile_offset(&tile_col_start, &tile_col_end,
+                    jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
+    td->tile_col_start  = tile_col_start;
+    /* Start offsets and loop filter pointer of this tile column's first
+     * superblock (tile_col_start >> 3 is its superblock column). */
+    uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
+    yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
+    lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
+
+    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
+        set_tile_offset(&tile_row_start, &tile_row_end,
+                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
+
+        /* One range coder per tile row for this tile column. */
+        td->c = &td->c_b[tile_row];
+        for (row = tile_row_start; row < tile_row_end;
+             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
+            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
+            /* s->lflvl is addressed as sb_cols entries per superblock row
+             * here. */
+            VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
+
+            /* Reset the left contexts at the start of every superblock row
+             * of the tile. */
+            memset(td->left_partition_ctx, 0, 8);
+            memset(td->left_skip_ctx, 0, 8);
+            if (s->s.h.keyframe || s->s.h.intraonly) {
+                memset(td->left_mode_ctx, DC_PRED, 16);
+            } else {
+                memset(td->left_mode_ctx, NEARESTMV, 8);
+            }
+            memset(td->left_y_nnz_ctx, 0, 16);
+            memset(td->left_uv_nnz_ctx, 0, 32);
+            memset(td->left_segpred_ctx, 0, 8);
+
+            for (col = tile_col_start;
+                 col < tile_col_end;
+                 col += 8, yoff2 += 64 * bytesperpixel,
+                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
+                // FIXME integrate with lf code (i.e. zero after each
+                // use, similar to invtxfm coefficients, or similar)
+                memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
+                decode_sb(td, row, col, lflvl_ptr,
+                            yoff2, uvoff2, BL_64X64);
+            }
+
+            // backup pre-loopfilter reconstruction data for intra
+            // prediction of next row of sb64s
+            /* Only the part of the line belonging to this tile column is
+             * copied, at the matching offset in intra_pred_data. */
+            tile_cols_len = tile_col_end - tile_col_start;
+            if (row + 8 < s->rows) {
+                memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
+                       f->data[0] + yoff + 63 * ls_y,
+                       8 * tile_cols_len * bytesperpixel);
+                memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
+                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
+                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
+                memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
+                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
+                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
+            }
+
+            /* Signal that this tile column finished superblock row
+             * (row >> 3); loopfilter_proc() waits on this. */
+            vp9_report_tile_progress(s, row >> 3, 1);
+        }
+    }
+    return 0;
+}
+
+/**
+ * Loop filter the current frame one superblock row at a time.
+ *
+ * Before touching a row, the function waits via vp9_await_tile_progress()
+ * with the number of tile columns as the awaited count. Rows are filtered
+ * only when the frame's filter level is non-zero; the wait happens either
+ * way.
+ *
+ * @param avctx codec context whose priv_data is the VP9Context
+ * @return always 0
+ */
+static av_always_inline
+int loopfilter_proc(AVCodecContext *avctx)
+{
+    VP9Context *s = avctx->priv_data;
+    AVFrame *cur = s->s.frames[CUR_FRAME].tf.f;
+    ptrdiff_t luma_stride   = cur->linesize[0];
+    ptrdiff_t chroma_stride = cur->linesize[1];
+    int bpp = s->bytesperpixel;
+    int sbrow;
+
+    for (sbrow = 0; sbrow < s->sb_rows; sbrow++) {
+        ptrdiff_t y_pos, uv_pos;
+        VP9Filter *flt;
+        int col;
+
+        vp9_await_tile_progress(s, sbrow, s->s.h.tiling.tile_cols);
+
+        if (!s->s.h.filter.level)
+            continue;
+
+        /* Offsets and filter entry of the first superblock in this row. */
+        y_pos  = (luma_stride * 64) * sbrow;
+        uv_pos = (chroma_stride * 64 >> s->ss_v) * sbrow;
+        flt    = s->lflvl + s->sb_cols * sbrow;
+
+        for (col = 0; col < s->cols; col += 8) {
+            ff_vp9_loopfilter_sb(avctx, flt, sbrow << 3, col, y_pos, uv_pos);
+
+            /* Step to the next superblock on the right. */
+            y_pos  += 64 * bpp;
+            uv_pos += 64 * bpp >> s->ss_h;
+            flt++;
+        }
+    }
+
+    return 0;
+}
+#endif
+
+/**
+ * Attach AV_VIDEO_ENC_PARAMS_VP9 side data to the frame.
+ *
+ * The frame-level quantizer index and its dc/ac deltas are always exported.
+ * Per-block entries are exported only when segmentation is enabled; they are
+ * taken from the block_structure records collected in each active tile's
+ * data.
+ *
+ * @param s     decoder context
+ * @param frame frame receiving the side data; its segmentation_map is read
+ *              to find the segment of each block
+ * @return 0 on success, AVERROR(ENOMEM) if the side data cannot be created
+ */
+static int vp9_export_enc_params(VP9Context *s, VP9Frame *frame)
+{
+    AVVideoEncParams *par;
+    unsigned int t, total_blocks = 0, out_idx = 0;
+
+    if (s->s.h.segmentation.enabled)
+        for (t = 0; t < s->active_tile_cols; t++)
+            total_blocks += s->td[t].nb_block_structure;
+
+    par = av_video_enc_params_create_side_data(frame->tf.f,
+                                               AV_VIDEO_ENC_PARAMS_VP9,
+                                               total_blocks);
+    if (!par)
+        return AVERROR(ENOMEM);
+
+    /* Frame-level values; both chroma planes share the same deltas. */
+    par->qp             = s->s.h.yac_qi;
+    par->delta_qp[0][0] = s->s.h.ydc_qdelta;
+    par->delta_qp[1][0] = s->s.h.uvdc_qdelta;
+    par->delta_qp[2][0] = s->s.h.uvdc_qdelta;
+    par->delta_qp[1][1] = s->s.h.uvac_qdelta;
+    par->delta_qp[2][1] = s->s.h.uvac_qdelta;
+
+    if (!total_blocks)
+        return 0;
+
+    /* Blocks are emitted tile by tile, in the order they were recorded. */
+    for (t = 0; t < s->active_tile_cols; t++) {
+        VP9TileData *td = &s->td[t];
+        unsigned int k;
+
+        for (k = 0; k < td->nb_block_structure; k++) {
+            AVVideoBlockParams *blk = av_video_enc_params_block(par, out_idx++);
+            unsigned int blk_row = td->block_structure[k].row;
+            unsigned int blk_col = td->block_structure[k].col;
+            uint8_t segment = frame->segmentation_map[blk_row * 8 * s->sb_cols + blk_col];
+
+            blk->src_x = blk_col * 8;
+            blk->src_y = blk_row * 8;
+            blk->w     = 1 << (3 + td->block_structure[k].block_size_idx_x);
+            blk->h     = 1 << (3 + td->block_structure[k].block_size_idx_y);
+
+            if (!s->s.h.segmentation.feat[segment].q_enabled)
+                continue;
+
+            /* Absolute segment quantizers are turned into a delta against
+             * the frame-level value. */
+            blk->delta_qp = s->s.h.segmentation.feat[segment].q_val;
+            if (s->s.h.segmentation.absolute_vals)
+                blk->delta_qp -= par->qp;
+        }
+    }
+
+    return 0;
+}
+/* Decode one packet: returns pkt->size on success (setting *got_frame when a picture is output) or a negative error. */
+static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
+                            int *got_frame, AVPacket *pkt)
+{
+    const uint8_t *data = pkt->data;
+    int size = pkt->size;
+    VP9Context *s = avctx->priv_data;
+    int ret, i, j, ref;
+    int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
+                            (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
+    AVFrame *f;
+
+    if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
+        return ret;
+    } else if (ret == 0) { // no new picture is coded: reference slot `ref` is output as-is
+        if (!s->s.refs[ref].f->buf[0]) {
+            av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
+            return AVERROR_INVALIDDATA;
+        }
+        if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
+            return ret;
+        frame->pts     = pkt->pts;
+        frame->pkt_dts = pkt->dts;
+        for (i = 0; i < 8; i++) { // carry all 8 reference slots over unchanged into next_refs[]
+            if (s->next_refs[i].f->buf[0])
+                ff_thread_release_ext_buffer(avctx, &s->next_refs[i]);
+            if (s->s.refs[i].f->buf[0] &&
+                (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
+                return ret;
+        }
+        *got_frame = 1;
+        return pkt->size;
+    }
+    data += ret; // ret > 0 here; presumably the number of header bytes parsed -- confirm in decode_frame_header()
+    size -= ret;
+    // Re-point REF_FRAME_SEGMAP at the outgoing CUR_FRAME unless the old map has to be retained.
+    if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
+        if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
+            vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
+        if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
+            (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
+            return ret;
+    }
+    if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0]) // REF_FRAME_MVPAIR is always dropped, then re-pointed at CUR_FRAME when eligible
+        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
+    if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
+        (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
+        return ret;
+    if (s->s.frames[CUR_FRAME].tf.f->buf[0]) // release the old CUR_FRAME and allocate a fresh one for this picture
+        vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
+    if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
+        return ret;
+    f = s->s.frames[CUR_FRAME].tf.f;
+    f->key_frame = s->s.h.keyframe;
+    f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+
+    if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
+        (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
+         s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
+        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]); // dimensions differ from the new CUR_FRAME: drop the kept map
+    }
+
+    // ref frame setup: build next_refs[] -- slots set in refreshrefmask take CUR_FRAME, the others keep their current reference
+    for (i = 0; i < 8; i++) {
+        if (s->next_refs[i].f->buf[0])
+            ff_thread_release_ext_buffer(avctx, &s->next_refs[i]);
+        if (s->s.h.refreshrefmask & (1 << i)) {
+            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
+        } else if (s->s.refs[i].f->buf[0]) {
+            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
+        }
+        if (ret < 0) // if neither branch ran, ret still holds an earlier non-negative value
+            return ret;
+    }
+
+    if (avctx->hwaccel) { // hardware path: hand the whole packet to the hwaccel and skip the software decode below
+        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
+        if (ret < 0)
+            return ret;
+        ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
+        if (ret < 0)
+            return ret;
+        ret = avctx->hwaccel->end_frame(avctx);
+        if (ret < 0)
+            return ret;
+        goto finish;
+    }
+
+    // main tile decode loop; the above_* context arrays are reset first
+    memset(s->above_partition_ctx, 0, s->cols);
+    memset(s->above_skip_ctx, 0, s->cols);
+    if (s->s.h.keyframe || s->s.h.intraonly) {
+        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
+    } else {
+        memset(s->above_mode_ctx, NEARESTMV, s->cols);
+    }
+    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
+    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
+    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
+    memset(s->above_segpred_ctx, 0, s->cols);
+    s->pass = s->s.frames[CUR_FRAME].uses_2pass =
+        avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode; // 1 => the do/while below runs twice
+    if ((ret = update_block_buffers(avctx)) < 0) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to allocate block buffers\n");
+        return ret;
+    }
+    if (s->s.h.refreshctx && s->s.h.parallelmode) { // save the current s->prob into prob_ctx[framectxid] right away
+        int j, k, l, m; // note: this j shadows the function-scope j
+
+        for (i = 0; i < 4; i++) {
+            for (j = 0; j < 2; j++)
+                for (k = 0; k < 2; k++)
+                    for (l = 0; l < 6; l++)
+                        for (m = 0; m < 6; m++)
+                            memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
+                                   s->prob.coef[i][j][k][l][m], 3);
+            if (s->s.h.txfmmode == i) // coef[] entries past index txfmmode are not copied
+                break;
+        }
+        s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
+        ff_thread_finish_setup(avctx);
+    } else if (!s->s.h.refreshctx) { // no ff_vp9_adapt_probs() call will follow, so setup is finished here
+        ff_thread_finish_setup(avctx);
+    }
+
+#if HAVE_THREADS
+    if (avctx->active_thread_type & FF_THREAD_SLICE) {
+        for (i = 0; i < s->sb_rows; i++)
+            atomic_store(&s->entries[i], 0); // one atomic entry per sb row is zeroed
+    }
+#endif
+
+    do { // executed once, or twice when s->pass starts at 1 (see the while condition)
+        for (i = 0; i < s->active_tile_cols; i++) { // rewind each tile column's working pointers to their *_base buffers
+            s->td[i].b = s->td[i].b_base;
+            s->td[i].block = s->td[i].block_base;
+            s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
+            s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
+            s->td[i].eob = s->td[i].eob_base;
+            s->td[i].uveob[0] = s->td[i].uveob_base[0];
+            s->td[i].uveob[1] = s->td[i].uveob_base[1];
+            s->td[i].error_info = 0;
+        }
+
+#if HAVE_THREADS
+        if (avctx->active_thread_type == FF_THREAD_SLICE) { // slice threading only: init every tile's range decoder, then run the workers
+            int tile_row, tile_col;
+
+            av_assert1(!s->pass);
+
+            for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
+                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
+                    int64_t tile_size;
+                    // Every tile except the very last one is preceded by a 32-bit big-endian size field.
+                    if (tile_col == s->s.h.tiling.tile_cols - 1 &&
+                        tile_row == s->s.h.tiling.tile_rows - 1) {
+                        tile_size = size;
+                    } else {
+                        tile_size = AV_RB32(data); // NOTE(review): no explicit size >= 4 check before this read; presumably relies on packet padding -- confirm
+                        data += 4;
+                        size -= 4;
+                    }
+                    if (tile_size > size)
+                        return AVERROR_INVALIDDATA;
+                    ret = ff_vpx_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
+                    if (ret < 0)
+                        return ret;
+                    if (vpx_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
+                        return AVERROR_INVALIDDATA;
+                    data += tile_size;
+                    size -= tile_size;
+                }
+            }
+
+            ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
+        } else
+#endif
+        { // otherwise (or when built without HAVE_THREADS): call decode_tiles() directly
+            ret = decode_tiles(avctx, data, size);
+            if (ret < 0) {
+                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0); // maximum progress is reported even on failure
+                return ret;
+            }
+        }
+
+        // Sum all counts fields into td[0].counts for tile threading (counts is walked as a flat array of unsigned)
+        if (avctx->active_thread_type == FF_THREAD_SLICE)
+            for (i = 1; i < s->s.h.tiling.tile_cols; i++)
+                for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
+                    ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
+
+        if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) { // skipped on the second run of a 2-pass decode (s->pass == 2 there)
+            ff_vp9_adapt_probs(s);
+            ff_thread_finish_setup(avctx);
+        }
+    } while (s->pass++ == 1); // repeats only when s->pass was 1 before this increment
+    ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0); // report maximum progress on CUR_FRAME
+
+    if (s->td->error_info < 0) { // only td[0].error_info is inspected here
+        av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n");
+        s->td->error_info = 0;
+        return AVERROR_INVALIDDATA;
+    }
+    if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
+        ret = vp9_export_enc_params(s, &s->s.frames[CUR_FRAME]);
+        if (ret < 0)
+            return ret;
+    }
+
+finish:
+    // ref frame setup: commit next_refs[] into s->s.refs[] (also reached from the hwaccel path)
+    for (i = 0; i < 8; i++) {
+        if (s->s.refs[i].f->buf[0])
+            ff_thread_release_ext_buffer(avctx, &s->s.refs[i]);
+        if (s->next_refs[i].f->buf[0] &&
+            (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
+            return ret;
+    }
+
+    if (!s->s.h.invisible) { // frames flagged invisible are decoded but not returned to the caller
+        if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
+            return ret;
+        *got_frame = 1;
+    }
+
+    return pkt->size;
+}
+
+/* Flush callback: unreference the three internal frames and release all eight reference slots. */
+static void vp9_decode_flush(AVCodecContext *avctx)
+{
+    VP9Context *const ctx = avctx->priv_data;
+
+    for (int n = 0; n < 3; n++)
+        vp9_frame_unref(avctx, &ctx->s.frames[n]);
+    for (int slot = 0; slot < 8; slot++)
+        ff_thread_release_ext_buffer(avctx, &ctx->s.refs[slot]);
+}
+
+/* Decoder init: reset cached state, call ff_pthread_init() when slice threading is
+ * active, and allocate an AVFrame for each of the 3 internal frames and the 8
+ * reference / next-reference slots. Returns 0 or a negative error code. */
+static av_cold int vp9_decode_init(AVCodecContext *avctx)
+{
+    VP9Context *const ctx = avctx->priv_data;
+    int n;
+
+    ctx->s.h.filter.sharpness = -1;
+    ctx->last_bpp             = 0;
+#if HAVE_THREADS
+    if (avctx->active_thread_type & FF_THREAD_SLICE) {
+        int err = ff_pthread_init(ctx, vp9_context_offsets);
+        if (err < 0)
+            return err;
+    }
+#endif
+    for (n = 0; n < 3; n++)
+        if (!(ctx->s.frames[n].tf.f = av_frame_alloc()))
+            return AVERROR(ENOMEM);
+    for (n = 0; n < 8; n++) {
+        /* Both allocations are attempted before either result is checked. */
+        ctx->s.refs[n].f    = av_frame_alloc();
+        ctx->next_refs[n].f = av_frame_alloc();
+        if (!(ctx->s.refs[n].f && ctx->next_refs[n].f))
+            return AVERROR(ENOMEM);
+    }
+    return 0;
+}
+
+#if HAVE_THREADS
+/* Thread-context update: make dst mirror src -- the 3 internal frames, the reference slots
+ * (taken from src's next_refs) and the fields copied below. Returns 0 or the first ref error. */
+static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+{
+    VP9Context *to = dst->priv_data, *from = src->priv_data;
+    int n, err;
+
+    for (n = 0; n < 3; n++) {
+        if (to->s.frames[n].tf.f->buf[0])
+            vp9_frame_unref(dst, &to->s.frames[n]);
+        if (from->s.frames[n].tf.f->buf[0] &&
+            (err = vp9_frame_ref(dst, &to->s.frames[n], &from->s.frames[n])) < 0)
+            return err;
+    }
+    for (n = 0; n < 8; n++) {
+        if (to->s.refs[n].f->buf[0])
+            ff_thread_release_ext_buffer(dst, &to->s.refs[n]);
+        if (from->next_refs[n].f->buf[0] &&
+            (err = ff_thread_ref_frame(&to->s.refs[n], &from->next_refs[n])) < 0)
+            return err;
+    }
+
+    to->s.h.invisible = from->s.h.invisible; /* frame header fields */
+    to->s.h.keyframe  = from->s.h.keyframe;
+    to->s.h.intraonly = from->s.h.intraonly;
+    to->s.h.bpp       = from->s.h.bpp;
+    to->s.h.segmentation.enabled       = from->s.h.segmentation.enabled; /* segmentation state */
+    to->s.h.segmentation.update_map    = from->s.h.segmentation.update_map;
+    to->s.h.segmentation.absolute_vals = from->s.h.segmentation.absolute_vals;
+    memcpy(&to->s.h.segmentation.feat, &from->s.h.segmentation.feat,
+           sizeof(to->s.h.segmentation.feat));
+    to->ss_v          = from->ss_v; /* format and dimension fields */
+    to->ss_h          = from->ss_h;
+    to->bytesperpixel = from->bytesperpixel;
+    to->gf_fmt        = from->gf_fmt;
+    to->w             = from->w;
+    to->h             = from->h;
+    to->bpp_index     = from->bpp_index;
+    to->pix_fmt       = from->pix_fmt;
+    memcpy(&to->prob_ctx, &from->prob_ctx, sizeof(to->prob_ctx)); /* whole-array copies */
+    memcpy(&to->s.h.lf_delta, &from->s.h.lf_delta, sizeof(to->s.h.lf_delta));
+
+    return 0;
+}
+#endif
+
+const FFCodec ff_vp9_decoder = { // codec table entry for the "vp9" decoder
+    .p.name                = "vp9",
+    CODEC_LONG_NAME("Google VP9"),
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_VP9,
+    .priv_data_size        = sizeof(VP9Context), // the callbacks read avctx->priv_data as a VP9Context
+    .init                  = vp9_decode_init,
+    .close                 = vp9_decode_free,
+    FF_CODEC_DECODE_CB(vp9_decode_frame),
+    .p.capabilities        = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
+    .caps_internal         = FF_CODEC_CAP_INIT_CLEANUP | // NOTE(review): presumably makes .close run after a failed .init (init returns without freeing) -- confirm
+                             FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
+                             FF_CODEC_CAP_ALLOCATE_PROGRESS,
+    .flush                 = vp9_decode_flush,
+    UPDATE_THREAD_CONTEXT(vp9_decode_update_thread_context),
+    .p.profiles            = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
+    .bsfs                  = "vp9_superframe_split", // bitstream filter name; presumably applied to packets before vp9_decode_frame -- confirm
+    .hw_configs            = (const AVCodecHWConfigInternal *const []) { // each entry is compiled in only when its CONFIG_VP9_*_HWACCEL is set
+#if CONFIG_VP9_DXVA2_HWACCEL
+                               HWACCEL_DXVA2(vp9),
+#endif
+#if CONFIG_VP9_D3D11VA_HWACCEL
+                               HWACCEL_D3D11VA(vp9),
+#endif
+#if CONFIG_VP9_D3D11VA2_HWACCEL
+                               HWACCEL_D3D11VA2(vp9),
+#endif
+#if CONFIG_VP9_NVDEC_HWACCEL
+                               HWACCEL_NVDEC(vp9),
+#endif
+#if CONFIG_VP9_VAAPI_HWACCEL
+                               HWACCEL_VAAPI(vp9),
+#endif
+#if CONFIG_VP9_VDPAU_HWACCEL
+                               HWACCEL_VDPAU(vp9),
+#endif
+#if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
+                               HWACCEL_VIDEOTOOLBOX(vp9),
+#endif
+                               NULL // list terminator
+                           },
+};
diff --git a/media/ffvpx/libavcodec/vp9.h b/media/ffvpx/libavcodec/vp9.h
new file mode 100644
index 0000000000..c8d07ad986
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9.h
@@ -0,0 +1,73 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP9_H
+#define AVCODEC_VP9_H
+
+enum TxfmMode { /* numbered sequentially from 0 */
+    TX_4X4        = 0,
+    TX_8X8        = 1,
+    TX_16X16      = 2,
+    TX_32X32      = 3,
+    N_TXFM_SIZES  = 4, /* count of the entries above */
+    TX_SWITCHABLE = N_TXFM_SIZES, /* shares the value of N_TXFM_SIZES */
+    N_TXFM_MODES  = 5  /* count of distinct values, TX_SWITCHABLE included */
+};
+
+enum TxfmType { /* numbered sequentially from 0 */
+    DCT_DCT      = 0,
+    DCT_ADST     = 1,
+    ADST_DCT     = 2,
+    ADST_ADST    = 3,
+    N_TXFM_TYPES = 4 /* count of the entries above */
+};
+
+enum IntraPredMode { /* numbered sequentially from 0 */
+    VERT_PRED            = 0,
+    HOR_PRED             = 1,
+    DC_PRED              = 2,
+    DIAG_DOWN_LEFT_PRED  = 3,
+    DIAG_DOWN_RIGHT_PRED = 4,
+    VERT_RIGHT_PRED      = 5,
+    HOR_DOWN_PRED        = 6,
+    VERT_LEFT_PRED       = 7,
+    HOR_UP_PRED          = 8,
+    TM_VP8_PRED          = 9,
+    LEFT_DC_PRED         = 10,
+    TOP_DC_PRED          = 11,
+    DC_128_PRED          = 12,
+    DC_127_PRED          = 13,
+    DC_129_PRED          = 14,
+    N_INTRA_PRED_MODES   = 15 /* count of the entries above */
+};
+
+enum FilterMode { /* numbered sequentially from 0 */
+    FILTER_8TAP_SMOOTH  = 0,
+    FILTER_8TAP_REGULAR = 1,
+    FILTER_8TAP_SHARP   = 2,
+    FILTER_BILINEAR     = 3,
+    N_FILTERS           = 4, /* count of the entries above */
+    FILTER_SWITCHABLE   = N_FILTERS, /* shares the value of N_FILTERS */
+};
+
+#endif /* AVCODEC_VP9_H */
diff --git a/media/ffvpx/libavcodec/vp9_mc_template.c b/media/ffvpx/libavcodec/vp9_mc_template.c
new file mode 100644
index 0000000000..e654c0e5ed
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9_mc_template.c
@@ -0,0 +1,439 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define ROUNDED_DIV_MVx2(a, b) /* rounded mean of two VP9mv; each argument is evaluated twice */ \
+    (VP9mv) { .x = ROUNDED_DIV((a).x + (b).x, 2), .y = ROUNDED_DIV((a).y + (b).y, 2) }
+#define ROUNDED_DIV_MVx4(a, b, c, d) /* rounded mean of four VP9mv; each argument is evaluated twice */ \
+    (VP9mv) { .x = ROUNDED_DIV((a).x + (b).x + (c).x + (d).x, 4), \
+               .y = ROUNDED_DIV((a).y + (b).y + (c).y + (d).y, 4) }
+
+static void FN(inter_pred)(VP9TileData *td)
+{
+    static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
+        { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
+        { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
+    };
+    const VP9Context *s = td->s;
+    VP9Block *b = td->b;
+    int row = td->row, col = td->col;
+    const ThreadFrame *tref1 = &s->s.refs[s->s.h.refidx[b->ref[0]]], *tref2;
+    const AVFrame *ref1 = tref1->f, *ref2;
+    int w1 = ref1->width, h1 = ref1->height, w2, h2;
+    ptrdiff_t ls_y = td->y_stride, ls_uv = td->uv_stride;
+    int bytesperpixel = BYTES_PER_PIXEL;
+
+    if (b->comp) {
+        tref2 = &s->s.refs[s->s.h.refidx[b->ref[1]]];
+        ref2 = tref2->f;
+        w2 = ref2->width;
+        h2 = ref2->height;
+    }
+
+    // y inter pred
+    if (b->bs > BS_8x8) {
+        VP9mv uvmv;
+
+#if SCALED == 0
+        if (b->bs == BS_8x4) {
+            mc_luma_dir(td, mc[3][b->filter][0], td->dst[0], ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        row << 3, col << 3, &b->mv[0][0],,,,, 8, 4, w1, h1, 0);
+            mc_luma_dir(td, mc[3][b->filter][0],
+                        td->dst[0] + 4 * ls_y, ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        (row << 3) + 4, col << 3, &b->mv[2][0],,,,, 8, 4, w1, h1, 0);
+            w1 = (w1 + s->ss_h) >> s->ss_h;
+            if (s->ss_v) {
+                h1 = (h1 + 1) >> 1;
+                uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
+                mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0],
+                              td->dst[1], td->dst[2], ls_uv,
+                              ref1->data[1], ref1->linesize[1],
+                              ref1->data[2], ref1->linesize[2], tref1,
+                              row << 2, col << (3 - s->ss_h),
+                              &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
+            } else {
+                mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0],
+                              td->dst[1], td->dst[2], ls_uv,
+                              ref1->data[1], ref1->linesize[1],
+                              ref1->data[2], ref1->linesize[2], tref1,
+                              row << 3, col << (3 - s->ss_h),
+                              &b->mv[0][0],,,,, 8 >> s->ss_h, 4, w1, h1, 0);
+                // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
+                // to get the motion vector for the bottom 4x4 block
+                // https://code.google.com/p/webm/issues/detail?id=993
+                if (s->ss_h == 0) {
+                    uvmv = b->mv[2][0];
+                } else {
+                    uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
+                }
+                mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0],
+                              td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
+                              ref1->data[1], ref1->linesize[1],
+                              ref1->data[2], ref1->linesize[2], tref1,
+                              (row << 3) + 4, col << (3 - s->ss_h),
+                              &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
+            }
+
+            if (b->comp) {
+                mc_luma_dir(td, mc[3][b->filter][1], td->dst[0], ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            row << 3, col << 3, &b->mv[0][1],,,,, 8, 4, w2, h2, 1);
+                mc_luma_dir(td, mc[3][b->filter][1],
+                            td->dst[0] + 4 * ls_y, ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            (row << 3) + 4, col << 3, &b->mv[2][1],,,,, 8, 4, w2, h2, 1);
+                w2 = (w2 + s->ss_h) >> s->ss_h;
+                if (s->ss_v) {
+                    h2 = (h2 + 1) >> 1;
+                    uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
+                    mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1],
+                                  td->dst[1], td->dst[2], ls_uv,
+                                  ref2->data[1], ref2->linesize[1],
+                                  ref2->data[2], ref2->linesize[2], tref2,
+                                  row << 2, col << (3 - s->ss_h),
+                                  &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
+                } else {
+                    mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1],
+                                  td->dst[1], td->dst[2], ls_uv,
+                                  ref2->data[1], ref2->linesize[1],
+                                  ref2->data[2], ref2->linesize[2], tref2,
+                                  row << 3, col << (3 - s->ss_h),
+                                  &b->mv[0][1],,,,, 8 >> s->ss_h, 4, w2, h2, 1);
+                    // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
+                    // to get the motion vector for the bottom 4x4 block
+                    // https://code.google.com/p/webm/issues/detail?id=993
+                    if (s->ss_h == 0) {
+                        uvmv = b->mv[2][1];
+                    } else {
+                        uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
+                    }
+                    mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1],
+                                  td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
+                                  ref2->data[1], ref2->linesize[1],
+                                  ref2->data[2], ref2->linesize[2], tref2,
+                                  (row << 3) + 4, col << (3 - s->ss_h),
+                                  &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
+                }
+            }
+        } else if (b->bs == BS_4x8) {
+            mc_luma_dir(td, mc[4][b->filter][0], td->dst[0], ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        row << 3, col << 3, &b->mv[0][0],,,,, 4, 8, w1, h1, 0);
+            mc_luma_dir(td, mc[4][b->filter][0], td->dst[0] + 4 * bytesperpixel, ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        row << 3, (col << 3) + 4, &b->mv[1][0],,,,, 4, 8, w1, h1, 0);
+            h1 = (h1 + s->ss_v) >> s->ss_v;
+            if (s->ss_h) {
+                w1 = (w1 + 1) >> 1;
+                uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
+                mc_chroma_dir(td, mc[4][b->filter][0],
+                              td->dst[1], td->dst[2], ls_uv,
+                              ref1->data[1], ref1->linesize[1],
+                              ref1->data[2], ref1->linesize[2], tref1,
+                              row << (3 - s->ss_v), col << 2,
+                              &uvmv,,,,, 4, 8 >> s->ss_v, w1, h1, 0);
+            } else {
+                mc_chroma_dir(td, mc[4][b->filter][0],
+                              td->dst[1], td->dst[2], ls_uv,
+                              ref1->data[1], ref1->linesize[1],
+                              ref1->data[2], ref1->linesize[2], tref1,
+                              row << (3 - s->ss_v), col << 3,
+                              &b->mv[0][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
+                mc_chroma_dir(td, mc[4][b->filter][0],
+                              td->dst[1] + 4 * bytesperpixel,
+                              td->dst[2] + 4 * bytesperpixel, ls_uv,
+                              ref1->data[1], ref1->linesize[1],
+                              ref1->data[2], ref1->linesize[2], tref1,
+                              row << (3 - s->ss_v), (col << 3) + 4,
+                              &b->mv[1][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
+            }
+
+            if (b->comp) {
+                mc_luma_dir(td, mc[4][b->filter][1], td->dst[0], ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            row << 3, col << 3, &b->mv[0][1],,,,, 4, 8, w2, h2, 1);
+                mc_luma_dir(td, mc[4][b->filter][1], td->dst[0] + 4 * bytesperpixel, ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            row << 3, (col << 3) + 4, &b->mv[1][1],,,,, 4, 8, w2, h2, 1);
+                h2 = (h2 + s->ss_v) >> s->ss_v;
+                if (s->ss_h) {
+                    w2 = (w2 + 1) >> 1;
+                    uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
+                    mc_chroma_dir(td, mc[4][b->filter][1],
+                                  td->dst[1], td->dst[2], ls_uv,
+                                  ref2->data[1], ref2->linesize[1],
+                                  ref2->data[2], ref2->linesize[2], tref2,
+                                  row << (3 - s->ss_v), col << 2,
+                                  &uvmv,,,,, 4, 8 >> s->ss_v, w2, h2, 1);
+                } else {
+                    mc_chroma_dir(td, mc[4][b->filter][1],
+                                  td->dst[1], td->dst[2], ls_uv,
+                                  ref2->data[1], ref2->linesize[1],
+                                  ref2->data[2], ref2->linesize[2], tref2,
+                                  row << (3 - s->ss_v), col << 3,
+                                  &b->mv[0][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
+                    mc_chroma_dir(td, mc[4][b->filter][1],
+                                  td->dst[1] + 4 * bytesperpixel,
+                                  td->dst[2] + 4 * bytesperpixel, ls_uv,
+                                  ref2->data[1], ref2->linesize[1],
+                                  ref2->data[2], ref2->linesize[2], tref2,
+                                  row << (3 - s->ss_v), (col << 3) + 4,
+                                  &b->mv[1][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
+                }
+            }
+        } else
+#endif
+        {
+#if SCALED == 0
+            av_assert2(b->bs == BS_4x4);
+#endif
+
+            // FIXME if two horizontally adjacent blocks have the same MV,
+            // do a w8 instead of a w4 call
+            mc_luma_dir(td, mc[4][b->filter][0], td->dst[0], ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        row << 3, col << 3, &b->mv[0][0],
+                        0, 0, 8, 8, 4, 4, w1, h1, 0);
+            mc_luma_dir(td, mc[4][b->filter][0], td->dst[0] + 4 * bytesperpixel, ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        row << 3, (col << 3) + 4, &b->mv[1][0],
+                        4, 0, 8, 8, 4, 4, w1, h1, 0);
+            mc_luma_dir(td, mc[4][b->filter][0],
+                        td->dst[0] + 4 * ls_y, ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        (row << 3) + 4, col << 3, &b->mv[2][0],
+                        0, 4, 8, 8, 4, 4, w1, h1, 0);
+            mc_luma_dir(td, mc[4][b->filter][0],
+                        td->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        (row << 3) + 4, (col << 3) + 4, &b->mv[3][0],
+                        4, 4, 8, 8, 4, 4, w1, h1, 0);
+            if (s->ss_v) {
+                h1 = (h1 + 1) >> 1;
+                if (s->ss_h) {
+                    w1 = (w1 + 1) >> 1;
+                    uvmv = ROUNDED_DIV_MVx4(b->mv[0][0], b->mv[1][0],
+                                            b->mv[2][0], b->mv[3][0]);
+                    mc_chroma_dir(td, mc[4][b->filter][0],
+                                  td->dst[1], td->dst[2], ls_uv,
+                                  ref1->data[1], ref1->linesize[1],
+                                  ref1->data[2], ref1->linesize[2], tref1,
+                                  row << 2, col << 2,
+                                  &uvmv, 0, 0, 4, 4, 4, 4, w1, h1, 0);
+                } else {
+                    uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
+                    mc_chroma_dir(td, mc[4][b->filter][0],
+                                  td->dst[1], td->dst[2], ls_uv,
+                                  ref1->data[1], ref1->linesize[1],
+                                  ref1->data[2], ref1->linesize[2], tref1,
+                                  row << 2, col << 3,
+                                  &uvmv, 0, 0, 8, 4, 4, 4, w1, h1, 0);
+                    uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
+                    mc_chroma_dir(td, mc[4][b->filter][0],
+                                  td->dst[1] + 4 * bytesperpixel,
+                                  td->dst[2] + 4 * bytesperpixel, ls_uv,
+                                  ref1->data[1], ref1->linesize[1],
+                                  ref1->data[2], ref1->linesize[2], tref1,
+                                  row << 2, (col << 3) + 4,
+                                  &uvmv, 4, 0, 8, 4, 4, 4, w1, h1, 0);
+                }
+            } else {
+                if (s->ss_h) {
+                    w1 = (w1 + 1) >> 1;
+                    uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
+                    mc_chroma_dir(td, mc[4][b->filter][0],
+                                  td->dst[1], td->dst[2], ls_uv,
+                                  ref1->data[1], ref1->linesize[1],
+                                  ref1->data[2], ref1->linesize[2], tref1,
+                                  row << 3, col << 2,
+                                  &uvmv, 0, 0, 4, 8, 4, 4, w1, h1, 0);
+                    // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
+                    // bottom block
+                    // https://code.google.com/p/webm/issues/detail?id=993
+                    uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[2][0]);
+                    mc_chroma_dir(td, mc[4][b->filter][0],
+                                  td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
+                                  ref1->data[1], ref1->linesize[1],
+                                  ref1->data[2], ref1->linesize[2], tref1,
+                                  (row << 3) + 4, col << 2,
+                                  &uvmv, 0, 4, 4, 8, 4, 4, w1, h1, 0);
+                } else {
+                    mc_chroma_dir(td, mc[4][b->filter][0],
+                                  td->dst[1], td->dst[2], ls_uv,
+                                  ref1->data[1], ref1->linesize[1],
+                                  ref1->data[2], ref1->linesize[2], tref1,
+                                  row << 3, col << 3,
+                                  &b->mv[0][0], 0, 0, 8, 8, 4, 4, w1, h1, 0);
+                    mc_chroma_dir(td, mc[4][b->filter][0],
+                                  td->dst[1] + 4 * bytesperpixel,
+                                  td->dst[2] + 4 * bytesperpixel, ls_uv,
+                                  ref1->data[1], ref1->linesize[1],
+                                  ref1->data[2], ref1->linesize[2], tref1,
+                                  row << 3, (col << 3) + 4,
+                                  &b->mv[1][0], 4, 0, 8, 8, 4, 4, w1, h1, 0);
+                    mc_chroma_dir(td, mc[4][b->filter][0],
+                                  td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
+                                  ref1->data[1], ref1->linesize[1],
+                                  ref1->data[2], ref1->linesize[2], tref1,
+                                  (row << 3) + 4, col << 3,
+                                  &b->mv[2][0], 0, 4, 8, 8, 4, 4, w1, h1, 0);
+                    mc_chroma_dir(td, mc[4][b->filter][0],
+                                  td->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
+                                  td->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
+                                  ref1->data[1], ref1->linesize[1],
+                                  ref1->data[2], ref1->linesize[2], tref1,
+                                  (row << 3) + 4, (col << 3) + 4,
+                                  &b->mv[3][0], 4, 4, 8, 8, 4, 4, w1, h1, 0);
+                }
+            }
+
+            if (b->comp) {
+                mc_luma_dir(td, mc[4][b->filter][1], td->dst[0], ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            row << 3, col << 3, &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
+                mc_luma_dir(td, mc[4][b->filter][1], td->dst[0] + 4 * bytesperpixel, ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
+                mc_luma_dir(td, mc[4][b->filter][1],
+                            td->dst[0] + 4 * ls_y, ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            (row << 3) + 4, col << 3, &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
+                mc_luma_dir(td, mc[4][b->filter][1],
+                            td->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
+                if (s->ss_v) {
+                    h2 = (h2 + 1) >> 1;
+                    if (s->ss_h) {
+                        w2 = (w2 + 1) >> 1;
+                        uvmv = ROUNDED_DIV_MVx4(b->mv[0][1], b->mv[1][1],
+                                                b->mv[2][1], b->mv[3][1]);
+                        mc_chroma_dir(td, mc[4][b->filter][1],
+                                      td->dst[1], td->dst[2], ls_uv,
+                                      ref2->data[1], ref2->linesize[1],
+                                      ref2->data[2], ref2->linesize[2], tref2,
+                                      row << 2, col << 2,
+                                      &uvmv, 0, 0, 4, 4, 4, 4, w2, h2, 1);
+                    } else {
+                        uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
+                        mc_chroma_dir(td, mc[4][b->filter][1],
+                                      td->dst[1], td->dst[2], ls_uv,
+                                      ref2->data[1], ref2->linesize[1],
+                                      ref2->data[2], ref2->linesize[2], tref2,
+                                      row << 2, col << 3,
+                                      &uvmv, 0, 0, 8, 4, 4, 4, w2, h2, 1);
+                        uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
+                        mc_chroma_dir(td, mc[4][b->filter][1],
+                                      td->dst[1] + 4 * bytesperpixel,
+                                      td->dst[2] + 4 * bytesperpixel, ls_uv,
+                                      ref2->data[1], ref2->linesize[1],
+                                      ref2->data[2], ref2->linesize[2], tref2,
+                                      row << 2, (col << 3) + 4,
+                                      &uvmv, 4, 0, 8, 4, 4, 4, w2, h2, 1);
+                    }
+                } else {
+                    if (s->ss_h) {
+                        w2 = (w2 + 1) >> 1;
+                        uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
+                        mc_chroma_dir(td, mc[4][b->filter][1],
+                                      td->dst[1], td->dst[2], ls_uv,
+                                      ref2->data[1], ref2->linesize[1],
+                                      ref2->data[2], ref2->linesize[2], tref2,
+                                      row << 3, col << 2,
+                                      &uvmv, 0, 0, 4, 8, 4, 4, w2, h2, 1);
+                        // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
+                        // bottom block
+                        // https://code.google.com/p/webm/issues/detail?id=993
+                        uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[2][1]);
+                        mc_chroma_dir(td, mc[4][b->filter][1],
+                                      td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
+                                      ref2->data[1], ref2->linesize[1],
+                                      ref2->data[2], ref2->linesize[2], tref2,
+                                      (row << 3) + 4, col << 2,
+                                      &uvmv, 0, 4, 4, 8, 4, 4, w2, h2, 1);
+                    } else {
+                        mc_chroma_dir(td, mc[4][b->filter][1],
+                                      td->dst[1], td->dst[2], ls_uv,
+                                      ref2->data[1], ref2->linesize[1],
+                                      ref2->data[2], ref2->linesize[2], tref2,
+                                      row << 3, col << 3,
+                                      &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
+                        mc_chroma_dir(td, mc[4][b->filter][1],
+                                      td->dst[1] + 4 * bytesperpixel,
+                                      td->dst[2] + 4 * bytesperpixel, ls_uv,
+                                      ref2->data[1], ref2->linesize[1],
+                                      ref2->data[2], ref2->linesize[2], tref2,
+                                      row << 3, (col << 3) + 4,
+                                      &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
+                        mc_chroma_dir(td, mc[4][b->filter][1],
+                                      td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
+                                      ref2->data[1], ref2->linesize[1],
+                                      ref2->data[2], ref2->linesize[2], tref2,
+                                      (row << 3) + 4, col << 3,
+                                      &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
+                        mc_chroma_dir(td, mc[4][b->filter][1],
+                                      td->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
+                                      td->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
+                                      ref2->data[1], ref2->linesize[1],
+                                      ref2->data[2], ref2->linesize[2], tref2,
+                                      (row << 3) + 4, (col << 3) + 4,
+                                      &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
+                    }
+                }
+            }
+        }
+    } else {
+        int bwl = bwlog_tab[0][b->bs];
+        int bw = ff_vp9_bwh_tab[0][b->bs][0] * 4;
+        int bh = ff_vp9_bwh_tab[0][b->bs][1] * 4;
+        int uvbw = ff_vp9_bwh_tab[s->ss_h][b->bs][0] * 4;
+        int uvbh = ff_vp9_bwh_tab[s->ss_v][b->bs][1] * 4;
+
+        mc_luma_dir(td, mc[bwl][b->filter][0], td->dst[0], ls_y,
+                    ref1->data[0], ref1->linesize[0], tref1,
+                    row << 3, col << 3, &b->mv[0][0], 0, 0, bw, bh, bw, bh, w1, h1, 0);
+        w1 = (w1 + s->ss_h) >> s->ss_h;
+        h1 = (h1 + s->ss_v) >> s->ss_v;
+        mc_chroma_dir(td, mc[bwl + s->ss_h][b->filter][0],
+                      td->dst[1], td->dst[2], ls_uv,
+                      ref1->data[1], ref1->linesize[1],
+                      ref1->data[2], ref1->linesize[2], tref1,
+                      row << (3 - s->ss_v), col << (3 - s->ss_h),
+                      &b->mv[0][0], 0, 0, uvbw, uvbh, uvbw, uvbh, w1, h1, 0);
+
+        if (b->comp) {
+            mc_luma_dir(td, mc[bwl][b->filter][1], td->dst[0], ls_y,
+                        ref2->data[0], ref2->linesize[0], tref2,
+                        row << 3, col << 3, &b->mv[0][1], 0, 0, bw, bh, bw, bh, w2, h2, 1);
+            w2 = (w2 + s->ss_h) >> s->ss_h;
+            h2 = (h2 + s->ss_v) >> s->ss_v;
+            mc_chroma_dir(td, mc[bwl + s->ss_h][b->filter][1],
+                          td->dst[1], td->dst[2], ls_uv,
+                          ref2->data[1], ref2->linesize[1],
+                          ref2->data[2], ref2->linesize[2], tref2,
+                          row << (3 - s->ss_v), col << (3 - s->ss_h),
+                          &b->mv[0][1], 0, 0, uvbw, uvbh, uvbw, uvbh, w2, h2, 1);
+        }
+    }
+}
diff --git a/media/ffvpx/libavcodec/vp9_parser.c b/media/ffvpx/libavcodec/vp9_parser.c
new file mode 100644
index 0000000000..ffcb93505f
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9_parser.c
@@ -0,0 +1,70 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "libavcodec/get_bits.h"
+#include "parser.h"
+
+/**
+ * Peek at the first bits of a VP9 frame header and record what they say.
+ *
+ * The packet is always handed back unchanged through out_data / out_size.
+ * When the header can be read and the profile is valid, avctx->profile is
+ * updated and ctx->pict_type / ctx->key_frame are set to I / 1 for a key
+ * frame and to P / 0 for anything else.
+ *
+ * @return size in every case
+ */
+static int parse(AVCodecParserContext *ctx,
+                 AVCodecContext *avctx,
+                 const uint8_t **out_data, int *out_size,
+                 const uint8_t *data, int size)
+{
+    GetBitContext bits;
+    int profile_lo, profile_hi, profile, is_key;
+
+    // the input is passed through untouched
+    *out_data = data;
+    *out_size = size;
+
+    // parsers can't return errors, so unusable input is silently accepted
+    if (!size)
+        return size;
+    if (init_get_bits8(&bits, data, size) < 0)
+        return size;
+
+    get_bits(&bits, 2); // frame marker
+
+    // the profile is coded low bit first; value 3 is followed by one more bit
+    profile_lo = get_bits1(&bits);
+    profile_hi = get_bits1(&bits);
+    profile    = profile_lo | (profile_hi << 1);
+    if (profile == 3)
+        profile += get_bits1(&bits);
+    if (profile > 3)
+        return size;
+
+    avctx->profile = profile;
+
+    // Key frame only if the next bit is clear and the bit after it is clear
+    // as well; the second bit is not read when the first one is set.
+    // NOTE(review): these look like show_existing_frame and frame_type -
+    // confirm against the VP9 bitstream specification.
+    is_key = !get_bits1(&bits) && !get_bits1(&bits);
+
+    ctx->pict_type = is_key ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+    ctx->key_frame = is_key;
+
+    return size;
+}
+
+/* Parser descriptor for VP9: registered for AV_CODEC_ID_VP9 only, with
+ * parse() as its per-packet callback. */
+const AVCodecParser ff_vp9_parser = {
+    .codec_ids      = { AV_CODEC_ID_VP9 },
+    .parser_parse   = parse,
+};
diff --git a/media/ffvpx/libavcodec/vp9_superframe_split_bsf.c b/media/ffvpx/libavcodec/vp9_superframe_split_bsf.c
new file mode 100644
index 0000000000..cddd48119c
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9_superframe_split_bsf.c
@@ -0,0 +1,170 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * This bitstream filter splits VP9 superframes into packets containing
+ * just one frame.
+ */
+
+#include <stddef.h>
+
+#include "bsf.h"
+#include "bsf_internal.h"
+#include "bytestream.h"
+#include "get_bits.h"
+
+/**
+ * Private state of the vp9_superframe_split bitstream filter.
+ */
+typedef struct VP9SFSplitContext {
+    /* Input packet being processed. Allocated in init, freed in uninit; the
+     * filter treats a non-NULL data pointer as "frames of a superframe are
+     * still waiting to be output". */
+    AVPacket *buffer_pkt;
+
+    /* Number of frames in the superframe currently held in buffer_pkt. */
+    int nb_frames;
+    /* Index into sizes[] of the next frame to output. */
+    int next_frame;
+    /* Byte offset of that frame from the start of buffer_pkt's data. */
+    size_t next_frame_offset;
+    /* Frame sizes read from the superframe index; the frame count comes from
+     * a 3-bit field plus one, so there are at most 8 entries. */
+    int sizes[8];
+} VP9SFSplitContext;
+
+/**
+ * Produce the next output packet: either one frame cut out of a buffered
+ * VP9 superframe, or the input packet itself when it is not a superframe.
+ *
+ * A superframe is recognised by a marker byte whose top three bits are 110
+ * at the very end of the packet, repeated at the start of the trailing
+ * index. The marker encodes the number of frames and the byte width of each
+ * size entry in that index.
+ *
+ * @param ctx bitstream filter context; priv_data is a VP9SFSplitContext
+ * @param out packet that receives the output
+ * @return 0 on success, a negative error code otherwise
+ */
+static int vp9_superframe_split_filter(AVBSFContext *ctx, AVPacket *out)
+{
+    VP9SFSplitContext *s = ctx->priv_data;
+    GetBitContext gb;
+    int profile, invisible = 0;
+    int ret;
+    // non-zero while frames of an earlier superframe are still buffered
+    int splitting = !!s->buffer_pkt->data;
+
+    if (!splitting) {
+        AVPacket *in = s->buffer_pkt;
+
+        ret = ff_bsf_get_packet_ref(ctx, in);
+        if (ret < 0)
+            return ret;
+
+        // an empty packet has no marker byte to inspect; it is passed through
+        if (in->size) {
+            int marker = in->data[in->size - 1];
+
+            if ((marker & 0xe0) == 0xc0) {
+                int length_size = ((marker >> 3) & 0x3) + 1;
+                int nb_frames   = (marker & 0x7) + 1;
+                int idx_size    = nb_frames * length_size + 2;
+
+                // the index must fit and must start with the same marker byte
+                if (in->size >= idx_size &&
+                    in->data[in->size - idx_size] == marker) {
+                    GetByteContext bc;
+                    int64_t total_size = 0;
+                    int i, j;
+
+                    bytestream2_init(&bc, in->data + in->size + 1 - idx_size,
+                                     nb_frames * length_size);
+
+                    for (i = 0; i < nb_frames; i++) {
+                        int frame_size = 0;
+
+                        // each size is stored least significant byte first
+                        for (j = 0; j < length_size; j++)
+                            frame_size |= bytestream2_get_byte(&bc) << (j * 8);
+
+                        total_size += frame_size;
+                        // reject empty frames and frames running into the index
+                        if (frame_size <= 0 || total_size > in->size - idx_size) {
+                            av_log(ctx, AV_LOG_ERROR,
+                                   "Invalid frame size in a superframe: %d\n", frame_size);
+                            ret = AVERROR(EINVAL);
+                            goto fail;
+                        }
+                        s->sizes[i] = frame_size;
+                    }
+
+                    s->next_frame_offset = 0;
+                    s->next_frame        = 0;
+                    s->nb_frames         = nb_frames;
+                    splitting            = 1;
+                }
+            }
+        }
+    }
+
+    if (!splitting) {
+        // not a superframe: hand the input over as is
+        av_packet_move_ref(out, s->buffer_pkt);
+        return 0;
+    }
+
+    ret = av_packet_ref(out, s->buffer_pkt);
+    if (ret < 0)
+        goto fail;
+
+    // narrow the new reference down to the frame that is due
+    out->data += s->next_frame_offset;
+    out->size  = s->sizes[s->next_frame];
+
+    s->next_frame_offset += out->size;
+    s->next_frame++;
+
+    // last frame handed out: drop the buffered packet
+    if (s->next_frame >= s->nb_frames)
+        av_packet_unref(s->buffer_pkt);
+
+    ret = init_get_bits8(&gb, out->data, out->size);
+    if (ret < 0)
+        goto fail;
+
+    get_bits(&gb, 2); // frame_marker
+    profile  = get_bits1(&gb);
+    profile |= get_bits1(&gb) << 1;
+    if (profile == 3)
+        get_bits1(&gb);
+    // NOTE(review): the three bits read here look like show_existing_frame,
+    // frame_type and show_frame - confirm against the VP9 bitstream spec.
+    if (!get_bits1(&gb)) {
+        get_bits1(&gb);
+        invisible = !get_bits1(&gb);
+    }
+
+    // frames flagged as invisible are output without a pts
+    if (invisible)
+        out->pts = AV_NOPTS_VALUE;
+
+    return 0;
+
+fail:
+    if (ret < 0)
+        av_packet_unref(out);
+    av_packet_unref(s->buffer_pkt);
+    return ret;
+}
+
+/**
+ * Allocate the packet that holds the input while it is being split.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the packet cannot be allocated
+ */
+static int vp9_superframe_split_init(AVBSFContext *ctx)
+{
+    VP9SFSplitContext *priv = ctx->priv_data;
+    AVPacket *pkt = av_packet_alloc();
+
+    priv->buffer_pkt = pkt;
+
+    return pkt ? 0 : AVERROR(ENOMEM);
+}
+
+/** Release whatever the buffered packet currently references. */
+static void vp9_superframe_split_flush(AVBSFContext *ctx)
+{
+    VP9SFSplitContext *priv = ctx->priv_data;
+
+    av_packet_unref(priv->buffer_pkt);
+}
+
+/** Free the buffered packet allocated by vp9_superframe_split_init(). */
+static void vp9_superframe_split_uninit(AVBSFContext *ctx)
+{
+    VP9SFSplitContext *priv = ctx->priv_data;
+
+    av_packet_free(&priv->buffer_pkt);
+}
+
+/* Descriptor of the "vp9_superframe_split" bitstream filter: restricted to
+ * AV_CODEC_ID_VP9 (the codec list is terminated by AV_CODEC_ID_NONE), with a
+ * VP9SFSplitContext as private data and the callbacks defined above. */
+const FFBitStreamFilter ff_vp9_superframe_split_bsf = {
+    .p.name         = "vp9_superframe_split",
+    .p.codec_ids    = (const enum AVCodecID []){ AV_CODEC_ID_VP9, AV_CODEC_ID_NONE },
+    .priv_data_size = sizeof(VP9SFSplitContext),
+    .init           = vp9_superframe_split_init,
+    .flush          = vp9_superframe_split_flush,
+    .close          = vp9_superframe_split_uninit,
+    .filter         = vp9_superframe_split_filter,
+};
diff --git a/media/ffvpx/libavcodec/vp9block.c b/media/ffvpx/libavcodec/vp9block.c
new file mode 100644
index 0000000000..5743f048cc
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9block.c
@@ -0,0 +1,1457 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+
+#include "threadframe.h"
+#include "vp89_rac.h"
+#include "vp9.h"
+#include "vp9data.h"
+#include "vp9dec.h"
+#include "vpx_rac.h"
+
+/**
+ * Fill a rectangle of w x h bytes with the byte value v.
+ *
+ * Every row is written with one store covering all w bytes (two 32-bit
+ * stores for w == 8 when HAVE_FAST_64BIT is not set); consecutive rows are
+ * stride bytes apart.
+ *
+ * @param ptr    first byte of the top row; written through the AV_WN*A
+ *               macros, which presumably require ptr to be aligned to the
+ *               store width - TODO confirm against libavutil/intreadwrite.h
+ * @param w      row width in bytes; only 1, 2, 4 and 8 are handled, any
+ *               other value writes nothing
+ * @param h      number of rows; must be at least 1 because the rows are
+ *               written by do/while loops
+ * @param stride distance in bytes from one row to the next
+ * @param v      byte value to replicate, expected to be in 0..255
+ */
+static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
+                                       ptrdiff_t stride, int v)
+{
+    switch (w) {
+    case 1:
+        do {
+            *ptr = v;
+            ptr += stride;
+        } while (--h);
+        break;
+    case 2: {
+        int v16 = v * 0x0101;
+        do {
+            AV_WN16A(ptr, v16);
+            ptr += stride;
+        } while (--h);
+        break;
+    }
+    case 4: {
+        // replicate in unsigned arithmetic: as an int product this would
+        // overflow for any v >= 0x80
+        uint32_t v32 = (unsigned)v * 0x01010101U;
+        do {
+            AV_WN32A(ptr, v32);
+            ptr += stride;
+        } while (--h);
+        break;
+    }
+    case 8: {
+#if HAVE_FAST_64BIT
+        uint64_t v64 = v * 0x0101010101010101ULL;
+        do {
+            AV_WN64A(ptr, v64);
+            ptr += stride;
+        } while (--h);
+#else
+        // same unsigned replication as in the 4-byte case
+        uint32_t v32 = (unsigned)v * 0x01010101U;
+        do {
+            AV_WN32A(ptr,     v32);
+            AV_WN32A(ptr + 4, v32);
+            ptr += stride;
+        } while (--h);
+#endif
+        break;
+    }
+    }
+}
+
+static void decode_mode(VP9TileData *td)
+{
+    static const uint8_t left_ctx[N_BS_SIZES] = {
+        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
+    };
+    static const uint8_t above_ctx[N_BS_SIZES] = {
+        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
+    };
+    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
+        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
+        TX_16X16, TX_8X8,   TX_8X8,   TX_8X8,   TX_4X4,   TX_4X4,  TX_4X4
+    };
+    const VP9Context *s = td->s;
+    VP9Block *b = td->b;
+    int row = td->row, col = td->col, row7 = td->row7;
+    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
+    int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
+    int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
+    int have_a = row > 0, have_l = col > td->tile_col_start;
+    int vref, filter_id;
+
+    if (!s->s.h.segmentation.enabled) {
+        b->seg_id = 0;
+    } else if (s->s.h.keyframe || s->s.h.intraonly) {
+        b->seg_id = !s->s.h.segmentation.update_map ? 0 :
+                    vp89_rac_get_tree(td->c, ff_vp9_segmentation_tree,
+                                      s->s.h.segmentation.prob);
+    } else if (!s->s.h.segmentation.update_map ||
+               (s->s.h.segmentation.temporal &&
+                vpx_rac_get_prob_branchy(td->c,
+                    s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
+                                    td->left_segpred_ctx[row7]]))) {
+        if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
+            int pred = 8, x;
+            uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
+
+            if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
+                ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
+            for (y = 0; y < h4; y++) {
+                int idx_base = (y + row) * 8 * s->sb_cols + col;
+                for (x = 0; x < w4; x++)
+                    pred = FFMIN(pred, refsegmap[idx_base + x]);
+            }
+            av_assert1(pred < 8);
+            b->seg_id = pred;
+        } else {
+            b->seg_id = 0;
+        }
+
+        memset(&s->above_segpred_ctx[col], 1, w4);
+        memset(&td->left_segpred_ctx[row7], 1, h4);
+    } else {
+        b->seg_id = vp89_rac_get_tree(td->c, ff_vp9_segmentation_tree,
+                                      s->s.h.segmentation.prob);
+
+        memset(&s->above_segpred_ctx[col], 0, w4);
+        memset(&td->left_segpred_ctx[row7], 0, h4);
+    }
+    if (s->s.h.segmentation.enabled &&
+        (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
+        setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
+                  bw4, bh4, 8 * s->sb_cols, b->seg_id);
+    }
+
+    b->skip = s->s.h.segmentation.enabled &&
+        s->s.h.segmentation.feat[b->seg_id].skip_enabled;
+    if (!b->skip) {
+        int c = td->left_skip_ctx[row7] + s->above_skip_ctx[col];
+        b->skip = vpx_rac_get_prob(td->c, s->prob.p.skip[c]);
+        td->counts.skip[c][b->skip]++;
+    }
+
+    if (s->s.h.keyframe || s->s.h.intraonly) {
+        b->intra = 1;
+    } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
+        b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
+    } else {
+        int c, bit;
+
+        if (have_a && have_l) {
+            c = s->above_intra_ctx[col] + td->left_intra_ctx[row7];
+            c += (c == 2);
+        } else {
+            c = have_a ? 2 * s->above_intra_ctx[col] :
+                have_l ? 2 * td->left_intra_ctx[row7] : 0;
+        }
+        bit = vpx_rac_get_prob(td->c, s->prob.p.intra[c]);
+        td->counts.intra[c][bit]++;
+        b->intra = !bit;
+    }
+
+    if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
+        int c;
+        if (have_a) {
+            if (have_l) {
+                c = (s->above_skip_ctx[col] ? max_tx :
+                     s->above_txfm_ctx[col]) +
+                    (td->left_skip_ctx[row7] ? max_tx :
+                     td->left_txfm_ctx[row7]) > max_tx;
+            } else {
+                c = s->above_skip_ctx[col] ? 1 :
+                    (s->above_txfm_ctx[col] * 2 > max_tx);
+            }
+        } else if (have_l) {
+            c = td->left_skip_ctx[row7] ? 1 :
+                (td->left_txfm_ctx[row7] * 2 > max_tx);
+        } else {
+            c = 1;
+        }
+        switch (max_tx) {
+        case TX_32X32:
+            b->tx = vpx_rac_get_prob(td->c, s->prob.p.tx32p[c][0]);
+            if (b->tx) {
+                b->tx += vpx_rac_get_prob(td->c, s->prob.p.tx32p[c][1]);
+                if (b->tx == 2)
+                    b->tx += vpx_rac_get_prob(td->c, s->prob.p.tx32p[c][2]);
+            }
+            td->counts.tx32p[c][b->tx]++;
+            break;
+        case TX_16X16:
+            b->tx = vpx_rac_get_prob(td->c, s->prob.p.tx16p[c][0]);
+            if (b->tx)
+                b->tx += vpx_rac_get_prob(td->c, s->prob.p.tx16p[c][1]);
+            td->counts.tx16p[c][b->tx]++;
+            break;
+        case TX_8X8:
+            b->tx = vpx_rac_get_prob(td->c, s->prob.p.tx8p[c]);
+            td->counts.tx8p[c][b->tx]++;
+            break;
+        case TX_4X4:
+            b->tx = TX_4X4;
+            break;
+        }
+    } else {
+        b->tx = FFMIN(max_tx, s->s.h.txfmmode);
+    }
+
+    if (s->s.h.keyframe || s->s.h.intraonly) {
+        uint8_t *a = &s->above_mode_ctx[col * 2];
+        uint8_t *l = &td->left_mode_ctx[(row7) << 1];
+
+        b->comp = 0;
+        if (b->bs > BS_8x8) {
+            // FIXME the memory storage intermediates here aren't really
+            // necessary, they're just there to make the code slightly
+            // simpler for now
+            b->mode[0] =
+            a[0]       = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                           ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
+            if (b->bs != BS_8x4) {
+                b->mode[1] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                               ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
+                l[0]       =
+                a[1]       = b->mode[1];
+            } else {
+                l[0]       =
+                a[1]       =
+                b->mode[1] = b->mode[0];
+            }
+            if (b->bs != BS_4x8) {
+                b->mode[2] =
+                a[0]       = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                               ff_vp9_default_kf_ymode_probs[a[0]][l[1]]);
+                if (b->bs != BS_8x4) {
+                    b->mode[3] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                                   ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
+                    l[1]       =
+                    a[1]       = b->mode[3];
+                } else {
+                    l[1]       =
+                    a[1]       =
+                    b->mode[3] = b->mode[2];
+                }
+            } else {
+                b->mode[2] = b->mode[0];
+                l[1]       =
+                a[1]       =
+                b->mode[3] = b->mode[1];
+            }
+        } else {
+            b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                           ff_vp9_default_kf_ymode_probs[*a][*l]);
+            b->mode[3] =
+            b->mode[2] =
+            b->mode[1] = b->mode[0];
+            // FIXME this can probably be optimized
+            memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]);
+            memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]);
+        }
+        b->uvmode = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                      ff_vp9_default_kf_uvmode_probs[b->mode[3]]);
+    } else if (b->intra) {
+        b->comp = 0;
+        if (b->bs > BS_8x8) {
+            b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                           s->prob.p.y_mode[0]);
+            td->counts.y_mode[0][b->mode[0]]++;
+            if (b->bs != BS_8x4) {
+                b->mode[1] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                               s->prob.p.y_mode[0]);
+                td->counts.y_mode[0][b->mode[1]]++;
+            } else {
+                b->mode[1] = b->mode[0];
+            }
+            if (b->bs != BS_4x8) {
+                b->mode[2] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                               s->prob.p.y_mode[0]);
+                td->counts.y_mode[0][b->mode[2]]++;
+                if (b->bs != BS_8x4) {
+                    b->mode[3] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                                   s->prob.p.y_mode[0]);
+                    td->counts.y_mode[0][b->mode[3]]++;
+                } else {
+                    b->mode[3] = b->mode[2];
+                }
+            } else {
+                b->mode[2] = b->mode[0];
+                b->mode[3] = b->mode[1];
+            }
+        } else {
+            static const uint8_t size_group[10] = {
+                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
+            };
+            int sz = size_group[b->bs];
+
+            b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                           s->prob.p.y_mode[sz]);
+            b->mode[1] =
+            b->mode[2] =
+            b->mode[3] = b->mode[0];
+            td->counts.y_mode[sz][b->mode[3]]++;
+        }
+        b->uvmode = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
+                                      s->prob.p.uv_mode[b->mode[3]]);
+        td->counts.uv_mode[b->mode[3]][b->uvmode]++;
+    } else {
+        static const uint8_t inter_mode_ctx_lut[14][14] = {
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
+            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
+            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
+            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
+        };
+
+        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
+            av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
+            b->comp = 0;
+            b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
+        } else {
+            // read comp_pred flag
+            if (s->s.h.comppredmode != PRED_SWITCHABLE) {
+                b->comp = s->s.h.comppredmode == PRED_COMPREF;
+            } else {
+                int c;
+
+                // FIXME add intra as ref=0xff (or -1) to make these easier?
+                if (have_a) {
+                    if (have_l) {
+                        if (s->above_comp_ctx[col] && td->left_comp_ctx[row7]) {
+                            c = 4;
+                        } else if (s->above_comp_ctx[col]) {
+                            c = 2 + (td->left_intra_ctx[row7] ||
+                                     td->left_ref_ctx[row7] == s->s.h.fixcompref);
+                        } else if (td->left_comp_ctx[row7]) {
+                            c = 2 + (s->above_intra_ctx[col] ||
+                                     s->above_ref_ctx[col] == s->s.h.fixcompref);
+                        } else {
+                            c = (!s->above_intra_ctx[col] &&
+                                 s->above_ref_ctx[col] == s->s.h.fixcompref) ^
+                                (!td->left_intra_ctx[row7] &&
+                                 td->left_ref_ctx[row & 7] == s->s.h.fixcompref);
+                        }
+                    } else {
+                        c = s->above_comp_ctx[col] ? 3 :
+                        (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
+                    }
+                } else if (have_l) {
+                    c = td->left_comp_ctx[row7] ? 3 :
+                    (!td->left_intra_ctx[row7] && td->left_ref_ctx[row7] == s->s.h.fixcompref);
+                } else {
+                    c = 1;
+                }
+                b->comp = vpx_rac_get_prob(td->c, s->prob.p.comp[c]);
+                td->counts.comp[c][b->comp]++;
+            }
+
+            // read actual references
+            // FIXME probably cache a few variables here to prevent repetitive
+            // memory accesses below
+            if (b->comp) { /* two references */
+                int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;
+
+                b->ref[fix_idx] = s->s.h.fixcompref;
+                // FIXME can this codeblob be replaced by some sort of LUT?
+                if (have_a) {
+                    if (have_l) {
+                        if (s->above_intra_ctx[col]) {
+                            if (td->left_intra_ctx[row7]) {
+                                c = 2;
+                            } else {
+                                c = 1 + 2 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
+                            }
+                        } else if (td->left_intra_ctx[row7]) {
+                            c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
+                        } else {
+                            int refl = td->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
+
+                            if (refl == refa && refa == s->s.h.varcompref[1]) {
+                                c = 0;
+                            } else if (!td->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
+                                if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
+                                    (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
+                                    c = 4;
+                                } else {
+                                    c = (refa == refl) ? 3 : 1;
+                                }
+                            } else if (!td->left_comp_ctx[row7]) {
+                                if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
+                                    c = 1;
+                                } else {
+                                    c = (refl == s->s.h.varcompref[1] &&
+                                         refa != s->s.h.varcompref[1]) ? 2 : 4;
+                                }
+                            } else if (!s->above_comp_ctx[col]) {
+                                if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
+                                    c = 1;
+                                } else {
+                                    c = (refa == s->s.h.varcompref[1] &&
+                                         refl != s->s.h.varcompref[1]) ? 2 : 4;
+                                }
+                            } else {
+                                c = (refl == refa) ? 4 : 2;
+                            }
+                        }
+                    } else {
+                        if (s->above_intra_ctx[col]) {
+                            c = 2;
+                        } else if (s->above_comp_ctx[col]) {
+                            c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
+                        } else {
+                            c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
+                        }
+                    }
+                } else if (have_l) {
+                    if (td->left_intra_ctx[row7]) {
+                        c = 2;
+                    } else if (td->left_comp_ctx[row7]) {
+                        c = 4 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
+                    } else {
+                        c = 3 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
+                    }
+                } else {
+                    c = 2;
+                }
+                bit = vpx_rac_get_prob(td->c, s->prob.p.comp_ref[c]);
+                b->ref[var_idx] = s->s.h.varcompref[bit];
+                td->counts.comp_ref[c][bit]++;
+            } else /* single reference */ {
+                int bit, c;
+
+                if (have_a && !s->above_intra_ctx[col]) {
+                    if (have_l && !td->left_intra_ctx[row7]) {
+                        if (td->left_comp_ctx[row7]) {
+                            if (s->above_comp_ctx[col]) {
+                                c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7] ||
+                                         !s->above_ref_ctx[col]);
+                            } else {
+                                c = (3 * !s->above_ref_ctx[col]) +
+                                    (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
+                            }
+                        } else if (s->above_comp_ctx[col]) {
+                            c = (3 * !td->left_ref_ctx[row7]) +
+                                (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
+                        } else {
+                            c = 2 * !td->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
+                        }
+                    } else if (s->above_intra_ctx[col]) {
+                        c = 2;
+                    } else if (s->above_comp_ctx[col]) {
+                        c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
+                    } else {
+                        c = 4 * (!s->above_ref_ctx[col]);
+                    }
+                } else if (have_l && !td->left_intra_ctx[row7]) {
+                    if (td->left_intra_ctx[row7]) {
+                        c = 2;
+                    } else if (td->left_comp_ctx[row7]) {
+                        c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
+                    } else {
+                        c = 4 * (!td->left_ref_ctx[row7]);
+                    }
+                } else {
+                    c = 2;
+                }
+                bit = vpx_rac_get_prob(td->c, s->prob.p.single_ref[c][0]);
+                td->counts.single_ref[c][0][bit]++;
+                if (!bit) {
+                    b->ref[0] = 0;
+                } else {
+                    // FIXME can this codeblob be replaced by some sort of LUT?
+                    if (have_a) {
+                        if (have_l) {
+                            if (td->left_intra_ctx[row7]) {
+                                if (s->above_intra_ctx[col]) {
+                                    c = 2;
+                                } else if (s->above_comp_ctx[col]) {
+                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
+                                                 s->above_ref_ctx[col] == 1);
+                                } else if (!s->above_ref_ctx[col]) {
+                                    c = 3;
+                                } else {
+                                    c = 4 * (s->above_ref_ctx[col] == 1);
+                                }
+                            } else if (s->above_intra_ctx[col]) {
+                                if (td->left_intra_ctx[row7]) {
+                                    c = 2;
+                                } else if (td->left_comp_ctx[row7]) {
+                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
+                                                 td->left_ref_ctx[row7] == 1);
+                                } else if (!td->left_ref_ctx[row7]) {
+                                    c = 3;
+                                } else {
+                                    c = 4 * (td->left_ref_ctx[row7] == 1);
+                                }
+                            } else if (s->above_comp_ctx[col]) {
+                                if (td->left_comp_ctx[row7]) {
+                                    if (td->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
+                                        c = 3 * (s->s.h.fixcompref == 1 ||
+                                                 td->left_ref_ctx[row7] == 1);
+                                    } else {
+                                        c = 2;
+                                    }
+                                } else if (!td->left_ref_ctx[row7]) {
+                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
+                                                 s->above_ref_ctx[col] == 1);
+                                } else {
+                                    c = 3 * (td->left_ref_ctx[row7] == 1) +
+                                    (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
+                                }
+                            } else if (td->left_comp_ctx[row7]) {
+                                if (!s->above_ref_ctx[col]) {
+                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
+                                                 td->left_ref_ctx[row7] == 1);
+                                } else {
+                                    c = 3 * (s->above_ref_ctx[col] == 1) +
+                                    (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
+                                }
+                            } else if (!s->above_ref_ctx[col]) {
+                                if (!td->left_ref_ctx[row7]) {
+                                    c = 3;
+                                } else {
+                                    c = 4 * (td->left_ref_ctx[row7] == 1);
+                                }
+                            } else if (!td->left_ref_ctx[row7]) {
+                                c = 4 * (s->above_ref_ctx[col] == 1);
+                            } else {
+                                c = 2 * (td->left_ref_ctx[row7] == 1) +
+                                    2 * (s->above_ref_ctx[col] == 1);
+                            }
+                        } else {
+                            if (s->above_intra_ctx[col] ||
+                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
+                                c = 2;
+                            } else if (s->above_comp_ctx[col]) {
+                                c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
+                            } else {
+                                c = 4 * (s->above_ref_ctx[col] == 1);
+                            }
+                        }
+                    } else if (have_l) {
+                        if (td->left_intra_ctx[row7] ||
+                            (!td->left_comp_ctx[row7] && !td->left_ref_ctx[row7])) {
+                            c = 2;
+                        } else if (td->left_comp_ctx[row7]) {
+                            c = 3 * (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
+                        } else {
+                            c = 4 * (td->left_ref_ctx[row7] == 1);
+                        }
+                    } else {
+                        c = 2;
+                    }
+                    bit = vpx_rac_get_prob(td->c, s->prob.p.single_ref[c][1]);
+                    td->counts.single_ref[c][1][bit]++;
+                    b->ref[0] = 1 + bit;
+                }
+            }
+        }
+
+        if (b->bs <= BS_8x8) {
+            if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
+                b->mode[0] =
+                b->mode[1] =
+                b->mode[2] =
+                b->mode[3] = ZEROMV;
+            } else {
+                static const uint8_t off[10] = {
+                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
+                };
+
+                // FIXME this needs to use the LUT tables from find_ref_mvs
+                // because not all are -1,0/0,-1
+                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
+                                          [td->left_mode_ctx[row7 + off[b->bs]]];
+
+                b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
+                                               s->prob.p.mv_mode[c]);
+                b->mode[1] =
+                b->mode[2] =
+                b->mode[3] = b->mode[0];
+                td->counts.mv_mode[c][b->mode[0] - 10]++;
+            }
+        }
+
+        if (s->s.h.filtermode == FILTER_SWITCHABLE) {
+            int c;
+
+            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
+                if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
+                    c = s->above_filter_ctx[col] == td->left_filter_ctx[row7] ?
+                        td->left_filter_ctx[row7] : 3;
+                } else {
+                    c = s->above_filter_ctx[col];
+                }
+            } else if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
+                c = td->left_filter_ctx[row7];
+            } else {
+                c = 3;
+            }
+
+            filter_id = vp89_rac_get_tree(td->c, ff_vp9_filter_tree,
+                                          s->prob.p.filter[c]);
+            td->counts.filter[c][filter_id]++;
+            b->filter = ff_vp9_filter_lut[filter_id];
+        } else {
+            b->filter = s->s.h.filtermode;
+        }
+
+        if (b->bs > BS_8x8) {
+            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][td->left_mode_ctx[row7]];
+
+            b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
+                                           s->prob.p.mv_mode[c]);
+            td->counts.mv_mode[c][b->mode[0] - 10]++;
+            ff_vp9_fill_mv(td, b->mv[0], b->mode[0], 0);
+
+            if (b->bs != BS_8x4) {
+                b->mode[1] = vp89_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
+                                               s->prob.p.mv_mode[c]);
+                td->counts.mv_mode[c][b->mode[1] - 10]++;
+                ff_vp9_fill_mv(td, b->mv[1], b->mode[1], 1);
+            } else {
+                b->mode[1] = b->mode[0];
+                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
+                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
+            }
+
+            if (b->bs != BS_4x8) {
+                b->mode[2] = vp89_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
+                                               s->prob.p.mv_mode[c]);
+                td->counts.mv_mode[c][b->mode[2] - 10]++;
+                ff_vp9_fill_mv(td, b->mv[2], b->mode[2], 2);
+
+                if (b->bs != BS_8x4) {
+                    b->mode[3] = vp89_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
+                                                   s->prob.p.mv_mode[c]);
+                    td->counts.mv_mode[c][b->mode[3] - 10]++;
+                    ff_vp9_fill_mv(td, b->mv[3], b->mode[3], 3);
+                } else {
+                    b->mode[3] = b->mode[2];
+                    AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
+                    AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
+                }
+            } else {
+                b->mode[2] = b->mode[0];
+                AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
+                AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
+                b->mode[3] = b->mode[1];
+                AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
+                AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
+            }
+        } else {
+            ff_vp9_fill_mv(td, b->mv[0], b->mode[0], -1);
+            AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
+            AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
+            AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
+            AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
+            AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
+            AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
+        }
+
+        vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
+    }
+
+#if HAVE_FAST_64BIT
+#define SPLAT_CTX(var, val, n) \
+    switch (n) { \
+    case 1:  var = val;                                    break; \
+    case 2:  AV_WN16A(&var, val *             0x0101);     break; \
+    case 4:  AV_WN32A(&var, val *         0x01010101);     break; \
+    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \
+    case 16: { \
+        uint64_t v64 = val * 0x0101010101010101ULL; \
+        AV_WN64A(              &var,     v64); \
+        AV_WN64A(&((uint8_t *) &var)[8], v64); \
+        break; \
+    } \
+    }
+#else
+#define SPLAT_CTX(var, val, n) \
+    switch (n) { \
+    case 1:  var = val;                         break; \
+    case 2:  AV_WN16A(&var, val *     0x0101);  break; \
+    case 4:  AV_WN32A(&var, val * 0x01010101);  break; \
+    case 8: { \
+        uint32_t v32 = val * 0x01010101; \
+        AV_WN32A(              &var,     v32); \
+        AV_WN32A(&((uint8_t *) &var)[4], v32); \
+        break; \
+    } \
+    case 16: { \
+        uint32_t v32 = val * 0x01010101; \
+        AV_WN32A(              &var,      v32); \
+        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
+        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
+        AV_WN32A(&((uint8_t *) &var)[12], v32); \
+        break; \
+    } \
+    }
+#endif
+
+    switch (ff_vp9_bwh_tab[1][b->bs][0]) {
+#define SET_CTXS(perf, dir, off, n) \
+    do { \
+        SPLAT_CTX(perf->dir##_skip_ctx[off],      b->skip,          n); \
+        SPLAT_CTX(perf->dir##_txfm_ctx[off],      b->tx,            n); \
+        SPLAT_CTX(perf->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
+        if (!s->s.h.keyframe && !s->s.h.intraonly) { \
+            SPLAT_CTX(perf->dir##_intra_ctx[off], b->intra,   n); \
+            SPLAT_CTX(perf->dir##_comp_ctx[off],  b->comp,    n); \
+            SPLAT_CTX(perf->dir##_mode_ctx[off],  b->mode[3], n); \
+            if (!b->intra) { \
+                SPLAT_CTX(perf->dir##_ref_ctx[off], vref, n); \
+                if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
+                    SPLAT_CTX(perf->dir##_filter_ctx[off], filter_id, n); \
+                } \
+            } \
+        } \
+    } while (0)
+    case 1: SET_CTXS(s, above, col, 1); break;
+    case 2: SET_CTXS(s, above, col, 2); break;
+    case 4: SET_CTXS(s, above, col, 4); break;
+    case 8: SET_CTXS(s, above, col, 8); break;
+    }
+    switch (ff_vp9_bwh_tab[1][b->bs][1]) {
+    case 1: SET_CTXS(td, left, row7, 1); break;
+    case 2: SET_CTXS(td, left, row7, 2); break;
+    case 4: SET_CTXS(td, left, row7, 4); break;
+    case 8: SET_CTXS(td, left, row7, 8); break;
+    }
+#undef SPLAT_CTX
+#undef SET_CTXS
+
+    if (!s->s.h.keyframe && !s->s.h.intraonly) {
+        if (b->bs > BS_8x8) {
+            int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
+
+            AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
+            AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
+            AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][0], mv0);
+            AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][1], mv1);
+            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
+            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
+            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
+            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
+        } else {
+            int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
+
+            for (n = 0; n < w4 * 2; n++) {
+                AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
+                AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
+            }
+            for (n = 0; n < h4 * 2; n++) {
+                AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][0], mv0);
+                AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][1], mv1);
+            }
+        }
+    }
+
+    // FIXME kinda ugly
+    for (y = 0; y < h4; y++) {
+        int x, o = (row + y) * s->sb_cols * 8 + col;
+        VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
+
+        if (b->intra) {
+            for (x = 0; x < w4; x++) {
+                mv[x].ref[0] =
+                mv[x].ref[1] = -1;
+            }
+        } else if (b->comp) {
+            for (x = 0; x < w4; x++) {
+                mv[x].ref[0] = b->ref[0];
+                mv[x].ref[1] = b->ref[1];
+                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
+                AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
+            }
+        } else {
+            for (x = 0; x < w4; x++) {
+                mv[x].ref[0] = b->ref[0];
+                mv[x].ref[1] = -1;
+                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
+            }
+        }
+    }
+}
+
+// Decodes the coefficient tokens of one block from c into coef, updating the cnt/eob counters; returns the scan index at which decoding stopped. FIXME merge cnt/eob arguments?
+static av_always_inline int
+decode_coeffs_b_generic(VPXRangeCoder *c, int16_t *coef, int n_coeffs, // c: range coder read from; coef: output; n_coeffs: upper bound on scan index
+                        int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3], // flags select the /2 scaling, the store width and the extra bits of the largest category
+                        unsigned (*eob)[6][2], const uint8_t (*p)[6][11], // cnt/eob/p are all indexed [band][nnz context]
+                        int nnz, const int16_t *scan, const int16_t (*nb)[2], // nnz: initial context; scan: scan index -> position; nb: two positions whose cache entries form the next context
+                        const int16_t *band_counts, const int16_t *qmul) // qmul[0] scales scan index 0, qmul[1] every later index
+{
+    int i = 0, band = 0, band_left = band_counts[band]; // band_left: scan positions remaining in the current band
+    const uint8_t *tp = p[0][nnz]; // probabilities for the current band and context
+    uint8_t cache[1024]; // per-position token class (0..5); NOTE(review): not initialised, presumably nb[] only refers to positions already written - confirm
+
+    do {
+        int val, rc;
+
+        val = vpx_rac_get_prob_branchy(c, tp[0]); // eob
+        eob[band][nnz][val]++; // counted for both outcomes
+        if (!val)
+            break;
+
+skip_eob: // re-entered after a zero token, so no eob decision is read in that case
+        if (!vpx_rac_get_prob_branchy(c, tp[1])) { // zero
+            cnt[band][nnz][0]++;
+            if (!--band_left)
+                band_left = band_counts[++band];
+            cache[scan[i]] = 0;
+            nnz            = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1; // rounded average of the two neighbour classes
+            tp             = p[band][nnz];
+            if (++i == n_coeffs)
+                break;  //invalid input; blocks should end with EOB
+            goto skip_eob;
+        }
+
+        rc = scan[i]; // index into cache/coef for scan index i
+        if (!vpx_rac_get_prob_branchy(c, tp[2])) { // one
+            cnt[band][nnz][1]++;
+            val       = 1;
+            cache[rc] = 1;
+        } else {
+            cnt[band][nnz][2]++; // every magnitude above one shares this counter
+            if (!vpx_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
+                if (!vpx_rac_get_prob_branchy(c, tp[4])) {
+                    cache[rc] = val = 2;
+                } else {
+                    val       = 3 + vpx_rac_get_prob(c, tp[5]);
+                    cache[rc] = 3;
+                }
+            } else if (!vpx_rac_get_prob_branchy(c, tp[6])) { // cat1/2
+                cache[rc] = 4;
+                if (!vpx_rac_get_prob_branchy(c, tp[7])) {
+                    val  =  vpx_rac_get_prob(c, 159) + 5; // extra bits below use literal probabilities instead of tp[]
+                } else {
+                    val  = (vpx_rac_get_prob(c, 165) << 1) + 7;
+                    val +=  vpx_rac_get_prob(c, 145);
+                }
+            } else { // cat 3-6
+                cache[rc] = 5;
+                if (!vpx_rac_get_prob_branchy(c, tp[8])) {
+                    if (!vpx_rac_get_prob_branchy(c, tp[9])) {
+                        val  = 11 + (vpx_rac_get_prob(c, 173) << 2);
+                        val +=      (vpx_rac_get_prob(c, 148) << 1);
+                        val +=       vpx_rac_get_prob(c, 140);
+                    } else {
+                        val  = 19 + (vpx_rac_get_prob(c, 176) << 3);
+                        val +=      (vpx_rac_get_prob(c, 155) << 2);
+                        val +=      (vpx_rac_get_prob(c, 140) << 1);
+                        val +=       vpx_rac_get_prob(c, 135);
+                    }
+                } else if (!vpx_rac_get_prob_branchy(c, tp[10])) {
+                    val  = (vpx_rac_get_prob(c, 180) << 4) + 35;
+                    val += (vpx_rac_get_prob(c, 157) << 3);
+                    val += (vpx_rac_get_prob(c, 141) << 2);
+                    val += (vpx_rac_get_prob(c, 134) << 1);
+                    val +=  vpx_rac_get_prob(c, 130);
+                } else {
+                    val = 67; // base of the largest category; its extra bits are read most significant first
+                    if (!is8bitsperpixel) { // two more high bits, four when bpp == 12
+                        if (bpp == 12) {
+                            val += vpx_rac_get_prob(c, 255) << 17;
+                            val += vpx_rac_get_prob(c, 255) << 16;
+                        }
+                        val +=  (vpx_rac_get_prob(c, 255) << 15);
+                        val +=  (vpx_rac_get_prob(c, 255) << 14);
+                    }
+                    val += (vpx_rac_get_prob(c, 254) << 13);
+                    val += (vpx_rac_get_prob(c, 254) << 12);
+                    val += (vpx_rac_get_prob(c, 254) << 11);
+                    val += (vpx_rac_get_prob(c, 252) << 10);
+                    val += (vpx_rac_get_prob(c, 249) << 9);
+                    val += (vpx_rac_get_prob(c, 243) << 8);
+                    val += (vpx_rac_get_prob(c, 230) << 7);
+                    val += (vpx_rac_get_prob(c, 196) << 6);
+                    val += (vpx_rac_get_prob(c, 177) << 5);
+                    val += (vpx_rac_get_prob(c, 153) << 4);
+                    val += (vpx_rac_get_prob(c, 140) << 3);
+                    val += (vpx_rac_get_prob(c, 133) << 2);
+                    val += (vpx_rac_get_prob(c, 130) << 1);
+                    val +=  vpx_rac_get_prob(c, 129);
+                }
+            }
+        }
+#define STORE_COEF(c, i, v) do { \
+    if (is8bitsperpixel) { \
+        c[i] = v; \
+    } else { \
+        AV_WN32A(&c[i * 2], v); \
+    } \
+} while (0)
+        if (!--band_left)
+            band_left = band_counts[++band];
+        if (is_tx32x32) // one more bit selects the sign; the product is formed in unsigned arithmetic and halved here
+            STORE_COEF(coef, rc, (int)((vp89_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]) / 2);
+        else
+            STORE_COEF(coef, rc, (vp89_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]);
+        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1; // context for the next token
+        tp = p[band][nnz];
+    } while (++i < n_coeffs);
+
+    return i; // scan index at the eob break, or n_coeffs when the loop ran out
+}
+
+/* Calls decode_coeffs_b_generic() on td->c with is_tx32x32 = 0, is8bitsperpixel = 1, bpp = 8. */
+static int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned (*cnt)[6][3],
+                                unsigned (*eob)[6][2], const uint8_t (*p)[6][11], int nnz,
+                                const int16_t *scan, const int16_t (*nb)[2],
+                                const int16_t *band_counts, const int16_t *qmul)
+{
+    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0 /* is_tx32x32 */, 1 /* is8bitsperpixel */,
+                                   8 /* bpp */, cnt, eob, p, nnz, scan, nb, band_counts, qmul);
+}
+
+/* Calls decode_coeffs_b_generic() on td->c with is_tx32x32 = 1, is8bitsperpixel = 1, bpp = 8. */
+static int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned (*cnt)[6][3],
+                                  unsigned (*eob)[6][2], const uint8_t (*p)[6][11], int nnz,
+                                  const int16_t *scan, const int16_t (*nb)[2],
+                                  const int16_t *band_counts, const int16_t *qmul)
+{
+    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1 /* is_tx32x32 */, 1 /* is8bitsperpixel */,
+                                   8 /* bpp */, cnt, eob, p, nnz, scan, nb, band_counts, qmul);
+}
+
+/* Calls decode_coeffs_b_generic() on td->c with is_tx32x32 = 0, is8bitsperpixel = 0 and bpp read from td->s. */
+static int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned (*cnt)[6][3],
+                                 unsigned (*eob)[6][2], const uint8_t (*p)[6][11], int nnz, const int16_t *scan,
+                                 const int16_t (*nb)[2], const int16_t *band_counts, const int16_t *qmul)
+{
+    const int bpp = td->s->s.h.bpp;
+    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0 /* is_tx32x32 */, 0 /* is8bitsperpixel */,
+                                   bpp, cnt, eob, p, nnz, scan, nb, band_counts, qmul);
+}
+
+/* Calls decode_coeffs_b_generic() on td->c with is_tx32x32 = 1, is8bitsperpixel = 0 and bpp read from td->s. */
+static int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned (*cnt)[6][3],
+                                   unsigned (*eob)[6][2], const uint8_t (*p)[6][11], int nnz, const int16_t *scan,
+                                   const int16_t (*nb)[2], const int16_t *band_counts, const int16_t *qmul)
+{
+    const int bpp = td->s->s.h.bpp;
+    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1 /* is_tx32x32 */, 0 /* is8bitsperpixel */,
+                                   bpp, cnt, eob, p, nnz, scan, nb, band_counts, qmul);
+}
+
+static av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel)
+{
+    const VP9Context *s = td->s;
+    VP9Block *b = td->b;
+    int row = td->row, col = td->col;
+    const uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
+    unsigned (*c)[6][3] = td->counts.coef[b->tx][0 /* y */][!b->intra];
+    unsigned (*e)[6][2] = td->counts.eob[b->tx][0 /* y */][!b->intra];
+    int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1;
+    int end_x = FFMIN(2 * (s->cols - col), w4);
+    int end_y = FFMIN(2 * (s->rows - row), h4);
+    int n, pl, x, y, ret;
+    const int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
+    int tx = 4 * s->s.h.lossless + b->tx;
+    const int16_t * const *yscans = ff_vp9_scans[tx];
+    const int16_t (* const * ynbs)[2] = ff_vp9_scans_nb[tx];
+    const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT];
+    const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT];
+    uint8_t *a = &s->above_y_nnz_ctx[col * 2];
+    uint8_t *l = &td->left_y_nnz_ctx[(row & 7) << 1];
+    static const int16_t band_counts[4][8] = {
+        { 1, 2, 3, 4,  3,   16 - 13 },
+        { 1, 2, 3, 4, 11,   64 - 21 },
+        { 1, 2, 3, 4, 11,  256 - 21 },
+        { 1, 2, 3, 4, 11, 1024 - 21 },
+    };
+    const int16_t *y_band_counts = band_counts[b->tx];
+    const int16_t *uv_band_counts = band_counts[b->uvtx];
+    int bytesperpixel = is8bitsperpixel ? 1 : 2;
+    int total_coeff = 0;
+
+#define MERGE(la, end, step, rd) \
+    for (n = 0; n < end; n += step) \
+        la[n] = !!rd(&la[n])
+#define MERGE_CTX(step, rd) \
+    do { \
+        MERGE(l, end_y, step, rd); \
+        MERGE(a, end_x, step, rd); \
+    } while (0)
+
+#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
+    for (n = 0, y = 0; y < end_y; y += step) { \
+        for (x = 0; x < end_x; x += step, n += step * step) { \
+            enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \
+            ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
+                                    (td, td->block + 16 * n * bytesperpixel, 16 * step * step, \
+                                     c, e, p, a[x] + l[y], yscans[txtp], \
+                                     ynbs[txtp], y_band_counts, qmul[0]); \
+            a[x] = l[y] = !!ret; \
+            total_coeff |= !!ret; \
+            if (step >= 4) { \
+                AV_WN16A(&td->eob[n], ret); \
+            } else { \
+                td->eob[n] = ret; \
+            } \
+        } \
+    }
+
+#define SPLAT(la, end, step, cond) \
+    if (step == 2) { \
+        for (n = 1; n < end; n += step) \
+            la[n] = la[n - 1]; \
+    } else if (step == 4) { \
+        if (cond) { \
+            for (n = 0; n < end; n += step) \
+                AV_WN32A(&la[n], la[n] * 0x01010101); \
+        } else { \
+            for (n = 0; n < end; n += step) \
+                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
+        } \
+    } else /* step == 8 */ { \
+        if (cond) { \
+            if (HAVE_FAST_64BIT) { \
+                for (n = 0; n < end; n += step) \
+                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
+            } else { \
+                for (n = 0; n < end; n += step) { \
+                    uint32_t v32 = la[n] * 0x01010101; \
+                    AV_WN32A(&la[n],     v32); \
+                    AV_WN32A(&la[n + 4], v32); \
+                } \
+            } \
+        } else { \
+            for (n = 0; n < end; n += step) \
+                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
+        } \
+    }
+#define SPLAT_CTX(step) \
+    do { \
+        SPLAT(a, end_x, step, end_x == w4); \
+        SPLAT(l, end_y, step, end_y == h4); \
+    } while (0)
+
+    /* y tokens */
+    switch (b->tx) {
+    case TX_4X4:
+        DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
+        break;
+    case TX_8X8:
+        MERGE_CTX(2, AV_RN16A);
+        DECODE_Y_COEF_LOOP(2, 0,);
+        SPLAT_CTX(2);
+        break;
+    case TX_16X16:
+        MERGE_CTX(4, AV_RN32A);
+        DECODE_Y_COEF_LOOP(4, 0,);
+        SPLAT_CTX(4);
+        break;
+    case TX_32X32:
+        MERGE_CTX(8, AV_RN64A);
+        DECODE_Y_COEF_LOOP(8, 0, 32);
+        SPLAT_CTX(8);
+        break;
+    }
+
+#define DECODE_UV_COEF_LOOP(step, v) \
+    for (n = 0, y = 0; y < end_y; y += step) { \
+        for (x = 0; x < end_x; x += step, n += step * step) { \
+            ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
+                                    (td, td->uvblock[pl] + 16 * n * bytesperpixel, \
+                                     16 * step * step, c, e, p, a[x] + l[y], \
+                                     uvscan, uvnb, uv_band_counts, qmul[1]); \
+            a[x] = l[y] = !!ret; \
+            total_coeff |= !!ret; \
+            if (step >= 4) { \
+                AV_WN16A(&td->uveob[pl][n], ret); \
+            } else { \
+                td->uveob[pl][n] = ret; \
+            } \
+        } \
+    }
+
+    p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
+    c = td->counts.coef[b->uvtx][1 /* uv */][!b->intra];
+    e = td->counts.eob[b->uvtx][1 /* uv */][!b->intra];
+    w4 >>= s->ss_h;
+    end_x >>= s->ss_h;
+    h4 >>= s->ss_v;
+    end_y >>= s->ss_v;
+    for (pl = 0; pl < 2; pl++) {
+        a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
+        l = &td->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
+        switch (b->uvtx) {
+        case TX_4X4:
+            DECODE_UV_COEF_LOOP(1,);
+            break;
+        case TX_8X8:
+            MERGE_CTX(2, AV_RN16A);
+            DECODE_UV_COEF_LOOP(2,);
+            SPLAT_CTX(2);
+            break;
+        case TX_16X16:
+            MERGE_CTX(4, AV_RN32A);
+            DECODE_UV_COEF_LOOP(4,);
+            SPLAT_CTX(4);
+            break;
+        case TX_32X32:
+            MERGE_CTX(8, AV_RN64A);
+            DECODE_UV_COEF_LOOP(8, 32);
+            SPLAT_CTX(8);
+            break;
+        }
+    }
+
+    return total_coeff;
+}
+
+static int decode_coeffs_8bpp(VP9TileData *td) // 8bpp wrapper; returns decode_coeffs()'s total_coeff result
+{
+    return decode_coeffs(td, 1); // 1 is presumably the is8bitsperpixel flag (selects the *_8bpp readers) -- confirm
+}
+
+static int decode_coeffs_16bpp(VP9TileData *td) // 16bpp wrapper; returns decode_coeffs()'s total_coeff result
+{
+    return decode_coeffs(td, 0); // 0 is presumably the is8bitsperpixel flag (selects the *_16bpp readers) -- confirm
+}
+
+static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
+                                        int row_and_7, int col_and_7, // block origin; ff_vp9_decode_block() passes row & 7, col & 7
+                                        int w, int h, int col_end, int row_end, // w/h: clipped block size; *_end: nonzero only at an odd-sized frame edge
+                                        enum TxfmMode tx, int skip_inter) // skip_inter: caller passes !b->intra && b->skip
+{
+    static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 }; // indexed by ss_h
+    static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 }; // indexed by ss_v
+    /* ORs this block's bits into mask[0][y][i] and mask[1][y][i]; existing bits are never cleared. */
+    // FIXME I'm pretty sure all loops can be replaced by a single LUT if
+    // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
+    // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
+    // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
+
+    // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
+    // edges. This means that for UV, we work on two subsampled blocks at
+    // a time, and we only use the topleft block's mode information to set
+    // things like block strength. Thus, for any block size smaller than
+    // 16x16, ignore the odd portion of the block.
+    if (tx == TX_4X4 && (ss_v | ss_h)) {
+        if (h == ss_v) { // i.e. h == 1 with ss_v set (h == 0 is presumably never passed -- confirm)
+            if (row_and_7 & 1)
+                return;
+            if (!row_end)
+                h += 1;
+        }
+        if (w == ss_h) { // same adjustment along the horizontal axis
+            if (col_and_7 & 1)
+                return;
+            if (!col_end)
+                w += 1;
+        }
+    }
+
+    if (tx == TX_4X4 && !skip_inter) { // 4x4 transform, block is not a skipped inter block
+        int t = 1 << col_and_7, m_col = (t << w) - t, y; // t: bit of the first column; m_col: one bit per covered column
+        // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
+        int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
+
+        for (y = row_and_7; y < h + row_and_7; y++) {
+            int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]); // 1 when y has none of the row-mask bits set, else 2
+
+            mask[0][y][1] |= m_row_8;
+            mask[0][y][2] |= m_row_4;
+            // for odd lines, if the odd col is not being filtered,
+            // skip odd row also:
+            // .---. <-- a
+            // |   |
+            // |___| <-- b
+            // ^   ^
+            // c   d
+            //
+            // if a/c are even row/col and b/d are odd, and d is skipped,
+            // e.g. right edge of size-66x66.webm, then skip b also (bug)
+            if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
+                mask[1][y][col_mask_id] |= (t << (w - 1)) - t; // m_col without its topmost column bit
+            } else {
+                mask[1][y][col_mask_id] |= m_col;
+            }
+            if (!ss_h)
+                mask[0][y][3] |= m_col;
+            if (!ss_v) {
+                if (ss_h && (col_end & 1))
+                    mask[1][y][3] |= (t << (w - 1)) - t;
+                else
+                    mask[1][y][3] |= m_col;
+            }
+        }
+    } else {
+        int y, t = 1 << col_and_7, m_col = (t << w) - t; // t: bit of the first column; m_col: one bit per covered column
+
+        if (!skip_inter) {
+            int mask_id = (tx == TX_8X8); // third mask index: 1 for TX_8X8, 0 for larger transforms
+            int l2 = tx + ss_h - 1, step1d;
+            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 }; // every 1st / 2nd / 4th / 8th bit
+            int m_row = m_col & masks[l2]; // keeps one column bit per (1 << l2) columns
+
+            // at odd UV col/row edges tx16/tx32 loopfilter edges, force
+            // 8wd loopfilter to prevent going off the visible edge.
+            if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) { // (w ^ (w - 1)) == 1 holds exactly when w is odd
+                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
+                int m_row_8 = m_row - m_row_16;
+
+                for (y = row_and_7; y < h + row_and_7; y++) {
+                    mask[0][y][0] |= m_row_16;
+                    mask[0][y][1] |= m_row_8;
+                }
+            } else {
+                for (y = row_and_7; y < h + row_and_7; y++)
+                    mask[0][y][mask_id] |= m_row;
+            }
+
+            l2 = tx + ss_v - 1;
+            step1d = 1 << l2; // mask[1] rows are visited in strides of step1d
+            if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) { // odd h, mirroring the odd-w case above
+                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
+                    mask[1][y][0] |= m_col;
+                if (y - row_and_7 == h - 1) // the stride landed exactly on the final row: use index 1 there
+                    mask[1][y][1] |= m_col;
+            } else {
+                for (y = row_and_7; y < h + row_and_7; y += step1d)
+                    mask[1][y][mask_id] |= m_col;
+            }
+        } else if (tx != TX_4X4) { // skip_inter with a transform larger than 4x4
+            int mask_id;
+            // Only the first row receives m_col in mask[1]; every row receives just t in mask[0].
+            mask_id = (tx == TX_8X8) || (h == ss_v);
+            mask[1][row_and_7][mask_id] |= m_col;
+            mask_id = (tx == TX_8X8) || (w == ss_h);
+            for (y = row_and_7; y < h + row_and_7; y++)
+                mask[0][y][mask_id] |= t;
+        } else { // skip_inter with tx == TX_4X4
+            int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8; // exactly one of t8 / t4 is nonzero
+
+            for (y = row_and_7; y < h + row_and_7; y++) {
+                mask[0][y][2] |= t4;
+                mask[0][y][1] |= t8;
+            }
+            mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
+        }
+    }
+}
+
+void ff_vp9_decode_block(VP9TileData *td, int row, int col,
+                         VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, // yoff/uvoff: offsets added to f->data[] for this block
+                         enum BlockLevel bl, enum BlockPartition bp) // folded into the block size as bl * 3 + bp
+{
+    const VP9Context *s = td->s;
+    VP9Block *b = td->b;
+    enum BlockSize bs = bl * 3 + bp;
+    int bytesperpixel = s->bytesperpixel;
+    int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl;
+    int emu[2]; // [0]: luma, [1]: chroma; set when the block is decoded into td->tmp_* instead of the frame
+    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
+    /* Per block: parse mode and coefficients (s->pass < 2), reconstruct, then record loop-filter masks. */
+    td->row = row;
+    td->row7 = row & 7;
+    td->col = col;
+    td->col7 = col & 7;
+    // td->min_mv / td->max_mv are derived from row/col, w4/h4 and s->cols/s->rows.
+    td->min_mv.x = -(128 + col * 64);
+    td->min_mv.y = -(128 + row * 64);
+    td->max_mv.x = 128 + (s->cols - col - w4) * 64;
+    td->max_mv.y = 128 + (s->rows - row - h4) * 64;
+
+    if (s->pass < 2) { // parsing stage; not run when s->pass == 2
+        b->bs = bs;
+        b->bl = bl;
+        b->bp = bp;
+        decode_mode(td);
+        b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
+                           (s->ss_v && h4 * 2 == (1 << b->tx))); // -1 when (1 << tx) == 2 * w4 (or h4) on a subsampled axis
+
+        if (td->block_structure) { // optional per-block record: position and log2 of w4/h4
+            td->block_structure[td->nb_block_structure].row = row;
+            td->block_structure[td->nb_block_structure].col = col;
+            td->block_structure[td->nb_block_structure].block_size_idx_x = av_log2(w4);
+            td->block_structure[td->nb_block_structure].block_size_idx_y = av_log2(h4);
+            td->nb_block_structure++;
+        }
+
+        if (!b->skip) {
+            int has_coeffs;
+
+            if (bytesperpixel == 1) {
+                has_coeffs = decode_coeffs_8bpp(td);
+            } else {
+                has_coeffs = decode_coeffs_16bpp(td);
+            }
+            if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) { // inter block (bs <= BS_8x8) without coefficients becomes a skip block
+                b->skip = 1;
+                memset(&s->above_skip_ctx[col], 1, w4);
+                memset(&td->left_skip_ctx[td->row7], 1, h4);
+            }
+        } else { // b->skip set: zero the above/left nnz contexts spanned by this block
+            int row7 = td->row7;
+
+#define SPLAT_ZERO_CTX(v, n) \
+    switch (n) { \
+    case 1:  v = 0;          break; \
+    case 2:  AV_ZERO16(&v);  break; \
+    case 4:  AV_ZERO32(&v);  break; \
+    case 8:  AV_ZERO64(&v);  break; \
+    case 16: AV_ZERO128(&v); break; \
+    }
+#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
+    do { \
+        SPLAT_ZERO_CTX(dir##_y_##var[off * 2], n * 2); \
+        if (s->ss_##dir2) { \
+            SPLAT_ZERO_CTX(dir##_uv_##var[0][off], n); \
+            SPLAT_ZERO_CTX(dir##_uv_##var[1][off], n); \
+        } else { \
+            SPLAT_ZERO_CTX(dir##_uv_##var[0][off * 2], n * 2); \
+            SPLAT_ZERO_CTX(dir##_uv_##var[1][off * 2], n * 2); \
+        } \
+    } while (0)
+
+            switch (w4) { // constant n lets SPLAT_ZERO_CTX resolve to a single fixed-size store
+            case 1: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 1, h); break;
+            case 2: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 2, h); break;
+            case 4: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 4, h); break;
+            case 8: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 8, h); break;
+            }
+            switch (h4) {
+            case 1: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 1, v); break;
+            case 2: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 2, v); break;
+            case 4: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 4, v); break;
+            case 8: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 8, v); break;
+            }
+        }
+
+        if (s->pass == 1) { // parse-only pass: step the s->td[0] buffers to the next block and return
+            s->td[0].b++;
+            s->td[0].block += w4 * h4 * 64 * bytesperpixel;
+            s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
+            s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
+            s->td[0].eob += 4 * w4 * h4;
+            s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
+            s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
+
+            return;
+        }
+    }
+
+    // emulated overhangs if the stride of the target buffer can't hold. This
+    // makes it possible to support emu-edge and so on even if we have large block
+    // overhangs
+    emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
+             (row + h4) > s->rows;
+    emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
+             (row + h4) > s->rows;
+    if (emu[0]) { // reconstruct luma into td->tmp_y with a fixed stride of 128
+        td->dst[0] = td->tmp_y;
+        td->y_stride = 128;
+    } else {
+        td->dst[0] = f->data[0] + yoff;
+        td->y_stride = f->linesize[0];
+    }
+    if (emu[1]) { // reconstruct chroma into td->tmp_uv[] with a fixed stride of 128
+        td->dst[1] = td->tmp_uv[0];
+        td->dst[2] = td->tmp_uv[1];
+        td->uv_stride = 128;
+    } else {
+        td->dst[1] = f->data[1] + uvoff;
+        td->dst[2] = f->data[2] + uvoff;
+        td->uv_stride = f->linesize[1];
+    }
+    if (b->intra) {
+        if (s->s.h.bpp > 8) {
+            ff_vp9_intra_recon_16bpp(td, yoff, uvoff);
+        } else {
+            ff_vp9_intra_recon_8bpp(td, yoff, uvoff);
+        }
+    } else {
+        if (s->s.h.bpp > 8) {
+            ff_vp9_inter_recon_16bpp(td);
+        } else {
+            ff_vp9_inter_recon_8bpp(td);
+        }
+    }
+    if (emu[0]) { // copy the in-frame w x h part of tmp_y back into the frame
+        int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
+        // w is consumed in power-of-two widths (64 >> n); mc[n][0][0][0][0] is presumably a plain copy -- confirm
+        for (n = 0; o < w; n++) {
+            int bw = 64 >> n;
+
+            av_assert2(n <= 4);
+            if (w & bw) {
+                s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
+                                         td->tmp_y + o * bytesperpixel, 128, h, 0, 0);
+                o += bw;
+            }
+        }
+    }
+    if (emu[1]) { // same copy-back for both chroma planes, starting at n = s->ss_h
+        int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
+        int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
+
+        for (n = s->ss_h; o < w; n++) {
+            int bw = 64 >> n;
+
+            av_assert2(n <= 4);
+            if (w & bw) {
+                s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
+                                         td->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
+                s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
+                                         td->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
+                o += bw;
+            }
+        }
+    }
+
+    // pick filter level and find edges to apply filter to
+    if (s->s.h.filter.level &&
+        (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
+                                                      [b->mode[3] != ZEROMV]) > 0) {
+        int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
+        int skip_inter = !b->intra && b->skip, col7 = td->col7, row7 = td->row7;
+
+        setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
+        mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter); // luma: no subsampling, no *_end
+        if (s->ss_h || s->ss_v) // chroma masks are only built here when at least one axis is subsampled
+            mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
+                       s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
+                       s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
+                       b->uvtx, skip_inter);
+    }
+
+    if (s->pass == 2) { // reconstruction pass: step the s->td[0] buffers to the next block
+        s->td[0].b++;
+        s->td[0].block += w4 * h4 * 64 * bytesperpixel;
+        s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
+        s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
+        s->td[0].eob += 4 * w4 * h4;
+        s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
+        s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
+    }
+}
diff --git a/media/ffvpx/libavcodec/vp9data.c b/media/ffvpx/libavcodec/vp9data.c
new file mode 100644
index 0000000000..7af8a97b1e
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9data.c
@@ -0,0 +1,2247 @@
+/*
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vp9.h"
+#include "vp9data.h"
+
+const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2] = { // [table][block size][0: width, 1: height]
+    { // table 0: presumably 4-pixel units (table 1 holds these values halved, floored at 1) -- confirm
+        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
+        {  4,  4 }, {  4, 2 }, { 2,  4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
+    }, { // table 1: read as w4/h4 by ff_vp9_decode_block(), which multiplies by 8 to get pixels
+        {  8,  8 }, {  8, 4 }, { 4,  8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
+        {  2,  2 }, {  2, 1 }, { 1,  2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
+    }
+};
+
+const int8_t ff_vp9_partition_tree[3][2] = { // node rows: [0] taken on bit 0, [1] on bit 1; > 0 = next row, negated value = leaf
+    { -PARTITION_NONE, 1 },                      // '0'
+        { -PARTITION_H, 2 },                     // '10'
+            { -PARTITION_V, -PARTITION_SPLIT },  // '110', '111'
+};
+
+const uint8_t ff_vp9_default_kf_partition_probs[4][4][3] = { // [block level][above/left split context][3 probs, presumably one per partition-tree node]
+    { /* 64x64 -> 32x32 */
+        { 174,  35,  49 } /* a/l both not split */,
+        {  68,  11,  27 } /* a split, l not split */,
+        {  57,  15,   9 } /* l split, a not split */,
+        {  12,   3,   3 } /* a/l both split */
+    }, { /* 32x32 -> 16x16 */
+        { 150,  40,  39 } /* a/l both not split */,
+        {  78,  12,  26 } /* a split, l not split */,
+        {  67,  33,  11 } /* l split, a not split */,
+        {  24,   7,   5 } /* a/l both split */,
+    }, { /* 16x16 -> 8x8 */
+        { 149,  53,  53 } /* a/l both not split */,
+        {  94,  20,  48 } /* a split, l not split */,
+        {  83,  53,  24 } /* l split, a not split */,
+        {  52,  18,  18 } /* a/l both split */,
+    }, { /* 8x8 -> 4x4 */
+        { 158,  97,  94 } /* a/l both not split */,
+        {  93,  24,  99 } /* a split, l not split */,
+        {  85, 119,  44 } /* l split, a not split */,
+        {  62,  59,  67 } /* a/l both split */,
+    },
+};
+
+const int8_t ff_vp9_segmentation_tree[7][2] = { // 7 nodes, 8 leaves; > 0 = next row; "-0" is plain 0, so readers presumably treat <= 0 as a leaf -- confirm
+    { 1, 2 },
+        { 3, 4 },
+        { 5, 6 },
+            { -0, -1 },  // '00x'
+            { -2, -3 },  // '01x'
+            { -4, -5 },  // '10x'
+            { -6, -7 },  // '11x'
+};
+
+const int8_t ff_vp9_intramode_tree[9][2] = { // 9 nodes, 10 leaves (negated *_PRED values); > 0 = next row
+    { -DC_PRED, 1 },                                                  // '0'
+        { -TM_VP8_PRED, 2 },                                          // '10'
+            { -VERT_PRED, 3 },                                        // '110'
+                { 4, 6 },
+                    { -HOR_PRED, 5 },                                 // '11100'
+                        { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED },  // '11101x'
+                    { -DIAG_DOWN_LEFT_PRED, 7 },                      // '11110'
+                        { -VERT_LEFT_PRED, 8 },                       // '111110'
+                            { -HOR_DOWN_PRED, -HOR_UP_PRED },         // '111111x'
+};
+
+const uint8_t ff_vp9_default_kf_ymode_probs[10][10][9] = { // [above mode][left mode][9 probs, presumably one per ff_vp9_intramode_tree node]
+    { /* above = v */
+        {  43,  46, 168, 134, 107, 128,  69, 142,  92 } /* left = v */,
+        {  44,  29,  68, 159, 201, 177,  50,  57,  77 } /* left = h */,
+        {  63,  36, 126, 146, 123, 158,  60,  90,  96 } /* left = dc */,
+        {  58,  38,  76, 114,  97, 172,  78, 133,  92 } /* left = d45 */,
+        {  46,  41,  76, 140,  63, 184,  69, 112,  57 } /* left = d135 */,
+        {  38,  32,  85, 140,  46, 112,  54, 151, 133 } /* left = d117 */,
+        {  39,  27,  61, 131, 110, 175,  44,  75, 136 } /* left = d153 */,
+        {  47,  35,  80, 100,  74, 143,  64, 163,  74 } /* left = d63 */,
+        {  52,  30,  74, 113, 130, 175,  51,  64,  58 } /* left = d27 */,
+        {  36,  61, 116, 114, 128, 162,  80, 125,  82 } /* left = tm */
+    }, { /* above = h */
+        {  55,  44,  68, 166, 179, 192,  57,  57, 108 } /* left = v */,
+        {  42,  26,  11, 199, 241, 228,  23,  15,  85 } /* left = h */,
+        {  82,  26,  26, 171, 208, 204,  44,  32, 105 } /* left = dc */,
+        {  68,  42,  19, 131, 160, 199,  55,  52,  83 } /* left = d45 */,
+        {  58,  50,  25, 139, 115, 232,  39,  52, 118 } /* left = d135 */,
+        {  50,  35,  33, 153, 104, 162,  64,  59, 131 } /* left = d117 */,
+        {  44,  24,  16, 150, 177, 202,  33,  19, 156 } /* left = d153 */,
+        {  53,  49,  21, 110, 116, 168,  59,  80,  76 } /* left = d63 */,
+        {  55,  27,  12, 153, 203, 218,  26,  27,  49 } /* left = d27 */,
+        {  38,  72,  19, 168, 203, 212,  50,  50, 107 } /* left = tm */
+    }, { /* above = dc */
+        {  92,  45, 102, 136, 116, 180,  74,  90, 100 } /* left = v */,
+        {  73,  32,  19, 187, 222, 215,  46,  34, 100 } /* left = h */,
+        { 137,  30,  42, 148, 151, 207,  70,  52,  91 } /* left = dc */,
+        {  91,  30,  32, 116, 121, 186,  93,  86,  94 } /* left = d45 */,
+        {  72,  35,  36, 149,  68, 206,  68,  63, 105 } /* left = d135 */,
+        {  73,  31,  28, 138,  57, 124,  55, 122, 151 } /* left = d117 */,
+        {  67,  23,  21, 140, 126, 197,  40,  37, 171 } /* left = d153 */,
+        {  74,  32,  27, 107,  86, 160,  63, 134, 102 } /* left = d63 */,
+        {  86,  27,  28, 128, 154, 212,  45,  43,  53 } /* left = d27 */,
+        {  59,  67,  44, 140, 161, 202,  78,  67, 119 } /* left = tm */
+    }, { /* above = d45 */
+        {  59,  38,  83, 112, 103, 162,  98, 136,  90 } /* left = v */,
+        {  62,  30,  23, 158, 200, 207,  59,  57,  50 } /* left = h */,
+        { 103,  26,  36, 129, 132, 201,  83,  80,  93 } /* left = dc */,
+        {  67,  30,  29,  84,  86, 191, 102,  91,  59 } /* left = d45 */,
+        {  60,  32,  33, 112,  71, 220,  64,  89, 104 } /* left = d135 */,
+        {  53,  26,  34, 130,  56, 149,  84, 120, 103 } /* left = d117 */,
+        {  53,  21,  23, 133, 109, 210,  56,  77, 172 } /* left = d153 */,
+        {  61,  29,  29,  93,  97, 165,  83, 175, 162 } /* left = d63 */,
+        {  77,  19,  29, 112, 142, 228,  55,  66,  36 } /* left = d27 */,
+        {  47,  47,  43, 114, 137, 181, 100,  99,  95 } /* left = tm */
+    }, { /* above = d135 */
+        {  53,  40,  55, 139,  69, 183,  61,  80, 110 } /* left = v */,
+        {  40,  29,  19, 161, 180, 207,  43,  24,  91 } /* left = h */,
+        {  69,  23,  29, 128,  83, 199,  46,  44, 101 } /* left = dc */,
+        {  60,  34,  19, 105,  61, 198,  53,  64,  89 } /* left = d45 */,
+        {  52,  31,  22, 158,  40, 209,  58,  62,  89 } /* left = d135 */,
+        {  44,  31,  29, 147,  46, 158,  56, 102, 198 } /* left = d117 */,
+        {  35,  19,  12, 135,  87, 209,  41,  45, 167 } /* left = d153 */,
+        {  51,  38,  25, 113,  58, 164,  70,  93,  97 } /* left = d63 */,
+        {  55,  25,  21, 118,  95, 215,  38,  39,  66 } /* left = d27 */,
+        {  47,  54,  34, 146, 108, 203,  72, 103, 151 } /* left = tm */
+    }, { /* above = d117 */
+        {  46,  27,  80, 150,  55, 124,  55, 121, 135 } /* left = v */,
+        {  36,  23,  27, 165, 149, 166,  54,  64, 118 } /* left = h */,
+        {  64,  19,  37, 156,  66, 138,  49,  95, 133 } /* left = dc */,
+        {  53,  21,  36, 131,  63, 163,  60, 109,  81 } /* left = d45 */,
+        {  40,  26,  35, 154,  40, 185,  51,  97, 123 } /* left = d135 */,
+        {  35,  19,  34, 179,  19,  97,  48, 129, 124 } /* left = d117 */,
+        {  36,  20,  26, 136,  62, 164,  33,  77, 154 } /* left = d153 */,
+        {  45,  26,  28, 129,  45, 129,  49, 147, 123 } /* left = d63 */,
+        {  45,  18,  32, 130,  90, 157,  40,  79,  91 } /* left = d27 */,
+        {  38,  44,  51, 136,  74, 162,  57,  97, 121 } /* left = tm */
+    }, { /* above = d153 */
+        {  56,  39,  58, 133, 117, 173,  48,  53, 187 } /* left = v */,
+        {  35,  21,  12, 161, 212, 207,  20,  23, 145 } /* left = h */,
+        {  75,  17,  22, 136, 138, 185,  32,  34, 166 } /* left = dc */,
+        {  56,  29,  19, 117, 109, 181,  55,  68, 112 } /* left = d45 */,
+        {  47,  29,  17, 153,  64, 220,  59,  51, 114 } /* left = d135 */,
+        {  46,  16,  24, 136,  76, 147,  41,  64, 172 } /* left = d117 */,
+        {  34,  17,  11, 108, 152, 187,  13,  15, 209 } /* left = d153 */,
+        {  55,  30,  18, 122,  79, 179,  44,  88, 116 } /* left = d63 */,
+        {  51,  24,  14, 115, 133, 209,  32,  26, 104 } /* left = d27 */,
+        {  37,  49,  25, 129, 168, 164,  41,  54, 148 } /* left = tm */
+    }, { /* above = d63 */
+        {  48,  34,  86, 101,  92, 146,  78, 179, 134 } /* left = v */,
+        {  47,  22,  24, 138, 187, 178,  68,  69,  59 } /* left = h */,
+        {  78,  23,  39, 111, 117, 170,  74, 124,  94 } /* left = dc */,
+        {  56,  25,  33, 105, 112, 187,  95, 177, 129 } /* left = d45 */,
+        {  48,  31,  27, 114,  63, 183,  82, 116,  56 } /* left = d135 */,
+        {  43,  28,  37, 121,  63, 123,  61, 192, 169 } /* left = d117 */,
+        {  42,  17,  24, 109,  97, 177,  56,  76, 122 } /* left = d153 */,
+        {  46,  23,  32,  74,  86, 150,  67, 183,  88 } /* left = d63 */,
+        {  58,  18,  28, 105, 139, 182,  70,  92,  63 } /* left = d27 */,
+        {  36,  38,  48,  92, 122, 165,  88, 137,  91 } /* left = tm */
+    }, { /* above = d27 */
+        {  62,  44,  61, 123, 105, 189,  48,  57,  64 } /* left = v */,
+        {  47,  25,  17, 175, 222, 220,  24,  30,  86 } /* left = h */,
+        {  82,  22,  32, 127, 143, 213,  39,  41,  70 } /* left = dc */,
+        {  68,  36,  17, 106, 102, 206,  59,  74,  74 } /* left = d45 */,
+        {  57,  39,  23, 151,  68, 216,  55,  63,  58 } /* left = d135 */,
+        {  49,  30,  35, 141,  70, 168,  82,  40, 115 } /* left = d117 */,
+        {  51,  25,  15, 136, 129, 202,  38,  35, 139 } /* left = d153 */,
+        {  59,  39,  19, 114,  75, 180,  77, 104,  42 } /* left = d63 */,
+        {  68,  26,  16, 111, 141, 215,  29,  28,  28 } /* left = d27 */,
+        {  40,  61,  26, 126, 152, 206,  61,  59,  93 } /* left = tm */
+    }, { /* above = tm */
+        {  44,  78, 115, 132, 119, 173,  71, 112,  93 } /* left = v */,
+        {  39,  38,  21, 184, 227, 206,  42,  32,  64 } /* left = h */,
+        {  65,  70,  60, 155, 159, 199,  61,  60,  81 } /* left = dc */,
+        {  58,  47,  36, 124, 137, 193,  80,  82,  78 } /* left = d45 */,
+        {  49,  50,  35, 144,  95, 205,  63,  78,  59 } /* left = d135 */,
+        {  41,  53,  52, 148,  71, 142,  65, 128,  51 } /* left = d117 */,
+        {  40,  36,  28, 143, 143, 202,  40,  55, 137 } /* left = d153 */,
+        {  42,  44,  44, 104, 105, 164,  64, 130,  80 } /* left = d63 */,
+        {  52,  34,  29, 129, 183, 227,  42,  35,  43 } /* left = d27 */,
+        {  43,  81,  53, 140, 169, 204,  68,  84,  72 } /* left = tm */
+    }
+};
+
+const uint8_t ff_vp9_default_kf_uvmode_probs[10][9] = { // [y mode][9 probs, presumably one per ff_vp9_intramode_tree node]
+    { 118,  15, 123, 148, 131, 101,  44,  93, 131 } /* y = v */,
+    { 113,  12,  23, 188, 226, 142,  26,  32, 125 } /* y = h */,
+    { 144,  11,  54, 157, 195, 130,  46,  58, 108 } /* y = dc */,
+    { 120,  11,  50, 123, 163, 135,  64,  77, 103 } /* y = d45 */,
+    { 113,   9,  36, 155, 111, 157,  32,  44, 161 } /* y = d135 */,
+    { 116,   9,  55, 176,  76,  96,  37,  61, 149 } /* y = d117 */,
+    { 115,   9,  28, 141, 161, 167,  21,  25, 193 } /* y = d153 */,
+    { 116,  12,  64, 120, 140, 125,  49, 115, 121 } /* y = d63 */,
+    { 120,  12,  32, 145, 195, 142,  32,  38,  86 } /* y = d27 */,
+    { 102,  19,  66, 162, 182, 122,  35,  59, 128 } /* y = tm */
+};
+
+const int8_t ff_vp9_inter_mode_tree[3][2] = { // 3 nodes, 4 leaves (negated *MV mode values); > 0 = next row
+    { -ZEROMV, 1 },               // '0'
+        { -NEARESTMV, 2 },        // '10'
+            { -NEARMV, -NEWMV },  // '11x'
+};
+
+const int8_t ff_vp9_filter_tree[2][2] = { // 2 nodes, 3 leaves (-0, -1, -2); "-0" is plain 0
+    { -0,  1 },     // '0'
+        { -1, -2 }, // '1x'
+};
+
+const enum FilterMode ff_vp9_filter_lut[3] = { // presumably indexed by the 0..2 leaf read via ff_vp9_filter_tree -- confirm
+    FILTER_8TAP_REGULAR,
+    FILTER_8TAP_SMOOTH,
+    FILTER_8TAP_SHARP,
+};
+
+const int16_t ff_vp9_dc_qlookup[3][256] = {
+    {
+            4,     8,     8,     9,    10,    11,    12,    12,
+           13,    14,    15,    16,    17,    18,    19,    19,
+           20,    21,    22,    23,    24,    25,    26,    26,
+           27,    28,    29,    30,    31,    32,    32,    33,
+           34,    35,    36,    37,    38,    38,    39,    40,
+           41,    42,    43,    43,    44,    45,    46,    47,
+           48,    48,    49,    50,    51,    52,    53,    53,
+           54,    55,    56,    57,    57,    58,    59,    60,
+           61,    62,    62,    63,    64,    65,    66,    66,
+           67,    68,    69,    70,    70,    71,    72,    73,
+           74,    74,    75,    76,    77,    78,    78,    79,
+           80,    81,    81,    82,    83,    84,    85,    85,
+           87,    88,    90,    92,    93,    95,    96,    98,
+           99,   101,   102,   104,   105,   107,   108,   110,
+          111,   113,   114,   116,   117,   118,   120,   121,
+          123,   125,   127,   129,   131,   134,   136,   138,
+          140,   142,   144,   146,   148,   150,   152,   154,
+          156,   158,   161,   164,   166,   169,   172,   174,
+          177,   180,   182,   185,   187,   190,   192,   195,
+          199,   202,   205,   208,   211,   214,   217,   220,
+          223,   226,   230,   233,   237,   240,   243,   247,
+          250,   253,   257,   261,   265,   269,   272,   276,
+          280,   284,   288,   292,   296,   300,   304,   309,
+          313,   317,   322,   326,   330,   335,   340,   344,
+          349,   354,   359,   364,   369,   374,   379,   384,
+          389,   395,   400,   406,   411,   417,   423,   429,
+          435,   441,   447,   454,   461,   467,   475,   482,
+          489,   497,   505,   513,   522,   530,   539,   549,
+          559,   569,   579,   590,   602,   614,   626,   640,
+          654,   668,   684,   700,   717,   736,   755,   775,
+          796,   819,   843,   869,   896,   925,   955,   988,
+         1022,  1058,  1098,  1139,  1184,  1232,  1282,  1336,
+    }, {
+            4,     9,    10,    13,    15,    17,    20,    22,
+           25,    28,    31,    34,    37,    40,    43,    47,
+           50,    53,    57,    60,    64,    68,    71,    75,
+           78,    82,    86,    90,    93,    97,   101,   105,
+          109,   113,   116,   120,   124,   128,   132,   136,
+          140,   143,   147,   151,   155,   159,   163,   166,
+          170,   174,   178,   182,   185,   189,   193,   197,
+          200,   204,   208,   212,   215,   219,   223,   226,
+          230,   233,   237,   241,   244,   248,   251,   255,
+          259,   262,   266,   269,   273,   276,   280,   283,
+          287,   290,   293,   297,   300,   304,   307,   310,
+          314,   317,   321,   324,   327,   331,   334,   337,
+          343,   350,   356,   362,   369,   375,   381,   387,
+          394,   400,   406,   412,   418,   424,   430,   436,
+          442,   448,   454,   460,   466,   472,   478,   484,
+          490,   499,   507,   516,   525,   533,   542,   550,
+          559,   567,   576,   584,   592,   601,   609,   617,
+          625,   634,   644,   655,   666,   676,   687,   698,
+          708,   718,   729,   739,   749,   759,   770,   782,
+          795,   807,   819,   831,   844,   856,   868,   880,
+          891,   906,   920,   933,   947,   961,   975,   988,
+         1001,  1015,  1030,  1045,  1061,  1076,  1090,  1105,
+         1120,  1137,  1153,  1170,  1186,  1202,  1218,  1236,
+         1253,  1271,  1288,  1306,  1323,  1342,  1361,  1379,
+         1398,  1416,  1436,  1456,  1476,  1496,  1516,  1537,
+         1559,  1580,  1601,  1624,  1647,  1670,  1692,  1717,
+         1741,  1766,  1791,  1817,  1844,  1871,  1900,  1929,
+         1958,  1990,  2021,  2054,  2088,  2123,  2159,  2197,
+         2236,  2276,  2319,  2363,  2410,  2458,  2508,  2561,
+         2616,  2675,  2737,  2802,  2871,  2944,  3020,  3102,
+         3188,  3280,  3375,  3478,  3586,  3702,  3823,  3953,
+         4089,  4236,  4394,  4559,  4737,  4929,  5130,  5347,
+    }, {
+            4,    12,    18,    25,    33,    41,    50,    60,
+           70,    80,    91,   103,   115,   127,   140,   153,
+          166,   180,   194,   208,   222,   237,   251,   266,
+          281,   296,   312,   327,   343,   358,   374,   390,
+          405,   421,   437,   453,   469,   484,   500,   516,
+          532,   548,   564,   580,   596,   611,   627,   643,
+          659,   674,   690,   706,   721,   737,   752,   768,
+          783,   798,   814,   829,   844,   859,   874,   889,
+          904,   919,   934,   949,   964,   978,   993,  1008,
+         1022,  1037,  1051,  1065,  1080,  1094,  1108,  1122,
+         1136,  1151,  1165,  1179,  1192,  1206,  1220,  1234,
+         1248,  1261,  1275,  1288,  1302,  1315,  1329,  1342,
+         1368,  1393,  1419,  1444,  1469,  1494,  1519,  1544,
+         1569,  1594,  1618,  1643,  1668,  1692,  1717,  1741,
+         1765,  1789,  1814,  1838,  1862,  1885,  1909,  1933,
+         1957,  1992,  2027,  2061,  2096,  2130,  2165,  2199,
+         2233,  2267,  2300,  2334,  2367,  2400,  2434,  2467,
+         2499,  2532,  2575,  2618,  2661,  2704,  2746,  2788,
+         2830,  2872,  2913,  2954,  2995,  3036,  3076,  3127,
+         3177,  3226,  3275,  3324,  3373,  3421,  3469,  3517,
+         3565,  3621,  3677,  3733,  3788,  3843,  3897,  3951,
+         4005,  4058,  4119,  4181,  4241,  4301,  4361,  4420,
+         4479,  4546,  4612,  4677,  4742,  4807,  4871,  4942,
+         5013,  5083,  5153,  5222,  5291,  5367,  5442,  5517,
+         5591,  5665,  5745,  5825,  5905,  5984,  6063,  6149,
+         6234,  6319,  6404,  6495,  6587,  6678,  6769,  6867,
+         6966,  7064,  7163,  7269,  7376,  7483,  7599,  7715,
+         7832,  7958,  8085,  8214,  8352,  8492,  8635,  8788,
+         8945,  9104,  9275,  9450,  9639,  9832, 10031, 10245,
+        10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409,
+        12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812,
+        16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387,
+    }
+};
+
+/*
+ * Three lookup tables of 256 int16_t entries each.  Every row starts at 4
+ * and ends at 1828, 7312 and 29247 respectively.
+ * NOTE(review): going by the name, this is presumably the AC quantizer
+ * value indexed as [bit-depth class][quantizer index]; the roughly 4x
+ * growth from one row to the next would fit 8/10/12-bit content --
+ * confirm against the callers.
+ */
+const int16_t ff_vp9_ac_qlookup[3][256] = {
+    {
+            4,     8,     9,    10,    11,    12,    13,    14,
+           15,    16,    17,    18,    19,    20,    21,    22,
+           23,    24,    25,    26,    27,    28,    29,    30,
+           31,    32,    33,    34,    35,    36,    37,    38,
+           39,    40,    41,    42,    43,    44,    45,    46,
+           47,    48,    49,    50,    51,    52,    53,    54,
+           55,    56,    57,    58,    59,    60,    61,    62,
+           63,    64,    65,    66,    67,    68,    69,    70,
+           71,    72,    73,    74,    75,    76,    77,    78,
+           79,    80,    81,    82,    83,    84,    85,    86,
+           87,    88,    89,    90,    91,    92,    93,    94,
+           95,    96,    97,    98,    99,   100,   101,   102,
+          104,   106,   108,   110,   112,   114,   116,   118,
+          120,   122,   124,   126,   128,   130,   132,   134,
+          136,   138,   140,   142,   144,   146,   148,   150,
+          152,   155,   158,   161,   164,   167,   170,   173,
+          176,   179,   182,   185,   188,   191,   194,   197,
+          200,   203,   207,   211,   215,   219,   223,   227,
+          231,   235,   239,   243,   247,   251,   255,   260,
+          265,   270,   275,   280,   285,   290,   295,   300,
+          305,   311,   317,   323,   329,   335,   341,   347,
+          353,   359,   366,   373,   380,   387,   394,   401,
+          408,   416,   424,   432,   440,   448,   456,   465,
+          474,   483,   492,   501,   510,   520,   530,   540,
+          550,   560,   571,   582,   593,   604,   615,   627,
+          639,   651,   663,   676,   689,   702,   715,   729,
+          743,   757,   771,   786,   801,   816,   832,   848,
+          864,   881,   898,   915,   933,   951,   969,   988,
+         1007,  1026,  1046,  1066,  1087,  1108,  1129,  1151,
+         1173,  1196,  1219,  1243,  1267,  1292,  1317,  1343,
+         1369,  1396,  1423,  1451,  1479,  1508,  1537,  1567,
+         1597,  1628,  1660,  1692,  1725,  1759,  1793,  1828,
+    }, {
+            4,     9,    11,    13,    16,    18,    21,    24,
+           27,    30,    33,    37,    40,    44,    48,    51,
+           55,    59,    63,    67,    71,    75,    79,    83,
+           88,    92,    96,   100,   105,   109,   114,   118,
+          122,   127,   131,   136,   140,   145,   149,   154,
+          158,   163,   168,   172,   177,   181,   186,   190,
+          195,   199,   204,   208,   213,   217,   222,   226,
+          231,   235,   240,   244,   249,   253,   258,   262,
+          267,   271,   275,   280,   284,   289,   293,   297,
+          302,   306,   311,   315,   319,   324,   328,   332,
+          337,   341,   345,   349,   354,   358,   362,   367,
+          371,   375,   379,   384,   388,   392,   396,   401,
+          409,   417,   425,   433,   441,   449,   458,   466,
+          474,   482,   490,   498,   506,   514,   523,   531,
+          539,   547,   555,   563,   571,   579,   588,   596,
+          604,   616,   628,   640,   652,   664,   676,   688,
+          700,   713,   725,   737,   749,   761,   773,   785,
+          797,   809,   825,   841,   857,   873,   889,   905,
+          922,   938,   954,   970,   986,  1002,  1018,  1038,
+         1058,  1078,  1098,  1118,  1138,  1158,  1178,  1198,
+         1218,  1242,  1266,  1290,  1314,  1338,  1362,  1386,
+         1411,  1435,  1463,  1491,  1519,  1547,  1575,  1603,
+         1631,  1663,  1695,  1727,  1759,  1791,  1823,  1859,
+         1895,  1931,  1967,  2003,  2039,  2079,  2119,  2159,
+         2199,  2239,  2283,  2327,  2371,  2415,  2459,  2507,
+         2555,  2603,  2651,  2703,  2755,  2807,  2859,  2915,
+         2971,  3027,  3083,  3143,  3203,  3263,  3327,  3391,
+         3455,  3523,  3591,  3659,  3731,  3803,  3876,  3952,
+         4028,  4104,  4184,  4264,  4348,  4432,  4516,  4604,
+         4692,  4784,  4876,  4972,  5068,  5168,  5268,  5372,
+         5476,  5584,  5692,  5804,  5916,  6032,  6148,  6268,
+         6388,  6512,  6640,  6768,  6900,  7036,  7172,  7312,
+    }, {
+            4,    13,    19,    27,    35,    44,    54,    64,
+           75,    87,    99,   112,   126,   139,   154,   168,
+          183,   199,   214,   230,   247,   263,   280,   297,
+          314,   331,   349,   366,   384,   402,   420,   438,
+          456,   475,   493,   511,   530,   548,   567,   586,
+          604,   623,   642,   660,   679,   698,   716,   735,
+          753,   772,   791,   809,   828,   846,   865,   884,
+          902,   920,   939,   957,   976,   994,  1012,  1030,
+         1049,  1067,  1085,  1103,  1121,  1139,  1157,  1175,
+         1193,  1211,  1229,  1246,  1264,  1282,  1299,  1317,
+         1335,  1352,  1370,  1387,  1405,  1422,  1440,  1457,
+         1474,  1491,  1509,  1526,  1543,  1560,  1577,  1595,
+         1627,  1660,  1693,  1725,  1758,  1791,  1824,  1856,
+         1889,  1922,  1954,  1987,  2020,  2052,  2085,  2118,
+         2150,  2183,  2216,  2248,  2281,  2313,  2346,  2378,
+         2411,  2459,  2508,  2556,  2605,  2653,  2701,  2750,
+         2798,  2847,  2895,  2943,  2992,  3040,  3088,  3137,
+         3185,  3234,  3298,  3362,  3426,  3491,  3555,  3619,
+         3684,  3748,  3812,  3876,  3941,  4005,  4069,  4149,
+         4230,  4310,  4390,  4470,  4550,  4631,  4711,  4791,
+         4871,  4967,  5064,  5160,  5256,  5352,  5448,  5544,
+         5641,  5737,  5849,  5961,  6073,  6185,  6297,  6410,
+         6522,  6650,  6778,  6906,  7034,  7162,  7290,  7435,
+         7579,  7723,  7867,  8011,  8155,  8315,  8475,  8635,
+         8795,  8956,  9132,  9308,  9484,  9660,  9836, 10028,
+        10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661,
+        11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565,
+        13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806,
+        16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414,
+        18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486,
+        21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070,
+        25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247,
+    }
+};
+
+/*
+ * Maps a prediction-mode constant to a transform type (enum TxfmType).
+ * Designated initializers are used, so the layout follows the numeric
+ * values of the mode constants rather than the order written here.
+ * The ten *_PRED entries select DCT/ADST combinations; the four *MV
+ * entries all map to DCT_DCT.
+ * NOTE(review): the array is sized 14 and 14 designators are given, which
+ * presumably means the mode constants cover 0..13 -- confirm against the
+ * enum definitions.
+ */
+const enum TxfmType ff_vp9_intra_txfm_type[14] = {
+    [VERT_PRED]            = ADST_DCT,
+    [HOR_PRED]             = DCT_ADST,
+    [DC_PRED]              = DCT_DCT,
+    [DIAG_DOWN_LEFT_PRED]  = DCT_DCT,
+    [DIAG_DOWN_RIGHT_PRED] = ADST_ADST,
+    [VERT_RIGHT_PRED]      = ADST_DCT,
+    [HOR_DOWN_PRED]        = DCT_ADST,
+    [VERT_LEFT_PRED]       = ADST_DCT,
+    [HOR_UP_PRED]          = DCT_ADST,
+    [TM_VP8_PRED]          = ADST_ADST,
+    [NEARESTMV]            = DCT_DCT,
+    [NEARMV]               = DCT_DCT,
+    [ZEROMV]               = DCT_DCT,
+    [NEWMV]                = DCT_DCT,
+};
+
+/*
+ * 16-entry table holding each value 0..15 exactly once, i.e. a permutation
+ * of the positions of a 4x4 block.
+ * NOTE(review): presumably entry i is the block position visited at scan
+ * step i for the default scan order -- confirm against the users.
+ */
+const int16_t ff_vp9_default_scan_4x4[16] = {
+     0,  1,  4,  5,
+     2,  8,  3,  6,
+    12,  9,  7, 10,
+    13, 11, 14, 15,
+};
+
+/*
+ * 16-entry table holding each value 0..15 exactly once, i.e. a permutation
+ * of the positions of a 4x4 block.
+ * NOTE(review): presumably the "column" variant of the 4x4 scan order,
+ * with entry i being the position visited at scan step i -- confirm.
+ */
+const int16_t ff_vp9_col_scan_4x4[16] = {
+     0,  1,  2,  4,
+     3,  5,  6,  8,
+     7,  9, 10, 12,
+    13, 11, 14, 15,
+};
+
+/*
+ * 16-entry table holding each value 0..15 exactly once, i.e. a permutation
+ * of the positions of a 4x4 block.
+ * NOTE(review): presumably the "row" variant of the 4x4 scan order, with
+ * entry i being the position visited at scan step i -- confirm.
+ */
+const int16_t ff_vp9_row_scan_4x4[16] = {
+     0,  4,  1,  8,
+     5, 12,  9,  2,
+     6, 13,  3, 10,
+     7, 14, 11, 15,
+};
+
+/*
+ * 64-entry table of positions in the range 0..63; it starts at 0 and ends
+ * at 63.
+ * NOTE(review): presumably a permutation giving the default scan order of
+ * an 8x8 block (entry i = position visited at step i) -- confirm.
+ */
+const int16_t ff_vp9_default_scan_8x8[64] = {
+     0,  1,  8,  2,  9, 16, 10,  3,
+    17, 24, 18, 11,  4, 25, 32, 19,
+    12, 26,  5, 33, 20, 27, 40, 13,
+    34,  6, 41, 28, 21, 35, 42, 48,
+    14,  7, 36, 29, 43, 56, 49, 22,
+    15, 37, 50, 44, 57, 30, 23, 51,
+    45, 58, 38, 31, 52, 59, 39, 46,
+    53, 60, 47, 54, 61, 55, 62, 63,
+};
+
+/*
+ * 64-entry table of positions in the range 0..63; it starts at 0 and ends
+ * at 63.
+ * NOTE(review): presumably a permutation giving the "column" scan order
+ * of an 8x8 block (entry i = position visited at step i) -- confirm.
+ */
+const int16_t ff_vp9_col_scan_8x8[64] = {
+     0,  1,  2,  8,  3,  9,  4, 10,
+    16,  5, 11, 17, 12, 18,  6, 24,
+    19, 13, 25,  7, 26, 20, 32, 14,
+    27, 21, 33, 28, 34, 15, 22, 35,
+    40, 29, 41, 36, 23, 30, 42, 37,
+    48, 43, 31, 44, 49, 38, 50, 56,
+    45, 39, 51, 57, 52, 46, 58, 53,
+    59, 47, 60, 54, 61, 55, 62, 63,
+};
+
+/*
+ * 64-entry table of positions in the range 0..63; it starts at 0 and ends
+ * at 63.
+ * NOTE(review): presumably a permutation giving the "row" scan order of
+ * an 8x8 block (entry i = position visited at step i) -- confirm.
+ */
+const int16_t ff_vp9_row_scan_8x8[64] = {
+     0,  8, 16,  1,  9, 24,  2, 17,
+    32, 10, 25,  3, 40, 18, 11, 33,
+    26, 19,  4, 48, 41, 34, 12, 27,
+    56, 20,  5, 42, 35, 13, 49, 28,
+     6, 21, 43, 36, 14, 50, 29, 57,
+     7, 44, 22, 37, 51, 15, 58, 30,
+    23, 45, 52, 38, 59, 31, 46, 53,
+    39, 60, 47, 61, 54, 62, 55, 63,
+};
+
+/*
+ * 256-entry table of positions in the range 0..255; it starts at 0 and
+ * ends at 255.
+ * NOTE(review): presumably a permutation giving the default scan order of
+ * a 16x16 block (entry i = position visited at step i) -- confirm.
+ */
+const int16_t ff_vp9_default_scan_16x16[256] = {
+      0,   1,  16,   2,  17,  32,   3,  18,  33,  48,   4,  34,  19,  49,  20,   5,
+     35,  64,  50,  36,  65,  21,   6,  51,  80,  66,  37,  22,  52,   7,  81,  67,
+     38,  82,  53,  23,  96,  68,   8,  83,  97,  54,  39,  69, 112,  24,  98,  84,
+     70,  55,   9,  40,  85,  99, 113, 128,  25, 114, 100,  71,  86,  56,  10,  41,
+    115, 101, 129, 116,  72,  87,  26, 130, 144, 102,  57,  11,  42, 117, 131, 145,
+     88, 103,  27,  73, 132, 118, 146,  58, 160,  12,  43, 133, 147, 104,  89, 119,
+    161,  74, 148, 134,  28, 162,  59,  13, 176, 120, 149,  90, 135, 105, 163,  44,
+     75, 177, 164,  29, 150, 121, 136, 178, 165,  14, 106,  60,  91, 151,  45, 179,
+    192, 137, 166, 122,  76, 180, 152,  30,  61,  15, 107, 167, 181, 193,  92, 208,
+     46, 138, 123, 153, 194,  77, 168, 182,  31, 195, 209, 183, 108, 139,  62, 154,
+     47, 196,  93, 169, 210, 197, 224, 124, 184, 211,  78, 109, 170, 155,  63, 198,
+    212, 185, 225, 240, 140,  94, 199, 125,  79, 213, 226, 171, 186, 156, 214, 200,
+    110, 227, 141,  95, 241, 215, 228, 201, 126, 242, 187, 172, 157, 229, 111, 216,
+    243, 142, 202, 230, 127, 217, 244, 173, 188, 231, 158, 203, 143, 245, 218, 232,
+    189, 246, 159, 174, 233, 247, 219, 204, 175, 190, 248, 234, 205, 220, 249, 191,
+    235, 221, 250, 206, 222, 251, 236, 207, 237, 223, 252, 238, 253, 239, 254, 255,
+};
+
+/*
+ * 256-entry table of positions in the range 0..255; it starts at 0 and
+ * ends at 255.
+ * NOTE(review): presumably a permutation giving the "column" scan order
+ * of a 16x16 block (entry i = position visited at step i) -- confirm.
+ */
+const int16_t ff_vp9_col_scan_16x16[256] = {
+      0,   1,   2,   3,  16,   4,  17,   5,  18,   6,  19,  32,  20,   7,  33,  21,
+     34,   8,  35,  22,  48,  36,   9,  49,  23,  50,  37,  10,  38,  51,  24,  64,
+     52,  11,  65,  39,  25,  53,  66,  54,  40,  67,  12,  80,  26,  68,  55,  81,
+     41,  69,  13,  27,  82,  56,  70,  83,  42,  14,  84,  96,  71,  28,  57,  85,
+     97,  15,  72,  98,  43,  86,  58,  99,  29,  87, 100, 112,  73,  44, 101,  59,
+     30, 113,  88, 114,  74, 128, 102,  45,  31, 115,  60, 103,  89, 116,  75, 129,
+    117,  46, 104,  90,  61, 130, 118, 131, 132, 105,  76,  47, 119, 144,  91,  62,
+    133, 106, 145, 120, 146, 134,  77, 147, 121,  92, 135, 148,  63, 107, 136, 122,
+     93, 149, 160,  78, 150, 137, 108, 161, 162, 151, 123,  79, 138, 163, 152,  94,
+    164, 109, 165, 153, 124, 139, 176, 166,  95, 177, 167, 110, 154, 178, 125, 179,
+    140, 168, 155, 111, 180, 192, 181, 169, 141, 126, 182, 193, 194, 156, 183, 170,
+    195, 127, 142, 196, 184, 208, 197, 157, 171, 143, 185, 198, 209, 199, 210, 172,
+    158, 186, 211, 224, 212, 200, 240, 159, 213, 225, 187, 201, 173, 226, 214, 215,
+    227, 202, 228, 188, 241, 216, 174, 229, 242, 203, 243, 217, 230, 175, 189, 244,
+    231, 204, 218, 232, 245, 219, 246, 190, 233, 205, 191, 247, 234, 248, 220, 206,
+    249, 235, 221, 207, 250, 236, 222, 251, 223, 237, 238, 252, 239, 253, 254, 255,
+};
+
+/*
+ * 256-entry table of positions in the range 0..255; it starts at 0 and
+ * ends at 255.
+ * NOTE(review): presumably a permutation giving the "row" scan order of a
+ * 16x16 block (entry i = position visited at step i) -- confirm.
+ */
+const int16_t ff_vp9_row_scan_16x16[256] = {
+      0,  16,  32,   1,  48,  17,  64,  33,   2,  80,  18,  49,  96,  34,   3,  65,
+     19, 112,  50,  81,  35,   4, 128,  66,  20,  97,  51,  82,   5, 144,  36,  67,
+    113,  98,  21,  52, 160,  83, 129,  37,  68,   6, 114, 176,  99,  53,  22,  84,
+    145,  38,  69, 130,   7, 115, 192, 100,  54,  23,  85, 161, 146, 131,  39,  70,
+    208, 116,   8, 101, 177,  55,  86,  24, 162, 147, 132,  71, 224, 117,  40, 102,
+      9, 148,  56,  87, 193, 163, 240, 133, 178,  25, 118,  72,  41, 103, 164,  10,
+    149,  88, 134, 209, 179,  57, 119, 194,  26,  73, 165, 150, 104,  42, 135,  11,
+    180, 120,  89, 225, 195,  58,  27, 210, 151, 181, 166,  74,  43, 105,  12, 136,
+     90,  59, 241, 121,  28, 196, 167, 211, 152,  44, 182, 137,  75,  13, 226, 106,
+    122,  60, 197,  91, 168,  29, 183, 153,  14,  76, 212, 138,  45, 107,  15, 198,
+     92, 227, 169,  30, 123, 154,  61, 242, 184, 213, 139,  46,  77,  31, 108, 170,
+    199, 185, 124, 228,  93, 155, 214,  62, 140, 243,  78,  47, 200, 109, 186, 171,
+    201,  94,  63, 215, 229, 156,  79, 125, 141, 110, 216, 187, 172, 244, 202, 230,
+    217,  95, 157, 126, 245, 111, 142, 231, 188, 127, 158, 218, 173, 232, 246, 233,
+    203, 143, 247, 174, 189, 159, 219, 204, 248, 234, 249, 175, 190, 220, 205, 250,
+    235, 191, 221, 251, 236, 206, 252, 222, 207, 237, 223, 253, 238, 254, 239, 255,
+};
+
+/*
+ * 1024-entry table of positions in the range 0..1023; it starts at 0 and
+ * ends at 1023.  This is the only 32x32 scan table referenced by
+ * ff_vp9_scans.
+ * NOTE(review): presumably a permutation giving the scan order of a 32x32
+ * block (entry i = position visited at step i) -- confirm.
+ */
+const int16_t ff_vp9_default_scan_32x32[1024] = {
+       0,    1,   32,    2,   33,   64,    3,   34,   65,    4,   96,   35,   66,    5,   36,   97,   67,  128,   98,   68,   37,    6,  129,   99,    7,  160,   69,   38,  130,  100,  161,  131,
+      39,   70,    8,  101,  162,  132,  192,   71,   40,    9,  102,  163,  133,  193,   72,  224,  103,   41,  164,   10,  194,  134,  165,   73,  104,  135,  225,   42,  195,   11,  256,  166,
+     226,  196,   74,  105,  136,   43,   12,  167,  197,  227,  257,   75,  106,  137,  228,   44,  198,  168,  258,  288,   13,  229,   76,  107,  199,  138,  259,  169,  289,   45,  230,  260,
+     200,  108,   14,  170,  139,  320,  290,   77,  231,  261,   46,  201,  140,  291,  109,  232,  321,  262,  171,   78,  292,   15,  322,  202,  263,  352,  172,  293,  233,  141,  323,  110,
+      47,  203,  264,  234,  294,  353,  324,   16,   79,  204,  265,  295,  325,  173,  354,  142,  235,  384,   48,  296,  111,  266,  355,  326,   80,   17,  205,  236,  174,  356,  385,  327,
+     143,  297,  267,  357,  386,  112,   49,  328,  298,  206,  416,  237,  358,  387,   81,  175,   18,  329,  359,  388,  299,  330,  389,  113,  417,  238,  360,   50,  207,  418,  390,  331,
+      19,  448,  361,   82,  419,  391,  239,   51,  362,  420,  114,  449,  480,  421,   83,  363,  450,  422,  512,  451,  423,  115,  452,  481,  453,  482,  454,  544,  483,  455,  513,  484,
+     514,  485,  515,  486,  545,  576,  487,  546,  547,  608,  577,  578,  579,  609,  610,  611,   20,  144,  268,  392,  516,  640,   21,   52,  145,  176,  269,  300,  393,  424,  517,  548,
+     641,  672,   22,   53,   84,  146,  177,  208,  270,  301,  332,  394,  425,  456,  518,  549,  580,  642,  673,  704,   23,   54,   85,  116,  147,  178,  209,  240,  271,  302,  333,  364,
+     395,  426,  457,  488,  519,  550,  581,  612,  643,  674,  705,  736,   55,   86,  117,  179,  210,  241,  303,  334,  365,  427,  458,  489,  551,  582,  613,  675,  706,  737,   87,  118,
+     211,  242,  335,  366,  459,  490,  583,  614,  707,  738,  119,  243,  367,  491,  615,  739,   24,  148,  272,  396,  520,  644,  768,   25,   56,  149,  180,  273,  304,  397,  428,  521,
+     552,  645,  676,  769,  800,   26,   57,   88,  150,  181,  212,  274,  305,  336,  398,  429,  460,  522,  553,  584,  646,  677,  708,  770,  801,  832,   27,   58,   89,  120,  151,  182,
+     213,  244,  275,  306,  337,  368,  399,  430,  461,  492,  523,  554,  585,  616,  647,  678,  709,  740,  771,  802,  833,  864,   59,   90,  121,  183,  214,  245,  307,  338,  369,  431,
+     462,  493,  555,  586,  617,  679,  710,  741,  803,  834,  865,   91,  122,  215,  246,  339,  370,  463,  494,  587,  618,  711,  742,  835,  866,  123,  247,  371,  495,  619,  743,  867,
+      28,  152,  276,  400,  524,  648,  772,  896,   29,   60,  153,  184,  277,  308,  401,  432,  525,  556,  649,  680,  773,  804,  897,  928,   30,   61,   92,  154,  185,  216,  278,  309,
+     340,  402,  433,  464,  526,  557,  588,  650,  681,  712,  774,  805,  836,  898,  929,  960,   31,   62,   93,  124,  155,  186,  217,  248,  279,  310,  341,  372,  403,  434,  465,  496,
+     527,  558,  589,  620,  651,  682,  713,  744,  775,  806,  837,  868,  899,  930,  961,  992,   63,   94,  125,  187,  218,  249,  311,  342,  373,  435,  466,  497,  559,  590,  621,  683,
+     714,  745,  807,  838,  869,  931,  962,  993,   95,  126,  219,  250,  343,  374,  467,  498,  591,  622,  715,  746,  839,  870,  963,  994,  127,  251,  375,  499,  623,  747,  871,  995,
+     156,  280,  404,  528,  652,  776,  900,  157,  188,  281,  312,  405,  436,  529,  560,  653,  684,  777,  808,  901,  932,  158,  189,  220,  282,  313,  344,  406,  437,  468,  530,  561,
+     592,  654,  685,  716,  778,  809,  840,  902,  933,  964,  159,  190,  221,  252,  283,  314,  345,  376,  407,  438,  469,  500,  531,  562,  593,  624,  655,  686,  717,  748,  779,  810,
+     841,  872,  903,  934,  965,  996,  191,  222,  253,  315,  346,  377,  439,  470,  501,  563,  594,  625,  687,  718,  749,  811,  842,  873,  935,  966,  997,  223,  254,  347,  378,  471,
+     502,  595,  626,  719,  750,  843,  874,  967,  998,  255,  379,  503,  627,  751,  875,  999,  284,  408,  532,  656,  780,  904,  285,  316,  409,  440,  533,  564,  657,  688,  781,  812,
+     905,  936,  286,  317,  348,  410,  441,  472,  534,  565,  596,  658,  689,  720,  782,  813,  844,  906,  937,  968,  287,  318,  349,  380,  411,  442,  473,  504,  535,  566,  597,  628,
+     659,  690,  721,  752,  783,  814,  845,  876,  907,  938,  969, 1000,  319,  350,  381,  443,  474,  505,  567,  598,  629,  691,  722,  753,  815,  846,  877,  939,  970, 1001,  351,  382,
+     475,  506,  599,  630,  723,  754,  847,  878,  971, 1002,  383,  507,  631,  755,  879, 1003,  412,  536,  660,  784,  908,  413,  444,  537,  568,  661,  692,  785,  816,  909,  940,  414,
+     445,  476,  538,  569,  600,  662,  693,  724,  786,  817,  848,  910,  941,  972,  415,  446,  477,  508,  539,  570,  601,  632,  663,  694,  725,  756,  787,  818,  849,  880,  911,  942,
+     973, 1004,  447,  478,  509,  571,  602,  633,  695,  726,  757,  819,  850,  881,  943,  974, 1005,  479,  510,  603,  634,  727,  758,  851,  882,  975, 1006,  511,  635,  759,  883, 1007,
+     540,  664,  788,  912,  541,  572,  665,  696,  789,  820,  913,  944,  542,  573,  604,  666,  697,  728,  790,  821,  852,  914,  945,  976,  543,  574,  605,  636,  667,  698,  729,  760,
+     791,  822,  853,  884,  915,  946,  977, 1008,  575,  606,  637,  699,  730,  761,  823,  854,  885,  947,  978, 1009,  607,  638,  731,  762,  855,  886,  979, 1010,  639,  763,  887, 1011,
+     668,  792,  916,  669,  700,  793,  824,  917,  948,  670,  701,  732,  794,  825,  856,  918,  949,  980,  671,  702,  733,  764,  795,  826,  857,  888,  919,  950,  981, 1012,  703,  734,
+     765,  827,  858,  889,  951,  982, 1013,  735,  766,  859,  890,  983, 1014,  767,  891, 1015,  796,  920,  797,  828,  921,  952,  798,  829,  860,  922,  953,  984,  799,  830,  861,  892,
+     923,  954,  985, 1016,  831,  862,  893,  955,  986, 1017,  863,  894,  987, 1018,  895, 1019,  924,  925,  956,  926,  957,  988,  927,  958,  989, 1020,  959,  990, 1021,  991, 1022, 1023,
+};
+
+/*
+ * Table of pointers to the scan tables, 5 rows of 4 entries.
+ * Rows 0..3 reference the 4x4, 8x8, 16x16 and 32x32 tables in that order;
+ * row 4 (marked "lossless") references the 4x4 default scan in every slot.
+ * For 4x4, 8x8 and 16x16 the four slots are default, col, row, default;
+ * the 32x32 row uses the default scan in all four slots.
+ * NOTE(review): the second index is presumably an enum TxfmType value --
+ * confirm against the callers.
+ */
+const int16_t * const ff_vp9_scans[5][4] = {
+    {
+        ff_vp9_default_scan_4x4, ff_vp9_col_scan_4x4,
+        ff_vp9_row_scan_4x4, ff_vp9_default_scan_4x4
+    }, {
+        ff_vp9_default_scan_8x8, ff_vp9_col_scan_8x8,
+        ff_vp9_row_scan_8x8, ff_vp9_default_scan_8x8
+    }, {
+        ff_vp9_default_scan_16x16, ff_vp9_col_scan_16x16,
+        ff_vp9_row_scan_16x16, ff_vp9_default_scan_16x16
+    }, {
+        ff_vp9_default_scan_32x32, ff_vp9_default_scan_32x32,
+        ff_vp9_default_scan_32x32, ff_vp9_default_scan_32x32
+    }, { // lossless
+        ff_vp9_default_scan_4x4, ff_vp9_default_scan_4x4,
+        ff_vp9_default_scan_4x4, ff_vp9_default_scan_4x4
+    }
+};
+
+/*
+ * 16 pairs of int16_t indices in the range 0..15; the final pair is
+ * { 0, 0 }.
+ * NOTE(review): the "_nb" suffix suggests each pair names two neighbouring
+ * positions associated with a step of ff_vp9_default_scan_4x4; the exact
+ * indexing convention is not visible here -- confirm against the users.
+ */
+const int16_t ff_vp9_default_scan_4x4_nb[16][2] = {
+    {  0,  0 }, {  0,  0 }, {  4,  1 }, {  1,  1 },
+    {  4,  4 }, {  2,  2 }, {  5,  2 }, {  8,  8 },
+    {  8,  5 }, {  6,  3 }, {  9,  6 }, { 12,  9 },
+    { 10,  7 }, { 13, 10 }, { 14, 11 }, {  0,  0 },
+};
+
+/*
+ * 16 pairs of int16_t indices in the range 0..15.  In this table both
+ * members of every pair are equal, and the final pair is { 0, 0 }.
+ * NOTE(review): the "_nb" suffix suggests each pair names the neighbouring
+ * position(s) associated with a step of ff_vp9_col_scan_4x4; the exact
+ * indexing convention is not visible here -- confirm against the users.
+ */
+const int16_t ff_vp9_col_scan_4x4_nb[16][2] = {
+    {  0,  0 }, {  1,  1 }, {  0,  0 }, {  2,  2 },
+    {  4,  4 }, {  5,  5 }, {  4,  4 }, {  6,  6 },
+    {  8,  8 }, {  9,  9 }, {  8,  8 }, { 12, 12 },
+    { 10, 10 }, { 13, 13 }, { 14, 14 }, {  0,  0 },
+};
+
+/*
+ * 16 pairs of int16_t indices in the range 0..15.  In this table both
+ * members of every pair are equal, and the final pair is { 0, 0 }.
+ * NOTE(review): the "_nb" suffix suggests each pair names the neighbouring
+ * position(s) associated with a step of ff_vp9_row_scan_4x4; the exact
+ * indexing convention is not visible here -- confirm against the users.
+ */
+const int16_t ff_vp9_row_scan_4x4_nb[16][2] = {
+    {  0,  0 }, {  0,  0 }, {  4,  4 }, {  1,  1 },
+    {  8,  8 }, {  5,  5 }, {  1,  1 }, {  2,  2 },
+    {  9,  9 }, {  2,  2 }, {  6,  6 }, {  3,  3 },
+    { 10, 10 }, {  7,  7 }, { 11, 11 }, {  0,  0 },
+};
+
+/*
+ * 64 pairs of int16_t indices in the range 0..63; the final pair is
+ * { 0, 0 }.
+ * NOTE(review): the "_nb" suffix suggests each pair names two neighbouring
+ * positions associated with a step of ff_vp9_default_scan_8x8; the exact
+ * indexing convention is not visible here -- confirm against the users.
+ */
+const int16_t ff_vp9_default_scan_8x8_nb[64][2] = {
+    {  0,  0 }, {  0,  0 }, {  1,  1 }, {  8,  1 },
+    {  8,  8 }, {  9,  2 }, {  2,  2 }, { 16,  9 },
+    { 16, 16 }, { 17, 10 }, { 10,  3 }, {  3,  3 },
+    { 24, 17 }, { 24, 24 }, { 18, 11 }, { 11,  4 },
+    { 25, 18 }, {  4,  4 }, { 32, 25 }, { 19, 12 },
+    { 26, 19 }, { 32, 32 }, { 12,  5 }, { 33, 26 },
+    {  5,  5 }, { 40, 33 }, { 27, 20 }, { 20, 13 },
+    { 34, 27 }, { 41, 34 }, { 40, 40 }, { 13,  6 },
+    {  6,  6 }, { 35, 28 }, { 28, 21 }, { 42, 35 },
+    { 48, 48 }, { 48, 41 }, { 21, 14 }, { 14,  7 },
+    { 36, 29 }, { 49, 42 }, { 43, 36 }, { 56, 49 },
+    { 29, 22 }, { 22, 15 }, { 50, 43 }, { 44, 37 },
+    { 57, 50 }, { 37, 30 }, { 30, 23 }, { 51, 44 },
+    { 58, 51 }, { 38, 31 }, { 45, 38 }, { 52, 45 },
+    { 59, 52 }, { 46, 39 }, { 53, 46 }, { 60, 53 },
+    { 54, 47 }, { 61, 54 }, { 62, 55 }, {  0,  0 },
+};
+
+/*
+ * 64 pairs of int16_t indices in the range 0..63.  In this table both
+ * members of every pair are equal, and the final pair is { 0, 0 }.
+ * NOTE(review): the "_nb" suffix suggests each pair names the neighbouring
+ * position(s) associated with a step of ff_vp9_col_scan_8x8; the exact
+ * indexing convention is not visible here -- confirm against the users.
+ */
+const int16_t ff_vp9_col_scan_8x8_nb[64][2] = {
+    {  0,  0 }, {  1,  1 }, {  0,  0 }, {  2,  2 },
+    {  8,  8 }, {  3,  3 }, {  9,  9 }, {  8,  8 },
+    {  4,  4 }, { 10, 10 }, { 16, 16 }, { 11, 11 },
+    { 17, 17 }, {  5,  5 }, { 16, 16 }, { 18, 18 },
+    { 12, 12 }, { 24, 24 }, {  6,  6 }, { 25, 25 },
+    { 19, 19 }, { 24, 24 }, { 13, 13 }, { 26, 26 },
+    { 20, 20 }, { 32, 32 }, { 27, 27 }, { 33, 33 },
+    { 14, 14 }, { 21, 21 }, { 34, 34 }, { 32, 32 },
+    { 28, 28 }, { 40, 40 }, { 35, 35 }, { 22, 22 },
+    { 29, 29 }, { 41, 41 }, { 36, 36 }, { 40, 40 },
+    { 42, 42 }, { 30, 30 }, { 43, 43 }, { 48, 48 },
+    { 37, 37 }, { 49, 49 }, { 48, 48 }, { 44, 44 },
+    { 38, 38 }, { 50, 50 }, { 56, 56 }, { 51, 51 },
+    { 45, 45 }, { 57, 57 }, { 52, 52 }, { 58, 58 },
+    { 46, 46 }, { 59, 59 }, { 53, 53 }, { 60, 60 },
+    { 54, 54 }, { 61, 61 }, { 62, 62 }, {  0,  0 },
+};
+
+/*
+ * 64 pairs of int16_t indices in the range 0..63.  In this table both
+ * members of every pair are equal, and the final pair is { 0, 0 }.
+ * NOTE(review): the "_nb" suffix suggests each pair names the neighbouring
+ * position(s) associated with a step of ff_vp9_row_scan_8x8; the exact
+ * indexing convention is not visible here -- confirm against the users.
+ */
+const int16_t ff_vp9_row_scan_8x8_nb[64][2] = {
+    {  0,  0 }, {  8,  8 }, {  0,  0 }, {  1,  1 },
+    { 16, 16 }, {  1,  1 }, {  9,  9 }, { 24, 24 },
+    {  2,  2 }, { 17, 17 }, {  2,  2 }, { 32, 32 },
+    { 10, 10 }, {  3,  3 }, { 25, 25 }, { 18, 18 },
+    { 11, 11 }, {  3,  3 }, { 40, 40 }, { 33, 33 },
+    { 26, 26 }, {  4,  4 }, { 19, 19 }, { 48, 48 },
+    { 12, 12 }, {  4,  4 }, { 34, 34 }, { 27, 27 },
+    {  5,  5 }, { 41, 41 }, { 20, 20 }, {  5,  5 },
+    { 13, 13 }, { 35, 35 }, { 28, 28 }, {  6,  6 },
+    { 42, 42 }, { 21, 21 }, { 49, 49 }, {  6,  6 },
+    { 36, 36 }, { 14, 14 }, { 29, 29 }, { 43, 43 },
+    {  7,  7 }, { 50, 50 }, { 22, 22 }, { 15, 15 },
+    { 37, 37 }, { 44, 44 }, { 30, 30 }, { 51, 51 },
+    { 23, 23 }, { 38, 38 }, { 45, 45 }, { 31, 31 },
+    { 52, 52 }, { 39, 39 }, { 53, 53 }, { 46, 46 },
+    { 54, 54 }, { 47, 47 }, { 55, 55 }, {  0,  0 },
+};
+
+/*
+ * 256 pairs of int16_t indices in the range 0..255; the final pair is
+ * { 0, 0 }.
+ * NOTE(review): the "_nb" suffix suggests each pair names two neighbouring
+ * positions associated with a step of ff_vp9_default_scan_16x16; the
+ * exact indexing convention is not visible here -- confirm against the
+ * users.
+ */
+const int16_t ff_vp9_default_scan_16x16_nb[256][2] = {
+    {   0,   0 }, {   0,   0 }, {   1,   1 }, {  16,   1 },
+    {  16,  16 }, {   2,   2 }, {  17,   2 }, {  32,  17 },
+    {  32,  32 }, {   3,   3 }, {  33,  18 }, {  18,   3 },
+    {  48,  33 }, {  19,   4 }, {   4,   4 }, {  34,  19 },
+    {  48,  48 }, {  49,  34 }, {  35,  20 }, {  64,  49 },
+    {  20,   5 }, {   5,   5 }, {  50,  35 }, {  64,  64 },
+    {  65,  50 }, {  36,  21 }, {  21,   6 }, {  51,  36 },
+    {   6,   6 }, {  80,  65 }, {  66,  51 }, {  37,  22 },
+    {  81,  66 }, {  52,  37 }, {  22,   7 }, {  80,  80 },
+    {  67,  52 }, {   7,   7 }, {  82,  67 }, {  96,  81 },
+    {  53,  38 }, {  38,  23 }, {  68,  53 }, {  96,  96 },
+    {  23,   8 }, {  97,  82 }, {  83,  68 }, {  69,  54 },
+    {  54,  39 }, {   8,   8 }, {  39,  24 }, {  84,  69 },
+    {  98,  83 }, { 112,  97 }, { 112, 112 }, {  24,   9 },
+    { 113,  98 }, {  99,  84 }, {  70,  55 }, {  85,  70 },
+    {  55,  40 }, {   9,   9 }, {  40,  25 }, { 114,  99 },
+    { 100,  85 }, { 128, 113 }, { 115, 100 }, {  71,  56 },
+    {  86,  71 }, {  25,  10 }, { 129, 114 }, { 128, 128 },
+    { 101,  86 }, {  56,  41 }, {  10,  10 }, {  41,  26 },
+    { 116, 101 }, { 130, 115 }, { 144, 129 }, {  87,  72 },
+    { 102,  87 }, {  26,  11 }, {  72,  57 }, { 131, 116 },
+    { 117, 102 }, { 145, 130 }, {  57,  42 }, { 144, 144 },
+    {  11,  11 }, {  42,  27 }, { 132, 117 }, { 146, 131 },
+    { 103,  88 }, {  88,  73 }, { 118, 103 }, { 160, 145 },
+    {  73,  58 }, { 147, 132 }, { 133, 118 }, {  27,  12 },
+    { 161, 146 }, {  58,  43 }, {  12,  12 }, { 160, 160 },
+    { 119, 104 }, { 148, 133 }, {  89,  74 }, { 134, 119 },
+    { 104,  89 }, { 162, 147 }, {  43,  28 }, {  74,  59 },
+    { 176, 161 }, { 163, 148 }, {  28,  13 }, { 149, 134 },
+    { 120, 105 }, { 135, 120 }, { 177, 162 }, { 164, 149 },
+    {  13,  13 }, { 105,  90 }, {  59,  44 }, {  90,  75 },
+    { 150, 135 }, {  44,  29 }, { 178, 163 }, { 176, 176 },
+    { 136, 121 }, { 165, 150 }, { 121, 106 }, {  75,  60 },
+    { 179, 164 }, { 151, 136 }, {  29,  14 }, {  60,  45 },
+    {  14,  14 }, { 106,  91 }, { 166, 151 }, { 180, 165 },
+    { 192, 177 }, {  91,  76 }, { 192, 192 }, {  45,  30 },
+    { 137, 122 }, { 122, 107 }, { 152, 137 }, { 193, 178 },
+    {  76,  61 }, { 167, 152 }, { 181, 166 }, {  30,  15 },
+    { 194, 179 }, { 208, 193 }, { 182, 167 }, { 107,  92 },
+    { 138, 123 }, {  61,  46 }, { 153, 138 }, {  46,  31 },
+    { 195, 180 }, {  92,  77 }, { 168, 153 }, { 209, 194 },
+    { 196, 181 }, { 208, 208 }, { 123, 108 }, { 183, 168 },
+    { 210, 195 }, {  77,  62 }, { 108,  93 }, { 169, 154 },
+    { 154, 139 }, {  62,  47 }, { 197, 182 }, { 211, 196 },
+    { 184, 169 }, { 224, 209 }, { 224, 224 }, { 139, 124 },
+    {  93,  78 }, { 198, 183 }, { 124, 109 }, {  78,  63 },
+    { 212, 197 }, { 225, 210 }, { 170, 155 }, { 185, 170 },
+    { 155, 140 }, { 213, 198 }, { 199, 184 }, { 109,  94 },
+    { 226, 211 }, { 140, 125 }, {  94,  79 }, { 240, 225 },
+    { 214, 199 }, { 227, 212 }, { 200, 185 }, { 125, 110 },
+    { 241, 226 }, { 186, 171 }, { 171, 156 }, { 156, 141 },
+    { 228, 213 }, { 110,  95 }, { 215, 200 }, { 242, 227 },
+    { 141, 126 }, { 201, 186 }, { 229, 214 }, { 126, 111 },
+    { 216, 201 }, { 243, 228 }, { 172, 157 }, { 187, 172 },
+    { 230, 215 }, { 157, 142 }, { 202, 187 }, { 142, 127 },
+    { 244, 229 }, { 217, 202 }, { 231, 216 }, { 188, 173 },
+    { 245, 230 }, { 158, 143 }, { 173, 158 }, { 232, 217 },
+    { 246, 231 }, { 218, 203 }, { 203, 188 }, { 174, 159 },
+    { 189, 174 }, { 247, 232 }, { 233, 218 }, { 204, 189 },
+    { 219, 204 }, { 248, 233 }, { 190, 175 }, { 234, 219 },
+    { 220, 205 }, { 249, 234 }, { 205, 190 }, { 221, 206 },
+    { 250, 235 }, { 235, 220 }, { 206, 191 }, { 236, 221 },
+    { 222, 207 }, { 251, 236 }, { 237, 222 }, { 252, 237 },
+    { 238, 223 }, { 253, 238 }, { 254, 239 }, {   0,   0 },
+};
+
+/*
+ * 256 pairs of int16_t indices in the range 0..255.  In this table both
+ * members of every pair are equal, and the final pair is { 0, 0 }.
+ * NOTE(review): the "_nb" suffix suggests each pair names the neighbouring
+ * position(s) associated with a step of ff_vp9_col_scan_16x16; the exact
+ * indexing convention is not visible here -- confirm against the users.
+ */
+const int16_t ff_vp9_col_scan_16x16_nb[256][2] = {
+    {   0,   0 }, {   1,   1 }, {   2,   2 }, {   0,   0 },
+    {   3,   3 }, {  16,  16 }, {   4,   4 }, {  17,  17 },
+    {   5,   5 }, {  18,  18 }, {  16,  16 }, {  19,  19 },
+    {   6,   6 }, {  32,  32 }, {  20,  20 }, {  33,  33 },
+    {   7,   7 }, {  34,  34 }, {  21,  21 }, {  32,  32 },
+    {  35,  35 }, {   8,   8 }, {  48,  48 }, {  22,  22 },
+    {  49,  49 }, {  36,  36 }, {   9,   9 }, {  37,  37 },
+    {  50,  50 }, {  23,  23 }, {  48,  48 }, {  51,  51 },
+    {  10,  10 }, {  64,  64 }, {  38,  38 }, {  24,  24 },
+    {  52,  52 }, {  65,  65 }, {  53,  53 }, {  39,  39 },
+    {  66,  66 }, {  11,  11 }, {  64,  64 }, {  25,  25 },
+    {  67,  67 }, {  54,  54 }, {  80,  80 }, {  40,  40 },
+    {  68,  68 }, {  12,  12 }, {  26,  26 }, {  81,  81 },
+    {  55,  55 }, {  69,  69 }, {  82,  82 }, {  41,  41 },
+    {  13,  13 }, {  83,  83 }, {  80,  80 }, {  70,  70 },
+    {  27,  27 }, {  56,  56 }, {  84,  84 }, {  96,  96 },
+    {  14,  14 }, {  71,  71 }, {  97,  97 }, {  42,  42 },
+    {  85,  85 }, {  57,  57 }, {  98,  98 }, {  28,  28 },
+    {  86,  86 }, {  99,  99 }, {  96,  96 }, {  72,  72 },
+    {  43,  43 }, { 100, 100 }, {  58,  58 }, {  29,  29 },
+    { 112, 112 }, {  87,  87 }, { 113, 113 }, {  73,  73 },
+    { 112, 112 }, { 101, 101 }, {  44,  44 }, {  30,  30 },
+    { 114, 114 }, {  59,  59 }, { 102, 102 }, {  88,  88 },
+    { 115, 115 }, {  74,  74 }, { 128, 128 }, { 116, 116 },
+    {  45,  45 }, { 103, 103 }, {  89,  89 }, {  60,  60 },
+    { 129, 129 }, { 117, 117 }, { 130, 130 }, { 131, 131 },
+    { 104, 104 }, {  75,  75 }, {  46,  46 }, { 118, 118 },
+    { 128, 128 }, {  90,  90 }, {  61,  61 }, { 132, 132 },
+    { 105, 105 }, { 144, 144 }, { 119, 119 }, { 145, 145 },
+    { 133, 133 }, {  76,  76 }, { 146, 146 }, { 120, 120 },
+    {  91,  91 }, { 134, 134 }, { 147, 147 }, {  62,  62 },
+    { 106, 106 }, { 135, 135 }, { 121, 121 }, {  92,  92 },
+    { 148, 148 }, { 144, 144 }, {  77,  77 }, { 149, 149 },
+    { 136, 136 }, { 107, 107 }, { 160, 160 }, { 161, 161 },
+    { 150, 150 }, { 122, 122 }, {  78,  78 }, { 137, 137 },
+    { 162, 162 }, { 151, 151 }, {  93,  93 }, { 163, 163 },
+    { 108, 108 }, { 164, 164 }, { 152, 152 }, { 123, 123 },
+    { 138, 138 }, { 160, 160 }, { 165, 165 }, {  94,  94 },
+    { 176, 176 }, { 166, 166 }, { 109, 109 }, { 153, 153 },
+    { 177, 177 }, { 124, 124 }, { 178, 178 }, { 139, 139 },
+    { 167, 167 }, { 154, 154 }, { 110, 110 }, { 179, 179 },
+    { 176, 176 }, { 180, 180 }, { 168, 168 }, { 140, 140 },
+    { 125, 125 }, { 181, 181 }, { 192, 192 }, { 193, 193 },
+    { 155, 155 }, { 182, 182 }, { 169, 169 }, { 194, 194 },
+    { 126, 126 }, { 141, 141 }, { 195, 195 }, { 183, 183 },
+    { 192, 192 }, { 196, 196 }, { 156, 156 }, { 170, 170 },
+    { 142, 142 }, { 184, 184 }, { 197, 197 }, { 208, 208 },
+    { 198, 198 }, { 209, 209 }, { 171, 171 }, { 157, 157 },
+    { 185, 185 }, { 210, 210 }, { 208, 208 }, { 211, 211 },
+    { 199, 199 }, { 224, 224 }, { 158, 158 }, { 212, 212 },
+    { 224, 224 }, { 186, 186 }, { 200, 200 }, { 172, 172 },
+    { 225, 225 }, { 213, 213 }, { 214, 214 }, { 226, 226 },
+    { 201, 201 }, { 227, 227 }, { 187, 187 }, { 240, 240 },
+    { 215, 215 }, { 173, 173 }, { 228, 228 }, { 241, 241 },
+    { 202, 202 }, { 242, 242 }, { 216, 216 }, { 229, 229 },
+    { 174, 174 }, { 188, 188 }, { 243, 243 }, { 230, 230 },
+    { 203, 203 }, { 217, 217 }, { 231, 231 }, { 244, 244 },
+    { 218, 218 }, { 245, 245 }, { 189, 189 }, { 232, 232 },
+    { 204, 204 }, { 190, 190 }, { 246, 246 }, { 233, 233 },
+    { 247, 247 }, { 219, 219 }, { 205, 205 }, { 248, 248 },
+    { 234, 234 }, { 220, 220 }, { 206, 206 }, { 249, 249 },
+    { 235, 235 }, { 221, 221 }, { 250, 250 }, { 222, 222 },
+    { 236, 236 }, { 237, 237 }, { 251, 251 }, { 238, 238 },
+    { 252, 252 }, { 253, 253 }, { 254, 254 }, {   0,   0 },
+};
+
+const int16_t ff_vp9_row_scan_16x16_nb[256][2] = { // 256 index pairs (values 0..254); both members of each pair are identical in this table. Presumably the two context-neighbour positions per step of the 16x16 row scan -- TODO confirm against users.
+    {   0,   0 }, {  16,  16 }, {   0,   0 }, {  32,  32 },
+    {   1,   1 }, {  48,  48 }, {  17,  17 }, {   1,   1 },
+    {  64,  64 }, {   2,   2 }, {  33,  33 }, {  80,  80 },
+    {  18,  18 }, {   2,   2 }, {  49,  49 }, {   3,   3 },
+    {  96,  96 }, {  34,  34 }, {  65,  65 }, {  19,  19 },
+    {   3,   3 }, { 112, 112 }, {  50,  50 }, {   4,   4 },
+    {  81,  81 }, {  35,  35 }, {  66,  66 }, {   4,   4 },
+    { 128, 128 }, {  20,  20 }, {  51,  51 }, {  97,  97 },
+    {  82,  82 }, {   5,   5 }, {  36,  36 }, { 144, 144 },
+    {  67,  67 }, { 113, 113 }, {  21,  21 }, {  52,  52 },
+    {   5,   5 }, {  98,  98 }, { 160, 160 }, {  83,  83 },
+    {  37,  37 }, {   6,   6 }, {  68,  68 }, { 129, 129 },
+    {  22,  22 }, {  53,  53 }, { 114, 114 }, {   6,   6 },
+    {  99,  99 }, { 176, 176 }, {  84,  84 }, {  38,  38 },
+    {   7,   7 }, {  69,  69 }, { 145, 145 }, { 130, 130 },
+    { 115, 115 }, {  23,  23 }, {  54,  54 }, { 192, 192 },
+    { 100, 100 }, {   7,   7 }, {  85,  85 }, { 161, 161 },
+    {  39,  39 }, {  70,  70 }, {   8,   8 }, { 146, 146 },
+    { 131, 131 }, { 116, 116 }, {  55,  55 }, { 208, 208 },
+    { 101, 101 }, {  24,  24 }, {  86,  86 }, {   8,   8 },
+    { 132, 132 }, {  40,  40 }, {  71,  71 }, { 177, 177 },
+    { 147, 147 }, { 224, 224 }, { 117, 117 }, { 162, 162 },
+    {   9,   9 }, { 102, 102 }, {  56,  56 }, {  25,  25 },
+    {  87,  87 }, { 148, 148 }, {   9,   9 }, { 133, 133 },
+    {  72,  72 }, { 118, 118 }, { 193, 193 }, { 163, 163 },
+    {  41,  41 }, { 103, 103 }, { 178, 178 }, {  10,  10 },
+    {  57,  57 }, { 149, 149 }, { 134, 134 }, {  88,  88 },
+    {  26,  26 }, { 119, 119 }, {  10,  10 }, { 164, 164 },
+    { 104, 104 }, {  73,  73 }, { 209, 209 }, { 179, 179 },
+    {  42,  42 }, {  11,  11 }, { 194, 194 }, { 135, 135 },
+    { 165, 165 }, { 150, 150 }, {  58,  58 }, {  27,  27 },
+    {  89,  89 }, {  11,  11 }, { 120, 120 }, {  74,  74 },
+    {  43,  43 }, { 225, 225 }, { 105, 105 }, {  12,  12 },
+    { 180, 180 }, { 151, 151 }, { 195, 195 }, { 136, 136 },
+    {  28,  28 }, { 166, 166 }, { 121, 121 }, {  59,  59 },
+    {  12,  12 }, { 210, 210 }, {  90,  90 }, { 106, 106 },
+    {  44,  44 }, { 181, 181 }, {  75,  75 }, { 152, 152 },
+    {  13,  13 }, { 167, 167 }, { 137, 137 }, {  13,  13 },
+    {  60,  60 }, { 196, 196 }, { 122, 122 }, {  29,  29 },
+    {  91,  91 }, {  14,  14 }, { 182, 182 }, {  76,  76 },
+    { 211, 211 }, { 153, 153 }, {  14,  14 }, { 107, 107 },
+    { 138, 138 }, {  45,  45 }, { 226, 226 }, { 168, 168 },
+    { 197, 197 }, { 123, 123 }, {  30,  30 }, {  61,  61 },
+    {  15,  15 }, {  92,  92 }, { 154, 154 }, { 183, 183 },
+    { 169, 169 }, { 108, 108 }, { 212, 212 }, {  77,  77 },
+    { 139, 139 }, { 198, 198 }, {  46,  46 }, { 124, 124 },
+    { 227, 227 }, {  62,  62 }, {  31,  31 }, { 184, 184 },
+    {  93,  93 }, { 170, 170 }, { 155, 155 }, { 185, 185 },
+    {  78,  78 }, {  47,  47 }, { 199, 199 }, { 213, 213 },
+    { 140, 140 }, {  63,  63 }, { 109, 109 }, { 125, 125 },
+    {  94,  94 }, { 200, 200 }, { 171, 171 }, { 156, 156 },
+    { 228, 228 }, { 186, 186 }, { 214, 214 }, { 201, 201 },
+    {  79,  79 }, { 141, 141 }, { 110, 110 }, { 229, 229 },
+    {  95,  95 }, { 126, 126 }, { 215, 215 }, { 172, 172 },
+    { 111, 111 }, { 142, 142 }, { 202, 202 }, { 157, 157 },
+    { 216, 216 }, { 230, 230 }, { 217, 217 }, { 187, 187 },
+    { 127, 127 }, { 231, 231 }, { 158, 158 }, { 173, 173 },
+    { 143, 143 }, { 203, 203 }, { 188, 188 }, { 232, 232 },
+    { 218, 218 }, { 233, 233 }, { 159, 159 }, { 174, 174 },
+    { 204, 204 }, { 189, 189 }, { 234, 234 }, { 219, 219 },
+    { 175, 175 }, { 205, 205 }, { 235, 235 }, { 220, 220 },
+    { 190, 190 }, { 236, 236 }, { 206, 206 }, { 191, 191 },
+    { 221, 221 }, { 207, 207 }, { 237, 237 }, { 222, 222 },
+    { 238, 238 }, { 223, 223 }, { 239, 239 }, {   0,   0 }, // NOTE(review): the closing { 0, 0 } looks like padding for the last position -- confirm it is never meaningfully read
+};
+
+const int16_t ff_vp9_default_scan_32x32_nb[1024][2] = { // 1024 index pairs (values 0..1022); unlike the smaller row/col tables the two members of a pair may differ. Presumably the two context-neighbour positions per step of the 32x32 default scan -- TODO confirm against users.
+    {    0,    0 }, {    0,    0 }, {    1,    1 }, {   32,    1 },
+    {   32,   32 }, {    2,    2 }, {   33,    2 }, {   64,   33 },
+    {    3,    3 }, {   64,   64 }, {   34,    3 }, {   65,   34 },
+    {    4,    4 }, {   35,    4 }, {   96,   65 }, {   66,   35 },
+    {   96,   96 }, {   97,   66 }, {   67,   36 }, {   36,    5 },
+    {    5,    5 }, {  128,   97 }, {   98,   67 }, {    6,    6 },
+    {  128,  128 }, {   68,   37 }, {   37,    6 }, {  129,   98 },
+    {   99,   68 }, {  160,  129 }, {  130,   99 }, {   38,    7 },
+    {   69,   38 }, {    7,    7 }, {  100,   69 }, {  161,  130 },
+    {  131,  100 }, {  160,  160 }, {   70,   39 }, {   39,    8 },
+    {    8,    8 }, {  101,   70 }, {  162,  131 }, {  132,  101 },
+    {  192,  161 }, {   71,   40 }, {  192,  192 }, {  102,   71 },
+    {   40,    9 }, {  163,  132 }, {    9,    9 }, {  193,  162 },
+    {  133,  102 }, {  164,  133 }, {   72,   41 }, {  103,   72 },
+    {  134,  103 }, {  224,  193 }, {   41,   10 }, {  194,  163 },
+    {   10,   10 }, {  224,  224 }, {  165,  134 }, {  225,  194 },
+    {  195,  164 }, {   73,   42 }, {  104,   73 }, {  135,  104 },
+    {   42,   11 }, {   11,   11 }, {  166,  135 }, {  196,  165 },
+    {  226,  195 }, {  256,  225 }, {   74,   43 }, {  105,   74 },
+    {  136,  105 }, {  227,  196 }, {   43,   12 }, {  197,  166 },
+    {  167,  136 }, {  257,  226 }, {  256,  256 }, {   12,   12 },
+    {  228,  197 }, {   75,   44 }, {  106,   75 }, {  198,  167 },
+    {  137,  106 }, {  258,  227 }, {  168,  137 }, {  288,  257 },
+    {   44,   13 }, {  229,  198 }, {  259,  228 }, {  199,  168 },
+    {  107,   76 }, {   13,   13 }, {  169,  138 }, {  138,  107 },
+    {  288,  288 }, {  289,  258 }, {   76,   45 }, {  230,  199 },
+    {  260,  229 }, {   45,   14 }, {  200,  169 }, {  139,  108 },
+    {  290,  259 }, {  108,   77 }, {  231,  200 }, {  320,  289 },
+    {  261,  230 }, {  170,  139 }, {   77,   46 }, {  291,  260 },
+    {   14,   14 }, {  321,  290 }, {  201,  170 }, {  262,  231 },
+    {  320,  320 }, {  171,  140 }, {  292,  261 }, {  232,  201 },
+    {  140,  109 }, {  322,  291 }, {  109,   78 }, {   46,   15 },
+    {  202,  171 }, {  263,  232 }, {  233,  202 }, {  293,  262 },
+    {  352,  321 }, {  323,  292 }, {   15,   15 }, {   78,   47 },
+    {  203,  172 }, {  264,  233 }, {  294,  263 }, {  324,  293 },
+    {  172,  141 }, {  353,  322 }, {  141,  110 }, {  234,  203 },
+    {  352,  352 }, {   47,   16 }, {  295,  264 }, {  110,   79 },
+    {  265,  234 }, {  354,  323 }, {  325,  294 }, {   79,   48 },
+    {   16,   16 }, {  204,  173 }, {  235,  204 }, {  173,  142 },
+    {  355,  324 }, {  384,  353 }, {  326,  295 }, {  142,  111 },
+    {  296,  265 }, {  266,  235 }, {  356,  325 }, {  385,  354 },
+    {  111,   80 }, {   48,   17 }, {  327,  296 }, {  297,  266 },
+    {  205,  174 }, {  384,  384 }, {  236,  205 }, {  357,  326 },
+    {  386,  355 }, {   80,   49 }, {  174,  143 }, {   17,   17 },
+    {  328,  297 }, {  358,  327 }, {  387,  356 }, {  298,  267 },
+    {  329,  298 }, {  388,  357 }, {  112,   81 }, {  416,  385 },
+    {  237,  206 }, {  359,  328 }, {   49,   18 }, {  206,  175 },
+    {  417,  386 }, {  389,  358 }, {  330,  299 }, {   18,   18 },
+    {  416,  416 }, {  360,  329 }, {   81,   50 }, {  418,  387 },
+    {  390,  359 }, {  238,  207 }, {   50,   19 }, {  361,  330 },
+    {  419,  388 }, {  113,   82 }, {  448,  417 }, {  448,  448 },
+    {  420,  389 }, {   82,   51 }, {  362,  331 }, {  449,  418 },
+    {  421,  390 }, {  480,  480 }, {  450,  419 }, {  422,  391 },
+    {  114,   83 }, {  451,  420 }, {  480,  449 }, {  452,  421 },
+    {  481,  450 }, {  453,  422 }, {  512,  512 }, {  482,  451 },
+    {  454,  423 }, {  512,  481 }, {  483,  452 }, {  513,  482 },
+    {  484,  453 }, {  514,  483 }, {  485,  454 }, {  544,  513 },
+    {  544,  544 }, {  486,  455 }, {  545,  514 }, {  546,  515 },
+    {  576,  576 }, {  576,  545 }, {  577,  546 }, {  578,  547 },
+    {  608,  577 }, {  609,  578 }, {  610,  579 }, {   19,   19 },
+    {  143,  112 }, {  267,  236 }, {  391,  360 }, {  515,  484 },
+    {  608,  608 }, {   20,   20 }, {   51,   20 }, {  144,  113 },
+    {  175,  144 }, {  268,  237 }, {  299,  268 }, {  392,  361 },
+    {  423,  392 }, {  516,  485 }, {  547,  516 }, {  640,  609 },
+    {  640,  640 }, {   21,   21 }, {   52,   21 }, {   83,   52 },
+    {  145,  114 }, {  176,  145 }, {  207,  176 }, {  269,  238 },
+    {  300,  269 }, {  331,  300 }, {  393,  362 }, {  424,  393 },
+    {  455,  424 }, {  517,  486 }, {  548,  517 }, {  579,  548 },
+    {  641,  610 }, {  672,  641 }, {  672,  672 }, {   22,   22 },
+    {   53,   22 }, {   84,   53 }, {  115,   84 }, {  146,  115 },
+    {  177,  146 }, {  208,  177 }, {  239,  208 }, {  270,  239 },
+    {  301,  270 }, {  332,  301 }, {  363,  332 }, {  394,  363 },
+    {  425,  394 }, {  456,  425 }, {  487,  456 }, {  518,  487 },
+    {  549,  518 }, {  580,  549 }, {  611,  580 }, {  642,  611 },
+    {  673,  642 }, {  704,  673 }, {  704,  704 }, {   54,   23 },
+    {   85,   54 }, {  116,   85 }, {  178,  147 }, {  209,  178 },
+    {  240,  209 }, {  302,  271 }, {  333,  302 }, {  364,  333 },
+    {  426,  395 }, {  457,  426 }, {  488,  457 }, {  550,  519 },
+    {  581,  550 }, {  612,  581 }, {  674,  643 }, {  705,  674 },
+    {  736,  705 }, {   86,   55 }, {  117,   86 }, {  210,  179 },
+    {  241,  210 }, {  334,  303 }, {  365,  334 }, {  458,  427 },
+    {  489,  458 }, {  582,  551 }, {  613,  582 }, {  706,  675 },
+    {  737,  706 }, {  118,   87 }, {  242,  211 }, {  366,  335 },
+    {  490,  459 }, {  614,  583 }, {  738,  707 }, {   23,   23 },
+    {  147,  116 }, {  271,  240 }, {  395,  364 }, {  519,  488 },
+    {  643,  612 }, {  736,  736 }, {   24,   24 }, {   55,   24 },
+    {  148,  117 }, {  179,  148 }, {  272,  241 }, {  303,  272 },
+    {  396,  365 }, {  427,  396 }, {  520,  489 }, {  551,  520 },
+    {  644,  613 }, {  675,  644 }, {  768,  737 }, {  768,  768 },
+    {   25,   25 }, {   56,   25 }, {   87,   56 }, {  149,  118 },
+    {  180,  149 }, {  211,  180 }, {  273,  242 }, {  304,  273 },
+    {  335,  304 }, {  397,  366 }, {  428,  397 }, {  459,  428 },
+    {  521,  490 }, {  552,  521 }, {  583,  552 }, {  645,  614 },
+    {  676,  645 }, {  707,  676 }, {  769,  738 }, {  800,  769 },
+    {  800,  800 }, {   26,   26 }, {   57,   26 }, {   88,   57 },
+    {  119,   88 }, {  150,  119 }, {  181,  150 }, {  212,  181 },
+    {  243,  212 }, {  274,  243 }, {  305,  274 }, {  336,  305 },
+    {  367,  336 }, {  398,  367 }, {  429,  398 }, {  460,  429 },
+    {  491,  460 }, {  522,  491 }, {  553,  522 }, {  584,  553 },
+    {  615,  584 }, {  646,  615 }, {  677,  646 }, {  708,  677 },
+    {  739,  708 }, {  770,  739 }, {  801,  770 }, {  832,  801 },
+    {  832,  832 }, {   58,   27 }, {   89,   58 }, {  120,   89 },
+    {  182,  151 }, {  213,  182 }, {  244,  213 }, {  306,  275 },
+    {  337,  306 }, {  368,  337 }, {  430,  399 }, {  461,  430 },
+    {  492,  461 }, {  554,  523 }, {  585,  554 }, {  616,  585 },
+    {  678,  647 }, {  709,  678 }, {  740,  709 }, {  802,  771 },
+    {  833,  802 }, {  864,  833 }, {   90,   59 }, {  121,   90 },
+    {  214,  183 }, {  245,  214 }, {  338,  307 }, {  369,  338 },
+    {  462,  431 }, {  493,  462 }, {  586,  555 }, {  617,  586 },
+    {  710,  679 }, {  741,  710 }, {  834,  803 }, {  865,  834 },
+    {  122,   91 }, {  246,  215 }, {  370,  339 }, {  494,  463 },
+    {  618,  587 }, {  742,  711 }, {  866,  835 }, {   27,   27 },
+    {  151,  120 }, {  275,  244 }, {  399,  368 }, {  523,  492 },
+    {  647,  616 }, {  771,  740 }, {  864,  864 }, {   28,   28 },
+    {   59,   28 }, {  152,  121 }, {  183,  152 }, {  276,  245 },
+    {  307,  276 }, {  400,  369 }, {  431,  400 }, {  524,  493 },
+    {  555,  524 }, {  648,  617 }, {  679,  648 }, {  772,  741 },
+    {  803,  772 }, {  896,  865 }, {  896,  896 }, {   29,   29 },
+    {   60,   29 }, {   91,   60 }, {  153,  122 }, {  184,  153 },
+    {  215,  184 }, {  277,  246 }, {  308,  277 }, {  339,  308 },
+    {  401,  370 }, {  432,  401 }, {  463,  432 }, {  525,  494 },
+    {  556,  525 }, {  587,  556 }, {  649,  618 }, {  680,  649 },
+    {  711,  680 }, {  773,  742 }, {  804,  773 }, {  835,  804 },
+    {  897,  866 }, {  928,  897 }, {  928,  928 }, {   30,   30 },
+    {   61,   30 }, {   92,   61 }, {  123,   92 }, {  154,  123 },
+    {  185,  154 }, {  216,  185 }, {  247,  216 }, {  278,  247 },
+    {  309,  278 }, {  340,  309 }, {  371,  340 }, {  402,  371 },
+    {  433,  402 }, {  464,  433 }, {  495,  464 }, {  526,  495 },
+    {  557,  526 }, {  588,  557 }, {  619,  588 }, {  650,  619 },
+    {  681,  650 }, {  712,  681 }, {  743,  712 }, {  774,  743 },
+    {  805,  774 }, {  836,  805 }, {  867,  836 }, {  898,  867 },
+    {  929,  898 }, {  960,  929 }, {  960,  960 }, {   62,   31 },
+    {   93,   62 }, {  124,   93 }, {  186,  155 }, {  217,  186 },
+    {  248,  217 }, {  310,  279 }, {  341,  310 }, {  372,  341 },
+    {  434,  403 }, {  465,  434 }, {  496,  465 }, {  558,  527 },
+    {  589,  558 }, {  620,  589 }, {  682,  651 }, {  713,  682 },
+    {  744,  713 }, {  806,  775 }, {  837,  806 }, {  868,  837 },
+    {  930,  899 }, {  961,  930 }, {  992,  961 }, {   94,   63 },
+    {  125,   94 }, {  218,  187 }, {  249,  218 }, {  342,  311 },
+    {  373,  342 }, {  466,  435 }, {  497,  466 }, {  590,  559 },
+    {  621,  590 }, {  714,  683 }, {  745,  714 }, {  838,  807 },
+    {  869,  838 }, {  962,  931 }, {  993,  962 }, {  126,   95 },
+    {  250,  219 }, {  374,  343 }, {  498,  467 }, {  622,  591 },
+    {  746,  715 }, {  870,  839 }, {  994,  963 }, {  155,  124 },
+    {  279,  248 }, {  403,  372 }, {  527,  496 }, {  651,  620 },
+    {  775,  744 }, {  899,  868 }, {  156,  125 }, {  187,  156 },
+    {  280,  249 }, {  311,  280 }, {  404,  373 }, {  435,  404 },
+    {  528,  497 }, {  559,  528 }, {  652,  621 }, {  683,  652 },
+    {  776,  745 }, {  807,  776 }, {  900,  869 }, {  931,  900 },
+    {  157,  126 }, {  188,  157 }, {  219,  188 }, {  281,  250 },
+    {  312,  281 }, {  343,  312 }, {  405,  374 }, {  436,  405 },
+    {  467,  436 }, {  529,  498 }, {  560,  529 }, {  591,  560 },
+    {  653,  622 }, {  684,  653 }, {  715,  684 }, {  777,  746 },
+    {  808,  777 }, {  839,  808 }, {  901,  870 }, {  932,  901 },
+    {  963,  932 }, {  158,  127 }, {  189,  158 }, {  220,  189 },
+    {  251,  220 }, {  282,  251 }, {  313,  282 }, {  344,  313 },
+    {  375,  344 }, {  406,  375 }, {  437,  406 }, {  468,  437 },
+    {  499,  468 }, {  530,  499 }, {  561,  530 }, {  592,  561 },
+    {  623,  592 }, {  654,  623 }, {  685,  654 }, {  716,  685 },
+    {  747,  716 }, {  778,  747 }, {  809,  778 }, {  840,  809 },
+    {  871,  840 }, {  902,  871 }, {  933,  902 }, {  964,  933 },
+    {  995,  964 }, {  190,  159 }, {  221,  190 }, {  252,  221 },
+    {  314,  283 }, {  345,  314 }, {  376,  345 }, {  438,  407 },
+    {  469,  438 }, {  500,  469 }, {  562,  531 }, {  593,  562 },
+    {  624,  593 }, {  686,  655 }, {  717,  686 }, {  748,  717 },
+    {  810,  779 }, {  841,  810 }, {  872,  841 }, {  934,  903 },
+    {  965,  934 }, {  996,  965 }, {  222,  191 }, {  253,  222 },
+    {  346,  315 }, {  377,  346 }, {  470,  439 }, {  501,  470 },
+    {  594,  563 }, {  625,  594 }, {  718,  687 }, {  749,  718 },
+    {  842,  811 }, {  873,  842 }, {  966,  935 }, {  997,  966 },
+    {  254,  223 }, {  378,  347 }, {  502,  471 }, {  626,  595 },
+    {  750,  719 }, {  874,  843 }, {  998,  967 }, {  283,  252 },
+    {  407,  376 }, {  531,  500 }, {  655,  624 }, {  779,  748 },
+    {  903,  872 }, {  284,  253 }, {  315,  284 }, {  408,  377 },
+    {  439,  408 }, {  532,  501 }, {  563,  532 }, {  656,  625 },
+    {  687,  656 }, {  780,  749 }, {  811,  780 }, {  904,  873 },
+    {  935,  904 }, {  285,  254 }, {  316,  285 }, {  347,  316 },
+    {  409,  378 }, {  440,  409 }, {  471,  440 }, {  533,  502 },
+    {  564,  533 }, {  595,  564 }, {  657,  626 }, {  688,  657 },
+    {  719,  688 }, {  781,  750 }, {  812,  781 }, {  843,  812 },
+    {  905,  874 }, {  936,  905 }, {  967,  936 }, {  286,  255 },
+    {  317,  286 }, {  348,  317 }, {  379,  348 }, {  410,  379 },
+    {  441,  410 }, {  472,  441 }, {  503,  472 }, {  534,  503 },
+    {  565,  534 }, {  596,  565 }, {  627,  596 }, {  658,  627 },
+    {  689,  658 }, {  720,  689 }, {  751,  720 }, {  782,  751 },
+    {  813,  782 }, {  844,  813 }, {  875,  844 }, {  906,  875 },
+    {  937,  906 }, {  968,  937 }, {  999,  968 }, {  318,  287 },
+    {  349,  318 }, {  380,  349 }, {  442,  411 }, {  473,  442 },
+    {  504,  473 }, {  566,  535 }, {  597,  566 }, {  628,  597 },
+    {  690,  659 }, {  721,  690 }, {  752,  721 }, {  814,  783 },
+    {  845,  814 }, {  876,  845 }, {  938,  907 }, {  969,  938 },
+    { 1000,  969 }, {  350,  319 }, {  381,  350 }, {  474,  443 },
+    {  505,  474 }, {  598,  567 }, {  629,  598 }, {  722,  691 },
+    {  753,  722 }, {  846,  815 }, {  877,  846 }, {  970,  939 },
+    { 1001,  970 }, {  382,  351 }, {  506,  475 }, {  630,  599 },
+    {  754,  723 }, {  878,  847 }, { 1002,  971 }, {  411,  380 },
+    {  535,  504 }, {  659,  628 }, {  783,  752 }, {  907,  876 },
+    {  412,  381 }, {  443,  412 }, {  536,  505 }, {  567,  536 },
+    {  660,  629 }, {  691,  660 }, {  784,  753 }, {  815,  784 },
+    {  908,  877 }, {  939,  908 }, {  413,  382 }, {  444,  413 },
+    {  475,  444 }, {  537,  506 }, {  568,  537 }, {  599,  568 },
+    {  661,  630 }, {  692,  661 }, {  723,  692 }, {  785,  754 },
+    {  816,  785 }, {  847,  816 }, {  909,  878 }, {  940,  909 },
+    {  971,  940 }, {  414,  383 }, {  445,  414 }, {  476,  445 },
+    {  507,  476 }, {  538,  507 }, {  569,  538 }, {  600,  569 },
+    {  631,  600 }, {  662,  631 }, {  693,  662 }, {  724,  693 },
+    {  755,  724 }, {  786,  755 }, {  817,  786 }, {  848,  817 },
+    {  879,  848 }, {  910,  879 }, {  941,  910 }, {  972,  941 },
+    { 1003,  972 }, {  446,  415 }, {  477,  446 }, {  508,  477 },
+    {  570,  539 }, {  601,  570 }, {  632,  601 }, {  694,  663 },
+    {  725,  694 }, {  756,  725 }, {  818,  787 }, {  849,  818 },
+    {  880,  849 }, {  942,  911 }, {  973,  942 }, { 1004,  973 },
+    {  478,  447 }, {  509,  478 }, {  602,  571 }, {  633,  602 },
+    {  726,  695 }, {  757,  726 }, {  850,  819 }, {  881,  850 },
+    {  974,  943 }, { 1005,  974 }, {  510,  479 }, {  634,  603 },
+    {  758,  727 }, {  882,  851 }, { 1006,  975 }, {  539,  508 },
+    {  663,  632 }, {  787,  756 }, {  911,  880 }, {  540,  509 },
+    {  571,  540 }, {  664,  633 }, {  695,  664 }, {  788,  757 },
+    {  819,  788 }, {  912,  881 }, {  943,  912 }, {  541,  510 },
+    {  572,  541 }, {  603,  572 }, {  665,  634 }, {  696,  665 },
+    {  727,  696 }, {  789,  758 }, {  820,  789 }, {  851,  820 },
+    {  913,  882 }, {  944,  913 }, {  975,  944 }, {  542,  511 },
+    {  573,  542 }, {  604,  573 }, {  635,  604 }, {  666,  635 },
+    {  697,  666 }, {  728,  697 }, {  759,  728 }, {  790,  759 },
+    {  821,  790 }, {  852,  821 }, {  883,  852 }, {  914,  883 },
+    {  945,  914 }, {  976,  945 }, { 1007,  976 }, {  574,  543 },
+    {  605,  574 }, {  636,  605 }, {  698,  667 }, {  729,  698 },
+    {  760,  729 }, {  822,  791 }, {  853,  822 }, {  884,  853 },
+    {  946,  915 }, {  977,  946 }, { 1008,  977 }, {  606,  575 },
+    {  637,  606 }, {  730,  699 }, {  761,  730 }, {  854,  823 },
+    {  885,  854 }, {  978,  947 }, { 1009,  978 }, {  638,  607 },
+    {  762,  731 }, {  886,  855 }, { 1010,  979 }, {  667,  636 },
+    {  791,  760 }, {  915,  884 }, {  668,  637 }, {  699,  668 },
+    {  792,  761 }, {  823,  792 }, {  916,  885 }, {  947,  916 },
+    {  669,  638 }, {  700,  669 }, {  731,  700 }, {  793,  762 },
+    {  824,  793 }, {  855,  824 }, {  917,  886 }, {  948,  917 },
+    {  979,  948 }, {  670,  639 }, {  701,  670 }, {  732,  701 },
+    {  763,  732 }, {  794,  763 }, {  825,  794 }, {  856,  825 },
+    {  887,  856 }, {  918,  887 }, {  949,  918 }, {  980,  949 },
+    { 1011,  980 }, {  702,  671 }, {  733,  702 }, {  764,  733 },
+    {  826,  795 }, {  857,  826 }, {  888,  857 }, {  950,  919 },
+    {  981,  950 }, { 1012,  981 }, {  734,  703 }, {  765,  734 },
+    {  858,  827 }, {  889,  858 }, {  982,  951 }, { 1013,  982 },
+    {  766,  735 }, {  890,  859 }, { 1014,  983 }, {  795,  764 },
+    {  919,  888 }, {  796,  765 }, {  827,  796 }, {  920,  889 },
+    {  951,  920 }, {  797,  766 }, {  828,  797 }, {  859,  828 },
+    {  921,  890 }, {  952,  921 }, {  983,  952 }, {  798,  767 },
+    {  829,  798 }, {  860,  829 }, {  891,  860 }, {  922,  891 },
+    {  953,  922 }, {  984,  953 }, { 1015,  984 }, {  830,  799 },
+    {  861,  830 }, {  892,  861 }, {  954,  923 }, {  985,  954 },
+    { 1016,  985 }, {  862,  831 }, {  893,  862 }, {  986,  955 },
+    { 1017,  986 }, {  894,  863 }, { 1018,  987 }, {  923,  892 },
+    {  924,  893 }, {  955,  924 }, {  925,  894 }, {  956,  925 },
+    {  987,  956 }, {  926,  895 }, {  957,  926 }, {  988,  957 },
+    { 1019,  988 }, {  958,  927 }, {  989,  958 }, { 1020,  989 },
+    {  990,  959 }, { 1021,  990 }, { 1022,  991 }, {    0,    0 }, // NOTE(review): the closing { 0, 0 } looks like padding for the last position -- confirm it is never meaningfully read
+};
+
+const int16_t (* const ff_vp9_scans_nb[5][4])[2] = { // 5x4 table of pointers to the [N][2] "_nb" pair tables; rows: 4x4, 8x8, 16x16, 32x32, lossless; columns: default, col, row, default. NOTE(review): the second index is presumably the transform type -- confirm against callers.
+    { // 4x4
+        ff_vp9_default_scan_4x4_nb, ff_vp9_col_scan_4x4_nb,
+        ff_vp9_row_scan_4x4_nb, ff_vp9_default_scan_4x4_nb
+    }, { // 8x8
+        ff_vp9_default_scan_8x8_nb, ff_vp9_col_scan_8x8_nb,
+        ff_vp9_row_scan_8x8_nb, ff_vp9_default_scan_8x8_nb
+    }, { // 16x16
+        ff_vp9_default_scan_16x16_nb, ff_vp9_col_scan_16x16_nb,
+        ff_vp9_row_scan_16x16_nb, ff_vp9_default_scan_16x16_nb
+    }, { // 32x32: the default-scan table fills all four slots
+        ff_vp9_default_scan_32x32_nb, ff_vp9_default_scan_32x32_nb,
+        ff_vp9_default_scan_32x32_nb, ff_vp9_default_scan_32x32_nb
+    }, { // lossless: the 4x4 default-scan table fills all four slots
+        ff_vp9_default_scan_4x4_nb, ff_vp9_default_scan_4x4_nb,
+        ff_vp9_default_scan_4x4_nb, ff_vp9_default_scan_4x4_nb
+    }
+};
+
+const uint8_t ff_vp9_model_pareto8[256][8] = {
+    {   6,  86, 128,  11,  87,  42,  91,  52 },
+    {   3,  86, 128,   6,  86,  23,  88,  29 },
+    {   6,  86, 128,  11,  87,  42,  91,  52 },
+    {   9,  86, 129,  17,  88,  61,  94,  76 },
+    {  12,  86, 129,  22,  88,  77,  97,  93 },
+    {  15,  87, 129,  28,  89,  93, 100, 110 },
+    {  17,  87, 129,  33,  90, 105, 103, 123 },
+    {  20,  88, 130,  38,  91, 118, 106, 136 },
+    {  23,  88, 130,  43,  91, 128, 108, 146 },
+    {  26,  89, 131,  48,  92, 139, 111, 156 },
+    {  28,  89, 131,  53,  93, 147, 114, 163 },
+    {  31,  90, 131,  58,  94, 156, 117, 171 },
+    {  34,  90, 131,  62,  94, 163, 119, 177 },
+    {  37,  90, 132,  66,  95, 171, 122, 184 },
+    {  39,  90, 132,  70,  96, 177, 124, 189 },
+    {  42,  91, 132,  75,  97, 183, 127, 194 },
+    {  44,  91, 132,  79,  97, 188, 129, 198 },
+    {  47,  92, 133,  83,  98, 193, 132, 202 },
+    {  49,  92, 133,  86,  99, 197, 134, 205 },
+    {  52,  93, 133,  90, 100, 201, 137, 208 },
+    {  54,  93, 133,  94, 100, 204, 139, 211 },
+    {  57,  94, 134,  98, 101, 208, 142, 214 },
+    {  59,  94, 134, 101, 102, 211, 144, 216 },
+    {  62,  94, 135, 105, 103, 214, 146, 218 },
+    {  64,  94, 135, 108, 103, 216, 148, 220 },
+    {  66,  95, 135, 111, 104, 219, 151, 222 },
+    {  68,  95, 135, 114, 105, 221, 153, 223 },
+    {  71,  96, 136, 117, 106, 224, 155, 225 },
+    {  73,  96, 136, 120, 106, 225, 157, 226 },
+    {  76,  97, 136, 123, 107, 227, 159, 228 },
+    {  78,  97, 136, 126, 108, 229, 160, 229 },
+    {  80,  98, 137, 129, 109, 231, 162, 231 },
+    {  82,  98, 137, 131, 109, 232, 164, 232 },
+    {  84,  98, 138, 134, 110, 234, 166, 233 },
+    {  86,  98, 138, 137, 111, 235, 168, 234 },
+    {  89,  99, 138, 140, 112, 236, 170, 235 },
+    {  91,  99, 138, 142, 112, 237, 171, 235 },
+    {  93, 100, 139, 145, 113, 238, 173, 236 },
+    {  95, 100, 139, 147, 114, 239, 174, 237 },
+    {  97, 101, 140, 149, 115, 240, 176, 238 },
+    {  99, 101, 140, 151, 115, 241, 177, 238 },
+    { 101, 102, 140, 154, 116, 242, 179, 239 },
+    { 103, 102, 140, 156, 117, 242, 180, 239 },
+    { 105, 103, 141, 158, 118, 243, 182, 240 },
+    { 107, 103, 141, 160, 118, 243, 183, 240 },
+    { 109, 104, 141, 162, 119, 244, 185, 241 },
+    { 111, 104, 141, 164, 119, 244, 186, 241 },
+    { 113, 104, 142, 166, 120, 245, 187, 242 },
+    { 114, 104, 142, 168, 121, 245, 188, 242 },
+    { 116, 105, 143, 170, 122, 246, 190, 243 },
+    { 118, 105, 143, 171, 122, 246, 191, 243 },
+    { 120, 106, 143, 173, 123, 247, 192, 244 },
+    { 121, 106, 143, 175, 124, 247, 193, 244 },
+    { 123, 107, 144, 177, 125, 248, 195, 244 },
+    { 125, 107, 144, 178, 125, 248, 196, 244 },
+    { 127, 108, 145, 180, 126, 249, 197, 245 },
+    { 128, 108, 145, 181, 127, 249, 198, 245 },
+    { 130, 109, 145, 183, 128, 249, 199, 245 },
+    { 132, 109, 145, 184, 128, 249, 200, 245 },
+    { 134, 110, 146, 186, 129, 250, 201, 246 },
+    { 135, 110, 146, 187, 130, 250, 202, 246 },
+    { 137, 111, 147, 189, 131, 251, 203, 246 },
+    { 138, 111, 147, 190, 131, 251, 204, 246 },
+    { 140, 112, 147, 192, 132, 251, 205, 247 },
+    { 141, 112, 147, 193, 132, 251, 206, 247 },
+    { 143, 113, 148, 194, 133, 251, 207, 247 },
+    { 144, 113, 148, 195, 134, 251, 207, 247 },
+    { 146, 114, 149, 197, 135, 252, 208, 248 },
+    { 147, 114, 149, 198, 135, 252, 209, 248 },
+    { 149, 115, 149, 199, 136, 252, 210, 248 },
+    { 150, 115, 149, 200, 137, 252, 210, 248 },
+    { 152, 115, 150, 201, 138, 252, 211, 248 },
+    { 153, 115, 150, 202, 138, 252, 212, 248 },
+    { 155, 116, 151, 204, 139, 253, 213, 249 },
+    { 156, 116, 151, 205, 139, 253, 213, 249 },
+    { 158, 117, 151, 206, 140, 253, 214, 249 },
+    { 159, 117, 151, 207, 141, 253, 215, 249 },
+    { 161, 118, 152, 208, 142, 253, 216, 249 },
+    { 162, 118, 152, 209, 142, 253, 216, 249 },
+    { 163, 119, 153, 210, 143, 253, 217, 249 },
+    { 164, 119, 153, 211, 143, 253, 217, 249 },
+    { 166, 120, 153, 212, 144, 254, 218, 250 },
+    { 167, 120, 153, 212, 145, 254, 219, 250 },
+    { 168, 121, 154, 213, 146, 254, 220, 250 },
+    { 169, 121, 154, 214, 146, 254, 220, 250 },
+    { 171, 122, 155, 215, 147, 254, 221, 250 },
+    { 172, 122, 155, 216, 147, 254, 221, 250 },
+    { 173, 123, 155, 217, 148, 254, 222, 250 },
+    { 174, 123, 155, 217, 149, 254, 222, 250 },
+    { 176, 124, 156, 218, 150, 254, 223, 250 },
+    { 177, 124, 156, 219, 150, 254, 223, 250 },
+    { 178, 125, 157, 220, 151, 254, 224, 251 },
+    { 179, 125, 157, 220, 151, 254, 224, 251 },
+    { 180, 126, 157, 221, 152, 254, 225, 251 },
+    { 181, 126, 157, 221, 152, 254, 225, 251 },
+    { 183, 127, 158, 222, 153, 254, 226, 251 },
+    { 184, 127, 158, 223, 154, 254, 226, 251 },
+    { 185, 128, 159, 224, 155, 255, 227, 251 },
+    { 186, 128, 159, 224, 155, 255, 227, 251 },
+    { 187, 129, 160, 225, 156, 255, 228, 251 },
+    { 188, 130, 160, 225, 156, 255, 228, 251 },
+    { 189, 131, 160, 226, 157, 255, 228, 251 },
+    { 190, 131, 160, 226, 158, 255, 228, 251 },
+    { 191, 132, 161, 227, 159, 255, 229, 251 },
+    { 192, 132, 161, 227, 159, 255, 229, 251 },
+    { 193, 133, 162, 228, 160, 255, 230, 252 },
+    { 194, 133, 162, 229, 160, 255, 230, 252 },
+    { 195, 134, 163, 230, 161, 255, 231, 252 },
+    { 196, 134, 163, 230, 161, 255, 231, 252 },
+    { 197, 135, 163, 231, 162, 255, 231, 252 },
+    { 198, 135, 163, 231, 162, 255, 231, 252 },
+    { 199, 136, 164, 232, 163, 255, 232, 252 },
+    { 200, 136, 164, 232, 164, 255, 232, 252 },
+    { 201, 137, 165, 233, 165, 255, 233, 252 },
+    { 201, 137, 165, 233, 165, 255, 233, 252 },
+    { 202, 138, 166, 233, 166, 255, 233, 252 },
+    { 203, 138, 166, 233, 166, 255, 233, 252 },
+    { 204, 139, 166, 234, 167, 255, 234, 252 },
+    { 205, 139, 166, 234, 167, 255, 234, 252 },
+    { 206, 140, 167, 235, 168, 255, 235, 252 },
+    { 206, 140, 167, 235, 168, 255, 235, 252 },
+    { 207, 141, 168, 236, 169, 255, 235, 252 },
+    { 208, 141, 168, 236, 170, 255, 235, 252 },
+    { 209, 142, 169, 237, 171, 255, 236, 252 },
+    { 209, 143, 169, 237, 171, 255, 236, 252 },
+    { 210, 144, 169, 237, 172, 255, 236, 252 },
+    { 211, 144, 169, 237, 172, 255, 236, 252 },
+    { 212, 145, 170, 238, 173, 255, 237, 252 },
+    { 213, 145, 170, 238, 173, 255, 237, 252 },
+    { 214, 146, 171, 239, 174, 255, 237, 253 },
+    { 214, 146, 171, 239, 174, 255, 237, 253 },
+    { 215, 147, 172, 240, 175, 255, 238, 253 },
+    { 215, 147, 172, 240, 175, 255, 238, 253 },
+    { 216, 148, 173, 240, 176, 255, 238, 253 },
+    { 217, 148, 173, 240, 176, 255, 238, 253 },
+    { 218, 149, 173, 241, 177, 255, 239, 253 },
+    { 218, 149, 173, 241, 178, 255, 239, 253 },
+    { 219, 150, 174, 241, 179, 255, 239, 253 },
+    { 219, 151, 174, 241, 179, 255, 239, 253 },
+    { 220, 152, 175, 242, 180, 255, 240, 253 },
+    { 221, 152, 175, 242, 180, 255, 240, 253 },
+    { 222, 153, 176, 242, 181, 255, 240, 253 },
+    { 222, 153, 176, 242, 181, 255, 240, 253 },
+    { 223, 154, 177, 243, 182, 255, 240, 253 },
+    { 223, 154, 177, 243, 182, 255, 240, 253 },
+    { 224, 155, 178, 244, 183, 255, 241, 253 },
+    { 224, 155, 178, 244, 183, 255, 241, 253 },
+    { 225, 156, 178, 244, 184, 255, 241, 253 },
+    { 225, 157, 178, 244, 184, 255, 241, 253 },
+    { 226, 158, 179, 244, 185, 255, 242, 253 },
+    { 227, 158, 179, 244, 185, 255, 242, 253 },
+    { 228, 159, 180, 245, 186, 255, 242, 253 },
+    { 228, 159, 180, 245, 186, 255, 242, 253 },
+    { 229, 160, 181, 245, 187, 255, 242, 253 },
+    { 229, 160, 181, 245, 187, 255, 242, 253 },
+    { 230, 161, 182, 246, 188, 255, 243, 253 },
+    { 230, 162, 182, 246, 188, 255, 243, 253 },
+    { 231, 163, 183, 246, 189, 255, 243, 253 },
+    { 231, 163, 183, 246, 189, 255, 243, 253 },
+    { 232, 164, 184, 247, 190, 255, 243, 253 },
+    { 232, 164, 184, 247, 190, 255, 243, 253 },
+    { 233, 165, 185, 247, 191, 255, 244, 253 },
+    { 233, 165, 185, 247, 191, 255, 244, 253 },
+    { 234, 166, 185, 247, 192, 255, 244, 253 },
+    { 234, 167, 185, 247, 192, 255, 244, 253 },
+    { 235, 168, 186, 248, 193, 255, 244, 253 },
+    { 235, 168, 186, 248, 193, 255, 244, 253 },
+    { 236, 169, 187, 248, 194, 255, 244, 253 },
+    { 236, 169, 187, 248, 194, 255, 244, 253 },
+    { 236, 170, 188, 248, 195, 255, 245, 253 },
+    { 236, 170, 188, 248, 195, 255, 245, 253 },
+    { 237, 171, 189, 249, 196, 255, 245, 254 },
+    { 237, 172, 189, 249, 196, 255, 245, 254 },
+    { 238, 173, 190, 249, 197, 255, 245, 254 },
+    { 238, 173, 190, 249, 197, 255, 245, 254 },
+    { 239, 174, 191, 249, 198, 255, 245, 254 },
+    { 239, 174, 191, 249, 198, 255, 245, 254 },
+    { 240, 175, 192, 249, 199, 255, 246, 254 },
+    { 240, 176, 192, 249, 199, 255, 246, 254 },
+    { 240, 177, 193, 250, 200, 255, 246, 254 },
+    { 240, 177, 193, 250, 200, 255, 246, 254 },
+    { 241, 178, 194, 250, 201, 255, 246, 254 },
+    { 241, 178, 194, 250, 201, 255, 246, 254 },
+    { 242, 179, 195, 250, 202, 255, 246, 254 },
+    { 242, 180, 195, 250, 202, 255, 246, 254 },
+    { 242, 181, 196, 250, 203, 255, 247, 254 },
+    { 242, 181, 196, 250, 203, 255, 247, 254 },
+    { 243, 182, 197, 251, 204, 255, 247, 254 },
+    { 243, 183, 197, 251, 204, 255, 247, 254 },
+    { 244, 184, 198, 251, 205, 255, 247, 254 },
+    { 244, 184, 198, 251, 205, 255, 247, 254 },
+    { 244, 185, 199, 251, 206, 255, 247, 254 },
+    { 244, 185, 199, 251, 206, 255, 247, 254 },
+    { 245, 186, 200, 251, 207, 255, 247, 254 },
+    { 245, 187, 200, 251, 207, 255, 247, 254 },
+    { 246, 188, 201, 252, 207, 255, 248, 254 },
+    { 246, 188, 201, 252, 207, 255, 248, 254 },
+    { 246, 189, 202, 252, 208, 255, 248, 254 },
+    { 246, 190, 202, 252, 208, 255, 248, 254 },
+    { 247, 191, 203, 252, 209, 255, 248, 254 },
+    { 247, 191, 203, 252, 209, 255, 248, 254 },
+    { 247, 192, 204, 252, 210, 255, 248, 254 },
+    { 247, 193, 204, 252, 210, 255, 248, 254 },
+    { 248, 194, 205, 252, 211, 255, 248, 254 },
+    { 248, 194, 205, 252, 211, 255, 248, 254 },
+    { 248, 195, 206, 252, 212, 255, 249, 254 },
+    { 248, 196, 206, 252, 212, 255, 249, 254 },
+    { 249, 197, 207, 253, 213, 255, 249, 254 },
+    { 249, 197, 207, 253, 213, 255, 249, 254 },
+    { 249, 198, 208, 253, 214, 255, 249, 254 },
+    { 249, 199, 209, 253, 214, 255, 249, 254 },
+    { 250, 200, 210, 253, 215, 255, 249, 254 },
+    { 250, 200, 210, 253, 215, 255, 249, 254 },
+    { 250, 201, 211, 253, 215, 255, 249, 254 },
+    { 250, 202, 211, 253, 215, 255, 249, 254 },
+    { 250, 203, 212, 253, 216, 255, 249, 254 },
+    { 250, 203, 212, 253, 216, 255, 249, 254 },
+    { 251, 204, 213, 253, 217, 255, 250, 254 },
+    { 251, 205, 213, 253, 217, 255, 250, 254 },
+    { 251, 206, 214, 254, 218, 255, 250, 254 },
+    { 251, 206, 215, 254, 218, 255, 250, 254 },
+    { 252, 207, 216, 254, 219, 255, 250, 254 },
+    { 252, 208, 216, 254, 219, 255, 250, 254 },
+    { 252, 209, 217, 254, 220, 255, 250, 254 },
+    { 252, 210, 217, 254, 220, 255, 250, 254 },
+    { 252, 211, 218, 254, 221, 255, 250, 254 },
+    { 252, 212, 218, 254, 221, 255, 250, 254 },
+    { 253, 213, 219, 254, 222, 255, 250, 254 },
+    { 253, 213, 220, 254, 222, 255, 250, 254 },
+    { 253, 214, 221, 254, 223, 255, 250, 254 },
+    { 253, 215, 221, 254, 223, 255, 250, 254 },
+    { 253, 216, 222, 254, 224, 255, 251, 254 },
+    { 253, 217, 223, 254, 224, 255, 251, 254 },
+    { 253, 218, 224, 254, 225, 255, 251, 254 },
+    { 253, 219, 224, 254, 225, 255, 251, 254 },
+    { 254, 220, 225, 254, 225, 255, 251, 254 },
+    { 254, 221, 226, 254, 225, 255, 251, 254 },
+    { 254, 222, 227, 255, 226, 255, 251, 254 },
+    { 254, 223, 227, 255, 226, 255, 251, 254 },
+    { 254, 224, 228, 255, 227, 255, 251, 254 },
+    { 254, 225, 229, 255, 227, 255, 251, 254 },
+    { 254, 226, 230, 255, 228, 255, 251, 254 },
+    { 254, 227, 230, 255, 229, 255, 251, 254 },
+    { 255, 228, 231, 255, 230, 255, 251, 254 },
+    { 255, 229, 232, 255, 230, 255, 251, 254 },
+    { 255, 230, 233, 255, 231, 255, 252, 254 },
+    { 255, 231, 234, 255, 231, 255, 252, 254 },
+    { 255, 232, 235, 255, 232, 255, 252, 254 },
+    { 255, 233, 236, 255, 232, 255, 252, 254 },
+    { 255, 235, 237, 255, 233, 255, 252, 254 },
+    { 255, 236, 238, 255, 234, 255, 252, 254 },
+    { 255, 238, 240, 255, 235, 255, 252, 255 },
+    { 255, 239, 241, 255, 235, 255, 252, 254 },
+    { 255, 241, 243, 255, 236, 255, 252, 254 },
+    { 255, 243, 245, 255, 237, 255, 252, 254 },
+    { 255, 246, 247, 255, 239, 255, 253, 255 },
+};
+
+const ProbContext ff_vp9_default_probs = { /* positional initializer: one brace group per ProbContext member, in declaration order */
+    { /* y_mode */
+        {  65,  32,  18, 144, 162, 194,  41,  51,  98 } /* bsize < 8x8 */,
+        { 132,  68,  18, 165, 217, 196,  45,  40,  78 } /* bsize < 16x16 */,
+        { 173,  80,  19, 176, 240, 193,  64,  35,  46 } /* bsize < 32x32 */,
+        { 221, 135,  38, 194, 248, 121,  96,  85,  29 } /* bsize >= 32x32 */
+    }, { /* uv_mode */
+        {  48,  12, 154, 155, 139,  90,  34, 117, 119 } /* y = v */,
+        {  67,   6,  25, 204, 243, 158,  13,  21,  96 } /* y = h */,
+        { 120,   7,  76, 176, 208, 126,  28,  54, 103 } /* y = dc */,
+        {  97,   5,  44, 131, 176, 139,  48,  68,  97 } /* y = d45 */,
+        {  83,   5,  42, 156, 111, 152,  26,  49, 152 } /* y = d135 */,
+        {  80,   5,  58, 178,  74,  83,  33,  62, 145 } /* y = d117 */,
+        {  86,   5,  32, 154, 192, 168,  14,  22, 163 } /* y = d153 */,
+        {  77,   7,  64, 116, 132, 122,  37, 126, 120 } /* y = d63 */,
+        {  85,   5,  32, 156, 216, 148,  19,  29,  73 } /* y = d27 */,
+        { 101,  21, 107, 181, 192, 103,  19,  67, 125 } /* y = tm */
+    }, { /* filter */
+        { 235, 162, },
+        {  36, 255, },
+        {  34,   3, },
+        { 149, 144, },
+    }, { /* mv_mode */
+        {  2, 173,  34 },  // 0 = both zero mv
+        {  7, 145,  85 },  // 1 = one zero mv + one predicted mv
+        {  7, 166,  63 },  // 2 = two predicted mvs
+        {  7,  94,  66 },  // 3 = one predicted/zero and one new mv
+        {  8,  64,  46 },  // 4 = two new mvs
+        { 17,  81,  31 },  // 5 = one intra neighbor + x
+        { 25,  29,  30 },  // 6 = two intra neighbors
+    }, { /* intra */
+        9, 102, 187, 225
+    }, { /* comp */
+        239, 183, 119,  96,  41
+    }, { /* single_ref */
+        {  33,  16 },
+        {  77,  74 },
+        { 142, 142 },
+        { 172, 170 },
+        { 238, 247 }
+    }, { /* comp_ref */
+        50, 126, 123, 221, 226
+    }, { /* tx32p */
+        { 3, 136, 37, },
+        { 5,  52, 13, },
+    }, { /* tx16p */
+        { 20, 152, },
+        { 15, 101, },
+    }, { /* tx8p */
+        100, 66
+    }, { /* skip */
+        192, 128, 64
+    }, { /* mv_joint */
+        32, 64, 96
+    }, { /* mv components: [0] vertical, [1] horizontal */
+        { /* mv vertical component */
+            128, /* sign */
+            { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 }, /* class */
+            216, /* class0 */
+            { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, /* bits */
+            { /* class0_fp */
+                { 128, 128, 64 },
+                {  96, 112, 64 }
+            },
+            { 64, 96, 64 }, /* fp */
+            160, /* class0_hp bit */
+            128, /* hp */
+        }, { /* mv horizontal component */
+            128, /* sign */
+            { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 }, /* class */
+            208, /* class0 */
+            { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, /* bits */
+            { /* class0_fp */
+                { 128, 128, 64 },
+                {  96, 112, 64 }
+            },
+            { 64, 96, 64 }, /* fp */
+            160, /* class0_hp bit */
+            128, /* hp */
+        }
+    }, { /* partition */
+        { /* 64x64 -> 32x32 */
+            { 222,  34,  30 } /* a/l both not split */,
+            {  72,  16,  44 } /* a split, l not split */,
+            {  58,  32,  12 } /* l split, a not split */,
+            {  10,   7,   6 } /* a/l both split */,
+        }, { /* 32x32 -> 16x16 */
+            { 177,  58,  59 } /* a/l both not split */,
+            {  68,  26,  63 } /* a split, l not split */,
+            {  52,  79,  25 } /* l split, a not split */,
+            {  17,  14,  12 } /* a/l both split */,
+        }, { /* 16x16 -> 8x8 */
+            { 174,  73,  87 } /* a/l both not split */,
+            {  92,  41,  83 } /* a split, l not split */,
+            {  82,  99,  50 } /* l split, a not split */,
+            {  53,  39,  39 } /* a/l both split */,
+        }, { /* 8x8 -> 4x4 */
+            { 199, 122, 141 } /* a/l both not split */,
+            { 147,  63, 159 } /* a split, l not split */,
+            { 148, 133, 118 } /* l split, a not split */,
+            { 121, 104, 114 } /* a/l both split */,
+        }
+    },
+};
+
+const uint8_t ff_vp9_default_coef_probs[4][2][2][6][6][3] = { /* index order: [tx size][block type][intra/inter][coeff band][row][3 values] */
+    { /* tx = 4x4 */
+        { /* block Type 0 */
+            { /* Intra */
+                { /* Coeff Band 0: only 3 of the 6 rows are listed, here and in every Band 0 below; the remaining rows are implicitly zero-initialized */
+                    { 195,  29, 183 },
+                    {  84,  49, 136 },
+                    {   8,  42,  71 }
+                }, { /* Coeff Band 1 */
+                    {  31, 107, 169 },
+                    {  35,  99, 159 },
+                    {  17,  82, 140 },
+                    {   8,  66, 114 },
+                    {   2,  44,  76 },
+                    {   1,  19,  32 }
+                }, { /* Coeff Band 2 */
+                    {  40, 132, 201 },
+                    {  29, 114, 187 },
+                    {  13,  91, 157 },
+                    {   7,  75, 127 },
+                    {   3,  58,  95 },
+                    {   1,  28,  47 }
+                }, { /* Coeff Band 3 */
+                    {  69, 142, 221 },
+                    {  42, 122, 201 },
+                    {  15,  91, 159 },
+                    {   6,  67, 121 },
+                    {   1,  42,  77 },
+                    {   1,  17,  31 }
+                }, { /* Coeff Band 4 */
+                    { 102, 148, 228 },
+                    {  67, 117, 204 },
+                    {  17,  82, 154 },
+                    {   6,  59, 114 },
+                    {   2,  39,  75 },
+                    {   1,  15,  29 }
+                }, { /* Coeff Band 5 */
+                    { 156,  57, 233 },
+                    { 119,  57, 212 },
+                    {  58,  48, 163 },
+                    {  29,  40, 124 },
+                    {  12,  30,  81 },
+                    {   3,  12,  31 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 191, 107, 226 },
+                    { 124, 117, 204 },
+                    {  25,  99, 155 }
+                }, { /* Coeff Band 1 */
+                    {  29, 148, 210 },
+                    {  37, 126, 194 },
+                    {   8,  93, 157 },
+                    {   2,  68, 118 },
+                    {   1,  39,  69 },
+                    {   1,  17,  33 }
+                }, { /* Coeff Band 2 */
+                    {  41, 151, 213 },
+                    {  27, 123, 193 },
+                    {   3,  82, 144 },
+                    {   1,  58, 105 },
+                    {   1,  32,  60 },
+                    {   1,  13,  26 }
+                }, { /* Coeff Band 3 */
+                    {  59, 159, 220 },
+                    {  23, 126, 198 },
+                    {   4,  88, 151 },
+                    {   1,  66, 114 },
+                    {   1,  38,  71 },
+                    {   1,  18,  34 }
+                }, { /* Coeff Band 4 */
+                    { 114, 136, 232 },
+                    {  51, 114, 207 },
+                    {  11,  83, 155 },
+                    {   3,  56, 105 },
+                    {   1,  33,  65 },
+                    {   1,  17,  34 }
+                }, { /* Coeff Band 5 */
+                    { 149,  65, 234 },
+                    { 121,  57, 215 },
+                    {  61,  49, 166 },
+                    {  28,  36, 114 },
+                    {  12,  25,  76 },
+                    {   3,  16,  42 }
+                }
+            }
+        }, { /* block Type 1 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    { 214,  49, 220 },
+                    { 132,  63, 188 },
+                    {  42,  65, 137 }
+                }, { /* Coeff Band 1 */
+                    {  85, 137, 221 },
+                    { 104, 131, 216 },
+                    {  49, 111, 192 },
+                    {  21,  87, 155 },
+                    {   2,  49,  87 },
+                    {   1,  16,  28 }
+                }, { /* Coeff Band 2 */
+                    {  89, 163, 230 },
+                    {  90, 137, 220 },
+                    {  29, 100, 183 },
+                    {  10,  70, 135 },
+                    {   2,  42,  81 },
+                    {   1,  17,  33 }
+                }, { /* Coeff Band 3 */
+                    { 108, 167, 237 },
+                    {  55, 133, 222 },
+                    {  15,  97, 179 },
+                    {   4,  72, 135 },
+                    {   1,  45,  85 },
+                    {   1,  19,  38 }
+                }, { /* Coeff Band 4 */
+                    { 124, 146, 240 },
+                    {  66, 124, 224 },
+                    {  17,  88, 175 },
+                    {   4,  58, 122 },
+                    {   1,  36,  75 },
+                    {   1,  18,  37 }
+                }, { /* Coeff Band 5 */
+                    { 141,  79, 241 },
+                    { 126,  70, 227 },
+                    {  66,  58, 182 },
+                    {  30,  44, 136 },
+                    {  12,  34,  96 },
+                    {   2,  20,  47 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 229,  99, 249 },
+                    { 143, 111, 235 },
+                    {  46, 109, 192 }
+                }, { /* Coeff Band 1 */
+                    {  82, 158, 236 },
+                    {  94, 146, 224 },
+                    {  25, 117, 191 },
+                    {   9,  87, 149 },
+                    {   3,  56,  99 },
+                    {   1,  33,  57 }
+                }, { /* Coeff Band 2 */
+                    {  83, 167, 237 },
+                    {  68, 145, 222 },
+                    {  10, 103, 177 },
+                    {   2,  72, 131 },
+                    {   1,  41,  79 },
+                    {   1,  20,  39 }
+                }, { /* Coeff Band 3 */
+                    {  99, 167, 239 },
+                    {  47, 141, 224 },
+                    {  10, 104, 178 },
+                    {   2,  73, 133 },
+                    {   1,  44,  85 },
+                    {   1,  22,  47 }
+                }, { /* Coeff Band 4 */
+                    { 127, 145, 243 },
+                    {  71, 129, 228 },
+                    {  17,  93, 177 },
+                    {   3,  61, 124 },
+                    {   1,  41,  84 },
+                    {   1,  21,  52 }
+                }, { /* Coeff Band 5 */
+                    { 157,  78, 244 },
+                    { 140,  72, 231 },
+                    {  69,  58, 184 },
+                    {  31,  44, 137 },
+                    {  14,  38, 105 },
+                    {   8,  23,  61 }
+                }
+            }
+        }
+    }, { /* tx = 8x8 */
+        { /* block Type 0 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    { 125,  34, 187 },
+                    {  52,  41, 133 },
+                    {   6,  31,  56 }
+                }, { /* Coeff Band 1 */
+                    {  37, 109, 153 },
+                    {  51, 102, 147 },
+                    {  23,  87, 128 },
+                    {   8,  67, 101 },
+                    {   1,  41,  63 },
+                    {   1,  19,  29 }
+                }, { /* Coeff Band 2 */
+                    {  31, 154, 185 },
+                    {  17, 127, 175 },
+                    {   6,  96, 145 },
+                    {   2,  73, 114 },
+                    {   1,  51,  82 },
+                    {   1,  28,  45 }
+                }, { /* Coeff Band 3 */
+                    {  23, 163, 200 },
+                    {  10, 131, 185 },
+                    {   2,  93, 148 },
+                    {   1,  67, 111 },
+                    {   1,  41,  69 },
+                    {   1,  14,  24 }
+                }, { /* Coeff Band 4 */
+                    {  29, 176, 217 },
+                    {  12, 145, 201 },
+                    {   3, 101, 156 },
+                    {   1,  69, 111 },
+                    {   1,  39,  63 },
+                    {   1,  14,  23 }
+                }, { /* Coeff Band 5 */
+                    {  57, 192, 233 },
+                    {  25, 154, 215 },
+                    {   6, 109, 167 },
+                    {   3,  78, 118 },
+                    {   1,  48,  69 },
+                    {   1,  21,  29 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 202, 105, 245 },
+                    { 108, 106, 216 },
+                    {  18,  90, 144 }
+                }, { /* Coeff Band 1 */
+                    {  33, 172, 219 },
+                    {  64, 149, 206 },
+                    {  14, 117, 177 },
+                    {   5,  90, 141 },
+                    {   2,  61,  95 },
+                    {   1,  37,  57 }
+                }, { /* Coeff Band 2 */
+                    {  33, 179, 220 },
+                    {  11, 140, 198 },
+                    {   1,  89, 148 },
+                    {   1,  60, 104 },
+                    {   1,  33,  57 },
+                    {   1,  12,  21 }
+                }, { /* Coeff Band 3 */
+                    {  30, 181, 221 },
+                    {   8, 141, 198 },
+                    {   1,  87, 145 },
+                    {   1,  58, 100 },
+                    {   1,  31,  55 },
+                    {   1,  12,  20 }
+                }, { /* Coeff Band 4 */
+                    {  32, 186, 224 },
+                    {   7, 142, 198 },
+                    {   1,  86, 143 },
+                    {   1,  58, 100 },
+                    {   1,  31,  55 },
+                    {   1,  12,  22 }
+                }, { /* Coeff Band 5 */
+                    {  57, 192, 227 },
+                    {  20, 143, 204 },
+                    {   3,  96, 154 },
+                    {   1,  68, 112 },
+                    {   1,  42,  69 },
+                    {   1,  19,  32 }
+                }
+            }
+        }, { /* block Type 1 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    { 212,  35, 215 },
+                    { 113,  47, 169 },
+                    {  29,  48, 105 }
+                }, { /* Coeff Band 1 */
+                    {  74, 129, 203 },
+                    { 106, 120, 203 },
+                    {  49, 107, 178 },
+                    {  19,  84, 144 },
+                    {   4,  50,  84 },
+                    {   1,  15,  25 }
+                }, { /* Coeff Band 2 */
+                    {  71, 172, 217 },
+                    {  44, 141, 209 },
+                    {  15, 102, 173 },
+                    {   6,  76, 133 },
+                    {   2,  51,  89 },
+                    {   1,  24,  42 }
+                }, { /* Coeff Band 3 */
+                    {  64, 185, 231 },
+                    {  31, 148, 216 },
+                    {   8, 103, 175 },
+                    {   3,  74, 131 },
+                    {   1,  46,  81 },
+                    {   1,  18,  30 }
+                }, { /* Coeff Band 4 */
+                    {  65, 196, 235 },
+                    {  25, 157, 221 },
+                    {   5, 105, 174 },
+                    {   1,  67, 120 },
+                    {   1,  38,  69 },
+                    {   1,  15,  30 }
+                }, { /* Coeff Band 5 */
+                    {  65, 204, 238 },
+                    {  30, 156, 224 },
+                    {   7, 107, 177 },
+                    {   2,  70, 124 },
+                    {   1,  42,  73 },
+                    {   1,  18,  34 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 225,  86, 251 },
+                    { 144, 104, 235 },
+                    {  42,  99, 181 }
+                }, { /* Coeff Band 1 */
+                    {  85, 175, 239 },
+                    { 112, 165, 229 },
+                    {  29, 136, 200 },
+                    {  12, 103, 162 },
+                    {   6,  77, 123 },
+                    {   2,  53,  84 }
+                }, { /* Coeff Band 2 */
+                    {  75, 183, 239 },
+                    {  30, 155, 221 },
+                    {   3, 106, 171 },
+                    {   1,  74, 128 },
+                    {   1,  44,  76 },
+                    {   1,  17,  28 }
+                }, { /* Coeff Band 3 */
+                    {  73, 185, 240 },
+                    {  27, 159, 222 },
+                    {   2, 107, 172 },
+                    {   1,  75, 127 },
+                    {   1,  42,  73 },
+                    {   1,  17,  29 }
+                }, { /* Coeff Band 4 */
+                    {  62, 190, 238 },
+                    {  21, 159, 222 },
+                    {   2, 107, 172 },
+                    {   1,  72, 122 },
+                    {   1,  40,  71 },
+                    {   1,  18,  32 }
+                }, { /* Coeff Band 5 */
+                    {  61, 199, 240 },
+                    {  27, 161, 226 },
+                    {   4, 113, 180 },
+                    {   1,  76, 129 },
+                    {   1,  46,  80 },
+                    {   1,  23,  41 }
+                }
+            }
+        }
+    }, { /* tx = 16x16 */
+        { /* block Type 0 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    {   7,  27, 153 },
+                    {   5,  30,  95 },
+                    {   1,  16,  30 }
+                }, { /* Coeff Band 1 */
+                    {  50,  75, 127 },
+                    {  57,  75, 124 },
+                    {  27,  67, 108 },
+                    {  10,  54,  86 },
+                    {   1,  33,  52 },
+                    {   1,  12,  18 }
+                }, { /* Coeff Band 2 */
+                    {  43, 125, 151 },
+                    {  26, 108, 148 },
+                    {   7,  83, 122 },
+                    {   2,  59,  89 },
+                    {   1,  38,  60 },
+                    {   1,  17,  27 }
+                }, { /* Coeff Band 3 */
+                    {  23, 144, 163 },
+                    {  13, 112, 154 },
+                    {   2,  75, 117 },
+                    {   1,  50,  81 },
+                    {   1,  31,  51 },
+                    {   1,  14,  23 }
+                }, { /* Coeff Band 4 */
+                    {  18, 162, 185 },
+                    {   6, 123, 171 },
+                    {   1,  78, 125 },
+                    {   1,  51,  86 },
+                    {   1,  31,  54 },
+                    {   1,  14,  23 }
+                }, { /* Coeff Band 5 */
+                    {  15, 199, 227 },
+                    {   3, 150, 204 },
+                    {   1,  91, 146 },
+                    {   1,  55,  95 },
+                    {   1,  30,  53 },
+                    {   1,  11,  20 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    {  19,  55, 240 },
+                    {  19,  59, 196 },
+                    {   3,  52, 105 }
+                }, { /* Coeff Band 1 */
+                    {  41, 166, 207 },
+                    { 104, 153, 199 },
+                    {  31, 123, 181 },
+                    {  14, 101, 152 },
+                    {   5,  72, 106 },
+                    {   1,  36,  52 }
+                }, { /* Coeff Band 2 */
+                    {  35, 176, 211 },
+                    {  12, 131, 190 },
+                    {   2,  88, 144 },
+                    {   1,  60, 101 },
+                    {   1,  36,  60 },
+                    {   1,  16,  28 }
+                }, { /* Coeff Band 3 */
+                    {  28, 183, 213 },
+                    {   8, 134, 191 },
+                    {   1,  86, 142 },
+                    {   1,  56,  96 },
+                    {   1,  30,  53 },
+                    {   1,  12,  20 }
+                }, { /* Coeff Band 4 */
+                    {  20, 190, 215 },
+                    {   4, 135, 192 },
+                    {   1,  84, 139 },
+                    {   1,  53,  91 },
+                    {   1,  28,  49 },
+                    {   1,  11,  20 }
+                }, { /* Coeff Band 5 */
+                    {  13, 196, 216 },
+                    {   2, 137, 192 },
+                    {   1,  86, 143 },
+                    {   1,  57,  99 },
+                    {   1,  32,  56 },
+                    {   1,  13,  24 }
+                }
+            }
+        }, { /* block Type 1 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    { 211,  29, 217 },
+                    {  96,  47, 156 },
+                    {  22,  43,  87 }
+                }, { /* Coeff Band 1 */
+                    {  78, 120, 193 },
+                    { 111, 116, 186 },
+                    {  46, 102, 164 },
+                    {  15,  80, 128 },
+                    {   2,  49,  76 },
+                    {   1,  18,  28 }
+                }, { /* Coeff Band 2 */
+                    {  71, 161, 203 },
+                    {  42, 132, 192 },
+                    {  10,  98, 150 },
+                    {   3,  69, 109 },
+                    {   1,  44,  70 },
+                    {   1,  18,  29 }
+                }, { /* Coeff Band 3 */
+                    {  57, 186, 211 },
+                    {  30, 140, 196 },
+                    {   4,  93, 146 },
+                    {   1,  62, 102 },
+                    {   1,  38,  65 },
+                    {   1,  16,  27 }
+                }, { /* Coeff Band 4 */
+                    {  47, 199, 217 },
+                    {  14, 145, 196 },
+                    {   1,  88, 142 },
+                    {   1,  57,  98 },
+                    {   1,  36,  62 },
+                    {   1,  15,  26 }
+                }, { /* Coeff Band 5 */
+                    {  26, 219, 229 },
+                    {   5, 155, 207 },
+                    {   1,  94, 151 },
+                    {   1,  60, 104 },
+                    {   1,  36,  62 },
+                    {   1,  16,  28 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 233,  29, 248 },
+                    { 146,  47, 220 },
+                    {  43,  52, 140 }
+                }, { /* Coeff Band 1 */
+                    { 100, 163, 232 },
+                    { 179, 161, 222 },
+                    {  63, 142, 204 },
+                    {  37, 113, 174 },
+                    {  26,  89, 137 },
+                    {  18,  68,  97 }
+                }, { /* Coeff Band 2 */
+                    {  85, 181, 230 },
+                    {  32, 146, 209 },
+                    {   7, 100, 164 },
+                    {   3,  71, 121 },
+                    {   1,  45,  77 },
+                    {   1,  18,  30 }
+                }, { /* Coeff Band 3 */
+                    {  65, 187, 230 },
+                    {  20, 148, 207 },
+                    {   2,  97, 159 },
+                    {   1,  68, 116 },
+                    {   1,  40,  70 },
+                    {   1,  14,  29 }
+                }, { /* Coeff Band 4 */
+                    {  40, 194, 227 },
+                    {   8, 147, 204 },
+                    {   1,  94, 155 },
+                    {   1,  65, 112 },
+                    {   1,  39,  66 },
+                    {   1,  14,  26 }
+                }, { /* Coeff Band 5 */
+                    {  16, 208, 228 },
+                    {   3, 151, 207 },
+                    {   1,  98, 160 },
+                    {   1,  67, 117 },
+                    {   1,  41,  74 },
+                    {   1,  17,  31 }
+                }
+            }
+        }
+    }, { /* tx = 32x32 */
+        { /* block Type 0 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    {  17,  38, 140 },
+                    {   7,  34,  80 },
+                    {   1,  17,  29 }
+                }, { /* Coeff Band 1 */
+                    {  37,  75, 128 },
+                    {  41,  76, 128 },
+                    {  26,  66, 116 },
+                    {  12,  52,  94 },
+                    {   2,  32,  55 },
+                    {   1,  10,  16 }
+                }, { /* Coeff Band 2 */
+                    {  50, 127, 154 },
+                    {  37, 109, 152 },
+                    {  16,  82, 121 },
+                    {   5,  59,  85 },
+                    {   1,  35,  54 },
+                    {   1,  13,  20 }
+                }, { /* Coeff Band 3 */
+                    {  40, 142, 167 },
+                    {  17, 110, 157 },
+                    {   2,  71, 112 },
+                    {   1,  44,  72 },
+                    {   1,  27,  45 },
+                    {   1,  11,  17 }
+                }, { /* Coeff Band 4 */
+                    {  30, 175, 188 },
+                    {   9, 124, 169 },
+                    {   1,  74, 116 },
+                    {   1,  48,  78 },
+                    {   1,  30,  49 },
+                    {   1,  11,  18 }
+                }, { /* Coeff Band 5 */
+                    {  10, 222, 223 },
+                    {   2, 150, 194 },
+                    {   1,  83, 128 },
+                    {   1,  48,  79 },
+                    {   1,  27,  45 },
+                    {   1,  11,  17 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    {  36,  41, 235 },
+                    {  29,  36, 193 },
+                    {  10,  27, 111 }
+                }, { /* Coeff Band 1 */
+                    {  85, 165, 222 },
+                    { 177, 162, 215 },
+                    { 110, 135, 195 },
+                    {  57, 113, 168 },
+                    {  23,  83, 120 },
+                    {  10,  49,  61 }
+                }, { /* Coeff Band 2 */
+                    {  85, 190, 223 },
+                    {  36, 139, 200 },
+                    {   5,  90, 146 },
+                    {   1,  60, 103 },
+                    {   1,  38,  65 },
+                    {   1,  18,  30 }
+                }, { /* Coeff Band 3 */
+                    {  72, 202, 223 },
+                    {  23, 141, 199 },
+                    {   2,  86, 140 },
+                    {   1,  56,  97 },
+                    {   1,  36,  61 },
+                    {   1,  16,  27 }
+                }, { /* Coeff Band 4 */
+                    {  55, 218, 225 },
+                    {  13, 145, 200 },
+                    {   1,  86, 141 },
+                    {   1,  57,  99 },
+                    {   1,  35,  61 },
+                    {   1,  13,  22 }
+                }, { /* Coeff Band 5 */
+                    {  15, 235, 212 },
+                    {   1, 132, 184 },
+                    {   1,  84, 139 },
+                    {   1,  57,  97 },
+                    {   1,  34,  56 },
+                    {   1,  14,  23 }
+                }
+            }
+        }, { /* block Type 1 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    { 181,  21, 201 },
+                    {  61,  37, 123 },
+                    {  10,  38,  71 }
+                }, { /* Coeff Band 1 */
+                    {  47, 106, 172 },
+                    {  95, 104, 173 },
+                    {  42,  93, 159 },
+                    {  18,  77, 131 },
+                    {   4,  50,  81 },
+                    {   1,  17,  23 }
+                }, { /* Coeff Band 2 */
+                    {  62, 147, 199 },
+                    {  44, 130, 189 },
+                    {  28, 102, 154 },
+                    {  18,  75, 115 },
+                    {   2,  44,  65 },
+                    {   1,  12,  19 }
+                }, { /* Coeff Band 3 */
+                    {  55, 153, 210 },
+                    {  24, 130, 194 },
+                    {   3,  93, 146 },
+                    {   1,  61,  97 },
+                    {   1,  31,  50 },
+                    {   1,  10,  16 }
+                }, { /* Coeff Band 4 */
+                    {  49, 186, 223 },
+                    {  17, 148, 204 },
+                    {   1,  96, 142 },
+                    {   1,  53,  83 },
+                    {   1,  26,  44 },
+                    {   1,  11,  17 }
+                }, { /* Coeff Band 5 */
+                    {  13, 217, 212 },
+                    {   2, 136, 180 },
+                    {   1,  78, 124 },
+                    {   1,  50,  83 },
+                    {   1,  29,  49 },
+                    {   1,  14,  23 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 197,  13, 247 },
+                    {  82,  17, 222 },
+                    {  25,  17, 162 }
+                }, { /* Coeff Band 1 */
+                    { 126, 186, 247 },
+                    { 234, 191, 243 },
+                    { 176, 177, 234 },
+                    { 104, 158, 220 },
+                    {  66, 128, 186 },
+                    {  55,  90, 137 }
+                }, { /* Coeff Band 2 */
+                    { 111, 197, 242 },
+                    {  46, 158, 219 },
+                    {   9, 104, 171 },
+                    {   2,  65, 125 },
+                    {   1,  44,  80 },
+                    {   1,  17,  91 }
+                }, { /* Coeff Band 3 */
+                    { 104, 208, 245 },
+                    {  39, 168, 224 },
+                    {   3, 109, 162 },
+                    {   1,  79, 124 },
+                    {   1,  50, 102 },
+                    {   1,  43, 102 }
+                }, { /* Coeff Band 4 */
+                    {  84, 220, 246 },
+                    {  31, 177, 231 },
+                    {   2, 115, 180 },
+                    {   1,  79, 134 },
+                    {   1,  55,  77 },
+                    {   1,  60,  79 }
+                }, { /* Coeff Band 5 */
+                    {  43, 243, 240 },
+                    {   8, 180, 217 },
+                    {   1, 115, 166 },
+                    {   1,  84, 121 },
+                    {   1,  51,  67 },
+                    {   1,  16,   6 }
+                }
+            }
+        }
+    }
+};
+
+const int8_t ff_vp9_mv_joint_tree[3][2] = { /* one row per tree node, one column per bit value; positive entries index the next row, negated MV_JOINT_* entries are leaves */
+    { -MV_JOINT_ZERO, 1 },                  // '0'
+        { -MV_JOINT_H, 2 },                 // '10'
+            { -MV_JOINT_V, -MV_JOINT_HV },  // '11x'
+};
+
+const int8_t ff_vp9_mv_class_tree[10][2] = { /* one row per tree node, one column per bit value; a value > 0 is the next row, a value <= 0 is leaf -value (0..10) */
+    { -0,   1 },                          // '0'
+        { -1,   2 },                      // '10'
+            {  3,   4 },
+                { -2,  -3 },              // '110x'
+                {  5,   6 },
+                    { -4,  -5 },          // '1110x'
+                    { -6,   7 },          // '11110'
+                        {  8,   9 },
+                            { -7,  -8 },  // '111110x'
+                            { -9, -10 },  // '111111x'
+};
+
+const int8_t ff_vp9_mv_fp_tree[3][2] = { /* one row per tree node, one column per bit value; a value > 0 is the next row, a value <= 0 is leaf -value (0..3) */
+    { -0,  1 },          // '0'
+        { -1,  2 },      // '10'
+            { -2, -3 },  // '11x'
+};
diff --git a/media/ffvpx/libavcodec/vp9data.h b/media/ffvpx/libavcodec/vp9data.h
new file mode 100644
index 0000000000..086dbdec06
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9data.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP9DATA_H
+#define AVCODEC_VP9DATA_H
+
+#include <stdint.h>
+
+#include "vp9dec.h"
+
+extern const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2];
+extern const int8_t ff_vp9_partition_tree[3][2];
+extern const uint8_t ff_vp9_default_kf_partition_probs[4][4][3]; // same shape as ProbContext.partition
+extern const int8_t ff_vp9_segmentation_tree[7][2];
+extern const int8_t ff_vp9_intramode_tree[9][2];
+extern const uint8_t ff_vp9_default_kf_ymode_probs[10][10][9];
+extern const uint8_t ff_vp9_default_kf_uvmode_probs[10][9]; // same shape as ProbContext.uv_mode
+extern const int8_t ff_vp9_inter_mode_tree[3][2];
+extern const int8_t ff_vp9_filter_tree[2][2];
+extern const enum FilterMode ff_vp9_filter_lut[3];
+extern const int16_t ff_vp9_dc_qlookup[3][256];
+extern const int16_t ff_vp9_ac_qlookup[3][256];
+extern const enum TxfmType ff_vp9_intra_txfm_type[14];
+extern const int16_t ff_vp9_default_scan_4x4[16]; // 16 = 4 * 4 entries
+extern const int16_t ff_vp9_col_scan_4x4[16];
+extern const int16_t ff_vp9_row_scan_4x4[16];
+extern const int16_t ff_vp9_default_scan_8x8[64]; // 64 = 8 * 8 entries
+extern const int16_t ff_vp9_col_scan_8x8[64];
+extern const int16_t ff_vp9_row_scan_8x8[64];
+extern const int16_t ff_vp9_default_scan_16x16[256]; // 256 = 16 * 16 entries
+extern const int16_t ff_vp9_col_scan_16x16[256];
+extern const int16_t ff_vp9_row_scan_16x16[256];
+extern const int16_t ff_vp9_default_scan_32x32[1024]; // 1024 = 32 * 32 entries; no col/row variant is declared for 32x32
+extern const int16_t * const ff_vp9_scans[5][4]; // NOTE(review): presumably pointers to the scan tables above - confirm
+extern const int16_t ff_vp9_default_scan_4x4_nb[16][2]; // the _nb tables hold two values per scan position
+extern const int16_t ff_vp9_col_scan_4x4_nb[16][2];
+extern const int16_t ff_vp9_row_scan_4x4_nb[16][2];
+extern const int16_t ff_vp9_default_scan_8x8_nb[64][2];
+extern const int16_t ff_vp9_col_scan_8x8_nb[64][2];
+extern const int16_t ff_vp9_row_scan_8x8_nb[64][2];
+extern const int16_t ff_vp9_default_scan_16x16_nb[256][2];
+extern const int16_t ff_vp9_col_scan_16x16_nb[256][2];
+extern const int16_t ff_vp9_row_scan_16x16_nb[256][2];
+extern const int16_t ff_vp9_default_scan_32x32_nb[1024][2];
+extern const int16_t (* const ff_vp9_scans_nb[5][4])[2]; // NOTE(review): presumably pointers to the _nb tables above - confirm
+extern const uint8_t ff_vp9_model_pareto8[256][8];
+extern const ProbContext ff_vp9_default_probs; // ProbContext is declared in vp9dec.h
+extern const uint8_t ff_vp9_default_coef_probs[4][2][2][6][6][3]; // same shape as VP9Context.prob_ctx[].coef
+extern const int8_t ff_vp9_mv_joint_tree[3][2]; // the mv trees use one { bit 0, bit 1 } row per node
+extern const int8_t ff_vp9_mv_class_tree[10][2];
+extern const int8_t ff_vp9_mv_fp_tree[3][2];
+
+#endif /* AVCODEC_VP9DATA_H */
diff --git a/media/ffvpx/libavcodec/vp9dec.h b/media/ffvpx/libavcodec/vp9dec.h
new file mode 100644
index 0000000000..de7aba0458
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dec.h
@@ -0,0 +1,259 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP9DEC_H
+#define AVCODEC_VP9DEC_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdatomic.h>
+
+#include "libavutil/buffer.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/thread.h"
+#include "libavutil/internal.h"
+
+#include "get_bits.h"
+#include "videodsp.h"
+#include "vp9.h"
+#include "vp9dsp.h"
+#include "vp9shared.h"
+#include "vpx_rac.h"
+
+#define REF_INVALID_SCALE 0xFFFF // largest uint16_t value; NOTE(review): presumably a sentinel for VP9Context.mvscale[][] - confirm
+
+enum MVJoint { // leaf symbols of ff_vp9_mv_joint_tree (stored there negated)
+    MV_JOINT_ZERO, // 0
+    MV_JOINT_H,    // 1; NOTE(review): presumably only the horizontal component is coded - confirm
+    MV_JOINT_V,    // 2; NOTE(review): presumably only the vertical component is coded - confirm
+    MV_JOINT_HV,   // 3; NOTE(review): presumably both components are coded - confirm
+};
+
+typedef struct ProbContext { // 8-bit entries throughout; VP9TileData.counts holds a counter array for every field below
+    uint8_t y_mode[4][9];
+    uint8_t uv_mode[10][9]; // same shape as ff_vp9_default_kf_uvmode_probs
+    uint8_t filter[4][2];
+    uint8_t mv_mode[7][3];
+    uint8_t intra[4];
+    uint8_t comp[5];
+    uint8_t single_ref[5][2];
+    uint8_t comp_ref[5];
+    uint8_t tx32p[2][3];
+    uint8_t tx16p[2][2];
+    uint8_t tx8p[2];
+    uint8_t skip[3];
+    uint8_t mv_joint[3]; // as many entries as ff_vp9_mv_joint_tree has rows
+    struct {
+        uint8_t sign;
+        uint8_t classes[10]; // as many entries as ff_vp9_mv_class_tree has rows
+        uint8_t class0;
+        uint8_t bits[10];
+        uint8_t class0_fp[2][3];
+        uint8_t fp[3]; // as many entries as ff_vp9_mv_fp_tree has rows
+        uint8_t class0_hp;
+        uint8_t hp;
+    } mv_comp[2]; // NOTE(review): presumably one entry per motion-vector component - confirm
+    uint8_t partition[4][4][3]; // same shape as ff_vp9_default_kf_partition_probs
+} ProbContext;
+
+typedef struct VP9Filter { // passed as lflvl to ff_vp9_decode_block() and ff_vp9_loopfilter_sb()
+    uint8_t level[8 * 8]; // 64 entries; NOTE(review): presumably one filter level per 8x8 unit - confirm
+    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
+                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
+} VP9Filter;
+
+typedef struct VP9Block { // VP9TileData keeps pointers to these in b_base / b
+    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip; // ref[2] and mode[4] match the [ref] and [b_idx] sizes of mv below
+    enum FilterMode filter;
+    VP9mv mv[4 /* b_idx */][2 /* ref */];
+    enum BlockSize bs;
+    enum TxfmMode tx, uvtx; // NOTE(review): presumably the luma and chroma transform sizes - confirm
+    enum BlockLevel bl; // bl and bp are also the last two parameters of ff_vp9_decode_block()
+    enum BlockPartition bp;
+} VP9Block;
+
+typedef struct VP9TileData VP9TileData;
+
+typedef struct VP9Context {
+    VP9SharedContext s;
+    VP9TileData *td; // each VP9TileData points back here through its const 's' member
+
+    VP9DSPContext dsp;
+    VideoDSPContext vdsp;
+    GetBitContext gb;
+    VPXRangeCoder c;
+    int pass, active_tile_cols;
+
+#if HAVE_THREADS // the four members below exist only in threaded builds
+    pthread_mutex_t progress_mutex;
+    pthread_cond_t progress_cond;
+    atomic_int *entries;
+    unsigned pthread_init_cnt;
+#endif
+
+    uint8_t ss_h, ss_v;
+    uint8_t last_bpp, bpp_index, bytesperpixel;
+    uint8_t last_keyframe;
+    // sb_cols/rows, rows/cols and last_fmt are used for allocating all internal
+    // arrays, and are thus per-thread. w/h and gf_fmt are synced between threads
+    // and are therefore per-stream. pix_fmt represents the value in the header
+    // of the currently processed frame.
+    int w, h;
+    enum AVPixelFormat pix_fmt, last_fmt, gf_fmt;
+    unsigned sb_cols, sb_rows, rows, cols;
+    ThreadFrame next_refs[8];
+
+    struct {
+        uint8_t lim_lut[64];
+        uint8_t mblim_lut[64];
+    } filter_lut;
+    struct {
+        ProbContext p;
+        uint8_t coef[4][2][2][6][6][3]; // same shape as ff_vp9_default_coef_probs
+    } prob_ctx[4];
+    struct {
+        ProbContext p;
+        uint8_t coef[4][2][2][6][6][11]; // 11 values per context instead of 3; NOTE(review): presumably 3 + the 8 columns of ff_vp9_model_pareto8 - confirm
+    } prob;
+
+    // contextual (above) cache
+    uint8_t *above_partition_ctx;
+    uint8_t *above_mode_ctx;
+    // FIXME maybe merge some of the below in a flags field?
+    uint8_t *above_y_nnz_ctx;
+    uint8_t *above_uv_nnz_ctx[2];
+    uint8_t *above_skip_ctx; // 1bit
+    uint8_t *above_txfm_ctx; // 2bit
+    uint8_t *above_segpred_ctx; // 1bit
+    uint8_t *above_intra_ctx; // 1bit
+    uint8_t *above_comp_ctx; // 1bit
+    uint8_t *above_ref_ctx; // 2bit
+    uint8_t *above_filter_ctx;
+    VP9mv (*above_mv_ctx)[2]; // pointer to pairs of VP9mv; VP9TileData.left_mv_ctx is the left-side counterpart
+
+    // whole-frame cache
+    uint8_t *intra_pred_data[3];
+    VP9Filter *lflvl;
+
+    // block reconstruction intermediates
+    int block_alloc_using_2pass;
+    uint16_t mvscale[3][2]; // REF_INVALID_SCALE (0xFFFF) is the largest value this element type can hold
+    uint8_t mvstep[3][2];
+
+    // frame specific buffer pools
+    AVBufferPool *frame_extradata_pool;
+    int frame_extradata_pool_size;
+} VP9Context;
+
+struct VP9TileData {
+    const VP9Context *s; // read-only view of the decoder context
+    VPXRangeCoder *c_b;
+    VPXRangeCoder *c;
+    int row, row7, col, col7;
+    uint8_t *dst[3];
+    ptrdiff_t y_stride, uv_stride;
+    VP9Block *b_base, *b;
+    unsigned tile_col_start;
+
+    struct { // counters: a ProbContext array of N entries has N + 1 slots here, a single entry becomes a pair
+        unsigned y_mode[4][10];
+        unsigned uv_mode[10][10];
+        unsigned filter[4][3];
+        unsigned mv_mode[7][4];
+        unsigned intra[4][2];
+        unsigned comp[5][2];
+        unsigned single_ref[5][2][2];
+        unsigned comp_ref[5][2];
+        unsigned tx32p[2][4];
+        unsigned tx16p[2][3];
+        unsigned tx8p[2][2];
+        unsigned skip[3][2];
+        unsigned mv_joint[4]; // four slots, matching the four MVJoint values
+        struct {
+            unsigned sign[2];
+            unsigned classes[11]; // eleven slots, matching the leaves 0..10 of ff_vp9_mv_class_tree
+            unsigned class0[2];
+            unsigned bits[10][2];
+            unsigned class0_fp[2][4];
+            unsigned fp[4]; // four slots, matching the leaves 0..3 of ff_vp9_mv_fp_tree
+            unsigned class0_hp[2];
+            unsigned hp[2];
+        } mv_comp[2];
+        unsigned partition[4][4][4];
+        unsigned coef[4][2][2][6][6][3]; // coef and eob have no same-named member in ProbContext
+        unsigned eob[4][2][2][6][6][2];
+    } counts;
+
+    // whole-frame cache
+    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
+
+    // contextual (left) cache
+    DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
+    DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
+    DECLARE_ALIGNED(16, VP9mv, left_mv_ctx)[16][2];
+    DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
+    DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
+    // block reconstruction intermediates
+    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
+    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
+    struct { int x, y; } min_mv, max_mv;
+    int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
+    uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
+
+    // error message
+    int error_info;
+    struct { // four bit-fields, 13 + 13 + 2 + 2 = 30 bits in total
+        unsigned int row:13;
+        unsigned int col:13;
+        unsigned int block_size_idx_x:2;
+        unsigned int block_size_idx_y:2;
+    } *block_structure;
+    unsigned int nb_block_structure; // NOTE(review): presumably the number of entries in block_structure - confirm
+};
+
+void ff_vp9_fill_mv(VP9TileData *td, VP9mv *mv, int mode, int sb);
+
+void ff_vp9_adapt_probs(VP9Context *s); // the only function here that takes the whole decoder context, non-const
+
+void ff_vp9_decode_block(VP9TileData *td, int row, int col,
+                         VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
+                         enum BlockLevel bl, enum BlockPartition bp); // bl/bp have the types of VP9Block.bl / VP9Block.bp
+
+void ff_vp9_loopfilter_sb(AVCodecContext *avctx, VP9Filter *lflvl,
+                          int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff); // takes the AVCodecContext, not a VP9TileData
+
+void ff_vp9_intra_recon_8bpp(VP9TileData *td,
+                             ptrdiff_t y_off, ptrdiff_t uv_off); // the recon functions come in an 8bpp and a 16bpp variant
+void ff_vp9_intra_recon_16bpp(VP9TileData *td,
+                              ptrdiff_t y_off, ptrdiff_t uv_off);
+void ff_vp9_inter_recon_8bpp(VP9TileData *td); // the inter variants take no explicit plane offsets
+void ff_vp9_inter_recon_16bpp(VP9TileData *td);
+
+#endif /* AVCODEC_VP9DEC_H */
diff --git a/media/ffvpx/libavcodec/vp9dsp.c b/media/ffvpx/libavcodec/vp9dsp.c
new file mode 100644
index 0000000000..d8ddf74d4f
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp.c
@@ -0,0 +1,110 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/mem_internal.h"
+
+#include "vp9dsp.h"
+
+const DECLARE_ALIGNED(16, int16_t, ff_vp9_subpel_filters)[3][16][8] = { // [filter type][16 positions][8 taps]; every row of taps sums to 128
+    [FILTER_8TAP_REGULAR] = {
+        {  0,  0,   0, 128,   0,   0,  0,  0 }, // position 0: only tap 3 is non-zero
+        {  0,  1,  -5, 126,   8,  -3,  1,  0 },
+        { -1,  3, -10, 122,  18,  -6,  2,  0 },
+        { -1,  4, -13, 118,  27,  -9,  3, -1 },
+        { -1,  4, -16, 112,  37, -11,  4, -1 },
+        { -1,  5, -18, 105,  48, -14,  4, -1 },
+        { -1,  5, -19,  97,  58, -16,  5, -1 },
+        { -1,  6, -19,  88,  68, -18,  5, -1 },
+        { -1,  6, -19,  78,  78, -19,  6, -1 }, // position 8: the taps are symmetric
+        { -1,  5, -18,  68,  88, -19,  6, -1 },
+        { -1,  5, -16,  58,  97, -19,  5, -1 },
+        { -1,  4, -14,  48, 105, -18,  5, -1 },
+        { -1,  4, -11,  37, 112, -16,  4, -1 },
+        { -1,  3,  -9,  27, 118, -13,  4, -1 },
+        {  0,  2,  -6,  18, 122, -10,  3, -1 },
+        {  0,  1,  -3,   8, 126,  -5,  1,  0 },
+    }, [FILTER_8TAP_SHARP] = {
+        {  0,  0,   0, 128,   0,   0,  0,  0 }, // position 0: only tap 3 is non-zero
+        { -1,  3,  -7, 127,   8,  -3,  1,  0 },
+        { -2,  5, -13, 125,  17,  -6,  3, -1 },
+        { -3,  7, -17, 121,  27, -10,  5, -2 },
+        { -4,  9, -20, 115,  37, -13,  6, -2 },
+        { -4, 10, -23, 108,  48, -16,  8, -3 },
+        { -4, 10, -24, 100,  59, -19,  9, -3 },
+        { -4, 11, -24,  90,  70, -21, 10, -4 },
+        { -4, 11, -23,  80,  80, -23, 11, -4 }, // position 8: the taps are symmetric
+        { -4, 10, -21,  70,  90, -24, 11, -4 },
+        { -3,  9, -19,  59, 100, -24, 10, -4 },
+        { -3,  8, -16,  48, 108, -23, 10, -4 },
+        { -2,  6, -13,  37, 115, -20,  9, -4 },
+        { -2,  5, -10,  27, 121, -17,  7, -3 },
+        { -1,  3,  -6,  17, 125, -13,  5, -2 },
+        {  0,  1,  -3,   8, 127,  -7,  3, -1 },
+    }, [FILTER_8TAP_SMOOTH] = {
+        {  0,  0,   0, 128,   0,   0,  0,  0 }, // position 0: only tap 3 is non-zero
+        { -3, -1,  32,  64,  38,   1, -3,  0 },
+        { -2, -2,  29,  63,  41,   2, -3,  0 },
+        { -2, -2,  26,  63,  43,   4, -4,  0 },
+        { -2, -3,  24,  62,  46,   5, -4,  0 },
+        { -2, -3,  21,  60,  49,   7, -4,  0 },
+        { -1, -4,  18,  59,  51,   9, -4,  0 },
+        { -1, -4,  16,  57,  53,  12, -4, -1 },
+        { -1, -4,  14,  55,  55,  14, -4, -1 }, // position 8: the taps are symmetric
+        { -1, -4,  12,  53,  57,  16, -4, -1 },
+        {  0, -4,   9,  51,  59,  18, -4, -1 },
+        {  0, -4,   7,  49,  60,  21, -3, -2 },
+        {  0, -4,   5,  46,  62,  24, -3, -2 },
+        {  0, -4,   4,  43,  63,  26, -2, -2 },
+        {  0, -3,   2,  41,  63,  29, -2, -2 },
+        {  0, -3,   1,  38,  64,  32, -1, -3 },
+    }
+};
+
+
+av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
+{
+    // Bit-depth specific init first, then at most one per-architecture init.
+    switch (bpp) {
+    case 8:  ff_vp9dsp_init_8(dsp);  break;
+    case 10: ff_vp9dsp_init_10(dsp); break;
+    default:
+        av_assert0(bpp == 12);
+        ff_vp9dsp_init_12(dsp);
+    }
+
+#if ARCH_AARCH64
+    ff_vp9dsp_init_aarch64(dsp, bpp);
+#elif ARCH_ARM
+    ff_vp9dsp_init_arm(dsp, bpp);
+#elif ARCH_X86
+    ff_vp9dsp_init_x86(dsp, bpp, bitexact);
+#elif ARCH_MIPS
+    ff_vp9dsp_init_mips(dsp, bpp);
+#elif ARCH_LOONGARCH
+    ff_vp9dsp_init_loongarch(dsp, bpp);
+#endif
+}
diff --git a/media/ffvpx/libavcodec/vp9dsp.h b/media/ffvpx/libavcodec/vp9dsp.h
new file mode 100644
index 0000000000..be0ac0b181
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp.h
@@ -0,0 +1,138 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP9DSP_H
+#define AVCODEC_VP9DSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavcodec/vp9.h"
+#include "libavutil/attributes_internal.h"
+
+typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, // element type of VP9DSPContext.mc
+                            const uint8_t *ref, ptrdiff_t ref_stride,
+                            int h, int mx, int my);
+typedef void (*vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, // element type of VP9DSPContext.smc
+                                   const uint8_t *ref, ptrdiff_t ref_stride,
+                                   int h, int mx, int my, int dx, int dy); // same as vp9_mc_func plus dx/dy
+
+typedef struct VP9DSPContext { // function-pointer tables only; handed to ff_vp9dsp_init() and the per-depth / per-arch init functions
+    /*
+     * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32
+     * dimension 2: intra prediction modes
+     *
+     * dst/left/top is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
+     * stride is aligned by 16 pixels
+     * top[-1] is top/left; top[4,7] is top-right for 4x4
+     */
+    // FIXME(rbultje) maybe replace left/top pointers with HAVE_TOP/
+    // HAVE_LEFT/HAVE_TOPRIGHT flags instead, and then handle it in-place?
+    // also needs to fit in with what H.264/VP8/etc do
+    void (*intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst,
+                                                         ptrdiff_t stride,
+                                                         const uint8_t *left,
+                                                         const uint8_t *top);
+
+    /*
+     * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32, 4=lossless (3-4=dct only)
+     * dimension 2: 0=dct/dct, 1=dct/adst, 2=adst/dct, 3=adst/adst
+     *
+     * dst is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
+     * stride is aligned by 16 pixels
+     * block is 16-byte aligned
+     * eob indicates the position (+1) of the last non-zero coefficient,
+     * in scan-order. This can be used to write faster versions, e.g. a
+     * dc-only 4x4/8x8/16x16/32x32, or a 4x4-only (eob<10) 8x8/16x16/32x32,
+     * etc.
+     */
+    // FIXME also write idct_add_block() versions for whole (inter) pred
+    // blocks, so we can do 2 4x4s at once
+    void (*itxfm_add[N_TXFM_SIZES + 1][N_TXFM_TYPES])(uint8_t *dst,
+                                                      ptrdiff_t stride,
+                                                      int16_t *block, int eob);
+
+    /*
+     * dimension 1: width of filter (0=4, 1=8, 2=16)
+     * dimension 2: 0=col-edge filter (h), 1=row-edge filter (v)
+     *
+     * dst/stride are aligned by 8
+     */
+    void (*loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride,
+                                int mb_lim, int lim, int hev_thr);
+
+    /*
+     * dimension 1: 0=col-edge filter (h), 1=row-edge filter (v)
+     *
+     * The width of filter is assumed to be 16; dst/stride are aligned by 16
+     */
+    void (*loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride,
+                              int mb_lim, int lim, int hev_thr);
+
+    /*
+     * dimension 1/2: width of filter (0=4, 1=8) for each filter half
+     * dimension 3: 0=col-edge filter (h), 1=row-edge filter (v)
+     *
+     * dst/stride are aligned by operation size
+     * this basically calls loop_filter_8[d1][d3](), followed by
+     * loop_filter_8[d2][d3]() on the next 8 pixels
+     * mb_lim/lim/hev_thr contain two values in the lowest two bytes of the
+     * integer.
+     */
+    // FIXME perhaps a mix4 that operates on 32px (for AVX2)
+    void (*loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride,
+                                      int mb_lim, int lim, int hev_thr);
+
+    /*
+     * dimension 1: hsize (0: 64, 1: 32, 2: 16, 3: 8, 4: 4)
+     * dimension 2: filter type (0: smooth, 1: regular, 2: sharp, 3: bilin)
+     * dimension 3: averaging type (0: put, 1: avg)
+     * dimension 4: x subpel interpolation (0: none, 1: 8tap/bilin)
+     * dimension 5: y subpel interpolation (0: none, 1: 8tap/bilin)
+     *
+     * dst/stride are aligned by hsize
+     */
+    vp9_mc_func mc[5][N_FILTERS][2][2][2];
+
+    /*
+     * for scalable MC, first 3 dimensions identical to above, the other two
+     * don't exist since it changes per stepsize.
+     */
+    vp9_scaled_mc_func smc[5][N_FILTERS][2];
+} VP9DSPContext;
+
+extern const int16_t attribute_visibility_hidden ff_vp9_subpel_filters[3][16][8]; // defined in vp9dsp.c
+
+void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact); // bpp must be 8, 10 or 12 (vp9dsp.c asserts this)
+
+void ff_vp9dsp_init_8(VP9DSPContext *dsp); // one bit-depth init each for 8, 10 and 12 bpp
+void ff_vp9dsp_init_10(VP9DSPContext *dsp);
+void ff_vp9dsp_init_12(VP9DSPContext *dsp);
+
+void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp); // ff_vp9dsp_init() calls at most one of the per-arch inits
+void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp);
+void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact); // the only per-arch init that receives bitexact
+void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
+void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp);
+
+#endif /* AVCODEC_VP9DSP_H */
diff --git a/media/ffvpx/libavcodec/vp9dsp_10bpp.c b/media/ffvpx/libavcodec/vp9dsp_10bpp.c
new file mode 100644
index 0000000000..62ce182070
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp_10bpp.c
@@ -0,0 +1,26 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BIT_DEPTH 10 // selects the 10-bit build of the template included below
+#define dctint int64_t // the 8 bpp file uses plain int here
+#include "vp9dsp_template.c" // the template itself tests BIT_DEPTH (e.g. "#if BIT_DEPTH != 12")
diff --git a/media/ffvpx/libavcodec/vp9dsp_12bpp.c b/media/ffvpx/libavcodec/vp9dsp_12bpp.c
new file mode 100644
index 0000000000..2f36471c5b
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp_12bpp.c
@@ -0,0 +1,26 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BIT_DEPTH 12 // the template's "#if BIT_DEPTH != 12" section is compiled out in this build
+#define dctint int64_t // the 8 bpp file uses plain int here
+#include "vp9dsp_template.c" // the template is compiled once per bit-depth file
diff --git a/media/ffvpx/libavcodec/vp9dsp_8bpp.c b/media/ffvpx/libavcodec/vp9dsp_8bpp.c
new file mode 100644
index 0000000000..4b219b06b0
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp_8bpp.c
@@ -0,0 +1,26 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BIT_DEPTH 8 // selects the 8-bit build of the template included below
+#define dctint int // the 10 and 12 bpp files use int64_t here
+#include "vp9dsp_template.c" // the template is compiled once per bit-depth file
diff --git a/media/ffvpx/libavcodec/vp9dsp_template.c b/media/ffvpx/libavcodec/vp9dsp_template.c
new file mode 100644
index 0000000000..9b11661704
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp_template.c
@@ -0,0 +1,2546 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "bit_depth_template.c"
+#include "vp9dsp.h"
+
+#if BIT_DEPTH != 12
+
+// FIXME see whether we can merge parts of this (perhaps at least 4x4 and 8x8)
+// back with h264pred.[ch]
+
+static void vert_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                       const uint8_t *left, const uint8_t *_top)
+{
+    // Vertical prediction: each of the 4 rows repeats the 4 pixels at _top.
+    const pixel *above = (const pixel *) _top;
+    const pixel4 top_row = AV_RN4PA(above);
+    pixel *out = (pixel *) _dst;
+    int y;
+
+    stride /= sizeof(pixel); // from here on stride counts pixels
+    for (y = 0; y < 4; y++)
+        AV_WN4PA(out + stride * y, top_row);
+}
+
+static void vert_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+                       const uint8_t *left, const uint8_t *_top)
+{
+    // Vertical prediction: each of the 8 rows repeats the 8 pixels at _top.
+    const pixel *above = (const pixel *) _top;
+    const pixel4 quad[2] = { AV_RN4PA(above + 0), AV_RN4PA(above + 4) };
+    pixel *row = (pixel *) _dst;
+    int remaining = 8;
+
+    stride /= sizeof(pixel);
+    while (remaining--) {
+        AV_WN4PA(row + 0, quad[0]);
+        AV_WN4PA(row + 4, quad[1]);
+        row += stride;
+    }
+}
+
+static void vert_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *_top)
+{
+    // Vertical prediction: each of the 16 rows repeats the 16 pixels at _top,
+    // which are read once up front as four 4-pixel words.
+    const pixel *above = (const pixel *) _top;
+    pixel *row = (pixel *) _dst;
+    pixel4 quad[4];
+    int x, y;
+
+    for (x = 0; x < 4; x++)
+        quad[x] = AV_RN4PA(above + 4 * x);
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 16; y++) {
+        for (x = 0; x < 4; x++)
+            AV_WN4PA(row + 4 * x, quad[x]);
+        row += stride;
+    }
+}
+
+/*
+ * Vertical prediction, 32x32.
+ *
+ * The 32 pixels at _top are read once, as eight 4-pixel words, and those
+ * words are then stored into each of the 32 rows of _dst.
+ *
+ * The left edge is not used. stride is divided by sizeof(pixel) before it
+ * is added to the pixel pointer.
+ */
+static void vert_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *_top)
+{
+    const pixel *above = (const pixel *) _top;
+    pixel *row = (pixel *) _dst;
+    pixel4 quad[8];
+    int x, y;
+
+    // load the row above the block
+    for (x = 0; x < 8; x++)
+        quad[x] = AV_RN4PA(above + 4 * x);
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 32; y++) {
+        for (x = 0; x < 8; x++)
+            AV_WN4PA(row + 4 * x, quad[x]);
+        row += stride;
+    }
+}
+
+static void hor_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                      const uint8_t *_left, const uint8_t *top)
+{
+    // Horizontal prediction: row y is filled with left[3 - y]; top is unused.
+    const pixel *edge = (const pixel *) _left;
+    pixel *out = (pixel *) _dst;
+    int y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 4; y++)
+        AV_WN4PA(out + stride * y, PIXEL_SPLAT_X4(edge[3 - y]));
+}
+
+static void hor_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+                      const uint8_t *_left, const uint8_t *top)
+{
+    // Horizontal prediction: row y is filled with left[7 - y]; top is unused.
+    const pixel *edge = (const pixel *) _left;
+    pixel *row = (pixel *) _dst;
+    int i, x;
+
+    stride /= sizeof(pixel);
+    for (i = 7; i >= 0; i--) {
+        const pixel4 fill = PIXEL_SPLAT_X4(edge[i]);
+        for (x = 0; x < 8; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+static void hor_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+                        const uint8_t *_left, const uint8_t *top)
+{
+    // Horizontal prediction: row y is filled with left[15 - y], i.e. the left
+    // edge is consumed from its last entry down to its first; top is unused.
+    const pixel *edge = (const pixel *) _left;
+    pixel *row = (pixel *) _dst;
+    int i, x;
+
+    stride /= sizeof(pixel);
+    for (i = 15; i >= 0; i--) {
+        const pixel4 fill = PIXEL_SPLAT_X4(edge[i]);
+
+        for (x = 0; x < 16; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/*
+ * Horizontal prediction, 32x32.
+ *
+ * Row y of _dst is filled with left[31 - y], so the left edge is consumed
+ * from its last entry down to its first. The top edge is not used.
+ */
+static void hor_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+                        const uint8_t *_left, const uint8_t *top)
+{
+    const pixel *edge = (const pixel *) _left;
+    pixel *row = (pixel *) _dst;
+    int i, x;
+
+    stride /= sizeof(pixel);
+    for (i = 31; i >= 0; i--) {
+        const pixel4 fill = PIXEL_SPLAT_X4(edge[i]);
+
+        for (x = 0; x < 32; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+#endif /* BIT_DEPTH != 12 */
+
+static void tm_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                     const uint8_t *_left, const uint8_t *_top)
+{
+    // dst(x, y) = av_clip_pixel(top[x] + left[3 - y] - top[-1])
+    const pixel *above = (const pixel *) _top;
+    const pixel *edge = (const pixel *) _left;
+    pixel *row = (pixel *) _dst;
+    const int corner = above[-1];
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 4; y++) {
+        const int delta = edge[3 - y] - corner;
+        for (x = 0; x < 4; x++)
+            row[x] = av_clip_pixel(above[x] + delta);
+        row += stride;
+    }
+}
+
+/*
+ * "tm" predictor, 8x8: every output pixel is
+ *     av_clip_pixel(top[x] + left[7 - y] - top[-1]),
+ * i.e. the top row offset by (left pixel of that row - top-left corner).
+ */
+static void tm_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+                     const uint8_t *_left, const uint8_t *_top)
+{
+    const pixel *above = (const pixel *) _top;
+    const pixel *edge = (const pixel *) _left;
+    pixel *row = (pixel *) _dst;
+    const int corner = above[-1];
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 8; y++) {
+        const int delta = edge[7 - y] - corner;
+
+        for (x = 0; x < 8; x++)
+            row[x] = av_clip_pixel(above[x] + delta);
+        row += stride;
+    }
+}
+
+/**
+ * TM intra prediction for a 16x16 block.
+ *
+ * Every output pixel is av_clip_pixel(top[x] + left[15 - y] - top[-1]).
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples; row y uses entry 15 - y
+ * @param _top   top-edge samples; entry -1 is read as the top-left sample
+ */
+static void tm_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+                       const uint8_t *_left, const uint8_t *_top)
+{
+    const pixel *top = (const pixel *) _top;
+    const pixel *left = (const pixel *) _left;
+    pixel *row = (pixel *) _dst;
+    const int corner = top[-1];
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 16; y++) {
+        const int delta = left[15 - y] - corner;
+
+        for (x = 0; x < 16; x++)
+            row[x] = av_clip_pixel(top[x] + delta);
+        row += stride;
+    }
+}
+
+/**
+ * TM intra prediction for a 32x32 block.
+ *
+ * Every output pixel is av_clip_pixel(top[x] + left[31 - y] - top[-1]).
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples; row y uses entry 31 - y
+ * @param _top   top-edge samples; entry -1 is read as the top-left sample
+ */
+static void tm_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+                       const uint8_t *_left, const uint8_t *_top)
+{
+    const pixel *top = (const pixel *) _top;
+    const pixel *left = (const pixel *) _left;
+    pixel *row = (pixel *) _dst;
+    const int corner = top[-1];
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 32; y++) {
+        const int delta = left[31 - y] - corner;
+
+        for (x = 0; x < 32; x++)
+            row[x] = av_clip_pixel(top[x] + delta);
+        row += stride;
+    }
+}
+
+#if BIT_DEPTH != 12
+
+/**
+ * DC intra prediction for a 4x4 block.
+ *
+ * The whole block is filled with the rounded mean of the 4 left and 4 top
+ * edge samples: (sum + 4) >> 3.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 0..3 are read
+ * @param _top   top-edge samples, entries 0..3 are read
+ */
+static void dc_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                     const uint8_t *_left, const uint8_t *_top)
+{
+    const pixel *left = (const pixel *) _left;
+    const pixel *top = (const pixel *) _top;
+    pixel *dst = (pixel *) _dst;
+    int i, sum = 4; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 4; i++)
+        sum += left[i] + top[i];
+    fill = PIXEL_SPLAT_X4(sum >> 3);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 4; i++)
+        AV_WN4PA(dst + stride * i, fill);
+}
+
+/**
+ * DC intra prediction for an 8x8 block.
+ *
+ * The whole block is filled with the rounded mean of the 8 left and 8 top
+ * edge samples: (sum + 8) >> 4.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 0..7 are read
+ * @param _top   top-edge samples, entries 0..7 are read
+ */
+static void dc_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+                     const uint8_t *_left, const uint8_t *_top)
+{
+    const pixel *left = (const pixel *) _left;
+    const pixel *top = (const pixel *) _top;
+    pixel *row = (pixel *) _dst;
+    int i, x, sum = 8; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 8; i++)
+        sum += left[i] + top[i];
+    fill = PIXEL_SPLAT_X4(sum >> 4);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 8; i++) {
+        for (x = 0; x < 8; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * DC intra prediction for a 16x16 block.
+ *
+ * The whole block is filled with the rounded mean of the 16 left and 16 top
+ * edge samples: (sum + 16) >> 5.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 0..15 are read
+ * @param _top   top-edge samples, entries 0..15 are read
+ */
+static void dc_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+                       const uint8_t *_left, const uint8_t *_top)
+{
+    const pixel *left = (const pixel *) _left;
+    const pixel *top = (const pixel *) _top;
+    pixel *row = (pixel *) _dst;
+    int i, x, sum = 16; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 16; i++)
+        sum += left[i] + top[i];
+    fill = PIXEL_SPLAT_X4(sum >> 5);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 16; i++) {
+        for (x = 0; x < 16; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * DC intra prediction for a 32x32 block.
+ *
+ * The whole block is filled with the rounded mean of the 32 left and 32 top
+ * edge samples: (sum + 32) >> 6.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 0..31 are read
+ * @param _top   top-edge samples, entries 0..31 are read
+ */
+static void dc_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+                       const uint8_t *_left, const uint8_t *_top)
+{
+    const pixel *left = (const pixel *) _left;
+    const pixel *top = (const pixel *) _top;
+    pixel *row = (pixel *) _dst;
+    int i, x, sum = 32; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 32; i++)
+        sum += left[i] + top[i];
+    fill = PIXEL_SPLAT_X4(sum >> 6);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 32; i++) {
+        for (x = 0; x < 32; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Left-only DC intra prediction for a 4x4 block.
+ *
+ * The whole block is filled with the rounded mean of the 4 left-edge
+ * samples: (sum + 2) >> 2.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 0..3 are read
+ * @param top    unused
+ */
+static void dc_left_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                          const uint8_t *_left, const uint8_t *top)
+{
+    const pixel *left = (const pixel *) _left;
+    pixel *dst = (pixel *) _dst;
+    int i, sum = 2; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 4; i++)
+        sum += left[i];
+    fill = PIXEL_SPLAT_X4(sum >> 2);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 4; i++)
+        AV_WN4PA(dst + stride * i, fill);
+}
+
+/**
+ * Left-only DC intra prediction for an 8x8 block.
+ *
+ * The whole block is filled with the rounded mean of the 8 left-edge
+ * samples: (sum + 4) >> 3.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 0..7 are read
+ * @param top    unused
+ */
+static void dc_left_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+                          const uint8_t *_left, const uint8_t *top)
+{
+    const pixel *left = (const pixel *) _left;
+    pixel *row = (pixel *) _dst;
+    int i, x, sum = 4; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 8; i++)
+        sum += left[i];
+    fill = PIXEL_SPLAT_X4(sum >> 3);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 8; i++) {
+        for (x = 0; x < 8; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Left-only DC intra prediction for a 16x16 block.
+ *
+ * The whole block is filled with the rounded mean of the 16 left-edge
+ * samples: (sum + 8) >> 4.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 0..15 are read
+ * @param top    unused
+ */
+static void dc_left_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+                            const uint8_t *_left, const uint8_t *top)
+{
+    const pixel *left = (const pixel *) _left;
+    pixel *row = (pixel *) _dst;
+    int i, x, sum = 8; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 16; i++)
+        sum += left[i];
+    fill = PIXEL_SPLAT_X4(sum >> 4);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 16; i++) {
+        for (x = 0; x < 16; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Left-only DC intra prediction for a 32x32 block.
+ *
+ * The whole block is filled with the rounded mean of the 32 left-edge
+ * samples: (sum + 16) >> 5.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 0..31 are read
+ * @param top    unused
+ */
+static void dc_left_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+                            const uint8_t *_left, const uint8_t *top)
+{
+    const pixel *left = (const pixel *) _left;
+    pixel *row = (pixel *) _dst;
+    int i, x, sum = 16; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 32; i++)
+        sum += left[i];
+    fill = PIXEL_SPLAT_X4(sum >> 5);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 32; i++) {
+        for (x = 0; x < 32; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Top-only DC intra prediction for a 4x4 block.
+ *
+ * The whole block is filled with the rounded mean of the 4 top-edge
+ * samples: (sum + 2) >> 2.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param _top   top-edge samples, entries 0..3 are read
+ */
+static void dc_top_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *_top)
+{
+    const pixel *top = (const pixel *) _top;
+    pixel *dst = (pixel *) _dst;
+    int i, sum = 2; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 4; i++)
+        sum += top[i];
+    fill = PIXEL_SPLAT_X4(sum >> 2);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 4; i++)
+        AV_WN4PA(dst + stride * i, fill);
+}
+
+/**
+ * Top-only DC intra prediction for an 8x8 block.
+ *
+ * The whole block is filled with the rounded mean of the 8 top-edge
+ * samples: (sum + 4) >> 3.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param _top   top-edge samples, entries 0..7 are read
+ */
+static void dc_top_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *_top)
+{
+    const pixel *top = (const pixel *) _top;
+    pixel *row = (pixel *) _dst;
+    int i, x, sum = 4; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 8; i++)
+        sum += top[i];
+    fill = PIXEL_SPLAT_X4(sum >> 3);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 8; i++) {
+        for (x = 0; x < 8; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Top-only DC intra prediction for a 16x16 block.
+ *
+ * The whole block is filled with the rounded mean of the 16 top-edge
+ * samples: (sum + 8) >> 4.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param _top   top-edge samples, entries 0..15 are read
+ */
+static void dc_top_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+                           const uint8_t *left, const uint8_t *_top)
+{
+    const pixel *top = (const pixel *) _top;
+    pixel *row = (pixel *) _dst;
+    int i, x, sum = 8; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 16; i++)
+        sum += top[i];
+    fill = PIXEL_SPLAT_X4(sum >> 4);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 16; i++) {
+        for (x = 0; x < 16; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Top-only DC intra prediction for a 32x32 block.
+ *
+ * The whole block is filled with the rounded mean of the 32 top-edge
+ * samples: (sum + 16) >> 5.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param _top   top-edge samples, entries 0..31 are read
+ */
+static void dc_top_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+                           const uint8_t *left, const uint8_t *_top)
+{
+    const pixel *top = (const pixel *) _top;
+    pixel *row = (pixel *) _dst;
+    int i, x, sum = 16; /* starts at the rounding term */
+    pixel4 fill;
+
+    for (i = 0; i < 32; i++)
+        sum += top[i];
+    fill = PIXEL_SPLAT_X4(sum >> 5);
+
+    stride /= sizeof(pixel);
+    for (i = 0; i < 32; i++) {
+        for (x = 0; x < 32; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+#endif /* BIT_DEPTH != 12 */
+
+/**
+ * Constant-fill prediction for a 4x4 block: every pixel is set to
+ * 128 << (BIT_DEPTH - 8). Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_128_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
+    pixel *dst = (pixel *) _dst;
+    int y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 4; y++)
+        AV_WN4PA(dst + stride * y, fill);
+}
+
+/**
+ * Constant-fill prediction for an 8x8 block: every pixel is set to
+ * 128 << (BIT_DEPTH - 8). Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_128_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
+    pixel *row = (pixel *) _dst;
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 8; y++) {
+        for (x = 0; x < 8; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Constant-fill prediction for a 16x16 block: every pixel is set to
+ * 128 << (BIT_DEPTH - 8). Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_128_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+                           const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
+    pixel *row = (pixel *) _dst;
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 16; y++) {
+        for (x = 0; x < 16; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Constant-fill prediction for a 32x32 block: every pixel is set to
+ * 128 << (BIT_DEPTH - 8). Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_128_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+                           const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
+    pixel *row = (pixel *) _dst;
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 32; y++) {
+        for (x = 0; x < 32; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Constant-fill prediction for a 4x4 block: every pixel is set to
+ * (128 << (BIT_DEPTH - 8)) - 1. Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_127_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
+    pixel *dst = (pixel *) _dst;
+    int y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 4; y++)
+        AV_WN4PA(dst + stride * y, fill);
+}
+
+/**
+ * Constant-fill prediction for an 8x8 block: every pixel is set to
+ * (128 << (BIT_DEPTH - 8)) - 1. Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_127_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
+    pixel *row = (pixel *) _dst;
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 8; y++) {
+        for (x = 0; x < 8; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Constant-fill prediction for a 16x16 block: every pixel is set to
+ * (128 << (BIT_DEPTH - 8)) - 1. Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_127_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+                           const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
+    pixel *row = (pixel *) _dst;
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 16; y++) {
+        for (x = 0; x < 16; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Constant-fill prediction for a 32x32 block: every pixel is set to
+ * (128 << (BIT_DEPTH - 8)) - 1. Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_127_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+                           const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
+    pixel *row = (pixel *) _dst;
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 32; y++) {
+        for (x = 0; x < 32; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Constant-fill prediction for a 4x4 block: every pixel is set to
+ * (128 << (BIT_DEPTH - 8)) + 1. Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_129_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
+    pixel *dst = (pixel *) _dst;
+    int y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 4; y++)
+        AV_WN4PA(dst + stride * y, fill);
+}
+
+/**
+ * Constant-fill prediction for an 8x8 block: every pixel is set to
+ * (128 << (BIT_DEPTH - 8)) + 1. Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_129_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
+    pixel *row = (pixel *) _dst;
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 8; y++) {
+        for (x = 0; x < 8; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Constant-fill prediction for a 16x16 block: every pixel is set to
+ * (128 << (BIT_DEPTH - 8)) + 1. Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_129_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+                           const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
+    pixel *row = (pixel *) _dst;
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 16; y++) {
+        for (x = 0; x < 16; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+/**
+ * Constant-fill prediction for a 32x32 block: every pixel is set to
+ * (128 << (BIT_DEPTH - 8)) + 1. Neither edge is read.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param top    unused
+ */
+static void dc_129_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+                           const uint8_t *left, const uint8_t *top)
+{
+    const pixel4 fill = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
+    pixel *row = (pixel *) _dst;
+    int x, y;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 32; y++) {
+        for (x = 0; x < 32; x += 4)
+            AV_WN4PA(row + x, fill);
+        row += stride;
+    }
+}
+
+#if BIT_DEPTH != 12
+
+#if BIT_DEPTH == 8
+/* At 8 bits memset_bpc is simply an alias for memset(). */
+#define memset_bpc memset
+#else
+/**
+ * Pixel-wise fill for the non-8-bit builds.
+ *
+ * @param dst first 16-bit entry to write
+ * @param val value stored into each entry
+ * @param len number of entries to write; nothing is written if len <= 0
+ */
+static inline void memset_bpc(uint16_t *dst, int val, int len)
+{
+    int remaining;
+
+    for (remaining = len; remaining > 0; remaining--)
+        *dst++ = val;
+}
+#endif
+
+/*
+ * DST(x, y) indexes the local dst pointer at x + y * stride, i.e. column x of
+ * row y. It relies on the variables dst and stride of the enclosing function;
+ * the predictors using it divide stride by sizeof(pixel) first.
+ */
+#define DST(x, y) dst[(x) + (y) * stride]
+
+/**
+ * Diagonal down-left intra prediction for a 4x4 block.
+ *
+ * All pixels with the same x + y share one value. For x + y = k < 6 that
+ * value is (top[k] + 2 * top[k + 1] + top[k + 2] + 2) >> 2; the bottom-right
+ * pixel (k = 6) is a plain copy of top[7].
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param _top   top-edge samples, entries 0..7 are read
+ */
+static void diag_downleft_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                                const uint8_t *left, const uint8_t *_top)
+{
+    const pixel *top = (const pixel *) _top;
+    pixel *dst = (pixel *) _dst;
+    pixel diag[7]; /* one value per anti-diagonal, indexed by x + y */
+    int x, y;
+
+    for (x = 0; x < 6; x++)
+        diag[x] = (top[x] + top[x + 1] * 2 + top[x + 2] + 2) >> 2;
+    diag[6] = top[7];  // note: this is different from vp8 and such
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 4; y++)
+        for (x = 0; x < 4; x++)
+            DST(x, y) = diag[x + y];
+}
+
+/*
+ * def_diag_downleft(size) defines diag_downleft_<size>x<size>_c(), the
+ * diagonal down-left predictor for the given block size (instantiated below
+ * for 8, 16 and 32).
+ *
+ * v[] holds size - 1 filtered top samples:
+ *   v[i]        = (top[i] + 2 * top[i + 1] + top[i + 2] + 2) >> 2, i < size - 2
+ *   v[size - 2] = (top[size - 2] + 3 * top[size - 1] + 2) >> 2
+ * so nothing beyond top[size - 1] is read.
+ *
+ * Output row j starts with v[j .. size - 2] (size - 1 - j pixels) and its
+ * remaining j + 1 pixels are set to top[size - 1]. The left edge is unused.
+ */
+#define def_diag_downleft(size) \
+static void diag_downleft_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+                                              const uint8_t *left, const uint8_t *_top) \
+{ \
+    pixel *dst = (pixel *) _dst; \
+    const pixel *top = (const pixel *) _top; \
+    int i, j; \
+    pixel v[size - 1]; \
+\
+    stride /= sizeof(pixel); \
+    for (i = 0; i < size - 2; i++) \
+        v[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
+    v[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
+\
+    for (j = 0; j < size; j++) { \
+        memcpy(dst + j*stride, v + j, (size - 1 - j) * sizeof(pixel)); \
+        memset_bpc(dst + j*stride + size - 1 - j, top[size - 1], j + 1); \
+    } \
+}
+
+def_diag_downleft(8)
+def_diag_downleft(16)
+def_diag_downleft(32)
+
+/**
+ * Diagonal down-right intra prediction for a 4x4 block.
+ *
+ * The edge samples are laid out as one run: left[0..3], then the top-left
+ * sample top[-1], then top[0..3]. A (1, 2, 1) filter with rounding,
+ * (a + 2 * b + c + 2) >> 2, is slid over that run, giving 7 values. All
+ * pixels with the same x - y share one of them: pixel (x, y) takes entry
+ * 3 + x - y.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 0..3 are read
+ * @param _top   top-edge samples, entries -1..3 are read
+ */
+static void diag_downright_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                                 const uint8_t *_left, const uint8_t *_top)
+{
+    const pixel *top = (const pixel *) _top;
+    const pixel *left = (const pixel *) _left;
+    pixel *dst = (pixel *) _dst;
+    int edge[9];   /* left[0..3], top[-1], top[0..3] */
+    pixel diag[7]; /* filtered edge, indexed by 3 + x - y */
+    int i, x, y;
+
+    for (i = 0; i < 4; i++) {
+        edge[i]     = left[i];
+        edge[5 + i] = top[i];
+    }
+    edge[4] = top[-1];
+
+    for (i = 0; i < 7; i++)
+        diag[i] = (edge[i] + edge[i + 1] * 2 + edge[i + 2] + 2) >> 2;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 4; y++)
+        for (x = 0; x < 4; x++)
+            DST(x, y) = diag[3 + x - y];
+}
+
+/*
+ * def_diag_downright(size) defines diag_downright_<size>x<size>_c(), the
+ * diagonal down-right predictor for the given block size (instantiated below
+ * for 8, 16 and 32).
+ *
+ * v[] holds 2 * size - 1 values, each (a + 2 * b + c + 2) >> 2 over three
+ * consecutive edge samples:
+ *   v[0 .. size - 3]            from left[i], left[i + 1], left[i + 2]
+ *   v[size - 2], v[size - 1],
+ *   v[size]                     the three taps that include top[-1]
+ *   v[size + 1 .. 2 * size - 2] from top[i], top[i + 1], top[i + 2]
+ *
+ * Output row j is a copy of size consecutive entries starting at
+ * v[size - 1 - j], so each row is the previous one shifted by one entry.
+ */
+#define def_diag_downright(size) \
+static void diag_downright_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+                                               const uint8_t *_left, const uint8_t *_top) \
+{ \
+    pixel *dst = (pixel *) _dst; \
+    const pixel *top = (const pixel *) _top; \
+    const pixel *left = (const pixel *) _left; \
+    int i, j; \
+    pixel v[size + size - 1]; \
+\
+    stride /= sizeof(pixel); \
+    for (i = 0; i < size - 2; i++) { \
+        v[i           ] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
+        v[size + 1 + i] = (top[i]  + top[i + 1]  * 2 + top[i + 2]  + 2) >> 2; \
+    } \
+    v[size - 2] = (left[size - 2] + left[size - 1] * 2 + top[-1] + 2) >> 2; \
+    v[size - 1] = (left[size - 1] + top[-1] * 2 + top[ 0] + 2) >> 2; \
+    v[size    ] = (top[-1] + top[0]  * 2 + top[ 1] + 2) >> 2; \
+\
+    for (j = 0; j < size; j++) \
+        memcpy(dst + j*stride, v + size - 1 - j, size * sizeof(pixel)); \
+}
+
+def_diag_downright(8)
+def_diag_downright(16)
+def_diag_downright(32)
+
+/**
+ * Vertical-right intra prediction for a 4x4 block.
+ *
+ * Two 5-entry tables are built from the edges. Entries 1..4 of the "even"
+ * table are rounded 2-tap averages (a + b + 1) >> 1 along top[-1..3];
+ * entries 1..4 of the "odd" table are rounded 3-tap filters
+ * (a + 2 * b + c + 2) >> 2 along left[3], top[-1..3]. Entry 0 of either
+ * table is a 3-tap filter reaching further down the left edge.
+ * Rows 0 and 1 take entries 1..4 of the even and odd table; rows 2 and 3
+ * take entries 0..3 of the even and odd table.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 1..3 are read
+ * @param _top   top-edge samples, entries -1..3 are read
+ */
+static void vert_right_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                             const uint8_t *_left, const uint8_t *_top)
+{
+    const pixel *top = (const pixel *) _top;
+    const pixel *left = (const pixel *) _left;
+    pixel *dst = (pixel *) _dst;
+    const int corner = top[-1];
+    const int near = left[3], mid = left[2], far = left[1];
+    pixel even[5], odd[5];
+    int x;
+
+    even[0] = (corner + near * 2 + mid + 2) >> 2;
+    odd[0]  = (near + mid * 2 + far + 2) >> 2;
+    even[1] = (corner + top[0] + 1) >> 1;
+    odd[1]  = (near + corner * 2 + top[0] + 2) >> 2;
+    for (x = 0; x < 3; x++) {
+        even[x + 2] = (top[x] + top[x + 1] + 1) >> 1;
+        odd[x + 2]  = (top[x - 1] + top[x] * 2 + top[x + 1] + 2) >> 2;
+    }
+
+    stride /= sizeof(pixel);
+    for (x = 0; x < 4; x++) {
+        DST(x, 0) = even[x + 1];
+        DST(x, 1) = odd[x + 1];
+        DST(x, 2) = even[x];
+        DST(x, 3) = odd[x];
+    }
+}
+
+/*
+ * def_vert_right(size) defines vert_right_<size>x<size>_c(), the
+ * vertical-right predictor for the given block size (instantiated below for
+ * 8, 16 and 32).
+ *
+ * Two tables of size + size/2 - 1 entries are built: ve[] feeds the even
+ * output rows and vo[] the odd ones.
+ *   - entries 0 .. size/2 - 2 are (a + 2 * b + c + 2) >> 2 filters over the
+ *     left edge (the last ve entry of this group also includes top[-1]);
+ *   - from entry size/2 - 1 on, ve[] holds (a + b + 1) >> 1 averages and
+ *     vo[] holds (a + 2 * b + c + 2) >> 2 filters along top[-1 .. size - 1]
+ *     (vo[size/2 - 1] additionally uses left[size - 1]).
+ *
+ * Rows 2 * j and 2 * j + 1 copy size entries starting at index
+ * size/2 - 1 - j of ve[] and vo[] respectively.
+ */
+#define def_vert_right(size) \
+static void vert_right_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+                                           const uint8_t *_left, const uint8_t *_top) \
+{ \
+    pixel *dst = (pixel *) _dst; \
+    const pixel *top = (const pixel *) _top; \
+    const pixel *left = (const pixel *) _left; \
+    int i, j; \
+    pixel ve[size + size/2 - 1], vo[size + size/2 - 1]; \
+\
+    stride /= sizeof(pixel); \
+    for (i = 0; i < size/2 - 2; i++) { \
+        vo[i] = (left[i*2 + 3] + left[i*2 + 2] * 2 + left[i*2 + 1] + 2) >> 2; \
+        ve[i] = (left[i*2 + 4] + left[i*2 + 3] * 2 + left[i*2 + 2] + 2) >> 2; \
+    } \
+    vo[size/2 - 2] = (left[size - 1] + left[size - 2] * 2 + left[size - 3] + 2) >> 2; \
+    ve[size/2 - 2] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
+\
+    ve[size/2 - 1] = (top[-1] + top[0] + 1) >> 1; \
+    vo[size/2 - 1] = (left[size - 1] + top[-1] * 2 + top[0] + 2) >> 2; \
+    for (i = 0; i < size - 1; i++) { \
+        ve[size/2 + i] = (top[i] + top[i + 1] + 1) >> 1; \
+        vo[size/2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
+    } \
+\
+    for (j = 0; j < size / 2; j++) { \
+        memcpy(dst +  j*2     *stride, ve + size/2 - 1 - j, size * sizeof(pixel)); \
+        memcpy(dst + (j*2 + 1)*stride, vo + size/2 - 1 - j, size * sizeof(pixel)); \
+    } \
+}
+
+def_vert_right(8)
+def_vert_right(16)
+def_vert_right(32)
+
+/**
+ * Horizontal-down intra prediction for a 4x4 block.
+ *
+ * The edge samples are laid out as one run: left[0..3], top[-1],
+ * top[0..2]. A 10-entry table is derived from it: entries 0..7 alternate
+ * between the rounded 2-tap average (a + b + 1) >> 1 and the rounded 3-tap
+ * filter (a + 2 * b + c + 2) >> 2, each pair starting at the next sample of
+ * the run; entries 8 and 9 are 3-tap filters over the top samples. Row y is
+ * the 4 consecutive entries starting at index 6 - 2 * y.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 0..3 are read
+ * @param _top   top-edge samples, entries -1..2 are read
+ */
+static void hor_down_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                           const uint8_t *_left, const uint8_t *_top)
+{
+    const pixel *top = (const pixel *) _top;
+    const pixel *left = (const pixel *) _left;
+    pixel *dst = (pixel *) _dst;
+    int edge[8]; /* left[0..3], top[-1], top[0..2] */
+    pixel v[10];
+    int i, x, y;
+
+    for (i = 0; i < 4; i++)
+        edge[i] = left[i];
+    edge[4] = top[-1];
+    for (i = 0; i < 3; i++)
+        edge[5 + i] = top[i];
+
+    for (i = 0; i < 4; i++) {
+        v[i * 2]     = (edge[i] + edge[i + 1] + 1) >> 1;
+        v[i * 2 + 1] = (edge[i] + edge[i + 1] * 2 + edge[i + 2] + 2) >> 2;
+    }
+    v[8] = (edge[4] + edge[5] * 2 + edge[6] + 2) >> 2;
+    v[9] = (edge[5] + edge[6] * 2 + edge[7] + 2) >> 2;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 4; y++)
+        for (x = 0; x < 4; x++)
+            DST(x, y) = v[6 - y * 2 + x];
+}
+
+/*
+ * def_hor_down(size) defines hor_down_<size>x<size>_c(), the horizontal-down
+ * predictor for the given block size (instantiated below for 8, 16 and 32).
+ *
+ * v[] holds 3 * size - 2 entries:
+ *   - v[0 .. 2 * size - 1] interleaves, per left-edge position, a rounded
+ *     2-tap average (a + b + 1) >> 1 at the even index and a rounded 3-tap
+ *     filter (a + 2 * b + c + 2) >> 2 at the odd index; the last two pairs
+ *     (indices 2 * size - 4 .. 2 * size - 1) are written separately because
+ *     they involve top[-1] and top[0];
+ *   - v[2 * size .. 3 * size - 3] are 3-tap filters over
+ *     top[i - 1], top[i], top[i + 1].
+ *
+ * Output row j copies size entries starting at v[2 * size - 2 - 2 * j], so
+ * each row begins two entries earlier than the one above it.
+ */
+#define def_hor_down(size) \
+static void hor_down_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+                                         const uint8_t *_left, const uint8_t *_top) \
+{ \
+    pixel *dst = (pixel *) _dst; \
+    const pixel *top = (const pixel *) _top; \
+    const pixel *left = (const pixel *) _left; \
+    int i, j; \
+    pixel v[size * 3 - 2]; \
+\
+    stride /= sizeof(pixel); \
+    for (i = 0; i < size - 2; i++) { \
+        v[i*2       ] = (left[i + 1] + left[i + 0] + 1) >> 1; \
+        v[i*2    + 1] = (left[i + 2] + left[i + 1] * 2 + left[i + 0] + 2) >> 2; \
+        v[size*2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
+    } \
+    v[size*2 - 2] = (top[-1] + left[size - 1] + 1) >> 1; \
+    v[size*2 - 4] = (left[size - 1] + left[size - 2] + 1) >> 1; \
+    v[size*2 - 1] = (top[0]  + top[-1] * 2 + left[size - 1] + 2) >> 2; \
+    v[size*2 - 3] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
+\
+    for (j = 0; j < size; j++) \
+        memcpy(dst + j*stride, v + size*2 - 2 - j*2, size * sizeof(pixel)); \
+}
+
+def_hor_down(8)
+def_hor_down(16)
+def_hor_down(32)
+
+/**
+ * Vertical-left intra prediction for a 4x4 block.
+ *
+ * Two 5-entry tables are built from the top edge: even[k] is the rounded
+ * average (top[k] + top[k + 1] + 1) >> 1 and odd[k] is the rounded filter
+ * (top[k] + 2 * top[k + 1] + top[k + 2] + 2) >> 2. Rows 0 and 1 take
+ * entries 0..3 of the even and odd table; rows 2 and 3 take entries 1..4.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param left   unused
+ * @param _top   top-edge samples, entries 0..6 are read
+ */
+static void vert_left_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                            const uint8_t *left, const uint8_t *_top)
+{
+    const pixel *top = (const pixel *) _top;
+    pixel *dst = (pixel *) _dst;
+    pixel even[5], odd[5];
+    int k, x;
+
+    for (k = 0; k < 5; k++) {
+        even[k] = (top[k] + top[k + 1] + 1) >> 1;
+        odd[k]  = (top[k] + top[k + 1] * 2 + top[k + 2] + 2) >> 2;
+    }
+
+    stride /= sizeof(pixel);
+    for (x = 0; x < 4; x++) {
+        DST(x, 0) = even[x];
+        DST(x, 1) = odd[x];
+        DST(x, 2) = even[x + 1];
+        DST(x, 3) = odd[x + 1];
+    }
+}
+
+/*
+ * def_vert_left(size) defines vert_left_<size>x<size>_c(), the vertical-left
+ * predictor for the given block size (instantiated below for 8, 16 and 32).
+ *
+ * Two tables of size - 1 entries are built from the top edge:
+ *   ve[i] = (top[i] + top[i + 1] + 1) >> 1
+ *   vo[i] = (top[i] + 2 * top[i + 1] + top[i + 2] + 2) >> 2
+ * except that vo[size - 2] weights top[size - 1] by 3 instead of reading
+ * top[size], so nothing beyond top[size - 1] is read.
+ *
+ * Rows 2 * j (from ve[]) and 2 * j + 1 (from vo[]) copy size - j - 1 entries
+ * starting at index j, then set the remaining j + 1 pixels to top[size - 1].
+ * The left edge is unused.
+ */
+#define def_vert_left(size) \
+static void vert_left_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+                                          const uint8_t *left, const uint8_t *_top) \
+{ \
+    pixel *dst = (pixel *) _dst; \
+    const pixel *top = (const pixel *) _top; \
+    int i, j; \
+    pixel ve[size - 1], vo[size - 1]; \
+\
+    stride /= sizeof(pixel); \
+    for (i = 0; i < size - 2; i++) { \
+        ve[i] = (top[i] + top[i + 1] + 1) >> 1; \
+        vo[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
+    } \
+    ve[size - 2] = (top[size - 2] + top[size - 1] + 1) >> 1; \
+    vo[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
+\
+    for (j = 0; j < size / 2; j++) { \
+        memcpy(dst +  j*2      * stride, ve + j, (size - j - 1) * sizeof(pixel)); \
+        memset_bpc(dst +  j*2      * stride + size - j - 1, top[size - 1], j + 1); \
+        memcpy(dst + (j*2 + 1) * stride, vo + j, (size - j - 1) * sizeof(pixel)); \
+        memset_bpc(dst + (j*2 + 1) * stride + size - j - 1, top[size - 1], j + 1); \
+    } \
+}
+
+def_vert_left(8)
+def_vert_left(16)
+def_vert_left(32)
+
+/**
+ * Horizontal-up intra prediction for a 4x4 block.
+ *
+ * A 10-entry table is derived from the left edge: entries 0..3 alternate
+ * between the rounded average (a + b + 1) >> 1 and the rounded filter
+ * (a + 2 * b + c + 2) >> 2 of consecutive left samples, entry 4 averages
+ * left[2] and left[3], entry 5 is (left[2] + 3 * left[3] + 2) >> 2, and
+ * entries 6..9 repeat left[3]. Row y is the 4 consecutive entries starting
+ * at index 2 * y.
+ *
+ * @param _dst   destination block, accessed as an array of pixel
+ * @param stride destination row pitch; divided by sizeof(pixel) before use
+ * @param _left  left-edge samples, entries 0..3 are read
+ * @param top    unused
+ */
+static void hor_up_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+                         const uint8_t *_left, const uint8_t *top)
+{
+    const pixel *left = (const pixel *) _left;
+    pixel *dst = (pixel *) _dst;
+    const int last = left[3];
+    pixel v[10];
+    int i, x, y;
+
+    for (i = 0; i < 2; i++) {
+        v[i * 2]     = (left[i] + left[i + 1] + 1) >> 1;
+        v[i * 2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2;
+    }
+    v[4] = (left[2] + last + 1) >> 1;
+    v[5] = (left[2] + last * 3 + 2) >> 2;
+    for (i = 6; i < 10; i++)
+        v[i] = last;
+
+    stride /= sizeof(pixel);
+    for (y = 0; y < 4; y++)
+        for (x = 0; x < 4; x++)
+            DST(x, y) = v[y * 2 + x];
+}
+
+/*
+ * def_hor_up(size) defines hor_up_<size>x<size>_c(), the horizontal-up
+ * predictor for the given block size (instantiated below for 8, 16 and 32).
+ *
+ * v[] holds 2 * size - 2 entries built from the left edge, interleaving
+ *   v[2 * i]     = (left[i] + left[i + 1] + 1) >> 1
+ *   v[2 * i + 1] = (left[i] + 2 * left[i + 1] + left[i + 2] + 2) >> 2
+ * except that the final odd entry weights left[size - 1] by 3 instead of
+ * reading left[size].
+ *
+ * Row j reads from v[2 * j]. The first size/2 rows copy a full row of size
+ * entries. Later rows copy the 2 * size - 2 - 2 * j entries that remain in
+ * v[] and fill the rest of the row with left[size - 1]; the last row is
+ * therefore entirely left[size - 1]. The top edge is unused.
+ */
+#define def_hor_up(size) \
+static void hor_up_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+                                       const uint8_t *_left, const uint8_t *top) \
+{ \
+    pixel *dst = (pixel *) _dst; \
+    const pixel *left = (const pixel *) _left; \
+    int i, j; \
+    pixel v[size*2 - 2]; \
+\
+    stride /= sizeof(pixel); \
+    for (i = 0; i < size - 2; i++) { \
+        v[i*2    ] = (left[i] + left[i + 1] + 1) >> 1; \
+        v[i*2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
+    } \
+    v[size*2 - 4] = (left[size - 2] + left[size - 1] + 1) >> 1; \
+    v[size*2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2; \
+\
+    for (j = 0; j < size / 2; j++) \
+        memcpy(dst + j*stride, v + j*2, size * sizeof(pixel)); \
+    for (j = size / 2; j < size; j++) { \
+        memcpy(dst + j*stride, v + j*2, (size*2 - 2 - j*2) * sizeof(pixel)); \
+        memset_bpc(dst + j*stride + size*2 - 2 - j*2, left[size - 1], \
+                   2 + j*2 - size); \
+    } \
+}
+
+def_hor_up(8)
+def_hor_up(16)
+def_hor_up(32)
+
+#undef DST
+
+#endif /* BIT_DEPTH != 12 */
+
+/*
+ * Fill dsp->intra_pred[][] with the C intra predictors defined above, for
+ * the four transform sizes TX_4X4 .. TX_32X32.
+ *
+ * Linkage and sharing between bit depths, as expressed by the guards below:
+ *   - the function is static unless BIT_DEPTH == 10;
+ *   - builds with BIT_DEPTH != 8 see a prototype for
+ *     ff_vp9dsp_intrapred_init_10();
+ *   - with BIT_DEPTH == 12 that 10-bit initializer is called first and only
+ *     the TM / DC_128 / DC_127 / DC_129 entries are then overwritten, which
+ *     matches the predictors that are not compiled out by the
+ *     "#if BIT_DEPTH != 12" guards earlier in this file.
+ *
+ * NOTE(review): the final function name comes from FUNC(), which is defined
+ * outside this view; presumably it appends a bit-depth suffix - confirm.
+ */
+#if BIT_DEPTH != 8
+void ff_vp9dsp_intrapred_init_10(VP9DSPContext *dsp);
+#endif
+#if BIT_DEPTH != 10
+static
+#endif
+av_cold void FUNC(ff_vp9dsp_intrapred_init)(VP9DSPContext *dsp)
+{
+/* Entries whose predictor is defined for every bit depth. */
+#define init_intra_pred_bd_aware(tx, sz) \
+    dsp->intra_pred[tx][TM_VP8_PRED]          = tm_##sz##_c; \
+    dsp->intra_pred[tx][DC_128_PRED]          = dc_128_##sz##_c; \
+    dsp->intra_pred[tx][DC_127_PRED]          = dc_127_##sz##_c; \
+    dsp->intra_pred[tx][DC_129_PRED]          = dc_129_##sz##_c
+
+#if BIT_DEPTH == 12
+    /* Start from the 10-bit table, then override the entries above. */
+    ff_vp9dsp_intrapred_init_10(dsp);
+#define init_intra_pred(tx, sz) \
+    init_intra_pred_bd_aware(tx, sz)
+#else
+    /* Full table: all predictors of this file for one transform size. */
+    #define init_intra_pred(tx, sz) \
+    dsp->intra_pred[tx][VERT_PRED]            = vert_##sz##_c; \
+    dsp->intra_pred[tx][HOR_PRED]             = hor_##sz##_c; \
+    dsp->intra_pred[tx][DC_PRED]              = dc_##sz##_c; \
+    dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED]  = diag_downleft_##sz##_c; \
+    dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = diag_downright_##sz##_c; \
+    dsp->intra_pred[tx][VERT_RIGHT_PRED]      = vert_right_##sz##_c; \
+    dsp->intra_pred[tx][HOR_DOWN_PRED]        = hor_down_##sz##_c; \
+    dsp->intra_pred[tx][VERT_LEFT_PRED]       = vert_left_##sz##_c; \
+    dsp->intra_pred[tx][HOR_UP_PRED]          = hor_up_##sz##_c; \
+    dsp->intra_pred[tx][LEFT_DC_PRED]         = dc_left_##sz##_c; \
+    dsp->intra_pred[tx][TOP_DC_PRED]          = dc_top_##sz##_c; \
+    init_intra_pred_bd_aware(tx, sz)
+#endif
+
+    init_intra_pred(TX_4X4,   4x4);
+    init_intra_pred(TX_8X8,   8x8);
+    init_intra_pred(TX_16X16, 16x16);
+    init_intra_pred(TX_32X32, 32x32);
+
+#undef init_intra_pred
+#undef init_intra_pred_bd_aware
+}
+
+/*
+ * Generate <type_a>_<type_b>_<sz>x<sz>_add_c(): a 2-D inverse transform whose
+ * result is added to the destination pixels (with clipping).
+ *
+ *   type_a, type_b  name prefixes of the 1-D kernels (<type><sz>_1d) run in
+ *                   the first and in the second pass respectively
+ *   sz              transform size (the block is sz x sz coefficients)
+ *   bits            rounding right shift applied to each second-pass output
+ *                   before it is added to the picture; 0 means "no shift"
+ *   has_dconly      non-zero to compile in the shortcut taken when eob == 1
+ *
+ * The generated function takes the byte stride of the destination and clears
+ * every coefficient it has consumed.
+ */
+#define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \
+static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *_dst, \
+                                                    ptrdiff_t stride, \
+                                                    int16_t *_block, int eob) \
+{ \
+    pixel *dst = (pixel *) _dst; \
+    dctcoef *block = (dctcoef *) _block; \
+    dctcoef tmp[sz * sz], out[sz]; \
+    int x, y; \
+\
+    stride /= sizeof(pixel); /* bytes -> pixels */ \
+\
+    if (has_dconly && eob == 1) { \
+        /* Only block[0] is set: scale it twice by 11585 / 2^14 and add */ \
+        /* the same rounded value to every pixel of the block. */ \
+        const int t  = ((((dctint) block[0] * 11585 + (1 << 13)) >> 14) \
+                                            * 11585 + (1 << 13)) >> 14; \
+        const int dc = bits ? (int)(t + (1U << (bits - 1))) >> bits : t; \
+\
+        block[0] = 0; \
+        for (x = 0; x < sz; x++, dst++) \
+            for (y = 0; y < sz; y++) \
+                dst[y * stride] = av_clip_pixel(dst[y * stride] + dc); \
+        return; \
+    } \
+\
+    /* First pass: one 1-D transform per start offset x, stride sz. */ \
+    for (x = 0; x < sz; x++) \
+        type_a##sz##_1d(block + x, sz, tmp + x * sz, 0); \
+    memset(block, 0, sz * sz * sizeof(*block)); \
+\
+    /* Second pass: transform, round, and add one pixel column at a time. */ \
+    for (x = 0; x < sz; x++, dst++) { \
+        type_b##sz##_1d(tmp + x, sz, out, 1); \
+        for (y = 0; y < sz; y++) \
+            dst[y * stride] = av_clip_pixel(dst[y * stride] + \
+                                            (bits ? \
+                                             (int)(out[y] + (1U << (bits - 1))) >> bits : \
+                                             out[y])); \
+    } \
+}
+
+/*
+ * Instantiate itxfm_wrapper() for all four idct/iadst combinations of one
+ * transform size.  Only the idct/idct variant is generated with the DC-only
+ * shortcut enabled (has_dconly = 1).
+ */
+#define itxfm_wrap(sz, bits) \
+itxfm_wrapper(idct,  idct,  sz, bits, 1) \
+itxfm_wrapper(iadst, idct,  sz, bits, 0) \
+itxfm_wrapper(idct,  iadst, sz, bits, 0) \
+itxfm_wrapper(iadst, iadst, sz, bits, 0)
+
+/* Fetch input coefficient x of a 1-D kernel: element x * stride of the local
+ * `in` pointer, converted to dctint before any arithmetic is done on it. */
+#define IN(x) ((dctint) in[(x) * stride])
+
+/*
+ * 4-point 1-D inverse DCT.
+ *
+ * Reads in[0], in[stride], in[2 * stride] and in[3 * stride] and writes the
+ * four results to out[0..3].  Each constant product is rounded with
+ * "+ (1 << 13)" and shifted right by 14.  `pass` is not used.
+ */
+static av_always_inline void idct4_1d(const dctcoef *in, ptrdiff_t stride,
+                                      dctcoef *out, int pass)
+{
+    const dctint in0 = IN(0), in1 = IN(1), in2 = IN(2), in3 = IN(3);
+
+    /* inputs 0 and 2 */
+    const dctint even_sum  = ((in0 + in2) * 11585 + (1 << 13)) >> 14;
+    const dctint even_diff = ((in0 - in2) * 11585 + (1 << 13)) >> 14;
+    /* inputs 1 and 3 */
+    const dctint odd_lo    = (in1 *  6270 - in3 * 15137 + (1 << 13)) >> 14;
+    const dctint odd_hi    = (in1 * 15137 + in3 *  6270 + (1 << 13)) >> 14;
+
+    out[0] = even_sum  + odd_hi;
+    out[1] = even_diff + odd_lo;
+    out[2] = even_diff - odd_lo;
+    out[3] = even_sum  - odd_hi;
+}
+
+/*
+ * 4-point 1-D inverse ADST kernel.
+ *
+ * Reads four strided inputs through IN() and writes out[0..3].  The weighted
+ * sums are kept at full precision and only the final values are rounded
+ * ("+ (1 << 13)") and shifted right by 14.  `pass` is not used.
+ */
+static av_always_inline void iadst4_1d(const dctcoef *in, ptrdiff_t stride,
+                                       dctcoef *out, int pass)
+{
+    const dctint in0 = IN(0), in1 = IN(1), in2 = IN(2), in3 = IN(3);
+
+    const dctint sum_a = 5283 * in0 + 15212 * in2 +  9929 * in3;
+    const dctint sum_b = 9929 * in0 -  5283 * in2 - 15212 * in3;
+    const dctint sum_c = 13377 * (in0 - in2 + in3);
+    const dctint mid   = 13377 * in1;
+
+    out[0] = (sum_a + mid         + (1 << 13)) >> 14;
+    out[1] = (sum_b + mid         + (1 << 13)) >> 14;
+    out[2] = (sum_c               + (1 << 13)) >> 14;
+    out[3] = (sum_a + sum_b - mid + (1 << 13)) >> 14;
+}
+
+/* 4x4: all four idct/iadst combinations; final rounding shift of 4 bits. */
+itxfm_wrap(4, 4)
+
+/*
+ * 8-point 1-D inverse DCT.
+ *
+ * Inputs are fetched with IN() (in[x * stride], converted to dctint) and the
+ * eight results are written to out[0..7].  Every constant product is rounded
+ * with "+ (1 << 13)" and shifted right by 14.  `pass` is not used.
+ */
+static av_always_inline void idct8_1d(const dctcoef *in, ptrdiff_t stride,
+                                      dctcoef *out, int pass)
+{
+    dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;
+
+    /* Rotations: t0a..t3a use the even inputs, t4a..t7a the odd inputs. */
+    t0a = ((IN(0) + IN(4)) * 11585 + (1 << 13)) >> 14;
+    t1a = ((IN(0) - IN(4)) * 11585 + (1 << 13)) >> 14;
+    t2a = (IN(2) *  6270 - IN(6) * 15137 + (1 << 13)) >> 14;
+    t3a = (IN(2) * 15137 + IN(6) *  6270 + (1 << 13)) >> 14;
+    t4a = (IN(1) *  3196 - IN(7) * 16069 + (1 << 13)) >> 14;
+    t5a = (IN(5) * 13623 - IN(3) *  9102 + (1 << 13)) >> 14;
+    t6a = (IN(5) *  9102 + IN(3) * 13623 + (1 << 13)) >> 14;
+    t7a = (IN(1) * 16069 + IN(7) *  3196 + (1 << 13)) >> 14;
+
+    /* Sum/difference stage; note that t5a and t6a are overwritten in place. */
+    t0  = t0a + t3a;
+    t1  = t1a + t2a;
+    t2  = t1a - t2a;
+    t3  = t0a - t3a;
+    t4  = t4a + t5a;
+    t5a = t4a - t5a;
+    t7  = t7a + t6a;
+    t6a = t7a - t6a;
+
+    t5  = ((t6a - t5a) * 11585 + (1 << 13)) >> 14;
+    t6  = ((t6a + t5a) * 11585 + (1 << 13)) >> 14;
+
+    /* Final stage: out[k] and out[7 - k] are the sum and difference of a pair. */
+    out[0] = t0 + t7;
+    out[1] = t1 + t6;
+    out[2] = t2 + t5;
+    out[3] = t3 + t4;
+    out[4] = t3 - t4;
+    out[5] = t2 - t5;
+    out[6] = t1 - t6;
+    out[7] = t0 - t7;
+}
+
+/*
+ * 8-point 1-D inverse ADST kernel.
+ *
+ * Inputs are fetched with IN() and the eight results are written to
+ * out[0..7].  Intermediate sums are rounded with "+ (1 << 13)" and shifted
+ * right by 14.  `pass` is not used.
+ *
+ * NOTE(review): the later stages multiply by unsigned constants (U suffix)
+ * and cast back to dctint before shifting, presumably so that overflow wraps
+ * instead of being undefined when dctint is a plain int; confirm against the
+ * definition of dctint.
+ */
+static av_always_inline void iadst8_1d(const dctcoef *in, ptrdiff_t stride,
+                                       dctcoef *out, int pass)
+{
+    dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;
+
+    /* Stage 1: pairwise rotations of the inputs, kept unrounded. */
+    t0a = 16305 * IN(7) +  1606 * IN(0);
+    t1a =  1606 * IN(7) - 16305 * IN(0);
+    t2a = 14449 * IN(5) +  7723 * IN(2);
+    t3a =  7723 * IN(5) - 14449 * IN(2);
+    t4a = 10394 * IN(3) + 12665 * IN(4);
+    t5a = 12665 * IN(3) - 10394 * IN(4);
+    t6a =  4756 * IN(1) + 15679 * IN(6);
+    t7a = 15679 * IN(1) -  4756 * IN(6);
+
+    /* Stage 2: sums/differences of the stage-1 values, rounded and shifted. */
+    t0 = (t0a + t4a + (1 << 13)) >> 14;
+    t1 = (t1a + t5a + (1 << 13)) >> 14;
+    t2 = (t2a + t6a + (1 << 13)) >> 14;
+    t3 = (t3a + t7a + (1 << 13)) >> 14;
+    t4 = (t0a - t4a + (1 << 13)) >> 14;
+    t5 = (t1a - t5a + (1 << 13)) >> 14;
+    t6 = (t2a - t6a + (1 << 13)) >> 14;
+    t7 = (t3a - t7a + (1 << 13)) >> 14;
+
+    /* Stage 3: rotate t4..t7 (unrounded; rounded when combined below). */
+    t4a = 15137U * t4 +  6270U * t5;
+    t5a =  6270U * t4 - 15137U * t5;
+    t6a = 15137U * t7 -  6270U * t6;
+    t7a =  6270U * t7 + 15137U * t6;
+
+    out[0] =   t0 + t2;
+    out[7] = -(t1 + t3);
+    t2     =   t0 - t2;
+    t3     =   t1 - t3;
+
+    out[1] = -((dctint)((1U << 13) + t4a + t6a) >> 14);
+    out[6] =   (dctint)((1U << 13) + t5a + t7a) >> 14;
+    t6     =   (dctint)((1U << 13) + t4a - t6a) >> 14;
+    t7     =   (dctint)((1U << 13) + t5a - t7a) >> 14;
+
+    /* Last stage: the remaining four outputs are scaled by 11585 / 2^14. */
+    out[3] = -((dctint)((t2 + t3) * 11585U + (1 << 13)) >> 14);
+    out[4] =   (dctint)((t2 - t3) * 11585U + (1 << 13)) >> 14;
+    out[2] =   (dctint)((t6 + t7) * 11585U + (1 << 13)) >> 14;
+    out[5] = -((dctint)((t6 - t7) * 11585U + (1 << 13)) >> 14);
+}
+
+/* 8x8: all four idct/iadst combinations; final rounding shift of 5 bits. */
+itxfm_wrap(8, 5)
+
+/*
+ * 16-point 1-D inverse DCT.
+ *
+ * Inputs are fetched with IN() (in[x * stride], converted to dctint) and the
+ * sixteen results are written to out[0..15].  Every constant product is
+ * rounded with "+ (1 << 13)" and shifted right by 14.  `pass` is not used.
+ *
+ * NOTE(review): constants carry a U suffix and the sum is cast back to
+ * dctint before the shift, presumably so that overflow wraps instead of
+ * being undefined when dctint is a plain int; confirm against the definition
+ * of dctint.
+ */
+static av_always_inline void idct16_1d(const dctcoef *in, ptrdiff_t stride,
+                                       dctcoef *out, int pass)
+{
+    dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
+    dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
+    dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
+
+    /* Stage 1: input rotations.  t0a..t7a read the even-indexed inputs,
+     * t8a..t15a the odd-indexed ones. */
+    t0a  = (dctint)((IN(0) + IN(8)) * 11585U + (1 << 13)) >> 14;
+    t1a  = (dctint)((IN(0) - IN(8)) * 11585U + (1 << 13)) >> 14;
+    t2a  = (dctint)(IN(4)  *  6270U - IN(12) * 15137U + (1 << 13)) >> 14;
+    t3a  = (dctint)(IN(4)  * 15137U + IN(12) *  6270U + (1 << 13)) >> 14;
+    t4a  = (dctint)(IN(2)  *  3196U - IN(14) * 16069U + (1 << 13)) >> 14;
+    t7a  = (dctint)(IN(2)  * 16069U + IN(14) *  3196U + (1 << 13)) >> 14;
+    t5a  = (dctint)(IN(10) * 13623U - IN(6)  *  9102U + (1 << 13)) >> 14;
+    t6a  = (dctint)(IN(10) *  9102U + IN(6)  * 13623U + (1 << 13)) >> 14;
+    t8a  = (dctint)(IN(1)  *  1606U - IN(15) * 16305U + (1 << 13)) >> 14;
+    t15a = (dctint)(IN(1)  * 16305U + IN(15) *  1606U + (1 << 13)) >> 14;
+    t9a  = (dctint)(IN(9)  * 12665U - IN(7)  * 10394U + (1 << 13)) >> 14;
+    t14a = (dctint)(IN(9)  * 10394U + IN(7)  * 12665U + (1 << 13)) >> 14;
+    t10a = (dctint)(IN(5)  *  7723U - IN(11) * 14449U + (1 << 13)) >> 14;
+    t13a = (dctint)(IN(5)  * 14449U + IN(11) *  7723U + (1 << 13)) >> 14;
+    t11a = (dctint)(IN(13) * 15679U - IN(3)  *  4756U + (1 << 13)) >> 14;
+    t12a = (dctint)(IN(13) *  4756U + IN(3)  * 15679U + (1 << 13)) >> 14;
+
+    /* Stage 2: sums and differences of the stage-1 values. */
+    t0  = t0a  + t3a;
+    t1  = t1a  + t2a;
+    t2  = t1a  - t2a;
+    t3  = t0a  - t3a;
+    t4  = t4a  + t5a;
+    t5  = t4a  - t5a;
+    t6  = t7a  - t6a;
+    t7  = t7a  + t6a;
+    t8  = t8a  + t9a;
+    t9  = t8a  - t9a;
+    t10 = t11a - t10a;
+    t11 = t11a + t10a;
+    t12 = t12a + t13a;
+    t13 = t12a - t13a;
+    t14 = t15a - t14a;
+    t15 = t15a + t14a;
+
+    /* Stage 3: rotations on selected pairs. */
+    t5a  = (dctint)((t6 - t5) * 11585U + (1 << 13)) >> 14;
+    t6a  = (dctint)((t6 + t5) * 11585U + (1 << 13)) >> 14;
+    t9a  = (dctint)(  t14 *  6270U - t9  * 15137U  + (1 << 13)) >> 14;
+    t14a = (dctint)(  t14 * 15137U + t9  *  6270U  + (1 << 13)) >> 14;
+    t10a = (dctint)(-(t13 * 15137U + t10 *  6270U) + (1 << 13)) >> 14;
+    t13a = (dctint)(  t13 *  6270U - t10 * 15137U  + (1 << 13)) >> 14;
+
+    /* Stage 4: sums and differences (several variables are reused). */
+    t0a  = t0   + t7;
+    t1a  = t1   + t6a;
+    t2a  = t2   + t5a;
+    t3a  = t3   + t4;
+    t4   = t3   - t4;
+    t5   = t2   - t5a;
+    t6   = t1   - t6a;
+    t7   = t0   - t7;
+    t8a  = t8   + t11;
+    t9   = t9a  + t10a;
+    t10  = t9a  - t10a;
+    t11a = t8   - t11;
+    t12a = t15  - t12;
+    t13  = t14a - t13a;
+    t14  = t14a + t13a;
+    t15a = t15  + t12;
+
+    /* Stage 5: scale the remaining pairs by 11585 / 2^14. */
+    t10a = (dctint)((t13  - t10)  * 11585U + (1 << 13)) >> 14;
+    t13a = (dctint)((t13  + t10)  * 11585U + (1 << 13)) >> 14;
+    t11  = (dctint)((t12a - t11a) * 11585U + (1 << 13)) >> 14;
+    t12  = (dctint)((t12a + t11a) * 11585U + (1 << 13)) >> 14;
+
+    /* Output: out[k] and out[15 - k] are the sum and difference of a pair. */
+    out[ 0] = t0a + t15a;
+    out[ 1] = t1a + t14;
+    out[ 2] = t2a + t13a;
+    out[ 3] = t3a + t12;
+    out[ 4] = t4  + t11;
+    out[ 5] = t5  + t10a;
+    out[ 6] = t6  + t9;
+    out[ 7] = t7  + t8a;
+    out[ 8] = t7  - t8a;
+    out[ 9] = t6  - t9;
+    out[10] = t5  - t10a;
+    out[11] = t4  - t11;
+    out[12] = t3a - t12;
+    out[13] = t2a - t13a;
+    out[14] = t1a - t14;
+    out[15] = t0a - t15a;
+}
+
+/*
+ * 16-point 1-D inverse ADST kernel.
+ *
+ * Inputs are fetched with IN() and the sixteen results are written to
+ * out[0..15].  Rotations are kept unrounded; rounding ("(1U << 13) +") and
+ * the shift right by 14 are applied when two rotated values are combined.
+ * `pass` is not used.
+ *
+ * NOTE(review): constants carry a U suffix and sums are cast back to dctint
+ * before shifting, presumably so that overflow wraps instead of being
+ * undefined when dctint is a plain int; confirm against the definition of
+ * dctint.
+ */
+static av_always_inline void iadst16_1d(const dctcoef *in, ptrdiff_t stride,
+                                        dctcoef *out, int pass)
+{
+    dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
+    dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
+    dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
+
+    /* Stage 1: rotate input pairs (15,0), (13,2), ... (1,14); unrounded. */
+    t0  = IN(15) * 16364U + IN(0)  *   804U;
+    t1  = IN(15) *   804U - IN(0)  * 16364U;
+    t2  = IN(13) * 15893U + IN(2)  *  3981U;
+    t3  = IN(13) *  3981U - IN(2)  * 15893U;
+    t4  = IN(11) * 14811U + IN(4)  *  7005U;
+    t5  = IN(11) *  7005U - IN(4)  * 14811U;
+    t6  = IN(9)  * 13160U + IN(6)  *  9760U;
+    t7  = IN(9)  *  9760U - IN(6)  * 13160U;
+    t8  = IN(7)  * 11003U + IN(8)  * 12140U;
+    t9  = IN(7)  * 12140U - IN(8)  * 11003U;
+    t10 = IN(5)  *  8423U + IN(10) * 14053U;
+    t11 = IN(5)  * 14053U - IN(10) *  8423U;
+    t12 = IN(3)  *  5520U + IN(12) * 15426U;
+    t13 = IN(3)  * 15426U - IN(12) *  5520U;
+    t14 = IN(1)  *  2404U + IN(14) * 16207U;
+    t15 = IN(1)  * 16207U - IN(14) *  2404U;
+
+    /* Stage 2: combine t[k] with t[k + 8], round and shift. */
+    t0a  = (dctint)((1U << 13) + t0 + t8 ) >> 14;
+    t1a  = (dctint)((1U << 13) + t1 + t9 ) >> 14;
+    t2a  = (dctint)((1U << 13) + t2 + t10) >> 14;
+    t3a  = (dctint)((1U << 13) + t3 + t11) >> 14;
+    t4a  = (dctint)((1U << 13) + t4 + t12) >> 14;
+    t5a  = (dctint)((1U << 13) + t5 + t13) >> 14;
+    t6a  = (dctint)((1U << 13) + t6 + t14) >> 14;
+    t7a  = (dctint)((1U << 13) + t7 + t15) >> 14;
+    t8a  = (dctint)((1U << 13) + t0 - t8 ) >> 14;
+    t9a  = (dctint)((1U << 13) + t1 - t9 ) >> 14;
+    t10a = (dctint)((1U << 13) + t2 - t10) >> 14;
+    t11a = (dctint)((1U << 13) + t3 - t11) >> 14;
+    t12a = (dctint)((1U << 13) + t4 - t12) >> 14;
+    t13a = (dctint)((1U << 13) + t5 - t13) >> 14;
+    t14a = (dctint)((1U << 13) + t6 - t14) >> 14;
+    t15a = (dctint)((1U << 13) + t7 - t15) >> 14;
+
+    /* Stage 3: rotate the difference terms t8a..t15a (unrounded). */
+    t8   = t8a  * 16069U + t9a  *  3196U;
+    t9   = t8a  *  3196U - t9a  * 16069U;
+    t10  = t10a *  9102U + t11a * 13623U;
+    t11  = t10a * 13623U - t11a *  9102U;
+    t12  = t13a * 16069U - t12a *  3196U;
+    t13  = t13a *  3196U + t12a * 16069U;
+    t14  = t15a *  9102U - t14a * 13623U;
+    t15  = t15a * 13623U + t14a *  9102U;
+
+    /* Stage 4: plain sums/differences for t0..t7; rounded combinations of
+     * the stage-3 rotations for t8a..t15a. */
+    t0   = t0a + t4a;
+    t1   = t1a + t5a;
+    t2   = t2a + t6a;
+    t3   = t3a + t7a;
+    t4   = t0a - t4a;
+    t5   = t1a - t5a;
+    t6   = t2a - t6a;
+    t7   = t3a - t7a;
+    t8a  = (dctint)((1U << 13) + t8  + t12) >> 14;
+    t9a  = (dctint)((1U << 13) + t9  + t13) >> 14;
+    t10a = (dctint)((1U << 13) + t10 + t14) >> 14;
+    t11a = (dctint)((1U << 13) + t11 + t15) >> 14;
+    t12a = (dctint)((1U << 13) + t8  - t12) >> 14;
+    t13a = (dctint)((1U << 13) + t9  - t13) >> 14;
+    t14a = (dctint)((1U << 13) + t10 - t14) >> 14;
+    t15a = (dctint)((1U << 13) + t11 - t15) >> 14;
+
+    /* Stage 5: rotations with the 15137 / 6270 pair (unrounded). */
+    t4a  = t4 * 15137U + t5 *  6270U;
+    t5a  = t4 *  6270U - t5 * 15137U;
+    t6a  = t7 * 15137U - t6 *  6270U;
+    t7a  = t7 *  6270U + t6 * 15137U;
+    t12  = t12a * 15137U + t13a *  6270U;
+    t13  = t12a *  6270U - t13a * 15137U;
+    t14  = t15a * 15137U - t14a *  6270U;
+    t15  = t15a *  6270U + t14a * 15137U;
+
+    /* Stage 6: eight outputs are final here; the differences feed stage 7. */
+    out[ 0] =   t0 + t2;
+    out[15] = -(t1 + t3);
+    t2a     =   t0 - t2;
+    t3a     =   t1 - t3;
+    out[ 3] = -((dctint)((1U << 13) + t4a + t6a) >> 14);
+    out[12] =   (dctint)((1U << 13) + t5a + t7a) >> 14;
+    t6      =   (dctint)((1U << 13) + t4a - t6a) >> 14;
+    t7      =   (dctint)((1U << 13) + t5a - t7a) >> 14;
+    out[ 1] = -(t8a + t10a);
+    out[14] =   t9a + t11a;
+    t10     =   t8a - t10a;
+    t11     =   t9a - t11a;
+    out[ 2] =   (dctint)((1U << 13) + t12 + t14) >> 14;
+    out[13] = -((dctint)((1U << 13) + t13 + t15) >> 14);
+    t14a    =   (dctint)((1U << 13) + t12 - t14) >> 14;
+    t15a    =   (dctint)((1U << 13) + t13 - t15) >> 14;
+
+    /* Stage 7: remaining outputs, scaled by 11585 / 2^14. */
+    out[ 7] = (dctint)(-(t2a  + t3a)  * 11585U  + (1 << 13)) >> 14;
+    out[ 8] = (dctint)( (t2a  - t3a)  * 11585U  + (1 << 13)) >> 14;
+    out[ 4] = (dctint)( (t7   + t6)   * 11585U  + (1 << 13)) >> 14;
+    out[11] = (dctint)( (t7   - t6)   * 11585U  + (1 << 13)) >> 14;
+    out[ 6] = (dctint)( (t11  + t10)  * 11585U  + (1 << 13)) >> 14;
+    out[ 9] = (dctint)( (t11  - t10)  * 11585U  + (1 << 13)) >> 14;
+    out[ 5] = (dctint)(-(t14a + t15a) * 11585U  + (1 << 13)) >> 14;
+    out[10] = (dctint)( (t14a - t15a) * 11585U  + (1 << 13)) >> 14;
+}
+
+/* 16x16: all four idct/iadst combinations; final rounding shift of 6 bits. */
+itxfm_wrap(16, 6)
+
+/*
+ * 32-point 1-D inverse DCT.
+ *
+ * Inputs are fetched with IN() (in[x * stride], converted to dctint) and the
+ * thirty-two results are written to out[0..31].  Every constant product is
+ * rounded with "+ (1 << 13)" and shifted right by 14.  `pass` is not used.
+ *
+ * NOTE(review): constants carry a U suffix and the sum is cast back to
+ * dctint before the shift, presumably so that overflow wraps instead of
+ * being undefined when dctint is a plain int; confirm against the definition
+ * of dctint.
+ */
+static av_always_inline void idct32_1d(const dctcoef *in, ptrdiff_t stride,
+                                       dctcoef *out, int pass)
+{
+    /* Stage 1: input rotations.  t0a..t15a read the even-indexed inputs,
+     * t16a..t31a the odd-indexed ones. */
+    dctint t0a  = (dctint)((IN(0) + IN(16)) * 11585U         + (1 << 13)) >> 14;
+    dctint t1a  = (dctint)((IN(0) - IN(16)) * 11585U         + (1 << 13)) >> 14;
+    dctint t2a  = (dctint)(IN( 8) *  6270U - IN(24) * 15137U + (1 << 13)) >> 14;
+    dctint t3a  = (dctint)(IN( 8) * 15137U + IN(24) *  6270U + (1 << 13)) >> 14;
+    dctint t4a  = (dctint)(IN( 4) *  3196U - IN(28) * 16069U + (1 << 13)) >> 14;
+    dctint t7a  = (dctint)(IN( 4) * 16069U + IN(28) *  3196U + (1 << 13)) >> 14;
+    dctint t5a  = (dctint)(IN(20) * 13623U - IN(12) *  9102U + (1 << 13)) >> 14;
+    dctint t6a  = (dctint)(IN(20) *  9102U + IN(12) * 13623U + (1 << 13)) >> 14;
+    dctint t8a  = (dctint)(IN( 2) *  1606U - IN(30) * 16305U + (1 << 13)) >> 14;
+    dctint t15a = (dctint)(IN( 2) * 16305U + IN(30) *  1606U + (1 << 13)) >> 14;
+    dctint t9a  = (dctint)(IN(18) * 12665U - IN(14) * 10394U + (1 << 13)) >> 14;
+    dctint t14a = (dctint)(IN(18) * 10394U + IN(14) * 12665U + (1 << 13)) >> 14;
+    dctint t10a = (dctint)(IN(10) *  7723U - IN(22) * 14449U + (1 << 13)) >> 14;
+    dctint t13a = (dctint)(IN(10) * 14449U + IN(22) *  7723U + (1 << 13)) >> 14;
+    dctint t11a = (dctint)(IN(26) * 15679U - IN( 6) *  4756U + (1 << 13)) >> 14;
+    dctint t12a = (dctint)(IN(26) *  4756U + IN( 6) * 15679U + (1 << 13)) >> 14;
+    dctint t16a = (dctint)(IN( 1) *   804U - IN(31) * 16364U + (1 << 13)) >> 14;
+    dctint t31a = (dctint)(IN( 1) * 16364U + IN(31) *   804U + (1 << 13)) >> 14;
+    dctint t17a = (dctint)(IN(17) * 12140U - IN(15) * 11003U + (1 << 13)) >> 14;
+    dctint t30a = (dctint)(IN(17) * 11003U + IN(15) * 12140U + (1 << 13)) >> 14;
+    dctint t18a = (dctint)(IN( 9) *  7005U - IN(23) * 14811U + (1 << 13)) >> 14;
+    dctint t29a = (dctint)(IN( 9) * 14811U + IN(23) *  7005U + (1 << 13)) >> 14;
+    dctint t19a = (dctint)(IN(25) * 15426U - IN( 7) *  5520U + (1 << 13)) >> 14;
+    dctint t28a = (dctint)(IN(25) *  5520U + IN( 7) * 15426U + (1 << 13)) >> 14;
+    dctint t20a = (dctint)(IN( 5) *  3981U - IN(27) * 15893U + (1 << 13)) >> 14;
+    dctint t27a = (dctint)(IN( 5) * 15893U + IN(27) *  3981U + (1 << 13)) >> 14;
+    dctint t21a = (dctint)(IN(21) * 14053U - IN(11) *  8423U + (1 << 13)) >> 14;
+    dctint t26a = (dctint)(IN(21) *  8423U + IN(11) * 14053U + (1 << 13)) >> 14;
+    dctint t22a = (dctint)(IN(13) *  9760U - IN(19) * 13160U + (1 << 13)) >> 14;
+    dctint t25a = (dctint)(IN(13) * 13160U + IN(19) *  9760U + (1 << 13)) >> 14;
+    dctint t23a = (dctint)(IN(29) * 16207U - IN( 3) *  2404U + (1 << 13)) >> 14;
+    dctint t24a = (dctint)(IN(29) *  2404U + IN( 3) * 16207U + (1 << 13)) >> 14;
+
+    /* Stage 2: sums and differences of the stage-1 values. */
+    dctint t0  = t0a  + t3a;
+    dctint t1  = t1a  + t2a;
+    dctint t2  = t1a  - t2a;
+    dctint t3  = t0a  - t3a;
+    dctint t4  = t4a  + t5a;
+    dctint t5  = t4a  - t5a;
+    dctint t6  = t7a  - t6a;
+    dctint t7  = t7a  + t6a;
+    dctint t8  = t8a  + t9a;
+    dctint t9  = t8a  - t9a;
+    dctint t10 = t11a - t10a;
+    dctint t11 = t11a + t10a;
+    dctint t12 = t12a + t13a;
+    dctint t13 = t12a - t13a;
+    dctint t14 = t15a - t14a;
+    dctint t15 = t15a + t14a;
+    dctint t16 = t16a + t17a;
+    dctint t17 = t16a - t17a;
+    dctint t18 = t19a - t18a;
+    dctint t19 = t19a + t18a;
+    dctint t20 = t20a + t21a;
+    dctint t21 = t20a - t21a;
+    dctint t22 = t23a - t22a;
+    dctint t23 = t23a + t22a;
+    dctint t24 = t24a + t25a;
+    dctint t25 = t24a - t25a;
+    dctint t26 = t27a - t26a;
+    dctint t27 = t27a + t26a;
+    dctint t28 = t28a + t29a;
+    dctint t29 = t28a - t29a;
+    dctint t30 = t31a - t30a;
+    dctint t31 = t31a + t30a;
+
+    /* Stage 3: rotations on selected pairs. */
+    t5a  = (dctint)((t6 - t5) * 11585U             + (1 << 13)) >> 14;
+    t6a  = (dctint)((t6 + t5) * 11585U             + (1 << 13)) >> 14;
+    t9a  = (dctint)(  t14 *  6270U - t9  * 15137U  + (1 << 13)) >> 14;
+    t14a = (dctint)(  t14 * 15137U + t9  *  6270U  + (1 << 13)) >> 14;
+    t10a = (dctint)(-(t13 * 15137U + t10 *  6270U) + (1 << 13)) >> 14;
+    t13a = (dctint)(  t13 *  6270U - t10 * 15137U  + (1 << 13)) >> 14;
+    t17a = (dctint)(  t30 *  3196U - t17 * 16069U  + (1 << 13)) >> 14;
+    t30a = (dctint)(  t30 * 16069U + t17 *  3196U  + (1 << 13)) >> 14;
+    t18a = (dctint)(-(t29 * 16069U + t18 *  3196U) + (1 << 13)) >> 14;
+    t29a = (dctint)(  t29 *  3196U - t18 * 16069U  + (1 << 13)) >> 14;
+    t21a = (dctint)(  t26 * 13623U - t21 *  9102U  + (1 << 13)) >> 14;
+    t26a = (dctint)(  t26 *  9102U + t21 * 13623U  + (1 << 13)) >> 14;
+    t22a = (dctint)(-(t25 *  9102U + t22 * 13623U) + (1 << 13)) >> 14;
+    t25a = (dctint)(  t25 * 13623U - t22 *  9102U  + (1 << 13)) >> 14;
+
+    /* Stage 4: sums and differences (variables are reused from here on). */
+    t0a  = t0   + t7;
+    t1a  = t1   + t6a;
+    t2a  = t2   + t5a;
+    t3a  = t3   + t4;
+    t4a  = t3   - t4;
+    t5   = t2   - t5a;
+    t6   = t1   - t6a;
+    t7a  = t0   - t7;
+    t8a  = t8   + t11;
+    t9   = t9a  + t10a;
+    t10  = t9a  - t10a;
+    t11a = t8   - t11;
+    t12a = t15  - t12;
+    t13  = t14a - t13a;
+    t14  = t14a + t13a;
+    t15a = t15  + t12;
+    t16a = t16  + t19;
+    t17  = t17a + t18a;
+    t18  = t17a - t18a;
+    t19a = t16  - t19;
+    t20a = t23  - t20;
+    t21  = t22a - t21a;
+    t22  = t22a + t21a;
+    t23a = t23  + t20;
+    t24a = t24  + t27;
+    t25  = t25a + t26a;
+    t26  = t25a - t26a;
+    t27a = t24  - t27;
+    t28a = t31  - t28;
+    t29  = t30a - t29a;
+    t30  = t30a + t29a;
+    t31a = t31  + t28;
+
+    /* Stage 5: rotations on selected pairs. */
+    t10a = (dctint)((t13  - t10)  * 11585U           + (1 << 13)) >> 14;
+    t13a = (dctint)((t13  + t10)  * 11585U           + (1 << 13)) >> 14;
+    t11  = (dctint)((t12a - t11a) * 11585U           + (1 << 13)) >> 14;
+    t12  = (dctint)((t12a + t11a) * 11585U           + (1 << 13)) >> 14;
+    t18a = (dctint)(  t29  *  6270U - t18  * 15137U  + (1 << 13)) >> 14;
+    t29a = (dctint)(  t29  * 15137U + t18  *  6270U  + (1 << 13)) >> 14;
+    t19  = (dctint)(  t28a *  6270U - t19a * 15137U  + (1 << 13)) >> 14;
+    t28  = (dctint)(  t28a * 15137U + t19a *  6270U  + (1 << 13)) >> 14;
+    t20  = (dctint)(-(t27a * 15137U + t20a *  6270U) + (1 << 13)) >> 14;
+    t27  = (dctint)(  t27a *  6270U - t20a * 15137U  + (1 << 13)) >> 14;
+    t21a = (dctint)(-(t26  * 15137U + t21  *  6270U) + (1 << 13)) >> 14;
+    t26a = (dctint)(  t26  *  6270U - t21  * 15137U  + (1 << 13)) >> 14;
+
+    /* Stage 6: sums and differences. */
+    t0   = t0a + t15a;
+    t1   = t1a + t14;
+    t2   = t2a + t13a;
+    t3   = t3a + t12;
+    t4   = t4a + t11;
+    t5a  = t5  + t10a;
+    t6a  = t6  + t9;
+    t7   = t7a + t8a;
+    t8   = t7a - t8a;
+    t9a  = t6  - t9;
+    t10  = t5  - t10a;
+    t11a = t4a - t11;
+    t12a = t3a - t12;
+    t13  = t2a - t13a;
+    t14a = t1a - t14;
+    t15  = t0a - t15a;
+    t16  = t16a + t23a;
+    t17a = t17  + t22;
+    t18  = t18a + t21a;
+    t19a = t19  + t20;
+    t20a = t19  - t20;
+    t21  = t18a - t21a;
+    t22a = t17  - t22;
+    t23  = t16a - t23a;
+    t24  = t31a - t24a;
+    t25a = t30  - t25;
+    t26  = t29a - t26a;
+    t27a = t28  - t27;
+    t28a = t28  + t27;
+    t29  = t29a + t26a;
+    t30a = t30  + t25;
+    t31  = t31a + t24a;
+
+    /* Stage 7: scale the remaining pairs by 11585 / 2^14. */
+    t20  = (dctint)((t27a - t20a) * 11585U + (1 << 13)) >> 14;
+    t27  = (dctint)((t27a + t20a) * 11585U + (1 << 13)) >> 14;
+    t21a = (dctint)((t26  - t21 ) * 11585U + (1 << 13)) >> 14;
+    t26a = (dctint)((t26  + t21 ) * 11585U + (1 << 13)) >> 14;
+    t22  = (dctint)((t25a - t22a) * 11585U + (1 << 13)) >> 14;
+    t25  = (dctint)((t25a + t22a) * 11585U + (1 << 13)) >> 14;
+    t23a = (dctint)((t24  - t23 ) * 11585U + (1 << 13)) >> 14;
+    t24a = (dctint)((t24  + t23 ) * 11585U + (1 << 13)) >> 14;
+
+    /* Output: out[k] and out[31 - k] are the sum and difference of a pair. */
+    out[ 0] = t0   + t31;
+    out[ 1] = t1   + t30a;
+    out[ 2] = t2   + t29;
+    out[ 3] = t3   + t28a;
+    out[ 4] = t4   + t27;
+    out[ 5] = t5a  + t26a;
+    out[ 6] = t6a  + t25;
+    out[ 7] = t7   + t24a;
+    out[ 8] = t8   + t23a;
+    out[ 9] = t9a  + t22;
+    out[10] = t10  + t21a;
+    out[11] = t11a + t20;
+    out[12] = t12a + t19a;
+    out[13] = t13  + t18;
+    out[14] = t14a + t17a;
+    out[15] = t15  + t16;
+    out[16] = t15  - t16;
+    out[17] = t14a - t17a;
+    out[18] = t13  - t18;
+    out[19] = t12a - t19a;
+    out[20] = t11a - t20;
+    out[21] = t10  - t21a;
+    out[22] = t9a  - t22;
+    out[23] = t8   - t23a;
+    out[24] = t7   - t24a;
+    out[25] = t6a  - t25;
+    out[26] = t5a  - t26a;
+    out[27] = t4   - t27;
+    out[28] = t3   - t28a;
+    out[29] = t2   - t29;
+    out[30] = t1   - t30a;
+    out[31] = t0   - t31;
+}
+
+/* 32x32: only the idct/idct combination is generated (DC-only shortcut
+ * enabled, final rounding shift of 6 bits). */
+itxfm_wrapper(idct, idct, 32, 6, 1)
+
+/*
+ * 4-point 1-D kernel of the "iwht" transform (add, subtract and shift only).
+ *
+ * Reads four strided inputs through IN() and writes out[0..3].  On the first
+ * pass (pass == 0) every input is shifted right by 2 before use; on any
+ * other pass the inputs are used as they are.
+ */
+static av_always_inline void iwht4_1d(const dctcoef *in, ptrdiff_t stride,
+                                      dctcoef *out, int pass)
+{
+    const int shift = pass == 0 ? 2 : 0;
+    int a = IN(0) >> shift;
+    int b = IN(3) >> shift;
+    int c = IN(1) >> shift;
+    int d = IN(2) >> shift;
+    int half;
+
+    a   += c;
+    d   -= b;
+    half = (a - d) >> 1;
+    b    = half - b;
+    c    = half - c;
+    a   -= b;
+    d   += c;
+
+    out[0] = a;
+    out[1] = b;
+    out[2] = c;
+    out[3] = d;
+}
+
+/* 4x4 iwht in both passes: no final shift (bits = 0), no DC-only shortcut. */
+itxfm_wrapper(iwht, iwht, 4, 0, 0)
+
+#undef IN
+#undef itxfm_wrapper
+#undef itxfm_wrap
+
+/**
+ * Install the C inverse-transform-and-add functions into dsp->itxfm_add.
+ *
+ * TX_4X4, TX_8X8 and TX_16X16 get a distinct function per transform type.
+ * TX_32X32 uses idct_idct_32x32_add_c for all four types, and index 4
+ * (lossless) uses iwht_iwht_4x4_add_c for all four types.
+ *
+ * @param dsp  context whose itxfm_add table is written
+ */
+static av_cold void vp9dsp_itxfm_init(VP9DSPContext *dsp)
+{
+/* One function per transform type. */
+#define init_itxfm(tx, sz) do { \
+    dsp->itxfm_add[tx][DCT_DCT]   = idct_idct_##sz##_add_c; \
+    dsp->itxfm_add[tx][DCT_ADST]  = iadst_idct_##sz##_add_c; \
+    dsp->itxfm_add[tx][ADST_DCT]  = idct_iadst_##sz##_add_c; \
+    dsp->itxfm_add[tx][ADST_ADST] = iadst_iadst_##sz##_add_c; \
+} while (0)
+
+/* The same function for every transform type. */
+#define init_idct(tx, nm) do { \
+    dsp->itxfm_add[tx][DCT_DCT]   = nm##_add_c; \
+    dsp->itxfm_add[tx][DCT_ADST]  = nm##_add_c; \
+    dsp->itxfm_add[tx][ADST_DCT]  = nm##_add_c; \
+    dsp->itxfm_add[tx][ADST_ADST] = nm##_add_c; \
+} while (0)
+
+    init_itxfm(TX_4X4,   4x4);
+    init_itxfm(TX_8X8,   8x8);
+    init_itxfm(TX_16X16, 16x16);
+    init_idct(TX_32X32,  idct_idct_32x32);
+    init_idct(4 /* lossless */, iwht_iwht_4x4);
+
+#undef init_itxfm
+#undef init_idct
+}
+
+/*
+ * Filter eight consecutive positions along one edge.
+ *
+ * dst      points at the first sample after the edge (q0); samples before the
+ *          edge (p0, p1, ...) are read at negative multiples of strideb
+ * E, I, H  thresholds; they are scaled up by (BIT_DEPTH - 8) bits here
+ *            I  limit on the difference between neighbouring samples on
+ *               either side of the edge
+ *            E  limit on the step across the edge
+ *            H  limit above which the "hev" variant of the narrow filter is
+ *               used
+ * stridea  distance between two successive positions along the edge
+ * strideb  distance between two samples across the edge
+ * wd       widest filter allowed: >= 16 enables the filter that rewrites 14
+ *          samples, >= 8 the filter that rewrites 6 samples; otherwise only
+ *          the narrow filter (2 or 4 samples) is used
+ *
+ * Strides are in units of pixel, not bytes.
+ */
+static av_always_inline void loop_filter(pixel *dst, int E, int I, int H,
+                                         ptrdiff_t stridea, ptrdiff_t strideb,
+                                         int wd)
+{
+    /* F: flatness threshold, 1 at 8-bit scale. */
+    int i, F = 1 << (BIT_DEPTH - 8);
+
+    E <<= (BIT_DEPTH - 8);
+    I <<= (BIT_DEPTH - 8);
+    H <<= (BIT_DEPTH - 8);
+    for (i = 0; i < 8; i++, dst += stridea) {
+        int p7, p6, p5, p4;
+        int p3 = dst[strideb * -4], p2 = dst[strideb * -3];
+        int p1 = dst[strideb * -2], p0 = dst[strideb * -1];
+        int q0 = dst[strideb * +0], q1 = dst[strideb * +1];
+        int q2 = dst[strideb * +2], q3 = dst[strideb * +3];
+        int q4, q5, q6, q7;
+        /* Filter mask: all neighbour differences within I and the step
+         * across the edge within E. */
+        int fm = FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I &&
+                 FFABS(p1 - p0) <= I && FFABS(q1 - q0) <= I &&
+                 FFABS(q2 - q1) <= I && FFABS(q3 - q2) <= I &&
+                 FFABS(p0 - q0) * 2 + (FFABS(p1 - q1) >> 1) <= E;
+        /* Only assigned (and only read) when wd is large enough. */
+        int flat8out, flat8in;
+
+        /* This position is left untouched. */
+        if (!fm)
+            continue;
+
+        if (wd >= 16) {
+            p7 = dst[strideb * -8];
+            p6 = dst[strideb * -7];
+            p5 = dst[strideb * -6];
+            p4 = dst[strideb * -5];
+            q4 = dst[strideb * +4];
+            q5 = dst[strideb * +5];
+            q6 = dst[strideb * +6];
+            q7 = dst[strideb * +7];
+
+            /* Outer four samples on each side are within F of p0 / q0. */
+            flat8out = FFABS(p7 - p0) <= F && FFABS(p6 - p0) <= F &&
+                       FFABS(p5 - p0) <= F && FFABS(p4 - p0) <= F &&
+                       FFABS(q4 - q0) <= F && FFABS(q5 - q0) <= F &&
+                       FFABS(q6 - q0) <= F && FFABS(q7 - q0) <= F;
+        }
+
+        /* Inner three samples on each side are within F of p0 / q0. */
+        if (wd >= 8)
+            flat8in = FFABS(p3 - p0) <= F && FFABS(p2 - p0) <= F &&
+                      FFABS(p1 - p0) <= F && FFABS(q1 - q0) <= F &&
+                      FFABS(q2 - q0) <= F && FFABS(q3 - q0) <= F;
+
+        if (wd >= 16 && flat8out && flat8in) {
+            /* Widest filter: each of the 14 samples p6..q6 becomes a rounded
+             * average with total weight 16; p7 / q7 are repeated where the
+             * window would reach past them. */
+            dst[strideb * -7] = (p7 + p7 + p7 + p7 + p7 + p7 + p7 + p6 * 2 +
+                                 p5 + p4 + p3 + p2 + p1 + p0 + q0 + 8) >> 4;
+            dst[strideb * -6] = (p7 + p7 + p7 + p7 + p7 + p7 + p6 + p5 * 2 +
+                                 p4 + p3 + p2 + p1 + p0 + q0 + q1 + 8) >> 4;
+            dst[strideb * -5] = (p7 + p7 + p7 + p7 + p7 + p6 + p5 + p4 * 2 +
+                                 p3 + p2 + p1 + p0 + q0 + q1 + q2 + 8) >> 4;
+            dst[strideb * -4] = (p7 + p7 + p7 + p7 + p6 + p5 + p4 + p3 * 2 +
+                                 p2 + p1 + p0 + q0 + q1 + q2 + q3 + 8) >> 4;
+            dst[strideb * -3] = (p7 + p7 + p7 + p6 + p5 + p4 + p3 + p2 * 2 +
+                                 p1 + p0 + q0 + q1 + q2 + q3 + q4 + 8) >> 4;
+            dst[strideb * -2] = (p7 + p7 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
+                                 p0 + q0 + q1 + q2 + q3 + q4 + q5 + 8) >> 4;
+            dst[strideb * -1] = (p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
+                                 q0 + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
+            dst[strideb * +0] = (p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 +
+                                 q1 + q2 + q3 + q4 + q5 + q6 + q7 + 8) >> 4;
+            dst[strideb * +1] = (p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 +
+                                 q2 + q3 + q4 + q5 + q6 + q7 + q7 + 8) >> 4;
+            dst[strideb * +2] = (p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 +
+                                 q3 + q4 + q5 + q6 + q7 + q7 + q7 + 8) >> 4;
+            dst[strideb * +3] = (p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 * 2 +
+                                 q4 + q5 + q6 + q7 + q7 + q7 + q7 + 8) >> 4;
+            dst[strideb * +4] = (p2 + p1 + p0 + q0 + q1 + q2 + q3 + q4 * 2 +
+                                 q5 + q6 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
+            dst[strideb * +5] = (p1 + p0 + q0 + q1 + q2 + q3 + q4 + q5 * 2 +
+                                 q6 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
+            dst[strideb * +6] = (p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 +
+                                 q7 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
+        } else if (wd >= 8 && flat8in) {
+            /* Medium filter: the 6 samples p2..q2 become rounded averages
+             * with total weight 8. */
+            dst[strideb * -3] = (p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3;
+            dst[strideb * -2] = (p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3;
+            dst[strideb * -1] = (p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3;
+            dst[strideb * +0] = (p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3;
+            dst[strideb * +1] = (p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3 + 4) >> 3;
+            dst[strideb * +2] = (p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3;
+        } else {
+            /* Narrow filter.  hev: a step next to the edge exceeds H. */
+            int hev = FFABS(p1 - p0) > H || FFABS(q1 - q0) > H;
+
+            if (hev) {
+                /* Only p0 and q0 are modified. */
+                int f = av_clip_intp2(p1 - q1, BIT_DEPTH - 1), f1, f2;
+                f = av_clip_intp2(3 * (q0 - p0) + f, BIT_DEPTH - 1);
+
+                f1 = FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
+                f2 = FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
+
+                dst[strideb * -1] = av_clip_pixel(p0 + f2);
+                dst[strideb * +0] = av_clip_pixel(q0 - f1);
+            } else {
+                /* p0 and q0 are modified, then p1 and q1 by half of f1. */
+                int f = av_clip_intp2(3 * (q0 - p0), BIT_DEPTH - 1), f1, f2;
+
+                f1 = FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
+                f2 = FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
+
+                dst[strideb * -1] = av_clip_pixel(p0 + f2);
+                dst[strideb * +0] = av_clip_pixel(q0 - f1);
+
+                f = (f1 + 1) >> 1;
+                dst[strideb * -2] = av_clip_pixel(p1 + f);
+                dst[strideb * +1] = av_clip_pixel(q1 - f);
+            }
+        }
+    }
+}
+
+/*
+ * Generate loop_filter_<dir>_<wd>_8_c(): a byte-stride entry point that
+ * filters one 8-position edge with loop_filter().  stridea / strideb are the
+ * along-edge and across-edge steps and are expanded inside the function, so
+ * they may name the (already converted) `stride` parameter.
+ */
+#define lf_8_fn(dir, wd, stridea, strideb) \
+static void loop_filter_##dir##_##wd##_8_c(uint8_t *_dst, \
+                                           ptrdiff_t stride, \
+                                           int E, int I, int H) \
+{ \
+    stride /= sizeof(pixel); /* bytes -> pixels */ \
+    loop_filter((pixel *) _dst, E, I, H, stridea, strideb, wd); \
+}
+
+/* Both variants for one width: "h" steps by stride along the edge and by 1
+ * across it, "v" the other way round. */
+#define lf_8_fns(wd) \
+lf_8_fn(h, wd, stride, 1) \
+lf_8_fn(v, wd, 1, stride)
+
+lf_8_fns(4)
+lf_8_fns(8)
+lf_8_fns(16)
+
+#undef lf_8_fn
+#undef lf_8_fns
+
+/*
+ * Generate loop_filter_<dir>_16_16_c(): filter a 16-position edge by running
+ * the 8-position wd = 16 filter twice, the second time 8 positions further
+ * along the edge.  stridea is the along-edge step in bytes.
+ */
+#define lf_16_fn(dir, stridea) \
+static void loop_filter_##dir##_16_16_c(uint8_t *dst, \
+                                        ptrdiff_t stride, \
+                                        int E, int I, int H) \
+{ \
+    uint8_t *const second_half = dst + 8 * stridea; \
+\
+    loop_filter_##dir##_16_8_c(dst,         stride, E, I, H); \
+    loop_filter_##dir##_16_8_c(second_half, stride, E, I, H); \
+}
+
+lf_16_fn(h, stride)
+lf_16_fn(v, sizeof(pixel))
+
+#undef lf_16_fn
+
+/*
+ * Generate loop_filter_<dir>_<wd1><wd2>_16_c(): filter a 16-position edge as
+ * two 8-position halves that may use different widths and thresholds.
+ * E, I and H each carry two values: the low 8 bits are used for the first
+ * half (width wd1), the value shifted right by 8 for the second half
+ * (width wd2).  stridea is the along-edge step in bytes.
+ */
+#define lf_mix_fn(dir, wd1, wd2, stridea) \
+static void loop_filter_##dir##_##wd1##wd2##_16_c(uint8_t *dst, \
+                                                  ptrdiff_t stride, \
+                                                  int E, int I, int H) \
+{ \
+    uint8_t *const second_half = dst + 8 * stridea; \
+    const int E1 = E & 0xff, I1 = I & 0xff, H1 = H & 0xff; \
+    const int E2 = E >> 8,   I2 = I >> 8,   H2 = H >> 8; \
+\
+    loop_filter_##dir##_##wd1##_8_c(dst,         stride, E1, I1, H1); \
+    loop_filter_##dir##_##wd2##_8_c(second_half, stride, E2, I2, H2); \
+}
+
+/* Both variants of one width pair. */
+#define lf_mix_fns(wd1, wd2) \
+lf_mix_fn(h, wd1, wd2, stride) \
+lf_mix_fn(v, wd1, wd2, sizeof(pixel))
+
+lf_mix_fns(4, 4)
+lf_mix_fns(4, 8)
+lf_mix_fns(8, 4)
+lf_mix_fns(8, 8)
+
+#undef lf_mix_fn
+#undef lf_mix_fns
+
+static av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp)
+{
+    dsp->loop_filter_8[0][0] = loop_filter_h_4_8_c;
+    dsp->loop_filter_8[0][1] = loop_filter_v_4_8_c;
+    dsp->loop_filter_8[1][0] = loop_filter_h_8_8_c;
+    dsp->loop_filter_8[1][1] = loop_filter_v_8_8_c;
+    dsp->loop_filter_8[2][0] = loop_filter_h_16_8_c;
+    dsp->loop_filter_8[2][1] = loop_filter_v_16_8_c;
+
+    dsp->loop_filter_16[0] = loop_filter_h_16_16_c;
+    dsp->loop_filter_16[1] = loop_filter_v_16_16_c;
+
+    dsp->loop_filter_mix2[0][0][0] = loop_filter_h_44_16_c;
+    dsp->loop_filter_mix2[0][0][1] = loop_filter_v_44_16_c;
+    dsp->loop_filter_mix2[0][1][0] = loop_filter_h_48_16_c;
+    dsp->loop_filter_mix2[0][1][1] = loop_filter_v_48_16_c;
+    dsp->loop_filter_mix2[1][0][0] = loop_filter_h_84_16_c;
+    dsp->loop_filter_mix2[1][0][1] = loop_filter_v_84_16_c;
+    dsp->loop_filter_mix2[1][1][0] = loop_filter_h_88_16_c;
+    dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c;
+}
+
+#if BIT_DEPTH != 12
+
+/*
+ * Copy h rows of w pixels from src to dst. Both strides are applied to
+ * byte pointers, i.e. they are byte strides. The row count is tested
+ * after each copy, so at least one row is always copied.
+ */
+static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
+                                    const uint8_t *src, ptrdiff_t src_stride,
+                                    int w, int h)
+{
+    int rows_left = h;
+
+    do {
+        memcpy(dst, src, w * sizeof(pixel));
+
+        src += src_stride;
+        dst += dst_stride;
+        rows_left--;
+    } while (rows_left);
+}
+
+/*
+ * Combine an h-row, w-pixel block of src into dst with rnd_avg_pixel4(),
+ * four pixels per step. Strides come in as bytes and are converted to
+ * pixel units. dst is read and written through AV_RN4PA/AV_WN4PA, src is
+ * read through AV_RN4P. At least one row is always processed.
+ */
+static av_always_inline void avg_c(uint8_t *_dst, ptrdiff_t dst_stride,
+                                   const uint8_t *_src, ptrdiff_t src_stride,
+                                   int w, int h)
+{
+    const ptrdiff_t dst_step = dst_stride / sizeof(pixel);
+    const ptrdiff_t src_step = src_stride / sizeof(pixel);
+    const pixel *in = (const pixel *) _src;
+    pixel *out = (pixel *) _dst;
+    int rows_left = h;
+
+    do {
+        int x = 0;
+
+        while (x < w) {
+            AV_WN4PA(&out[x], rnd_avg_pixel4(AV_RN4PA(&out[x]), AV_RN4P(&in[x])));
+            x += 4;
+        }
+
+        out += dst_step;
+        in  += src_step;
+    } while (--rows_left);
+}
+/*
+ * fpel_fn(type, sz) emits <type><sz>_c(), a fixed-width wrapper that calls
+ * <type>_c() (copy_c or avg_c) with w = sz. The mx and my parameters are
+ * accepted but not used by the wrapper.
+ */
+#define fpel_fn(type, sz) \
+static void type##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                         const uint8_t *src, ptrdiff_t src_stride, \
+                         int h, int mx, int my) \
+{ \
+    type##_c(dst, dst_stride, src, src_stride, sz, h); \
+}
+/* copy_avg_fn(sz) instantiates both the copy and the avg wrapper for one width. */
+#define copy_avg_fn(sz) \
+fpel_fn(copy, sz) \
+fpel_fn(avg,  sz)
+
+copy_avg_fn(64)
+copy_avg_fn(32)
+copy_avg_fn(16)
+copy_avg_fn(8)
+copy_avg_fn(4)
+
+#undef fpel_fn
+#undef copy_avg_fn
+
+#endif /* BIT_DEPTH != 12 */
+/*
+ * FILTER_8TAP(src, x, F, stride): 8-tap weighted sum of the samples
+ * src[x - 3 * stride] .. src[x + 4 * stride] with coefficients F[0..7],
+ * rounded by adding 64, shifted right by 7 and passed through
+ * av_clip_pixel(). The arguments are not parenthesized and are expanded
+ * several times, so pass only simple, side-effect-free expressions.
+ */
+#define FILTER_8TAP(src, x, F, stride) \
+    av_clip_pixel((F[0] * src[x + -3 * stride] + \
+                   F[1] * src[x + -2 * stride] + \
+                   F[2] * src[x + -1 * stride] + \
+                   F[3] * src[x + +0 * stride] + \
+                   F[4] * src[x + +1 * stride] + \
+                   F[5] * src[x + +2 * stride] + \
+                   F[6] * src[x + +3 * stride] + \
+                   F[7] * src[x + +4 * stride] + 64) >> 7)
+
+/*
+ * One-dimensional 8-tap filter over an h-row, w-pixel block.
+ *
+ * dst_stride and src_stride arrive in bytes and are converted to pixel
+ * units; ds is the distance, in pixels, between consecutive taps.
+ * With avg set, the filtered value is combined with the existing dst
+ * pixel as (dst + value + 1) >> 1; otherwise it replaces the dst pixel.
+ * At least one row is always processed.
+ */
+static av_always_inline void do_8tap_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
+                                          const uint8_t *_src, ptrdiff_t src_stride,
+                                          int w, int h, ptrdiff_t ds,
+                                          const int16_t *filter, int avg)
+{
+    const pixel *in = (const pixel *) _src;
+    pixel *out = (pixel *) _dst;
+    int rows_left = h;
+
+    src_stride /= sizeof(pixel);
+    dst_stride /= sizeof(pixel);
+    do {
+        int col = 0;
+
+        while (col < w) {
+            const int filtered = FILTER_8TAP(in, col, filter, ds);
+
+            out[col] = avg ? (out[col] + filtered + 1) >> 1 : filtered;
+            col++;
+        }
+
+        in  += src_stride;
+        out += dst_stride;
+    } while (--rows_left);
+}
+/*
+ * filter_8tap_1d_fn(opn, opa, dir, ds) emits the non-inlined function
+ * <opn>_8tap_1d_<dir>_c(), which calls do_8tap_1d_c() with tap spacing ds
+ * and avg = opa. ds is 1 for the h variants; for the v variants it is the
+ * source stride converted from bytes to pixels.
+ */
+#define filter_8tap_1d_fn(opn, opa, dir, ds) \
+static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                const uint8_t *src, ptrdiff_t src_stride, \
+                                                int w, int h, const int16_t *filter) \
+{ \
+    do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
+}
+
+filter_8tap_1d_fn(put, 0, v, src_stride / sizeof(pixel))
+filter_8tap_1d_fn(put, 0, h, 1)
+filter_8tap_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
+filter_8tap_1d_fn(avg, 1, h, 1)
+
+#undef filter_8tap_1d_fn
+
+/*
+ * Separable two-dimensional 8-tap filter.
+ *
+ * Pass 1 filters h + 7 source rows horizontally with filterx, starting
+ * three rows above the block, into a temporary buffer that uses a fixed
+ * row pitch of 64 pixels (room for 71 rows). Pass 2 filters that buffer
+ * vertically with filtery, starting at its fourth row, and either stores
+ * the result or, with avg set, combines it with dst as
+ * (dst + value + 1) >> 1. Strides arrive in bytes.
+ */
+static av_always_inline void do_8tap_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
+                                          const uint8_t *_src, ptrdiff_t src_stride,
+                                          int w, int h, const int16_t *filterx,
+                                          const int16_t *filtery, int avg)
+{
+    enum { TMP_STRIDE = 64 };
+    pixel tmp[TMP_STRIDE * 71];
+    pixel *row = tmp;
+    pixel *out = (pixel *) _dst;
+    const pixel *in = (const pixel *) _src;
+    int rows_left = h + 7;
+
+    dst_stride /= sizeof(pixel);
+    src_stride /= sizeof(pixel);
+
+    /* horizontal pass */
+    in -= src_stride * 3;
+    do {
+        int x;
+
+        for (x = 0; x < w; x++)
+            row[x] = FILTER_8TAP(in, x, filterx, 1);
+
+        row += TMP_STRIDE;
+        in  += src_stride;
+    } while (--rows_left);
+
+    /* vertical pass */
+    row       = tmp + TMP_STRIDE * 3;
+    rows_left = h;
+    do {
+        int x;
+
+        for (x = 0; x < w; x++) {
+            const int filtered = FILTER_8TAP(row, x, filtery, TMP_STRIDE);
+
+            out[x] = avg ? (out[x] + filtered + 1) >> 1 : filtered;
+        }
+
+        row += TMP_STRIDE;
+        out += dst_stride;
+    } while (--rows_left);
+}
+/*
+ * filter_8tap_2d_fn(opn, opa) emits the non-inlined function
+ * <opn>_8tap_2d_hv_c(), which calls do_8tap_2d_c() with avg = opa.
+ */
+#define filter_8tap_2d_fn(opn, opa) \
+static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                           const uint8_t *src, ptrdiff_t src_stride, \
+                                           int w, int h, const int16_t *filterx, \
+                                           const int16_t *filtery) \
+{ \
+    do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \
+}
+
+filter_8tap_2d_fn(put, 0)
+filter_8tap_2d_fn(avg, 1)
+
+#undef filter_8tap_2d_fn
+/*
+ * Fixed-size entry points for the 8-tap filters. In both macros the "avg"
+ * parameter is the name prefix (put or avg), not a flag.
+ *
+ * filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) emits
+ * <avg>_8tap_<type>_<sz><dir>_c(), which calls <avg>_8tap_1d_<dir>_c() with
+ * w = sz and the coefficient row ff_vp9_subpel_filters[type_idx][dir_m];
+ * dir_m names the function parameter to index with (mx or my).
+ *
+ * filter_fn_2d(sz, type, type_idx, avg) emits <avg>_8tap_<type>_<sz>hv_c(),
+ * which calls <avg>_8tap_2d_hv_c() with the rows selected by mx (as
+ * filterx) and my (as filtery).
+ */
+#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
+static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                              const uint8_t *src, ptrdiff_t src_stride, \
+                                              int h, int mx, int my) \
+{ \
+    avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \
+                            ff_vp9_subpel_filters[type_idx][dir_m]); \
+}
+
+#define filter_fn_2d(sz, type, type_idx, avg) \
+static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                           const uint8_t *src, ptrdiff_t src_stride, \
+                                           int h, int mx, int my) \
+{ \
+    avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
+                       ff_vp9_subpel_filters[type_idx][mx], \
+                       ff_vp9_subpel_filters[type_idx][my]); \
+}
+
+#if BIT_DEPTH != 12
+/*
+ * FILTER_BILIN(src, x, mxy, stride): src[x] plus mxy/16 of the difference
+ * to src[x + stride], with the product rounded by adding 8 before the
+ * shift right by 4. The arguments are not parenthesized and are expanded
+ * several times, so pass only simple, side-effect-free expressions.
+ */
+#define FILTER_BILIN(src, x, mxy, stride) \
+    (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
+
+/*
+ * One-dimensional bilinear filter over an h-row, w-pixel block.
+ *
+ * dst_stride and src_stride arrive in bytes and are converted to pixel
+ * units; ds is the distance, in pixels, to the second sample of each pair
+ * and mxy is the weight handed to FILTER_BILIN. With avg set, the result
+ * is combined with the existing dst pixel as (dst + value + 1) >> 1;
+ * otherwise it replaces the dst pixel. At least one row is always processed.
+ */
+static av_always_inline void do_bilin_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
+                                           const uint8_t *_src, ptrdiff_t src_stride,
+                                           int w, int h, ptrdiff_t ds, int mxy, int avg)
+{
+    const pixel *in = (const pixel *) _src;
+    pixel *out = (pixel *) _dst;
+    int rows_left = h;
+
+    src_stride /= sizeof(pixel);
+    dst_stride /= sizeof(pixel);
+    do {
+        int col = 0;
+
+        while (col < w) {
+            const int blended = FILTER_BILIN(in, col, mxy, ds);
+
+            out[col] = avg ? (out[col] + blended + 1) >> 1 : blended;
+            col++;
+        }
+
+        in  += src_stride;
+        out += dst_stride;
+    } while (--rows_left);
+}
+/*
+ * bilin_1d_fn(opn, opa, dir, ds) emits the non-inlined function
+ * <opn>_bilin_1d_<dir>_c(), which calls do_bilin_1d_c() with sample
+ * spacing ds and avg = opa. ds is 1 for the h variants; for the v variants
+ * it is the source stride converted from bytes to pixels.
+ */
+#define bilin_1d_fn(opn, opa, dir, ds) \
+static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                 const uint8_t *src, ptrdiff_t src_stride, \
+                                                 int w, int h, int mxy) \
+{ \
+    do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
+}
+
+bilin_1d_fn(put, 0, v, src_stride / sizeof(pixel))
+bilin_1d_fn(put, 0, h, 1)
+bilin_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
+bilin_1d_fn(avg, 1, h, 1)
+
+#undef bilin_1d_fn
+
+/*
+ * Separable two-dimensional bilinear filter.
+ *
+ * Pass 1 filters h + 1 source rows horizontally with weight mx into a
+ * temporary buffer that uses a fixed row pitch of 64 pixels (room for 65
+ * rows). Pass 2 filters that buffer vertically with weight my and either
+ * stores the result or, with avg set, combines it with dst as
+ * (dst + value + 1) >> 1. Strides arrive in bytes.
+ */
+static av_always_inline void do_bilin_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
+                                           const uint8_t *_src, ptrdiff_t src_stride,
+                                           int w, int h, int mx, int my, int avg)
+{
+    enum { TMP_STRIDE = 64 };
+    pixel tmp[TMP_STRIDE * 65];
+    pixel *row = tmp;
+    pixel *out = (pixel *) _dst;
+    const pixel *in = (const pixel *) _src;
+    int rows_left = h + 1;
+
+    dst_stride /= sizeof(pixel);
+    src_stride /= sizeof(pixel);
+
+    /* horizontal pass */
+    do {
+        int x;
+
+        for (x = 0; x < w; x++)
+            row[x] = FILTER_BILIN(in, x, mx, 1);
+
+        row += TMP_STRIDE;
+        in  += src_stride;
+    } while (--rows_left);
+
+    /* vertical pass */
+    row       = tmp;
+    rows_left = h;
+    do {
+        int x;
+
+        for (x = 0; x < w; x++) {
+            const int blended = FILTER_BILIN(row, x, my, TMP_STRIDE);
+
+            out[x] = avg ? (out[x] + blended + 1) >> 1 : blended;
+        }
+
+        row += TMP_STRIDE;
+        out += dst_stride;
+    } while (--rows_left);
+}
+/*
+ * bilin_2d_fn(opn, opa) emits the non-inlined function
+ * <opn>_bilin_2d_hv_c(), which calls do_bilin_2d_c() with avg = opa.
+ */
+#define bilin_2d_fn(opn, opa) \
+static av_noinline void opn##_bilin_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                            const uint8_t *src, ptrdiff_t src_stride, \
+                                            int w, int h, int mx, int my) \
+{ \
+    do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \
+}
+
+bilin_2d_fn(put, 0)
+bilin_2d_fn(avg, 1)
+
+#undef bilin_2d_fn
+/*
+ * Fixed-size entry points for the bilinear filters; the "avg" macro
+ * parameter is the name prefix (put or avg), not a flag.
+ *
+ * bilinf_fn_1d(sz, dir, dir_m, avg) emits <avg>_bilin_<sz><dir>_c(), which
+ * calls <avg>_bilin_1d_<dir>_c() with w = sz and dir_m (mx or my) as mxy.
+ * bilinf_fn_2d(sz, avg) emits <avg>_bilin_<sz>hv_c(), which calls
+ * <avg>_bilin_2d_hv_c() with w = sz and both mx and my.
+ */
+#define bilinf_fn_1d(sz, dir, dir_m, avg) \
+static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                      const uint8_t *src, ptrdiff_t src_stride, \
+                                      int h, int mx, int my) \
+{ \
+    avg##_bilin_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, dir_m); \
+}
+
+#define bilinf_fn_2d(sz, avg) \
+static void avg##_bilin_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                   const uint8_t *src, ptrdiff_t src_stride, \
+                                   int h, int mx, int my) \
+{ \
+    avg##_bilin_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, mx, my); \
+}
+
+#else
+    /* BIT_DEPTH == 12: both generators expand to nothing. */
+#define bilinf_fn_1d(a, b, c, d)
+#define bilinf_fn_2d(a, b)
+
+#endif
+/*
+ * filter_fn(sz, avg) instantiates, for one block width, the h, v and hv
+ * entry points of the regular, smooth and sharp 8-tap filters followed by
+ * the three bilinear ones (which expand to nothing when the bilinf_fn_*
+ * macros are defined empty). "avg" is the put/avg name prefix.
+ */
+#define filter_fn(sz, avg) \
+filter_fn_1d(sz, h, mx, regular, FILTER_8TAP_REGULAR, avg) \
+filter_fn_1d(sz, v, my, regular, FILTER_8TAP_REGULAR, avg) \
+filter_fn_2d(sz,        regular, FILTER_8TAP_REGULAR, avg) \
+filter_fn_1d(sz, h, mx, smooth,  FILTER_8TAP_SMOOTH,  avg) \
+filter_fn_1d(sz, v, my, smooth,  FILTER_8TAP_SMOOTH,  avg) \
+filter_fn_2d(sz,        smooth,  FILTER_8TAP_SMOOTH,  avg) \
+filter_fn_1d(sz, h, mx, sharp,   FILTER_8TAP_SHARP,   avg) \
+filter_fn_1d(sz, v, my, sharp,   FILTER_8TAP_SHARP,   avg) \
+filter_fn_2d(sz,        sharp,   FILTER_8TAP_SHARP,   avg) \
+bilinf_fn_1d(sz, h, mx,                               avg) \
+bilinf_fn_1d(sz, v, my,                               avg) \
+bilinf_fn_2d(sz,                                      avg)
+/* filter_fn_set(avg) repeats filter_fn for the widths 64, 32, 16, 8 and 4. */
+#define filter_fn_set(avg) \
+filter_fn(64, avg) \
+filter_fn(32, avg) \
+filter_fn(16, avg) \
+filter_fn(8,  avg) \
+filter_fn(4,  avg)
+
+filter_fn_set(put)
+filter_fn_set(avg)
+
+#undef filter_fn
+#undef filter_fn_set
+#undef filter_fn_1d
+#undef filter_fn_2d
+#undef bilinf_fn_1d
+#undef bilinf_fn_2d
+/*
+ * Fill the dsp->mc table with the C motion-compensation functions.
+ *
+ * Table layout as used below: mc[size][filter][op][h][v], where size
+ * 0..4 selects width 64, 32, 16, 8, 4; op is 0 for put/copy and 1 for avg;
+ * [0][0] is the full-pel entry, [1][0] the h, [0][1] the v and [1][1] the
+ * hv sub-pel entry.
+ *
+ * When BIT_DEPTH == 12 the function first calls ff_vp9dsp_mc_init_10() and
+ * then overwrites only the 8-tap sub-pel entries; the full-pel and
+ * bilinear entries stay as that call left them. The function is static
+ * unless BIT_DEPTH == 10, and ff_vp9dsp_mc_init_10() is declared whenever
+ * BIT_DEPTH != 8.
+ */
+#if BIT_DEPTH != 8
+void ff_vp9dsp_mc_init_10(VP9DSPContext *dsp);
+#endif
+#if BIT_DEPTH != 10
+static
+#endif
+av_cold void FUNC(ff_vp9dsp_mc_init)(VP9DSPContext *dsp)
+{
+#if BIT_DEPTH == 12
+    ff_vp9dsp_mc_init_10(dsp);
+#else /* BIT_DEPTH == 12 */
+    /* Full-pel entries: the same copy/avg function for all four filter types. */
+#define init_fpel(idx1, idx2, sz, type) \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = type##sz##_c; \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = type##sz##_c; \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = type##sz##_c; \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = type##sz##_c
+
+#define init_copy_avg(idx, sz) \
+    init_fpel(idx, 0, sz, copy); \
+    init_fpel(idx, 1, sz, avg)
+
+    init_copy_avg(0, 64);
+    init_copy_avg(1, 32);
+    init_copy_avg(2, 16);
+    init_copy_avg(3,  8);
+    init_copy_avg(4,  4);
+
+#undef init_copy_avg
+#undef init_fpel
+
+#endif /* BIT_DEPTH == 12 */
+    /* Sub-pel entries; the bilinear row is only assigned when BIT_DEPTH != 12. */
+#define init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type) \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_c; \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_c; \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_c
+
+#if BIT_DEPTH == 12
+#define init_subpel1 init_subpel1_bd_aware
+#else
+#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type) \
+    init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type); \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][idxh][idxv] = type##_bilin_##sz##dir##_c
+#endif
+
+#define init_subpel2(idx, idxh, idxv, dir, type) \
+    init_subpel1(0, idx, idxh, idxv, 64, dir, type); \
+    init_subpel1(1, idx, idxh, idxv, 32, dir, type); \
+    init_subpel1(2, idx, idxh, idxv, 16, dir, type); \
+    init_subpel1(3, idx, idxh, idxv,  8, dir, type); \
+    init_subpel1(4, idx, idxh, idxv,  4, dir, type)
+
+#define init_subpel3(idx, type) \
+    init_subpel2(idx, 1, 1, hv, type); \
+    init_subpel2(idx, 0, 1, v, type); \
+    init_subpel2(idx, 1, 0, h, type)
+
+    init_subpel3(0, put);
+    init_subpel3(1, avg);
+
+#undef init_subpel1
+#undef init_subpel2
+#undef init_subpel3
+#undef init_subpel1_bd_aware
+}
+
+/*
+ * Scaled two-pass 8-tap filter.
+ *
+ * mx/my are the starting 1/16 fractional positions and dx/dy the step
+ * added per output pixel/row; after each step the whole part (>> 4)
+ * advances the source position and the fraction is kept in the low four
+ * bits. filters[frac] is the coefficient row used for a given fraction.
+ *
+ * Pass 1 filters (((h - 1) * dy + my) >> 4) + 8 source rows horizontally,
+ * starting three rows above the block, into a temporary buffer with a
+ * fixed row pitch of 64 pixels (room for 135 rows). Pass 2 filters that
+ * buffer vertically, starting at its fourth row, and either stores the
+ * result or, with avg set, combines it with dst as (dst + value + 1) >> 1.
+ * Strides arrive in bytes.
+ */
+static av_always_inline void do_scaled_8tap_c(uint8_t *_dst, ptrdiff_t dst_stride,
+                                              const uint8_t *_src, ptrdiff_t src_stride,
+                                              int w, int h, int mx, int my,
+                                              int dx, int dy, int avg,
+                                              const int16_t (*filters)[8])
+{
+    enum { TMP_STRIDE = 64 };
+    pixel tmp[TMP_STRIDE * 135];
+    pixel *row = tmp;
+    pixel *out = (pixel *) _dst;
+    const pixel *in = (const pixel *) _src;
+    int rows_left = (((h - 1) * dy + my) >> 4) + 8;
+
+    dst_stride /= sizeof(pixel);
+    src_stride /= sizeof(pixel);
+
+    /* horizontal pass */
+    in -= src_stride * 3;
+    do {
+        int x;
+        int frac = mx, pos = 0;
+
+        for (x = 0; x < w; x++) {
+            const int16_t *taps = filters[frac];
+
+            row[x] = FILTER_8TAP(in, pos, taps, 1);
+            frac  += dx;
+            pos   += frac >> 4;
+            frac  &= 0xf;
+        }
+
+        row += TMP_STRIDE;
+        in  += src_stride;
+    } while (--rows_left);
+
+    /* vertical pass */
+    row       = tmp + TMP_STRIDE * 3;
+    rows_left = h;
+    do {
+        const int16_t *taps = filters[my];
+        int x;
+
+        for (x = 0; x < w; x++) {
+            const int filtered = FILTER_8TAP(row, x, taps, TMP_STRIDE);
+
+            out[x] = avg ? (out[x] + filtered + 1) >> 1 : filtered;
+        }
+
+        my  += dy;
+        row += (my >> 4) * TMP_STRIDE;
+        my  &= 0xf;
+        out += dst_stride;
+    } while (--rows_left);
+}
+/*
+ * scaled_filter_8tap_fn(opn, opa) emits the non-inlined function
+ * <opn>_scaled_8tap_c(), which calls do_scaled_8tap_c() with avg = opa.
+ * FILTER_8TAP is undefined afterwards.
+ */
+#define scaled_filter_8tap_fn(opn, opa) \
+static av_noinline void opn##_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                            const uint8_t *src, ptrdiff_t src_stride, \
+                                            int w, int h, int mx, int my, int dx, int dy, \
+                                            const int16_t (*filters)[8]) \
+{ \
+    do_scaled_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
+                     opa, filters); \
+}
+
+scaled_filter_8tap_fn(put, 0)
+scaled_filter_8tap_fn(avg, 1)
+
+#undef scaled_filter_8tap_fn
+
+#undef FILTER_8TAP
+/*
+ * scaled_filter_fn(sz, type, type_idx, avg) emits
+ * <avg>_scaled_<type>_<sz>_c(), which calls <avg>_scaled_8tap_c() with
+ * w = sz and the coefficient table ff_vp9_subpel_filters[type_idx].
+ * "avg" is the put/avg name prefix, not a flag.
+ */
+#define scaled_filter_fn(sz, type, type_idx, avg) \
+static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                           const uint8_t *src, ptrdiff_t src_stride, \
+                                           int h, int mx, int my, int dx, int dy) \
+{ \
+    avg##_scaled_8tap_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy, \
+                        ff_vp9_subpel_filters[type_idx]); \
+}
+
+#if BIT_DEPTH != 12
+
+/*
+ * Scaled two-pass bilinear filter.
+ *
+ * mx/my are the starting 1/16 fractional positions and dx/dy the step
+ * added per output pixel/row; after each step the whole part (>> 4)
+ * advances the source position and the fraction is kept in the low four
+ * bits.
+ *
+ * Pass 1 filters (((h - 1) * dy + my) >> 4) + 2 source rows horizontally
+ * into a temporary buffer with a fixed row pitch of 64 pixels (room for
+ * 129 rows). Pass 2 filters that buffer vertically and either stores the
+ * result or, with avg set, combines it with dst as (dst + value + 1) >> 1.
+ * Strides arrive in bytes.
+ */
+static av_always_inline void do_scaled_bilin_c(uint8_t *_dst, ptrdiff_t dst_stride,
+                                               const uint8_t *_src, ptrdiff_t src_stride,
+                                               int w, int h, int mx, int my,
+                                               int dx, int dy, int avg)
+{
+    enum { TMP_STRIDE = 64 };
+    pixel tmp[TMP_STRIDE * 129];
+    pixel *row = tmp;
+    pixel *out = (pixel *) _dst;
+    const pixel *in = (const pixel *) _src;
+    int rows_left = (((h - 1) * dy + my) >> 4) + 2;
+
+    dst_stride /= sizeof(pixel);
+    src_stride /= sizeof(pixel);
+
+    /* horizontal pass */
+    do {
+        int x;
+        int frac = mx, pos = 0;
+
+        for (x = 0; x < w; x++) {
+            row[x] = FILTER_BILIN(in, pos, frac, 1);
+            frac  += dx;
+            pos   += frac >> 4;
+            frac  &= 0xf;
+        }
+
+        row += TMP_STRIDE;
+        in  += src_stride;
+    } while (--rows_left);
+
+    /* vertical pass */
+    row       = tmp;
+    rows_left = h;
+    do {
+        int x;
+
+        for (x = 0; x < w; x++) {
+            const int blended = FILTER_BILIN(row, x, my, TMP_STRIDE);
+
+            out[x] = avg ? (out[x] + blended + 1) >> 1 : blended;
+        }
+
+        my  += dy;
+        row += (my >> 4) * TMP_STRIDE;
+        my  &= 0xf;
+        out += dst_stride;
+    } while (--rows_left);
+}
+/*
+ * scaled_bilin_fn(opn, opa) emits the non-inlined function
+ * <opn>_scaled_bilin_c(), which calls do_scaled_bilin_c() with avg = opa.
+ * FILTER_BILIN is undefined afterwards.
+ */
+#define scaled_bilin_fn(opn, opa) \
+static av_noinline void opn##_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                             const uint8_t *src, ptrdiff_t src_stride, \
+                                             int w, int h, int mx, int my, int dx, int dy) \
+{ \
+    do_scaled_bilin_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, opa); \
+}
+
+scaled_bilin_fn(put, 0)
+scaled_bilin_fn(avg, 1)
+
+#undef scaled_bilin_fn
+
+#undef FILTER_BILIN
+/*
+ * scaled_bilinf_fn(sz, avg) emits <avg>_scaled_bilin_<sz>_c(), which calls
+ * <avg>_scaled_bilin_c() with w = sz. "avg" is the put/avg name prefix.
+ */
+#define scaled_bilinf_fn(sz, avg) \
+static void avg##_scaled_bilin_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                        const uint8_t *src, ptrdiff_t src_stride, \
+                                        int h, int mx, int my, int dx, int dy) \
+{ \
+    avg##_scaled_bilin_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy); \
+}
+
+#else
+    /* BIT_DEPTH == 12: the generator expands to nothing. */
+#define scaled_bilinf_fn(a, b)
+
+#endif
+/*
+ * scaled_filter_fns(sz, avg) instantiates, for one block width, the scaled
+ * regular, smooth and sharp 8-tap entry points followed by the scaled
+ * bilinear one (which expands to nothing when scaled_bilinf_fn is defined
+ * empty). "avg" is the put/avg name prefix.
+ */
+#define scaled_filter_fns(sz, avg) \
+scaled_filter_fn(sz,        regular, FILTER_8TAP_REGULAR, avg) \
+scaled_filter_fn(sz,        smooth,  FILTER_8TAP_SMOOTH,  avg) \
+scaled_filter_fn(sz,        sharp,   FILTER_8TAP_SHARP,   avg) \
+scaled_bilinf_fn(sz,                                      avg)
+/* scaled_filter_fn_set(avg) repeats scaled_filter_fns for widths 64, 32, 16, 8, 4. */
+#define scaled_filter_fn_set(avg) \
+scaled_filter_fns(64, avg) \
+scaled_filter_fns(32, avg) \
+scaled_filter_fns(16, avg) \
+scaled_filter_fns(8,  avg) \
+scaled_filter_fns(4,  avg)
+
+scaled_filter_fn_set(put)
+scaled_filter_fn_set(avg)
+
+#undef scaled_filter_fns
+#undef scaled_filter_fn_set
+#undef scaled_filter_fn
+#undef scaled_bilinf_fn
+/*
+ * Fill the dsp->smc table with the C scaled motion-compensation functions.
+ *
+ * Table layout as used below: smc[size][filter][op], where size 0..4
+ * selects width 64, 32, 16, 8, 4 and op is 0 for put and 1 for avg.
+ *
+ * When BIT_DEPTH == 12 the function first calls
+ * ff_vp9dsp_scaled_mc_init_10() and then assigns only the three 8-tap
+ * entries per slot; otherwise it also assigns the bilinear entry. The
+ * function is static unless BIT_DEPTH == 10, and
+ * ff_vp9dsp_scaled_mc_init_10() is declared whenever BIT_DEPTH != 8.
+ */
+#if BIT_DEPTH != 8
+void ff_vp9dsp_scaled_mc_init_10(VP9DSPContext *dsp);
+#endif
+#if BIT_DEPTH != 10
+static
+#endif
+av_cold void FUNC(ff_vp9dsp_scaled_mc_init)(VP9DSPContext *dsp)
+{
+#define init_scaled_bd_aware(idx1, idx2, sz, type) \
+    dsp->smc[idx1][FILTER_8TAP_SMOOTH ][idx2] = type##_scaled_smooth_##sz##_c; \
+    dsp->smc[idx1][FILTER_8TAP_REGULAR][idx2] = type##_scaled_regular_##sz##_c; \
+    dsp->smc[idx1][FILTER_8TAP_SHARP  ][idx2] = type##_scaled_sharp_##sz##_c
+
+#if BIT_DEPTH == 12
+    ff_vp9dsp_scaled_mc_init_10(dsp);
+#define init_scaled(a,b,c,d) init_scaled_bd_aware(a,b,c,d)
+#else
+#define init_scaled(idx1, idx2, sz, type) \
+    init_scaled_bd_aware(idx1, idx2, sz, type); \
+    dsp->smc[idx1][FILTER_BILINEAR    ][idx2] = type##_scaled_bilin_##sz##_c
+#endif
+
+#define init_scaled_put_avg(idx, sz) \
+    init_scaled(idx, 0, sz, put); \
+    init_scaled(idx, 1, sz, avg)
+
+    init_scaled_put_avg(0, 64);
+    init_scaled_put_avg(1, 32);
+    init_scaled_put_avg(2, 16);
+    init_scaled_put_avg(3,  8);
+    init_scaled_put_avg(4,  4);
+
+#undef init_scaled_put_avg
+#undef init_scaled
+#undef init_scaled_bd_aware
+}
+/*
+ * Fill dsp for this BIT_DEPTH instance by calling, in this order, the
+ * intrapred, itxfm, loopfilter, mc and scaled_mc init functions.
+ */
+av_cold void FUNC(ff_vp9dsp_init)(VP9DSPContext *dsp)
+{
+    FUNC(ff_vp9dsp_intrapred_init)(dsp);
+    vp9dsp_itxfm_init(dsp);
+    vp9dsp_loopfilter_init(dsp);
+    FUNC(ff_vp9dsp_mc_init)(dsp);
+    FUNC(ff_vp9dsp_scaled_mc_init)(dsp);
+}
diff --git a/media/ffvpx/libavcodec/vp9lpf.c b/media/ffvpx/libavcodec/vp9lpf.c
new file mode 100644
index 0000000000..414cede852
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9lpf.c
@@ -0,0 +1,202 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vp9dec.h"
+/*
+ * Apply the column-edge loop filters (last dsp index 0) to one plane.
+ *
+ * s     decoder context; supplies bytesperpixel, the filter_lut tables
+ *       (mblim_lut, lim_lut) and the dsp function pointers
+ * col   when zero, the x == 1 position is skipped for the edges described
+ *       by mask bytes 0..2; the mask byte 3 edges are not affected
+ * ss_h, ss_v  used as shift counts and 0/1 offsets; ff_vp9_loopfilter_sb()
+ *       passes 0, 0 for plane 0 and s->ss_h, s->ss_v for the other planes
+ * lvl   filter levels; E and I are looked up from a level L, H is L >> 4
+ * mask  rows of four mask bytes, tested one bit (x) at a time
+ * dst   start of the area to filter
+ * ls    line stride handed to the dsp functions
+ *
+ * Each outer iteration handles two mask rows (mask[y] and
+ * mask[y + 1 + ss_v]) and advances dst by 16 * ls. When both rows need a
+ * filter at the same bit, one combined call is made: loop_filter_16 if
+ * byte 0 is set in both rows, otherwise loop_filter_mix2 with the second
+ * row's E/I/H packed into bits 8 and up. A lone second-row edge is
+ * filtered at ptr + 8 * ls.
+ */
+static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
+                                               uint8_t *lvl, uint8_t (*mask)[4],
+                                               uint8_t *dst, ptrdiff_t ls)
+{
+    int y, x, bytesperpixel = s->bytesperpixel;
+
+    // filter edges between columns (e.g. block1 | block2)
+    for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
+        uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
+        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
+        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
+        unsigned hm = hm1 | hm2 | hm13 | hm23;
+
+        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) { /* stop once no bit >= x is set */
+            if (col || x > 1) {
+                if (hm1 & x) {
+                    int L = *l, H = L >> 4;
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
+                    /* byte 0 selects loop_filter_16 / loop_filter_8[2]; byte 1 picks index 1 over 0 */
+                    if (hmask1[0] & x) {
+                        if (hmask2[0] & x) {
+                            av_assert2(l[8 << ss_v] == L);
+                            s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
+                        } else {
+                            s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
+                        }
+                    } else if (hm2 & x) {
+                        L = l[8 << ss_v];
+                        H |= (L >> 4) << 8;
+                        E |= s->filter_lut.mblim_lut[L] << 8;
+                        I |= s->filter_lut.lim_lut[L] << 8;
+                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
+                                               [!!(hmask2[1] & x)]
+                                               [0](ptr, ls, E, I, H);
+                    } else {
+                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
+                                            [0](ptr, ls, E, I, H);
+                    }
+                } else if (hm2 & x) {
+                    int L = l[8 << ss_v], H = L >> 4;
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
+                    /* only the second mask row has an edge here */
+                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
+                                        [0](ptr + 8 * ls, ls, E, I, H);
+                }
+            }
+            if (ss_h) {
+                if (x & 0xAA) /* advance the level pointer after the odd bit positions only */
+                    l += 2;
+            } else {
+                if (hm13 & x) {
+                    int L = *l, H = L >> 4;
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
+                    /* byte 3 edges sit 4 pixels to the right and always use index 0 */
+                    if (hm23 & x) {
+                        L = l[8 << ss_v];
+                        H |= (L >> 4) << 8;
+                        E |= s->filter_lut.mblim_lut[L] << 8;
+                        I |= s->filter_lut.lim_lut[L] << 8;
+                        s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
+                    } else {
+                        s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
+                    }
+                } else if (hm23 & x) {
+                    int L = l[8 << ss_v], H = L >> 4;
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
+
+                    s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
+                }
+                l++;
+            }
+        }
+    }
+}
+/*
+ * Apply the row-edge loop filters (last dsp index 1) to one plane.
+ *
+ * s     decoder context; supplies bytesperpixel, the filter_lut tables
+ *       (mblim_lut, lim_lut) and the dsp function pointers
+ * row   when zero, the y == 0 mask row is skipped for the edges described
+ *       by mask bytes 0..2; the mask byte 3 edges are not affected
+ * ss_h, ss_v  used as shift counts and 0/1 offsets; ff_vp9_loopfilter_sb()
+ *       passes 0, 0 for plane 0 and s->ss_h, s->ss_v for the other planes
+ * lvl   filter levels; E and I are looked up from a level L, H is L >> 4
+ * mask  eight rows of four mask bytes
+ * dst   start of the area to filter
+ * ls    line stride; dst advances by 8 * ls >> ss_v per mask row
+ *
+ * Each inner iteration handles a pair of mask bits, x and
+ * x << (1 + ss_h), and advances ptr by 16 * bytesperpixel. When both bits
+ * need a filter, one combined call is made: loop_filter_16 if byte 0 is
+ * set for both, otherwise loop_filter_mix2 with the second position's
+ * E/I/H packed into bits 8 and up. A lone second-position edge is
+ * filtered at ptr + 8 * bytesperpixel. Mask byte 3 edges are only
+ * processed when ss_v is zero, at an offset of 4 * ls.
+ */
+static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
+                                               uint8_t *lvl, uint8_t (*mask)[4],
+                                               uint8_t *dst, ptrdiff_t ls)
+{
+    int y, x, bytesperpixel = s->bytesperpixel;
+
+    //                                 block1
+    // filter edges between rows (e.g. ------)
+    //                                 block2
+    for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
+        uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
+        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
+
+        for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) { /* stop once no vm bit >= x is set */
+            if (row || y) {
+                if (vm & x) {
+                    int L = *l, H = L >> 4;
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
+                    /* byte 0 selects loop_filter_16 / loop_filter_8[2]; byte 1 picks index 1 over 0 */
+                    if (vmask[0] & x) {
+                        if (vmask[0] & (x << (1 + ss_h))) {
+                            av_assert2(l[1 + ss_h] == L);
+                            s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
+                        } else {
+                            s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
+                        }
+                    } else if (vm & (x << (1 + ss_h))) {
+                        L = l[1 + ss_h];
+                        H |= (L >> 4) << 8;
+                        E |= s->filter_lut.mblim_lut[L] << 8;
+                        I |= s->filter_lut.lim_lut[L] << 8;
+                        s->dsp.loop_filter_mix2[!!(vmask[1] &  x)]
+                                               [!!(vmask[1] & (x << (1 + ss_h)))]
+                                               [1](ptr, ls, E, I, H);
+                    } else {
+                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
+                                            [1](ptr, ls, E, I, H);
+                    }
+                } else if (vm & (x << (1 + ss_h))) {
+                    int L = l[1 + ss_h], H = L >> 4;
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
+                    /* only the second position of the pair has an edge here */
+                    s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
+                                        [1](ptr + 8 * bytesperpixel, ls, E, I, H);
+                }
+            }
+            if (!ss_v) {
+                if (vm3 & x) {
+                    int L = *l, H = L >> 4;
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
+                    /* byte 3 edges sit 4 lines further down and always use index 0 */
+                    if (vm3 & (x << (1 + ss_h))) {
+                        L = l[1 + ss_h];
+                        H |= (L >> 4) << 8;
+                        E |= s->filter_lut.mblim_lut[L] << 8;
+                        I |= s->filter_lut.lim_lut[L] << 8;
+                        s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
+                    } else {
+                        s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
+                    }
+                } else if (vm3 & (x << (1 + ss_h))) {
+                    int L = l[1 + ss_h], H = L >> 4;
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
+
+                    s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
+                }
+            }
+        }
+        if (ss_v) {
+            if (y & 1) /* with ss_v set, lvl moves on by 16 after every odd mask row */
+                lvl += 16;
+        } else {
+            lvl += 8;
+        }
+    }
+}
+
+/*
+ * Loop-filter the current frame at the given offsets: plane 0 at yoff,
+ * then planes 1 and 2 at uvoff. For every plane the column edges are
+ * filtered before the row edges. Plane 0 uses lflvl->mask[0]; the other
+ * two planes use lflvl->mask[s->ss_h | s->ss_v] together with the
+ * context's ss_h/ss_v values. All planes share lflvl->level.
+ */
+void ff_vp9_loopfilter_sb(AVCodecContext *avctx, VP9Filter *lflvl,
+                          int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
+{
+    VP9Context *s = avctx->priv_data;
+    AVFrame *frame = s->s.frames[CUR_FRAME].tf.f;
+    const ptrdiff_t luma_ls   = frame->linesize[0];
+    const ptrdiff_t chroma_ls = frame->linesize[1];
+    uint8_t (*chroma_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
+    uint8_t *plane_dst = frame->data[0] + yoff;
+    int plane;
+
+    /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g.
+     * if you think of them as acting on a 8x8 block max, we can interleave
+     * each v/h within the single x loop, but that only works if we work on
+     * 8 pixel blocks, and we won't always do that (we want at least 16px
+     * to use SSE2 optimizations, perhaps 32 for AVX2) */
+
+    filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], plane_dst, luma_ls);
+    filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], plane_dst, luma_ls);
+
+    for (plane = 1; plane <= 2; plane++) {
+        plane_dst = frame->data[plane] + uvoff;
+        filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level,
+                          chroma_masks[0], plane_dst, chroma_ls);
+        filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level,
+                          chroma_masks[1], plane_dst, chroma_ls);
+    }
+}
diff --git a/media/ffvpx/libavcodec/vp9mvs.c b/media/ffvpx/libavcodec/vp9mvs.c
new file mode 100644
index 0000000000..b93d878d6f
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9mvs.c
@@ -0,0 +1,364 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "threadframe.h"
+#include "vp89_rac.h"
+#include "vp9data.h"
+#include "vp9dec.h"
+#include "vpx_rac.h"
+
+/* Clips both components of *src to td's [min_mv, max_mv] range and stores them in *dst. */
+static av_always_inline void clamp_mv(VP9mv *dst, const VP9mv *src, VP9TileData *td)
+{
+    dst->y = av_clip(src->y, td->min_mv.y, td->max_mv.y);
+    dst->x = av_clip(src->x, td->min_mv.x, td->max_mv.x);
+}
+
+static void find_ref_mvs(VP9TileData *td,
+                         VP9mv *pmv, int ref, int z, int idx, int sb)
+{ // chooses a candidate MV for reference frame index `ref` and stores it in *pmv
+    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = { // per block size: 8 neighbour offsets { col, row }
+        [BS_64x64] = { {  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
+                       { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 } },
+        [BS_64x32] = { {  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
+                       { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 } },
+        [BS_32x64] = { { -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
+                       { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 } },
+        [BS_32x32] = { {  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
+                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
+        [BS_32x16] = { {  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
+                       { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
+        [BS_16x32] = { { -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
+                       {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 } },
+        [BS_16x16] = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
+                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
+        [BS_16x8]  = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
+                       {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 } },
+        [BS_8x16]  = { { -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
+                       { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 } },
+        [BS_8x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
+                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
+        [BS_8x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
+                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
+        [BS_4x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
+                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
+        [BS_4x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
+                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
+    };
+    const VP9Context *s = td->s;
+    VP9Block *b = td->b;
+    int row = td->row, col = td->col, row7 = td->row7;
+    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
+#define INVALID_MV 0x80008000U // sentinel: "no candidate stored yet" for mem / mem_sub8x8
+    uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
+    int i;
+    /* Macro rule: idx == 0 returns the first candidate; otherwise the first candidate differing from mem. */
+#define RETURN_DIRECT_MV(mv)                    \
+    do {                                        \
+        uint32_t m = AV_RN32A(&mv);             \
+        if (!idx) {                             \
+            AV_WN32A(pmv, m);                   \
+            return;                             \
+        } else if (mem == INVALID_MV) {         \
+            mem = m;                            \
+        } else if (m != mem) {                  \
+            AV_WN32A(pmv, m);                   \
+            return;                             \
+        }                                       \
+    } while (0)
+    /* sb >= 0: try MVs already stored in b->mv[] first (unclamped), then the above/left context. */
+    if (sb >= 0) {
+        if (sb == 2 || sb == 1) {
+            RETURN_DIRECT_MV(b->mv[0][z]);
+        } else if (sb == 3) {
+            RETURN_DIRECT_MV(b->mv[2][z]);
+            RETURN_DIRECT_MV(b->mv[1][z]);
+            RETURN_DIRECT_MV(b->mv[0][z]);
+        }
+        /* RETURN_MV clamps the candidate via clamp_mv(); for sb > 0 it compares the clamped value to mem. */
+#define RETURN_MV(mv)                                                  \
+    do {                                                               \
+        if (sb > 0) {                                                  \
+            VP9mv tmp;                                                 \
+            uint32_t m;                                                \
+            av_assert2(idx == 1);                                      \
+            av_assert2(mem != INVALID_MV);                             \
+            if (mem_sub8x8 == INVALID_MV) {                            \
+                clamp_mv(&tmp, &mv, td);                               \
+                m = AV_RN32A(&tmp);                                    \
+                if (m != mem) {                                        \
+                    AV_WN32A(pmv, m);                                  \
+                    return;                                            \
+                }                                                      \
+                mem_sub8x8 = AV_RN32A(&mv);                            \
+            } else if (mem_sub8x8 != AV_RN32A(&mv)) {                  \
+                clamp_mv(&tmp, &mv, td);                               \
+                m = AV_RN32A(&tmp);                                    \
+                if (m != mem) {                                        \
+                    AV_WN32A(pmv, m);                                  \
+                } else {                                               \
+                    /* BUG I'm pretty sure this isn't the intention */ \
+                    AV_WN32A(pmv, 0);                                  \
+                }                                                      \
+                return;                                                \
+            }                                                          \
+        } else {                                                       \
+            uint32_t m = AV_RN32A(&mv);                                \
+            if (!idx) {                                                \
+                clamp_mv(pmv, &mv, td);                                \
+                return;                                                \
+            } else if (mem == INVALID_MV) {                            \
+                mem = m;                                               \
+            } else if (m != mem) {                                     \
+                clamp_mv(pmv, &mv, td);                                \
+                return;                                                \
+            }                                                          \
+        }                                                              \
+    } while (0)
+        /* above/left: the ref index comes from the frame MV buffer, the MV itself from the context arrays */
+        if (row > 0) {
+            VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
+            if (mv->ref[0] == ref)
+                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
+            else if (mv->ref[1] == ref)
+                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
+        }
+        if (col > td->tile_col_start) {
+            VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
+            if (mv->ref[0] == ref)
+                RETURN_MV(td->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
+            else if (mv->ref[1] == ref)
+                RETURN_MV(td->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
+        }
+        i = 2; // the neighbour loop below then starts at table entry 2
+    } else {
+        i = 0;
+    }
+
+    // previously coded MVs in this neighborhood, using same reference frame
+    for (; i < 8; i++) {
+        int c = p[i][0] + col, r = p[i][1] + row;
+
+        if (c >= td->tile_col_start && c < s->cols &&
+            r >= 0 && r < s->rows) {
+            VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
+
+            if (mv->ref[0] == ref)
+                RETURN_MV(mv->mv[0]);
+            else if (mv->ref[1] == ref)
+                RETURN_MV(mv->mv[1]);
+        }
+    }
+
+    // MV at this position in previous frame, using same reference frame
+    if (s->s.h.use_last_frame_mvs) {
+        VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
+
+        if (!s->s.frames[REF_FRAME_MVPAIR].uses_2pass)
+            ff_thread_await_progress(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
+        if (mv->ref[0] == ref)
+            RETURN_MV(mv->mv[0]);
+        else if (mv->ref[1] == ref)
+            RETURN_MV(mv->mv[1]);
+    }
+    /* RETURN_SCALE_MV negates the candidate when `scale` is nonzero, then applies RETURN_MV. */
+#define RETURN_SCALE_MV(mv, scale)              \
+    do {                                        \
+        if (scale) {                            \
+            VP9mv mv_temp = { -mv.x, -mv.y };   \
+            RETURN_MV(mv_temp);                 \
+        } else {                                \
+            RETURN_MV(mv);                      \
+        }                                       \
+    } while (0)
+
+    // previously coded MVs in this neighborhood, using different reference frame
+    for (i = 0; i < 8; i++) {
+        int c = p[i][0] + col, r = p[i][1] + row;
+
+        if (c >= td->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
+            VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
+
+            if (mv->ref[0] != ref && mv->ref[0] >= 0)
+                RETURN_SCALE_MV(mv->mv[0],
+                                s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
+            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
+                // BUG - libvpx has this condition regardless of whether
+                // we used the first ref MV and pre-scaling
+                AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
+                RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
+            }
+        }
+    }
+
+    // MV at this position in previous frame, using different reference frame
+    if (s->s.h.use_last_frame_mvs) {
+        VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
+
+        // no need to await_progress, because we already did that above
+        if (mv->ref[0] != ref && mv->ref[0] >= 0)
+            RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
+        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
+            // BUG - libvpx has this condition regardless of whether
+            // we used the first ref MV and pre-scaling
+            AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
+            RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
+        }
+    }
+    /* nothing returned above: fall back to the zero vector, still run through clamp_mv() */
+    AV_ZERO32(pmv);
+    clamp_mv(pmv, pmv, td);
+#undef INVALID_MV
+#undef RETURN_MV
+#undef RETURN_SCALE_MV // NOTE(review): RETURN_DIRECT_MV is never #undef'd
+}
+
+static av_always_inline int read_mv_component(VP9TileData *td, int idx, int hp)
+{
+    /* Decodes one MV delta component using s->prob.p.mv_comp[idx] and counts every
+     * symbol in td->counts.mv_comp[idx]. With hp == 0 the lowest bit is not read
+     * but set to 1. The result is never 0: its magnitude is at least 1. */
+    const VP9Context *s = td->s;
+    const int negative = vpx_rac_get_prob(td->c, s->prob.p.mv_comp[idx].sign);
+    const int mv_class = vp89_rac_get_tree(td->c, ff_vp9_mv_class_tree,
+                                           s->prob.p.mv_comp[idx].classes);
+    int mag, frac, low;
+
+    td->counts.mv_comp[idx].sign[negative]++;
+    td->counts.mv_comp[idx].classes[mv_class]++;
+    if (!mv_class) {
+        /* class 0: a single bit sits above the fp symbol */
+        const int top = vpx_rac_get_prob(td->c, s->prob.p.mv_comp[idx].class0);
+
+        td->counts.mv_comp[idx].class0[top]++;
+        frac = vp89_rac_get_tree(td->c, ff_vp9_mv_fp_tree,
+                                 s->prob.p.mv_comp[idx].class0_fp[top]);
+        td->counts.mv_comp[idx].class0_fp[top][frac]++;
+        if (hp) {
+            low = vpx_rac_get_prob(td->c, s->prob.p.mv_comp[idx].class0_hp);
+            td->counts.mv_comp[idx].class0_hp[low]++;
+        } else {
+            // bug in libvpx - counted for bw entropy purposes even if the bit wasn't coded
+            low = 1;
+            td->counts.mv_comp[idx].class0_hp[1]++;
+        }
+        mag = (top << 3) | (frac << 1) | low;
+    } else {
+        int pos;
+
+        /* mv_class bits are read lowest first, then the fp symbol */
+        for (mag = 0, pos = 0; pos < mv_class; pos++) {
+            const int bit = vpx_rac_get_prob(td->c, s->prob.p.mv_comp[idx].bits[pos]);
+            mag |= bit << pos;
+            td->counts.mv_comp[idx].bits[pos][bit]++;
+        }
+        frac = vp89_rac_get_tree(td->c, ff_vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
+        td->counts.mv_comp[idx].fp[frac]++;
+        if (hp) {
+            low = vpx_rac_get_prob(td->c, s->prob.p.mv_comp[idx].hp);
+            td->counts.mv_comp[idx].hp[low]++;
+        } else {
+            // bug in libvpx - counted for bw entropy purposes even if the bit wasn't coded
+            low = 1;
+            td->counts.mv_comp[idx].hp[1]++;
+        }
+        mag = ((mag << 3) | (frac << 1) | low) + (8 << mv_class);
+    }
+
+    return negative ? -(mag + 1) : (mag + 1);
+}
+
+/*
+ * Fills one motion vector of the current block td->b.
+ *
+ * The starting value is whatever find_ref_mvs() selects for (ref, z); the
+ * second distinct candidate is requested for NEARMV, and NEWMV always searches
+ * with sb = -1. For NEWMV, or whenever sb is -1, the value keeps its lowest
+ * bit only if the frame header sets highprecisionmvs and both components are
+ * smaller than 64 in magnitude; otherwise odd components move one step
+ * towards zero. NEWMV then reads a joint symbol and adds the y and/or x
+ * delta decoded by read_mv_component().
+ *
+ * td   - tile state: range coder, symbol counters, current block
+ * mv   - the vector to fill
+ * ref  - reference frame index the candidates are searched for
+ * z    - which vector of the block's pair is handled (0 or 1)
+ * mode - inter mode; ZEROMV never reaches this helper
+ * sb   - sub-block index forwarded to find_ref_mvs(), or -1
+ */
+static av_always_inline void fill_one_mv(VP9TileData *td, VP9mv *mv,
+                                         int ref, int z, int mode, int sb)
+{
+    const VP9Context *s = td->s;
+    int hp = 0; // only read for NEWMV, where it is always recomputed below
+
+    // FIXME cache this value and reuse for other subblocks
+    find_ref_mvs(td, mv, ref, z, mode == NEARMV, mode == NEWMV ? -1 : sb);
+
+    // FIXME maybe move this code into find_ref_mvs()
+    if (mode == NEWMV || sb == -1) {
+        hp = s->s.h.highprecisionmvs && abs(mv->x) < 64 && abs(mv->y) < 64;
+        if (!hp) {
+            /* step odd components towards zero so the lowest bit is clear */
+            if (mv->y & 1)
+                mv->y += (mv->y < 0) ? 1 : -1;
+            if (mv->x & 1)
+                mv->x += (mv->x < 0) ? 1 : -1;
+        }
+    }
+
+    if (mode == NEWMV) {
+        enum MVJoint j = vp89_rac_get_tree(td->c, ff_vp9_mv_joint_tree,
+                                           s->prob.p.mv_joint);
+
+        td->counts.mv_joint[j]++;
+        if (j >= MV_JOINT_V) // joint codes a y delta
+            mv->y += read_mv_component(td, 0, hp);
+        if (j & 1)           // joint codes an x delta
+            mv->x += read_mv_component(td, 1, hp);
+    }
+}
+
+/*
+ * Fills the motion vector(s) of the current block td->b for the given mode.
+ *
+ * ZEROMV clears 64 bits at mv (presumably both entries of the pair) and is
+ * done. Otherwise mv[0] is derived for b->ref[0] and, for compound blocks
+ * (b->comp), mv[1] for b->ref[1]; both go through the same steps, including
+ * their own joint/component reads for NEWMV.
+ *
+ * sb is the sub-block index handed to the candidate search, or -1.
+ */
+void ff_vp9_fill_mv(VP9TileData *td, VP9mv *mv, int mode, int sb)
+{
+    VP9Block *b = td->b;
+
+    if (mode == ZEROMV) {
+        AV_ZERO64(mv);
+        return;
+    }
+
+    fill_one_mv(td, &mv[0], b->ref[0], 0, mode, sb);
+    if (b->comp)
+        fill_one_mv(td, &mv[1], b->ref[1], 1, mode, sb);
+}
diff --git a/media/ffvpx/libavcodec/vp9prob.c b/media/ffvpx/libavcodec/vp9prob.c
new file mode 100644
index 0000000000..69a5180770
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9prob.c
@@ -0,0 +1,272 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vp9.h"
+#include "vp9dec.h"
+
+static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
+                                        int max_count, int update_factor)
+{
+    /* Pulls *p towards 256 * ct0 / (ct0 + ct1); fewer than max_count samples weaken the pull. */
+    const unsigned total = ct0 + ct1;
+    unsigned target, current;
+
+    if (total) {
+        update_factor = FASTDIV(update_factor * FFMIN(total, max_count), max_count);
+        current = *p;
+        target  = ((((int64_t) ct0) << 8) + (total >> 1)) / total;
+        target  = av_clip(target, 1, 255);
+        // == (current * (256 - update_factor) + target * update_factor + 128) >> 8
+        *p = current + (((target - current) * update_factor + 128) >> 8);
+    }
+}
+
+void ff_vp9_adapt_probs(VP9Context *s)
+{
+    int i, j, k, l, m;
+    ProbContext *p = &s->prob_ctx[s->s.h.framectxid].p; // context that gets adapted in place
+    int uf = (s->s.h.keyframe || s->s.h.intraonly || !s->last_keyframe) ? 112 : 128;
+    /* Adapts the stored context prob_ctx[framectxid] from the symbol counts in s->td[0].counts. */
+    // coefficients: the only group using max_count 24 and uf (112 or 128) instead of 20/128
+    for (i = 0; i < 4; i++)
+        for (j = 0; j < 2; j++)
+            for (k = 0; k < 2; k++)
+                for (l = 0; l < 6; l++)
+                    for (m = 0; m < 6; m++) {
+                        uint8_t *pp = s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m];
+                        unsigned *e = s->td[0].counts.eob[i][j][k][l][m];
+                        unsigned *c = s->td[0].counts.coef[i][j][k][l][m];
+
+                        if (l == 0 && m >= 3) // dc only has 3 pt
+                            break;
+
+                        adapt_prob(&pp[0], e[0], e[1], 24, uf);
+                        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
+                        adapt_prob(&pp[2], c[1], c[2], 24, uf);
+                    }
+    /* key/intra-only frames: copy skip and tx probs from s->prob.p, then skip all remaining groups */
+    if (s->s.h.keyframe || s->s.h.intraonly) {
+        memcpy(p->skip,  s->prob.p.skip,  sizeof(p->skip));
+        memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
+        memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
+        memcpy(p->tx8p,  s->prob.p.tx8p,  sizeof(p->tx8p));
+        return;
+    }
+
+    // skip flag
+    for (i = 0; i < 3; i++)
+        adapt_prob(&p->skip[i], s->td[0].counts.skip[i][0],
+                   s->td[0].counts.skip[i][1], 20, 128);
+
+    // intra/inter flag
+    for (i = 0; i < 4; i++)
+        adapt_prob(&p->intra[i], s->td[0].counts.intra[i][0],
+                   s->td[0].counts.intra[i][1], 20, 128);
+
+    // comppred flag
+    if (s->s.h.comppredmode == PRED_SWITCHABLE) {
+        for (i = 0; i < 5; i++)
+            adapt_prob(&p->comp[i], s->td[0].counts.comp[i][0],
+                       s->td[0].counts.comp[i][1], 20, 128);
+    }
+
+    // reference frames
+    if (s->s.h.comppredmode != PRED_SINGLEREF) {
+        for (i = 0; i < 5; i++)
+            adapt_prob(&p->comp_ref[i], s->td[0].counts.comp_ref[i][0],
+                       s->td[0].counts.comp_ref[i][1], 20, 128);
+    }
+
+    if (s->s.h.comppredmode != PRED_COMPREF) {
+        for (i = 0; i < 5; i++) {
+            uint8_t *pp = p->single_ref[i];
+            unsigned (*c)[2] = s->td[0].counts.single_ref[i];
+
+            adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
+            adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
+        }
+    }
+
+    // block partitioning: chained binary splits - c[0] vs. rest, c[1] vs. rest, c[2] vs. c[3]
+    for (i = 0; i < 4; i++)
+        for (j = 0; j < 4; j++) {
+            uint8_t *pp = p->partition[i][j];
+            unsigned *c = s->td[0].counts.partition[i][j];
+
+            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+            adapt_prob(&pp[2], c[2], c[3], 20, 128);
+        }
+
+    // tx size
+    if (s->s.h.txfmmode == TX_SWITCHABLE) {
+        for (i = 0; i < 2; i++) {
+            unsigned *c16 = s->td[0].counts.tx16p[i], *c32 = s->td[0].counts.tx32p[i];
+
+            adapt_prob(&p->tx8p[i], s->td[0].counts.tx8p[i][0],
+                       s->td[0].counts.tx8p[i][1], 20, 128);
+            adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
+            adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
+            adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
+            adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
+            adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
+        }
+    }
+
+    // interpolation filter
+    if (s->s.h.filtermode == FILTER_SWITCHABLE) {
+        for (i = 0; i < 4; i++) {
+            uint8_t *pp = p->filter[i];
+            unsigned *c = s->td[0].counts.filter[i];
+
+            adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
+            adapt_prob(&pp[1], c[1], c[2], 20, 128);
+        }
+    }
+
+    // inter modes: same chain, but the counts are consumed in the order 2, 0, 1, 3
+    for (i = 0; i < 7; i++) {
+        uint8_t *pp = p->mv_mode[i];
+        unsigned *c = s->td[0].counts.mv_mode[i];
+
+        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
+        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
+        adapt_prob(&pp[2], c[1], c[3], 20, 128);
+    }
+
+    // mv joints
+    {
+        uint8_t *pp = p->mv_joint;
+        unsigned *c = s->td[0].counts.mv_joint;
+
+        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+        adapt_prob(&pp[2], c[2], c[3], 20, 128);
+    }
+
+    // mv components (two sets, i = 0 and 1)
+    for (i = 0; i < 2; i++) {
+        uint8_t *pp;
+        unsigned *c, (*c2)[2], sum;
+
+        adapt_prob(&p->mv_comp[i].sign, s->td[0].counts.mv_comp[i].sign[0],
+                   s->td[0].counts.mv_comp[i].sign[1], 20, 128);
+
+        pp  = p->mv_comp[i].classes;
+        c   = s->td[0].counts.mv_comp[i].classes;
+        sum = c[1] + c[2] + c[3] + c[4] + c[5] +
+              c[6] + c[7] + c[8] + c[9] + c[10]; // all classes except 0
+        adapt_prob(&pp[0], c[0], sum, 20, 128);
+        sum -= c[1];
+        adapt_prob(&pp[1], c[1], sum, 20, 128);
+        sum -= c[2] + c[3];
+        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
+        adapt_prob(&pp[3], c[2], c[3], 20, 128);
+        sum -= c[4] + c[5];
+        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
+        adapt_prob(&pp[5], c[4], c[5], 20, 128);
+        sum -= c[6];
+        adapt_prob(&pp[6], c[6], sum, 20, 128);
+        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
+        adapt_prob(&pp[8], c[7], c[8], 20, 128);
+        adapt_prob(&pp[9], c[9], c[10], 20, 128);
+
+        adapt_prob(&p->mv_comp[i].class0, s->td[0].counts.mv_comp[i].class0[0],
+                   s->td[0].counts.mv_comp[i].class0[1], 20, 128);
+        pp = p->mv_comp[i].bits;
+        c2 = s->td[0].counts.mv_comp[i].bits;
+        for (j = 0; j < 10; j++)
+            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
+
+        for (j = 0; j < 2; j++) {
+            pp = p->mv_comp[i].class0_fp[j];
+            c  = s->td[0].counts.mv_comp[i].class0_fp[j];
+            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+            adapt_prob(&pp[2], c[2], c[3], 20, 128);
+        }
+        pp = p->mv_comp[i].fp;
+        c  = s->td[0].counts.mv_comp[i].fp;
+        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+        adapt_prob(&pp[2], c[2], c[3], 20, 128);
+
+        if (s->s.h.highprecisionmvs) { // hp probabilities are only adapted when hp MVs are enabled
+            adapt_prob(&p->mv_comp[i].class0_hp,
+                       s->td[0].counts.mv_comp[i].class0_hp[0],
+                       s->td[0].counts.mv_comp[i].class0_hp[1], 20, 128);
+            adapt_prob(&p->mv_comp[i].hp, s->td[0].counts.mv_comp[i].hp[0],
+                       s->td[0].counts.mv_comp[i].hp[1], 20, 128);
+        }
+    }
+
+    // y intra modes
+    for (i = 0; i < 4; i++) {
+        uint8_t *pp = p->y_mode[i];
+        unsigned *c = s->td[0].counts.y_mode[i], sum, s2;
+        /* sum starts as all counts except index 2 - presumably DC_PRED == 2; TODO confirm */
+        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
+        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
+        sum -= c[TM_VP8_PRED];
+        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
+        sum -= c[VERT_PRED];
+        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
+        s2   = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
+        sum -= s2;
+        adapt_prob(&pp[3], s2, sum, 20, 128);
+        s2 -= c[HOR_PRED];
+        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
+        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED],
+                   20, 128);
+        sum -= c[DIAG_DOWN_LEFT_PRED];
+        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
+        sum -= c[VERT_LEFT_PRED];
+        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
+        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
+    }
+
+    // uv intra modes: same tree walk as for y, over 10 contexts
+    for (i = 0; i < 10; i++) {
+        uint8_t *pp = p->uv_mode[i];
+        unsigned *c = s->td[0].counts.uv_mode[i], sum, s2;
+        /* sum starts as all counts except index 2 - presumably DC_PRED == 2; TODO confirm */
+        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
+        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
+        sum -= c[TM_VP8_PRED];
+        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
+        sum -= c[VERT_PRED];
+        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
+        s2   = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
+        sum -= s2;
+        adapt_prob(&pp[3], s2, sum, 20, 128);
+        s2 -= c[HOR_PRED];
+        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
+        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED],
+                   20, 128);
+        sum -= c[DIAG_DOWN_LEFT_PRED];
+        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
+        sum -= c[VERT_LEFT_PRED];
+        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
+        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
+    }
+}
diff --git a/media/ffvpx/libavcodec/vp9recon.c b/media/ffvpx/libavcodec/vp9recon.c
new file mode 100644
index 0000000000..073c04b47d
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9recon.c
@@ -0,0 +1,654 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/mem_internal.h"
+
+#include "threadframe.h"
+#include "videodsp.h"
+#include "vp9data.h"
+#include "vp9dec.h"
+
+static av_always_inline int check_intra_mode(VP9TileData *td, int mode, uint8_t **a, /* *a: in = scratch buffer, out = top edge to hand to the predictor */
+                                             uint8_t *dst_edge, ptrdiff_t stride_edge, /* neighbour source when y == 0 (top) or x == 0 (left) */
+                                             uint8_t *dst_inner, ptrdiff_t stride_inner, /* neighbour source for all other positions */
+                                             uint8_t *l, int col, int x, int w, /* l: left-edge output; x and w are in 4-sample units */
+                                             int row, int y, enum TxfmMode tx, /* an edge holds 4 << tx samples */
+                                             int p, int ss_h, int ss_v, int bytesperpixel) /* p indexes s->intra_pred_data[] */
+{ /* Maps `mode` to a variant usable with the neighbours that exist, prepares the top (*a) and left (l) edges, and returns that variant. */
+    const VP9Context *s = td->s;
+    int have_top = row > 0 || y > 0; /* not in the first row of the frame, or not in the first row of this block */
+    int have_left = col > td->tile_col_start || x > 0; /* not in the first column of the tile, or not in the first column of this block */
+    int have_right = x < w - 1; /* not the last column of this block */
+    int bpp = s->s.h.bpp;
+    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = { /* replacement mode when a neighbour is missing */
+        [VERT_PRED]            = { { DC_127_PRED,          VERT_PRED            },
+                                   { DC_127_PRED,          VERT_PRED            } },
+        [HOR_PRED]             = { { DC_129_PRED,          DC_129_PRED          },
+                                   { HOR_PRED,             HOR_PRED             } },
+        [DC_PRED]              = { { DC_128_PRED,          TOP_DC_PRED          },
+                                   { LEFT_DC_PRED,         DC_PRED              } },
+        [DIAG_DOWN_LEFT_PRED]  = { { DC_127_PRED,          DIAG_DOWN_LEFT_PRED  },
+                                   { DC_127_PRED,          DIAG_DOWN_LEFT_PRED  } },
+        [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
+                                   { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
+        [VERT_RIGHT_PRED]      = { { VERT_RIGHT_PRED,      VERT_RIGHT_PRED      },
+                                   { VERT_RIGHT_PRED,      VERT_RIGHT_PRED      } },
+        [HOR_DOWN_PRED]        = { { HOR_DOWN_PRED,        HOR_DOWN_PRED        },
+                                   { HOR_DOWN_PRED,        HOR_DOWN_PRED        } },
+        [VERT_LEFT_PRED]       = { { DC_127_PRED,          VERT_LEFT_PRED       },
+                                   { DC_127_PRED,          VERT_LEFT_PRED       } },
+        [HOR_UP_PRED]          = { { DC_129_PRED,          DC_129_PRED          },
+                                   { HOR_UP_PRED,          HOR_UP_PRED          } },
+        [TM_VP8_PRED]          = { { DC_129_PRED,          VERT_PRED            },
+                                   { HOR_PRED,             TM_VP8_PRED          } },
+    };
+    static const struct { /* which neighbouring samples each (converted) mode reads */
+        uint8_t needs_left:1;
+        uint8_t needs_top:1;
+        uint8_t needs_topleft:1;
+        uint8_t needs_topright:1;
+        uint8_t invert_left:1; /* store the left edge top-to-bottom instead of bottom-to-top */
+    } edges[N_INTRA_PRED_MODES] = {
+        [VERT_PRED]            = { .needs_top  = 1 },
+        [HOR_PRED]             = { .needs_left = 1 },
+        [DC_PRED]              = { .needs_top  = 1, .needs_left = 1 },
+        [DIAG_DOWN_LEFT_PRED]  = { .needs_top  = 1, .needs_topright = 1 },
+        [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
+                                   .needs_topleft = 1 },
+        [VERT_RIGHT_PRED]      = { .needs_left = 1, .needs_top = 1,
+                                   .needs_topleft = 1 },
+        [HOR_DOWN_PRED]        = { .needs_left = 1, .needs_top = 1,
+                                   .needs_topleft = 1 },
+        [VERT_LEFT_PRED]       = { .needs_top  = 1, .needs_topright = 1 },
+        [HOR_UP_PRED]          = { .needs_left = 1, .invert_left = 1 },
+        [TM_VP8_PRED]          = { .needs_left = 1, .needs_top = 1,
+                                   .needs_topleft = 1 },
+        [LEFT_DC_PRED]         = { .needs_left = 1 },
+        [TOP_DC_PRED]          = { .needs_top  = 1 },
+        [DC_128_PRED]          = { 0 },
+        [DC_127_PRED]          = { 0 },
+        [DC_129_PRED]          = { 0 }
+    };
+
+    av_assert2(mode >= 0 && mode < 10); /* only the 10 entries of mode_conv[] are valid inputs */
+    mode = mode_conv[mode][have_left][have_top]; /* from here on `mode` may be one of the DC_12x / LEFT_DC / TOP_DC variants */
+    if (edges[mode].needs_top) {
+        uint8_t *top, *topleft; /* top is set only if have_top, topleft only if have_top && have_left */
+        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4; /* samples wanted vs. samples left in this row (presumably up to the frame's right edge) */
+        int n_px_need_tr = 0;
+
+        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
+            n_px_need_tr = 4; /* four extra samples to the right of a 4x4 block */
+
+        // if top of sb64-row, use s->intra_pred_data[] instead of
+        // dst[-stride] for intra prediction (it contains pre- instead of
+        // post-loopfilter data)
+        if (have_top) {
+            top = !(row & 7) && !y ?
+                s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
+                y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
+            if (have_left)
+                topleft = !(row & 7) && !y ?
+                    s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
+                    y == 0 || x == 0 ? &dst_edge[-stride_edge] :
+                    &dst_inner[-stride_inner];
+        }
+
+        if (have_top &&
+            (!edges[mode].needs_topleft || (have_left && top == topleft)) && /* short-circuits keep unset top/topleft from being read */
+            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
+            n_px_need + n_px_need_tr <= n_px_have) {
+            *a = top; /* everything needed is contiguous at `top`: use it in place, no copy */
+        } else {
+            if (have_top) {
+                if (n_px_need <= n_px_have) {
+                    memcpy(*a, top, n_px_need * bytesperpixel);
+                } else {
+#define memset_bpp(c, i1, v, i2, num) do { /* c[i1 .. i1+num) = v[i2], indices in samples */ \
+    if (bytesperpixel == 1) { \
+        memset(&(c)[(i1)], (v)[(i2)], (num)); \
+    } else { \
+        int n, val = AV_RN16A(&(v)[(i2) * 2]); \
+        for (n = 0; n < (num); n++) { \
+            AV_WN16A(&(c)[((i1) + n) * 2], val); \
+        } \
+    } \
+} while (0)
+                    memcpy(*a, top, n_px_have * bytesperpixel); /* copy what exists ... */
+                    memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have); /* ... then repeat the last available sample */
+                }
+            } else {
+#define memset_val(c, val, num) do { /* c[0 .. num) = val, indices in samples */ \
+    if (bytesperpixel == 1) { \
+        memset((c), (val), (num)); \
+    } else { \
+        int n; \
+        for (n = 0; n < (num); n++) { \
+            AV_WN16A(&(c)[n * 2], (val)); \
+        } \
+    } \
+} while (0)
+                memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need); /* no row above: constant one below 128 << (bpp - 8) */
+            }
+            if (edges[mode].needs_topleft) {
+                if (have_left && have_top) {
+#define assign_bpp(c, i1, v, i2) do { /* c[i1] = v[i2], indices in samples */ \
+    if (bytesperpixel == 1) { \
+        (c)[(i1)] = (v)[(i2)]; \
+    } else { \
+        AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
+    } \
+} while (0)
+                    assign_bpp(*a, -1, topleft, -1); /* the top-left sample lives at index -1 of the top edge */
+                } else {
+#define assign_val(c, i, v) do { /* c[i] = v, index in samples */ \
+    if (bytesperpixel == 1) { \
+        (c)[(i)] = (v); \
+    } else { \
+        AV_WN16A(&(c)[(i) * 2], (v)); \
+    } \
+} while (0)
+                    assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1)); /* synthetic top-left: +1 if a top row exists, else -1 */
+                }
+            }
+            if (tx == TX_4X4 && edges[mode].needs_topright) {
+                if (have_top && have_right &&
+                    n_px_need + n_px_need_tr <= n_px_have) {
+                    memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel); /* real top-right samples */
+                } else {
+                    memset_bpp(*a, 4, *a, 3, 4); /* otherwise repeat top sample 3 into samples 4..7 */
+                }
+            }
+        }
+    }
+    if (edges[mode].needs_left) {
+        if (have_left) {
+            int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4; /* rows wanted vs. rows left (presumably up to the frame's bottom edge) */
+            uint8_t *dst = x == 0 ? dst_edge : dst_inner;
+            ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
+
+            if (edges[mode].invert_left) { /* l[0] is the top-most row */
+                if (n_px_need <= n_px_have) {
+                    for (i = 0; i < n_px_need; i++)
+                        assign_bpp(l, i, &dst[i * stride], -1); /* sample immediately left of row i */
+                } else {
+                    for (i = 0; i < n_px_have; i++)
+                        assign_bpp(l, i, &dst[i * stride], -1);
+                    memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have); /* pad with the last available row */
+                }
+            } else { /* l[n_px_need - 1] is the top-most row */
+                if (n_px_need <= n_px_have) {
+                    for (i = 0; i < n_px_need; i++)
+                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
+                } else {
+                    for (i = 0; i < n_px_have; i++)
+                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
+                    memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have); /* pad the low indices with the last available row */
+                }
+            }
+        } else {
+            memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx); /* no left neighbour: constant one above 128 << (bpp - 8) */
+        }
+    }
+
+    return mode;
+}
+
+static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off, /* y_off: byte offset into the current frame's plane 0 */
+                                         ptrdiff_t uv_off, int bytesperpixel) /* uv_off: byte offset into planes 1 and 2 */
+{ /* Intra-predicts every transform block of the current block and adds its residual: luma first, then both chroma planes. */
+    const VP9Context *s = td->s;
+    VP9Block *b = td->b;
+    int row = td->row, col = td->col;
+    int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n; /* step1d: transform edge in the units x/y count in */
+    int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2); /* step: advance of n per transform block */
+    int end_x = FFMIN(2 * (s->cols - col), w4); /* clip the loop to s->cols (presumably the frame's right edge) */
+    int end_y = FFMIN(2 * (s->rows - row), h4); /* clip the loop to s->rows (presumably the frame's bottom edge) */
+    int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless; /* lossless streams use itxfm_add[] entries offset by 4 */
+    int uvstep1d = 1 << b->uvtx, p;
+    uint8_t *dst = td->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off; /* dst: td's output pointer; dst_r: same position in the frame buffer */
+    LOCAL_ALIGNED_32(uint8_t, a_buf, [96]); /* top-edge scratch; edges start at a_buf[32], leaving room for the sample written at index -1 */
+    LOCAL_ALIGNED_32(uint8_t, l, [64]); /* left-edge scratch */
+
+    for (n = 0, y = 0; y < end_y; y += step1d) {
+        uint8_t *ptr = dst, *ptr_r = dst_r;
+        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
+                               ptr_r += 4 * step1d * bytesperpixel, n += step) {
+            int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
+                               y * 2 + x : 0]; /* per-sub-block mode only for block sizes after BS_8x8 with 4x4 transforms */
+            uint8_t *a = &a_buf[32];
+            enum TxfmType txtp = ff_vp9_intra_txfm_type[mode]; /* looked up from the coded mode, before check_intra_mode() converts it */
+            int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n]; /* transforms above 8x8 store a 16-bit eob */
+
+            mode = check_intra_mode(td, mode, &a, ptr_r,
+                                    s->s.frames[CUR_FRAME].tf.f->linesize[0],
+                                    ptr, td->y_stride, l,
+                                    col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel); /* may redirect `a` into the frame */
+            s->dsp.intra_pred[b->tx][mode](ptr, td->y_stride, l, a);
+            if (eob)
+                s->dsp.itxfm_add[tx][txtp](ptr, td->y_stride,
+                                           td->block + 16 * n * bytesperpixel, eob);
+        }
+        dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
+        dst   += 4 * step1d * td->y_stride;
+    }
+
+    // U/V
+    w4    >>= s->ss_h; /* chroma extents shrink by the subsampling shifts */
+    end_x >>= s->ss_h;
+    end_y >>= s->ss_v;
+    step = 1 << (b->uvtx * 2);
+    for (p = 0; p < 2; p++) {
+        dst   = td->dst[1 + p];
+        dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
+        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
+            uint8_t *ptr = dst, *ptr_r = dst_r;
+            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
+                                   ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
+                int mode = b->uvmode; /* one mode for the whole chroma block */
+                uint8_t *a = &a_buf[32];
+                int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
+
+                mode = check_intra_mode(td, mode, &a, ptr_r,
+                                        s->s.frames[CUR_FRAME].tf.f->linesize[1], /* linesize[1] is used for both chroma planes */
+                                        ptr, td->uv_stride, l, col, x, w4, row, y,
+                                        b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
+                s->dsp.intra_pred[b->uvtx][mode](ptr, td->uv_stride, l, a);
+                if (eob)
+                    s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride, /* chroma always uses DCT_DCT here */
+                                                    td->uvblock[p] + 16 * n * bytesperpixel, eob);
+            }
+            dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
+            dst   += 4 * uvstep1d * td->uv_stride;
+        }
+    }
+}
+
+void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
+{ /* Intra reconstruction of td's current block with one byte per sample; y_off/uv_off are byte offsets into the current frame's luma/chroma planes. */
+    intra_recon(td, y_off, uv_off, 1);
+}
+
+void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
+{ /* Intra reconstruction of td's current block with two bytes per sample; y_off/uv_off are byte offsets into the current frame's luma/chroma planes. */
+    intra_recon(td, y_off, uv_off, 2);
+}
+
+static av_always_inline void mc_luma_unscaled(VP9TileData *td, const vp9_mc_func (*mc)[2],
+                                              uint8_t *dst, ptrdiff_t dst_stride,
+                                              const uint8_t *ref, ptrdiff_t ref_stride,
+                                              const ThreadFrame *ref_frame,
+                                              ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
+                                              int bw, int bh, int w, int h, int bytesperpixel)
+{
+    const VP9Context *s = td->s;
+    const int frac_x = mv->x & 7, frac_y = mv->y & 7; /* 1/8-sample phase of the vector */
+    const int filt_x = !!frac_x, filt_y = !!frac_y;   /* 1 if that axis needs filtering */
+    int progress;
+
+    x   += mv->x >> 3;
+    y   += mv->y >> 3;
+    ref += y * ref_stride + x * bytesperpixel;
+    /* Wait for the reference rows this block reads. The extra 7 rows cover
+     * samples that the longest loopfilter of the next sbrow can still change.
+     * FIXME bilinear filter only needs 0/1 pixels, not 3/4 */
+    progress = (y + bh + 4 * filt_y + 7) >> 6;
+    ff_thread_await_progress(ref_frame, FFMAX(progress, 0), 0);
+    /* Use an emulated edge when the filter taps would leave the frame. The
+     * arm/aarch64 _hv filters read one more row than actually needed, so the
+     * bottom test (5) triggers one sample sooner than the right test (4). */
+    if (x < filt_x * 3 || y < filt_y * 3 ||
+        x + filt_x * 4 > w - bw || y + filt_y * 5 > h - bh) {
+        s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
+                                 ref - filt_y * 3 * ref_stride - filt_x * 3 * bytesperpixel,
+                                 160, ref_stride,
+                                 bw + filt_x * 7, bh + filt_y * 7,
+                                 x - filt_x * 3, y - filt_y * 3, w, h);
+        ref        = td->edge_emu_buffer + filt_y * 3 * 160 + filt_x * 3 * bytesperpixel;
+        ref_stride = 160;
+    }
+    mc[filt_x][filt_y](dst, dst_stride, ref, ref_stride, bh, frac_x << 1, frac_y << 1);
+}
+
+static av_always_inline void mc_chroma_unscaled(VP9TileData *td, const vp9_mc_func (*mc)[2],
+                                                uint8_t *dst_u, uint8_t *dst_v,
+                                                ptrdiff_t dst_stride,
+                                                const uint8_t *ref_u, ptrdiff_t src_stride_u,
+                                                const uint8_t *ref_v, ptrdiff_t src_stride_v,
+                                                const ThreadFrame *ref_frame,
+                                                ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
+                                                int bw, int bh, int w, int h, int bytesperpixel)
+{
+    const VP9Context *s = td->s;
+    const int vec_x = mv->x * (1 << !s->ss_h), vec_y = mv->y * (1 << !s->ss_v); /* 1/16-sample units */
+    const int frac_x = vec_x & 15, frac_y = vec_y & 15;
+    const int filt_x = !!frac_x, filt_y = !!frac_y; /* 1 if that axis needs filtering */
+    int progress;
+
+    x     += vec_x >> 4;
+    y     += vec_y >> 4;
+    ref_u += y * src_stride_u + x * bytesperpixel;
+    ref_v += y * src_stride_v + x * bytesperpixel;
+    /* +7: the last 7 rows of an sbrow can still be changed by the next sbrow's
+     * longest loopfilter. FIXME bilinear filter only needs 0/1 pixels, not 3/4 */
+    progress = (y + bh + 4 * filt_y + 7) >> (6 - s->ss_v);
+    ff_thread_await_progress(ref_frame, FFMAX(progress, 0), 0);
+    /* Read the reference directly only if every filter tap stays inside it. The
+     * arm/aarch64 _hv filters read one more row than actually needed, so the
+     * bottom test (5) is one sample stricter than the right test (4). */
+    if (x >= filt_x * 3 && y >= filt_y * 3 &&
+        x + filt_x * 4 <= w - bw && y + filt_y * 5 <= h - bh) {
+        mc[filt_x][filt_y](dst_u, dst_stride, ref_u, src_stride_u, bh, frac_x, frac_y);
+        mc[filt_x][filt_y](dst_v, dst_stride, ref_v, src_stride_v, bh, frac_x, frac_y);
+    } else {
+        s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
+                                 ref_u - filt_y * 3 * src_stride_u - filt_x * 3 * bytesperpixel,
+                                 160, src_stride_u,
+                                 bw + filt_x * 7, bh + filt_y * 7,
+                                 x - filt_x * 3, y - filt_y * 3, w, h);
+        ref_u = td->edge_emu_buffer + filt_y * 3 * 160 + filt_x * 3 * bytesperpixel;
+        mc[filt_x][filt_y](dst_u, dst_stride, ref_u, 160, bh, frac_x, frac_y);
+
+        s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
+                                 ref_v - filt_y * 3 * src_stride_v - filt_x * 3 * bytesperpixel,
+                                 160, src_stride_v,
+                                 bw + filt_x * 7, bh + filt_y * 7,
+                                 x - filt_x * 3, y - filt_y * 3, w, h);
+        ref_v = td->edge_emu_buffer + filt_y * 3 * 160 + filt_x * 3 * bytesperpixel;
+        mc[filt_x][filt_y](dst_v, dst_stride, ref_v, 160, bh, frac_x, frac_y);
+    }
+}
+
+#define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
+                    px, py, pw, ph, bw, bh, w, h, i) \
+    mc_luma_unscaled(td, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
+                     mv, bw, bh, w, h, bytesperpixel) /* `mc` is substituted into s->dsp.<mc>; px/py/pw/ph/i are ignored; needs `s` and `bytesperpixel` in scope */
+#define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
+    mc_chroma_unscaled(td, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+                       row, col, mv, bw, bh, w, h, bytesperpixel) /* chroma counterpart, same substitution and scope requirements */
+#define SCALED 0 /* consumed by vp9_mc_template.c (not visible here) */
+#define FN(x) x##_8bpp /* name suffix for this instantiation of the template */
+#define BYTES_PER_PIXEL 1
+#include "vp9_mc_template.c" /* 8-bit unscaled instance; presumably defines inter_pred_8bpp(), which inter_recon() calls */
+#undef FN
+#undef BYTES_PER_PIXEL
+#define FN(x) x##_16bpp
+#define BYTES_PER_PIXEL 2
+#include "vp9_mc_template.c" /* 16-bit unscaled instance; presumably defines inter_pred_16bpp() */
+#undef mc_luma_dir
+#undef mc_chroma_dir
+#undef FN
+#undef BYTES_PER_PIXEL
+#undef SCALED
+
+static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, /* smc: scaled filter, used only when the sizes differ */
+                                            const vp9_mc_func (*mc)[2], /* mc: unscaled filters, used when the sizes match */
+                                            uint8_t *dst, ptrdiff_t dst_stride,
+                                            const uint8_t *ref, ptrdiff_t ref_stride,
+                                            const ThreadFrame *ref_frame,
+                                            ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
+                                            int px, int py, int pw, int ph, /* only used for clamping the vector */
+                                            int bw, int bh, int w, int h, int bytesperpixel,
+                                            const uint16_t *scale, const uint8_t *step) /* index 0 = horizontal, 1 = vertical */
+{ /* Luma motion compensation from a reference that may have a different size than the current frame. */
+    const VP9Context *s = td->s;
+    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
+        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) { /* same dimensions: no scaling needed */
+        mc_luma_unscaled(td, mc, dst, dst_stride, ref, ref_stride, ref_frame,
+                         y, x, in_mv, bw, bh, w, h, bytesperpixel);
+    } else {
+#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14) /* 14-bit fixed point; stays defined until mc_chroma_scaled() #undefs it */
+    int mx, my;
+    int refbw_m1, refbh_m1;
+    int th;
+    VP9mv mv;
+
+    mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8); /* clamp the vector before scaling */
+    mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
+    // BUG libvpx seems to scale the two components separately. This introduces
+    // rounding errors but we have to reproduce them to be exactly compatible
+    // with the output from libvpx...
+    mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0); /* position in the reference, 1/16-sample units */
+    my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
+
+    y = my >> 4; /* whole-sample part */
+    x = mx >> 4;
+    ref += y * ref_stride + x * bytesperpixel;
+    mx &= 15; /* 1/16-sample phase */
+    my &= 15;
+    refbw_m1 = ((bw - 1) * step[0] + mx) >> 4; /* reference samples spanned by the block, minus one */
+    refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
+    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+    // we use +7 because the last 7 pixels of each sbrow can be changed in
+    // the longest loopfilter of the next sbrow
+    th = (y + refbh_m1 + 4 + 7) >> 6;
+    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+    // The arm/aarch64 _hv filters read one more row than what actually is
+    // needed, so switch to emulated edge one pixel sooner vertically
+    // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
+    if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
+        s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
+                                 ref - 3 * ref_stride - 3 * bytesperpixel,
+                                 288, ref_stride, /* 288: row stride used inside edge_emu_buffer on this path */
+                                 refbw_m1 + 8, refbh_m1 + 8,
+                                 x - 3, y - 3, w, h);
+        ref = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel; /* skip the 3-sample border that was just emulated */
+        ref_stride = 288;
+    }
+    smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
+    }
+}
+
+static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, /* smc: scaled filter, used only when the sizes differ */
+                                              const vp9_mc_func (*mc)[2], /* mc: unscaled filters, used when the sizes match */
+                                              uint8_t *dst_u, uint8_t *dst_v,
+                                              ptrdiff_t dst_stride,
+                                              const uint8_t *ref_u, ptrdiff_t src_stride_u,
+                                              const uint8_t *ref_v, ptrdiff_t src_stride_v,
+                                              const ThreadFrame *ref_frame,
+                                              ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
+                                              int px, int py, int pw, int ph, /* only used for clamping the vector */
+                                              int bw, int bh, int w, int h, int bytesperpixel,
+                                              const uint16_t *scale, const uint8_t *step) /* index 0 = horizontal, 1 = vertical */
+{ /* Chroma (U then V) motion compensation from a reference that may have a different size than the current frame. */
+    const VP9Context *s = td->s;
+    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
+        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) { /* same dimensions: no scaling needed */
+        mc_chroma_unscaled(td, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
+                           ref_v, src_stride_v, ref_frame,
+                           y, x, in_mv, bw, bh, w, h, bytesperpixel);
+    } else {
+    int mx, my;
+    int refbw_m1, refbh_m1;
+    int th;
+    VP9mv mv;
+
+    if (s->ss_h) { /* horizontally subsampled chroma */
+        // BUG https://code.google.com/p/webm/issues/detail?id=820
+        mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
+        mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
+    } else { /* same formula as in mc_luma_scaled() */
+        mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
+        mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
+    }
+    if (s->ss_v) { /* vertically subsampled chroma */
+        // BUG https://code.google.com/p/webm/issues/detail?id=820
+        mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
+        my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
+    } else {
+        mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
+        my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
+    }
+#undef scale_mv /* the macro was defined inside mc_luma_scaled() */
+    y = my >> 4; /* whole-sample part */
+    x = mx >> 4;
+    ref_u += y * src_stride_u + x * bytesperpixel;
+    ref_v += y * src_stride_v + x * bytesperpixel;
+    mx &= 15; /* 1/16-sample phase */
+    my &= 15;
+    refbw_m1 = ((bw - 1) * step[0] + mx) >> 4; /* reference samples spanned by the block, minus one */
+    refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
+    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+    // we use +7 because the last 7 pixels of each sbrow can be changed in
+    // the longest loopfilter of the next sbrow
+    th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
+    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+    // The arm/aarch64 _hv filters read one more row than what actually is
+    // needed, so switch to emulated edge one pixel sooner vertically
+    // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
+    if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
+        s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
+                                 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
+                                 288, src_stride_u, /* 288: row stride used inside edge_emu_buffer on this path */
+                                 refbw_m1 + 8, refbh_m1 + 8,
+                                 x - 3, y - 3, w, h);
+        ref_u = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
+        smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]); /* U is finished before the buffer is reused for V */
+
+        s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
+                                 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
+                                 288, src_stride_v,
+                                 refbw_m1 + 8, refbh_m1 + 8,
+                                 x - 3, y - 3, w, h);
+        ref_v = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
+        smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
+    } else {
+        smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
+        smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
+    }
+    }
+}
+
+#define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
+                    px, py, pw, ph, bw, bh, w, h, i) \
+    mc_luma_scaled(td, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
+                   mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
+                   s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) /* s##mc pastes an `s` prefix onto the `mc` argument; needs `s`, `b` and `bytesperpixel` in scope */
+#define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
+    mc_chroma_scaled(td, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+                     row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
+                     s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) /* chroma counterpart; `i` selects which of the block's references supplies scale and step */
+#define SCALED 1 /* consumed by vp9_mc_template.c (not visible here) */
+#define FN(x) x##_scaled_8bpp /* name suffix for this instantiation of the template */
+#define BYTES_PER_PIXEL 1
+#include "vp9_mc_template.c" /* 8-bit scaled instance; presumably defines inter_pred_scaled_8bpp(), which inter_recon() calls */
+#undef FN
+#undef BYTES_PER_PIXEL
+#define FN(x) x##_scaled_16bpp
+#define BYTES_PER_PIXEL 2
+#include "vp9_mc_template.c" /* 16-bit scaled instance; presumably defines inter_pred_scaled_16bpp() */
+#undef mc_luma_dir
+#undef mc_chroma_dir
+#undef FN
+#undef BYTES_PER_PIXEL
+#undef SCALED
+
+static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
+{
+    const VP9Context *s = td->s;
+    VP9Block *b = td->b;
+    const int row = td->row, col = td->col;
+    int lim_x, lim_y, y_tx, uv_tx, y_inc, uv_inc, coef_inc;
+    int scaled, plane, n, x, y;
+    uint8_t *row_dst;
+
+    /* reference flagged REF_INVALID_SCALE: record the error once, then give up on the block */
+    if (s->mvscale[b->ref[0]][0] == REF_INVALID_SCALE ||
+        (b->comp && s->mvscale[b->ref[1]][0] == REF_INVALID_SCALE)) {
+        if (!s->td->error_info) {
+            s->td->error_info = AVERROR_INVALIDDATA;
+            av_log(NULL, AV_LOG_ERROR, "Bitstream not supported, "
+                                       "reference frame has invalid dimensions\n");
+        }
+        return;
+    }
+
+    scaled = s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0]);
+    if (scaled && bytesperpixel == 1)
+        inter_pred_scaled_8bpp(td);
+    else if (scaled)
+        inter_pred_scaled_16bpp(td);
+    else if (bytesperpixel == 1)
+        inter_pred_8bpp(td);
+    else
+        inter_pred_16bpp(td);
+
+    if (b->skip)
+        return; /* skip flag set: no inverse transform is applied */
+
+    /* residual add, mostly mirrors intra_recon() */
+    lim_x    = FFMIN(2 * (s->cols - col), ff_vp9_bwh_tab[1][b->bs][0] << 1);
+    lim_y    = FFMIN(2 * (s->rows - row), ff_vp9_bwh_tab[1][b->bs][1] << 1);
+    y_tx     = 4 * s->s.h.lossless + b->tx;
+    uv_tx    = b->uvtx + 4 * s->s.h.lossless;
+    y_inc    = 1 << b->tx;
+    uv_inc   = 1 << b->uvtx;
+    coef_inc = 1 << (b->tx * 2);
+
+    row_dst = td->dst[0]; /* luma */
+    for (n = 0, y = 0; y < lim_y; y += y_inc) {
+        uint8_t *ptr = row_dst;
+        for (x = 0; x < lim_x; x += y_inc) {
+            int eob = b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
+            if (eob)
+                s->dsp.itxfm_add[y_tx][DCT_DCT](ptr, td->y_stride,
+                                                td->block + 16 * n * bytesperpixel, eob);
+            ptr += 4 * y_inc * bytesperpixel;
+            n   += coef_inc;
+        }
+        row_dst += 4 * td->y_stride * y_inc;
+    }
+
+    /* chroma: U, then V */
+    lim_x  >>= s->ss_h;
+    lim_y  >>= s->ss_v;
+    coef_inc = 1 << (b->uvtx * 2);
+    for (plane = 0; plane < 2; plane++) {
+        row_dst = td->dst[plane + 1];
+        for (n = 0, y = 0; y < lim_y; y += uv_inc) {
+            uint8_t *ptr = row_dst;
+            for (x = 0; x < lim_x; x += uv_inc) {
+                int eob = b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[plane][n]) : td->uveob[plane][n];
+                if (eob)
+                    s->dsp.itxfm_add[uv_tx][DCT_DCT](ptr, td->uv_stride,
+                                                     td->uvblock[plane] + 16 * n * bytesperpixel, eob);
+                ptr += 4 * uv_inc * bytesperpixel;
+                n   += coef_inc;
+            }
+            row_dst += 4 * uv_inc * td->uv_stride;
+        }
+    }
+}
+
+void ff_vp9_inter_recon_8bpp(VP9TileData *td)
+{ /* Inter reconstruction (prediction, then residual unless the block is skipped) of td's current block with one byte per sample. */
+    inter_recon(td, 1);
+}
+
+void ff_vp9_inter_recon_16bpp(VP9TileData *td)
+{ /* Inter reconstruction (prediction, then residual unless the block is skipped) of td's current block with two bytes per sample. */
+    inter_recon(td, 2);
+}
diff --git a/media/ffvpx/libavcodec/vp9shared.h b/media/ffvpx/libavcodec/vp9shared.h
new file mode 100644
index 0000000000..543a496df8
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9shared.h
@@ -0,0 +1,175 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP9SHARED_H
+#define AVCODEC_VP9SHARED_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/mem_internal.h"
+
+#include "vp9.h"
+#include "threadframe.h"
+
+enum BlockPartition { // implicit values 0..3; the bracket sketches show the split shape
+    PARTITION_NONE,    // [ ] <-.
+    PARTITION_H,       // [-]   |
+    PARTITION_V,       // [|]   |
+    PARTITION_SPLIT,   // [+] --'
+};
+
+enum InterPredMode { // NOTE(review): numbering starts at 10, presumably to follow the intra mode values -- confirm in vp9.h
+    NEARESTMV = 10,
+    NEARMV    = 11,
+    ZEROMV    = 12,
+    NEWMV     = 13,
+};
+
+enum CompPredMode { // type of VP9BitstreamHeader.comppredmode; implicit values 0..2
+    PRED_SINGLEREF,
+    PRED_COMPREF,
+    PRED_SWITCHABLE,
+};
+
+typedef struct VP9mv {
+    DECLARE_ALIGNED(4, int16_t, x); // 4-byte alignment requested for x; presumably so the x/y pair can be handled as one 32-bit word -- confirm
+    int16_t y;
+} VP9mv;
+
+typedef struct VP9mvrefPair {
+    VP9mv mv[2];   // two motion vectors per entry
+    int8_t ref[2]; // presumably the reference selector belonging to mv[0] / mv[1] -- confirm with users
+} VP9mvrefPair;
+
+typedef struct VP9Frame { // element type of VP9SharedContext.frames[]
+    ThreadFrame tf;
+    AVBufferRef *extradata; // NOTE(review): presumably the buffer backing segmentation_map and mv -- confirm in vp9.c
+    uint8_t *segmentation_map;
+    VP9mvrefPair *mv;
+    int uses_2pass;
+
+    AVBufferRef *hwaccel_priv_buf;
+    void *hwaccel_picture_private;
+} VP9Frame;
+
+enum BlockLevel { // listed from largest to smallest: BL_64X64 = 0 ... BL_8X8 = 3
+    BL_64X64,
+    BL_32X32,
+    BL_16X16,
+    BL_8X8,
+};
+
+enum BlockSize { // listed from largest (64x64 = 0) to smallest (4x4 = 12)
+    BS_64x64,
+    BS_64x32,
+    BS_32x64,
+    BS_32x32,
+    BS_32x16,
+    BS_16x32,
+    BS_16x16,
+    BS_16x8,
+    BS_8x16,
+    BS_8x8,
+    BS_8x4,
+    BS_4x8,
+    BS_4x4,
+    N_BS_SIZES, // not a size: number of enumerators above (13)
+};
+
+typedef struct VP9BitstreamHeader { // embedded as member h of VP9SharedContext
+    // bitstream header
+    uint8_t profile;
+    uint8_t bpp;
+    uint8_t keyframe;
+    uint8_t invisible;
+    uint8_t errorres;
+    uint8_t intraonly;
+    uint8_t resetctx;
+    uint8_t refreshrefmask;
+    uint8_t highprecisionmvs;
+    enum FilterMode filtermode;
+    uint8_t allowcompinter;
+    uint8_t refreshctx;
+    uint8_t parallelmode;
+    uint8_t framectxid;
+    uint8_t use_last_frame_mvs;
+    uint8_t refidx[3];
+    uint8_t signbias[3];
+    uint8_t fixcompref;
+    uint8_t varcompref[2];
+    struct {
+        uint8_t level;
+        int8_t sharpness;
+    } filter;
+    struct {
+        uint8_t enabled;
+        uint8_t updated;
+        int8_t mode[2];
+        int8_t ref[4];
+    } lf_delta;
+    uint8_t yac_qi;
+    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
+    uint8_t lossless; // the inter reconstruction code adds 4 * lossless to the transform index when picking itxfm_add
+#define MAX_SEGMENT 8 // length of segmentation.feat[] below
+    struct {
+        uint8_t enabled;
+        uint8_t temporal;
+        uint8_t absolute_vals;
+        uint8_t update_map;
+        uint8_t prob[7];
+        uint8_t pred_prob[3];
+        struct {
+            uint8_t q_enabled;
+            uint8_t lf_enabled;
+            uint8_t ref_enabled;
+            uint8_t skip_enabled;
+            uint8_t ref_val;
+            int16_t q_val;
+            int8_t lf_val;
+            int16_t qmul[2][2];
+            uint8_t lflvl[4][2];
+        } feat[MAX_SEGMENT]; // one feature record per segment
+    } segmentation;
+    enum TxfmMode txfmmode;
+    enum CompPredMode comppredmode;
+    struct {
+        unsigned log2_tile_cols, log2_tile_rows;
+        unsigned tile_cols, tile_rows;
+    } tiling;
+
+    int uncompressed_header_size;
+    int compressed_header_size;
+} VP9BitstreamHeader;
+
+typedef struct VP9SharedContext {
+    VP9BitstreamHeader h;
+
+    ThreadFrame refs[8];
+#define CUR_FRAME 0 // these three macros take the values 0..2, matching the three slots of frames[];
+#define REF_FRAME_MVPAIR 1 // presumably they are the indices into frames[] -- confirm at the use sites
+#define REF_FRAME_SEGMAP 2
+    VP9Frame frames[3];
+} VP9SharedContext;
+
+#endif /* AVCODEC_VP9SHARED_H */
diff --git a/media/ffvpx/libavcodec/vpx_rac.c b/media/ffvpx/libavcodec/vpx_rac.c
new file mode 100644
index 0000000000..cf02e9a19c
--- /dev/null
+++ b/media/ffvpx/libavcodec/vpx_rac.c
@@ -0,0 +1,53 @@
+/*
+ * VP5/6/8 decoder
+ * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include "libavutil/error.h"
+#include "bytestream.h"
+#include "vpx_rac.h"
+
+const uint8_t ff_vpx_norm_shift[256]= { /* entry i = number of leading zero bits of the 8-bit value i (8 for i == 0) */
+ 8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+}; /* vpx_rac_renorm() indexes this with c->high to obtain its left-shift amount */
+
+int ff_vpx_init_range_decoder(VPXRangeCoder *c, const uint8_t *buf, int buf_size)
+{
+    c->buffer      = buf;            /* every field is reset, even when the input is rejected below */
+    c->end         = buf + buf_size;
+    c->end_reached = 0;
+    c->bits        = -16;
+    c->high        = 255;
+    if (buf_size <= 0)
+        return AVERROR_INVALIDDATA;
+    c->code_word = bytestream_get_be24(&c->buffer); /* prime code_word with an initial 24-bit read */
+    return 0;
+}
diff --git a/media/ffvpx/libavcodec/vpx_rac.h b/media/ffvpx/libavcodec/vpx_rac.h
new file mode 100644
index 0000000000..b158cc0754
--- /dev/null
+++ b/media/ffvpx/libavcodec/vpx_rac.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Common VP5-VP9 range decoder stuff
+ */
+
+#ifndef AVCODEC_VPX_RAC_H
+#define AVCODEC_VPX_RAC_H
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "bytestream.h"
+
+typedef struct VPXRangeCoder {
+    int high; /* current range; set to 255 at init and used as index into ff_vpx_norm_shift[] */
+    int bits; /* stored negated (i.e. negative "bits" is a positive number of
+                 bits left) in order to eliminate a negate in cache refilling */
+    const uint8_t *buffer; /* next input byte to be read */
+    const uint8_t *end; /* buf + buf_size as passed to ff_vpx_init_range_decoder() */
+    unsigned int code_word; /* working value; vpx_rac_renorm() shifts it and ORs in 16 fresh bits */
+    int end_reached; /* counter bumped by vpx_rac_is_end() once the input is used up */
+} VPXRangeCoder;
+
+extern const uint8_t ff_vpx_norm_shift[256];
+int ff_vpx_init_range_decoder(VPXRangeCoder *c, const uint8_t *buf, int buf_size);
+
+/**
+ * Counts calls made after the input ran dry; returns 1 once there were more than 10, else 0.
+ */
+static av_always_inline int vpx_rac_is_end(VPXRangeCoder *c)
+{
+    const int exhausted = c->buffer >= c->end && c->bits >= 0;
+    c->end_reached += exhausted;
+    return c->end_reached > 10;
+}
+
+static av_always_inline unsigned int vpx_rac_renorm(VPXRangeCoder *c)
+{
+    const int norm = ff_vpx_norm_shift[c->high];
+    unsigned int cw = c->code_word << norm;
+    int pending = c->bits + norm;
+
+    c->high <<= norm;
+    /* bits is stored negated: a non-negative value means the cached bits are
+     * used up, so pull in 16 more while input remains */
+    if (pending >= 0 && c->buffer < c->end) {
+        cw      |= bytestream_get_be16(&c->buffer) << pending;
+        pending -= 16;
+    }
+    c->bits = pending;
+    return cw;
+}
+
+#if   ARCH_ARM
+#include "arm/vpx_arith.h"
+#elif ARCH_X86
+#include "x86/vpx_arith.h"
+#endif
+
+#ifndef vpx_rac_get_prob /* presumably already defined when an arch header included above supplies its own version */
+#define vpx_rac_get_prob vpx_rac_get_prob
+static av_always_inline int vpx_rac_get_prob(VPXRangeCoder *c, uint8_t prob)
+{
+    unsigned int code_word = vpx_rac_renorm(c);
+    unsigned int low = 1 + (((c->high - 1) * prob) >> 8); /* split point: 1 + (high - 1) * prob / 256 */
+    unsigned int low_shift = low << 16; /* split moved up 16 bits for the comparison with code_word */
+    int bit = code_word >= low_shift;
+
+    c->high = bit ? c->high - low : low; /* keep the sub-range the decoded bit falls in */
+    c->code_word = bit ? code_word - low_shift : code_word;
+
+    return bit;
+}
+#endif
+
+#ifndef vpx_rac_get_prob_branchy
+// branchy variant, to be used where there's a branch based on the bit decoded
+static av_always_inline int vpx_rac_get_prob_branchy(VPXRangeCoder *c, int prob)
+{
+    unsigned long code_word = vpx_rac_renorm(c); // NOTE(review): unsigned long here, unsigned int in vpx_rac_get_prob()
+    unsigned low = 1 + (((c->high - 1) * prob) >> 8); // same split formula as vpx_rac_get_prob()
+    unsigned low_shift = low << 16;
+
+    if (code_word >= low_shift) { // bit 1: keep the part of the range above the split
+        c->high     -= low;
+        c->code_word = code_word - low_shift;
+        return 1;
+    }
+
+    c->high = low; // bit 0: the range shrinks to the split
+    c->code_word = code_word;
+    return 0;
+}
+#endif
+
+static av_always_inline int vpx_rac_get(VPXRangeCoder *c)
+{
+    unsigned int cw = vpx_rac_renorm(c);
+    /* equiprobable: split the current range in half, rounding up */
+    int split = (c->high + 1) >> 1;
+    unsigned int threshold = split << 16;
+
+    if (cw < threshold) {
+        /* lower half: bit 0, the range shrinks to the split point */
+        c->high      = split;
+        c->code_word = cw;
+        return 0;
+    }
+    c->high     -= split;
+    c->code_word = cw - threshold;
+    return 1;
+}
+
+#endif /* AVCODEC_VPX_RAC_H */
diff --git a/media/ffvpx/libavcodec/x86/constants.c b/media/ffvpx/libavcodec/x86/constants.c
new file mode 100644
index 0000000000..bc7f2b17b8
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/constants.c
@@ -0,0 +1,93 @@
+/*
+ * MMX/SSE/AVX constants used across x86 dsp optimizations.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem_internal.h"
+#include "libavutil/x86/asm.h" // for xmm_reg
+#include "constants.h"
+
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1)    = { 0x0001000100010001ULL, 0x0001000100010001ULL,
+                                                    0x0001000100010001ULL, 0x0001000100010001ULL }; // ff_pw_N: every 16-bit word holds N (decimal in the name; m1 is -1)
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)    = { 0x0002000200020002ULL, 0x0002000200020002ULL,
+                                                    0x0002000200020002ULL, 0x0002000200020002ULL };
+DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_3)    = { 0x0003000300030003ULL, 0x0003000300030003ULL };
+DECLARE_ASM_ALIGNED(32, const ymm_reg,  ff_pw_4)    = { 0x0004000400040004ULL, 0x0004000400040004ULL,
+                                                    0x0004000400040004ULL, 0x0004000400040004ULL };
+DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_5)    = { 0x0005000500050005ULL, 0x0005000500050005ULL };
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8)    = { 0x0008000800080008ULL, 0x0008000800080008ULL };
+DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_9)    = { 0x0009000900090009ULL, 0x0009000900090009ULL };
+DECLARE_ALIGNED(8,  const uint64_t, ff_pw_15)   =   0x000F000F000F000FULL;
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_16)   = { 0x0010001000100010ULL, 0x0010001000100010ULL };
+DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_18)   = { 0x0012001200120012ULL, 0x0012001200120012ULL };
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_20)   = { 0x0014001400140014ULL, 0x0014001400140014ULL };
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_32)   = { 0x0020002000200020ULL, 0x0020002000200020ULL };
+DECLARE_ASM_ALIGNED(8,  const uint64_t, ff_pw_42)   =   0x002A002A002A002AULL;
+DECLARE_ASM_ALIGNED(8,  const uint64_t, ff_pw_53)   =   0x0035003500350035ULL;
+DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_64)   = { 0x0040004000400040ULL, 0x0040004000400040ULL };
+DECLARE_ASM_ALIGNED(8,  const uint64_t, ff_pw_96)   =   0x0060006000600060ULL;
+DECLARE_ASM_ALIGNED(8,  const uint64_t, ff_pw_128)  =   0x0080008000800080ULL;
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_255)  = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
+                                                    0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_256)  = { 0x0100010001000100ULL, 0x0100010001000100ULL,
+                                                    0x0100010001000100ULL, 0x0100010001000100ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_512)  = { 0x0200020002000200ULL, 0x0200020002000200ULL,
+                                                    0x0200020002000200ULL, 0x0200020002000200ULL };
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL,
+                                                    0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL};
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL,
+                                                    0x0400040004000400ULL, 0x0400040004000400ULL};
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2048) = { 0x0800080008000800ULL, 0x0800080008000800ULL,
+                                                    0x0800080008000800ULL, 0x0800080008000800ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_4095) = { 0x0fff0fff0fff0fffULL, 0x0fff0fff0fff0fffULL,
+                                                    0x0fff0fff0fff0fffULL, 0x0fff0fff0fff0fffULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_4096) = { 0x1000100010001000ULL, 0x1000100010001000ULL,
+                                                    0x1000100010001000ULL, 0x1000100010001000ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_8192) = { 0x2000200020002000ULL, 0x2000200020002000ULL,
+                                                    0x2000200020002000ULL, 0x2000200020002000ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_m1)   = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
+                                                    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
+
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_0)    = { 0x0000000000000000ULL, 0x0000000000000000ULL,
+                                                    0x0000000000000000ULL, 0x0000000000000000ULL }; // ff_pb_X: every byte holds X; 80/FE/FC are hex in the name, 15 is decimal (0x0F)
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_1)    = { 0x0101010101010101ULL, 0x0101010101010101ULL,
+                                                    0x0101010101010101ULL, 0x0101010101010101ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_2)    = { 0x0202020202020202ULL, 0x0202020202020202ULL,
+                                                    0x0202020202020202ULL, 0x0202020202020202ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_3)    = { 0x0303030303030303ULL, 0x0303030303030303ULL,
+                                                    0x0303030303030303ULL, 0x0303030303030303ULL };
+DECLARE_ALIGNED(32, const xmm_reg,  ff_pb_15)   = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL }; // NOTE(review): xmm_reg (two qwords) yet aligned to 32, unlike the other xmm_reg constants in this file (16)
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_80)   = { 0x8080808080808080ULL, 0x8080808080808080ULL,
+                                                    0x8080808080808080ULL, 0x8080808080808080ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_FE)   = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL,
+                                                    0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL };
+DECLARE_ALIGNED(8,  const uint64_t, ff_pb_FC)   =   0xFCFCFCFCFCFCFCFCULL;
+
+DECLARE_ALIGNED(16, const xmm_reg,  ff_ps_neg)  = { 0x8000000080000000ULL, 0x8000000080000000ULL }; // only the top bit of each 32-bit lane set (the IEEE-754 single-precision sign bit)
+
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pd_1)    = { 0x0000000100000001ULL, 0x0000000100000001ULL,
+                                                    0x0000000100000001ULL, 0x0000000100000001ULL }; // ff_pd_N: every 32-bit dword holds N (decimal in the name)
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pd_16)   = { 0x0000001000000010ULL, 0x0000001000000010ULL,
+                                                    0x0000001000000010ULL, 0x0000001000000010ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pd_32)   = { 0x0000002000000020ULL, 0x0000002000000020ULL,
+                                                    0x0000002000000020ULL, 0x0000002000000020ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pd_8192) = { 0x0000200000002000ULL, 0x0000200000002000ULL,
+                                                    0x0000200000002000ULL, 0x0000200000002000ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pd_65535)= { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL,
+                                                    0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL };
diff --git a/media/ffvpx/libavcodec/x86/constants.h b/media/ffvpx/libavcodec/x86/constants.h
new file mode 100644
index 0000000000..85da38b7b9
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/constants.h
@@ -0,0 +1,72 @@
+/*
+ * MMX/SSE constants used across x86 dsp optimizations.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_CONSTANTS_H
+#define AVCODEC_X86_CONSTANTS_H
+
+#include <stdint.h>
+
+#include "libavutil/x86/asm.h"
+
+extern const ymm_reg  ff_pw_1; /* the definitions of all of these live in constants.c */
+extern const ymm_reg  ff_pw_2;
+extern const xmm_reg  ff_pw_3;
+extern const ymm_reg  ff_pw_4;
+extern const xmm_reg  ff_pw_5;
+extern const xmm_reg  ff_pw_8;
+extern const xmm_reg  ff_pw_9;
+extern const uint64_t ff_pw_15;
+extern const xmm_reg  ff_pw_16;
+extern const xmm_reg  ff_pw_18;
+extern const xmm_reg  ff_pw_20;
+extern const xmm_reg  ff_pw_32;
+extern const uint64_t ff_pw_42;
+extern const uint64_t ff_pw_53;
+extern const xmm_reg  ff_pw_64;
+extern const uint64_t ff_pw_96;
+extern const uint64_t ff_pw_128;
+extern const ymm_reg  ff_pw_255;
+extern const ymm_reg  ff_pw_256;
+extern const ymm_reg  ff_pw_512;
+extern const ymm_reg  ff_pw_1023; /* NOTE(review): ff_pw_1019 is defined in constants.c but has no declaration here; presumably referenced from assembly only */
+extern const ymm_reg  ff_pw_1024;
+extern const ymm_reg  ff_pw_2048;
+extern const ymm_reg  ff_pw_4095;
+extern const ymm_reg  ff_pw_4096;
+extern const ymm_reg  ff_pw_8192;
+extern const ymm_reg  ff_pw_m1;
+
+extern const ymm_reg  ff_pb_0;
+extern const ymm_reg  ff_pb_1;
+extern const ymm_reg  ff_pb_2;
+extern const ymm_reg  ff_pb_3;
+extern const ymm_reg  ff_pb_80; /* NOTE(review): ff_pb_15 is likewise defined in constants.c without a declaration here */
+extern const ymm_reg  ff_pb_FE;
+extern const uint64_t ff_pb_FC;
+
+extern const xmm_reg  ff_ps_neg;
+
+extern const ymm_reg  ff_pd_1;
+extern const ymm_reg  ff_pd_16;
+extern const ymm_reg  ff_pd_32;
+extern const ymm_reg  ff_pd_8192;
+extern const ymm_reg  ff_pd_65535;
+
+#endif /* AVCODEC_X86_CONSTANTS_H */
diff --git a/media/ffvpx/libavcodec/x86/dct32.asm b/media/ffvpx/libavcodec/x86/dct32.asm
new file mode 100644
index 0000000000..37fba51543
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/dct32.asm
@@ -0,0 +1,481 @@
+;******************************************************************************
+;* 32 point SSE-optimized DCT transform
+;* Copyright (c) 2010 Vitor Sessak
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+
+ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 ; sign-bit mask: xorps with it flips the sign of dwords 2 and 3 of each 128-bit half
+
+ps_cos_vec: dd   0.500603,  0.505471,  0.515447,  0.531043 ; 13 rows of 4 floats; the code addresses rows by byte offset (+0, +16, ... +192)
+            dd   0.553104,  0.582935,  0.622504,  0.674808
+            dd -10.190008, -3.407609, -2.057781, -1.484165
+            dd  -1.169440, -0.972568, -0.839350, -0.744536
+            dd   0.502419,  0.522499,  0.566944,  0.646822
+            dd   0.788155,  1.060678,  1.722447,  5.101149
+            dd   0.509796,  0.601345,  0.899976,  2.562916
+            dd   0.509796,  0.601345,  0.899976,  2.562916
+            dd   1.000000,  1.000000,  1.306563,  0.541196
+            dd   1.000000,  1.000000,  1.306563,  0.541196
+            dd   1.000000,  0.707107,  1.000000, -0.707107
+            dd   1.000000,  0.707107,  1.000000, -0.707107
+            dd   0.707107,  0.707107,  0.707107,  0.707107 ; row at +192, the multiplier used by BUTTERFLY3V
+
+%macro BUTTERFLY 4 ; %1 = a, %2 = b, %3 = coefficients, %4 = scratch; result: %2 = a + b, %1 = (a - b) * %3
+    subps  %4, %1, %2 ; scratch = a - b (three-operand form, destination first)
+    addps  %2, %2, %1 ; b += a
+    mulps  %1, %4, %3 ; a = scratch * coefficients
+%endmacro
+
+%macro BUTTERFLY0 5 ; %1 = data, %2 = sign mask, %3 = coefficients, %4 = scratch, %5 = shuffle immediate
+%if cpuflag(sse2) && notcpuflag(avx) ; both paths leave %1 = (shuffle(%1, %5) + (%1 xor %2)) * %3
+    pshufd %4, %1, %5 ; scratch = shuffled copy of the data
+    xorps  %1, %2     ; flip the signs selected by the mask
+    addps  %1, %4
+    mulps  %1, %3
+%else
+    shufps %4, %1, %1, %5
+    xorps  %1, %1, %2
+    addps  %4, %4, %1 ; here the sum is built in the scratch register instead
+    mulps  %1, %4, %3
+%endif
+%endmacro
+
+%macro BUTTERFLY2 4 ; BUTTERFLY0 with shuffle 0x1b, i.e. the four dwords in reversed order
+    BUTTERFLY0 %1, %2, %3, %4, 0x1b
+%endmacro
+
+%macro BUTTERFLY3 4 ; BUTTERFLY0 with shuffle 0xb1, i.e. adjacent dwords swapped pairwise
+    BUTTERFLY0 %1, %2, %3, %4, 0xb1
+%endmacro
+
+%macro BUTTERFLY3V 5 ; register numbers: %1 = a, %2 = b, %3 = c, %4 = d, %5 = scratch
+    movaps m%5, m%1
+    addps  m%1, m%2 ; %1 = a + b
+    subps  m%5, m%2 ; scratch = a - b
+    SWAP %2, %5     ; rename: %2 now names the register holding a - b
+    mulps  m%2, [ps_cos_vec+192] ; %2 = (a - b) * last row of ps_cos_vec
+    movaps m%5, m%3
+    addps  m%3, m%4 ; %3 = c + d
+    subps  m%4, m%5 ; %4 = d - c (note the opposite order to the first pair)
+    mulps  m%4, [ps_cos_vec+192]
+%endmacro
+
+%macro PASS6_AND_PERMUTE 0 ; scalar (movss/addss) final pass working in place on the 32 floats at outq
+    mov         tmpd, [outq+4] ; tmpd is used as a scratch register for plain 32-bit moves
+    movss         m7, [outq+72]
+    addss         m7, [outq+76]
+    movss         m3, [outq+56]
+    addss         m3, [outq+60]
+    addss         m4, m3 ; m4 (like m0, m1, m5 and m6 below) is consumed as a live input from the caller
+    movss         m2, [outq+52]
+    addss         m2, m3
+    movss         m3, [outq+104]
+    addss         m3, [outq+108]
+    addss         m1, m3
+    addss         m5, m4
+    movss [outq+ 16], m1
+    movss         m1, [outq+100]
+    addss         m1, m3
+    movss         m3, [outq+40]
+    movss [outq+ 48], m1
+    addss         m3, [outq+44]
+    movss         m1, [outq+100]
+    addss         m4, m3
+    addss         m3, m2
+    addss         m1, [outq+108]
+    movss [outq+ 40], m3
+    addss         m2, [outq+36]
+    movss         m3, [outq+8]
+    movss [outq+ 56], m2
+    addss         m3, [outq+12]
+    movss [outq+ 32], m3
+    movss         m3, [outq+80]
+    movss [outq+  8], m5
+    movss [outq+ 80], m1
+    movss         m2, [outq+52]
+    movss         m5, [outq+120]
+    addss         m5, [outq+124]
+    movss         m1, [outq+64]
+    addss         m2, [outq+60]
+    addss         m0, m5
+    addss         m5, [outq+116]
+    mov    [outq+64], tmpd
+    addss         m6, m0
+    addss         m1, m6
+    mov         tmpd, [outq+12]
+    mov   [outq+ 96], tmpd
+    movss [outq+  4], m1
+    movss         m1, [outq+24]
+    movss [outq+ 24], m4
+    movss         m4, [outq+88]
+    addss         m4, [outq+92]
+    addss         m3, m4
+    addss         m4, [outq+84]
+    mov         tmpd, [outq+108]
+    addss         m1, [outq+28]
+    addss         m0, m1
+    addss         m1, m5
+    addss         m6, m3
+    addss         m3, m0
+    addss         m0, m7
+    addss         m5, [outq+20]
+    addss         m7, m1
+    movss [outq+ 12], m6
+    mov   [outq+112], tmpd
+    movss         m6, [outq+28]
+    movss [outq+ 28], m0
+    movss         m0, [outq+36]
+    movss [outq+ 36], m7
+    addss         m1, m4
+    movss         m7, [outq+116]
+    addss         m0, m2
+    addss         m7, [outq+124]
+    movss [outq+ 72], m0
+    movss         m0, [outq+44]
+    addss         m2, m0
+    movss [outq+ 44], m1
+    movss [outq+ 88], m2
+    addss         m0, [outq+60]
+    mov         tmpd, [outq+60]
+    mov   [outq+120], tmpd
+    movss [outq+104], m0
+    addss         m4, m5
+    addss         m5, [outq+68]
+    movss  [outq+52], m4
+    movss  [outq+60], m5
+    movss         m4, [outq+68]
+    movss         m5, [outq+20]
+    movss [outq+ 20], m3
+    addss         m5, m7
+    addss         m7, m6
+    addss         m4, m5
+    movss         m2, [outq+84]
+    addss         m2, [outq+92]
+    addss         m5, m2
+    movss [outq+ 68], m4
+    addss         m2, m7
+    movss         m4, [outq+76]
+    movss [outq+ 84], m2
+    movss [outq+ 76], m5
+    addss         m7, m4
+    addss         m6, [outq+124]
+    addss         m4, m6
+    addss         m6, [outq+92]
+    movss [outq+100], m4
+    movss [outq+108], m6
+    movss         m6, [outq+92]
+    movss  [outq+92], m7
+    addss         m6, [outq+124]
+    movss [outq+116], m6
+%endmacro
+
+INIT_YMM avx
+SECTION .text
+%if HAVE_AVX_EXTERNAL
+; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
+cglobal dct32_float, 2,3,8, out, in, tmp
+    ; pass 1
+    vmovaps     m4, [inq+0]
+    vinsertf128 m5, m5, [inq+96], 1  ; m5 is assembled from two 128-bit inserts: upper half <- in + 96
+    vinsertf128 m5, m5, [inq+112], 0 ; lower half <- in + 112, so every bit of m5 is now defined
+    vshufps     m5, m5, m5, 0x1b     ; reverse the dwords inside each half: m5 = in[31], in[30], ... in[24]
+    BUTTERFLY   m4, m5, [ps_cos_vec], m6
+
+    vmovaps     m2, [inq+64]
+    vinsertf128 m6, m6, [inq+32], 1
+    vinsertf128 m6, m6, [inq+48], 0
+    vshufps     m6, m6, m6, 0x1b     ; same reversal trick: m6 = in[15], in[14], ... in[8]
+    BUTTERFLY   m2, m6, [ps_cos_vec+32], m0
+
+    ; pass 2
+
+    BUTTERFLY  m5, m6, [ps_cos_vec+64], m0
+    BUTTERFLY  m4, m2, [ps_cos_vec+64], m7
+
+
+    ; pass 3
+    vperm2f128  m3, m6, m4, 0x31
+    vperm2f128  m1, m6, m4, 0x20
+    vshufps     m3, m3, m3, 0x1b
+
+    BUTTERFLY   m1, m3, [ps_cos_vec+96], m6
+
+
+    vperm2f128  m4, m5, m2, 0x20
+    vperm2f128  m5, m5, m2, 0x31
+    vshufps     m5, m5, m5, 0x1b
+
+    BUTTERFLY   m4, m5, [ps_cos_vec+96], m6
+
+    ; pass 4
+    vmovaps m6, [ps_p1p1m1m1+0]
+    vmovaps m2, [ps_cos_vec+128]
+
+    BUTTERFLY2  m5, m6, m2, m7
+    BUTTERFLY2  m4, m6, m2, m7
+    BUTTERFLY2  m1, m6, m2, m7
+    BUTTERFLY2  m3, m6, m2, m7
+
+
+    ; pass 5
+    vshufps m6, m6, m6, 0xcc ; turns the sign mask into one that flips dwords 1 and 3 of each half
+    vmovaps m2, [ps_cos_vec+160]
+
+    BUTTERFLY3  m5, m6, m2, m7
+    BUTTERFLY3  m4, m6, m2, m7
+    BUTTERFLY3  m1, m6, m2, m7
+    BUTTERFLY3  m3, m6, m2, m7
+
+    vperm2f128  m6, m3, m3, 0x31 ; upper half of m3 copied down; pass 6 reads m6 as a live input
+    vmovaps [outq], m3
+
+    vextractf128  [outq+64], m5, 1
+    vextractf128  [outq+32], m5, 0
+
+    vextractf128  [outq+80], m4, 1
+    vextractf128  [outq+48], m4, 0
+
+    vperm2f128  m0, m1, m1, 0x31 ; likewise for m1 -> m0
+    vmovaps [outq+96], m1
+
+    vzeroupper
+
+    ;    pass 6, no SIMD...
+INIT_XMM ; presumably makes the mN names in the macro below resolve to xmm registers -- see x86inc
+    PASS6_AND_PERMUTE
+    RET
+%endif
+
+%if ARCH_X86_64
+%define SPILL SWAP
+%define UNSPILL SWAP
+
+%macro PASS5 0 ; x86-64 flavour: works purely on m0-m15 through register renaming, no stores to memory
+    nop ; FIXME code alignment
+    SWAP 5, 8
+    SWAP 4, 12
+    SWAP 6, 14
+    SWAP 7, 13
+    SWAP 0, 15
+    PERMUTE 9,10, 10,12, 11,14, 12,9, 13,11, 14,13
+    TRANSPOSE4x4PS 8, 9, 10, 11, 0 ; m0 serves as the scratch register for both transposes and butterflies
+    BUTTERFLY3V    8, 9, 10, 11, 0
+    addps   m10, m11
+    TRANSPOSE4x4PS 12, 13, 14, 15, 0
+    BUTTERFLY3V    12, 13, 14, 15, 0
+    addps   m14, m15
+    addps   m12, m14
+    addps   m14, m13
+    addps   m13, m15
+%endmacro
+
+%macro PASS6 0 ; x86-64 flavour of the final pass: stores m8-m15 based results to outq
+    SWAP 9, 12
+    SWAP 11, 14
+    movss [outq+0x00], m8
+    pshuflw m0, m8, 0xe ; 0xe moves dword 1 of the source into dword 0 of the destination
+    movss [outq+0x10], m9
+    pshuflw m1, m9, 0xe
+    movss [outq+0x20], m10
+    pshuflw m2, m10, 0xe
+    movss [outq+0x30], m11
+    pshuflw m3, m11, 0xe
+    movss [outq+0x40], m12
+    pshuflw m4, m12, 0xe
+    movss [outq+0x50], m13
+    pshuflw m5, m13, 0xe
+    movss [outq+0x60], m14
+    pshuflw m6, m14, 0xe
+    movaps [outq+0x70], m15 ; the only full 16-byte store: +0x74 and +0x78 are rewritten below, +0x7c keeps m15's top dword
+    pshuflw m7, m15, 0xe
+    addss   m0, m1
+    addss   m1, m2
+    movss [outq+0x08], m0
+    addss   m2, m3
+    movss [outq+0x18], m1
+    addss   m3, m4
+    movss [outq+0x28], m2
+    addss   m4, m5
+    movss [outq+0x38], m3
+    addss   m5, m6
+    movss [outq+0x48], m4
+    addss   m6, m7
+    movss [outq+0x58], m5
+    movss [outq+0x68], m6
+    movss [outq+0x78], m7
+
+    PERMUTE 1,8, 3,9, 5,10, 7,11, 9,12, 11,13, 13,14, 8,1, 10,3, 12,5, 14,7
+    movhlps m0, m1
+    pshufd  m1, m1, 3
+    SWAP 0, 2, 4, 6, 8, 10, 12, 14
+    SWAP 1, 3, 5, 7, 9, 11, 13, 15
+%rep 7
+    movhlps m0, m1
+    pshufd  m1, m1, 3
+    addss   m15, m1
+    SWAP 0, 2, 4, 6, 8, 10, 12, 14
+    SWAP 1, 3, 5, 7, 9, 11, 13, 15
+%endrep
+%assign i 4
+%rep 15 ; 15 stores at outq+4, +12, ... +116
+    addss m0, m1
+    movss [outq+i], m0
+    SWAP 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+    %assign i i+8
+%endrep
+%endmacro
+
+%else ; ARCH_X86_32
+%macro SPILL 2 ; xmm#, mempos (8..15 map to out + 0 .. out + 112)
+    movaps [outq+(%2-8)*16], m%1 ; the output buffer itself serves as spill space
+%endmacro
+%macro UNSPILL 2 ; xmm#, mempos -- reloads what SPILL stored at the same mempos
+    movaps m%1, [outq+(%2-8)*16]
+%endmacro
+
+%define PASS6 PASS6_AND_PERMUTE
+%macro PASS5 0 ; x86-32 flavour: only m0-m7 are used, so results are spilled to the output buffer
+    movaps      m2, [ps_cos_vec+160]
+    shufps      m3, m3, 0xcc ; expects m3 to still hold the pass 4 sign mask; rearranged for BUTTERFLY3
+
+    BUTTERFLY3  m5, m3, m2, m1
+    SPILL 5, 8
+
+    UNSPILL 1, 9
+    BUTTERFLY3  m1, m3, m2, m5
+    SPILL 1, 14
+
+    BUTTERFLY3  m4, m3, m2, m5
+    SPILL 4, 12
+
+    BUTTERFLY3  m7, m3, m2, m5
+    SPILL 7, 13
+
+    UNSPILL 5, 10
+    BUTTERFLY3  m5, m3, m2, m7
+    SPILL 5, 10
+
+    UNSPILL 4, 11
+    BUTTERFLY3  m4, m3, m2, m7
+    SPILL 4, 11
+
+    BUTTERFLY3  m6, m3, m2, m7
+    SPILL 6, 9
+
+    BUTTERFLY3  m0, m3, m2, m7
+    SPILL 0, 15
+%endmacro
+%endif
+
+
+; void ff_dct32_float(FFTSample *out, const FFTSample *in)
+%macro DCT32_FUNC 0 ; emits the dct32_float body; instantiated once at the end of the file under INIT_XMM sse2
+cglobal dct32_float, 2, 3, 16, out, in, tmp
+    ; pass 1
+
+    movaps      m0, [inq+0]
+    LOAD_INV    m1, [inq+112] ; LOAD_INV is defined after this macro but before its expansion
+    BUTTERFLY   m0, m1, [ps_cos_vec], m3
+
+    movaps      m7, [inq+64]
+    LOAD_INV    m4, [inq+48]
+    BUTTERFLY   m7, m4, [ps_cos_vec+32], m3
+
+    ; pass 2
+    movaps      m2, [ps_cos_vec+64]
+    BUTTERFLY   m1, m4, m2, m3
+    SPILL 1, 11 ; SPILL/UNSPILL are register renames on x86-64 and stores/loads via outq on x86-32
+    SPILL 4, 8
+
+    ; pass 1
+    movaps      m1, [inq+16]
+    LOAD_INV    m6, [inq+96]
+    BUTTERFLY   m1, m6, [ps_cos_vec+16], m3
+
+    movaps      m4, [inq+80]
+    LOAD_INV    m5, [inq+32]
+    BUTTERFLY   m4, m5, [ps_cos_vec+48], m3
+
+    ; pass 2
+    BUTTERFLY   m0, m7, m2, m3
+
+    movaps      m2, [ps_cos_vec+80]
+    BUTTERFLY   m6, m5, m2, m3
+
+    BUTTERFLY   m1, m4, m2, m3
+
+    ; pass 3
+    movaps      m2, [ps_cos_vec+96]
+    shufps      m1, m1, 0x1b
+    BUTTERFLY   m0, m1, m2, m3
+    SPILL 0, 15
+    SPILL 1, 14
+
+    UNSPILL 0, 8
+    shufps      m5, m5, 0x1b
+    BUTTERFLY   m0, m5, m2, m3
+
+    UNSPILL 1, 11
+    shufps      m6, m6, 0x1b
+    BUTTERFLY   m1, m6, m2, m3
+    SPILL 1, 11
+
+    shufps      m4, m4, 0x1b
+    BUTTERFLY   m7, m4, m2, m3
+
+    ; pass 4
+    movaps      m3, [ps_p1p1m1m1+0] ; m3 holds the sign mask from here on
+    movaps      m2, [ps_cos_vec+128]
+
+    BUTTERFLY2  m5, m3, m2, m1
+
+    BUTTERFLY2  m0, m3, m2, m1
+    SPILL 0, 9
+
+    BUTTERFLY2  m6, m3, m2, m1
+    SPILL 6, 10
+
+    UNSPILL 0, 11
+    BUTTERFLY2  m0, m3, m2, m1
+    SPILL 0, 11
+
+    BUTTERFLY2  m4, m3, m2, m1
+
+    BUTTERFLY2  m7, m3, m2, m1
+
+    UNSPILL 6, 14
+    BUTTERFLY2  m6, m3, m2, m1
+
+    UNSPILL 0, 15
+    BUTTERFLY2  m0, m3, m2, m1
+
+    PASS5 ; PASS5/PASS6 resolve to the ARCH_X86_64 or the x86-32 definitions selected above
+    PASS6
+    RET
+%endmacro
+
+%macro LOAD_INV 2 ; %1 = destination register, %2 = source
+    pshufd      %1, %2, 0x1b ; 0x1b selects dwords 3,2,1,0: loads the four values in reversed order
+%endmacro
+
+INIT_XMM sse2
+DCT32_FUNC
diff --git a/media/ffvpx/libavcodec/x86/dct_init.c b/media/ffvpx/libavcodec/x86/dct_init.c
new file mode 100644
index 0000000000..d0e4b34dd3
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/dct_init.c
@@ -0,0 +1,36 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/dct.h"
+
+void ff_dct32_float_sse2(FFTSample *out, const FFTSample *in);
+void ff_dct32_float_avx(FFTSample *out, const FFTSample *in);
+
+av_cold void ff_dct_init_x86(DCTContext *s)
+{
+    const int flags = av_get_cpu_flags();
+    /* Prefer the AVX kernel, else fall back to SSE2, else leave s->dct32 untouched. */
+    if (EXTERNAL_AVX_FAST(flags))
+        s->dct32 = ff_dct32_float_avx;
+    else if (EXTERNAL_SSE2(flags))
+        s->dct32 = ff_dct32_float_sse2;
+}
diff --git a/media/ffvpx/libavcodec/x86/fdct.c b/media/ffvpx/libavcodec/x86/fdct.c
new file mode 100644
index 0000000000..f4677ff4be
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/fdct.c
@@ -0,0 +1,378 @@
+/*
+ * SIMD-optimized forward DCT
+ * The gcc porting is Copyright (c) 2001 Fabrice Bellard.
+ * cleanup/optimizations are Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * SSE2 optimization is Copyright (c) 2004 Denes Balatoni.
+ *
+ * from  fdctam32.c - AP922 MMX(3D-Now) forward-DCT
+ *
+ *  Intel Application Note AP-922 - fast, precise implementation of DCT
+ *        http://developer.intel.com/vtune/cbts/appnotes.htm
+ *
+ * Also of inspiration:
+ * a page about fdct at http://www.geocities.com/ssavekar/dct.htm
+ * Skal's fdct at http://skal.planet-d.net/coding/dct.html
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/macros.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/x86/asm.h"
+#include "fdct.h"
+
+#if HAVE_SSE2_INLINE
+
+//////////////////////////////////////////////////////////////////////
+//
+// constants for the forward DCT
+// -----------------------------
+//
+// Be sure to check that your compiler is aligning all constants to QWORD
+// (8-byte) memory boundaries!  Otherwise the unaligned memory access will
+// severely stall MMX execution.
+//
+//////////////////////////////////////////////////////////////////////
+
+#define BITS_FRW_ACC   3 //; 2 or 3 for accuracy
+#define SHIFT_FRW_COL  BITS_FRW_ACC
+#define SHIFT_FRW_ROW  (BITS_FRW_ACC + 17 - 3)
+#define RND_FRW_ROW    (1 << (SHIFT_FRW_ROW-1))
+//#define RND_FRW_COL    (1 << (SHIFT_FRW_COL-1))
+
+#define X8(x) x,x,x,x,x,x,x,x
+
+//concatenated table, for forward DCT transformation
+DECLARE_ALIGNED(16, static const int16_t, fdct_tg_all_16)[24] = {
+    X8(13036),  // tan(1*pi/16) * (1<<16), rounded; read at byte offset 0
+    X8(27146),  // tan(2*pi/16) * (1<<16), rounded; read at byte offset 16
+    X8(-21746)  // (tan(3*pi/16) - 1) * (1<<16), rounded; read at byte offset 32
+};
+
+DECLARE_ALIGNED(16, static const int16_t, ocos_4_16)[8] = {
+    X8(23170)   // cos(pi/4) * (1<<15) + 0.5
+};
+
+DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) }; // OR-ed ("por") into some column outputs: forces bit 0
+
+static const struct /* rounding bias for the row pass: loaded into xmm6 and added before the final psrad */
+{
+ DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4];
+} fdct_r_row_sse2 =
+{{
+ RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW /* 1 << (SHIFT_FRW_ROW-1), one per 32-bit lane */
+}};
+//DECLARE_ALIGNED(16, static const long, fdct_r_row_sse2)[4] = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW};
+
+static const struct
+{
+ DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256];
+} tab_frw_01234567_sse2 =
+{{
+// Eight sets of 32 coefficients (64 bytes each); every set is TABLE_SSE2 expanded with a different C1..C7.
+#define TABLE_SSE2 C4,  C4,  C1,  C3, -C6, -C2, -C1, -C5, \
+                   C4,  C4,  C5,  C7,  C2,  C6,  C3, -C7, \
+                  -C4,  C4,  C7,  C3,  C6, -C2,  C7, -C5, \
+                   C4, -C4,  C5, -C1,  C2, -C6,  C3, -C1,
+// set 0 (byte offset 0), read by fdct_row_sse2 for the rows at byte offsets 0 and 64: c1..c7 * cos(pi/4) * 2^15
+#define C1 22725
+#define C2 21407
+#define C3 19266
+#define C4 16384
+#define C5 12873
+#define C6 8867
+#define C7 4520
+TABLE_SSE2
+// set 1 (byte offset 64), read by fdct_row_sse2 for the rows at byte offsets 16 and 112
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 31521
+#define C2 29692
+#define C3 26722
+#define C4 22725
+#define C5 17855
+#define C6 12299
+#define C7 6270
+TABLE_SSE2
+// set 2 (byte offset 128), read by fdct_row_sse2 for the rows at byte offsets 32 and 96
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 29692
+#define C2 27969
+#define C3 25172
+#define C4 21407
+#define C5 16819
+#define C6 11585
+#define C7 5906
+TABLE_SSE2
+// set 3 (byte offset 192), read by fdct_row_sse2 for the rows at byte offsets 48 and 80
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 26722
+#define C2 25172
+#define C3 22654
+#define C4 19266
+#define C5 15137
+#define C6 10426
+#define C7 5315
+TABLE_SSE2
+// set 4 (byte offset 256): same values as set 0; sets 4..7 are not read by fdct_row_sse2
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 22725
+#define C2 21407
+#define C3 19266
+#define C4 16384
+#define C5 12873
+#define C6 8867
+#define C7 4520
+TABLE_SSE2
+// set 5 (byte offset 320): same values as set 3
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 26722
+#define C2 25172
+#define C3 22654
+#define C4 19266
+#define C5 15137
+#define C6 10426
+#define C7 5315
+TABLE_SSE2
+// set 6 (byte offset 384): same values as set 2
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 29692
+#define C2 27969
+#define C3 25172
+#define C4 21407
+#define C5 16819
+#define C6 11585
+#define C7 5906
+TABLE_SSE2
+// set 7 (byte offset 448): same values as set 1
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 31521
+#define C2 29692
+#define C3 26722
+#define C4 22725
+#define C5 17855
+#define C6 12299
+#define C7 6270
+TABLE_SSE2
+}};
+
+#define S(s) AV_TOSTRING(s) //AV_STRINGIFY is too long
+
+#define FDCT_COL(cpu, mm, mov) /* defines fdct_col_<cpu>; mm = register name prefix, mov = full-register move mnemonic */\
+static av_always_inline void fdct_col_##cpu(const int16_t *in, int16_t *out, int offset)\
+{\
+    __asm__ volatile ( /* reads 16-byte rows at in+offset+{0..112}, writes rows at out+offset+{0..112} */ \
+        #mov"      16(%0),  %%"#mm"0 \n\t" \
+        #mov"      96(%0),  %%"#mm"1 \n\t" \
+        #mov"    %%"#mm"0,  %%"#mm"2 \n\t" \
+        #mov"      32(%0),  %%"#mm"3 \n\t" \
+        "paddsw  %%"#mm"1,  %%"#mm"0 \n\t" \
+        #mov"      80(%0),  %%"#mm"4 \n\t" \
+        "psllw  $"S(SHIFT_FRW_COL)", %%"#mm"0 \n\t" \
+        #mov"        (%0),  %%"#mm"5 \n\t" \
+        "paddsw  %%"#mm"3,  %%"#mm"4 \n\t" \
+        "paddsw   112(%0),  %%"#mm"5 \n\t" \
+        "psllw  $"S(SHIFT_FRW_COL)", %%"#mm"4 \n\t" \
+        #mov"    %%"#mm"0,  %%"#mm"6 \n\t" \
+        "psubsw  %%"#mm"1,  %%"#mm"2 \n\t" \
+        #mov"      16(%1),  %%"#mm"1 \n\t" \
+        "psubsw  %%"#mm"4,  %%"#mm"0 \n\t" \
+        #mov"      48(%0),  %%"#mm"7 \n\t" \
+        "pmulhw  %%"#mm"0,  %%"#mm"1 \n\t" \
+        "paddsw    64(%0),  %%"#mm"7 \n\t" \
+        "psllw  $"S(SHIFT_FRW_COL)", %%"#mm"5 \n\t" \
+        "paddsw  %%"#mm"4,  %%"#mm"6 \n\t" \
+        "psllw  $"S(SHIFT_FRW_COL)", %%"#mm"7 \n\t" \
+        #mov"    %%"#mm"5,  %%"#mm"4 \n\t" \
+        "psubsw  %%"#mm"7,  %%"#mm"5 \n\t" \
+        "paddsw  %%"#mm"5,  %%"#mm"1 \n\t" \
+        "paddsw  %%"#mm"7,  %%"#mm"4 \n\t" \
+        "por         (%2),  %%"#mm"1 \n\t" \
+        "psllw  $"S(SHIFT_FRW_COL)"+1, %%"#mm"2 \n\t" \
+        "pmulhw    16(%1),  %%"#mm"5 \n\t" \
+        #mov"    %%"#mm"4,  %%"#mm"7 \n\t" \
+        "psubsw    80(%0),  %%"#mm"3 \n\t" \
+        "psubsw  %%"#mm"6,  %%"#mm"4 \n\t" \
+        #mov"    %%"#mm"1,    32(%3) \n\t" \
+        "paddsw  %%"#mm"6,  %%"#mm"7 \n\t" \
+        #mov"      48(%0),  %%"#mm"1 \n\t" \
+        "psllw  $"S(SHIFT_FRW_COL)"+1, %%"#mm"3 \n\t" \
+        "psubsw    64(%0),  %%"#mm"1 \n\t" \
+        #mov"    %%"#mm"2,  %%"#mm"6 \n\t" \
+        #mov"    %%"#mm"4,    64(%3) \n\t" \
+        "paddsw  %%"#mm"3,  %%"#mm"2 \n\t" \
+        "pmulhw      (%4),  %%"#mm"2 \n\t" \
+        "psubsw  %%"#mm"3,  %%"#mm"6 \n\t" \
+        "pmulhw      (%4),  %%"#mm"6 \n\t" \
+        "psubsw  %%"#mm"0,  %%"#mm"5 \n\t" \
+        "por         (%2),  %%"#mm"5 \n\t" \
+        "psllw  $"S(SHIFT_FRW_COL)", %%"#mm"1 \n\t" \
+        "por         (%2),  %%"#mm"2 \n\t" \
+        #mov"    %%"#mm"1,  %%"#mm"4 \n\t" \
+        #mov"        (%0),  %%"#mm"3 \n\t" \
+        "paddsw  %%"#mm"6,  %%"#mm"1 \n\t" \
+        "psubsw   112(%0),  %%"#mm"3 \n\t" \
+        "psubsw  %%"#mm"6,  %%"#mm"4 \n\t" \
+        #mov"        (%1),  %%"#mm"0 \n\t" \
+        "psllw  $"S(SHIFT_FRW_COL)", %%"#mm"3 \n\t" \
+        #mov"      32(%1),  %%"#mm"6 \n\t" \
+        "pmulhw  %%"#mm"1,  %%"#mm"0 \n\t" \
+        #mov"    %%"#mm"7,      (%3) \n\t" \
+        "pmulhw  %%"#mm"4,  %%"#mm"6 \n\t" \
+        #mov"    %%"#mm"5,    96(%3) \n\t" \
+        #mov"    %%"#mm"3,  %%"#mm"7 \n\t" \
+        #mov"      32(%1),  %%"#mm"5 \n\t" \
+        "psubsw  %%"#mm"2,  %%"#mm"7 \n\t" \
+        "paddsw  %%"#mm"2,  %%"#mm"3 \n\t" \
+        "pmulhw  %%"#mm"7,  %%"#mm"5 \n\t" \
+        "paddsw  %%"#mm"3,  %%"#mm"0 \n\t" \
+        "paddsw  %%"#mm"4,  %%"#mm"6 \n\t" \
+        "pmulhw      (%1),  %%"#mm"3 \n\t" \
+        "por         (%2),  %%"#mm"0 \n\t" \
+        "paddsw  %%"#mm"7,  %%"#mm"5 \n\t" \
+        "psubsw  %%"#mm"6,  %%"#mm"7 \n\t" \
+        #mov"    %%"#mm"0,    16(%3) \n\t" \
+        "paddsw  %%"#mm"4,  %%"#mm"5 \n\t" \
+        #mov"    %%"#mm"7,    48(%3) \n\t" \
+        "psubsw  %%"#mm"1,  %%"#mm"3 \n\t" \
+        #mov"    %%"#mm"5,    80(%3) \n\t" \
+        #mov"    %%"#mm"3,   112(%3) \n\t" \
+        : /* no output operands: results are stored through %3 */ \
+        : "r" (in  + offset), "r" (fdct_tg_all_16), "r" (fdct_one_corr), /* %0, %1, %2 */ \
+          "r" (out + offset), "r" (ocos_4_16)); /* %3, %4; NOTE(review): no clobber list, unlike fdct_row_sse2 */ \
+}
+
+FDCT_COL(sse2, xmm, movdqa)
+
+static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
+{
+    __asm__ volatile( /* operands: %0 = in, %1 = coefficient table, %2 = rounding bias, %3 = shift count, %4 = out */
+#define FDCT_ROW_SSE2_H1(i,t)                    \
+        "movq      " #i "(%0), %%xmm2      \n\t" \
+        "movq      " #i "+8(%0), %%xmm0    \n\t" \
+        "movdqa    " #t "+32(%1), %%xmm3   \n\t" \
+        "movdqa    " #t "+48(%1), %%xmm7   \n\t" \
+        "movdqa    " #t "(%1), %%xmm4      \n\t" \
+        "movdqa    " #t "+16(%1), %%xmm5   \n\t"
+/* H2 is H1 without the xmm4/xmm5 loads: FDCT_ROW_SSE2 only reads those two, but overwrites xmm3/xmm7. */
+#define FDCT_ROW_SSE2_H2(i,t)                    \
+        "movq      " #i "(%0), %%xmm2      \n\t" \
+        "movq      " #i "+8(%0), %%xmm0    \n\t" \
+        "movdqa    " #t "+32(%1), %%xmm3   \n\t" \
+        "movdqa    " #t "+48(%1), %%xmm7   \n\t"
+/* Transforms the row loaded by H1/H2, adds the bias in xmm6, shifts by %3 and stores 16 bytes at out+i. */
+#define FDCT_ROW_SSE2(i)                      \
+        "movq      %%xmm2, %%xmm1       \n\t" \
+        "pshuflw   $27, %%xmm0, %%xmm0  \n\t" \
+        "paddsw    %%xmm0, %%xmm1       \n\t" \
+        "psubsw    %%xmm0, %%xmm2       \n\t" \
+        "punpckldq %%xmm2, %%xmm1       \n\t" \
+        "pshufd    $78, %%xmm1, %%xmm2  \n\t" \
+        "pmaddwd   %%xmm2, %%xmm3       \n\t" \
+        "pmaddwd   %%xmm1, %%xmm7       \n\t" \
+        "pmaddwd   %%xmm5, %%xmm2       \n\t" \
+        "pmaddwd   %%xmm4, %%xmm1       \n\t" \
+        "paddd     %%xmm7, %%xmm3       \n\t" \
+        "paddd     %%xmm2, %%xmm1       \n\t" \
+        "paddd     %%xmm6, %%xmm3       \n\t" \
+        "paddd     %%xmm6, %%xmm1       \n\t" \
+        "psrad     %3, %%xmm3           \n\t" \
+        "psrad     %3, %%xmm1           \n\t" \
+        "packssdw  %%xmm3, %%xmm1       \n\t" \
+        "movdqa    %%xmm1, " #i "(%4)   \n\t"
+/* Rows are handled in pairs sharing one 64-byte coefficient set: byte offsets (0,64), (16,112), (32,96), (48,80). */
+        "movdqa    (%2), %%xmm6         \n\t"
+        FDCT_ROW_SSE2_H1(0,0)
+        FDCT_ROW_SSE2(0)
+        FDCT_ROW_SSE2_H2(64,0)
+        FDCT_ROW_SSE2(64)
+
+        FDCT_ROW_SSE2_H1(16,64)
+        FDCT_ROW_SSE2(16)
+        FDCT_ROW_SSE2_H2(112,64)
+        FDCT_ROW_SSE2(112)
+
+        FDCT_ROW_SSE2_H1(32,128)
+        FDCT_ROW_SSE2(32)
+        FDCT_ROW_SSE2_H2(96,128)
+        FDCT_ROW_SSE2(96)
+
+        FDCT_ROW_SSE2_H1(48,192)
+        FDCT_ROW_SSE2(48)
+        FDCT_ROW_SSE2_H2(80,192)
+        FDCT_ROW_SSE2(80)
+        :
+        : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2),
+          "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
+          XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
+                            "%xmm4", "%xmm5", "%xmm6", "%xmm7")
+    );
+}
+
+void ff_fdct_sse2(int16_t *block)
+{
+    /* 16-byte aligned, 128-byte scratch buffer holding the intermediate 8x8 int16_t block. */
+    DECLARE_ALIGNED(16, int64_t, scratch)[16];
+    int16_t *const tmp = (int16_t *)scratch;
+    fdct_col_sse2(block, tmp, 0);   /* column pass: block -> tmp   */
+    fdct_row_sse2(tmp, block);      /* row pass:    tmp   -> block */
+}
+
+#endif /* HAVE_SSE2_INLINE */
diff --git a/media/ffvpx/libavcodec/x86/fdct.h b/media/ffvpx/libavcodec/x86/fdct.h
new file mode 100644
index 0000000000..164d4fb30e
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/fdct.h
@@ -0,0 +1,26 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_FDCT_H
+#define AVCODEC_X86_FDCT_H
+
+#include <stdint.h>
+
+void ff_fdct_sse2(int16_t *block); /* SSE2 forward DCT; the result overwrites block (see fdct.c) */
+
+#endif /* AVCODEC_X86_FDCT_H */
diff --git a/media/ffvpx/libavcodec/x86/fdctdsp_init.c b/media/ffvpx/libavcodec/x86/fdctdsp_init.c
new file mode 100644
index 0000000000..92a842433d
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/fdctdsp_init.c
@@ -0,0 +1,38 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/fdctdsp.h"
+#include "fdct.h"
+
+av_cold void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx,
+                                 unsigned high_bit_depth)
+{
+    const int flags = av_get_cpu_flags();
+    const int algo  = avctx->dct_algo;
+
+    /* Leave c->fdct alone for high bit depth or when another DCT algorithm was requested. */
+    if (high_bit_depth || (algo != FF_DCT_AUTO && algo != FF_DCT_MMX))
+        return;
+
+    if (INLINE_SSE2(flags))
+        c->fdct = ff_fdct_sse2;
+}
diff --git a/media/ffvpx/libavcodec/x86/fft.asm b/media/ffvpx/libavcodec/x86/fft.asm
new file mode 100644
index 0000000000..34c3fc9a0f
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/fft.asm
@@ -0,0 +1,838 @@
+;******************************************************************************
+;* FFT transform with SSE/AVX optimizations
+;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2011 Vitor Sessak
+;*
+;* This algorithm (though not any of the implementation details) is
+;* based on libdjbfft by D. J. Bernstein.
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+; These functions are not individually interchangeable with the C versions.
+; While C takes arrays of FFTComplex, SSE/3DNow leave intermediate results
+; in blocks as convenient to the vector size.
+; i.e. {4x real, 4x imaginary, 4x real, ...} (or 2x respectively)
+
+%include "libavutil/x86/x86util.asm"
+
+%if ARCH_X86_64
+%define pointer resq
+%else
+%define pointer resd
+%endif
+
+struc FFTContext ; field offsets for the code below; NOTE(review): presumably mirrors the C FFTContext layout - confirm against fft.h
+    .nbits:    resd 1 ; read as a 32-bit value by fft_calc and fft_permute
+    .reverse:  resd 1
+    .revtab:   pointer 1 ; fft_permute reads 16-bit entries through this pointer
+    .tmpbuf:   pointer 1 ; scratch buffer used by fft_permute
+    .mdctsize: resd 1 ; read by imdct_calc
+    .mdctbits: resd 1
+    .tcos:     pointer 1
+    .tsin:     pointer 1
+    .fftperm:  pointer 1
+    .fftcalc:  pointer 1
+    .imdctcalc:pointer 1
+    .imdcthalf:pointer 1 ; function pointer called by imdct_calc
+endstruc
+
+SECTION_RODATA 32
+
+%define M_SQRT1_2 0.70710678118654752440
+%define M_COS_PI_1_8 0.923879532511287
+%define M_COS_PI_3_8 0.38268343236509
+
+ps_cos16_1: dd 1.0, M_COS_PI_1_8, M_SQRT1_2, M_COS_PI_3_8, 1.0, M_COS_PI_1_8, M_SQRT1_2, M_COS_PI_3_8 ; factors used by fft16_avx
+ps_cos16_2: dd 0, M_COS_PI_3_8, M_SQRT1_2, M_COS_PI_1_8, 0, -M_COS_PI_3_8, -M_SQRT1_2, -M_COS_PI_1_8 ; factors used by fft16_avx
+
+ps_root2: times 8 dd M_SQRT1_2 ; multiplier in T8_SSE
+ps_root2mppm: dd -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2 ; multiplier in T8_SSE
+ps_p1p1m1p1: dd 0, 0, 1<<31, 0, 0, 0, 1<<31, 0 ; sign-bit mask: xorps with it negates element 2 of each 128-bit half (T4_SSE)
+
+perm1: dd 0x00, 0x02, 0x03, 0x01, 0x03, 0x00, 0x02, 0x01 ; vpermilps control vector (T8_AVX)
+perm2: dd 0x00, 0x01, 0x02, 0x03, 0x01, 0x00, 0x02, 0x03 ; vpermilps control vector (T8_AVX)
+ps_p1p1m1p1root2: dd 1.0, 1.0, -1.0, 1.0, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2 ; multiplier in T8_AVX
+ps_m1m1p1m1p1m1m1m1: dd 1<<31, 1<<31, 0, 1<<31, 0, 1<<31, 1<<31, 1<<31 ; sign-bit mask for the vxorps in T8_AVX
+ps_m1p1: dd 1<<31, 0 ; sign-bit mask; not referenced in the code visible in this excerpt
+
+cextern ps_neg
+
+%assign i 16
+%rep 14
+cextern cos_ %+ i
+%assign i i<<1
+%endrep
+
+%if ARCH_X86_64
+    %define pointer dq
+%else
+    %define pointer dd
+%endif
+
+%macro IF0 1+
+%endmacro
+%macro IF1 1+
+    %1
+%endmacro
+
+SECTION .text
+
+;  in: %1 = {r0,i0,r2,i2,r4,i4,r6,i6}
+;      %2 = {r1,i1,r3,i3,r5,i5,r7,i7}
+;      %3, %4, %5 tmp
+; out: %1 = {r0,r1,r2,r3,i0,i1,i2,i3}
+;      %2 = {r4,r5,r6,r7,i4,i5,i6,i7}
+%macro T8_AVX 5
+    vsubps     %5, %1, %2       ; v  = %1 - %2
+    vaddps     %3, %1, %2       ; w  = %1 + %2
+    vmulps     %2, %5, [ps_p1p1m1p1root2]  ; v *= vals1
+    vpermilps  %2, %2, [perm1]
+    vblendps   %1, %2, %3, 0x33 ; q = {w1,w2,v4,v2,w5,w6,v7,v6}
+    vshufps    %5, %3, %2, 0x4e ; r = {w3,w4,v1,v3,w7,w8,v8,v5}
+    vsubps     %4, %5, %1       ; s = r - q
+    vaddps     %1, %5, %1       ; u = r + q
+    vpermilps  %1, %1, [perm2]  ; k  = {u1,u2,u3,u4,u6,u5,u7,u8}
+    vshufps    %5, %4, %1, 0xbb
+    vshufps    %3, %4, %1, 0xee
+    vperm2f128 %3, %3, %5, 0x13
+    vxorps     %4, %4, [ps_m1m1p1m1p1m1m1m1]  ; s *= {-1,-1,1,-1,1,-1,-1,-1} (sign bits set in the mask)
+    vshufps    %2, %1, %4, 0xdd
+    vshufps    %1, %1, %4, 0x88
+    vperm2f128 %4, %2, %1, 0x02 ; v  = {k1,k3,s1,s3,k2,k4,s2,s4}
+    vperm2f128 %1, %1, %2, 0x13 ; w  = {k6,k8,s6,s8,k5,k7,s5,s7}
+    vsubps     %5, %1, %3
+    vblendps   %1, %5, %1, 0x55 ; w -= {0,s7,0,k7,0,s8,0,k8}
+    vsubps     %2, %4, %1       ; %2 = v - w
+    vaddps     %1, %4, %1       ; %1 = v + w
+%endmacro
+
+; In SSE mode do one fft4 transforms
+; in:  %1={r0,i0,r2,i2} %2={r1,i1,r3,i3}
+; out: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3}
+;
+; In AVX mode do two fft4 transforms
+; in:  %1={r0,i0,r2,i2,r4,i4,r6,i6} %2={r1,i1,r3,i3,r5,i5,r7,i7}
+; out: %1={r0,r1,r2,r3,r4,r5,r6,r7} %2={i0,i1,i2,i3,i4,i5,i6,i7}
+%macro T4_SSE 3
+    subps    %3, %1, %2       ; {t3,t4,-t8,t7}
+    addps    %1, %1, %2       ; {t1,t2,t6,t5}
+    xorps    %3, %3, [ps_p1p1m1p1]
+    shufps   %2, %1, %3, 0xbe ; {t6,t5,t7,t8}
+    shufps   %1, %1, %3, 0x44 ; {t1,t2,t3,t4}
+    subps    %3, %1, %2       ; {r2,i2,r3,i3}
+    addps    %1, %1, %2       ; {r0,i0,r1,i1}
+    shufps   %2, %1, %3, 0xdd ; {i0,i1,i2,i3}
+    shufps   %1, %1, %3, 0x88 ; {r0,r1,r2,r3}
+%endmacro
+
+; In SSE mode do one FFT8
+; in:  %1={r0,r1,r2,r3} %2={i0,i1,i2,i3} %3={r4,i4,r6,i6} %4={r5,i5,r7,i7}
+; out: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3} %3={r4,r5,r6,r7} %4={i4,i5,i6,i7}
+;
+; In AVX mode do two FFT8
+; in:  %1={r0,i0,r2,i2,r8, i8, r10,i10} %2={r1,i1,r3,i3,r9, i9, r11,i11}
+;      %3={r4,i4,r6,i6,r12,i12,r14,i14} %4={r5,i5,r7,i7,r13,i13,r15,i15}
+; out: %1={r0,r1,r2,r3,r8, r9, r10,r11} %2={i0,i1,i2,i3,i8, i9, i10,i11}
+;      %3={r4,r5,r6,r7,r12,r13,r14,r15} %4={i4,i5,i6,i7,i12,i13,i14,i15}
+%macro T8_SSE 6
+    addps    %6, %3, %4       ; {t1,t2,t3,t4}
+    subps    %3, %3, %4       ; {r5,i5,r7,i7}
+    shufps   %4, %3, %3, 0xb1 ; {i5,r5,i7,r7}
+    mulps    %3, %3, [ps_root2mppm] ; {-r5,i5,r7,-i7}
+    mulps    %4, %4, [ps_root2]
+    addps    %3, %3, %4       ; {t8,t7,ta,t9}
+    shufps   %4, %6, %3, 0x9c ; {t1,t4,t7,ta}
+    shufps   %6, %6, %3, 0x36 ; {t3,t2,t9,t8}
+    subps    %3, %6, %4       ; {t6,t5,tc,tb}
+    addps    %6, %6, %4       ; {t1,t2,t9,ta}
+    shufps   %5, %6, %3, 0x8d ; {t2,ta,t6,tc}
+    shufps   %6, %6, %3, 0xd8 ; {t1,t9,t5,tb}
+    subps    %3, %1, %6       ; {r4,r5,r6,r7}
+    addps    %1, %1, %6       ; {r0,r1,r2,r3}
+    subps    %4, %2, %5       ; {i4,i5,i6,i7}
+    addps    %2, %2, %5       ; {i0,i1,i2,i3}
+%endmacro
+
+%macro INTERL 5 ; %1, %2 = inputs (PASS_BIG passes imaginary, real), %3 = tmp, %4 = Z or Z2 (address macro), %5 = slot index
+%if cpuflag(avx)
+    vunpckhps      %3, %2, %1 ; %3 = high-half interleave of (%2, %1) within each 128-bit lane
+    vunpcklps      %2, %2, %1 ; %2 = low-half interleave of (%2, %1) within each 128-bit lane
+    vextractf128   %4(%5), %2, 0 ; store the four 128-bit pieces to %4(%5), %4H(%5), %4(%5+1), %4H(%5+1)
+    vextractf128  %4 %+ H(%5), %3, 0
+    vextractf128   %4(%5 + 1), %2, 1
+    vextractf128  %4 %+ H(%5 + 1), %3, 1
+%elif cpuflag(sse)
+    mova     %3, %2
+    unpcklps %2, %1 ; %2 = {%2[0],%1[0],%2[1],%1[1]}
+    unpckhps %3, %1 ; %3 = {%2[2],%1[2],%2[3],%1[3]}
+    mova  %4(%5), %2
+    mova  %4(%5+1), %3
+%endif
+%endmacro
+
+; scheduled for cpu-bound sizes
+%macro PASS_SMALL 3 ; (to load m4-m7), wre, wim
+IF%1 mova    m4, Z(4)
+IF%1 mova    m5, Z(5)
+    mova     m0, %2 ; wre
+    mova     m1, %3 ; wim
+    mulps    m2, m4, m0 ; r2*wre
+IF%1 mova    m6, Z2(6)
+    mulps    m3, m5, m1 ; i2*wim
+IF%1 mova    m7, Z2(7)
+    mulps    m4, m4, m1 ; r2*wim
+    mulps    m5, m5, m0 ; i2*wre
+    addps    m2, m2, m3 ; r2*wre + i2*wim
+    mulps    m3, m1, m7 ; i3*wim
+    subps    m5, m5, m4 ; i2*wre - r2*wim
+    mulps    m1, m1, m6 ; r3*wim
+    mulps    m4, m0, m6 ; r3*wre
+    mulps    m0, m0, m7 ; i3*wre
+    subps    m4, m4, m3 ; r3*wre - i3*wim
+    mova     m3, Z(0)
+    addps    m0, m0, m1 ; i3*wre + r3*wim
+    subps    m1, m4, m2 ; t3
+    addps    m4, m4, m2 ; t5
+    subps    m3, m3, m4 ; r2
+    addps    m4, m4, Z(0) ; r0
+    mova     m6, Z(2)
+    mova   Z(4), m3
+    mova   Z(0), m4
+    subps    m3, m5, m0 ; t4
+    subps    m4, m6, m3 ; r3
+    addps    m3, m3, m6 ; r1
+    mova  Z2(6), m4
+    mova   Z(2), m3
+    mova     m2, Z(3)
+    addps    m3, m5, m0 ; t6
+    subps    m2, m2, m1 ; i3
+    mova     m7, Z(1)
+    addps    m1, m1, Z(3) ; i1
+    mova  Z2(7), m2
+    mova   Z(3), m1
+    subps    m4, m7, m3 ; i2
+    addps    m3, m3, m7 ; i0
+    mova   Z(5), m4
+    mova   Z(1), m3
+%endmacro
+
+; scheduled to avoid store->load aliasing
+%macro PASS_BIG 1 ; (!interleave)
+    mova     m4, Z(4) ; r2
+    mova     m5, Z(5) ; i2
+    mova     m0, [wq] ; wre
+    mova     m1, [wq+o1q] ; wim
+    mulps    m2, m4, m0 ; r2*wre
+    mova     m6, Z2(6) ; r3
+    mulps    m3, m5, m1 ; i2*wim
+    mova     m7, Z2(7) ; i3
+    mulps    m4, m4, m1 ; r2*wim
+    mulps    m5, m5, m0 ; i2*wre
+    addps    m2, m2, m3 ; r2*wre + i2*wim
+    mulps    m3, m1, m7 ; i3*wim
+    mulps    m1, m1, m6 ; r3*wim
+    subps    m5, m5, m4 ; i2*wre - r2*wim
+    mulps    m4, m0, m6 ; r3*wre
+    mulps    m0, m0, m7 ; i3*wre
+    subps    m4, m4, m3 ; r3*wre - i3*wim
+    mova     m3, Z(0)
+    addps    m0, m0, m1 ; i3*wre + r3*wim
+    subps    m1, m4, m2 ; t3
+    addps    m4, m4, m2 ; t5
+    subps    m3, m3, m4 ; r2
+    addps    m4, m4, Z(0) ; r0
+    mova     m6, Z(2)
+    mova   Z(4), m3
+    mova   Z(0), m4
+    subps    m3, m5, m0 ; t4
+    subps    m4, m6, m3 ; r3
+    addps    m3, m3, m6 ; r1
+IF%1 mova Z2(6), m4
+IF%1 mova  Z(2), m3
+    mova     m2, Z(3)
+    addps    m5, m5, m0 ; t6
+    subps    m2, m2, m1 ; i3
+    mova     m7, Z(1)
+    addps    m1, m1, Z(3) ; i1
+IF%1 mova Z2(7), m2
+IF%1 mova  Z(3), m1
+    subps    m6, m7, m5 ; i2
+    addps    m5, m5, m7 ; i0
+IF%1 mova  Z(5), m6
+IF%1 mova  Z(1), m5
+%if %1==0
+    INTERL m1, m3, m7, Z, 2
+    INTERL m2, m4, m0, Z2, 6
+
+    mova     m1, Z(0)
+    mova     m2, Z(4)
+
+    INTERL m5, m1, m3, Z, 0
+    INTERL m6, m2, m7, Z, 4
+%endif
+%endmacro
+
+%define Z(x) [r0+mmsize*x]
+%define Z2(x) [r0+mmsize*x]
+%define ZH(x) [r0+mmsize*x+mmsize/2]
+
+INIT_YMM avx
+
+%if HAVE_AVX_EXTERNAL
+align 16
+fft8_avx: ; in-place T8_AVX transform of the 64 bytes at r0
+    mova      m0, Z(0)
+    mova      m1, Z(1)
+    T8_AVX    m0, m1, m2, m3, m4 ; m2-m4 are temporaries
+    mova      Z(0), m0
+    mova      Z(1), m1
+    ret
+
+
+align 16
+fft16_avx: ; in-place transform of the 128 bytes at r0
+    mova       m2, Z(2)
+    mova       m3, Z(3)
+    T4_SSE     m2, m3, m7 ; two fft4 transforms (AVX mode of T4_SSE) on the upper 64 bytes
+
+    mova       m0, Z(0)
+    mova       m1, Z(1)
+    T8_AVX     m0, m1, m4, m5, m7 ; T8_AVX on the lower 64 bytes
+
+    mova       m4, [ps_cos16_1]
+    mova       m5, [ps_cos16_2]
+    vmulps     m6, m2, m4 ; multiply the fft4 results by the ps_cos16_1/ps_cos16_2 factors
+    vmulps     m7, m3, m5
+    vaddps     m7, m7, m6
+    vmulps     m2, m2, m5
+    vmulps     m3, m3, m4
+    vsubps     m3, m3, m2
+    vblendps   m2, m7, m3, 0xf0
+    vperm2f128 m3, m7, m3, 0x21
+    vaddps     m4, m2, m3
+    vsubps     m2, m3, m2
+    vperm2f128 m2, m2, m2, 0x01 ; swap the two 128-bit halves of m2
+    vsubps     m3, m1, m2 ; final add/sub against the T8_AVX results in m0/m1
+    vaddps     m1, m1, m2
+    vsubps     m5, m0, m4
+    vaddps     m0, m0, m4
+    vextractf128   Z(0), m0, 0 ; results are written back as 128-bit halves
+    vextractf128  ZH(0), m1, 0
+    vextractf128   Z(1), m0, 1
+    vextractf128  ZH(1), m1, 1
+    vextractf128   Z(2), m5, 0
+    vextractf128  ZH(2), m3, 0
+    vextractf128   Z(3), m5, 1
+    vextractf128  ZH(3), m3, 1
+    ret
+
+align 16
+fft32_avx: ; in-place transform of the 256 bytes at r0
+    call fft16_avx ; transforms the first 128 bytes
+
+    mova m0, Z(4)
+    mova m1, Z(5)
+
+    T4_SSE      m0, m1, m4 ; two fft4 transforms (AVX mode)
+
+    mova m2, Z(6)
+    mova m3, Z(7)
+
+    T8_SSE      m0, m1, m2, m3, m4, m6
+    ; m0={r0,r1,r2,r3,r8, r9, r10,r11} m1={i0,i1,i2,i3,i8, i9, i10,i11}
+    ; m2={r4,r5,r6,r7,r12,r13,r14,r15} m3={i4,i5,i6,i7,i12,i13,i14,i15}
+
+    vperm2f128  m4, m0, m2, 0x20 ; m4 = {low half of m0, low half of m2}
+    vperm2f128  m5, m1, m3, 0x20 ; m5 = {low half of m1, low half of m3}
+    vperm2f128  m6, m0, m2, 0x31 ; m6 = {high half of m0, high half of m2}
+    vperm2f128  m7, m1, m3, 0x31 ; m7 = {high half of m1, high half of m3}
+
+    PASS_SMALL 0, [cos_32], [cos_32+32] ; first argument 0: m4-m7 are already loaded, skip the reloads
+
+    ret
+
+fft32_interleave_avx: ; fft32_avx followed by an unpack pass over its 256 bytes; r0 is left advanced by 256
+    call fft32_avx
+    mov r2d, 32 ; counter, reduced by mmsize/4 per 64-byte step
+.deint_loop:
+    mova     m2, Z(0)
+    mova     m3, Z(1)
+    vunpcklps      m0, m2, m3 ; interleave elements of m2 and m3 (per 128-bit lane)
+    vunpckhps      m1, m2, m3
+    vextractf128   Z(0), m0, 0 ; store order: m0.low, m1.low, m0.high, m1.high
+    vextractf128  ZH(0), m1, 0
+    vextractf128   Z(1), m0, 1
+    vextractf128  ZH(1), m1, 1
+    add r0, mmsize*2
+    sub r2d, mmsize/4
+    jg .deint_loop
+    ret
+
+%endif
+
+INIT_XMM sse
+
+align 16
+fft4_avx: ; alias: the AVX dispatch tables built by DECL_FFT reference fft4_avx
+fft4_sse: ; in-place T4_SSE transform of the 32 bytes at r0
+    mova     m0, Z(0)
+    mova     m1, Z(1)
+    T4_SSE   m0, m1, m2 ; m2 is a temporary
+    mova   Z(0), m0
+    mova   Z(1), m1
+    ret
+
+align 16
+fft8_sse: ; in-place transform of the 64 bytes at r0
+    mova     m0, Z(0)
+    mova     m1, Z(1)
+    T4_SSE   m0, m1, m2 ; fft4 on the first 32 bytes
+    mova     m2, Z(2)
+    mova     m3, Z(3)
+    T8_SSE   m0, m1, m2, m3, m4, m5 ; combine with the second 32 bytes; m4/m5 are temporaries
+    mova   Z(0), m0
+    mova   Z(1), m1
+    mova   Z(2), m2
+    mova   Z(3), m3
+    ret
+
+align 16
+fft16_sse: ; in-place transform of the 128 bytes at r0
+    mova     m0, Z(0)
+    mova     m1, Z(1)
+    T4_SSE   m0, m1, m2
+    mova     m2, Z(2)
+    mova     m3, Z(3)
+    T8_SSE   m0, m1, m2, m3, m4, m5 ; same T4_SSE + T8_SSE sequence as fft8_sse on the first 64 bytes
+    mova     m4, Z(4)
+    mova     m5, Z(5)
+    mova   Z(0), m0
+    mova   Z(1), m1
+    mova   Z(2), m2
+    mova   Z(3), m3
+    T4_SSE   m4, m5, m6 ; fft4 on bytes 64..95
+    mova     m6, Z2(6)
+    mova     m7, Z2(7)
+    T4_SSE   m6, m7, m0 ; fft4 on bytes 96..127
+    PASS_SMALL 0, [cos_16], [cos_16+16] ; first argument 0: m4-m7 are already loaded, skip the reloads
+    ret
+
+
+%define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)]
+%define Z2(x) [zcq + o3q + mmsize*(x&1)]
+%define ZH(x) [zcq + o1q*(x&6) + mmsize*(x&1) + mmsize/2]
+%define Z2H(x) [zcq + o3q + mmsize*(x&1) + mmsize/2]
+
+%macro DECL_PASS 2+ ; name, payload
+align 16
+%1: ; entered by a jmp from the fft<n> routines of DECL_FFT with r0 = data, r1 = cos table, r2d = count
+DEFINE_ARGS zc, w, n, o1, o3
+    lea o3q, [nq*3]
+    lea o1q, [nq*8] ; o1 = n*8, the stride scaled by (x&6) in Z(x)
+    shl o3q, 4 ; o3 = n*48, the offset used by Z2(x)
+.loop:
+    %2
+    add zcq, mmsize*2 ; zc is advanced and not restored on return
+    add  wq, mmsize
+    sub  nd, mmsize/8
+    jg .loop
+    rep ret
+%endmacro
+
+%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs; %1 = dispatch table suffix, %2 = register holding nbits
+    lea r2, [dispatch_tab%1]
+    mov r2, [r2 + (%2q-2)*gprsize] ; table entry number nbits-2 (entry 0 is fft4)
+%ifdef PIC
+    lea r3, [$$] ; PIC tables store section-relative entries (SECTION_REL), so add the section base
+    add r2, r3
+%endif
+    call r2
+%endmacro ; FFT_DISPATCH
+
+INIT_YMM avx
+
+%if HAVE_AVX_EXTERNAL
+DECL_PASS pass_avx, PASS_BIG 1
+DECL_PASS pass_interleave_avx, PASS_BIG 0
+
+cglobal fft_calc, 2,5,8 ; args: r0 = FFTContext pointer, r1 = data pointer
+    mov     r3d, [r0 + FFTContext.nbits]
+    mov     r0, r1 ; the fft<n> routines take the data pointer in r0
+    mov     r1, r3 ; r1 = nbits, the dispatch index
+    FFT_DISPATCH _interleave %+ SUFFIX, r1 ; NOTE(review): no unpack fix-up here, unlike the SSE fft_calc - confirm small nbits never reach this version
+    RET
+
+%endif
+
+INIT_XMM sse
+
+DECL_PASS pass_sse, PASS_BIG 1
+DECL_PASS pass_interleave_sse, PASS_BIG 0
+
+INIT_XMM sse
+cglobal fft_calc, 2,5,8 ; args: r0 = FFTContext pointer, r1 = data pointer
+    mov     r3d, [r0 + FFTContext.nbits]
+    PUSH    r1 ; save the data pointer and nbits across the dispatched call
+    PUSH    r3
+    mov     r0, r1
+    mov     r1, r3
+    FFT_DISPATCH _interleave %+ SUFFIX, r1
+    POP     rcx ; rcx = nbits
+    POP     r4 ; r4 = data pointer
+    cmp     rcx, 3+(mmsize/16)
+    jg      .end ; nbits > 4: an fft<n>_interleave routine ran, skip the fix-up below
+    mov     r2, -1
+    add     rcx, 3
+    shl     r2, cl ; r2 = -(8 << nbits) = minus the buffer size in bytes
+    sub     r4, r2 ; r4 = end of buffer; r2 counts up from -size to 0
+.loop:
+    movaps   xmm0, [r4 + r2]
+    movaps   xmm1, xmm0
+    unpcklps xmm0, [r4 + r2 + 16] ; interleave the two 16-byte vectors of each 32-byte group
+    unpckhps xmm1, [r4 + r2 + 16]
+    movaps   [r4 + r2],      xmm0
+    movaps   [r4 + r2 + 16], xmm1
+    add      r2, mmsize*2
+    jl       .loop
+.end:
+    RET
+
+cglobal fft_permute, 2,7,1 ; args: r0 = FFTContext pointer, r1 = data z; reorders z through revtab using tmpbuf
+    mov     r4,  [r0 + FFTContext.revtab]
+    mov     r5,  [r0 + FFTContext.tmpbuf]
+    mov     ecx, [r0 + FFTContext.nbits]
+    mov     r2, 1
+    shl     r2, cl ; r2 = 1 << nbits = number of 8-byte elements
+    xor     r0, r0 ; r0 = element index i
+%if ARCH_X86_32
+    mov     r1, r1m
+%endif
+.loop:
+    movaps  xmm0, [r1 + 8*r0] ; two 8-byte elements: z[i], z[i+1]
+    movzx   r6, word [r4 + 2*r0] ; revtab[i] (16-bit entries)
+    movzx   r3, word [r4 + 2*r0 + 2] ; revtab[i+1]
+    movlps  [r5 + 8*r6], xmm0 ; tmpbuf[revtab[i]] = z[i]
+    movhps  [r5 + 8*r3], xmm0 ; tmpbuf[revtab[i+1]] = z[i+1]
+    add     r0, 2
+    cmp     r0, r2
+    jl      .loop
+    shl     r2, 3 ; r2 = buffer size in bytes
+    add     r1, r2 ; point r1/r5 at the buffer ends and copy with a negative index
+    add     r5, r2
+    neg     r2
+; nbits >= 2 (FFT4) and sizeof(FFTComplex)=8 => at least 32B
+.loopcopy:
+    movaps  xmm0, [r5 + r2] ; copy tmpbuf back to z, 32 bytes per iteration
+    movaps  xmm1, [r5 + r2 + 16]
+    movaps  [r1 + r2], xmm0
+    movaps  [r1 + r2 + 16], xmm1
+    add     r2, 32
+    jl      .loopcopy
+    RET
+
+INIT_XMM sse
+cglobal imdct_calc, 3,5,3 ; args: r0 = FFTContext pointer, r1 = output, r2 = input
+    mov     r3d, [r0 + FFTContext.mdctsize]
+    mov     r4,  [r0 + FFTContext.imdcthalf]
+    add     r1,  r3 ; r1 = output + mdctsize bytes: where the callee is asked to write
+    PUSH    r3 ; keep mdctsize and r1 across the call
+    PUSH    r1
+%if ARCH_X86_32
+    push    r2 ; stack arguments for the callee: (r0, r1, r2)
+    push    r1
+    push    r0
+%else
+    sub     rsp, 8+32*WIN64 ; allocate win64 shadow space
+%endif
+    call    r4 ; the function stored in FFTContext.imdcthalf
+%if ARCH_X86_32
+    add     esp, 12
+%else
+    add     rsp, 8+32*WIN64
+%endif
+    POP     r1
+    POP     r3
+    lea     r0, [r1 + 2*r3] ; r0 = r1 + 2*mdctsize bytes
+    mov     r2, r3
+    sub     r3, mmsize ; r3 walks down from mdctsize-mmsize
+    neg     r2 ; r2 walks up from -mdctsize to 0
+    mova    m2, [ps_neg] ; external constant; presumably a sign-bit mask, so the xorps below negates - confirm
+.loop:
+    mova    m0, [r1 + r3]
+    mova    m1, [r0 + r2]
+    shufps  m0, m0, 0x1b ; 0x1b reverses the four dwords
+    shufps  m1, m1, 0x1b
+    xorps   m0, m2
+    mova [r0 + r3], m1 ; bytes [r0, r0+size) = reversed copy of bytes [r0-size, r0)
+    mova [r1 + r2], m0 ; bytes [r1-size, r1) = reversed, xor-ed copy of bytes [r1, r1+size)
+    sub     r3, mmsize
+    add     r2, mmsize
+    jl      .loop
+    RET
+
+%ifdef PIC
+%define SECTION_REL - $$
+%else
+%define SECTION_REL
+%endif
+
+%macro DECL_FFT 1-2 ; nbits, suffix -- emits one fftN entry point per size from 1<<nbits up, then their dispatch table
+%ifidn %0, 1
+%xdefine fullsuffix SUFFIX
+%else
+%xdefine fullsuffix %2 %+ SUFFIX
+%endif ; fullsuffix = SUFFIX, with the optional second argument prepended
+%xdefine list_of_fft fft4 %+ SUFFIX SECTION_REL, fft8 %+ SUFFIX SECTION_REL
+%if %1>=5
+%xdefine list_of_fft list_of_fft, fft16 %+ SUFFIX SECTION_REL
+%endif
+%if %1>=6
+%xdefine list_of_fft list_of_fft, fft32 %+ fullsuffix SECTION_REL
+%endif ; the sizes listed so far are not generated here; their labels are defined outside this macro
+
+%assign n 1<<%1
+%rep 18-%1
+%assign n2 n/2
+%assign n4 n/4
+%xdefine list_of_fft list_of_fft, fft %+ n %+ fullsuffix SECTION_REL
+
+align 16
+fft %+ n %+ fullsuffix:
+    call fft %+ n2 %+ SUFFIX ; half-size transform first
+    add r0, n*4 - (n&(-2<<%1)) ; step r0 to the next sub-block; NOTE(review): the masked term looks like compensation for callees that move r0 -- confirm
+    call fft %+ n4 %+ SUFFIX ; first quarter-size transform
+    add r0, n*2 - (n2&(-2<<%1))
+    call fft %+ n4 %+ SUFFIX ; second quarter-size transform
+    sub r0, n*6 + (n2&(-2<<%1))
+    lea r1, [cos_ %+ n] ; r1 = address of the cos_<n> table (defined outside this view)
+    mov r2d, n4/2
+    jmp pass %+ fullsuffix ; jump (not call) into pass<fullsuffix>, defined outside this view
+
+%assign n n*2
+%endrep ; sizes 1<<nbits .. 1<<17 have now been emitted
+%undef n
+
+align 8
+dispatch_tab %+ fullsuffix: pointer list_of_fft ; one entry per size, smallest (fft4) first
+%endmacro ; DECL_FFT
+
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+DECL_FFT 6 ; AVX: generated sizes start at 64
+DECL_FFT 6, _interleave
+%endif
+INIT_XMM sse
+DECL_FFT 5 ; SSE: generated sizes start at 32
+DECL_FFT 5, _interleave
+
+INIT_XMM sse
+%undef mulps
+%undef addps
+%undef subps
+%undef unpcklps
+%undef unpckhps
+
+%macro PREROTATER 5 ;-2*k, 2*k, input+n4, tcos+n8, tsin+n8 -- results: xmm1 = low two (diff,sum) pairs, xmm0 = high two; clobbers xmm2-xmm5
+    movaps   xmm0, [%3+%2*4] ; four floats at the forward position
+    movaps   xmm1, [%3+%1*4-0x10] ; four floats just below the mirrored position
+    movaps   xmm2, xmm0
+    shufps   xmm0, xmm1, 0x88 ; xmm0 = elements 0 and 2 of each load
+    shufps   xmm1, xmm2, 0x77 ; xmm1 = elements 3 and 1 of each load (mirrored load first)
+    movlps   xmm4, [%4+%2*2] ; xmm4 = two tcos values from each side
+    movlps   xmm5, [%5+%2*2+0x0] ; xmm5 = two tsin values from each side
+    movhps   xmm4, [%4+%1*2-0x8]
+    movhps   xmm5, [%5+%1*2-0x8]
+    movaps   xmm2, xmm0
+    movaps   xmm3, xmm1
+    mulps    xmm0, xmm5
+    mulps    xmm1, xmm4
+    mulps    xmm2, xmm4
+    mulps    xmm3, xmm5
+    subps    xmm1, xmm0 ; xmm1 = odd*tcos - even*tsin
+    addps    xmm2, xmm3 ; xmm2 = even*tcos + odd*tsin
+    movaps   xmm0, xmm1
+    unpcklps xmm1, xmm2 ; interleave the low halves of the difference and sum vectors
+    unpckhps xmm0, xmm2 ; interleave the high halves
+%endmacro
+
+%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5 -- rotates (%2,%3) by the table values at [%5+j] and [%6+j]; %4 is not referenced; clobbers m6, m7
+    mulps      m6, %3, [%5+%1]
+    mulps      m7, %2, [%5+%1]
+    mulps      %2, %2, [%6+%1]
+    mulps      %3, %3, [%6+%1]
+    subps      %2, %2, m6 ; %2 = %2*[%6+j] - %3*[%5+j]
+    addps      %3, %3, m7 ; %3 = %3*[%6+j] + (old %2)*[%5+j]
+%endmacro
+
+%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8 -- loop: CMUL the blocks at j and k, then store each interleaved with the other's reversal
+.post:
+%if cpuflag(avx)
+    vmovaps      ymm1,   [%3+%1*2] ; two 32-byte blocks at the ascending offset j ...
+    vmovaps      ymm0,   [%3+%1*2+0x20]
+    vmovaps      ymm3,   [%3+%2*2] ; ... and two at the descending offset k
+    vmovaps      ymm2,   [%3+%2*2+0x20]
+
+    CMUL         %1, ymm0, ymm1, %3, %4, %5
+    CMUL         %2, ymm2, ymm3, %3, %4, %5
+    vshufps      ymm1, ymm1, ymm1, 0x1b ; reverse the floats inside each 128-bit lane ...
+    vshufps      ymm3, ymm3, ymm3, 0x1b
+    vperm2f128   ymm1, ymm1, ymm1, 0x01 ; ... then swap the two lanes: all 8 floats reversed
+    vperm2f128   ymm3, ymm3, ymm3, 0x01
+    vunpcklps    ymm6, ymm2, ymm1 ; k-side values interleaved with the reversed j-side values
+    vunpckhps    ymm4, ymm2, ymm1
+    vunpcklps    ymm7, ymm0, ymm3 ; j-side values interleaved with the reversed k-side values
+    vunpckhps    ymm5, ymm0, ymm3
+
+    vextractf128 [%3+%1*2],      ymm7, 0 ; unpck works per lane, so lanes are stored alternately
+    vextractf128 [%3+%1*2+0x10], ymm5, 0
+    vextractf128 [%3+%1*2+0x20], ymm7, 1
+    vextractf128 [%3+%1*2+0x30], ymm5, 1
+
+    vextractf128 [%3+%2*2],      ymm6, 0
+    vextractf128 [%3+%2*2+0x10], ymm4, 0
+    vextractf128 [%3+%2*2+0x20], ymm6, 1
+    vextractf128 [%3+%2*2+0x30], ymm4, 1
+    sub      %2,   0x20
+    add      %1,   0x20
+    jl       .post ; repeat while j is still negative
+%else
+    movaps   xmm1, [%3+%1*2] ; same scheme with 16-byte blocks
+    movaps   xmm0, [%3+%1*2+0x10]
+    CMUL     %1,   xmm0, xmm1, %3, %4, %5
+    movaps   xmm5, [%3+%2*2]
+    movaps   xmm4, [%3+%2*2+0x10]
+    CMUL     %2,   xmm4, xmm5, %3, %4, %5
+    shufps   xmm1, xmm1, 0x1b ; reverse the four floats
+    shufps   xmm5, xmm5, 0x1b
+    movaps   xmm6, xmm4
+    unpckhps xmm4, xmm1
+    unpcklps xmm6, xmm1
+    movaps   xmm2, xmm0
+    unpcklps xmm0, xmm5
+    unpckhps xmm2, xmm5
+    movaps   [%3+%2*2],      xmm6
+    movaps   [%3+%2*2+0x10], xmm4
+    movaps   [%3+%1*2],      xmm0
+    movaps   [%3+%1*2+0x10], xmm2
+    sub      %2,   0x10
+    add      %1,   0x10
+    jl       .post ; repeat while j is still negative
+%endif
+%endmacro
+
+%macro DECL_IMDCT 0 ; defines imdct_half for the instruction set selected by the preceding INIT_*
+cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *input
+%if ARCH_X86_64
+%define rrevtab r7
+%define rtcos   r8
+%define rtsin   r9
+%else
+%define rrevtab r6
+%define rtsin   r6
+%define rtcos   r5
+%endif ; on x86-32 rrevtab and rtsin share r6, so the pointers are spilled to the stack below
+    mov   r3d, [r0+FFTContext.mdctsize]
+    add   r2, r3 ; input += mdctsize (byte offset)
+    shr   r3, 1 ; r3 = mdctsize/2
+    mov   rtcos, [r0+FFTContext.tcos]
+    mov   rtsin, [r0+FFTContext.tsin]
+    add   rtcos, r3
+    add   rtsin, r3
+%if ARCH_X86_64 == 0
+    push  rtcos ; ends up at [esp+8] after the three pushes
+    push  rtsin ; ends up at [esp+4]
+%endif
+    shr   r3, 1 ; r3 = mdctsize/4
+    mov   rrevtab, [r0+FFTContext.revtab]
+    add   rrevtab, r3
+%if ARCH_X86_64 == 0
+    push  rrevtab ; ends up at [esp]
+%endif
+
+    sub   r3, 4 ; r3 = descending index, stepped by 4 until it goes negative
+%if ARCH_X86_64
+    xor   r4, r4
+    sub   r4, r3 ; r4 = -r3, the ascending mirror index
+%endif
+.pre:
+%if ARCH_X86_64 == 0
+;unspill
+    xor   r4, r4
+    sub   r4, r3 ; recompute r4 = -r3 every iteration (r4 is reused as scratch below)
+    mov   rtcos, [esp+8]
+    mov   rtsin, [esp+4]
+%endif
+
+    PREROTATER r4, r3, r2, rtcos, rtsin
+%if ARCH_X86_64
+    movzx  r5,  word [rrevtab+r4-4] ; four 16-bit revtab entries pick the output slots
+    movzx  r6,  word [rrevtab+r4-2]
+    movzx  r10, word [rrevtab+r3]
+    movzx  r11, word [rrevtab+r3+2]
+    movlps [r1+r5 *8], xmm0 ; each slot is 8 bytes wide
+    movhps [r1+r6 *8], xmm0
+    movlps [r1+r10*8], xmm1
+    movhps [r1+r11*8], xmm1
+    add    r4, 4
+%else
+    mov    r6, [esp] ; reload rrevtab
+    movzx  r5, word [r6+r4-4]
+    movzx  r4, word [r6+r4-2]
+    movlps [r1+r5*8], xmm0
+    movhps [r1+r4*8], xmm0
+    movzx  r5, word [r6+r3]
+    movzx  r4, word [r6+r3+2]
+    movlps [r1+r5*8], xmm1
+    movhps [r1+r4*8], xmm1
+%endif
+    sub    r3, 4
+    jns    .pre ; loop until r3 drops below zero
+
+    mov  r5, r0 ; keep s in r5
+    mov  r6, r1 ; keep output in r6
+    mov  r0, r1 ; output becomes the first argument of the dispatched FFT
+    mov  r1d, [r5+FFTContext.nbits]
+
+    FFT_DISPATCH SUFFIX, r1
+
+    mov  r0d, [r5+FFTContext.mdctsize]
+    add  r6, r0 ; r6 = output + mdctsize
+    shr  r0, 1
+%if ARCH_X86_64 == 0
+%define rtcos r2
+%define rtsin r3
+    mov  rtcos, [esp+8]
+    mov  rtsin, [esp+4]
+%endif
+    neg  r0 ; j = -mdctsize/2
+    mov  r1, -mmsize
+    sub  r1, r0 ; k = mdctsize/2 - mmsize
+    POSROTATESHUF r0, r1, r6, rtcos, rtsin
+%if ARCH_X86_64 == 0
+    add esp, 12 ; discard the three spilled pointers
+%endif
+    RET
+%endmacro
+
+DECL_IMDCT
+
+INIT_YMM avx
+
+%if HAVE_AVX_EXTERNAL
+DECL_IMDCT
+%endif
diff --git a/media/ffvpx/libavcodec/x86/fft.h b/media/ffvpx/libavcodec/x86/fft.h
new file mode 100644
index 0000000000..37418ec1f4
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/fft.h
@@ -0,0 +1,32 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_FFT_H
+#define AVCODEC_X86_FFT_H
+
+#include "libavcodec/fft.h"
+
+void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); /* installed as s->fft_permute by ff_fft_init_x86() */
+void ff_fft_calc_avx(FFTContext *s, FFTComplex *z);    /* installed as s->fft_calc (AVX) */
+void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);    /* installed as s->fft_calc (SSE) */
+
+void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input); /* installed as s->imdct_calc */
+void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); /* installed as s->imdct_half (SSE) */
+void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); /* installed as s->imdct_half (AVX) */
+
+#endif /* AVCODEC_X86_FFT_H */
diff --git a/media/ffvpx/libavcodec/x86/fft_init.c b/media/ffvpx/libavcodec/x86/fft_init.c
new file mode 100644
index 0000000000..df79d57dc7
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/fft_init.c
@@ -0,0 +1,47 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+
+#include "fft.h"
+
+av_cold void ff_fft_init_x86(FFTContext *s)
+{
+    /* Install the x86 SIMD FFT/IMDCT entry points supported by this CPU. */
+    const int flags = av_get_cpu_flags();
+    /* Contexts with nbits above 16 are left exactly as the caller set them. */
+    if (s->nbits <= 16) {
+        if (EXTERNAL_SSE(flags)) {
+            s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
+            s->fft_permute     = ff_fft_permute_sse;
+            s->fft_calc        = ff_fft_calc_sse;
+            s->imdct_half      = ff_imdct_half_sse;
+            s->imdct_calc      = ff_imdct_calc_sse;
+        }
+        /* Applied last so that AVX overrides the SSE choices made above. */
+        if (EXTERNAL_AVX_FAST(flags) && s->nbits >= 5) {
+            s->fft_permutation = FF_FFT_PERM_AVX;
+            s->fft_calc        = ff_fft_calc_avx;
+            s->imdct_half      = ff_imdct_half_avx;
+        }
+    }
+}
diff --git a/media/ffvpx/libavcodec/x86/flacdsp.asm b/media/ffvpx/libavcodec/x86/flacdsp.asm
new file mode 100644
index 0000000000..44416e4dfd
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/flacdsp.asm
@@ -0,0 +1,326 @@
+;******************************************************************************
+;* FLAC DSP SIMD optimizations
+;*
+;* Copyright (C) 2014 Loren Merritt
+;* Copyright (C) 2014 James Almer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+vector:  db 0,1,4,5,8,9,12,13,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,4,5,8,9,12,13, ; pshufb mask for the indep2 16-bit path: gathers the low 2 bytes of each dword; -1 entries produce zero bytes
+
+SECTION .text
+
+%macro PMACSDQL 5 ; %1 += %2 * %3 as a 64-bit accumulate; %4 and %5 are not referenced in the body
+%if cpuflag(xop)
+    pmacsdql %1, %2, %3, %1 ; single multiply-accumulate instruction on XOP
+%else
+    pmuldq   %2, %3 ; this path overwrites %2 with the product
+    paddq    %1, %2
+%endif
+%endmacro
+
+%macro LPC_32 1 ; %1 = instruction set; defines flac_lpc_32 for it, producing two output samples per outer iteration
+INIT_XMM %1
+cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
+    sub    lend, pred_orderd ; len -= pred_order
+    jle .ret ; nothing to do when no samples remain
+    lea    decodedq, [decodedq+pred_orderq*4-8] ; after the "add decodedq, 8" below this is &decoded[pred_order]
+    lea    coeffsq, [coeffsq+pred_orderq*4] ; point past the last coefficient ...
+    neg    pred_orderq ; ... and index both arrays with negative offsets
+    movd   m4, qlevelm ; m4 = shift count for psrlq
+ALIGN 16
+.loop_sample:
+    movd   m0, [decodedq+pred_orderq*4+8] ; oldest sample of the current window
+    add    decodedq, 8
+    movd   m1, [coeffsq+pred_orderq*4] ; first coefficient
+    pxor   m2, m2 ; m2 = accumulator for the sample stored at [decodedq]
+    pxor   m3, m3 ; m3 = accumulator for the sample stored at [decodedq+4]
+    lea    jq, [pred_orderq+1]
+    test   jq, jq
+    jz .end_order ; pred_order == 1: no inner iterations
+.loop_order:
+    PMACSDQL m2, m0, m1, m2, m0
+    movd   m0, [decodedq+jq*4] ; next sample
+    PMACSDQL m3, m1, m0, m3, m1 ; same coefficient, window shifted by one sample
+    movd   m1, [coeffsq+jq*4] ; next coefficient
+    inc    jq
+    jl .loop_order ; continue while j is negative
+.end_order:
+    PMACSDQL m2, m0, m1, m2, m0
+    psrlq  m2, m4
+    movd   m0, [decodedq]
+    paddd  m0, m2 ; add the shifted sum to the value already stored there
+    movd   [decodedq], m0
+    sub  lend, 2
+    jl .ret ; len was odd: the second sample of this pair does not exist
+    PMACSDQL m3, m1, m0, m3, m1 ; last term uses the sample just produced in m0
+    psrlq  m3, m4
+    movd   m1, [decodedq+4]
+    paddd  m1, m3
+    movd   [decodedq+4], m1
+    jg .loop_sample ; tests the flags of "sub lend, 2"; no instruction in between writes EFLAGS
+.ret:
+    RET
+%endmacro
+
+%if HAVE_XOP_EXTERNAL
+LPC_32 xop
+%endif
+LPC_32 sse4
+
+;----------------------------------------------------------------------------------
+;void ff_flac_decorrelate_[lrm]s_16_sse2(uint8_t **out, int32_t **in, int channels,
+;                                                   int len, int shift);
+;----------------------------------------------------------------------------------
+%macro FLAC_DECORRELATE_16 3-4 ; %1 = mode name, %2/%3 = numbers of the two registers written out, %4 = add|sub (unused for indep2)
+cglobal flac_decorrelate_%1_16, 2, 4, 4, out, in0, in1, len
+%ifidn %1, indep2
+    VBROADCASTI128 m2, [vector] ; pshufb mask, see the vector table
+%endif
+%if ARCH_X86_32
+    mov      lend, lenm
+%endif
+    movd       m3, r4m ; m3 = shift (fifth argument)
+    shl      lend, 2 ; sample count -> bytes of 32-bit input
+    mov      in1q, [in0q + gprsize] ; in[1]
+    mov      in0q, [in0q] ; in[0]
+    mov      outq, [outq] ; out[0]
+    add      in1q, lenq ; point all three buffers at their ends ...
+    add      in0q, lenq
+    add      outq, lenq
+    neg      lenq ; ... and index them with a negative offset counting up to 0
+
+align 16
+.loop:
+    mova       m0, [in0q + lenq]
+    mova       m1, [in1q + lenq]
+%ifidn %1, ms
+    psrad      m2, m1, 1
+    psubd      m0, m2 ; m0 = in0 - (in1 >> 1)
+%endif
+%ifnidn %1, indep2
+    p%4d       m2, m0, m1 ; m2 = m0 +/- m1, depending on %4
+    packssdw   m%2, m%2 ; saturate the dwords to signed words
+    packssdw   m%3, m%3
+    punpcklwd  m%2, m%3 ; interleave the two channels
+    psllw      m%2, m3
+%else
+    pslld      m%2, m3
+    pslld      m%3, m3
+    pshufb     m%2, m%2, m2 ; keep the low word of every dword
+    pshufb     m%3, m%3, m2
+    punpcklwd  m%2, m%3 ; interleave the two channels
+%endif
+    mova [outq + lenq], m%2
+    add      lenq, 16
+    jl .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+FLAC_DECORRELATE_16 ls, 0, 2, sub ; writes m0 and m2 = m0 - m1
+FLAC_DECORRELATE_16 rs, 2, 1, add ; writes m2 = m0 + m1 and m1
+FLAC_DECORRELATE_16 ms, 2, 0, add ; writes m2 = m0 + m1 and the adjusted m0
+
+;----------------------------------------------------------------------------------
+;void ff_flac_decorrelate_[lrm]s_32_sse2(uint8_t **out, int32_t **in, int channels,
+;                                        int len, int shift);
+;----------------------------------------------------------------------------------
+%macro FLAC_DECORRELATE_32 5 ; %1 = mode name, %2/%3 = output register numbers, %4 = scratch register number, %5 = add|sub
+cglobal flac_decorrelate_%1_32, 2, 4, 4, out, in0, in1, len
+%if ARCH_X86_32
+    mov      lend, lenm
+%endif
+    movd       m3, r4m ; m3 = shift (fifth argument)
+    mov      in1q, [in0q + gprsize] ; in[1]
+    mov      in0q, [in0q] ; in[0]
+    mov      outq, [outq] ; out[0]
+    sub      in1q, in0q ; keep in[1] as an offset from in[0] so only in0q needs advancing
+
+align 16
+.loop:
+    mova       m0, [in0q]
+    mova       m1, [in0q + in1q]
+%ifidn %1, ms
+    psrad      m2, m1, 1
+    psubd      m0, m2 ; m0 = in0 - (in1 >> 1)
+%endif
+    p%5d       m2, m0, m1 ; m2 = m0 +/- m1, depending on %5
+    pslld     m%2, m3
+    pslld     m%3, m3
+
+    SBUTTERFLY dq, %2, %3, %4
+
+    mova  [outq         ], m%2
+    mova  [outq + mmsize], m%3
+
+    add      in0q, mmsize
+    add      outq, mmsize*2 ; two registers are written per register read from each input
+    sub      lend, mmsize/4 ; mmsize/4 samples per channel consumed per iteration
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+FLAC_DECORRELATE_32 ls, 0, 2, 1, sub ; writes m0 and m2 = m0 - m1
+FLAC_DECORRELATE_32 rs, 2, 1, 0, add ; writes m2 = m0 + m1 and m1
+FLAC_DECORRELATE_32 ms, 2, 0, 1, add ; writes m2 = m0 + m1 and the adjusted m0
+
+;-----------------------------------------------------------------------------------------
+;void ff_flac_decorrelate_indep<ch>_<bps>_<opt>(uint8_t **out, int32_t **in, int channels,
+;                                            int len, int shift);
+;-----------------------------------------------------------------------------------------
+;%1 = bps
+;%2 = channels
+;%3 = last xmm reg used
+;%4 = word/dword (shift instruction)
+%macro FLAC_DECORRELATE_INDEP 4
+%define REPCOUNT %2/(32/%1) ; 16bits = channels / 2; 32bits = channels
+cglobal flac_decorrelate_indep%2_%1, 2, %2+2, %3+1, out, in0, in1, len, in2, in3, in4, in5, in6, in7
+%if ARCH_X86_32
+%if %2 == 6
+    DEFINE_ARGS out, in0, in1, in2, in3, in4, in5 ; seven pointers are named here, so len is read from its argument slot instead
+    %define  lend  dword r3m
+%else
+    mov      lend, lenm
+%endif
+%endif
+    movd      m%3, r4m ; m%3 = shift (fifth argument)
+
+%assign %%i 1
+%rep %2-1
+    mov      in %+ %%i %+ q, [in0q+%%i*gprsize] ; in<i> = in[i]
+%assign %%i %%i+1
+%endrep
+
+    mov      in0q, [in0q]
+    mov      outq, [outq]
+
+%assign %%i 1
+%rep %2-1
+    sub      in %+ %%i %+ q, in0q ; keep every other channel as an offset from in[0]
+%assign %%i %%i+1
+%endrep
+
+align 16
+.loop:
+    mova       m0, [in0q]
+
+%assign %%i 1
+%rep REPCOUNT-1
+    mova     m %+ %%i, [in0q + in %+ %%i %+ q] ; for 16 bits only the first channels/2 are loaded here
+%assign %%i %%i+1
+%endrep
+
+%if %1 == 32
+
+%if %2 == 8
+    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8
+%elif %2 == 6
+    SBUTTERFLY dq, 0, 1, 6
+    SBUTTERFLY dq, 2, 3, 6
+    SBUTTERFLY dq, 4, 5, 6
+
+    punpcklqdq m6, m0, m2
+    punpckhqdq m2, m4
+    shufps     m4, m0, 0xe4
+    punpcklqdq m0, m1, m3
+    punpckhqdq m3, m5
+    shufps     m5, m1, 0xe4
+    SWAP 0,6,1,4,5,3 ; rename registers so the results sit in m0..m5 for the store loop below
+%elif %2 == 4
+    TRANSPOSE4x4D 0, 1, 2, 3, 4
+%else ; %2 == 2
+    SBUTTERFLY dq, 0, 1, 2
+%endif
+
+%else ; %1 == 16
+
+%if %2 == 8
+    packssdw   m0, [in0q + in4q] ; pack with the second half of the channels straight from memory
+    packssdw   m1, [in0q + in5q]
+    packssdw   m2, [in0q + in6q]
+    packssdw   m3, [in0q + in7q]
+    TRANSPOSE2x4x4W 0, 1, 2, 3, 4
+%elif %2 == 6
+    packssdw   m0, [in0q + in3q]
+    packssdw   m1, [in0q + in4q]
+    packssdw   m2, [in0q + in5q]
+    pshufd     m3, m0,     q1032
+    punpcklwd  m0, m1
+    punpckhwd  m1, m2
+    punpcklwd  m2, m3
+
+    shufps     m3, m0, m2, q2020
+    shufps     m0, m1,     q2031
+    shufps     m2, m1,     q3131
+    shufps     m1, m2, m3, q3120
+    shufps     m3, m0,     q0220
+    shufps     m0, m2,     q3113
+    SWAP 2, 0, 3 ; rename registers so the results sit in m0..m2 for the store loop below
+%else ; %2 == 4
+    packssdw   m0, [in0q + in2q]
+    packssdw   m1, [in0q + in3q]
+    SBUTTERFLY wd, 0, 1, 2
+    SBUTTERFLY dq, 0, 1, 2
+%endif
+
+%endif
+
+%assign %%i 0
+%rep REPCOUNT
+    psll%4   m %+ %%i, m%3 ; apply the shift with the element width chosen by %4
+%assign %%i %%i+1
+%endrep
+
+%assign %%i 0
+%rep REPCOUNT
+    mova [outq + %%i*mmsize], m %+ %%i
+%assign %%i %%i+1
+%endrep
+
+    add      in0q, mmsize
+    add      outq, mmsize*REPCOUNT
+    sub      lend, mmsize/4 ; mmsize/4 samples per channel consumed per iteration
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM ssse3
+FLAC_DECORRELATE_16 indep2, 0, 1 ; Reuse stereo 16bits macro
+FLAC_DECORRELATE_INDEP 32, 2, 3, d
+FLAC_DECORRELATE_INDEP 16, 4, 3, w
+FLAC_DECORRELATE_INDEP 32, 4, 5, d
+FLAC_DECORRELATE_INDEP 16, 6, 4, w
+FLAC_DECORRELATE_INDEP 32, 6, 7, d
+%if ARCH_X86_64
+FLAC_DECORRELATE_INDEP 16, 8, 5, w ; 8-channel variants are only built on x86-64
+FLAC_DECORRELATE_INDEP 32, 8, 9, d
+%endif
+
+INIT_XMM avx
+FLAC_DECORRELATE_INDEP 32, 4, 5, d
+FLAC_DECORRELATE_INDEP 32, 6, 7, d
+%if ARCH_X86_64
+FLAC_DECORRELATE_INDEP 16, 8, 5, w ; 8-channel variants are only built on x86-64
+FLAC_DECORRELATE_INDEP 32, 8, 9, d
+%endif
diff --git a/media/ffvpx/libavcodec/x86/flacdsp_init.c b/media/ffvpx/libavcodec/x86/flacdsp_init.c
new file mode 100644
index 0000000000..87daed7005
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/flacdsp_init.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2014 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavcodec/flacdsp.h"
+#include "libavutil/x86/cpu.h"
+#include "config.h"
+
+void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
+                         int qlevel, int len); /* asm: flac_lpc_32 generated by "LPC_32 sse4" */
+void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
+                        int qlevel, int len);  /* asm: flac_lpc_32 generated by "LPC_32 xop" */
+
+#define DECORRELATE_FUNCS(fmt, opt) /* prototypes: ls/rs/ms decorrelate asm */           \
+void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int channels,     \
+                                          int len, int shift);                           \
+void ff_flac_decorrelate_rs_##fmt##_##opt(uint8_t **out, int32_t **in, int channels,     \
+                                          int len, int shift);                           \
+void ff_flac_decorrelate_ms_##fmt##_##opt(uint8_t **out, int32_t **in, int channels,     \
+                                          int len, int shift)
+
+#define DECORRELATE_IFUNCS(fmt, opt) /* prototypes: indep2/4/6/8 decorrelate asm */      \
+void ff_flac_decorrelate_indep2_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
+                                             int len, int shift);                        \
+void ff_flac_decorrelate_indep4_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
+                                              int len, int shift);                       \
+void ff_flac_decorrelate_indep6_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
+                                              int len, int shift);                       \
+void ff_flac_decorrelate_indep8_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
+                                              int len, int shift)
+
+DECORRELATE_FUNCS(16, sse2);
+DECORRELATE_FUNCS(16,  avx);   /* declared only: ff_flacdsp_init_x86() never assigns these */
+DECORRELATE_FUNCS(32, sse2);
+DECORRELATE_FUNCS(32,  avx);   /* declared only: ff_flacdsp_init_x86() never assigns these */
+DECORRELATE_IFUNCS(16, ssse3);
+DECORRELATE_IFUNCS(16,  avx);  /* only indep8 is assigned by ff_flacdsp_init_x86() */
+DECORRELATE_IFUNCS(32, ssse3);
+DECORRELATE_IFUNCS(32,  avx);  /* indep4/6/8 are assigned; indep2 is not */
+
+av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels) /* installs x86 asm into c->decorrelate[] and c->lpc32 */
+{
+#if HAVE_X86ASM
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE2(cpu_flags)) { /* slots 1..3 = ls / rs / ms; other sample formats are left untouched */
+        if (fmt == AV_SAMPLE_FMT_S16) {
+            c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
+            c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
+            c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2;
+        } else if (fmt == AV_SAMPLE_FMT_S32) {
+            c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2;
+            c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2;
+            c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2;
+        }
+    }
+    if (EXTERNAL_SSSE3(cpu_flags)) { /* slot 0 = independent channels, chosen by channel count */
+        if (fmt == AV_SAMPLE_FMT_S16) {
+            if (channels == 2)
+                c->decorrelate[0] = ff_flac_decorrelate_indep2_16_ssse3;
+            else if (channels == 4)
+                c->decorrelate[0] = ff_flac_decorrelate_indep4_16_ssse3;
+            else if (channels == 6)
+                c->decorrelate[0] = ff_flac_decorrelate_indep6_16_ssse3;
+            else if (ARCH_X86_64 && channels == 8) /* the 8-channel asm is only assembled when ARCH_X86_64 */
+                c->decorrelate[0] = ff_flac_decorrelate_indep8_16_ssse3;
+        } else if (fmt == AV_SAMPLE_FMT_S32) {
+            if (channels == 2)
+                c->decorrelate[0] = ff_flac_decorrelate_indep2_32_ssse3;
+            else if (channels == 4)
+                c->decorrelate[0] = ff_flac_decorrelate_indep4_32_ssse3;
+            else if (channels == 6)
+                c->decorrelate[0] = ff_flac_decorrelate_indep6_32_ssse3;
+            else if (ARCH_X86_64 && channels == 8)
+                c->decorrelate[0] = ff_flac_decorrelate_indep8_32_ssse3;
+        }
+    }
+    if (EXTERNAL_SSE4(cpu_flags)) {
+        c->lpc32 = ff_flac_lpc_32_sse4;
+    }
+    if (EXTERNAL_AVX(cpu_flags)) { /* runs after the SSSE3 block, so these replace its slot-0 choices */
+        if (fmt == AV_SAMPLE_FMT_S16) {
+            if (ARCH_X86_64 && channels == 8)
+                c->decorrelate[0] = ff_flac_decorrelate_indep8_16_avx;
+        } else if (fmt == AV_SAMPLE_FMT_S32) {
+            if (channels == 4)
+                c->decorrelate[0] = ff_flac_decorrelate_indep4_32_avx;
+            else if (channels == 6)
+                c->decorrelate[0] = ff_flac_decorrelate_indep6_32_avx;
+            else if (ARCH_X86_64 && channels == 8)
+                c->decorrelate[0] = ff_flac_decorrelate_indep8_32_avx;
+        }
+    }
+    if (EXTERNAL_XOP(cpu_flags)) { /* runs after the SSE4 block, so XOP replaces its lpc32 choice */
+        c->lpc32 = ff_flac_lpc_32_xop;
+    }
+#endif /* HAVE_X86ASM */
+}
diff --git a/media/ffvpx/libavcodec/x86/h264_intrapred.asm b/media/ffvpx/libavcodec/x86/h264_intrapred.asm
new file mode 100644
index 0000000000..8a38ba2bb5
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/h264_intrapred.asm
@@ -0,0 +1,2044 @@
+;******************************************************************************
+;* H.264 intra prediction asm optimizations
+;* Copyright (c) 2010 Fiona Glaser
+;* Copyright (c) 2010 Holger Lubitz
+;* Copyright (c) 2010 Loren Merritt
+;* Copyright (c) 2010 Ronald S. Bultje
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+tm_shuf: times 8 db 0x03, 0x80
+pw_ff00: times 8 dw 0xff00
+plane_shuf:  db -8, -7, -6, -5, -4, -3, -2, -1 ; pmaddubsw weights for the ssse3 path of pred16x16_plane
+             db  1,  2,  3,  4,  5,  6,  7,  8
+plane8_shuf: db -4, -3, -2, -1,  0,  0,  0,  0
+             db  1,  2,  3,  4,  0,  0,  0,  0
+pw_0to7:     dw  0,  1,  2,  3,  4,  5,  6,  7
+pw_1to8:     dw  1,  2,  3,  4,  5,  6,  7,  8 ; pmullw weights for the sse2 path of pred16x16_plane
+pw_m8tom1:   dw -8, -7, -6, -5, -4, -3, -2, -1 ; pmullw weights for the sse2 path of pred16x16_plane
+pw_m4to4:    dw -4, -3, -2, -1,  1,  2,  3,  4
+
+SECTION .text
+
+cextern pb_1
+cextern pb_3
+cextern pw_4
+cextern pw_8
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_vertical_8(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+INIT_XMM sse
+cglobal pred16x16_vertical_8, 2,3 ; copy the 16 bytes of the row above src into each of the 16 rows starting at src
+    sub   r0, r1 ; r0 = src - stride, the row above the block
+    mov   r2, 4 ; 4 iterations x 4 rows
+    movaps xmm0, [r0]
+.loop:
+    movaps [r0+r1*1], xmm0
+    movaps [r0+r1*2], xmm0
+    lea   r0, [r0+r1*2]
+    movaps [r0+r1*1], xmm0
+    movaps [r0+r1*2], xmm0
+    lea   r0, [r0+r1*2]
+    dec   r2
+    jg .loop
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_horizontal_8(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED16x16_H 0 ; fill each of 16 rows with the byte immediately left of that row
+cglobal pred16x16_horizontal_8, 2,3
+    mov       r2, 8 ; 8 iterations x 2 rows
+%if cpuflag(ssse3)
+    mova      m2, [pb_3] ; pshufb control (cextern; presumably every byte = 3 -- confirm)
+%endif
+.loop:
+    movd      m0, [r0+r1*0-4] ; 4 bytes ending at src[-1]; byte 3 is the left neighbour
+    movd      m1, [r0+r1*1-4]
+
+%if cpuflag(ssse3)
+    pshufb    m0, m2
+    pshufb    m1, m2
+%else
+    punpcklbw m0, m0 ; duplicate each byte so word 3 holds the left neighbour twice
+    punpcklbw m1, m1
+    SPLATW    m0, m0, 3
+    SPLATW    m1, m1, 3
+    mova [r0+r1*0+8], m0 ; this branch is only instantiated with INIT_MMX below, so the second 8 bytes need their own store
+    mova [r0+r1*1+8], m1
+%endif
+
+    mova [r0+r1*0], m0
+    mova [r0+r1*1], m1
+    lea       r0, [r0+r1*2]
+    dec       r2
+    jg .loop
+    RET
+%endmacro
+
+INIT_MMX mmxext
+PRED16x16_H
+INIT_XMM ssse3
+PRED16x16_H
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_dc_8(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED16x16_DC 0 ; fill the 16x16 block with (sum of 16 top + 16 left bytes + 16) >> 5
+cglobal pred16x16_dc_8, 2,7
+    mov       r4, r0 ; r4 keeps src for the store loop
+    sub       r0, r1 ; r0 = row above the block
+    pxor      mm0, mm0 ; the top-row sum is done in MMX registers mm0/mm1
+    pxor      mm1, mm1
+    psadbw    mm0, [r0+0] ; sum of top bytes 0..7
+    psadbw    mm1, [r0+8] ; sum of top bytes 8..15
+    dec        r0 ; r0 now addresses the column left of the block
+    movzx     r5d, byte [r0+r1*1] ; left byte of row 0
+    paddw     mm0, mm1
+    movd      r6d, mm0 ; r6d starts as the top-row sum
+    lea        r0, [r0+r1*2]
+%rep 7
+    movzx     r2d, byte [r0+r1*0] ; left bytes of rows 1, 3, .. 13
+    movzx     r3d, byte [r0+r1*1] ; left bytes of rows 2, 4, .. 14
+    add       r5d, r2d
+    add       r6d, r3d
+    lea        r0, [r0+r1*2]
+%endrep
+    movzx     r2d, byte [r0+r1*0] ; left byte of row 15
+    add       r5d, r6d
+    lea       r2d, [r2+r5+16] ; total + rounding term
+    shr       r2d, 5
+%if cpuflag(ssse3)
+    pxor       m1, m1
+%endif
+    SPLATB_REG m0, r2, m1 ; macro from outside this file; presumably broadcasts the low byte of r2 across m0 -- confirm
+
+    mov       r3d, 4 ; 4 iterations x 4 rows
+.loop:
+    mova [r4+r1*0], m0
+    mova [r4+r1*1], m0
+    lea   r4, [r4+r1*2]
+    mova [r4+r1*0], m0
+    mova [r4+r1*1], m0
+    lea   r4, [r4+r1*2]
+    dec   r3d
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED16x16_DC
+INIT_XMM ssse3
+PRED16x16_DC
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+INIT_XMM sse2
+cglobal pred16x16_tm_vp8_8, 2,6,6 ; each output byte = saturate_u8(top[x] + left[y] - topleft)
+    sub          r0, r1 ; r0 = row above the block
+    pxor       xmm2, xmm2
+    movdqa     xmm0, [r0]
+    movdqa     xmm1, xmm0
+    punpcklbw  xmm0, xmm2 ; xmm0 = top bytes 0..7 widened to words
+    punpckhbw  xmm1, xmm2 ; xmm1 = top bytes 8..15 widened to words
+    movzx       r4d, byte [r0-1] ; top-left byte
+    mov         r5d, 8 ; 8 iterations x 2 rows
+.loop:
+    movzx       r2d, byte [r0+r1*1-1] ; left byte of the first row of this pair
+    movzx       r3d, byte [r0+r1*2-1] ; left byte of the second row
+    sub         r2d, r4d ; left - topleft
+    sub         r3d, r4d
+    movd       xmm2, r2d
+    movd       xmm4, r3d
+    pshuflw    xmm2, xmm2, 0 ; broadcast the low word to all 8 words (with punpcklqdq below)
+    pshuflw    xmm4, xmm4, 0
+    punpcklqdq xmm2, xmm2
+    punpcklqdq xmm4, xmm4
+    movdqa     xmm3, xmm2
+    movdqa     xmm5, xmm4
+    paddw      xmm2, xmm0
+    paddw      xmm3, xmm1
+    paddw      xmm4, xmm0
+    paddw      xmm5, xmm1
+    packuswb   xmm2, xmm3 ; pack back to bytes with unsigned saturation
+    packuswb   xmm4, xmm5
+    movdqa [r0+r1*1], xmm2
+    movdqa [r0+r1*2], xmm4
+    lea          r0, [r0+r1*2]
+    dec         r5d
+    jg .loop
+    RET
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration ; AVX2 variant: four rows per iteration
+    sub                       dstq, strideq ; dstq = row above the block
+    pmovzxbw                    m0, [dstq] ; 16 top bytes widened to words
+    vpbroadcastb               xm1, [r0-1] ; top-left byte (r0 is the same register as dstq)
+    pmovzxbw                    m1, xm1
+    psubw                       m0, m1 ; m0 = top - topleft, reused for every row
+    mov                 iterationd, 4 ; 4 iterations x 4 rows
+    lea                   stride3q, [strideq*3]
+.loop:
+    vpbroadcastb               xm1, [dstq+strideq*1-1] ; left bytes of the next four rows
+    vpbroadcastb               xm2, [dstq+strideq*2-1]
+    vpbroadcastb               xm3, [dstq+stride3q-1]
+    vpbroadcastb               xm4, [dstq+strideq*4-1]
+    pmovzxbw                    m1, xm1
+    pmovzxbw                    m2, xm2
+    pmovzxbw                    m3, xm3
+    pmovzxbw                    m4, xm4
+    paddw                       m1, m0
+    paddw                       m2, m0
+    paddw                       m3, m0
+    paddw                       m4, m0
+    vpackuswb                   m1, m1, m2 ; packs per 128-bit lane, so the two rows come out interleaved by 8 bytes
+    vpackuswb                   m3, m3, m4
+    vpermq                      m1, m1, q3120 ; reorder qwords so each 128-bit half holds one complete row
+    vpermq                      m3, m3, q3120
+    movdqa        [dstq+strideq*1], xm1
+    vextracti128  [dstq+strideq*2], m1, 1
+    movdqa       [dstq+stride3q*1], xm3
+    vextracti128  [dstq+strideq*4], m3, 1
+    lea                       dstq, [dstq+strideq*4]
+    dec                 iterationd
+    jg .loop
+    RET
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_plane_*_8(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro H264_PRED16x16_PLANE 1 ; 16x16 plane prediction; %1 = variant (h264, rv40 or svq3), pasted into the symbol name
+cglobal pred16x16_plane_%1_8, 2,9,7 ; r0 = src, r1 = stride (see prototype above)
+    mov          r2, r1           ; +stride
+    neg          r1               ; -stride
+
+    movh         m0, [r0+r1  -1]  ; 8 bytes of the row above, starting one pixel left of the block
+%if cpuflag(ssse3)
+    movhps       m0, [r0+r1  +8]  ; high half: 8 bytes of the row above starting at column 8
+    pmaddubsw    m0, [plane_shuf] ; H coefficients
+%else ; sse2
+    pxor         m2, m2           ; zero, used to widen bytes to words
+    movh         m1, [r0+r1  +8]  ; 8 bytes of the row above starting at column 8
+    punpcklbw    m0, m2           ; left-side bytes -> words
+    punpcklbw    m1, m2           ; right-side bytes -> words
+    pmullw       m0, [pw_m8tom1]  ; weight left side (table defined elsewhere; name suggests -8..-1)
+    pmullw       m1, [pw_1to8]    ; weight right side (table defined elsewhere; name suggests 1..8)
+    paddw        m0, m1           ; per-lane weighted terms
+%endif
+    movhlps      m1, m0           ; horizontal add, step 1: fold high 4 words onto low 4
+    paddw        m0, m1
+    PSHUFLW      m1, m0, 0xE      ; step 2: fold words 2,3 onto words 0,1
+    paddw        m0, m1
+    PSHUFLW      m1, m0, 0x1      ; step 3: fold word 1 onto word 0
+    paddw        m0, m1           ; sum of H coefficients
+
+    lea          r4, [r0+r2*8-1]  ; src + 8*stride - 1 (advanced one more row two lines below)
+    lea          r3, [r0+r2*4-1]  ; r3 -> L[4], where L[y] = src[y*stride - 1] (left column)
+    add          r4, r2           ; r4 -> L[9]
+
+%if ARCH_X86_64
+%define e_reg r8
+%else
+%define e_reg r0
+%endif
+
+    movzx     e_reg, byte [r3+r2*2   ] ; L[6]
+    movzx        r5, byte [r4+r1     ] ; L[8]
+    sub          r5, e_reg             ; r5 = 1*(L[8]-L[6]); r5 accumulates the vertical sum
+
+    movzx     e_reg, byte [r3+r2     ] ; L[5]
+    movzx        r6, byte [r4        ] ; L[9]
+    sub          r6, e_reg
+    lea          r5, [r5+r6*2]         ; += 2*(L[9]-L[5])
+
+    movzx     e_reg, byte [r3+r1     ] ; L[3]
+    movzx        r6, byte [r4+r2*2   ] ; L[11]
+    sub          r6, e_reg
+    lea          r5, [r5+r6*4]         ; += 4*(L[11]-L[3])
+
+    movzx     e_reg, byte [r3        ] ; L[4]
+%if ARCH_X86_64
+    movzx        r7, byte [r4+r2     ] ; L[10]
+    sub          r7, e_reg             ; r7 = L[10]-L[4]; its weight of 3 is applied further down
+%else
+    movzx        r6, byte [r4+r2     ] ; L[10]
+    sub          r6, e_reg
+    lea          r5, [r5+r6*4]
+    sub          r5, r6                ; net += 3*(L[10]-L[4])
+%endif
+
+    lea       e_reg, [r3+r1*4]         ; e_reg -> L[0]
+    lea          r3, [r4+r2*4]         ; r3 -> L[13]
+
+    movzx        r4, byte [e_reg+r2  ] ; L[1]
+    movzx        r6, byte [r3        ] ; L[13]
+    sub          r6, r4
+%if ARCH_X86_64
+    lea          r6, [r7+r6*2]         ; (L[10]-L[4]) + 2*(L[13]-L[1])
+    lea          r5, [r5+r6*2]
+    add          r5, r6                ; net += 3*(L[10]-L[4]) + 6*(L[13]-L[1])
+%else
+    lea          r5, [r5+r6*4]
+    lea          r5, [r5+r6*2]         ; net += 6*(L[13]-L[1])
+%endif
+
+    movzx        r4, byte [e_reg     ] ; L[0]
+%if ARCH_X86_64
+    movzx        r7, byte [r3   +r2  ] ; L[14]
+    sub          r7, r4                ; r7 = L[14]-L[0]
+    sub          r5, r7                ; -1x now, +8x below: net weight 7
+%else
+    movzx        r6, byte [r3   +r2  ] ; L[14]
+    sub          r6, r4
+    lea          r5, [r5+r6*8]
+    sub          r5, r6                ; net += 7*(L[14]-L[0])
+%endif
+
+    movzx        r4, byte [e_reg+r1  ] ; L[-1] (top-left corner)
+    movzx        r6, byte [r3   +r2*2] ; L[15]
+    sub          r6, r4
+%if ARCH_X86_64
+    add          r6, r7                ; fold in (L[14]-L[0]) so it is scaled by 8 as well
+%endif
+    lea          r5, [r5+r6*8]         ; += 8*(L[15]-L[-1]) (plus 8*(L[14]-L[0]) on x86-64)
+
+    movzx        r4, byte [e_reg+r2*2] ; L[2]
+    movzx        r6, byte [r3   +r1  ] ; L[12]
+    sub          r6, r4
+    lea          r5, [r5+r6*4]
+    add          r5, r6           ; sum of V coefficients
+
+%if ARCH_X86_64 == 0
+    mov          r0, r0m          ; x86-32 used r0 as e_reg above; reload src (r0m presumably names arg 0's memory slot)
+%endif
+
+%ifidn %1, h264
+    lea          r5, [r5*5+32]
+    sar          r5, 6            ; V = (5*sum + 32) >> 6
+%elifidn %1, rv40
+    lea          r5, [r5*5]
+    sar          r5, 6            ; V = (5*sum) >> 6, no rounding term
+%elifidn %1, svq3
+    test         r5, r5
+    lea          r6, [r5+3]
+    cmovs        r5, r6           ; negative values get +3 so the shift truncates toward zero
+    sar          r5, 2            ; V/4
+    lea          r5, [r5*5]       ; 5*(V/4)
+    test         r5, r5
+    lea          r6, [r5+15]
+    cmovs        r5, r6           ; negative values get +15 so the shift truncates toward zero
+    sar          r5, 4            ; (5*(V/4))/16
+%endif
+
+    movzx        r4, byte [r0+r1  +15] ; last pixel of the row above: src[-stride + 15]
+    movzx        r3, byte [r3+r2*2   ] ; L[15] (r3 was pointing at L[13])
+    lea          r3, [r3+r4+1]
+    shl          r3, 4                 ; 16*(L[15] + src[-stride+15] + 1)
+
+    movd        r1d, m0                ; low dword of the horizontal sum
+    movsx       r1d, r1w               ; keep only word 0, sign-extended
+%ifnidn %1, svq3
+%ifidn %1, h264
+    lea         r1d, [r1d*5+32]
+%else ; rv40
+    lea         r1d, [r1d*5]
+%endif
+    sar         r1d, 6                 ; H scaled the same way as V above
+%else ; svq3
+    test        r1d, r1d
+    lea         r4d, [r1d+3]
+    cmovs       r1d, r4d
+    sar         r1d, 2           ; H/4
+    lea         r1d, [r1d*5]     ; 5*(H/4)
+    test        r1d, r1d
+    lea         r4d, [r1d+15]
+    cmovs       r1d, r4d
+    sar         r1d, 4           ; (5*(H/4))/16
+%endif
+    movd         m0, r1d          ; m0 word 0 = H
+
+    add         r1d, r5d          ; H + V
+    add         r3d, r1d
+    shl         r1d, 3
+    sub         r3d, r1d          ; a = 16*(L[15] + src[-stride+15] + 1) - 7*(H+V)
+
+    movd         m1, r5d
+    movd         m3, r3d
+    SPLATW       m0, m0, 0        ; H
+    SPLATW       m1, m1, 0        ; V
+    SPLATW       m3, m3, 0        ; a
+%ifidn %1, svq3
+    SWAP          0, 1            ; svq3 only: m0 and m1 trade names, so H and V swap roles below (x86inc SWAP; TODO confirm)
+%endif
+    mova         m2, m0
+    pmullw       m0, [pw_0to7]    ; 0*H, 1*H, ..., 7*H  (words)
+    psllw        m2, 3            ; 8*H in every word
+    paddw        m0, m3           ; a + {0,1,2,3,4,5,6,7}*H
+    paddw        m2, m0           ; a + {8,9,10,11,12,13,14,15}*H
+
+    mov          r4, 8            ; 8 iterations, two rows per iteration
+.loop:
+    mova         m3, m0           ; b[0..7]
+    mova         m4, m2           ; b[8..15]
+    psraw        m3, 5            ; >> 5 (arithmetic)
+    psraw        m4, 5
+    packuswb     m3, m4           ; saturate to 16 unsigned bytes
+    mova       [r0], m3           ; store even row
+    paddw        m0, m1           ; step the accumulators to the next row
+    paddw        m2, m1
+
+    mova         m3, m0           ; b[0..7]
+    mova         m4, m2           ; b[8..15]
+    psraw        m3, 5
+    psraw        m4, 5
+    packuswb     m3, m4
+    mova    [r0+r2], m3           ; store odd row
+    paddw        m0, m1
+    paddw        m2, m1
+
+    lea          r0, [r0+r2*2]    ; advance two rows
+    dec          r4
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+H264_PRED16x16_PLANE h264
+H264_PRED16x16_PLANE rv40
+H264_PRED16x16_PLANE svq3
+INIT_XMM ssse3
+H264_PRED16x16_PLANE h264
+H264_PRED16x16_PLANE rv40
+H264_PRED16x16_PLANE svq3
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_plane_8(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro H264_PRED8x8_PLANE 0 ; 8x8 plane prediction, instantiated below for sse2 and ssse3
+cglobal pred8x8_plane_8, 2,9,7 ; r0 = src, r1 = stride (see prototype above)
+    mov          r2, r1           ; +stride
+    neg          r1               ; -stride
+
+    movd         m0, [r0+r1  -1]  ; 4 bytes of the row above, starting one pixel left of the block
+%if cpuflag(ssse3)
+    movhps       m0, [r0+r1  +4]   ; this reads 4 bytes more than necessary
+    pmaddubsw    m0, [plane8_shuf] ; H coefficients
+%else ; sse2
+    pxor         m2, m2           ; zero, used to widen bytes to words
+    movd         m1, [r0+r1  +4]  ; 4 bytes of the row above starting at column 4
+    punpckldq    m0, m1           ; 8 bytes: columns -1..2 then 4..7
+    punpcklbw    m0, m2           ; bytes -> words
+    pmullw       m0, [pw_m4to4]   ; weight each column (table defined elsewhere)
+%endif
+    movhlps      m1, m0           ; horizontal add: fold high half onto low half
+    paddw        m0, m1
+
+%if notcpuflag(ssse3)
+    PSHUFLW      m1, m0, 0xE      ; sse2 only: fold words 2,3 onto words 0,1
+    paddw        m0, m1
+%endif ; !ssse3
+
+    PSHUFLW      m1, m0, 0x1      ; fold word 1 onto word 0
+    paddw        m0, m1           ; sum of H coefficients
+
+    lea          r4, [r0+r2*4-1]  ; src + 4*stride - 1 (advanced one more row two lines below)
+    lea          r3, [r0     -1]  ; r3 -> L[0], where L[y] = src[y*stride - 1] (left column)
+    add          r4, r2           ; r4 -> L[5]
+
+%if ARCH_X86_64
+%define e_reg r8
+%else
+%define e_reg r0
+%endif
+
+    movzx     e_reg, byte [r3+r2*2   ] ; L[2]
+    movzx        r5, byte [r4+r1     ] ; L[4]
+    sub          r5, e_reg             ; r5 = 1*(L[4]-L[2]); r5 accumulates the vertical sum
+
+    movzx     e_reg, byte [r3        ] ; L[0]
+%if ARCH_X86_64
+    movzx        r7, byte [r4+r2     ] ; L[6]
+    sub          r7, e_reg             ; r7 = L[6]-L[0]
+    sub          r5, r7                ; -1x now, +4x below: net weight 3
+%else
+    movzx        r6, byte [r4+r2     ] ; L[6]
+    sub          r6, e_reg
+    lea          r5, [r5+r6*4]
+    sub          r5, r6                ; net += 3*(L[6]-L[0])
+%endif
+
+    movzx     e_reg, byte [r3+r1     ] ; L[-1] (top-left corner)
+    movzx        r6, byte [r4+r2*2   ] ; L[7]
+    sub          r6, e_reg
+%if ARCH_X86_64
+    add          r6, r7                ; fold in (L[6]-L[0]) so it is scaled by 4 as well
+%endif
+    lea          r5, [r5+r6*4]         ; += 4*(L[7]-L[-1]) (plus 4*(L[6]-L[0]) on x86-64)
+
+    movzx     e_reg, byte [r3+r2     ] ; L[1]
+    movzx        r6, byte [r4        ] ; L[5]
+    sub          r6, e_reg
+    lea          r6, [r5+r6*2]         ; r6 = complete vertical sum, including 2*(L[5]-L[1])
+
+    lea          r5, [r6*9+16]
+    lea          r5, [r5+r6*8]         ; 17*sum + 16
+    sar          r5, 5                 ; V = (17*sum + 16) >> 5
+
+%if ARCH_X86_64 == 0
+    mov          r0, r0m               ; x86-32 used r0 as e_reg above; reload src (r0m presumably names arg 0's memory slot)
+%endif
+
+    movzx        r3, byte [r4+r2*2  ]  ; L[7]
+    movzx        r4, byte [r0+r1  +7]  ; last pixel of the row above: src[-stride + 7]
+    lea          r3, [r3+r4+1]
+    shl          r3, 4                 ; 16*(L[7] + src[-stride+7] + 1)
+    movd        r1d, m0                ; low dword of the horizontal sum
+    movsx       r1d, r1w               ; keep only word 0, sign-extended
+    imul        r1d, 17
+    add         r1d, 16
+    sar         r1d, 5                 ; H = (17*sum + 16) >> 5
+    movd         m0, r1d               ; m0 word 0 = H
+    add         r1d, r5d               ; H + V
+    sub         r3d, r1d
+    add         r1d, r1d               ; 2*(H+V)
+    sub         r3d, r1d          ; a = 16*(L[7] + src[-stride+7] + 1) - 3*(H+V)
+
+    movd         m1, r5d
+    movd         m3, r3d
+    SPLATW       m0, m0, 0        ; H
+    SPLATW       m1, m1, 0        ; V
+    SPLATW       m3, m3, 0        ; a
+    pmullw       m0, [pw_0to7]    ; 0*H, 1*H, ..., 7*H  (words)
+    paddw        m0, m3           ; a + {0,1,2,3,4,5,6,7}*H
+
+    mov          r4, 4            ; 4 iterations, two rows per iteration
+ALIGN 16
+.loop:
+    mova         m3, m0           ; b[0..7]
+    paddw        m0, m1           ; step to the next row
+    psraw        m3, 5
+    mova         m4, m0           ; V+b[0..7]
+    paddw        m0, m1           ; step to the row after that
+    psraw        m4, 5
+    packuswb     m3, m4           ; low 8 bytes = first row, high 8 bytes = second row
+    movh       [r0], m3
+    movhps  [r0+r2], m3
+
+    lea          r0, [r0+r2*2]    ; advance two rows
+    dec          r4
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+H264_PRED8x8_PLANE
+INIT_XMM ssse3
+H264_PRED8x8_PLANE
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_vertical_8(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmx
+cglobal pred8x8_vertical_8, 2,2 ; r0 = src, r1 = stride; copies the row above into all 8 rows
+    sub    r0, r1               ; r0 -> row above the block
+    movq  mm0, [r0]             ; the 8 pixels to replicate
+%rep 3
+    movq [r0+r1*1], mm0         ; two rows per repetition
+    movq [r0+r1*2], mm0
+    lea    r0, [r0+r1*2]        ; advance two rows
+%endrep
+    movq [r0+r1*1], mm0         ; rows 6 and 7
+    movq [r0+r1*2], mm0
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_horizontal_8(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED8x8_H 0 ; 8x8 horizontal prediction, instantiated below for mmxext and ssse3
+cglobal pred8x8_horizontal_8, 2,3 ; r0 = src, r1 = stride
+    mov       r2, 4             ; 4 iterations, two rows per iteration
+%if cpuflag(ssse3)
+    mova      m2, [pb_3]        ; constant handed to SPLATB_LOAD as its third operand (ssse3 only)
+%endif
+.loop:
+    SPLATB_LOAD m0, r0+r1*0-1, m2 ; macro defined elsewhere; presumably broadcasts the byte left of this row
+    SPLATB_LOAD m1, r0+r1*1-1, m2 ; same for the next row
+    mova [r0+r1*0], m0          ; write 8 pixels of the first row
+    mova [r0+r1*1], m1          ; write 8 pixels of the second row
+    lea       r0, [r0+r1*2]     ; advance two rows
+    dec       r2
+    jg .loop
+    RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8_H
+INIT_MMX ssse3
+PRED8x8_H
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_top_dc_8_mmxext(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_MMX mmxext
+cglobal pred8x8_top_dc_8, 2,5 ; r0 = src, r1 = stride; DC from the row above, separately for left and right 4 columns
+    sub         r0, r1         ; r0 -> row above the block
+    movq       mm0, [r0]       ; 8 top pixels
+    pxor       mm1, mm1
+    pxor       mm2, mm2        ; zero
+    lea         r2, [r0+r1*2]  ; r2 = src + 1*stride
+    punpckhbw  mm1, mm0        ; top[4..7] interleaved with zero bytes
+    punpcklbw  mm0, mm2        ; top[0..3] interleaved with zero bytes
+    psadbw     mm1, mm2        ; s1
+    lea         r3, [r2+r1*2]  ; r3 = src + 3*stride
+    psadbw     mm0, mm2        ; s0
+    psrlw      mm1, 1
+    psrlw      mm0, 1
+    pavgw      mm1, mm2        ; ((s1>>1) + 1) >> 1 == (s1 + 2) >> 2
+    lea         r4, [r3+r1*2]  ; NOTE(review): r4 is not read again in this function
+    pavgw      mm0, mm2        ; (s0 + 2) >> 2
+    pshufw     mm1, mm1, 0     ; dc1 (w)
+    pshufw     mm0, mm0, 0     ; dc0 (w)
+    packuswb   mm0, mm1        ; dc0,dc1 (b)
+    movq [r0+r1*1], mm0        ; row 0
+    movq [r0+r1*2], mm0        ; row 1
+    lea         r0, [r3+r1*2]  ; r0 = src + 5*stride
+    movq [r2+r1*1], mm0        ; row 2
+    movq [r2+r1*2], mm0        ; row 3
+    movq [r3+r1*1], mm0        ; row 4
+    movq [r3+r1*2], mm0        ; row 5
+    movq [r0+r1*1], mm0        ; row 6
+    movq [r0+r1*2], mm0        ; row 7
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_dc_8_mmxext(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+cglobal pred8x8_dc_8, 2,5 ; r0 = src, r1 = stride; per-quadrant DC from top row and left column
+    sub       r0, r1            ; r0 -> row above the block
+    pxor      m7, m7            ; zero
+    movd      m0, [r0+0]        ; top[0..3]
+    movd      m1, [r0+4]        ; top[4..7]
+    psadbw    m0, m7            ; s0
+    mov       r4, r0            ; remember src - stride
+    psadbw    m1, m7            ; s1
+
+    movzx    r2d, byte [r0+r1*1-1] ; left pixel of row 0
+    movzx    r3d, byte [r0+r1*2-1] ; left pixel of row 1
+    lea       r0, [r0+r1*2]
+    add      r2d, r3d
+    movzx    r3d, byte [r0+r1*1-1] ; left pixel of row 2
+    add      r2d, r3d
+    movzx    r3d, byte [r0+r1*2-1] ; left pixel of row 3
+    add      r2d, r3d
+    lea       r0, [r0+r1*2]
+    movd      m2, r2d            ; s2
+    movzx    r2d, byte [r0+r1*1-1] ; left pixel of row 4
+    movzx    r3d, byte [r0+r1*2-1] ; left pixel of row 5
+    lea       r0, [r0+r1*2]
+    add      r2d, r3d
+    movzx    r3d, byte [r0+r1*1-1] ; left pixel of row 6
+    add      r2d, r3d
+    movzx    r3d, byte [r0+r1*2-1] ; left pixel of row 7
+    add      r2d, r3d
+    movd      m3, r2d            ; s3
+
+    punpcklwd m0, m1            ; s0, s1
+    mov       r0, r4            ; back to src - stride
+    punpcklwd m2, m3            ; s2, s3
+    punpckldq m0, m2            ; s0, s1, s2, s3
+    pshufw    m3, m0, 11110110b ; s2, s1, s3, s3
+    lea       r2, [r0+r1*2]     ; r2 = src + 1*stride
+    pshufw    m0, m0, 01110100b ; s0, s1, s3, s1
+    paddw     m0, m3            ; s0+s2, 2*s1, 2*s3, s1+s3
+    lea       r3, [r2+r1*2]     ; r3 = src + 3*stride
+    psrlw     m0, 2
+    pavgw     m0, m7            ; s0+s2, s1, s3, s1+s3
+    lea       r4, [r3+r1*2]     ; r4 = src + 5*stride
+    packuswb  m0, m0            ; four DC values as bytes (repeated twice)
+    punpcklbw m0, m0            ; each DC byte doubled
+    movq      m1, m0
+    punpcklbw m0, m0            ; m0 = dc0 x4, dc1 x4 (rows 0..3)
+    punpckhbw m1, m1            ; m1 = dc2 x4, dc3 x4 (rows 4..7)
+    movq [r0+r1*1], m0          ; row 0
+    movq [r0+r1*2], m0          ; row 1
+    movq [r2+r1*1], m0          ; row 2
+    movq [r2+r1*2], m0          ; row 3
+    movq [r3+r1*1], m1          ; row 4
+    movq [r3+r1*2], m1          ; row 5
+    movq [r4+r1*1], m1          ; row 6
+    movq [r4+r1*2], m1          ; row 7
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_dc_rv40_8(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+cglobal pred8x8_dc_rv40_8, 2,7 ; r0 = src, r1 = stride; one DC for the whole 8x8 block
+    mov       r4, r0            ; keep src for the store loop
+    sub       r0, r1            ; r0 -> row above the block
+    pxor      mm0, mm0
+    psadbw    mm0, [r0]         ; sum of the 8 top pixels
+    dec        r0               ; r0 -> top-left corner (src - stride - 1)
+    movzx     r5d, byte [r0+r1*1] ; left pixel of row 0
+    movd      r6d, mm0          ; r6d = top sum
+    lea        r0, [r0+r1*2]    ; r0 -> left pixel of row 1
+%rep 3
+    movzx     r2d, byte [r0+r1*0] ; left pixel of an odd row (1, 3, 5)
+    movzx     r3d, byte [r0+r1*1] ; left pixel of the following even row (2, 4, 6)
+    add       r5d, r2d
+    add       r6d, r3d
+    lea        r0, [r0+r1*2]
+%endrep
+    movzx     r2d, byte [r0+r1*0] ; left pixel of row 7
+    add       r5d, r6d          ; merge the two partial sums
+    lea       r2d, [r2+r5+8]    ; total of 16 pixels + 8 for rounding
+    shr       r2d, 4            ; dc = (sum + 8) >> 4
+    movd      mm0, r2d
+    punpcklbw mm0, mm0          ; dc byte duplicated into a word
+    pshufw    mm0, mm0, 0       ; broadcast that word: dc in all 8 bytes
+    mov       r3d, 4            ; 4 iterations, two rows per iteration
+.loop:
+    movq [r4+r1*0], mm0
+    movq [r4+r1*1], mm0
+    lea   r4, [r4+r1*2]         ; advance two rows
+    dec   r3d
+    jg .loop
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+INIT_XMM sse2
+cglobal pred8x8_tm_vp8_8, 2,6,4 ; r0 = src, r1 = stride; pixel = clip(top[x] + left[y] - topleft)
+    sub          r0, r1                ; r0 -> row above the block
+    pxor       xmm1, xmm1              ; zero
+    movq       xmm0, [r0]              ; 8 top pixels
+    punpcklbw  xmm0, xmm1              ; top pixels widened to words
+    movzx       r4d, byte [r0-1]       ; top-left pixel
+    mov         r5d, 4                 ; 4 iterations, two rows per iteration
+.loop:
+    movzx       r2d, byte [r0+r1*1-1]  ; left pixel of the first row
+    movzx       r3d, byte [r0+r1*2-1]  ; left pixel of the second row
+    sub         r2d, r4d               ; left - topleft
+    sub         r3d, r4d
+    movd       xmm2, r2d
+    movd       xmm3, r3d
+    pshuflw    xmm2, xmm2, 0           ; broadcast word 0 across the low 4 words
+    pshuflw    xmm3, xmm3, 0
+    punpcklqdq xmm2, xmm2              ; ...and into the high 4 words
+    punpcklqdq xmm3, xmm3
+    paddw      xmm2, xmm0              ; + top
+    paddw      xmm3, xmm0
+    packuswb   xmm2, xmm3              ; saturate to bytes: low 8 = first row, high 8 = second row
+    movq   [r0+r1*1], xmm2
+    movhps [r0+r1*2], xmm2
+    lea          r0, [r0+r1*2]         ; advance two rows
+    dec         r5d
+    jg .loop
+    RET
+
+INIT_XMM ssse3
+cglobal pred8x8_tm_vp8_8, 2,3,6 ; r0 = src, r1 = stride; ssse3 version using pshufb for the broadcasts
+    sub          r0, r1                ; r0 -> row above the block
+    movdqa     xmm4, [tm_shuf]         ; shuffle mask defined elsewhere; presumably spreads byte 3 into every word
+    pxor       xmm1, xmm1              ; zero
+    movq       xmm0, [r0]              ; 8 top pixels
+    punpcklbw  xmm0, xmm1              ; top pixels widened to words
+    movd       xmm5, [r0-4]            ; 4 bytes ending at the top-left pixel
+    pshufb     xmm5, xmm4              ; top-left as words (given the mask assumption above)
+    mov         r2d, 4                 ; 4 iterations, two rows per iteration
+.loop:
+    movd       xmm2, [r0+r1*1-4]       ; 4 bytes ending at the left pixel of the first row
+    movd       xmm3, [r0+r1*2-4]       ; 4 bytes ending at the left pixel of the second row
+    pshufb     xmm2, xmm4
+    pshufb     xmm3, xmm4
+    psubw      xmm2, xmm5              ; left - topleft
+    psubw      xmm3, xmm5
+    paddw      xmm2, xmm0              ; + top
+    paddw      xmm3, xmm0
+    packuswb   xmm2, xmm3              ; saturate to bytes: low 8 = first row, high 8 = second row
+    movq   [r0+r1*1], xmm2
+    movhps [r0+r1*2], xmm2
+    lea          r0, [r0+r1*2]         ; advance two rows
+    dec         r2d
+    jg .loop
+    RET
+
+; dest, left, right, src, tmp
+; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
+%macro PRED4x4_LOWPASS 5 ; per-byte 1-2-1 filter; clobbers %2, %3 and %5, leaves %4 intact
+    mova    %5, %2          ; save left
+    pavgb   %2, %3          ; (left + right + 1) >> 1
+    pxor    %3, %5          ; left ^ right
+    mova    %1, %4          ; dest = src
+    pand    %3, [pb_1]      ; bit 0 of the xor = the bit pavgb rounded up (pb_1 defined elsewhere)
+    psubusb %2, %3          ; undo that rounding: (left + right) >> 1
+    pavgb   %1, %2          ; (src + ((left + right) >> 1) + 1) >> 1
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_top_dc_8(uint8_t *src, int has_topleft, int has_topright,
+;                           ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro PRED8x8L_TOP_DC 0 ; DC of the low-pass filtered top row, instantiated below for mmxext and ssse3
+cglobal pred8x8l_top_dc_8, 4,4 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride
+    sub          r0, r3         ; r0 -> row above the block
+    pxor        mm7, mm7        ; zero, later the psadbw accumulator
+    movq        mm0, [r0-8]     ; 8 bytes left of the top row (last byte = top-left pixel)
+    movq        mm3, [r0]       ; top row t[0..7]
+    movq        mm1, [r0+8]     ; top-right row
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0 ; presumably mm2 = topleft, t[0..6], i.e. t[n-1] (PALIGNR macro defined elsewhere)
+    PALIGNR     mm1, mm4, 1, mm4 ; presumably mm1 = t[1..7], topright[0], i.e. t[n+1]
+    test        r1d, r1d ; top_left
+    jz .fix_lt_2
+    test        r2d, r2d ; top_right
+    jz .fix_tr_1
+    jmp .body
+.fix_lt_2:                      ; no top-left: substitute the lowest byte of mm2 with that of mm3
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56         ; xor difference of the lowest byte only
+    pxor        mm2, mm5        ; lowest byte of mm2 now equals lowest byte of mm3
+    test        r2d, r2d ; top_right
+    jnz .body
+.fix_tr_1:                      ; no top-right: substitute the highest byte of mm1 with that of mm3
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56         ; xor difference of the highest byte only
+    pxor        mm1, mm5        ; highest byte of mm1 now equals highest byte of mm3
+.body:
+    PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 ; mm0 = filtered top row
+    psadbw   mm7, mm0           ; sum of the 8 filtered bytes
+    paddw    mm7, [pw_4]        ; rounding term (pw_4 defined elsewhere)
+    psrlw    mm7, 3             ; dc = (sum + 4) >> 3, assuming pw_4 holds 4
+    pshufw   mm7, mm7, 0        ; broadcast word 0
+    packuswb mm7, mm7           ; dc in all 8 bytes
+%rep 3
+    movq [r0+r3*1], mm7         ; two rows per repetition
+    movq [r0+r3*2], mm7
+    lea    r0, [r0+r3*2]
+%endrep
+    movq [r0+r3*1], mm7         ; rows 6 and 7
+    movq [r0+r3*2], mm7
+    RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8L_TOP_DC
+INIT_MMX ssse3
+PRED8x8L_TOP_DC
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_dc_8(uint8_t *src, int has_topleft, int has_topright,
+;                       ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED8x8L_DC 0 ; DC of the filtered left column plus filtered top row, for mmxext and ssse3
+cglobal pred8x8l_dc_8, 4,5 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride
+    sub          r0, r3           ; r0 -> row above the block
+    lea          r4, [r0+r3*2]    ; r4 = src + 1*stride
+    movq        mm0, [r0+r3*1-8]  ; 8 bytes ending at the left pixel of row 0
+    punpckhbw   mm0, [r0+r3*0-8]  ; interleave with the bytes ending at the top-left pixel
+    movq        mm1, [r4+r3*1-8]  ; 8 bytes ending at the left pixel of row 2
+    punpckhbw   mm1, [r0+r3*2-8]  ; interleave with row 1
+    mov          r4, r0           ; remember src - stride
+    punpckhwd   mm1, mm0          ; high dword = l2, l1, l0, topleft
+    lea          r0, [r0+r3*4]    ; r0 = src + 3*stride
+    movq        mm2, [r0+r3*1-8]  ; row 4
+    punpckhbw   mm2, [r0+r3*0-8]  ; interleave with row 3
+    lea          r0, [r0+r3*2]    ; r0 = src + 5*stride
+    movq        mm3, [r0+r3*1-8]  ; row 6
+    punpckhbw   mm3, [r0+r3*0-8]  ; interleave with row 5
+    punpckhwd   mm3, mm2          ; high dword = l6, l5, l4, l3
+    punpckhdq   mm3, mm1          ; mm3 = l6 l5 l4 l3 l2 l1 l0 topleft (lowest byte first)
+    lea          r0, [r0+r3*2]    ; r0 = src + 7*stride
+    movq        mm0, [r0+r3*0-8]  ; 8 bytes ending at the left pixel of row 7
+    movq        mm1, [r4]         ; top row
+    mov          r0, r4           ; r0 = src - stride again
+    movq        mm4, mm3
+    movq        mm2, mm3
+    PALIGNR     mm4, mm0, 7, mm0  ; presumably mm4 = l7 l6 ... l0 (PALIGNR macro defined elsewhere)
+    PALIGNR     mm1, mm2, 1, mm2  ; presumably mm1 = l5 ... l0, topleft, top[0]
+    test        r1d, r1d
+    jnz .do_left
+.fix_lt_1:                        ; no top-left: patch byte 6 of mm1
+    movq        mm5, mm3
+    pxor        mm5, mm4
+    psrlq       mm5, 56           ; xor of the highest bytes of mm3 and mm4
+    psllq       mm5, 48           ; moved to byte 6
+    pxor        mm1, mm5          ; byte 6 of mm1 ^= that difference
+    jmp .do_left
+.fix_lt_2:                        ; no top-left: lowest byte of mm2 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56
+    pxor        mm2, mm5
+    test        r2d, r2d
+    jnz .body
+.fix_tr_1:                        ; no top-right: highest byte of mm1 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56
+    pxor        mm1, mm5
+    jmp .body
+.do_left:
+    movq        mm0, mm4          ; save mm4, the LOWPASS below clobbers it
+    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; first filter pass over the left-edge vector
+    movq        mm4, mm0          ; restore
+    movq        mm7, mm2
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second pass, shifted by one sample
+    psllq       mm1, 56           ; keep only its lowest byte, moved to the top
+    PALIGNR     mm7, mm1, 7, mm3  ; mm7 = filtered left column, summed at .body
+    movq        mm0, [r0-8]       ; now load the top edge: bytes left of the top row
+    movq        mm3, [r0]         ; top row
+    movq        mm1, [r0+8]       ; top-right row
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0  ; presumably t[n-1]
+    PALIGNR     mm1, mm4, 1, mm4  ; presumably t[n+1]
+    test        r1d, r1d
+    jz .fix_lt_2
+    test        r2d, r2d
+    jz .fix_tr_1
+.body:
+    lea          r1, [r0+r3*2]    ; r1 = src + 1*stride (has_topleft no longer needed)
+    PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 ; mm6 = filtered top row
+    pxor        mm0, mm0
+    pxor        mm1, mm1
+    lea          r2, [r1+r3*2]    ; r2 = src + 3*stride
+    psadbw      mm0, mm7          ; sum of the filtered left bytes
+    psadbw      mm1, mm6          ; sum of the filtered top bytes
+    paddw       mm0, [pw_8]       ; rounding term (pw_8 defined elsewhere)
+    paddw       mm0, mm1
+    lea          r4, [r2+r3*2]    ; r4 = src + 5*stride
+    psrlw       mm0, 4            ; dc = (left sum + top sum + 8) >> 4, assuming pw_8 holds 8
+    pshufw      mm0, mm0, 0       ; broadcast word 0
+    packuswb    mm0, mm0          ; dc in all 8 bytes
+    movq [r0+r3*1], mm0           ; row 0
+    movq [r0+r3*2], mm0           ; row 1
+    movq [r1+r3*1], mm0           ; row 2
+    movq [r1+r3*2], mm0           ; row 3
+    movq [r2+r3*1], mm0           ; row 4
+    movq [r2+r3*2], mm0           ; row 5
+    movq [r4+r3*1], mm0           ; row 6
+    movq [r4+r3*2], mm0           ; row 7
+    RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8L_DC
+INIT_MMX ssse3
+PRED8x8L_DC
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_horizontal_8(uint8_t *src, int has_topleft,
+;                               int has_topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED8x8L_HORIZONTAL 0 ; each row filled with its filtered left pixel, for mmxext and ssse3
+cglobal pred8x8l_horizontal_8, 4,4 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride
+    sub          r0, r3           ; r0 -> row above the block
+    lea          r2, [r0+r3*2]    ; r2 = src + 1*stride (has_topright is not used by this function)
+    movq        mm0, [r0+r3*1-8]  ; 8 bytes ending at the left pixel of row 0
+    test        r1d, r1d          ; flags survive the lea below
+    lea          r1, [r0+r3]      ; default: row 0 stands in for the missing top-left row
+    cmovnz       r1, r0           ; has_topleft != 0: use the real row above
+    punpckhbw   mm0, [r1+r3*0-8]  ; interleave row 0 with the chosen top-left source
+    movq        mm1, [r2+r3*1-8]  ; row 2
+    punpckhbw   mm1, [r0+r3*2-8]  ; interleave with row 1
+    mov          r2, r0           ; remember src - stride
+    punpckhwd   mm1, mm0          ; high dword = l2, l1, l0, topleft (or l0 again)
+    lea          r0, [r0+r3*4]    ; r0 = src + 3*stride
+    movq        mm2, [r0+r3*1-8]  ; row 4
+    punpckhbw   mm2, [r0+r3*0-8]  ; interleave with row 3
+    lea          r0, [r0+r3*2]    ; r0 = src + 5*stride
+    movq        mm3, [r0+r3*1-8]  ; row 6
+    punpckhbw   mm3, [r0+r3*0-8]  ; interleave with row 5
+    punpckhwd   mm3, mm2          ; high dword = l6, l5, l4, l3
+    punpckhdq   mm3, mm1          ; mm3 = l6 l5 l4 l3 l2 l1 l0 topleft (lowest byte first)
+    lea          r0, [r0+r3*2]    ; r0 = src + 7*stride
+    movq        mm0, [r0+r3*0-8]  ; 8 bytes ending at the left pixel of row 7
+    movq        mm1, [r1+r3*0-8]  ; 8 bytes ending at the chosen top-left pixel
+    mov          r0, r2           ; r0 = src - stride again
+    movq        mm4, mm3
+    movq        mm2, mm3
+    PALIGNR     mm4, mm0, 7, mm0  ; presumably mm4 = l7 l6 ... l0 (PALIGNR macro defined elsewhere)
+    PALIGNR     mm1, mm2, 1, mm2  ; presumably mm1 = mm3 shifted down one byte with the top-left byte appended
+    movq        mm0, mm4          ; save mm4, the LOWPASS below clobbers it
+    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; first filter pass over the left-edge vector
+    movq        mm4, mm0          ; restore
+    movq        mm7, mm2
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second pass, shifted by one sample
+    psllq       mm1, 56           ; keep only its lowest byte, moved to the top
+    PALIGNR     mm7, mm1, 7, mm3  ; mm7 = filtered left column
+    movq        mm3, mm7
+    lea         r1, [r0+r3*2]     ; r1 = src + 1*stride
+    movq       mm7, mm3           ; NOTE(review): mm7 already equals mm3 at this point
+    punpckhbw  mm3, mm3           ; bytes 4..7 doubled into words
+    punpcklbw  mm7, mm7           ; bytes 0..3 doubled into words
+    pshufw     mm0, mm3, 0xff     ; byte 7 broadcast -> row 0
+    pshufw     mm1, mm3, 0xaa     ; byte 6 broadcast -> row 1
+    lea         r2, [r1+r3*2]     ; r2 = src + 3*stride
+    pshufw     mm2, mm3, 0x55     ; byte 5 -> row 2
+    pshufw     mm3, mm3, 0x00     ; byte 4 -> row 3
+    pshufw     mm4, mm7, 0xff     ; byte 3 -> row 4
+    pshufw     mm5, mm7, 0xaa     ; byte 2 -> row 5
+    pshufw     mm6, mm7, 0x55     ; byte 1 -> row 6
+    pshufw     mm7, mm7, 0x00     ; byte 0 -> row 7
+    movq [r0+r3*1], mm0
+    movq [r0+r3*2], mm1
+    movq [r1+r3*1], mm2
+    movq [r1+r3*2], mm3
+    movq [r2+r3*1], mm4
+    movq [r2+r3*2], mm5
+    lea         r0, [r2+r3*2]     ; r0 = src + 5*stride
+    movq [r0+r3*1], mm6
+    movq [r0+r3*2], mm7
+    RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8L_HORIZONTAL
+INIT_MMX ssse3
+PRED8x8L_HORIZONTAL
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_vertical_8(uint8_t *src, int has_topleft, int has_topright,
+;                             ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED8x8L_VERTICAL 0 ; every row = low-pass filtered top row, for mmxext and ssse3
+cglobal pred8x8l_vertical_8, 4,4 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride
+    sub          r0, r3         ; r0 -> row above the block
+    movq        mm0, [r0-8]     ; 8 bytes left of the top row (last byte = top-left pixel)
+    movq        mm3, [r0]       ; top row t[0..7]
+    movq        mm1, [r0+8]     ; top-right row
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0 ; presumably mm2 = topleft, t[0..6], i.e. t[n-1] (PALIGNR macro defined elsewhere)
+    PALIGNR     mm1, mm4, 1, mm4 ; presumably mm1 = t[1..7], topright[0], i.e. t[n+1]
+    test        r1d, r1d ; top_left
+    jz .fix_lt_2
+    test        r2d, r2d ; top_right
+    jz .fix_tr_1
+    jmp .body
+.fix_lt_2:                      ; no top-left: lowest byte of mm2 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56         ; xor difference of the lowest byte only
+    pxor        mm2, mm5
+    test        r2d, r2d ; top_right
+    jnz .body
+.fix_tr_1:                      ; no top-right: highest byte of mm1 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56         ; xor difference of the highest byte only
+    pxor        mm1, mm5
+.body:
+    PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 ; mm0 = filtered top row
+%rep 3
+    movq [r0+r3*1], mm0         ; two rows per repetition
+    movq [r0+r3*2], mm0
+    lea    r0, [r0+r3*2]
+%endrep
+    movq [r0+r3*1], mm0         ; rows 6 and 7
+    movq [r0+r3*2], mm0
+    RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8L_VERTICAL
+INIT_MMX ssse3
+PRED8x8L_VERTICAL
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_down_left_8(uint8_t *src, int has_topleft,
+;                              int has_topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED8x8L_DOWN_LEFT 0 ; diagonal down-left from filtered top and top-right rows, for sse2 and ssse3
+cglobal pred8x8l_down_left_8, 4,4 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride
+    sub          r0, r3         ; r0 -> row above the block
+    movq        mm0, [r0-8]     ; 8 bytes left of the top row (last byte = top-left pixel)
+    movq        mm3, [r0]       ; top row t[0..7]
+    movq        mm1, [r0+8]     ; top-right row
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0 ; presumably t[n-1] (PALIGNR macro defined elsewhere)
+    PALIGNR     mm1, mm4, 1, mm4 ; presumably t[n+1]
+    test        r1d, r1d ; top_left
+    jz .fix_lt_2
+    test        r2d, r2d ; top_right
+    jz .fix_tr_1
+    jmp .do_top
+.fix_lt_2:                      ; no top-left: lowest byte of mm2 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56
+    pxor        mm2, mm5
+    test        r2d, r2d ; top_right
+    jnz .do_top
+.fix_tr_1:                      ; no top-right: highest byte of mm1 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56
+    pxor        mm1, mm5
+    jmp .do_top
+.fix_tr_2:                      ; no top-right: build a stand-in from the last top pixel
+    punpckhbw   mm3, mm3        ; top[4..7], each byte doubled
+    pshufw      mm1, mm3, 0xFF  ; mm1 = top[7] in all 8 bytes
+    jmp .do_topright
+.do_top:
+    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; mm4 = filtered top row (mm3 still holds the raw top row)
+    movq2dq    xmm3, mm4        ; low half of xmm3 = filtered top
+    test        r2d, r2d ; top_right
+    jz .fix_tr_2
+    movq        mm0, [r0+8]     ; top-right row tr[0..7]
+    movq        mm5, mm0
+    movq        mm2, mm0
+    movq        mm4, mm0
+    psrlq       mm5, 56         ; tr[7] in the lowest byte
+    PALIGNR     mm2, mm3, 7, mm3 ; presumably top[7], tr[0..6]
+    PALIGNR     mm5, mm4, 1, mm4 ; presumably tr[1..7], tr[7]
+    PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 ; mm1 = filtered top-right row
+.do_topright:
+    movq2dq    xmm4, mm1
+    psrlq       mm1, 56         ; last top-right byte in the lowest position
+    movq2dq    xmm5, mm1
+    lea         r1, [r0+r3*2]   ; r1 = src + 1*stride
+    pslldq    xmm4, 8           ; top-right bytes moved to the high half
+    por       xmm3, xmm4        ; xmm3 = 16-byte edge e[0..15]: top then top-right
+    movdqa    xmm2, xmm3
+    psrldq    xmm2, 1           ; e[1..15], then zero
+    pslldq    xmm5, 15          ; last byte moved to position 15
+    por       xmm2, xmm5        ; xmm2 = e[1..15], e[15]
+    lea         r2, [r1+r3*2]   ; r2 = src + 3*stride
+    movdqa    xmm1, xmm3
+    pslldq    xmm1, 1           ; xmm1 = 0, e[0..14]
+INIT_XMM cpuname
+    PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4 ; 1-2-1 filter along the 16-byte edge
+    psrldq    xmm0, 1           ; each row starts one byte further along the filtered edge
+    movq [r0+r3*1], xmm0        ; row 0
+    psrldq    xmm0, 1
+    movq [r0+r3*2], xmm0        ; row 1
+    psrldq    xmm0, 1
+    lea         r0, [r2+r3*2]   ; r0 = src + 5*stride
+    movq [r1+r3*1], xmm0        ; row 2
+    psrldq    xmm0, 1
+    movq [r1+r3*2], xmm0        ; row 3
+    psrldq    xmm0, 1
+    movq [r2+r3*1], xmm0        ; row 4
+    psrldq    xmm0, 1
+    movq [r2+r3*2], xmm0        ; row 5
+    psrldq    xmm0, 1
+    movq [r0+r3*1], xmm0        ; row 6
+    psrldq    xmm0, 1
+    movq [r0+r3*2], xmm0        ; row 7
+    RET
+%endmacro
+
+INIT_MMX sse2
+PRED8x8L_DOWN_LEFT
+INIT_MMX ssse3
+PRED8x8L_DOWN_LEFT
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_down_right_8(uint8_t *src, int has_topleft,
+;                               int has_topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED8x8L_DOWN_RIGHT 0 ; diagonal down-right from filtered left column, corner and top row
+cglobal pred8x8l_down_right_8, 4,5 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride
+    sub          r0, r3           ; r0 -> row above the block
+    lea          r4, [r0+r3*2]    ; r4 = src + 1*stride
+    movq        mm0, [r0+r3*1-8]  ; 8 bytes ending at the left pixel of row 0
+    punpckhbw   mm0, [r0+r3*0-8]  ; interleave with the bytes ending at the top-left pixel
+    movq        mm1, [r4+r3*1-8]  ; row 2
+    punpckhbw   mm1, [r0+r3*2-8]  ; interleave with row 1
+    mov          r4, r0           ; r4 = src - stride, kept until the final stores
+    punpckhwd   mm1, mm0          ; high dword = l2, l1, l0, topleft
+    lea          r0, [r0+r3*4]    ; r0 = src + 3*stride
+    movq        mm2, [r0+r3*1-8]  ; row 4
+    punpckhbw   mm2, [r0+r3*0-8]  ; interleave with row 3
+    lea          r0, [r0+r3*2]    ; r0 = src + 5*stride
+    movq        mm3, [r0+r3*1-8]  ; row 6
+    punpckhbw   mm3, [r0+r3*0-8]  ; interleave with row 5
+    punpckhwd   mm3, mm2          ; high dword = l6, l5, l4, l3
+    punpckhdq   mm3, mm1          ; mm3 = l6 l5 l4 l3 l2 l1 l0 topleft (lowest byte first)
+    lea          r0, [r0+r3*2]    ; r0 = src + 7*stride
+    movq        mm0, [r0+r3*0-8]  ; 8 bytes ending at the left pixel of row 7
+    movq        mm1, [r4]         ; top row
+    mov          r0, r4           ; r0 = src - stride again
+    movq        mm4, mm3
+    movq        mm2, mm3
+    PALIGNR     mm4, mm0, 7, mm0  ; presumably mm4 = l7 l6 ... l0 (PALIGNR macro defined elsewhere)
+    PALIGNR     mm1, mm2, 1, mm2  ; presumably mm1 = l5 ... l0, topleft, top[0]
+    test        r1d, r1d
+    jz .fix_lt_1                  ; no top-left: patch mm1 first
+    jmp .do_left
+.fix_lt_1:                        ; patch byte 6 of mm1
+    movq        mm5, mm3
+    pxor        mm5, mm4
+    psrlq       mm5, 56           ; xor of the highest bytes of mm3 and mm4
+    psllq       mm5, 48           ; moved to byte 6
+    pxor        mm1, mm5
+    jmp .do_left
+.fix_lt_2:                        ; no top-left: lowest byte of mm2 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56
+    pxor        mm2, mm5
+    test        r2d, r2d
+    jnz .do_top
+.fix_tr_1:                        ; no top-right: highest byte of mm1 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56
+    pxor        mm1, mm5
+    jmp .do_top
+.do_left:
+    movq        mm0, mm4          ; save mm4, the LOWPASS below clobbers it
+    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; first filter pass over the left-edge vector
+    movq        mm4, mm0          ; restore
+    movq        mm7, mm2
+    movq2dq    xmm3, mm2          ; keep the first-pass result in xmm3
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second pass, shifted by one sample
+    psllq       mm1, 56           ; keep only its lowest byte, moved to the top
+    PALIGNR     mm7, mm1, 7, mm3
+    movq2dq    xmm1, mm7          ; keep the shifted combination in xmm1
+    movq        mm0, [r0-8]       ; now load the top edge: bytes left of the top row
+    movq        mm3, [r0]         ; top row
+    movq        mm1, [r0+8]       ; top-right row
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0  ; presumably t[n-1]
+    PALIGNR     mm1, mm4, 1, mm4  ; presumably t[n+1]
+    test        r1d, r1d
+    jz .fix_lt_2
+    test        r2d, r2d
+    jz .fix_tr_1
+.do_top:
+    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; mm4 = filtered top row
+    movq2dq   xmm4, mm4
+    lea         r1, [r0+r3*2]     ; r1 = src + 1*stride
+    movdqa    xmm0, xmm3
+    pslldq    xmm4, 8             ; filtered top moved to the high half
+    por       xmm3, xmm4          ; xmm3 = left-derived low half | top high half
+    lea         r2, [r1+r3*2]     ; r2 = src + 3*stride
+    pslldq    xmm4, 1             ; top shifted one byte further up
+    por       xmm1, xmm4
+    psrldq    xmm0, 7
+    pslldq    xmm0, 15
+    psrldq    xmm0, 7             ; the three shifts isolate byte 7 of the old xmm3 and park it at byte 8
+    por       xmm1, xmm0          ; xmm1 = edge shifted one byte up
+    lea         r0, [r2+r3*2]     ; r0 = src + 5*stride
+    movdqa    xmm2, xmm3
+    psrldq    xmm2, 1             ; xmm2 = edge shifted one byte down
+INIT_XMM cpuname
+    PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4 ; 1-2-1 filter along the 16-byte edge
+    movdqa    xmm1, xmm0
+    psrldq    xmm1, 1             ; xmm1 = xmm0 one byte further along
+    movq [r0+r3*2], xmm0          ; row 7
+    movq [r0+r3*1], xmm1          ; row 6
+    psrldq    xmm0, 2             ; both registers advance two bytes per row pair
+    psrldq    xmm1, 2
+    movq [r2+r3*2], xmm0          ; row 5
+    movq [r2+r3*1], xmm1          ; row 4
+    psrldq    xmm0, 2
+    psrldq    xmm1, 2
+    movq [r1+r3*2], xmm0          ; row 3
+    movq [r1+r3*1], xmm1          ; row 2
+    psrldq    xmm0, 2
+    psrldq    xmm1, 2
+    movq [r4+r3*2], xmm0          ; row 1 (r4 still holds src - stride)
+    movq [r4+r3*1], xmm1          ; row 0
+    RET
+%endmacro
+
+INIT_MMX sse2
+PRED8x8L_DOWN_RIGHT
+INIT_MMX ssse3
+PRED8x8L_DOWN_RIGHT
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_vertical_right_8(uint8_t *src, int has_topleft,
+;                                   int has_topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED8x8L_VERTICAL_RIGHT 0 ; vertical-right prediction from filtered left column and top row
+cglobal pred8x8l_vertical_right_8, 4,5,7 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride
+    ; manually spill XMM registers for Win64 because
+    ; the code here is initialized with INIT_MMX
+    WIN64_SPILL_XMM 7
+    sub          r0, r3           ; r0 -> row above the block
+    lea          r4, [r0+r3*2]    ; r4 = src + 1*stride
+    movq        mm0, [r0+r3*1-8]  ; 8 bytes ending at the left pixel of row 0
+    punpckhbw   mm0, [r0+r3*0-8]  ; interleave with the bytes ending at the top-left pixel
+    movq        mm1, [r4+r3*1-8]  ; row 2
+    punpckhbw   mm1, [r0+r3*2-8]  ; interleave with row 1
+    mov          r4, r0           ; remember src - stride
+    punpckhwd   mm1, mm0          ; high dword = l2, l1, l0, topleft
+    lea          r0, [r0+r3*4]    ; r0 = src + 3*stride
+    movq        mm2, [r0+r3*1-8]  ; row 4
+    punpckhbw   mm2, [r0+r3*0-8]  ; interleave with row 3
+    lea          r0, [r0+r3*2]    ; r0 = src + 5*stride
+    movq        mm3, [r0+r3*1-8]  ; row 6
+    punpckhbw   mm3, [r0+r3*0-8]  ; interleave with row 5
+    punpckhwd   mm3, mm2          ; high dword = l6, l5, l4, l3
+    punpckhdq   mm3, mm1          ; mm3 = l6 l5 l4 l3 l2 l1 l0 topleft (lowest byte first)
+    lea          r0, [r0+r3*2]    ; r0 = src + 7*stride
+    movq        mm0, [r0+r3*0-8]  ; 8 bytes ending at the left pixel of row 7
+    movq        mm1, [r4]         ; top row
+    mov          r0, r4           ; r0 = src - stride again
+    movq        mm4, mm3
+    movq        mm2, mm3
+    PALIGNR     mm4, mm0, 7, mm0  ; presumably mm4 = l7 l6 ... l0 (PALIGNR macro defined elsewhere)
+    PALIGNR     mm1, mm2, 1, mm2  ; presumably mm1 = l5 ... l0, topleft, top[0]
+    test        r1d, r1d
+    jnz .do_left
+.fix_lt_1:                        ; no top-left: patch byte 6 of mm1
+    movq        mm5, mm3
+    pxor        mm5, mm4
+    psrlq       mm5, 56           ; xor of the highest bytes of mm3 and mm4
+    psllq       mm5, 48           ; moved to byte 6
+    pxor        mm1, mm5
+    jmp .do_left
+.fix_lt_2:                        ; no top-left: lowest byte of mm2 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56
+    pxor        mm2, mm5
+    test        r2d, r2d
+    jnz .do_top
+.fix_tr_1:                        ; no top-right: highest byte of mm1 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56
+    pxor        mm1, mm5
+    jmp .do_top
+.do_left:
+    movq        mm0, mm4          ; NOTE(review): mm0 is reloaded three lines below before being read
+    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; filter pass over the left-edge vector
+    movq2dq    xmm0, mm2          ; low half of xmm0 = filtered left/corner bytes
+    movq        mm0, [r0-8]       ; now load the top edge: bytes left of the top row
+    movq        mm3, [r0]         ; top row
+    movq        mm1, [r0+8]       ; top-right row
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0  ; presumably t[n-1]
+    PALIGNR     mm1, mm4, 1, mm4  ; presumably t[n+1]
+    test        r1d, r1d
+    jz .fix_lt_2
+    test        r2d, r2d
+    jz .fix_tr_1
+.do_top:
+    PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 ; mm6 = filtered top row
+    lea           r1, [r0+r3*2]   ; r1 = src + 1*stride
+    movq2dq     xmm4, mm6
+    pslldq      xmm4, 8           ; filtered top moved to the high half
+    por         xmm0, xmm4        ; xmm0 = 16-byte edge e[]: left-derived low half, top high half
+    movdqa      xmm6, [pw_ff00]   ; word mask defined elsewhere (name suggests 0xff00 per word)
+    movdqa      xmm1, xmm0
+    lea           r2, [r1+r3*2]   ; r2 = src + 3*stride
+    movdqa      xmm2, xmm0
+    movdqa      xmm3, xmm0
+    pslldq      xmm0, 1           ; e[i-1]
+    pslldq      xmm1, 2           ; e[i-2]
+    pavgb       xmm2, xmm0        ; rounded average of e[i] and e[i-1]
+INIT_XMM cpuname
+    PRED4x4_LOWPASS xmm4, xmm3, xmm1, xmm0, xmm5 ; (e[i] + 2*e[i-1] + e[i-2] + 2) >> 2
+    pandn       xmm6, xmm4        ; xmm4 masked with the complement of pw_ff00
+    movdqa      xmm5, xmm4
+    psrlw       xmm4, 8           ; odd bytes of each word moved down
+    packuswb    xmm6, xmm4        ; low half from the masked copy, high half from the shifted copy
+    movhlps     xmm4, xmm6        ; xmm4 low qword = high half of xmm6
+    movhps [r0+r3*2], xmm5        ; row 1 = high 8 bytes of the filtered edge
+    movhps [r0+r3*1], xmm2        ; row 0 = high 8 bytes of the averaged edge
+    psrldq      xmm5, 4
+    movss       xmm5, xmm6        ; replace the low dword with 4 bytes from xmm6
+    psrldq      xmm2, 4
+    movss       xmm2, xmm4        ; replace the low dword with 4 bytes from xmm4
+    lea           r0, [r2+r3*2]   ; r0 = src + 5*stride
+    psrldq      xmm5, 1
+    psrldq      xmm2, 1
+    movq        [r0+r3*2], xmm5   ; row 7
+    movq        [r0+r3*1], xmm2   ; row 6
+    psrldq      xmm5, 1
+    psrldq      xmm2, 1
+    movq        [r2+r3*2], xmm5   ; row 5
+    movq        [r2+r3*1], xmm2   ; row 4
+    psrldq      xmm5, 1
+    psrldq      xmm2, 1
+    movq        [r1+r3*2], xmm5   ; row 3
+    movq        [r1+r3*1], xmm2   ; row 2
+    RET
+%endmacro
+
+INIT_MMX sse2
+PRED8x8L_VERTICAL_RIGHT
+INIT_MMX ssse3
+PRED8x8L_VERTICAL_RIGHT
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_vertical_left_8(uint8_t *src, int has_topleft,
+;                                  int has_topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED8x8L_VERTICAL_LEFT 0 ; vertical-left prediction from filtered top and top-right rows
+cglobal pred8x8l_vertical_left_8, 4,4 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride
+    sub          r0, r3         ; r0 -> row above the block
+    movq        mm0, [r0-8]     ; 8 bytes left of the top row (last byte = top-left pixel)
+    movq        mm3, [r0]       ; top row t[0..7]
+    movq        mm1, [r0+8]     ; top-right row
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0 ; presumably t[n-1] (PALIGNR macro defined elsewhere)
+    PALIGNR     mm1, mm4, 1, mm4 ; presumably t[n+1]
+    test        r1d, r1d        ; has_topleft?
+    jz .fix_lt_2
+    test        r2d, r2d        ; has_topright?
+    jz .fix_tr_1
+    jmp .do_top
+.fix_lt_2:                      ; no top-left: lowest byte of mm2 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56
+    pxor        mm2, mm5
+    test        r2d, r2d
+    jnz .do_top
+.fix_tr_1:                      ; no top-right: highest byte of mm1 taken from mm3
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56
+    pxor        mm1, mm5
+    jmp .do_top
+.fix_tr_2:                      ; no top-right: build a stand-in from the last top pixel
+    punpckhbw   mm3, mm3        ; top[4..7], each byte doubled
+    pshufw      mm1, mm3, 0xFF  ; mm1 = top[7] in all 8 bytes
+    jmp .do_topright
+.do_top:
+    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; mm4 = filtered top row (mm3 still holds the raw top row)
+    movq2dq    xmm4, mm4        ; low half of xmm4 = filtered top
+    test        r2d, r2d
+    jz .fix_tr_2
+    movq        mm0, [r0+8]     ; top-right row tr[0..7]
+    movq        mm5, mm0
+    movq        mm2, mm0
+    movq        mm4, mm0
+    psrlq       mm5, 56         ; tr[7] in the lowest byte
+    PALIGNR     mm2, mm3, 7, mm3 ; presumably top[7], tr[0..6]
+    PALIGNR     mm5, mm4, 1, mm4 ; presumably tr[1..7], tr[7]
+    PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 ; mm1 = filtered top-right row
+.do_topright:
+    movq2dq   xmm3, mm1
+    lea         r1, [r0+r3*2]   ; r1 = src + 1*stride
+    pslldq    xmm3, 8           ; top-right bytes moved to the high half
+    por       xmm4, xmm3        ; xmm4 = 16-byte edge e[0..15]: top then top-right
+    movdqa    xmm2, xmm4
+    movdqa    xmm1, xmm4
+    movdqa    xmm3, xmm4
+    psrldq    xmm2, 1           ; e[i+1]
+    pslldq    xmm1, 1           ; e[i-1]
+    pavgb     xmm3, xmm2        ; rounded average of e[i] and e[i+1] (used for even rows)
+    lea         r2, [r1+r3*2]   ; r2 = src + 3*stride
+INIT_XMM cpuname
+    PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm4, xmm5 ; (e[i-1] + 2*e[i] + e[i+1] + 2) >> 2 (used for odd rows)
+    psrldq    xmm0, 1           ; filtered edge starts at e[1]
+    movq [r0+r3*1], xmm3        ; row 0
+    movq [r0+r3*2], xmm0        ; row 1
+    lea         r0, [r2+r3*2]   ; r0 = src + 5*stride
+    psrldq    xmm3, 1           ; both registers advance one byte per row pair
+    psrldq    xmm0, 1
+    movq [r1+r3*1], xmm3        ; row 2
+    movq [r1+r3*2], xmm0        ; row 3
+    psrldq    xmm3, 1
+    psrldq    xmm0, 1
+    movq [r2+r3*1], xmm3        ; row 4
+    movq [r2+r3*2], xmm0        ; row 5
+    psrldq    xmm3, 1
+    psrldq    xmm0, 1
+    movq [r0+r3*1], xmm3        ; row 6
+    movq [r0+r3*2], xmm0        ; row 7
+    RET
+%endmacro
+
+INIT_MMX sse2
+PRED8x8L_VERTICAL_LEFT
+INIT_MMX ssse3
+PRED8x8L_VERTICAL_LEFT
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_horizontal_up_8(uint8_t *src, int has_topleft,
+;                                  int has_topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED8x8L_HORIZONTAL_UP 0 ; body of the 8-bit 8x8 luma horizontal-up predictor; expanded once per INIT_* target below
+cglobal pred8x8l_horizontal_up_8, 4,4 ; per the prototype above: r0=src, r1=has_topleft, r2=has_topright, r3=stride
+    sub          r0, r3 ; r0 -> row above the block
+    lea          r2, [r0+r3*2] ; has_topright is not read; r2 is reused as a row pointer
+    movq        mm0, [r0+r3*1-8] ; 8 bytes left of row 0; byte 7 is its left neighbour
+    test        r1d, r1d
+    lea          r1, [r0+r3] ; lea leaves the flags from the test above intact
+    cmovnz       r1, r0 ; has_topleft ? r1 = row above : r1 = row 0
+    punpckhbw   mm0, [r1+r3*0-8]
+    movq        mm1, [r2+r3*1-8] ; left of row 2
+    punpckhbw   mm1, [r0+r3*2-8] ; interleaved with left of row 1
+    mov          r2, r0 ; save the row-above pointer
+    punpckhwd   mm1, mm0
+    lea          r0, [r0+r3*4]
+    movq        mm2, [r0+r3*1-8] ; left of row 4
+    punpckhbw   mm2, [r0+r3*0-8] ; interleaved with left of row 3
+    lea          r0, [r0+r3*2]
+    movq        mm3, [r0+r3*1-8] ; left of row 6
+    punpckhbw   mm3, [r0+r3*0-8] ; interleaved with left of row 5
+    punpckhwd   mm3, mm2
+    punpckhdq   mm3, mm1 ; mm3 = left-neighbour column gathered from the loads above
+    lea          r0, [r0+r3*2]
+    movq        mm0, [r0+r3*0-8] ; left of row 7
+    movq        mm1, [r1+r3*0-8] ; left of the row chosen by the cmovnz above
+    mov          r0, r2 ; restore r0 -> row above the block
+    movq        mm4, mm3
+    movq        mm2, mm3
+    PALIGNR     mm4, mm0, 7, mm0
+    PALIGNR     mm1, mm2, 1, mm2
+    movq       mm0, mm4
+    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; low-pass filter the left column (macro is defined outside this chunk)
+    movq       mm4, mm0
+    movq       mm7, mm2
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+    psllq      mm1, 56
+    PALIGNR    mm7, mm1, 7, mm3
+    lea         r1, [r0+r3*2] ; r1 = r0 + 2*stride
+    pshufw     mm0, mm7, 00011011b ; l6 l7 l4 l5 l2 l3 l0 l1
+    psllq      mm7, 56             ; l7 .. .. .. .. .. .. ..
+    movq       mm2, mm0
+    psllw      mm0, 8
+    psrlw      mm2, 8
+    por        mm2, mm0            ; l7 l6 l5 l4 l3 l2 l1 l0
+    movq       mm3, mm2
+    movq       mm4, mm2
+    movq       mm5, mm2
+    psrlq      mm2, 8
+    psrlq      mm3, 16
+    lea         r2, [r1+r3*2] ; r2 = r0 + 4*stride
+    por        mm2, mm7            ; l7 l7 l6 l5 l4 l3 l2 l1
+    punpckhbw  mm7, mm7
+    por        mm3, mm7            ; l7 l7 l7 l6 l5 l4 l3 l2
+    pavgb      mm4, mm2 ; two-tap averages of adjacent left pixels
+    PRED4x4_LOWPASS mm1, mm3, mm5, mm2, mm6 ; three-tap filtered left pixels
+    movq       mm5, mm4
+    punpcklbw  mm4, mm1            ; p4 p3 p2 p1
+    punpckhbw  mm5, mm1            ; p8 p7 p6 p5
+    movq       mm6, mm5
+    movq       mm7, mm5
+    movq       mm0, mm5
+    PALIGNR    mm5, mm4, 2, mm1 ; each row is the previous one advanced by 2 bytes
+    pshufw     mm1, mm6, 11111001b ; rows 5..7 repeat the top word of p8..p5 as padding
+    PALIGNR    mm6, mm4, 4, mm2
+    pshufw     mm2, mm7, 11111110b
+    PALIGNR    mm7, mm4, 6, mm3
+    pshufw     mm3, mm0, 11111111b
+    movq [r0+r3*1], mm4 ; row 0
+    movq [r0+r3*2], mm5 ; row 1
+    lea         r0, [r2+r3*2] ; r0 now addresses rows 6/7
+    movq [r1+r3*1], mm6 ; row 2
+    movq [r1+r3*2], mm7 ; row 3
+    movq [r2+r3*1], mm0 ; row 4
+    movq [r2+r3*2], mm1 ; row 5
+    movq [r0+r3*1], mm2 ; row 6
+    movq [r0+r3*2], mm3 ; row 7
+    RET
+%endmacro
+
+INIT_MMX mmxext ; mmxext instantiation
+PRED8x8L_HORIZONTAL_UP
+INIT_MMX ssse3 ; ssse3 instantiation
+PRED8x8L_HORIZONTAL_UP
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_horizontal_down_8(uint8_t *src, int has_topleft,
+;                                    int has_topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED8x8L_HORIZONTAL_DOWN 0 ; body of the 8-bit 8x8 luma horizontal-down predictor; expanded once per INIT_* target below
+cglobal pred8x8l_horizontal_down_8, 4,5 ; per the prototype above: r0=src, r1=has_topleft, r2=has_topright, r3=stride; r4 is scratch
+    sub          r0, r3 ; r0 -> row above the block
+    lea          r4, [r0+r3*2]
+    movq        mm0, [r0+r3*1-8] ; 8 bytes left of row 0
+    punpckhbw   mm0, [r0+r3*0-8] ; interleaved with 8 bytes left of the row above (top-left)
+    movq        mm1, [r4+r3*1-8] ; left of row 2
+    punpckhbw   mm1, [r0+r3*2-8] ; interleaved with left of row 1
+    mov          r4, r0 ; r4 keeps the row-above pointer for the rest of the function
+    punpckhwd   mm1, mm0
+    lea          r0, [r0+r3*4]
+    movq        mm2, [r0+r3*1-8] ; left of row 4
+    punpckhbw   mm2, [r0+r3*0-8] ; interleaved with left of row 3
+    lea          r0, [r0+r3*2]
+    movq        mm3, [r0+r3*1-8] ; left of row 6
+    punpckhbw   mm3, [r0+r3*0-8] ; interleaved with left of row 5
+    punpckhwd   mm3, mm2
+    punpckhdq   mm3, mm1 ; mm3 = left-neighbour column (plus top-left) gathered from the loads above
+    lea          r0, [r0+r3*2]
+    movq        mm0, [r0+r3*0-8] ; left of row 7
+    movq        mm1, [r4] ; top row
+    mov          r0, r4 ; restore r0 -> row above the block
+    movq        mm4, mm3
+    movq        mm2, mm3
+    PALIGNR     mm4, mm0, 7, mm0
+    PALIGNR     mm1, mm2, 1, mm2
+    test        r1d, r1d
+    jnz .do_left ; has_topleft != 0: no patching needed
+.fix_lt_1: ; has_topleft == 0 (fall-through): patch byte 6 of mm1 with byte7(mm3) ^ byte7(mm4)
+    movq        mm5, mm3
+    pxor        mm5, mm4
+    psrlq       mm5, 56
+    psllq       mm5, 48
+    pxor        mm1, mm5
+    jmp .do_left
+.fix_lt_2: ; no top-left (top-row pass): overwrite byte 0 of mm2 with byte 0 of mm3
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56
+    pxor        mm2, mm5
+    test        r2d, r2d
+    jnz .do_top
+.fix_tr_1: ; no top-right: overwrite byte 7 of mm1 with byte 7 of mm3
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56
+    pxor        mm1, mm5
+    jmp .do_top
+.fix_tr_2: ; no top-right: fill mm1 with copies of the highest byte of mm3
+    punpckhbw   mm3, mm3
+    pshufw      mm1, mm3, 0xFF
+    jmp .do_topright
+.do_left:
+    movq        mm0, mm4
+    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; low-pass filter the left column (macro is defined outside this chunk)
+    movq2dq    xmm0, mm2
+    pslldq     xmm0, 8 ; filtered left column goes to the high half of xmm0
+    movq        mm4, mm0
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+    movq2dq    xmm2, mm1
+    pslldq     xmm2, 15
+    psrldq     xmm2, 8 ; keep only byte 0 of xmm2, moved to byte 7
+    por        xmm0, xmm2
+    movq        mm0, [r0-8] ; from here: same top-row setup as the other 8x8l predictors
+    movq        mm3, [r0] ; top row t0..t7
+    movq        mm1, [r0+8] ; top-right row
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0 ; left neighbour of every top pixel
+    PALIGNR     mm1, mm4, 1, mm4 ; right neighbour of every top pixel
+    test        r1d, r1d
+    jz .fix_lt_2
+    test        r2d, r2d
+    jz .fix_tr_1
+.do_top:
+    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; mm4 = filtered top row
+    movq2dq    xmm1, mm4
+    test        r2d, r2d
+    jz .fix_tr_2
+    movq        mm0, [r0+8] ; reload the raw top-right row
+    movq        mm5, mm0
+    movq        mm2, mm0
+    movq        mm4, mm0
+    psrlq       mm5, 56
+    PALIGNR     mm2, mm3, 7, mm3
+    PALIGNR     mm5, mm4, 1, mm4
+    PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 ; mm1 = filtered top-right row
+.do_topright:
+    movq2dq    xmm5, mm1
+    pslldq     xmm5, 8
+    por        xmm1, xmm5 ; xmm1 = filtered top (low half) | contents of mm1 (high half)
+INIT_XMM cpuname ; re-select XMM register mode (x86inc) for the macro expansions below
+    lea         r2, [r4+r3*2] ; r2 = r4 + 2*stride
+    movdqa    xmm2, xmm1
+    movdqa    xmm3, xmm1
+    PALIGNR   xmm1, xmm0, 7, xmm4 ; three views of the xmm1:xmm0 edge, offset by 7, 8 and 9 bytes
+    PALIGNR   xmm2, xmm0, 9, xmm5
+    lea         r1, [r2+r3*2] ; r1 = r4 + 4*stride
+    PALIGNR   xmm3, xmm0, 8, xmm0
+    movdqa    xmm4, xmm1
+    pavgb     xmm4, xmm3 ; two-tap averages
+    lea         r0, [r1+r3*2] ; r0 = r4 + 6*stride
+    PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm5 ; three-tap filtered values
+    punpcklbw xmm4, xmm0
+    movhlps   xmm0, xmm4
+    movq   [r0+r3*2], xmm4 ; row 7
+    movq   [r2+r3*2], xmm0 ; row 3
+    psrldq xmm4, 2 ; each row above is the row below advanced by 2 bytes
+    psrldq xmm0, 2
+    movq   [r0+r3*1], xmm4 ; row 6
+    movq   [r2+r3*1], xmm0 ; row 2
+    psrldq xmm4, 2
+    psrldq xmm0, 2
+    movq   [r1+r3*2], xmm4 ; row 5
+    movq   [r4+r3*2], xmm0 ; row 1
+    psrldq xmm4, 2
+    psrldq xmm0, 2
+    movq   [r1+r3*1], xmm4 ; row 4
+    movq   [r4+r3*1], xmm0 ; row 0
+    RET
+%endmacro
+
+INIT_MMX sse2 ; sse2 instantiation
+PRED8x8L_HORIZONTAL_DOWN
+INIT_MMX ssse3 ; ssse3 instantiation
+PRED8x8L_HORIZONTAL_DOWN
+
+;-------------------------------------------------------------------------------
+; void ff_pred4x4_dc_8_mmxext(uint8_t *src, const uint8_t *topright,
+;                             ptrdiff_t stride)
+;-------------------------------------------------------------------------------
+
+INIT_MMX mmxext ; 8-bit 4x4 DC prediction: fill the block with the rounded mean of 4 top + 4 left pixels
+cglobal pred4x4_dc_8, 3,5 ; per the prototype above: r0=src, r1=topright (not read; reused as scratch), r2=stride
+    pxor   mm7, mm7
+    mov     r4, r0 ; r4 -> row 0, kept for the first store
+    sub     r0, r2 ; r0 -> row above the block
+    movd   mm0, [r0] ; four top pixels (upper half of mm0 is zeroed)
+    psadbw mm0, mm7 ; sum of absolute differences against zero = sum of the top pixels
+    movzx  r1d, byte [r0+r2*1-1] ; left neighbour of row 0
+    movd   r3d, mm0
+    add    r3d, r1d
+    movzx  r1d, byte [r0+r2*2-1] ; left neighbour of row 1
+    lea     r0, [r0+r2*2] ; r0 -> row 1
+    add    r3d, r1d
+    movzx  r1d, byte [r0+r2*1-1] ; left neighbour of row 2
+    add    r3d, r1d
+    movzx  r1d, byte [r0+r2*2-1] ; left neighbour of row 3
+    add    r3d, r1d
+    add    r3d, 4 ; rounding term
+    shr    r3d, 3 ; divide by the 8 summed pixels
+    imul   r3d, 0x01010101 ; replicate the DC byte into all four bytes
+    mov   [r4+r2*0], r3d ; row 0
+    mov   [r0+r2*0], r3d ; row 1
+    mov   [r0+r2*1], r3d ; row 2
+    mov   [r0+r2*2], r3d ; row 3
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_tm_vp8_8_mmxext(uint8_t *src, const uint8_t *topright,
+;                                 ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext ; VP8 4x4 TrueMotion prediction: pixel = clip(left + top - topleft), two rows per loop pass
+cglobal pred4x4_tm_vp8_8, 3,6 ; per the prototype above: r0=src, r1=topright (not read; reused as scratch), r2=stride
+    sub        r0, r2 ; r0 -> row above the block
+    pxor      mm7, mm7
+    movd      mm0, [r0] ; four top pixels
+    punpcklbw mm0, mm7 ; widened to 16-bit words
+    movzx     r4d, byte [r0-1] ; top-left pixel
+    mov       r5d, 2 ; loop counter: 2 passes x 2 rows
+.loop:
+    movzx     r1d, byte [r0+r2*1-1] ; left neighbour of the first row of this pass
+    movzx     r3d, byte [r0+r2*2-1] ; left neighbour of the second row of this pass
+    sub       r1d, r4d ; left - topleft
+    sub       r3d, r4d
+    movd      mm2, r1d
+    movd      mm4, r3d
+    pshufw    mm2, mm2, 0 ; broadcast the difference to all four words
+    pshufw    mm4, mm4, 0
+    paddw     mm2, mm0 ; + top
+    paddw     mm4, mm0
+    packuswb  mm2, mm2 ; saturate to 0..255
+    packuswb  mm4, mm4
+    movd [r0+r2*1], mm2
+    movd [r0+r2*2], mm4
+    lea        r0, [r0+r2*2] ; advance two rows
+    dec       r5d
+    jg .loop
+    RET
+
+INIT_XMM ssse3 ; SSSE3 variant of VP8 4x4 TrueMotion; only explicit mm registers are used below
+cglobal pred4x4_tm_vp8_8, 3,3 ; per the prototype above: r0=src, r1=topright (not read; reused as row pointer), r2=stride
+    sub         r0, r2 ; r0 -> row above the block
+    movq       mm6, [tm_shuf] ; pshufb mask defined outside this chunk; presumably spreads byte 3 into every word - confirm
+    pxor       mm1, mm1
+    movd       mm0, [r0] ; four top pixels
+    punpcklbw  mm0, mm1 ; widened to 16-bit words
+    movd       mm7, [r0-4] ; 4 bytes left of the top row; byte 3 is the top-left pixel
+    pshufb     mm7, mm6
+    lea         r1, [r0+r2*2] ; r1 = r0 + 2*stride
+    movd       mm2, [r0+r2*1-4] ; 4 bytes left of row 0; byte 3 is its left neighbour
+    movd       mm3, [r0+r2*2-4] ; same for row 1
+    movd       mm4, [r1+r2*1-4] ; same for row 2
+    movd       mm5, [r1+r2*2-4] ; same for row 3
+    pshufb     mm2, mm6
+    pshufb     mm3, mm6
+    pshufb     mm4, mm6
+    pshufb     mm5, mm6
+    psubw      mm0, mm7 ; mm0 = top minus the shuffled top-left load
+    paddw      mm2, mm0 ; add the shuffled left load of each row
+    paddw      mm3, mm0
+    paddw      mm4, mm0
+    paddw      mm5, mm0
+    packuswb   mm2, mm2 ; saturate to 0..255
+    packuswb   mm3, mm3
+    packuswb   mm4, mm4
+    packuswb   mm5, mm5
+    movd [r0+r2*1], mm2 ; row 0
+    movd [r0+r2*2], mm3 ; row 1
+    movd [r1+r2*1], mm4 ; row 2
+    movd [r1+r2*2], mm5 ; row 3
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_vertical_vp8_8_mmxext(uint8_t *src, const uint8_t *topright,
+;                                       ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext ; VP8 4x4 vertical prediction: low-pass filter the top row once and copy it to all four rows
+cglobal pred4x4_vertical_vp8_8, 3,3 ; per the prototype above: r0=src, r1=topright, r2=stride
+    sub       r0, r2 ; r0 -> row above the block
+    movd      m1, [r0-1] ; lt t0 t1 t2 (left neighbour of each top pixel)
+    movd      m0, [r0]
+    mova      m2, m0   ;t0 t1 t2 t3
+    punpckldq m0, [r1] ;t0 t1 t2 t3 t4 t5 t6 t7
+    lea       r1, [r0+r2*2] ; topright pointer is consumed; r1 becomes r0 + 2*stride
+    psrlq     m0, 8    ;t1 t2 t3 t4
+    PRED4x4_LOWPASS m3, m1, m0, m2, m4 ; m3 = filtered top row (macro is defined outside this chunk)
+    movd [r0+r2*1], m3 ; row 0
+    movd [r0+r2*2], m3 ; row 1
+    movd [r1+r2*1], m3 ; row 2
+    movd [r1+r2*2], m3 ; row 3
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_down_left_8_mmxext(uint8_t *src, const uint8_t *topright,
+;                                    ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_MMX mmxext ; 8-bit 4x4 diagonal down-left prediction from the top and top-right pixels
+cglobal pred4x4_down_left_8, 3,3 ; per the prototype above: r0=src, r1=topright, r2=stride
+    sub       r0, r2 ; r0 -> row above the block
+    movq      m1, [r0]
+    punpckldq m1, [r1] ; m1 = t0..t3 followed by the four top-right pixels
+    movq      m2, m1
+    movq      m3, m1
+    psllq     m1, 8 ; m1 = left neighbour of each edge pixel
+    pxor      m2, m1
+    psrlq     m2, 8
+    pxor      m2, m3 ; xor trick: m2 = edge shifted down one byte with byte 7 kept in place (right neighbours)
+    PRED4x4_LOWPASS m0, m1, m2, m3, m4 ; m0 = filtered edge (macro is defined outside this chunk)
+    lea       r1, [r0+r2*2] ; topright pointer is consumed; r1 becomes r0 + 2*stride
+    psrlq     m0, 8 ; every row is the filtered edge advanced by one more byte
+    movd      [r0+r2*1], m0 ; row 0
+    psrlq     m0, 8
+    movd      [r0+r2*2], m0 ; row 1
+    psrlq     m0, 8
+    movd      [r1+r2*1], m0 ; row 2
+    psrlq     m0, 8
+    movd      [r1+r2*2], m0 ; row 3
+    RET
+
+;------------------------------------------------------------------------------
+; void ff_pred4x4_vertical_left_8_mmxext(uint8_t *src, const uint8_t *topright,
+;                                        ptrdiff_t stride)
+;------------------------------------------------------------------------------
+
+INIT_MMX mmxext ; 8-bit 4x4 vertical-left prediction from the top and top-right pixels
+cglobal pred4x4_vertical_left_8, 3,3 ; per the prototype above: r0=src, r1=topright, r2=stride
+    sub       r0, r2 ; r0 -> row above the block
+    movq      m1, [r0]
+    punpckldq m1, [r1] ; m1 = t0..t3 followed by the four top-right pixels
+    movq      m3, m1
+    movq      m2, m1
+    psrlq     m3, 8 ; edge advanced by one pixel
+    psrlq     m2, 16 ; edge advanced by two pixels
+    movq      m4, m3
+    pavgb     m4, m1 ; m4 = two-tap averages (rows 0 and 2)
+    PRED4x4_LOWPASS m0, m1, m2, m3, m5 ; m0 = three-tap filtered values (rows 1 and 3); macro is defined outside this chunk
+    lea       r1, [r0+r2*2] ; topright pointer is consumed; r1 becomes r0 + 2*stride
+    movh      [r0+r2*1], m4 ; row 0
+    movh      [r0+r2*2], m0 ; row 1
+    psrlq     m4, 8
+    psrlq     m0, 8
+    movh      [r1+r2*1], m4 ; row 2 = row 0 advanced by one pixel
+    movh      [r1+r2*2], m0 ; row 3 = row 1 advanced by one pixel
+    RET
+
+;------------------------------------------------------------------------------
+; void ff_pred4x4_horizontal_up_8_mmxext(uint8_t *src, const uint8_t *topright,
+;                                        ptrdiff_t stride)
+;------------------------------------------------------------------------------
+
+INIT_MMX mmxext ; 8-bit 4x4 horizontal-up prediction from the four left neighbours
+cglobal pred4x4_horizontal_up_8, 3,3 ; per the prototype above: r0=src, r1=topright (not read; reused as row pointer), r2=stride
+    sub       r0, r2 ; r0 -> row above the block
+    lea       r1, [r0+r2*2] ; r1 = r0 + 2*stride
+    movd      m0, [r0+r2*1-4] ; 4 bytes left of row 0; byte 3 is l0
+    punpcklbw m0, [r0+r2*2-4] ; interleaved with the bytes left of row 1
+    movd      m1, [r1+r2*1-4] ; 4 bytes left of row 2
+    punpcklbw m1, [r1+r2*2-4] ; interleaved with the bytes left of row 3
+    punpckhwd m0, m1 ; high dword of m0 = l0 l1 l2 l3
+    movq      m1, m0
+    punpckhbw m1, m1
+    pshufw    m1, m1, 0xFF ; m1 = l3 in every byte
+    punpckhdq m0, m1 ; m0 = l0 l1 l2 l3 l3 l3 l3 l3
+    movq      m2, m0
+    movq      m3, m0
+    movq      m7, m0
+    psrlq     m2, 16
+    psrlq     m3, 8
+    pavgb     m7, m3 ; two-tap averages of adjacent left pixels
+    PRED4x4_LOWPASS m4, m0, m2, m3, m5 ; three-tap filtered values (macro is defined outside this chunk)
+    punpcklbw m7, m4 ; interleave average / filtered pairs
+    movd    [r0+r2*1], m7 ; row 0
+    psrlq    m7, 16
+    movd    [r0+r2*2], m7 ; row 1
+    psrlq    m7, 16
+    movd    [r1+r2*1], m7 ; row 2
+    movd    [r1+r2*2], m1 ; row 3 is l3 repeated
+    RET
+
+;------------------------------------------------------------------------------
+; void ff_pred4x4_horizontal_down_8_mmxext(uint8_t *src,
+;                                          const uint8_t *topright,
+;                                          ptrdiff_t stride)
+;------------------------------------------------------------------------------
+
+INIT_MMX mmxext ; 8-bit 4x4 horizontal-down prediction from the left, top-left and top neighbours
+cglobal pred4x4_horizontal_down_8, 3,3 ; per the prototype above: r0=src, r1=topright (not read; reused as row pointer), r2=stride
+    sub       r0, r2 ; r0 -> row above the block
+    lea       r1, [r0+r2*2] ; r1 = r0 + 2*stride
+    movh      m0, [r0-4]      ; lt ..
+    punpckldq m0, [r0]        ; t3 t2 t1 t0 lt .. .. ..
+    psllq     m0, 8           ; t2 t1 t0 lt .. .. .. ..
+    movd      m1, [r1+r2*2-4] ; l3
+    punpcklbw m1, [r1+r2*1-4] ; l2 l3
+    movd      m2, [r0+r2*2-4] ; l1
+    punpcklbw m2, [r0+r2*1-4] ; l0 l1
+    punpckhwd m1, m2          ; l0 l1 l2 l3
+    punpckhdq m1, m0          ; t2 t1 t0 lt l0 l1 l2 l3
+    movq      m0, m1
+    movq      m2, m1
+    movq      m5, m1
+    psrlq     m0, 16          ; .. .. t2 t1 t0 lt l0 l1
+    psrlq     m2, 8           ; .. t2 t1 t0 lt l0 l1 l2
+    pavgb     m5, m2 ; two-tap averages of adjacent edge pixels
+    PRED4x4_LOWPASS m3, m1, m0, m2, m4 ; three-tap filtered edge (macro is defined outside this chunk)
+    punpcklbw m5, m3 ; interleave average / filtered pairs for the left part of the edge
+    psrlq     m3, 32
+    PALIGNR   m3, m5, 6, m4 ; row 0 = last interleaved pair followed by filtered top values
+    movh      [r1+r2*2], m5 ; row 3
+    psrlq     m5, 16
+    movh      [r1+r2*1], m5 ; row 2
+    psrlq     m5, 16
+    movh      [r0+r2*2], m5 ; row 1
+    movh      [r0+r2*1], m3 ; row 0
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_vertical_right_8_mmxext(uint8_t *src,
+;                                         const uint8_t *topright,
+;                                         ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext ; 8-bit 4x4 vertical-right prediction from the top, top-left and left neighbours
+cglobal pred4x4_vertical_right_8, 3,3 ; per the prototype above: r0=src, r1=topright (not read; reused as row pointer), r2=stride
+    sub     r0, r2 ; r0 -> row above the block
+    lea     r1, [r0+r2*2] ; r1 = r0 + 2*stride
+    movh    m0, [r0]                    ; ........t3t2t1t0
+    movq    m5, m0
+    PALIGNR m0, [r0-8], 7, m1           ; ......t3t2t1t0lt
+    pavgb   m5, m0 ; m5 = two-tap averages of the top row (row 0)
+    PALIGNR m0, [r0+r2*1-8], 7, m1      ; ....t3t2t1t0ltl0
+    movq    m1, m0
+    PALIGNR m0, [r0+r2*2-8], 7, m2      ; ..t3t2t1t0ltl0l1
+    movq    m2, m0
+    PALIGNR m0, [r1+r2*1-8], 7, m3      ; t3t2t1t0ltl0l1l2
+    PRED4x4_LOWPASS m3, m1, m0, m2, m4 ; m3 = three-tap filtered edge (macro is defined outside this chunk)
+    movq    m1, m3
+    psrlq   m3, 16 ; drop the two lowest filtered bytes
+    psllq   m1, 48 ; keep the two lowest filtered bytes at the top of m1
+    movh    [r0+r2*1], m5 ; row 0
+    movh    [r0+r2*2], m3 ; row 1
+    PALIGNR m5, m1, 7, m2 ; row 2 = row 0 shifted by one pixel with a filtered byte inserted at the front
+    psllq   m1, 8
+    movh    [r1+r2*1], m5 ; row 2
+    PALIGNR m3, m1, 7, m1 ; row 3 = row 1 shifted by one pixel with a filtered byte inserted at the front
+    movh    [r1+r2*2], m3 ; row 3
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_down_right_8_mmxext(uint8_t *src, const uint8_t *topright,
+;                                     ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext ; 8-bit 4x4 diagonal down-right prediction from the left, top-left and top neighbours
+cglobal pred4x4_down_right_8, 3,3 ; per the prototype above: r0=src, r1=topright (not read; reused as row pointer), r2=stride
+    sub       r0, r2 ; r0 -> row above the block
+    lea       r1, [r0+r2*2] ; r1 = r0 + 2*stride (address of row 1)
+    movq      m1, [r1-8] ; 8 bytes left of row 1; byte 7 is l1
+    movq      m2, [r0+r2*1-8] ; 8 bytes left of row 0; byte 7 is l0
+    punpckhbw m2, [r0-8] ; interleaved with the bytes left of the top row (top-left in byte 7)
+    movh      m3, [r0] ; four top pixels
+    punpckhwd m1, m2
+    PALIGNR   m3, m1, 5, m1 ; prepend the three gathered neighbour bytes below the top pixels
+    movq      m1, m3
+    PALIGNR   m3, [r1+r2*1-8], 7, m4 ; shift in the left neighbour of row 2
+    movq      m2, m3
+    PALIGNR   m3, [r1+r2*2-8], 7, m4 ; shift in the left neighbour of row 3
+    PRED4x4_LOWPASS m0, m3, m1, m2, m4 ; m0 = filtered edge (macro is defined outside this chunk)
+    movh      [r1+r2*2], m0 ; row 3
+    psrlq     m0, 8 ; every row above is the row below advanced by one byte
+    movh      [r1+r2*1], m0 ; row 2
+    psrlq     m0, 8
+    movh      [r0+r2*2], m0 ; row 1
+    psrlq     m0, 8
+    movh      [r0+r2*1], m0 ; row 0
+    RET
diff --git a/media/ffvpx/libavcodec/x86/h264_intrapred_10bit.asm b/media/ffvpx/libavcodec/x86/h264_intrapred_10bit.asm
new file mode 100644
index 0000000000..2f30807332
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/h264_intrapred_10bit.asm
@@ -0,0 +1,1119 @@
+;*****************************************************************************
+;* MMX/SSE2/AVX-optimized 10-bit H.264 intra prediction code
+;*****************************************************************************
+;* Copyright (C) 2005-2011 x264 project
+;*
+;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+cextern pw_1023
+%define pw_pixel_max pw_1023
+cextern pw_512
+cextern pw_16
+cextern pw_8
+cextern pw_4
+cextern pw_2
+cextern pw_1
+cextern pd_16
+
+pw_m32101234: dw -3, -2, -1, 0, 1, 2, 3, 4 ; per-column weights used by pred8x8_plane_10 (pmaddwd for H, pmullw for the b term)
+pw_m3:        times 8 dw -3 ; eight words of -3; multiplied with c in pred8x8_plane_10
+pd_17:        times 4 dd 17 ; four dwords of 17; pmaddwd multiplier for the H and V sums in pred8x8_plane_10
+
+SECTION .text
+
+; dest, left, right, src
+; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
+%macro PRED4x4_LOWPASS 4 ; 16-bit (word) three-tap filter; %2 is clobbered, %3 and %4 are only read
+    paddw       %2, %3 ; left + right
+    psrlw       %2, 1 ; (left + right) >> 1
+    pavgw       %1, %4, %2 ; %1 = (src + ((left + right) >> 1) + 1) >> 1, three-operand form
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_down_right_10(pixel *src, const pixel *topright,
+;                               ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro PRED4x4_DR 0 ; body of the 10-bit 4x4 diagonal down-right predictor; expanded once per INIT_XMM target below
+cglobal pred4x4_down_right_10, 3, 3 ; per the prototype above: r0=src, r1=topright (not read; reused as row pointer), r2=stride (bytes)
+    sub       r0, r2 ; r0 -> row above the block
+    lea       r1, [r0+r2*2] ; r1 = r0 + 2*stride (address of row 1)
+    movhps    m1, [r1-8] ; 4 words left of row 1 into the high half; top word is l1
+    movhps    m2, [r0+r2*1-8] ; 4 words left of row 0; top word is l0
+    movhps    m4, [r0-8] ; 4 words left of the top row; top word is the top-left pixel
+    punpckhwd m2, m4
+    movq      m3, [r0] ; four top pixels
+    punpckhdq m1, m2
+    PALIGNR   m3, m1, 10, m1 ; prepend the three gathered neighbour words below the top pixels
+    movhps    m4, [r1+r2*1-8] ; left neighbour of row 2 in the top word
+    PALIGNR   m0, m3, m4, 14, m4 ; shift it in below the edge (5-argument, non-destructive PALIGNR form)
+    movhps    m4, [r1+r2*2-8] ; left neighbour of row 3 in the top word
+    PALIGNR   m2, m0, m4, 14, m4
+    PRED4x4_LOWPASS m0, m2, m3, m0 ; m0 = three-tap filtered edge
+    movq      [r1+r2*2], m0 ; row 3
+    psrldq    m0, 2 ; every row above is the row below advanced by one pixel (2 bytes)
+    movq      [r1+r2*1], m0 ; row 2
+    psrldq    m0, 2
+    movq      [r0+r2*2], m0 ; row 1
+    psrldq    m0, 2
+    movq      [r0+r2*1], m0 ; row 0
+    RET
+%endmacro
+
+INIT_XMM sse2 ; sse2 instantiation
+PRED4x4_DR
+INIT_XMM ssse3 ; ssse3 instantiation
+PRED4x4_DR
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx ; avx instantiation, only when HAVE_AVX_EXTERNAL is set
+PRED4x4_DR
+%endif
+
+;------------------------------------------------------------------------------
+; void ff_pred4x4_vertical_right_10(pixel *src, const pixel *topright,
+;                                   ptrdiff_t stride)
+;------------------------------------------------------------------------------
+%macro PRED4x4_VR 0 ; body of the 10-bit 4x4 vertical-right predictor; expanded once per INIT_XMM target below
+cglobal pred4x4_vertical_right_10, 3, 3, 6 ; per the prototype above: r0=src, r1=topright (not read; reused as row pointer), r2=stride (bytes); 6 xmm registers
+    sub     r0, r2 ; r0 -> row above the block
+    lea     r1, [r0+r2*2] ; r1 = r0 + 2*stride
+    movq    m5, [r0]            ; ........t3t2t1t0
+    movhps  m1, [r0-8] ; top-left pixel in the top word
+    PALIGNR m0, m5, m1, 14, m1  ; ......t3t2t1t0lt
+    pavgw   m5, m0 ; m5 = two-tap averages of the top row (row 0)
+    movhps  m1, [r0+r2*1-8] ; l0 in the top word
+    PALIGNR m0, m1, 14, m1      ; ....t3t2t1t0ltl0
+    movhps  m2, [r0+r2*2-8] ; l1 in the top word
+    PALIGNR m1, m0, m2, 14, m2  ; ..t3t2t1t0ltl0l1
+    movhps  m3, [r1+r2*1-8] ; l2 in the top word
+    PALIGNR m2, m1, m3, 14, m3  ; t3t2t1t0ltl0l1l2
+    PRED4x4_LOWPASS m1, m0, m2, m1 ; m1 = three-tap filtered edge
+    pslldq  m0, m1, 12 ; keep the two lowest filtered words at the top of m0
+    psrldq  m1, 4 ; drop the two lowest filtered words
+    movq    [r0+r2*1], m5 ; row 0
+    movq    [r0+r2*2], m1 ; row 1
+    PALIGNR m5, m0, 14, m2 ; row 2 = row 0 shifted by one pixel with a filtered word inserted at the front
+    pslldq  m0, 2
+    movq    [r1+r2*1], m5 ; row 2
+    PALIGNR m1, m0, 14, m0 ; row 3 = row 1 shifted by one pixel with a filtered word inserted at the front
+    movq    [r1+r2*2], m1 ; row 3
+    RET
+%endmacro
+
+INIT_XMM sse2 ; sse2 instantiation
+PRED4x4_VR
+INIT_XMM ssse3 ; ssse3 instantiation
+PRED4x4_VR
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx ; avx instantiation, only when HAVE_AVX_EXTERNAL is set
+PRED4x4_VR
+%endif
+
+;-------------------------------------------------------------------------------
+; void ff_pred4x4_horizontal_down_10(pixel *src, const pixel *topright,
+;                                    ptrdiff_t stride)
+;-------------------------------------------------------------------------------
+%macro PRED4x4_HD 0 ; body of the 10-bit 4x4 horizontal-down predictor; expanded once per INIT_XMM target below
+cglobal pred4x4_horizontal_down_10, 3, 3 ; per the prototype above: r0=src, r1=topright (not read; reused as row pointer), r2=stride (bytes)
+    sub        r0, r2 ; r0 -> row above the block
+    lea        r1, [r0+r2*2] ; r1 = r0 + 2*stride
+    movq       m0, [r0-8]      ; lt ..
+    movhps     m0, [r0] ; top pixels into the high half
+    pslldq     m0, 2           ; t2 t1 t0 lt .. .. .. ..
+    movq       m1, [r1+r2*2-8] ; l3
+    movq       m3, [r1+r2*1-8]
+    punpcklwd  m1, m3          ; l2 l3
+    movq       m2, [r0+r2*2-8] ; l1
+    movq       m3, [r0+r2*1-8]
+    punpcklwd  m2, m3          ; l0 l1
+    punpckhdq  m1, m2          ; l0 l1 l2 l3
+    punpckhqdq m1, m0          ; t2 t1 t0 lt l0 l1 l2 l3
+    psrldq     m0, m1, 4       ; .. .. t2 t1 t0 lt l0 l1
+    psrldq     m3, m1, 2       ; .. t2 t1 t0 lt l0 l1 l2
+    pavgw      m5, m1, m3 ; two-tap averages of adjacent edge pixels
+    PRED4x4_LOWPASS m3, m1, m0, m3 ; three-tap filtered edge
+    punpcklwd  m5, m3 ; interleave average / filtered pairs for the left part of the edge
+    psrldq     m3, 8
+    PALIGNR    m3, m5, 12, m4 ; row 0 = last interleaved pair followed by filtered top values
+    movq       [r1+r2*2], m5 ; row 3
+    movhps     [r0+r2*2], m5 ; row 1 (high half of the same register)
+    psrldq     m5, 4
+    movq       [r1+r2*1], m5 ; row 2
+    movq       [r0+r2*1], m3 ; row 0
+    RET
+%endmacro
+
+INIT_XMM sse2 ; sse2 instantiation
+PRED4x4_HD
+INIT_XMM ssse3 ; ssse3 instantiation
+PRED4x4_HD
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx ; avx instantiation, only when HAVE_AVX_EXTERNAL is set
+PRED4x4_HD
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_dc_10(pixel *src, const pixel *topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext ; 10-bit 4x4 DC prediction: fill the block with the rounded mean of 4 top + 4 left pixels
+cglobal pred4x4_dc_10, 3, 3 ; per the prototype above: r0=src, r1=topright (not read; reused as row pointer), r2=stride (bytes)
+    sub    r0, r2 ; r0 -> row above the block
+    lea    r1, [r0+r2*2] ; r1 = r0 + 2*stride
+    movq   m2, [r0+r2*1-8] ; 4 words left of row 0; top word is its left neighbour
+    paddw  m2, [r0+r2*2-8] ; + same for row 1
+    paddw  m2, [r1+r2*1-8] ; + same for row 2
+    paddw  m2, [r1+r2*2-8] ; + same for row 3
+    psrlq  m2, 48 ; keep only the top word: sum of the four left neighbours
+    movq   m0, [r0] ; four top pixels
+    HADDW  m0, m1 ; horizontal word sum (helper macro from the included x86util.asm)
+    paddw  m0, [pw_4] ; rounding term
+    paddw  m0, m2
+    psrlw  m0, 3 ; divide by the 8 summed pixels
+    SPLATW m0, m0, 0 ; broadcast word 0 to all words
+    movq   [r0+r2*1], m0 ; row 0
+    movq   [r0+r2*2], m0 ; row 1
+    movq   [r1+r2*1], m0 ; row 2
+    movq   [r1+r2*2], m0 ; row 3
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_down_left_10(pixel *src, const pixel *topright,
+;                              ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro PRED4x4_DL 0 ; body of the 10-bit 4x4 diagonal down-left predictor; expanded once per INIT_XMM target below
+cglobal pred4x4_down_left_10, 3, 3 ; per the prototype above: r0=src, r1=topright, r2=stride (bytes)
+    sub        r0, r2 ; r0 -> row above the block
+    movq       m0, [r0] ; four top pixels in the low half
+    movhps     m0, [r1] ; four top-right pixels in the high half
+    psrldq     m2, m0, 2 ; right neighbour of each edge pixel
+    pslldq     m3, m0, 2 ; left neighbour of each edge pixel
+    pshufhw    m2, m2, 10100100b ; replicate the last edge pixel into the vacated top word
+    PRED4x4_LOWPASS m0, m3, m2, m0 ; m0 = three-tap filtered edge
+    lea        r1, [r0+r2*2] ; topright pointer is consumed; r1 becomes r0 + 2*stride
+    movhps     [r1+r2*2], m0 ; row 3 = high half of the filtered edge
+    psrldq     m0, 2 ; rows 0..2 are the filtered edge advanced by 1, 2 and 3 pixels
+    movq       [r0+r2*1], m0 ; row 0
+    psrldq     m0, 2
+    movq       [r0+r2*2], m0 ; row 1
+    psrldq     m0, 2
+    movq       [r1+r2*1], m0 ; row 2
+    RET
+%endmacro
+
+INIT_XMM sse2 ; sse2 instantiation
+PRED4x4_DL
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx ; avx instantiation, only when HAVE_AVX_EXTERNAL is set
+PRED4x4_DL
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_vertical_left_10(pixel *src, const pixel *topright,
+;                                  ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro PRED4x4_VL 0 ; body of the 10-bit 4x4 vertical-left predictor; expanded once per INIT_XMM target below
+cglobal pred4x4_vertical_left_10, 3, 3 ; per the prototype above: r0=src, r1=topright, r2=stride (bytes)
+    sub        r0, r2 ; r0 -> row above the block
+    movu       m1, [r0] ; unaligned 16-byte load starting at the top row
+    movhps     m1, [r1] ; high half replaced by the four pixels at topright
+    psrldq     m0, m1, 2 ; edge advanced by one pixel
+    psrldq     m2, m1, 4 ; edge advanced by two pixels
+    pavgw      m4, m0, m1 ; m4 = two-tap averages (rows 0 and 2)
+    PRED4x4_LOWPASS m0, m1, m2, m0 ; m0 = three-tap filtered values (rows 1 and 3)
+    lea        r1, [r0+r2*2] ; topright pointer is consumed; r1 becomes r0 + 2*stride
+    movq       [r0+r2*1], m4 ; row 0
+    movq       [r0+r2*2], m0 ; row 1
+    psrldq     m4, 2
+    psrldq     m0, 2
+    movq       [r1+r2*1], m4 ; row 2 = row 0 advanced by one pixel
+    movq       [r1+r2*2], m0 ; row 3 = row 1 advanced by one pixel
+    RET
+%endmacro
+
+INIT_XMM sse2 ; sse2 instantiation
+PRED4x4_VL
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx ; avx instantiation, only when HAVE_AVX_EXTERNAL is set
+PRED4x4_VL
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_horizontal_up_10(pixel *src, const pixel *topright,
+;                                  ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_MMX mmxext ; 10-bit 4x4 horizontal-up prediction from the four left neighbours
+cglobal pred4x4_horizontal_up_10, 3, 3 ; per the prototype above: r0=src, r1=topright (not read; reused as row pointer), r2=stride (bytes)
+    sub       r0, r2 ; r0 -> row above the block
+    lea       r1, [r0+r2*2] ; r1 = r0 + 2*stride
+    movq      m0, [r0+r2*1-8] ; 4 words left of row 0; top word is l0
+    punpckhwd m0, [r0+r2*2-8] ; interleaved with the words left of row 1
+    movq      m1, [r1+r2*1-8] ; 4 words left of row 2
+    punpckhwd m1, [r1+r2*2-8] ; interleaved with the words left of row 3
+    punpckhdq m0, m1 ; m0 = l0 l1 l2 l3
+    pshufw    m1, m1, 0xFF ; m1 = l3 in every word
+    movq      [r1+r2*2], m1 ; row 3 is l3 repeated
+    movd      [r1+r2*1+4], m1 ; right half of row 2 is l3 repeated
+    pshufw    m2, m0, 11111001b ; l1 l2 l3 l3
+    movq      m1, m2
+    pavgw     m2, m0 ; two-tap averages of adjacent left pixels
+
+    pshufw    m5, m0, 11111110b ; l2 l3 l3 l3
+    PRED4x4_LOWPASS m1, m0, m5, m1 ; m1 = three-tap filtered values
+    movq      m6, m2
+    punpcklwd m6, m1 ; interleave average / filtered pairs
+    movq      [r0+r2*1], m6 ; row 0
+    psrlq     m2, 16
+    psrlq     m1, 16
+    punpcklwd m2, m1
+    movq      [r0+r2*2], m2 ; row 1
+    psrlq     m2, 32
+    movd      [r1+r2*1], m2 ; left half of row 2
+    RET
+
+
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_vertical_10(pixel *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2 ; 10-bit 8x8 vertical prediction: copy the row above into all eight rows
+cglobal pred8x8_vertical_10, 2, 2 ; per the prototype above: r0=src, r1=stride (bytes)
+    sub  r0, r1 ; r0 -> row above the block
+    mova m0, [r0] ; aligned load of the top row (8 words)
+%rep 3 ; three unrolled passes store rows 0..5
+    mova [r0+r1*1], m0
+    mova [r0+r1*2], m0
+    lea  r0, [r0+r1*2]
+%endrep
+    mova [r0+r1*1], m0 ; row 6
+    mova [r0+r1*2], m0 ; row 7
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_horizontal_10(pixel *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2 ; 10-bit 8x8 horizontal prediction: fill every row with its left neighbour
+cglobal pred8x8_horizontal_10, 2, 3 ; per the prototype above: r0=src, r1=stride (bytes); r2 is the loop counter
+    mov         r2d, 4 ; 4 passes x 2 rows
+.loop:
+    movq         m0, [r0+r1*0-8] ; 4 words left of the current row; word 3 is the left neighbour
+    movq         m1, [r0+r1*1-8] ; same for the next row
+    pshuflw      m0, m0, 0xff ; broadcast word 3 across the low four words
+    pshuflw      m1, m1, 0xff
+    punpcklqdq   m0, m0 ; duplicate the low half into the high half
+    punpcklqdq   m1, m1
+    mova  [r0+r1*0], m0
+    mova  [r0+r1*1], m1
+    lea          r0, [r0+r1*2] ; advance two rows
+    dec          r2d
+    jg .loop
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_dc_10(pixel *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro MOV8 2-3 ; MOV8 addr, reg[, reg2]: store one 16-byte row; the optional third argument is accepted but unused
+; sort of a hack, but it works
+    movdqa    [%1], %2 ; aligned 16-byte store of %2 to [%1]
+%endmacro
+
+%macro PRED8x8_DC 1 ; body of the 10-bit 8x8 DC predictor; %1 = word-shuffle instruction (instantiated with pshuflw below)
+cglobal pred8x8_dc_10, 2, 6 ; r0=src, r1=stride (bytes); r2-r5 are scratch
+    sub         r0, r1 ; r0 -> row above the block
+    pxor        m4, m4
+    movq        m0, [r0+0] ; top pixels 0..3
+    movq        m1, [r0+8] ; top pixels 4..7
+    punpcklwd   m0, m1
+    movhlps     m1, m0
+    paddw       m0, m1
+    %1          m2, m0, 00001110b
+    paddw       m0, m2 ; word 0 = sum of top 0..3, word 1 = sum of top 4..7
+
+    lea         r5, [r1*3] ; r5 = 3*stride
+    lea         r4, [r0+r1*4] ; r4 -> row 3
+    movzx      r2d, word [r0+r1*1-2] ; left neighbour of row 0
+    movzx      r3d, word [r0+r1*2-2] ; left neighbour of row 1
+    add        r2d, r3d
+    movzx      r3d, word [r0+r5*1-2] ; left neighbour of row 2
+    add        r2d, r3d
+    movzx      r3d, word [r4-2] ; left neighbour of row 3
+    add        r2d, r3d
+    movd        m2, r2d            ; s2
+
+    movzx      r2d, word [r4+r1*1-2] ; left neighbour of row 4
+    movzx      r3d, word [r4+r1*2-2] ; left neighbour of row 5
+    add        r2d, r3d
+    movzx      r3d, word [r4+r5*1-2] ; left neighbour of row 6
+    add        r2d, r3d
+    movzx      r3d, word [r4+r1*4-2] ; left neighbour of row 7
+    add        r2d, r3d
+    movd        m3, r2d            ; s3
+
+    punpcklwd   m2, m3
+    punpckldq   m0, m2            ; s0, s1, s2, s3
+    %1          m3, m0, 11110110b ; s2, s1, s3, s3
+    %1          m0, m0, 01110100b ; s0, s1, s3, s1
+    paddw       m0, m3
+    psrlw       m0, 2
+    pavgw       m0, m4            ; s0+s2, s1, s3, s1+s3
+    punpcklwd   m0, m0
+    pshufd      m3, m0, 11111010b ; m3 = row pattern for the bottom four rows
+    punpckldq   m0, m0 ; m0 = row pattern for the top four rows
+    SWAP         0,1 ; x86inc register renaming: the value just built is referred to as m1 from here on
+    MOV8   r0+r1*1, m1, m2 ; rows 0..3
+    MOV8   r0+r1*2, m1, m2
+    MOV8   r0+r5*1, m1, m2
+    MOV8   r0+r1*4, m1, m2
+    MOV8   r4+r1*1, m3, m4 ; rows 4..7
+    MOV8   r4+r1*2, m3, m4
+    MOV8   r4+r5*1, m3, m4
+    MOV8   r4+r1*4, m3, m4
+    RET
+%endmacro
+
+INIT_XMM sse2 ; sse2 instantiation
+PRED8x8_DC pshuflw
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_top_dc_10(pixel *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2 ; 10-bit 8x8 top-DC prediction: each 4-pixel-wide half is filled with the mean of the 4 top pixels above it
+cglobal pred8x8_top_dc_10, 2, 4 ; per the prototype above: r0=src, r1=stride (bytes); r2, r3 are scratch
+    sub         r0, r1 ; r0 -> row above the block
+    mova        m0, [r0] ; eight top pixels
+    pshuflw     m1, m0, 0x4e ; swap word pairs within each 4-word half
+    pshufhw     m1, m1, 0x4e
+    paddw       m0, m1
+    pshuflw     m1, m0, 0xb1 ; swap adjacent words within each half
+    pshufhw     m1, m1, 0xb1
+    paddw       m0, m1 ; every word now holds the sum of the four pixels in its half
+    lea         r2, [r1*3] ; r2 = 3*stride
+    lea         r3, [r0+r1*4] ; r3 -> row 3
+    paddw       m0, [pw_2] ; rounding term
+    psrlw       m0, 2 ; divide by 4
+    mova [r0+r1*1], m0 ; row 0
+    mova [r0+r1*2], m0 ; row 1
+    mova [r0+r2*1], m0 ; row 2
+    mova [r0+r1*4], m0 ; row 3
+    mova [r3+r1*1], m0 ; row 4
+    mova [r3+r1*2], m0 ; row 5
+    mova [r3+r2*1], m0 ; row 6
+    mova [r3+r1*4], m0 ; row 7
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_plane_10(pixel *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2 ; 10-bit 8x8 plane prediction: fit a gradient to the top/left neighbours and evaluate it per pixel
+cglobal pred8x8_plane_10, 2, 7, 7 ; per the prototype above: r0=src, r1=stride (bytes); r2-r6 scratch, 7 xmm registers
+    sub       r0, r1 ; r0 -> row above the block
+    lea       r2, [r1*3] ; r2 = 3*stride
+    lea       r3, [r0+r1*4] ; r3 -> row 3
+    mova      m2, [r0] ; eight top pixels
+    pmaddwd   m2, [pw_m32101234] ; weight them by -3..4
+    HADDD     m2, m1 ; horizontal dword sum (helper macro from the included x86util.asm)
+    movd      m0, [r0-4] ; two words ending at the top-left pixel
+    psrld     m0, 14 ; 4 * top-left; assumes the lower word is < 2^14 so it shifts out entirely - TODO confirm
+    psubw     m2, m0               ; H
+    movd      m0, [r3+r1*4-4]
+    movd      m1, [r0+12]
+    paddw     m0, m1
+    psllw     m0, 4                ; 16*(src[7*stride-1] + src[-stride+7])
+    movzx    r4d, word [r3+r1*1-2] ; src[4*stride-1]
+    movzx    r5d, word [r0+r2*1-2] ; src[2*stride-1]
+    sub      r4d, r5d
+    movzx    r6d, word [r3+r1*2-2] ; src[5*stride-1]
+    movzx    r5d, word [r0+r1*2-2] ; src[1*stride-1]
+    sub      r6d, r5d
+    lea      r4d, [r4+r6*2]
+    movzx    r5d, word [r3+r2*1-2] ; src[6*stride-1]
+    movzx    r6d, word [r0+r1*1-2] ; src[0*stride-1]
+    sub      r5d, r6d
+    lea      r5d, [r5*3]
+    add      r4d, r5d
+    movzx    r6d, word [r3+r1*4-2] ; src[7*stride-1]
+    movzx    r5d, word [r0+r1*0-2] ; src[ -stride-1]
+    sub      r6d, r5d
+    lea      r4d, [r4+r6*4]
+    movd      m3, r4d              ; V
+    punpckldq m2, m3
+    pmaddwd   m2, [pd_17] ; 17*H, 17*V
+    paddd     m2, [pd_16] ; rounding term
+    psrad     m2, 5                ; b, c
+
+    mova      m3, [pw_pixel_max] ; upper clip bound (pw_1023)
+    pxor      m1, m1 ; lower clip bound (0)
+    SPLATW    m0, m0, 1 ; broadcast word 1 of m0 (the 16*(...) term built above)
+    SPLATW    m4, m2, 2 ; broadcast c
+    SPLATW    m2, m2, 0 ; broadcast b
+    pmullw    m2, [pw_m32101234]   ; b
+    pmullw    m5, m4, [pw_m3]      ; c
+    paddw     m5, [pw_16] ; rounding term for the final >> 5
+    mov      r2d, 8 ; row counter
+    add       r0, r1 ; r0 -> row 0
+.loop:
+    paddsw    m6, m2, m5 ; column term + row term
+    paddsw    m6, m0
+    psraw     m6, 5
+    CLIPW     m6, m1, m3 ; clamp to [0, pw_pixel_max]
+    mova    [r0], m6
+    paddw     m5, m4 ; step the row term by c
+    add       r0, r1 ; next row
+    dec r2d
+    jg .loop
+    RET
+
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_128_dc_10(pixel *src, int has_topleft, int has_topright,
+;                            ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal pred8x8l_128_dc_10, 4, 4 ; fills the 8x8 block with a constant; r1/r2 (the flag arguments) are reused as scratch
+    mova      m0, [pw_512] ; (1<<(BIT_DEPTH-1))
+    lea       r1, [r3*3]           ; r1 = 3*stride
+    lea       r2, [r0+r3*4]        ; r2 = src + 4*stride
+    MOV8 r0+r3*0, m0, m0           ; rows 0..3 addressed from r0 (MOV8 is defined outside this chunk)
+    MOV8 r0+r3*1, m0, m0
+    MOV8 r0+r3*2, m0, m0
+    MOV8 r0+r1*1, m0, m0
+    MOV8 r2+r3*0, m0, m0           ; rows 4..7 addressed from r2
+    MOV8 r2+r3*1, m0, m0
+    MOV8 r2+r3*2, m0, m0
+    MOV8 r2+r1*1, m0, m0
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_top_dc_10(pixel *src, int has_topleft, int has_topright,
+;                            ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro PRED8x8L_TOP_DC 0 ; DC prediction computed from the top row only; instantiated per instruction set below
+cglobal pred8x8l_top_dc_10, 4, 4, 6
+    sub         r0, r3             ; r0 = src - stride (row above the block)
+    mova        m0, [r0]           ; 8 top pixels
+    shr        r1d, 14             ; presumably 0 or 2 (one 16-bit pixel) - confirm flag values against the caller
+    shr        r2d, 13             ; presumably 0 or 2 - confirm flag values against the caller
+    neg         r1                 ; becomes a backwards byte offset
+    pslldq      m1, m0, 2          ; top row shifted up by one pixel, word 0 vacated
+    psrldq      m2, m0, 2          ; top row shifted down by one pixel, word 7 vacated
+    pinsrw      m1, [r0+r1], 0     ; left edge: pixel before top[0], or top[0] itself when r1 == 0
+    pinsrw      m2, [r0+r2+14], 7  ; right edge: pixel after top[7], or top[7] itself when r2 == 0
+    lea         r1, [r3*3]         ; r1 = 3*stride
+    lea         r2, [r0+r3*4]      ; r2 = src + 3*stride
+    PRED4x4_LOWPASS m0, m2, m1, m0 ; presumably the 3-tap edge filter; macro defined outside this chunk
+    HADDW       m0, m1
+    paddw       m0, [pw_4]         ; rounding term for the >>3 below (pw_4 defined outside this chunk)
+    psrlw       m0, 3
+    SPLATW      m0, m0, 0
+    mova [r0+r3*1], m0             ; rows 0..3 (r0 is one row above src)
+    mova [r0+r3*2], m0
+    mova [r0+r1*1], m0
+    mova [r0+r3*4], m0
+    mova [r2+r3*1], m0             ; rows 4..7
+    mova [r2+r3*2], m0
+    mova [r2+r1*1], m0
+    mova [r2+r3*4], m0
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_TOP_DC
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_TOP_DC
+%endif
+
+;-------------------------------------------------------------------------------
+; void ff_pred8x8l_dc_10(pixel *src, int has_topleft, int has_topright,
+;                        ptrdiff_t stride)
+;-------------------------------------------------------------------------------
+;TODO: see if scalar is faster
+%macro PRED8x8L_DC 0 ; DC prediction from both the left column and the top row
+cglobal pred8x8l_dc_10, 4, 6, 6
+    sub         r0, r3             ; r0 = src - stride (row above the block)
+    lea         r4, [r0+r3*4]      ; r4 = src + 3*stride
+    lea         r5, [r3*3]         ; r5 = 3*stride
+    mova        m0, [r0+r3*2-16]   ; each 16-byte load ends at a row's left neighbour (top word)
+    punpckhwd   m0, [r0+r3*1-16]
+    mova        m1, [r4+r3*0-16]
+    punpckhwd   m1, [r0+r5*1-16]
+    punpckhdq   m1, m0
+    mova        m2, [r4+r3*2-16]
+    punpckhwd   m2, [r4+r3*1-16]
+    mova        m3, [r4+r3*4-16]
+    punpckhwd   m3, [r4+r5*1-16]
+    punpckhdq   m3, m2
+    punpckhqdq  m3, m1             ; m3 = the 8 left neighbours: row 7 in word 0 ... row 0 in word 7
+    mova        m0, [r0]           ; 8 top pixels
+    shr        r1d, 14             ; presumably 0 or 2 (one 16-bit pixel) - confirm flag values against the caller
+    shr        r2d, 13             ; presumably 0 or 2 - confirm flag values against the caller
+    neg         r1
+    pslldq      m1, m0, 2
+    psrldq      m2, m0, 2
+    pinsrw      m1, [r0+r1], 0     ; pixel before top[0], or top[0] itself when r1 == 0
+    pinsrw      m2, [r0+r2+14], 7  ; pixel after top[7], or top[7] itself when r2 == 0
+    not         r1                 ; r1 == 0 becomes all ones
+    and         r1, r3             ; r1 = stride in that case; otherwise stride's low bit (0 for an even stride)
+    pslldq      m4, m3, 2
+    psrldq      m5, m3, 2
+    pshuflw     m4, m4, 11100101b  ; copy word 1 into the vacated word 0
+    pinsrw      m5, [r0+r1-2], 7   ; neighbour beyond left[0]: from the row above, or left[0] again when r1 == stride
+    PRED4x4_LOWPASS m3, m4, m5, m3 ; filter the left column (macro defined outside this chunk)
+    PRED4x4_LOWPASS m0, m2, m1, m0 ; filter the top row
+    paddw       m0, m3
+    HADDW       m0, m1
+    paddw       m0, [pw_8]         ; rounding term for the >>4 below (pw_8 defined outside this chunk)
+    psrlw       m0, 4
+    SPLATW      m0, m0
+    mova [r0+r3*1], m0             ; rows 0..3
+    mova [r0+r3*2], m0
+    mova [r0+r5*1], m0
+    mova [r0+r3*4], m0
+    mova [r4+r3*1], m0             ; rows 4..7
+    mova [r4+r3*2], m0
+    mova [r4+r5*1], m0
+    mova [r4+r3*4], m0
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_DC
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_DC
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_vertical_10(pixel *src, int has_topleft, int has_topright,
+;                              ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro PRED8x8L_VERTICAL 0 ; copies the filtered top row into all 8 rows of the block
+cglobal pred8x8l_vertical_10, 4, 4, 6
+    sub         r0, r3             ; r0 = src - stride (row above the block)
+    mova        m0, [r0]           ; 8 top pixels
+    shr        r1d, 14             ; presumably 0 or 2 (one 16-bit pixel) - confirm flag values against the caller
+    shr        r2d, 13             ; presumably 0 or 2 - confirm flag values against the caller
+    neg         r1
+    pslldq      m1, m0, 2
+    psrldq      m2, m0, 2
+    pinsrw      m1, [r0+r1], 0     ; pixel before top[0], or top[0] itself when r1 == 0
+    pinsrw      m2, [r0+r2+14], 7  ; pixel after top[7], or top[7] itself when r2 == 0
+    lea         r1, [r3*3]         ; r1 = 3*stride
+    lea         r2, [r0+r3*4]      ; r2 = src + 3*stride
+    PRED4x4_LOWPASS m0, m2, m1, m0 ; filter the top row (macro defined outside this chunk)
+    mova [r0+r3*1], m0             ; rows 0..3
+    mova [r0+r3*2], m0
+    mova [r0+r1*1], m0
+    mova [r0+r3*4], m0
+    mova [r2+r3*1], m0             ; rows 4..7
+    mova [r2+r3*2], m0
+    mova [r2+r1*1], m0
+    mova [r2+r3*4], m0
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_VERTICAL
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_VERTICAL
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_horizontal_10(pixel *src, int has_topleft,
+;                                int has_topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro PRED8x8L_HORIZONTAL 0 ; fills each row with that row's filtered left neighbour
+cglobal pred8x8l_horizontal_10, 4, 4, 5
+    mova        m0, [r0-16]        ; 16 bytes ending at row 0's left neighbour
+    shr        r1d, 14             ; presumably 0 or 2 - confirm flag values against the caller
+    dec         r1
+    and         r1, r3
+    sub         r1, r3             ; r1 = 0 when the flag was 0; -stride when it was 2 and stride is even
+    punpckhwd   m0, [r0+r1-16]     ; pair left[0] with the pixel from the row above, or with itself
+    mova        m1, [r0+r3*2-16]
+    punpckhwd   m1, [r0+r3*1-16]
+    lea         r2, [r0+r3*4]      ; r2 = src + 4*stride
+    lea         r1, [r3*3]         ; r1 = 3*stride
+    punpckhdq   m1, m0
+    mova        m2, [r2+r3*0-16]
+    punpckhwd   m2, [r0+r1-16]
+    mova        m3, [r2+r3*2-16]
+    punpckhwd   m3, [r2+r3*1-16]
+    punpckhdq   m3, m2
+    punpckhqdq  m3, m1
+    PALIGNR     m4, m3, [r2+r1-16], 14, m0 ; brings in row 7's left neighbour (PALIGNR wrapper defined outside this chunk)
+    pslldq      m0, m4, 2
+    pshuflw     m0, m0, 11100101b  ; copy word 1 into the vacated word 0
+    PRED4x4_LOWPASS m4, m3, m0, m4 ; filter the left column (macro defined outside this chunk)
+    punpckhwd   m3, m4, m4         ; duplicate each word so a dword broadcast fills a whole row
+    punpcklwd   m4, m4
+    pshufd      m0, m3, 0xff       ; broadcast dword 3
+    pshufd      m1, m3, 0xaa       ; broadcast dword 2
+    pshufd      m2, m3, 0x55       ; broadcast dword 1
+    pshufd      m3, m3, 0x00       ; broadcast dword 0
+    mova [r0+r3*0], m0             ; rows 0..3
+    mova [r0+r3*1], m1
+    mova [r0+r3*2], m2
+    mova [r0+r1*1], m3
+    pshufd      m0, m4, 0xff
+    pshufd      m1, m4, 0xaa
+    pshufd      m2, m4, 0x55
+    pshufd      m3, m4, 0x00
+    mova [r2+r3*0], m0             ; rows 4..7
+    mova [r2+r3*1], m1
+    mova [r2+r3*2], m2
+    mova [r2+r1*1], m3
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_HORIZONTAL
+INIT_XMM ssse3
+PRED8x8L_HORIZONTAL
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_HORIZONTAL
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_down_left_10(pixel *src, int has_topleft, int has_topright,
+;                               ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro PRED8x8L_DOWN_LEFT 0 ; diagonal down-left prediction from the top and top-right rows
+cglobal pred8x8l_down_left_10, 4, 4, 7
+    sub         r0, r3             ; r0 = src - stride (row above the block)
+    mova        m3, [r0]           ; 8 top pixels (unfiltered)
+    shr        r1d, 14             ; presumably 0 or 2 - confirm flag values against the caller
+    neg         r1
+    shr        r2d, 13             ; must stay the last flag-setting instruction before the jz below
+    pslldq      m1, m3, 2
+    psrldq      m2, m3, 2
+    pinsrw      m1, [r0+r1], 0     ; pixel before top[0], or top[0] itself when r1 == 0
+    pinsrw      m2, [r0+r2+14], 7  ; pixel after top[7], or top[7] itself when r2 == 0
+    PRED4x4_LOWPASS m6, m2, m1, m3 ; m6 = filtered top row (macro defined outside this chunk)
+    jz .fix_tr ; flags from shr r2d
+    mova        m1, [r0+16]        ; the 8 pixels to the right of the top row
+    psrldq      m5, m1, 2
+    PALIGNR     m2, m1, m3, 14, m3
+    pshufhw     m5, m5, 10100100b  ; replicate word 6 into word 7
+    PRED4x4_LOWPASS m1, m2, m5, m1 ; m1 = filtered top-right row
+.do_topright:
+    lea         r1, [r3*3]         ; r1 = 3*stride
+    psrldq      m5, m1, 14
+    lea         r2, [r0+r3*4]      ; r2 = src + 3*stride
+    PALIGNR     m2, m1, m6,  2, m0
+    PALIGNR     m3, m1, m6, 14, m0
+    PALIGNR     m5, m1,  2, m0
+    pslldq      m4, m6, 2
+    PRED4x4_LOWPASS m6, m4, m2, m6
+    PRED4x4_LOWPASS m1, m3, m5, m1
+    mova [r2+r3*4], m1             ; row 7 is written first; the loop-free sequence then walks upwards
+    PALIGNR     m1, m6, 14, m2
+    pslldq      m6, 2
+    mova [r2+r1*1], m1             ; row 6
+    PALIGNR     m1, m6, 14, m2
+    pslldq      m6, 2
+    mova [r2+r3*2], m1             ; row 5
+    PALIGNR     m1, m6, 14, m2
+    pslldq      m6, 2
+    mova [r2+r3*1], m1             ; row 4
+    PALIGNR     m1, m6, 14, m2
+    pslldq      m6, 2
+    mova [r0+r3*4], m1             ; row 3
+    PALIGNR     m1, m6, 14, m2
+    pslldq      m6, 2
+    mova [r0+r1*1], m1             ; row 2
+    PALIGNR     m1, m6, 14, m2
+    pslldq      m6, 2
+    mova [r0+r3*2], m1             ; row 1
+    PALIGNR     m1, m6, 14, m6
+    mova [r0+r3*1], m1             ; row 0
+    RET
+.fix_tr:                           ; taken when r2d >> 13 was zero: [r0+16] is not read on this path
+    punpckhwd   m3, m3
+    pshufd      m1, m3, 0xFF       ; m1 = top[7] replicated into all 8 words
+    jmp .do_topright
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_DOWN_LEFT
+INIT_XMM ssse3
+PRED8x8L_DOWN_LEFT
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_DOWN_LEFT
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_down_right_10(pixel *src, int has_topleft,
+;                                int has_topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro PRED8x8L_DOWN_RIGHT 0 ; diagonal down-right prediction from the left column, top-left and top row
+; standard forbids this when has_topleft is false
+; no need to check
+cglobal pred8x8l_down_right_10, 4, 5, 8
+    sub         r0, r3             ; r0 = src - stride (row above the block)
+    lea         r4, [r0+r3*4]      ; r4 = src + 3*stride
+    lea         r1, [r3*3]         ; r1 = 3*stride; has_topleft (r1) is never read, per the note above
+    mova        m0, [r0+r3*1-16]   ; each 16-byte load ends at a row's left neighbour (top word)
+    punpckhwd   m0, [r0+r3*0-16]
+    mova        m1, [r0+r1*1-16]
+    punpckhwd   m1, [r0+r3*2-16]
+    punpckhdq   m1, m0
+    mova        m2, [r4+r3*1-16]
+    punpckhwd   m2, [r4+r3*0-16]
+    mova        m3, [r4+r1*1-16]
+    punpckhwd   m3, [r4+r3*2-16]
+    punpckhdq   m3, m2
+    punpckhqdq  m3, m1
+    mova        m0, [r4+r3*4-16]   ; 16 bytes ending at row 7's left neighbour
+    mova        m1, [r0]           ; 8 top pixels
+    PALIGNR     m4, m3, m0, 14, m0
+    PALIGNR     m1, m3,  2, m2
+    pslldq      m0, m4, 2
+    pshuflw     m0, m0, 11100101b  ; copy word 1 into the vacated word 0
+    PRED4x4_LOWPASS m6, m1, m4, m3 ; macro defined outside this chunk
+    PRED4x4_LOWPASS m4, m3, m0, m4
+    mova        m3, [r0]           ; reload the top row
+    shr        r2d, 13             ; presumably 0 or 2 - confirm flag values against the caller
+    pslldq      m1, m3, 2
+    psrldq      m2, m3, 2
+    pinsrw      m1, [r0-2], 0      ; top-left pixel is read unconditionally
+    pinsrw      m2, [r0+r2+14], 7  ; pixel after top[7], or top[7] itself when r2 == 0
+    PRED4x4_LOWPASS m3, m2, m1, m3 ; m3 = filtered top row
+    PALIGNR     m2, m3, m6,  2, m0
+    PALIGNR     m5, m3, m6, 14, m0
+    psrldq      m7, m3, 2
+    PRED4x4_LOWPASS m6, m4, m2, m6
+    PRED4x4_LOWPASS m3, m5, m7, m3
+    mova [r4+r3*4], m6             ; row 7
+    PALIGNR     m3, m6, 14, m2
+    pslldq      m6, 2
+    mova [r0+r3*1], m3             ; row 0
+    PALIGNR     m3, m6, 14, m2
+    pslldq      m6, 2
+    mova [r0+r3*2], m3             ; row 1
+    PALIGNR     m3, m6, 14, m2
+    pslldq      m6, 2
+    mova [r0+r1*1], m3             ; row 2
+    PALIGNR     m3, m6, 14, m2
+    pslldq      m6, 2
+    mova [r0+r3*4], m3             ; row 3
+    PALIGNR     m3, m6, 14, m2
+    pslldq      m6, 2
+    mova [r4+r3*1], m3             ; row 4
+    PALIGNR     m3, m6, 14, m2
+    pslldq      m6, 2
+    mova [r4+r3*2], m3             ; row 5
+    PALIGNR     m3, m6, 14, m6
+    mova [r4+r1*1], m3             ; row 6
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_DOWN_RIGHT
+INIT_XMM ssse3
+PRED8x8L_DOWN_RIGHT
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_DOWN_RIGHT
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_vertical_right_10(pixel *src, int has_topleft,
+;                                    int has_topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro PRED8x8L_VERTICAL_RIGHT 0 ; vertical-right prediction from the left column, top-left and top row
+; likewise with 8x8l_down_right
+cglobal pred8x8l_vertical_right_10, 4, 5, 7
+    sub         r0, r3             ; r0 = src - stride (row above the block)
+    lea         r4, [r0+r3*4]      ; r4 = src + 3*stride
+    lea         r1, [r3*3]         ; r1 = 3*stride; has_topleft (r1) is never read
+    mova        m0, [r0+r3*1-16]   ; each 16-byte load ends at a row's left neighbour (top word)
+    punpckhwd   m0, [r0+r3*0-16]
+    mova        m1, [r0+r1*1-16]
+    punpckhwd   m1, [r0+r3*2-16]
+    punpckhdq   m1, m0
+    mova        m2, [r4+r3*1-16]
+    punpckhwd   m2, [r4+r3*0-16]
+    mova        m3, [r4+r1*1-16]
+    punpckhwd   m3, [r4+r3*2-16]
+    punpckhdq   m3, m2
+    punpckhqdq  m3, m1
+    mova        m0, [r4+r3*4-16]   ; 16 bytes ending at row 7's left neighbour
+    mova        m1, [r0]           ; 8 top pixels
+    PALIGNR     m4, m3, m0, 14, m0
+    PALIGNR     m1, m3,  2, m2
+    PRED4x4_LOWPASS m3, m1, m4, m3 ; macro defined outside this chunk
+    mova        m2, [r0]           ; reload the top row
+    shr        r2d, 13             ; presumably 0 or 2 - confirm flag values against the caller
+    pslldq      m1, m2, 2
+    psrldq      m5, m2, 2
+    pinsrw      m1, [r0-2], 0      ; top-left pixel is read unconditionally
+    pinsrw      m5, [r0+r2+14], 7  ; pixel after top[7], or top[7] itself when r2 == 0
+    PRED4x4_LOWPASS m2, m5, m1, m2 ; m2 = filtered top row
+    PALIGNR     m6, m2, m3, 12, m1
+    PALIGNR     m5, m2, m3, 14, m0
+    PRED4x4_LOWPASS m0, m6, m2, m5
+    pavgw       m2, m5             ; rounded average of adjacent edge pixels
+    mova [r0+r3*2], m0             ; row 1
+    mova [r0+r3*1], m2             ; row 0
+    pslldq      m6, m3, 4
+    pslldq      m1, m3, 2
+    PRED4x4_LOWPASS m1, m3, m6, m1
+    PALIGNR     m2, m1, 14, m4     ; m2 and m0 take turns for the remaining rows
+    mova [r0+r1*1], m2             ; row 2
+    pslldq      m1, 2
+    PALIGNR     m0, m1, 14, m3
+    mova [r0+r3*4], m0             ; row 3
+    pslldq      m1, 2
+    PALIGNR     m2, m1, 14, m4
+    mova [r4+r3*1], m2             ; row 4
+    pslldq      m1, 2
+    PALIGNR     m0, m1, 14, m3
+    mova [r4+r3*2], m0             ; row 5
+    pslldq      m1, 2
+    PALIGNR     m2, m1, 14, m4
+    mova [r4+r1*1], m2             ; row 6
+    pslldq      m1, 2
+    PALIGNR     m0, m1, 14, m1
+    mova [r4+r3*4], m0             ; row 7
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_VERTICAL_RIGHT
+INIT_XMM ssse3
+PRED8x8L_VERTICAL_RIGHT
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_VERTICAL_RIGHT
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_horizontal_up_10(pixel *src, int has_topleft,
+;                                   int has_topright, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro PRED8x8L_HORIZONTAL_UP 0 ; horizontal-up prediction built from the left column only
+cglobal pred8x8l_horizontal_up_10, 4, 4, 6
+    mova        m0, [r0+r3*0-16]   ; each 16-byte load ends at a row's left neighbour (top word)
+    punpckhwd   m0, [r0+r3*1-16]
+    shr        r1d, 14             ; presumably 0 or 2 - confirm flag values against the caller
+    dec         r1
+    and         r1, r3
+    sub         r1, r3             ; r1 = 0 when the flag was 0; -stride when it was 2 and stride is even
+    mova        m4, [r0+r1*1-16]   ; ends at the pixel above left[0], or at left[0] itself
+    lea         r1, [r3*3]         ; r1 = 3*stride
+    lea         r2, [r0+r3*4]      ; r2 = src + 4*stride
+    mova        m1, [r0+r3*2-16]
+    punpckhwd   m1, [r0+r1*1-16]
+    punpckhdq   m0, m1
+    mova        m2, [r2+r3*0-16]
+    punpckhwd   m2, [r2+r3*1-16]
+    mova        m3, [r2+r3*2-16]
+    punpckhwd   m3, [r2+r1*1-16]
+    punpckhdq   m2, m3
+    punpckhqdq  m0, m2             ; m0 = the 8 left neighbours: row 0 in word 0 ... row 7 in word 7
+    PALIGNR     m1, m0, m4, 14, m4
+    psrldq      m2, m0, 2
+    pshufhw     m2, m2, 10100100b  ; replicate word 6 into word 7
+    PRED4x4_LOWPASS m0, m1, m2, m0 ; filter the left column (macro defined outside this chunk)
+    psrldq      m1, m0, 2
+    psrldq      m2, m0, 4
+    pshufhw     m1, m1, 10100100b  ; replicate word 6 into word 7
+    pshufhw     m2, m2, 01010100b  ; replicate word 5 into words 6 and 7
+    pavgw       m4, m0, m1         ; rounded average of neighbouring left pixels
+    PRED4x4_LOWPASS m1, m2, m0, m1
+    punpckhwd   m5, m4, m1         ; interleave the averaged and filtered values
+    punpcklwd   m4, m1
+    mova [r2+r3*0], m5             ; row 4
+    mova [r0+r3*0], m4             ; row 0
+    pshufd      m0, m5, 11111001b
+    pshufd      m1, m5, 11111110b
+    pshufd      m2, m5, 11111111b
+    mova [r2+r3*1], m0             ; row 5
+    mova [r2+r3*2], m1             ; row 6
+    mova [r2+r1*1], m2             ; row 7
+    PALIGNR     m2, m5, m4, 4, m0
+    PALIGNR     m3, m5, m4, 8, m1
+    PALIGNR     m5, m5, m4, 12, m4
+    mova [r0+r3*1], m2             ; row 1
+    mova [r0+r3*2], m3             ; row 2
+    mova [r0+r1*1], m5             ; row 3
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_HORIZONTAL_UP
+INIT_XMM ssse3
+PRED8x8L_HORIZONTAL_UP
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_HORIZONTAL_UP
+%endif
+
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_vertical_10(pixel *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+%macro MOV16 3-5 ; %1 = address, %2/%3 = registers to store; parameters 4 and 5 are accepted but unused
+    mova [%1+     0], %2 ; first mmsize bytes of the row
+    mova [%1+mmsize], %3 ; next mmsize bytes of the row
+%endmacro
+
+INIT_XMM sse2
+cglobal pred16x16_vertical_10, 2, 3 ; copies the row above the block into all 16 rows; r0 = src, r1 = stride
+    sub   r0, r1                   ; r0 = src - stride (row above the block)
+    mov  r2d, 8                    ; 8 iterations, 2 rows each
+    mova  m0, [r0+ 0]              ; first half of the top row
+    mova  m1, [r0+mmsize]          ; second half of the top row
+.loop:
+    MOV16 r0+r1*1, m0, m1, m2, m3
+    MOV16 r0+r1*2, m0, m1, m2, m3
+    lea   r0, [r0+r1*2]            ; advance two rows
+    dec   r2d
+    jg .loop
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_horizontal_10(pixel *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal pred16x16_horizontal_10, 2, 3 ; fills each of the 16 rows from that row's left neighbour
+    mov   r2d, 8                   ; 8 iterations, 2 rows each
+.vloop:
+    movd   m0, [r0+r1*0-4]         ; dword whose high word is the pixel left of this row
+    movd   m1, [r0+r1*1-4]         ; same for the next row
+    SPLATW m0, m0, 1               ; presumably broadcasts word 1 (SPLATW is defined outside this chunk)
+    SPLATW m1, m1, 1
+    MOV16  r0+r1*0, m0, m0, m0, m0
+    MOV16  r0+r1*1, m1, m1, m1, m1
+    lea    r0, [r0+r1*2]           ; advance two rows
+    dec    r2d
+    jg .vloop
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_dc_10(pixel *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal pred16x16_dc_10, 2, 6 ; DC prediction from the 16 top and 16 left neighbours
+    mov        r5, r0              ; keep src for the store loop
+    sub        r0, r1              ; r0 = src - stride (row above the block)
+    mova       m0, [r0+0]
+    paddw      m0, [r0+mmsize]     ; add the two halves of the top row word by word
+    HADDW      m0, m2
+
+    lea        r0, [r0+r1-2]       ; r0 = src - 2: the pixel left of row 0
+    movzx     r3d, word [r0]       ; r3d accumulates the even rows
+    movzx     r4d, word [r0+r1]    ; r4d accumulates the odd rows
+%rep 7
+    lea        r0, [r0+r1*2]
+    movzx     r2d, word [r0]
+    add       r3d, r2d
+    movzx     r2d, word [r0+r1]
+    add       r4d, r2d
+%endrep
+    lea       r3d, [r3+r4+16]      ; left-column sum plus the rounding term for the >>5 below
+
+    movd       m1, r3d
+    paddw      m0, m1
+    psrlw      m0, 5
+    SPLATW     m0, m0
+    mov       r3d, 8               ; 8 iterations, 2 rows each
+.loop:
+    MOV16 r5+r1*0, m0, m0, m0, m0
+    MOV16 r5+r1*1, m0, m0, m0, m0
+    lea        r5, [r5+r1*2]
+    dec       r3d
+    jg .loop
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_top_dc_10(pixel *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal pred16x16_top_dc_10, 2, 3 ; DC prediction from the 16 top neighbours only
+    sub        r0, r1              ; r0 = src - stride (row above the block)
+    mova       m0, [r0+0]
+    paddw      m0, [r0+mmsize]     ; add the two halves of the top row word by word
+    HADDW      m0, m2
+
+    SPLATW     m0, m0
+    paddw      m0, [pw_8]          ; rounding term for the >>4 below (pw_8 defined outside this chunk)
+    psrlw      m0, 4
+    mov       r2d, 8               ; 8 iterations, 2 rows each
+.loop:
+    MOV16 r0+r1*1, m0, m0, m0, m0  ; r0 trails the row being written by one stride
+    MOV16 r0+r1*2, m0, m0, m0, m0
+    lea        r0, [r0+r1*2]
+    dec       r2d
+    jg .loop
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_left_dc_10(pixel *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal pred16x16_left_dc_10, 2, 6 ; DC prediction from the 16 left neighbours only
+    mov        r5, r0              ; keep src for the store loop
+
+    sub        r0, 2               ; r0 = the pixel left of row 0
+    movzx     r3d, word [r0]       ; r3d accumulates the even rows
+    movzx     r4d, word [r0+r1]    ; r4d accumulates the odd rows
+%rep 7
+    lea        r0, [r0+r1*2]
+    movzx     r2d, word [r0]
+    add       r3d, r2d
+    movzx     r2d, word [r0+r1]
+    add       r4d, r2d
+%endrep
+    lea       r3d, [r3+r4+8]       ; sum of the 16 left pixels plus the rounding term
+    shr       r3d, 4               ; rounded mean
+
+    movd       m0, r3d
+    SPLATW     m0, m0
+    mov       r3d, 8               ; 8 iterations, 2 rows each
+.loop:
+    MOV16 r5+r1*0, m0, m0, m0, m0
+    MOV16 r5+r1*1, m0, m0, m0, m0
+    lea        r5, [r5+r1*2]
+    dec       r3d
+    jg .loop
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_128_dc_10(pixel *src, ptrdiff_t stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal pred16x16_128_dc_10, 2,3 ; fills the 16x16 block with the constant loaded from pw_512; reads no neighbours
+    mova       m0, [pw_512]        ; pw_512 is defined outside this chunk
+    mov       r2d, 8               ; 8 iterations, 2 rows each
+.loop:
+    MOV16 r0+r1*0, m0, m0, m0, m0
+    MOV16 r0+r1*1, m0, m0, m0, m0
+    lea        r0, [r0+r1*2]
+    dec       r2d
+    jg .loop
+    RET
diff --git a/media/ffvpx/libavcodec/x86/h264_intrapred_init.c b/media/ffvpx/libavcodec/x86/h264_intrapred_init.c
new file mode 100644
index 0000000000..ee46927a24
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/h264_intrapred_init.c
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/codec_id.h"
+#include "libavcodec/h264pred.h"
+
+#define PRED4x4(TYPE, DEPTH, OPT) \
+void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+                                                    const uint8_t *topright, \
+                                                    ptrdiff_t stride); /* prototype for ff_pred4x4_<TYPE>_<DEPTH>_<OPT> */
+
+PRED4x4(dc, 10, mmxext) /* 10-bit 4x4 predictors, one prototype per (mode, instruction set) pair */
+PRED4x4(down_left, 10, sse2)
+PRED4x4(down_left, 10, avx)
+PRED4x4(down_right, 10, sse2)
+PRED4x4(down_right, 10, ssse3)
+PRED4x4(down_right, 10, avx)
+PRED4x4(vertical_left, 10, sse2)
+PRED4x4(vertical_left, 10, avx)
+PRED4x4(vertical_right, 10, sse2)
+PRED4x4(vertical_right, 10, ssse3)
+PRED4x4(vertical_right, 10, avx)
+PRED4x4(horizontal_up, 10, mmxext)
+PRED4x4(horizontal_down, 10, sse2)
+PRED4x4(horizontal_down, 10, ssse3)
+PRED4x4(horizontal_down, 10, avx)
+
+#define PRED8x8(TYPE, DEPTH, OPT) \
+void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+                                                    ptrdiff_t stride); /* prototype for ff_pred8x8_<TYPE>_<DEPTH>_<OPT> */
+
+PRED8x8(dc, 10, sse2) /* 10-bit 8x8 predictors; installed into h->pred8x8[] by the init function in this file */
+PRED8x8(top_dc, 10, sse2)
+PRED8x8(plane, 10, sse2)
+PRED8x8(vertical, 10, sse2)
+PRED8x8(horizontal, 10, sse2)
+
+#define PRED8x8L(TYPE, DEPTH, OPT)\
+void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+                                                     int has_topleft, \
+                                                     int has_topright, \
+                                                     ptrdiff_t stride); /* prototype for ff_pred8x8l_<TYPE>_<DEPTH>_<OPT> */
+
+PRED8x8L(dc, 10, sse2) /* 10-bit 8x8l predictors; installed into h->pred8x8l[] by the init function in this file */
+PRED8x8L(dc, 10, avx)
+PRED8x8L(128_dc, 10, sse2)
+PRED8x8L(top_dc, 10, sse2)
+PRED8x8L(top_dc, 10, avx)
+PRED8x8L(vertical, 10, sse2)
+PRED8x8L(vertical, 10, avx)
+PRED8x8L(horizontal, 10, sse2)
+PRED8x8L(horizontal, 10, ssse3)
+PRED8x8L(horizontal, 10, avx)
+PRED8x8L(down_left, 10, sse2)
+PRED8x8L(down_left, 10, ssse3)
+PRED8x8L(down_left, 10, avx)
+PRED8x8L(down_right, 10, sse2)
+PRED8x8L(down_right, 10, ssse3)
+PRED8x8L(down_right, 10, avx)
+PRED8x8L(vertical_right, 10, sse2)
+PRED8x8L(vertical_right, 10, ssse3)
+PRED8x8L(vertical_right, 10, avx)
+PRED8x8L(horizontal_up, 10, sse2)
+PRED8x8L(horizontal_up, 10, ssse3)
+PRED8x8L(horizontal_up, 10, avx)
+
+#define PRED16x16(TYPE, DEPTH, OPT)\
+void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+                                                      ptrdiff_t stride); /* prototype for ff_pred16x16_<TYPE>_<DEPTH>_<OPT> */
+
+PRED16x16(dc, 10, sse2) /* 10-bit 16x16 predictors; installed into h->pred16x16[] by the init function in this file */
+PRED16x16(top_dc, 10, sse2)
+PRED16x16(128_dc, 10, sse2)
+PRED16x16(left_dc, 10, sse2)
+PRED16x16(vertical, 10, sse2)
+PRED16x16(horizontal, 10, sse2)
+
+/* 8-bit versions: prototypes declared with the same PRED* macros as the 10-bit ones */
+PRED16x16(vertical, 8, sse)
+PRED16x16(horizontal, 8, mmxext)
+PRED16x16(horizontal, 8, ssse3)
+PRED16x16(dc, 8, sse2)
+PRED16x16(dc, 8, ssse3)
+PRED16x16(plane_h264, 8, sse2) /* the plane predictor has one variant per codec: h264, rv40, svq3 */
+PRED16x16(plane_h264, 8, ssse3)
+PRED16x16(plane_rv40, 8, sse2)
+PRED16x16(plane_rv40, 8, ssse3)
+PRED16x16(plane_svq3, 8, sse2)
+PRED16x16(plane_svq3, 8, ssse3)
+PRED16x16(tm_vp8, 8, sse2)
+PRED16x16(tm_vp8, 8, avx2)
+
+PRED8x8(top_dc, 8, mmxext)
+PRED8x8(dc_rv40, 8, mmxext)
+PRED8x8(dc, 8, mmxext)
+PRED8x8(vertical, 8, mmx)
+PRED8x8(horizontal, 8, mmxext)
+PRED8x8(horizontal, 8, ssse3)
+PRED8x8(plane, 8, sse2)
+PRED8x8(plane, 8, ssse3)
+PRED8x8(tm_vp8, 8, sse2)
+PRED8x8(tm_vp8, 8, ssse3)
+
+PRED8x8L(top_dc, 8, mmxext)
+PRED8x8L(top_dc, 8, ssse3)
+PRED8x8L(dc, 8, mmxext)
+PRED8x8L(dc, 8, ssse3)
+PRED8x8L(horizontal, 8, mmxext)
+PRED8x8L(horizontal, 8, ssse3)
+PRED8x8L(vertical, 8, mmxext)
+PRED8x8L(vertical, 8, ssse3)
+PRED8x8L(down_left, 8, sse2)
+PRED8x8L(down_left, 8, ssse3)
+PRED8x8L(down_right, 8, sse2)
+PRED8x8L(down_right, 8, ssse3)
+PRED8x8L(vertical_right, 8, sse2)
+PRED8x8L(vertical_right, 8, ssse3)
+PRED8x8L(vertical_left, 8, sse2)
+PRED8x8L(vertical_left, 8, ssse3)
+PRED8x8L(horizontal_up, 8, mmxext)
+PRED8x8L(horizontal_up, 8, ssse3)
+PRED8x8L(horizontal_down, 8, sse2)
+PRED8x8L(horizontal_down, 8, ssse3)
+
+PRED4x4(dc, 8, mmxext)
+PRED4x4(down_left, 8, mmxext)
+PRED4x4(down_right, 8, mmxext)
+PRED4x4(vertical_left, 8, mmxext)
+PRED4x4(vertical_right, 8, mmxext)
+PRED4x4(horizontal_up, 8, mmxext)
+PRED4x4(horizontal_down, 8, mmxext)
+PRED4x4(tm_vp8, 8, mmxext)
+PRED4x4(tm_vp8, 8, ssse3)
+PRED4x4(vertical_vp8, 8, mmxext)
+
+av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, /* installs x86 SIMD predictors into h's function-pointer tables */
+                                   const int bit_depth,
+                                   const int chroma_format_idc)
+{
+    int cpu_flags = av_get_cpu_flags(); /* queried once; every tier below tests this value */
+
+    if (bit_depth == 8) { /* tiers run in the order written, so a later tier overwrites entries set by an earlier one */
+        if (EXTERNAL_MMX(cpu_flags)) {
+            if (chroma_format_idc <= 1) {
+                h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_8_mmx;
+            }
+        }
+
+        if (EXTERNAL_MMXEXT(cpu_flags)) {
+            h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_8_mmxext;
+            if (chroma_format_idc <= 1)
+                h->pred8x8[HOR_PRED8x8          ] = ff_pred8x8_horizontal_8_mmxext;
+            h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_8_mmxext;
+            h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_8_mmxext;
+            h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_8_mmxext;
+            h->pred8x8l [VERT_PRED              ] = ff_pred8x8l_vertical_8_mmxext;
+            h->pred8x8l [HOR_UP_PRED            ] = ff_pred8x8l_horizontal_up_8_mmxext;
+            h->pred4x4  [DIAG_DOWN_RIGHT_PRED   ] = ff_pred4x4_down_right_8_mmxext;
+            h->pred4x4  [VERT_RIGHT_PRED        ] = ff_pred4x4_vertical_right_8_mmxext;
+            h->pred4x4  [HOR_DOWN_PRED          ] = ff_pred4x4_horizontal_down_8_mmxext;
+            h->pred4x4  [DC_PRED                ] = ff_pred4x4_dc_8_mmxext;
+            if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8 || /* the remaining 4x4/8x8 entries are set only for the listed codecs */
+                codec_id == AV_CODEC_ID_H264) {
+                h->pred4x4  [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_8_mmxext;
+            }
+            if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
+                h->pred4x4  [VERT_LEFT_PRED     ] = ff_pred4x4_vertical_left_8_mmxext;
+            }
+            if (codec_id != AV_CODEC_ID_RV40) {
+                h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_8_mmxext;
+            }
+            if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
+                if (chroma_format_idc <= 1) {
+                    h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_8_mmxext;
+                    h->pred8x8[DC_PRED8x8       ] = ff_pred8x8_dc_8_mmxext;
+                }
+            }
+            if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
+                h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_rv40_8_mmxext; /* VP7/VP8 take the dc_rv40 routine here */
+                h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_8_mmxext;
+                h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_8_mmxext;
+            }
+        }
+
+        if (EXTERNAL_SSE(cpu_flags)) {
+            h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse;
+        }
+
+        if (EXTERNAL_SSE2(cpu_flags)) {
+            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_sse2;
+            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_sse2;
+            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
+            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_sse2;
+            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_sse2;
+            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_sse2;
+            if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) { /* VP7/VP8 put tm_vp8 in the PLANE_PRED8x8 slot */
+                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_sse2;
+                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_sse2;
+            } else {
+                if (chroma_format_idc <= 1)
+                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_sse2;
+                if (codec_id == AV_CODEC_ID_SVQ3) { /* 16x16 plane variant is chosen per codec */
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_sse2;
+                } else if (codec_id == AV_CODEC_ID_RV40) {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_sse2;
+                } else {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_sse2;
+                }
+            }
+        }
+
+        if (EXTERNAL_SSSE3(cpu_flags)) { /* overwrites many of the MMXEXT/SSE2 entries set above */
+            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_ssse3;
+            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_ssse3;
+            if (chroma_format_idc <= 1)
+                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_ssse3;
+            h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_8_ssse3;
+            h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_8_ssse3;
+            h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_8_ssse3;
+            h->pred8x8l [VERT_PRED            ] = ff_pred8x8l_vertical_8_ssse3;
+            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_ssse3;
+            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_ssse3;
+            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_ssse3;
+            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_ssse3;
+            h->pred8x8l [HOR_UP_PRED          ] = ff_pred8x8l_horizontal_up_8_ssse3;
+            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_ssse3;
+            if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
+                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_ssse3;
+                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_8_ssse3;
+            } else {
+                if (chroma_format_idc <= 1)
+                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_ssse3;
+                if (codec_id == AV_CODEC_ID_SVQ3) {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_ssse3;
+                } else if (codec_id == AV_CODEC_ID_RV40) {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_ssse3;
+                } else {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_ssse3;
+                }
+            }
+        }
+
+        if(EXTERNAL_AVX2(cpu_flags)){
+            if (codec_id == AV_CODEC_ID_VP8) { /* VP8 only: VP7 is not included in this branch */
+                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_avx2;
+            }
+        }
+    } else if (bit_depth == 10) { /* 10-bit: codec_id is not consulted in this branch */
+        if (EXTERNAL_MMXEXT(cpu_flags)) {
+            h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
+            h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;
+        }
+        if (EXTERNAL_SSE2(cpu_flags)) {
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
+            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
+            h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_sse2;
+            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_sse2;
+            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_sse2;
+
+            if (chroma_format_idc <= 1) { /* pred8x8[] entries are installed only under this condition */
+                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_sse2;
+                h->pred8x8[TOP_DC_PRED8x8  ] = ff_pred8x8_top_dc_10_sse2;
+                h->pred8x8[PLANE_PRED8x8   ] = ff_pred8x8_plane_10_sse2;
+                h->pred8x8[VERT_PRED8x8    ] = ff_pred8x8_vertical_10_sse2;
+                h->pred8x8[HOR_PRED8x8     ] = ff_pred8x8_horizontal_10_sse2;
+            }
+
+            h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_sse2;
+            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_sse2;
+            h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_sse2;
+            h->pred8x8l[DC_128_PRED         ] = ff_pred8x8l_128_dc_10_sse2;
+            h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_sse2;
+            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2;
+            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2;
+            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_sse2;
+            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_sse2;
+
+            h->pred16x16[DC_PRED8x8        ] = ff_pred16x16_dc_10_sse2;
+            h->pred16x16[TOP_DC_PRED8x8    ] = ff_pred16x16_top_dc_10_sse2;
+            h->pred16x16[DC_128_PRED8x8    ] = ff_pred16x16_128_dc_10_sse2;
+            h->pred16x16[LEFT_DC_PRED8x8   ] = ff_pred16x16_left_dc_10_sse2;
+            h->pred16x16[VERT_PRED8x8      ] = ff_pred16x16_vertical_10_sse2;
+            h->pred16x16[HOR_PRED8x8       ] = ff_pred16x16_horizontal_10_sse2;
+        }
+        if (EXTERNAL_SSSE3(cpu_flags)) { /* overwrites the matching SSE2 entries */
+            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
+            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_ssse3;
+            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_ssse3;
+
+            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_ssse3;
+            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3;
+            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_ssse3;
+            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_ssse3;
+            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_ssse3;
+        }
+        if (EXTERNAL_AVX(cpu_flags)) { /* last tier: overwrites the matching SSE2/SSSE3 entries */
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
+            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
+            h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_avx;
+            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_avx;
+            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_avx;
+
+            h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_avx;
+            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_avx;
+            h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_avx;
+            h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_avx;
+            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_avx;
+            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_avx;
+            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_avx;
+            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_avx;
+        }
+    } /* any other bit_depth: this function assigns nothing */
+}
diff --git a/media/ffvpx/libavcodec/x86/idctdsp.asm b/media/ffvpx/libavcodec/x86/idctdsp.asm
new file mode 100644
index 0000000000..1cfdb5419d
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/idctdsp.asm
@@ -0,0 +1,112 @@
+;******************************************************************************
+;* SIMD-optimized IDCT-related routines
+;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2003-2013 Michael Niedermayer
+;* Copyright (c) 2013 Daniel Kang
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+cextern pb_80
+
+SECTION .text
+
+;--------------------------------------------------------------------------
+;void ff_put_signed_pixels_clamped(const int16_t *block, uint8_t *pixels,
+;                                  ptrdiff_t line_size)
+; Saturates 64 int16 to int8, adds pb_80 per byte, stores 8 rows of 8 bytes.
+;--------------------------------------------------------------------------
+%macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1 ; %1 = byte offset into block (4 rows)
+    mova     m1, [blockq+mmsize*0+%1] ; row 0 of this half (8 x int16)
+    mova     m2, [blockq+mmsize*2+%1] ; row 2
+    packsswb m1, [blockq+mmsize*1+%1] ; m1 = rows 0|1 as signed-saturated bytes
+    packsswb m2, [blockq+mmsize*3+%1] ; m2 = rows 2|3
+    paddb    m1, m0 ; add the per-byte bias held in m0 (loaded from pb_80)
+    paddb    m2, m0
+    movq     [pixelsq+lsizeq*0], m1 ; low 8 bytes  -> line 0
+    movhps   [pixelsq+lsizeq*1], m1 ; high 8 bytes -> line 1
+    movq     [pixelsq+lsizeq*2], m2 ; line 2
+    movhps   [pixelsq+lsize3q ], m2 ; line 3
+%endmacro
+
+INIT_XMM sse2
+cglobal put_signed_pixels_clamped, 3, 4, 3, block, pixels, lsize, lsize3
+    mova     m0, [pb_80] ; bias constant, reused by both halves (presumably 0x80 per byte)
+    lea      lsize3q, [lsizeq*3] ; 3*line_size, used to address the 4th line
+    PUT_SIGNED_PIXELS_CLAMPED_HALF 0 ; first four lines
+    lea      pixelsq, [pixelsq+lsizeq*4] ; advance four lines
+    PUT_SIGNED_PIXELS_CLAMPED_HALF 64 ; last four lines (64 bytes = 4 rows of 8 int16)
+    RET
+
+;--------------------------------------------------------------------------
+; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels,
+;                            ptrdiff_t line_size);
+;--------------------------------------------------------------------------
+; %1 = block offset in bytes; converts 4 rows of int16 to uint8 and stores them
+%macro PUT_PIXELS_CLAMPED_HALF 1
+    mova     m0, [blockq+mmsize*0+%1] ; row 0 of this half (8 x int16)
+    mova     m1, [blockq+mmsize*2+%1] ; row 2
+    packuswb m0, [blockq+mmsize*1+%1] ; m0 = rows 0|1, unsigned-saturated to bytes
+    packuswb m1, [blockq+mmsize*3+%1] ; m1 = rows 2|3
+    movq           [pixelsq], m0 ; low 8 bytes  -> line 0
+    movhps  [lsizeq+pixelsq], m0 ; high 8 bytes -> line 1
+    movq  [2*lsizeq+pixelsq], m1 ; line 2
+    movhps [lsize3q+pixelsq], m1 ; line 3
+%endmacro
+
+INIT_XMM sse2
+cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3
+    lea lsize3q, [lsizeq*3] ; 3*line_size, used to address the 4th line
+    PUT_PIXELS_CLAMPED_HALF 0 ; first four lines
+    lea pixelsq, [pixelsq+lsizeq*4] ; advance four lines
+    PUT_PIXELS_CLAMPED_HALF 64 ; last four lines (64 bytes = 4 rows of 8 int16)
+    RET
+
+;--------------------------------------------------------------------------
+; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels,
+;                            ptrdiff_t line_size);
+;--------------------------------------------------------------------------
+; %1 = block offset in bytes; processes two lines; expects m4 to be all zero
+%macro ADD_PIXELS_CLAMPED 1
+    mova       m0, [blockq+mmsize*0+%1] ; 8 x int16 for the first line
+    mova       m1, [blockq+mmsize*1+%1] ; 8 x int16 for the second line
+    movq       m2, [pixelsq] ; 8 existing pixels of the first line
+    movq       m3, [pixelsq+lsizeq] ; 8 existing pixels of the second line
+    punpcklbw  m2, m4 ; zero-extend bytes to words (m4 = 0)
+    punpcklbw  m3, m4
+    paddsw     m0, m2 ; signed saturating add at 16 bits
+    paddsw     m1, m3
+    packuswb   m0, m1 ; clamp both lines to 0..255 and pack to bytes
+    movq       [pixelsq], m0 ; low 8 bytes  -> first line
+    movhps     [pixelsq+lsizeq], m0 ; high 8 bytes -> second line
+%endmacro
+
+INIT_XMM sse2
+cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
+    pxor       m4, m4 ; zero register used by the macro for byte->word unpacking
+    ADD_PIXELS_CLAMPED 0 ; lines 0-1
+    lea        pixelsq, [pixelsq+lsizeq*2] ; advance two lines
+    ADD_PIXELS_CLAMPED 32 ; lines 2-3
+    lea        pixelsq, [pixelsq+lsizeq*2]
+    ADD_PIXELS_CLAMPED 64 ; lines 4-5
+    lea        pixelsq, [pixelsq+lsizeq*2]
+    ADD_PIXELS_CLAMPED 96 ; lines 6-7
+    RET
diff --git a/media/ffvpx/libavcodec/x86/idctdsp.h b/media/ffvpx/libavcodec/x86/idctdsp.h
new file mode 100644
index 0000000000..738e4e36e4
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/idctdsp.h
@@ -0,0 +1,33 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_IDCTDSP_H
+#define AVCODEC_X86_IDCTDSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
+                                ptrdiff_t line_size); /* pixels = clamp to 0..255 of (pixels + block), 8 lines of 8 */
+void ff_put_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
+                                ptrdiff_t line_size); /* pixels = block clamped to 0..255, 8 lines of 8 */
+void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
+                                       ptrdiff_t line_size); /* pixels = block clamped to int8, plus the pb_80 byte bias */
+
+
+#endif /* AVCODEC_X86_IDCTDSP_H */
diff --git a/media/ffvpx/libavcodec/x86/idctdsp_init.c b/media/ffvpx/libavcodec/x86/idctdsp_init.c
new file mode 100644
index 0000000000..f28a1ad744
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/idctdsp_init.c
@@ -0,0 +1,159 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+#include "idctdsp.h"
+#include "simple_idct.h"
+
+/* Input permutation for the simple_idct_mmx; copied as-is for FF_IDCT_PERM_SIMPLE */
+static const uint8_t simple_mmx_permutation[64] = {
+    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
+    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
+    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
+    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
+    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
+    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
+    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
+    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
+};
+
+static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 }; /* replaces the low 3 index bits for FF_IDCT_PERM_SSE2 */
+
+av_cold int ff_init_scantable_permutation_x86(uint8_t *idct_permutation,
+                                              enum idct_permutation_type perm_type)
+{
+    int pos; /* index into the 64-entry permutation table */
+    /* Returns 1 when perm_type is one of the layouts filled in here, else 0. */
+    if (perm_type == FF_IDCT_PERM_SIMPLE) {
+        /* straight copy of the precomputed table */
+        for (pos = 0; pos < 64; pos++)
+            idct_permutation[pos] = simple_mmx_permutation[pos];
+        return 1;
+    }
+    if (perm_type == FF_IDCT_PERM_SSE2) {
+        /* keep bits 3-5 of the index (0x38), remap the low three bits */
+        for (pos = 0; pos < 64; pos++)
+            idct_permutation[pos] = (pos & 0x38) | idct_sse2_row_perm[pos & 7];
+        return 1;
+    }
+    return 0;
+}
+
+av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
+                                 unsigned high_bit_depth)
+{ /* Installs x86 SIMD function pointers into c, chosen from CPU flags and avctx settings. */
+    int cpu_flags = av_get_cpu_flags();
+    /* Later blocks overwrite earlier assignments, so the order below sets the priority. */
+#if ARCH_X86_32 /* MMX simple IDCT: 32-bit builds only */
+    if (EXTERNAL_MMX(cpu_flags)) {
+        if (!high_bit_depth &&
+            avctx->lowres == 0 &&
+            (avctx->idct_algo == FF_IDCT_AUTO ||
+                avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+                avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
+                c->idct      = ff_simple_idct_mmx;
+        }
+    }
+#endif /* ARCH_X86_32 */
+
+    if (EXTERNAL_SSE2(cpu_flags)) { /* clamp helpers: set without any bit-depth or idct_algo check */
+        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
+        c->put_pixels_clamped        = ff_put_pixels_clamped_sse2;
+        c->add_pixels_clamped        = ff_add_pixels_clamped_sse2;
+
+#if ARCH_X86_32 /* 32-bit SSE2 put/add variants; c->idct is not touched here */
+        if (!high_bit_depth &&
+            avctx->lowres == 0 &&
+            (avctx->idct_algo == FF_IDCT_AUTO ||
+                avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+                avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
+                c->idct_put  = ff_simple_idct_put_sse2;
+                c->idct_add  = ff_simple_idct_add_sse2;
+                c->perm_type = FF_IDCT_PERM_SIMPLE;
+        }
+#endif /* ARCH_X86_32 */
+
+        if (ARCH_X86_64 && /* SSE2 simple IDCT for non-high-bit-depth content, x86-64 only */
+            !high_bit_depth &&
+            avctx->lowres == 0 &&
+            (avctx->idct_algo == FF_IDCT_AUTO ||
+                avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+                avctx->idct_algo == FF_IDCT_SIMPLEMMX ||
+                avctx->idct_algo == FF_IDCT_SIMPLE)) {
+                c->idct      = ff_simple_idct8_sse2;
+                c->idct_put  = ff_simple_idct8_put_sse2;
+                c->idct_add  = ff_simple_idct8_add_sse2;
+                c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+        }
+    }
+
+    if (ARCH_X86_64 && avctx->lowres == 0) {
+        if (EXTERNAL_AVX(cpu_flags) && /* same conditions as the SSE2 block above; replaces its choice */
+            !high_bit_depth &&
+            (avctx->idct_algo == FF_IDCT_AUTO ||
+                avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+                avctx->idct_algo == FF_IDCT_SIMPLEMMX ||
+                avctx->idct_algo == FF_IDCT_SIMPLE)) {
+                c->idct      = ff_simple_idct8_avx;
+                c->idct_put  = ff_simple_idct8_put_avx;
+                c->idct_add  = ff_simple_idct8_add_avx;
+                c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+        }
+
+        if (avctx->bits_per_raw_sample == 10 && /* 10-bit content; keyed on bits_per_raw_sample, not high_bit_depth */
+            avctx->codec_id != AV_CODEC_ID_MPEG4 &&
+            (avctx->idct_algo == FF_IDCT_AUTO ||
+             avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+             avctx->idct_algo == FF_IDCT_SIMPLE)) {
+            if (EXTERNAL_SSE2(cpu_flags)) {
+                c->idct_put  = ff_simple_idct10_put_sse2;
+                c->idct_add  = NULL; /* no add variant is installed for this depth */
+                c->idct      = ff_simple_idct10_sse2;
+                c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+
+            }
+            if (EXTERNAL_AVX(cpu_flags)) { /* runs after SSE2, so AVX wins when both are available */
+                c->idct_put  = ff_simple_idct10_put_avx;
+                c->idct_add  = NULL;
+                c->idct      = ff_simple_idct10_avx;
+                c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+            }
+        }
+
+        if (avctx->bits_per_raw_sample == 12 && /* 12-bit content */
+            (avctx->idct_algo == FF_IDCT_AUTO ||
+             avctx->idct_algo == FF_IDCT_SIMPLEMMX)) { /* NOTE(review): algo list differs from the 10-bit case - confirm intended */
+            if (EXTERNAL_SSE2(cpu_flags)) {
+                c->idct_put  = ff_simple_idct12_put_sse2;
+                c->idct_add  = NULL; /* no add variant is installed for this depth */
+                c->idct      = ff_simple_idct12_sse2;
+                c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+            }
+            if (EXTERNAL_AVX(cpu_flags)) { /* runs after SSE2, so AVX wins when both are available */
+                c->idct_put  = ff_simple_idct12_put_avx;
+                c->idct_add  = NULL;
+                c->idct      = ff_simple_idct12_avx;
+                c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+            }
+        }
+    }
+}
diff --git a/media/ffvpx/libavcodec/x86/imdct36.asm b/media/ffvpx/libavcodec/x86/imdct36.asm
new file mode 100644
index 0000000000..888c6bf4d6
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/imdct36.asm
@@ -0,0 +1,736 @@
+;******************************************************************************
+;* 36 point SSE-optimized IMDCT transform
+;* Copyright (c) 2011 Vitor Sessak
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+ps_mask:  dd 0, ~0, ~0, ~0 ; andps lane masks: ~0 keeps a lane, 0 clears it
+ps_mask2: dd 0, ~0,  0, ~0
+ps_mask3: dd 0,  0,  0, ~0
+ps_mask4: dd 0, ~0,  0,  0
+
+ps_val1:  dd          -0.5,          -0.5, -0.8660254038, -0.8660254038 ; ps_val1..7: each coefficient duplicated per lane pair
+ps_val2:  dd           1.0,           1.0,  0.8660254038,  0.8660254038
+ps_val3:  dd  0.1736481777,  0.1736481777,  0.3420201433,  0.3420201433
+ps_val4:  dd -0.7660444431, -0.7660444431,  0.8660254038,  0.8660254038
+ps_val5:  dd -0.9396926208, -0.9396926208, -0.9848077530, -0.9848077530
+ps_val6:  dd           0.5,           0.5, -0.6427876097, -0.6427876097
+ps_val7:  dd           1.0,           1.0, -0.6427876097, -0.6427876097
+
+ps_p1p1m1m1: dd 0,          0, 0x80000000, 0x80000000 ; xorps sign masks: 0x80000000 flips that lane's sign
+ps_p1m1p1m1: dd 0, 0x80000000,          0, 0x80000000
+
+ps_cosh:       dd 1.0, 0.50190991877167369479,  1.0,  5.73685662283492756461 ; used by the non-SSE3 branch of BUTTERF/BUTTERF2
+               dd 1.0, 0.51763809020504152469,  1.0,  1.93185165257813657349
+               dd 1.0, 0.55168895948124587824, -1.0, -1.18310079157624925896
+               dd 1.0, 0.61038729438072803416, -1.0, -0.87172339781054900991
+               dd 1.0, 0.70710678118654752439,  0.0,  0.0
+
+ps_cosh_sse3:  dd 1.0, -0.50190991877167369479,  1.0, -5.73685662283492756461 ; ps_cosh with lanes 1 and 3 negated (addsubps branch)
+               dd 1.0, -0.51763809020504152469,  1.0, -1.93185165257813657349
+               dd 1.0, -0.55168895948124587824, -1.0,  1.18310079157624925896
+               dd 1.0, -0.61038729438072803416, -1.0,  0.87172339781054900991
+               dd 1.0, -0.70710678118654752439,  0.0,  0.0
+
+costabs:  times 4 dd  0.98480773 ; costabs + 16*0 (each entry is one value splatted to 4 lanes)
+          times 4 dd  0.93969262 ; 16*1
+          times 4 dd  0.86602539 ; 16*2
+          times 4 dd -0.76604444 ; 16*3
+          times 4 dd -0.64278764 ; 16*4
+          times 4 dd  0.50000000 ; 16*5
+          times 4 dd -0.50000000 ; 16*6
+          times 4 dd -0.34202015 ; 16*7
+          times 4 dd -0.17364818 ; 16*8
+          times 4 dd  0.50190992 ; 16*9
+          times 4 dd  0.51763808 ; 16*10
+          times 4 dd  0.55168896 ; 16*11
+          times 4 dd  0.61038726 ; 16*12
+          times 4 dd  0.70710677 ; 16*13
+          times 4 dd  0.87172341 ; 16*14
+          times 4 dd  1.18310082 ; 16*15
+          times 4 dd  1.93185163 ; 16*16
+          times 4 dd  5.73685646 ; 16*17
+
+%define SBLIMIT 32
+SECTION .text
+
+%macro PSHUFD 3 ; %1 = dst, %2 = src, %3 = imm8: dst lanes picked from src by imm8
+%if cpuflag(sse2) && notcpuflag(avx)
+    pshufd %1, %2, %3 ; integer-domain shuffle, SSE2 without AVX only
+%else
+    shufps %1, %2, %2, %3 ; same lane selection, src passed as both inputs
+%endif
+%endmacro
+
+; input  %2={x1,x2,x3,x4}, %3={y1,y2,y3,y4}
+; output %1={x3,x4,y1,y2}
+%macro BUILDINVHIGHLOW 3
+%if cpuflag(avx)
+    shufps %1, %2, %3, 0x4e ; 0x4e: lanes 2,3 of %2 then lanes 0,1 of %3
+%else
+    movlhps %1, %3 ; high half of %1 = {y1,y2}
+    movhlps %1, %2 ; low half of %1  = {x3,x4}
+%endif
+%endmacro
+
+; input  %2={x1,x2,x3,x4}, %3={y1,y2,y3,y4}
+; output %1={x4,y1,y2,y3}
+%macro ROTLEFT 3
+%if cpuflag(ssse3)
+    palignr  %1, %3, %2, 12 ; take bytes 12..27 of the %3:%2 concatenation
+%else
+    BUILDINVHIGHLOW %1, %2, %3 ; %1 = {x3,x4,y1,y2}
+    shufps  %1, %1, %3, 0x99 ; 0x99: lanes 1,2 of %1 then lanes 1,2 of %3
+%endif
+%endmacro
+
+%macro INVERTHL 2 ; %1 = %2 with its 64-bit halves swapped: {x3,x4,x1,x2}
+%if cpuflag(sse2)
+    PSHUFD  %1, %2, 0x4e ; 0x4e selects lanes 2,3,0,1
+%else
+    movhlps %1, %2 ; low half of %1  = high half of %2
+    movlhps %1, %2 ; high half of %1 = low half of %2
+%endif
+%endmacro
+
+%macro BUTTERF 3 ; %1 = data {a0,a1,a2,a3} (in/out), %2 = scratch, %3 = byte offset into ps_cosh*
+    INVERTHL %2, %1 ; %2 = {a2,a3,a0,a1}
+    xorps    %1, [ps_p1p1m1m1] ; %1 = {a0,a1,-a2,-a3}
+    addps    %1, %2 ; %1 = {a0+a2, a1+a3, a0-a2, a1-a3}
+%if cpuflag(sse3)
+    mulps    %1, %1, [ps_cosh_sse3 + %3] ; scale by the table row with lanes 1,3 pre-negated
+    PSHUFD   %2, %1, 0xb1 ; swap the two lanes inside each pair
+    addsubps %1, %1, %2 ; lanes 0,2: %1 - %2; lanes 1,3: %1 + %2
+%else
+    mulps    %1, [ps_cosh + %3] ; scale by the table row at offset %3
+    PSHUFD   %2, %1, 0xb1 ; swap the two lanes inside each pair
+    xorps    %1, [ps_p1m1p1m1] ; negate lanes 1 and 3
+    addps    %1, %2
+%endif
+%endmacro
+
+%macro BUTTERF2 3 ; like the second half of BUTTERF: %1 = data (in/out), %2 = scratch, %3 = table offset
+%if cpuflag(sse3)
+    mulps    %1, %1, [ps_cosh_sse3 + %3]
+    PSHUFD   %2, %1, 0xe1 ; 0xe1: swap lanes 0 and 1, keep lanes 2 and 3
+    addsubps %1, %1, %2 ; lanes 0,2: %1 - %2; lanes 1,3: %1 + %2
+%else
+    mulps    %1, [ps_cosh + %3]
+    PSHUFD   %2, %1, 0xe1 ; 0xe1: swap lanes 0 and 1, keep lanes 2 and 3
+    xorps    %1, [ps_p1m1p1m1] ; negate lanes 1 and 3
+    addps    %1, %2
+%endif
+%endmacro
+
+%macro STORE 4 ; %1 = value, %2 = scratch, %3 = base address, %4 = stride: lane k -> [%3 + k*%4]
+%if cpuflag(sse4)
+    movss     [%3       ], %1 ; lane 0
+    extractps dword [%3 +   %4], %1, 1 ; lanes 1..3 straight from the register; %1 and %2 stay untouched
+    extractps dword [%3 + 2*%4], %1, 2
+    extractps dword [%3 + 3*%4], %1, 3
+%else
+    movhlps %2, %1 ; %2 lane 0 = lane 2 of %1
+    movss   [%3       ], %1 ; lane 0
+    movss   [%3 + 2*%4], %2 ; lane 2
+    shufps  %1, %1, 0xb1 ; swap lanes inside each pair; this clobbers %1
+    movss   [%3 +   %4], %1 ; original lane 1
+    movhlps %2, %1 ; %2 lane 0 = original lane 3
+    movss   [%3 + 3*%4], %2 ; lane 3
+%endif
+%endmacro
+
+%macro LOAD 4 ; %1 = dst, %2 = scratch, %3 = base address, %4 = stride: lane k = float at [%3 + k*%4]
+    movlps  %1, [%3       ] ; each load brings 8 bytes; only the first float of each is kept
+    movhps  %1, [%3 +   %4]
+    movlps  %2, [%3 + 2*%4]
+    movhps  %2, [%3 + 3*%4]
+    shufps  %1, %2, 0x88 ; 0x88: lanes 0,2 of %1 then lanes 0,2 of %2
+%endmacro
+
+%macro LOADA64 2 ; %1 = dst, %2 = address: load 16 bytes (name suggests only 8-byte alignment is assumed)
+%if cpuflag(avx)
+   movu     %1, [%2] ; single unaligned 16-byte load
+%else
+   movlps   %1, [%2] ; low 8 bytes
+   movhps   %1, [%2 + 8] ; high 8 bytes
+%endif
+%endmacro
+
+%macro DEFINE_IMDCT 0
+cglobal imdct36_float, 4,4,9, out, buf, in, win
+
+    ; for(i=17;i>=1;i--) in[i] += in[i-1];
+    LOADA64 m0, inq ; in[0..3]
+    LOADA64 m1, inq + 16 ; in[4..7]
+
+    ROTLEFT m5, m0, m1 ; m5 = in[3..6]
+
+    PSHUFD  m6, m0, 0x93 ; m6 = {in3,in0,in1,in2}
+    andps   m6, m6, [ps_mask] ; clear lane 0: {0,in0,in1,in2}
+    addps   m0, m0, m6
+
+    LOADA64 m2, inq + 32 ; in[8..11]
+
+    ROTLEFT m7, m1, m2 ; m7 = in[7..10]
+
+    addps   m1, m1, m5
+    LOADA64 m3, inq + 48 ; in[12..15]
+
+    ROTLEFT m5, m2, m3 ; m5 = in[11..14]
+
+    xorps   m4, m4, m4
+    movlps  m4, [inq+64] ; in[16..17]; the two upper lanes stay zero
+    BUILDINVHIGHLOW m6, m3, m4
+    shufps  m6, m6, m4, 0xa9
+
+    addps   m4, m4, m6
+    addps   m2, m2, m7
+    addps   m3, m3, m5
+
+    ; for(i=17;i>=3;i-=2) in[i] += in[i-2];
+    movlhps m5, m5, m0
+    andps   m5, m5, [ps_mask3]
+
+    BUILDINVHIGHLOW m7, m0, m1
+    andps   m7, m7, [ps_mask2]
+
+    addps   m0, m0, m5
+
+    BUILDINVHIGHLOW m6, m1, m2
+    andps   m6, m6, [ps_mask2]
+
+    addps  m1, m1, m7
+
+    BUILDINVHIGHLOW m7, m2, m3
+    andps   m7, m7, [ps_mask2]
+
+    addps   m2, m2, m6
+
+    movhlps m6, m6, m3
+    andps   m6, m6, [ps_mask4]
+
+    addps  m3, m3, m7
+    addps  m4, m4, m6
+
+    ; Populate tmp[]
+    movlhps m6, m1, m5    ; zero out high values
+    subps   m6, m6, m4
+
+    subps  m5, m0, m3
+
+%if ARCH_X86_64
+    SWAP   m5, m8 ; x86-64 only: keep m0-m3 available under the name m8
+%endif
+
+    mulps  m7, m2, [ps_val1]
+
+%if ARCH_X86_64
+    mulps  m5, m8, [ps_val2]
+%else
+    mulps  m5, m5, [ps_val2]
+%endif
+    addps  m7, m7, m5
+
+    mulps  m5, m6, [ps_val1]
+    subps  m7, m7, m5
+
+%if ARCH_X86_64
+    SWAP   m5, m8 ; bring the saved m0-m3 back as m5
+%else
+    subps  m5, m0, m3 ; x86-32: recompute m0-m3 instead of using an extra register
+%endif
+
+    subps  m5, m5, m6
+    addps  m5, m5, m2
+
+    shufps m6, m4, m3, 0xe4
+    subps  m6, m6, m2
+    mulps  m6, m6, [ps_val3]
+
+    addps  m4, m4, m1
+    mulps  m4, m4, [ps_val4]
+
+    shufps m1, m1, m0, 0xe4
+    addps  m1, m1, m2
+    mulps  m1, m1, [ps_val5]
+
+    mulps  m3, m3, [ps_val6]
+    mulps  m0, m0, [ps_val7]
+    addps  m0, m0, m3
+
+    xorps  m2, m1, [ps_p1p1m1m1]
+    subps  m2, m2, m4
+    addps  m2, m2, m0
+
+    addps  m3, m4, m0
+    subps  m3, m3, m6
+    xorps  m3, m3, [ps_p1p1m1m1]
+
+    shufps m0, m0, m4, 0xe4
+    subps  m0, m0, m1
+    addps  m0, m0, m6
+
+    BUILDINVHIGHLOW m4, m2, m3
+    shufps  m3, m3, m2, 0x4e
+
+    ; we have tmp = {SWAPLH(m0), SWAPLH(m7), m3, m4, m5}
+
+    BUTTERF  m0, m1, 0
+    BUTTERF  m7, m2, 16
+    BUTTERF  m3, m6, 32
+    BUTTERF  m4, m1, 48
+    BUTTERF2 m5, m1, 64
+
+    ; permutation (source register and indices => indices after, destination register):
+    ; m0    0  1  2  3     =>     2  6 10 14   m1
+    ; m7    4  5  6  7     =>     3  7 11 15   m2
+    ; m3    8  9 10 11     =>    17 13  9  5   m3
+    ; m4   12 13 14 15     =>    16 12  8  4   m5
+    ; m5   16 17 xx xx     =>     0  1 xx xx   m0
+
+    unpckhps m1, m0, m7
+    unpckhps m6, m3, m4
+    movhlps  m2, m6, m1
+    movlhps  m1, m1, m6
+
+    unpcklps m5, m5, m4
+    unpcklps m3, m3, m7
+    movhlps  m4, m3, m5
+    movlhps  m5, m5, m3
+    SWAP m4, m3
+    ; permutation done
+
+    PSHUFD  m6, m2, 0xb1
+    movss   m4, [bufq + 4*68]
+    movss   m7, [bufq + 4*64]
+    unpcklps  m7, m7, m4 ; m7 lanes 0,1 = buf[64], buf[68]
+    mulps   m6, m6, [winq + 16*4]
+    addps   m6, m6, m7 ; windowed value plus the stored buf[] value
+    movss   [outq + 64*SBLIMIT], m6
+    shufps  m6, m6, m6, 0xb1
+    movss   [outq + 68*SBLIMIT], m6
+
+    mulps   m6, m3, [winq + 4*4]
+    LOAD    m4, m7, bufq + 4*16, 16 ; gather four buf floats, 16 bytes apart
+    addps   m6, m6, m4
+    STORE   m6, m7, outq + 16*SBLIMIT, 4*SBLIMIT ; scatter to out, 4*SBLIMIT bytes apart
+
+    shufps  m4, m0, m3, 0xb5
+    mulps   m4, m4, [winq + 8*4]
+    LOAD    m7, m6, bufq + 4*32, 16
+    addps   m4, m4, m7
+    STORE   m4, m6, outq + 32*SBLIMIT, 4*SBLIMIT
+
+    shufps  m3, m3, m2, 0xb1
+    mulps   m3, m3, [winq + 12*4]
+    LOAD    m7, m6, bufq + 4*48, 16
+    addps   m3, m3, m7
+    STORE   m3, m7, outq + 48*SBLIMIT, 4*SBLIMIT
+
+    mulps   m2, m2, [winq]
+    LOAD    m6, m7, bufq, 16
+    addps   m2, m2, m6
+    STORE   m2, m7, outq, 4*SBLIMIT
+
+    mulps    m4, m1, [winq + 20*4] ; from here on: windowed values are written back into buf
+    STORE    m4, m7, bufq, 16
+
+    mulps    m3, m5, [winq + 24*4]
+    STORE    m3, m7, bufq + 4*16, 16
+
+    shufps   m0, m0, m5, 0xb0
+    mulps    m0, m0, [winq + 28*4]
+    STORE    m0, m7, bufq + 4*32, 16
+
+    shufps   m5, m5, m1, 0xb1
+    mulps    m5, m5, [winq + 32*4]
+    STORE    m5, m7, bufq + 4*48, 16
+
+    shufps   m1, m1, m1, 0xb1
+    mulps    m1, m1, [winq + 36*4]
+    movss    [bufq + 4*64], m1
+    shufps   m1, m1, 0xb1
+    movss    [bufq + 4*68], m1
+    RET
+%endmacro
+
+INIT_XMM sse2 ; baseline build: PSHUFD and INVERTHL take their pshufd branch
+DEFINE_IMDCT
+
+INIT_XMM sse3 ; additionally takes the addsubps branch of BUTTERF/BUTTERF2
+DEFINE_IMDCT
+
+INIT_XMM ssse3 ; additionally takes the palignr branch of ROTLEFT
+DEFINE_IMDCT
+
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx ; takes the cpuflag(avx) branches of BUILDINVHIGHLOW, LOADA64 and PSHUFD
+DEFINE_IMDCT
+%endif
+
+INIT_XMM sse
+; Spill helpers for four_imdct36_float: slots 8..15 are registers on x86-64, tmp memory on x86-32.
+%if ARCH_X86_64
+%define SPILL SWAP
+%define UNSPILL SWAP
+%define SPILLED(x) m %+ x
+%else
+%define SPILLED(x) [tmpq+(x-8)*16 + 32*4]
+%macro SPILL 2 ; xmm#, mempos: store m<xmm#> into 16-byte slot <mempos> of tmp
+    movaps SPILLED(%2), m%1
+%endmacro
+%macro UNSPILL 2 ; xmm#, mempos: reload m<xmm#> from 16-byte slot <mempos> of tmp
+    movaps m%1, SPILLED(%2)
+%endmacro
+%endif
+
+%macro DEFINE_FOUR_IMDCT 0
+cglobal four_imdct36_float, 5,5,16, out, buf, in, win, tmp
+    movlps  m0, [inq+64] ; last two floats (offset 64) of block 0; the four blocks are 72 bytes apart
+    movhps  m0, [inq+64 +   72]
+    movlps  m3, [inq+64 + 2*72]
+    movhps  m3, [inq+64 + 3*72]
+
+    shufps  m5, m0, m3, 0xdd ; m5 = float at offset 68 of each of the four blocks
+    shufps  m0, m0, m3, 0x88 ; m0 = float at offset 64 of each of the four blocks
+
+    mova     m1, [inq+48]
+    movu     m6, [inq+48 +   72] ; 72 is not a multiple of 16, so blocks 1 and 3 use unaligned loads
+    mova     m7, [inq+48 + 2*72]
+    movu     m3, [inq+48 + 3*72]
+
+    TRANSPOSE4x4PS 1, 6, 7, 3, 4 ; presumably leaves one coefficient index per register, one block per lane
+
+    addps   m4, m6, m7
+    mova    [tmpq+4*28], m4
+
+    addps    m7, m3
+    addps    m6, m1
+    addps    m3, m0
+    addps    m0, m5
+    addps    m0, m7
+    addps    m7, m6
+    mova    [tmpq+4*12], m7
+    SPILL   3, 12
+
+    mova     m4, [inq+32]
+    movu     m5, [inq+32 +   72]
+    mova     m2, [inq+32 + 2*72]
+    movu     m7, [inq+32 + 3*72]
+
+    TRANSPOSE4x4PS 4, 5, 2, 7, 3
+
+    addps   m1, m7
+    SPILL   1, 11
+
+    addps   m3, m5, m2
+    SPILL   3, 13
+
+    addps    m7, m2
+    addps    m5, m4
+    addps    m6, m7
+    mova    [tmpq], m6
+    addps   m7, m5
+    mova    [tmpq+4*16], m7
+
+    mova    m2, [inq+16]
+    movu    m7, [inq+16 +   72]
+    mova    m1, [inq+16 + 2*72]
+    movu    m6, [inq+16 + 3*72]
+
+    TRANSPOSE4x4PS 2, 7, 1, 6, 3
+
+    addps   m4, m6
+    addps   m6, m1
+    addps   m1, m7
+    addps   m7, m2
+    addps   m5, m6
+    SPILL   5, 15
+    addps   m6, m7
+    mulps   m6, [costabs + 16*2]
+    mova    [tmpq+4*8], m6
+    SPILL   1, 10
+    SPILL   0, 14
+
+    mova    m1, [inq]
+    movu    m6, [inq +   72]
+    mova    m3, [inq + 2*72]
+    movu    m5, [inq + 3*72]
+
+    TRANSPOSE4x4PS 1, 6, 3, 5, 0
+
+    addps    m2, m5
+    addps    m5, m3
+    addps    m7, m5
+    addps    m3, m6
+    addps    m6, m1
+    SPILL    7, 8
+    addps    m5, m6
+    SPILL    6, 9
+    addps    m6, m4, SPILLED(12)
+    subps    m6, m2
+    UNSPILL  7, 11
+    SPILL    5, 11
+    subps    m5, m1, m7
+    mulps    m7, [costabs + 16*5]
+    addps    m7, m1
+    mulps    m0, m6, [costabs + 16*6]
+    addps    m0, m5
+    mova     [tmpq+4*24], m0
+    addps    m6, m5
+    mova     [tmpq+4*4], m6
+    addps    m6, m4, m2
+    mulps    m6, [costabs + 16*1]
+    subps    m4, SPILLED(12)
+    mulps    m4, [costabs + 16*8]
+    addps    m2, SPILLED(12)
+    mulps    m2, [costabs + 16*3]
+    subps    m5, m7, m6
+    subps    m5, m2
+    addps    m6, m7
+    addps    m6, m4
+    addps    m7, m2
+    subps    m7, m4
+    mova     [tmpq+4*20], m7
+    mova     m2, [tmpq+4*28]
+    mova     [tmpq+4*28], m5
+    UNSPILL  7, 13
+    subps    m5, m7, m2
+    mulps    m5, [costabs + 16*7]
+    UNSPILL  1, 10
+    mulps    m1, [costabs + 16*2]
+    addps    m4, m3, m2
+    mulps    m4, [costabs + 16*4]
+    addps    m2, m7
+    addps    m7, m3
+    mulps    m7, [costabs]
+    subps    m3, m2
+    mulps    m3, [costabs + 16*2]
+    addps    m2, m7, m5
+    addps    m2, m1
+    SPILL    2, 10
+    addps    m7, m4
+    subps    m7, m1
+    SPILL    7, 12
+    subps    m5, m4
+    subps    m5, m1
+    UNSPILL  0, 14
+    SPILL    5, 13
+    addps    m1, m0, SPILLED(15)
+    subps    m1, SPILLED(8)
+    mova     m4, [costabs + 16*5]
+    mulps    m4, [tmpq]
+    UNSPILL  2, 9
+    addps    m4, m2
+    subps    m2, [tmpq]
+    mulps    m5, m1, [costabs + 16*6]
+    addps    m5, m2
+    SPILL    5, 9
+    addps    m2, m1
+    SPILL    2, 14
+    UNSPILL  5, 15
+    subps    m7, m5, m0
+    addps    m5, SPILLED(8)
+    mulps    m5, [costabs + 16*1]
+    mulps    m7, [costabs + 16*8]
+    addps    m0, SPILLED(8)
+    mulps    m0, [costabs + 16*3]
+    subps    m2, m4, m5
+    subps    m2, m0
+    SPILL    2, 15
+    addps    m5, m4
+    addps    m5, m7
+    addps    m4, m0
+    subps    m4, m7
+    SPILL    4, 8
+    mova     m7, [tmpq+4*16]
+    mova     m2, [tmpq+4*12]
+    addps    m0, m7, m2
+    subps    m0, SPILLED(11)
+    mulps    m0, [costabs + 16*2]
+    addps    m4, m7, SPILLED(11)
+    mulps    m4, [costabs]
+    subps    m7, m2
+    mulps    m7, [costabs + 16*7]
+    addps    m2, SPILLED(11)
+    mulps    m2, [costabs + 16*4]
+    addps    m1, m7, [tmpq+4*8]
+    addps    m1, m4
+    addps    m4, m2
+    subps    m4, [tmpq+4*8]
+    SPILL    4, 11
+    subps    m7, m2
+    subps    m7, [tmpq+4*8]
+    addps    m4, m6, SPILLED(10)
+    subps    m6, SPILLED(10)
+    addps    m2, m5, m1
+    mulps    m2, [costabs + 16*9]
+    subps    m5, m1
+    mulps    m5, [costabs + 16*17]
+    subps    m1, m4, m2
+    addps    m4, m2
+    mulps    m2, m1, [winq+4*36] ; output pattern from here on: out = value*win + buf, then buf = value*win
+    addps    m2, [bufq+4*36]
+    mova     [outq+1152], m2
+    mulps    m1, [winq+4*32]
+    addps    m1, [bufq+4*32]
+    mova     [outq+1024], m1
+    mulps    m1, m4, [winq+4*116]
+    mova     [bufq+4*36], m1
+    mulps    m4, [winq+4*112]
+    mova     [bufq+4*32], m4
+    addps    m2, m6, m5
+    subps    m6, m5
+    mulps    m1, m6, [winq+4*68]
+    addps    m1, [bufq+4*68]
+    mova     [outq+2176], m1
+    mulps    m6, [winq]
+    addps    m6, [bufq]
+    mova     [outq], m6
+    mulps    m1, m2, [winq+4*148]
+    mova     [bufq+4*68], m1
+    mulps    m2, [winq+4*80]
+    mova     [bufq], m2
+    addps    m5, m3, [tmpq+4*24]
+    mova     m2, [tmpq+4*24]
+    subps    m2, m3
+    mova     m1, SPILLED(9)
+    subps    m1, m0
+    mulps    m1, [costabs + 16*10]
+    addps    m0, SPILLED(9)
+    mulps    m0, [costabs + 16*16]
+    addps    m6, m5, m1
+    subps    m5, m1
+    mulps    m3, m5, [winq+4*40]
+    addps    m3, [bufq+4*40]
+    mova     [outq+1280], m3
+    mulps    m5, [winq+4*28]
+    addps    m5, [bufq+4*28]
+    mova     [outq+896], m5
+    mulps    m1, m6, [winq+4*120]
+    mova     [bufq+4*40], m1
+    mulps    m6, [winq+4*108]
+    mova     [bufq+4*28], m6
+    addps    m1, m2, m0
+    subps    m2, m0
+    mulps    m5, m2, [winq+4*64]
+    addps    m5, [bufq+4*64]
+    mova     [outq+2048], m5
+    mulps    m2, [winq+4*4]
+    addps    m2, [bufq+4*4]
+    mova     [outq+128], m2
+    mulps    m0, m1, [winq+4*144]
+    mova     [bufq+4*64], m0
+    mulps    m1, [winq+4*84]
+    mova     [bufq+4*4], m1
+    mova     m1, [tmpq+4*28]
+    mova     m5, m1
+    addps    m1, SPILLED(13)
+    subps    m5, SPILLED(13)
+    UNSPILL  3, 15
+    addps    m2, m7, m3
+    mulps    m2, [costabs + 16*11]
+    subps    m3, m7
+    mulps    m3, [costabs + 16*15]
+    addps    m0, m2, m1
+    subps    m1, m2
+    SWAP     m0, m2
+    mulps    m6, m1, [winq+4*44]
+    addps    m6, [bufq+4*44]
+    mova     [outq+1408], m6
+    mulps    m1, [winq+4*24]
+    addps    m1, [bufq+4*24]
+    mova     [outq+768], m1
+    mulps    m0, m2, [winq+4*124]
+    mova     [bufq+4*44], m0
+    mulps    m2, [winq+4*104]
+    mova     [bufq+4*24], m2
+    addps    m0, m5, m3
+    subps    m5, m3
+    mulps    m1, m5, [winq+4*60]
+    addps    m1, [bufq+4*60]
+    mova     [outq+1920], m1
+    mulps    m5, [winq+4*8]
+    addps    m5, [bufq+4*8]
+    mova     [outq+256], m5
+    mulps    m1, m0, [winq+4*140]
+    mova     [bufq+4*60], m1
+    mulps    m0, [winq+4*88]
+    mova     [bufq+4*8], m0
+    mova     m1, [tmpq+4*20]
+    addps    m1, SPILLED(12)
+    mova     m2, [tmpq+4*20]
+    subps    m2, SPILLED(12)
+    UNSPILL  7, 8
+    subps    m0, m7, SPILLED(11)
+    addps    m7, SPILLED(11)
+    mulps    m4, m7, [costabs + 16*12]
+    mulps    m0, [costabs + 16*14]
+    addps    m5, m1, m4
+    subps    m1, m4
+    mulps    m7, m1, [winq+4*48]
+    addps    m7, [bufq+4*48]
+    mova     [outq+1536], m7
+    mulps    m1, [winq+4*20]
+    addps    m1, [bufq+4*20]
+    mova     [outq+640], m1
+    mulps    m1, m5, [winq+4*128]
+    mova     [bufq+4*48], m1
+    mulps    m5, [winq+4*100]
+    mova     [bufq+4*20], m5
+    addps    m6, m2, m0
+    subps    m2, m0
+    mulps    m1, m2, [winq+4*56]
+    addps    m1, [bufq+4*56]
+    mova     [outq+1792], m1
+    mulps    m2, [winq+4*12]
+    addps    m2, [bufq+4*12]
+    mova     [outq+384], m2
+    mulps    m0, m6, [winq+4*136]
+    mova    [bufq+4*56], m0
+    mulps    m6, [winq+4*92]
+    mova     [bufq+4*12], m6
+    UNSPILL  0, 14
+    mulps    m0, [costabs + 16*13]
+    mova     m3, [tmpq+4*4]
+    addps    m2, m0, m3
+    subps    m3, m0
+    mulps    m0, m3, [winq+4*52]
+    addps    m0, [bufq+4*52]
+    mova     [outq+1664], m0
+    mulps    m3, [winq+4*16]
+    addps    m3, [bufq+4*16]
+    mova     [outq+512], m3
+    mulps    m0, m2, [winq+4*132]
+    mova     [bufq+4*52], m0
+    mulps    m2, [winq+4*96]
+    mova     [bufq+4*16], m2
+    RET
+%endmacro
+
+INIT_XMM sse ; baseline build of four_imdct36_float
+DEFINE_FOUR_IMDCT
+
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx ; second build, only when external AVX assembly is enabled
+DEFINE_FOUR_IMDCT
+%endif
diff --git a/media/ffvpx/libavcodec/x86/mathops.h b/media/ffvpx/libavcodec/x86/mathops.h
new file mode 100644
index 0000000000..ca7e2dffc1
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/mathops.h
@@ -0,0 +1,153 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_MATHOPS_H
+#define AVCODEC_X86_MATHOPS_H
+
+#include "config.h"
+
+#include "libavutil/common.h"
+#include "libavutil/x86/asm.h"
+
+#if HAVE_INLINE_ASM
+
+#if ARCH_X86_32
+/* MULL(a, b, shift): signed 64-bit product a * b shifted right by shift, truncated to 32 bits. */
+#define MULL MULL
+static av_always_inline av_const int MULL(int a, int b, unsigned shift)
+{
+    int rt, dummy;
+    if (__builtin_constant_p(shift))
+    __asm__ (
+        "imull %3               \n\t" /* edx:eax = a * b */
+        "shrdl %4, %%edx, %%eax \n\t" /* eax = eax >> count, upper bits filled from edx */
+        :"=a"(rt), "=d"(dummy)
+        :"a"(a), "rm"(b), "i"(shift & 0x1F) /* compile-time count as an immediate, masked to 0..31 */
+    );
+    else
+        __asm__ (
+            "imull %3               \n\t"
+            "shrdl %4, %%edx, %%eax \n\t"
+            :"=a"(rt), "=d"(dummy)
+            :"a"(a), "rm"(b), "c"((uint8_t)shift) /* run-time count passed in the c register */
+        );
+    return rt;
+}
+/* MULH(a, b): upper 32 bits of the signed 64-bit product a * b. */
+#define MULH MULH
+static av_always_inline av_const int MULH(int a, int b)
+{
+    int rt, dummy;
+    __asm__ (
+        "imull %3"
+        :"=d"(rt), "=a"(dummy) /* result is read from edx; eax is overwritten as well */
+        :"a"(a), "rm"(b)
+    );
+    return rt;
+}
+/* MUL64(a, b): full signed 64-bit product of two 32-bit ints. */
+#define MUL64 MUL64
+static av_always_inline av_const int64_t MUL64(int a, int b)
+{
+    int64_t rt;
+    __asm__ (
+        "imull %2"
+        :"=A"(rt) /* "A": the 64-bit result lives in the edx:eax register pair */
+        :"a"(a), "rm"(b)
+    );
+    return rt;
+}
+
+#endif /* ARCH_X86_32 */
+
+#if HAVE_I686
+/* median of 3: returns the middle value of a, b and c, selected with cmov instead of branches */
+#define mid_pred mid_pred
+static inline av_const int mid_pred(int a, int b, int c)
+{
+    int i=b;
+    __asm__ (
+        "cmp    %2, %1 \n\t"
+        "cmovg  %1, %0 \n\t" /* i = max(a, b) */
+        "cmovg  %2, %1 \n\t" /* a = min(a, b) */
+        "cmp    %3, %1 \n\t"
+        "cmovl  %3, %1 \n\t" /* a = max(min(a, b), c) */
+        "cmp    %1, %0 \n\t"
+        "cmovg  %1, %0 \n\t" /* i = min(i, a), which is the median */
+        :"+&r"(i), "+&r"(a)
+        :"r"(b), "r"(c)
+    );
+    return i;
+}
+/* COPY3_IF_LT: if (y < x) { x = y; a = b; c = d; }, done with cmov; the expansion already ends in ';'. */
+#if HAVE_6REGS
+#define COPY3_IF_LT(x, y, a, b, c, d)\
+__asm__ volatile(\
+    "cmpl  %0, %3       \n\t"\
+    "cmovl %3, %0       \n\t"\
+    "cmovl %4, %1       \n\t"\
+    "cmovl %5, %2       \n\t"\
+    : "+&r" (x), "+&r" (a), "+r" (c)\
+    : "r" (y), "r" (b), "r" (d)\
+);
+#endif /* HAVE_6REGS */
+
+#endif /* HAVE_I686 */
+/* MASK_ABS: cdq sets mask to 0 or -1 from the sign of level; level = (level ^ mask) - mask. */
+#define MASK_ABS(mask, level)                   \
+    __asm__ ("cdq                    \n\t"      \
+             "xorl %1, %0            \n\t"      \
+             "subl %1, %0            \n\t"      \
+             : "+a"(level), "=&d"(mask))
+
+// NEG_SSR32(a, s): arithmetic right shift of a by -s; avoid +32 for shift optimization (gcc should do that ...)
+#define NEG_SSR32 NEG_SSR32
+static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
+    if (__builtin_constant_p(s))
+    __asm__ ("sarl %1, %0\n\t"
+         : "+r" (a)
+         : "i" (-s & 0x1F) /* compile-time count as an immediate, masked to 0..31 */
+    );
+    else
+        __asm__ ("sarl %1, %0\n\t"
+               : "+r" (a)
+               : "c" ((uint8_t)(-s)) /* run-time count passed in the c register */
+        );
+    return a;
+}
+/* NEG_USR32(a, s): logical (zero-filling) right shift of a by -s. */
+#define NEG_USR32 NEG_USR32
+static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
+    if (__builtin_constant_p(s))
+    __asm__ ("shrl %1, %0\n\t"
+         : "+r" (a)
+         : "i" (-s & 0x1F) /* compile-time count as an immediate, masked to 0..31 */
+    );
+    else
+        __asm__ ("shrl %1, %0\n\t"
+               : "+r" (a)
+               : "c" ((uint8_t)(-s)) /* run-time count passed in the c register */
+        );
+    return a;
+}
+
+#endif /* HAVE_INLINE_ASM */
+#endif /* AVCODEC_X86_MATHOPS_H */
diff --git a/media/ffvpx/libavcodec/x86/moz.build b/media/ffvpx/libavcodec/x86/moz.build
new file mode 100644
index 0000000000..693218099a
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/moz.build
@@ -0,0 +1,55 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+# Sources added unconditionally by this moz.build.
+SOURCES += [
+    'constants.c',
+    'dct32.asm',
+    'dct_init.c',
+    'fdct.c',
+    'fdctdsp_init.c',
+    'flacdsp.asm',
+    'flacdsp_init.c',
+    'h264_intrapred.asm',
+    'h264_intrapred_10bit.asm',
+    'h264_intrapred_init.c',
+    'idctdsp.asm',
+    'idctdsp_init.c',
+    'imdct36.asm',
+    'mpegaudiodsp.c',
+    'videodsp.asm',
+    'videodsp_init.c',
+    'vp8dsp.asm',
+    'vp8dsp_init.c',
+    'vp8dsp_loopfilter.asm',
+    'vp9dsp_init.c',
+    'vp9dsp_init_10bpp.c',
+    'vp9dsp_init_12bpp.c',
+    'vp9dsp_init_16bpp.c',
+    'vp9intrapred.asm',
+    'vp9intrapred_16bpp.asm',
+    'vp9itxfm.asm',
+    'vp9itxfm_16bpp.asm',
+    'vp9lpf.asm',
+    'vp9lpf_16bpp.asm',
+    'vp9mc.asm',
+    'vp9mc_16bpp.asm',
+]
+# simple_idct.asm is only added when CPU_ARCH is 'x86'.
+if CONFIG['CPU_ARCH'] == 'x86':
+    SOURCES += [ 'simple_idct.asm' ]
+# simple_idct10.asm is only added when CPU_ARCH is 'x86_64'.
+if CONFIG['CPU_ARCH'] == 'x86_64':
+    SOURCES += [ 'simple_idct10.asm' ]
+# The FFT sources are optional and gated on MOZ_LIBAV_FFT.
+if CONFIG['MOZ_LIBAV_FFT']:
+    SOURCES += [
+        'fft.asm',
+        'fft_init.c',
+    ]
+
+FINAL_LIBRARY = 'mozavcodec'
+
+include('/media/ffvpx/ffvpxcommon.mozbuild')
diff --git a/media/ffvpx/libavcodec/x86/mpegaudiodsp.c b/media/ffvpx/libavcodec/x86/mpegaudiodsp.c
new file mode 100644
index 0000000000..6586fe0726
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/mpegaudiodsp.c
@@ -0,0 +1,284 @@
+/*
+ * SIMD-optimized MP3 decoding functions
+ * Copyright (c) 2010 Vitor Sessak
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/mpegaudiodsp.h"
+/* DECL(CPU): prototypes for the static imdct36_blocks_<CPU> wrapper and the non-static ff_imdct36_float_<CPU>. */
+#define DECL(CPU)\
+static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
+void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
+
+#if HAVE_X86ASM
+DECL(sse2)
+DECL(sse3)
+DECL(ssse3)
+DECL(avx)
+#endif /* HAVE_X86ASM */
+/* Routines that transform four blocks per call; only declared here (presumably implemented in imdct36.asm). */
+void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
+                               float *tmpbuf);
+void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
+                               float *tmpbuf);
+/* Windows interleaved four-wide, indexed [switch_point variant][block_type]; filled by ff_mpadsp_init_x86_tabs(). */
+DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
+
+#if HAVE_6REGS && HAVE_SSE_INLINE
+/* MACS / MLSS: add or subtract the product ra * rb to/from rt. */
+#define MACS(rt, ra, rb) rt+=(ra)*(rb)
+#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
+/* SUM8(op, sum, w, p): apply op(sum, w[64*k], p[64*k]) for k = 0..7. */
+#define SUM8(op, sum, w, p)               \
+{                                         \
+    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
+    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
+    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
+    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
+    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
+    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
+    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
+    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
+}
+/* sum1[i] = -sum(win1[i + 64*k] * buf[i + 64*k]), sum2[i] = -sum(win2[i + 16*k] * buf[i + 64*k]), k = 0..7, i < len. */
+static void apply_window(const float *buf, const float *win1,
+                         const float *win2, float *sum1, float *sum2, int len)
+{
+    x86_reg count = - 4*len;
+    const float *win1a = win1+len;
+    const float *win2a = win2+len;
+    const float *bufa  = buf+len;
+    float *sum1a = sum1+len;
+    float *sum2a = sum2+len;
+    /* count is a negative byte offset from the end pointers and climbs to 0,
+     * four floats per iteration; movaps requires 16-byte-aligned addresses. */
+#define MULT(a, b)                                 \
+    "movaps " #a "(%1,%0), %%xmm1           \n\t"  \
+    "movaps " #a "(%3,%0), %%xmm2           \n\t"  \
+    "mulps         %%xmm2, %%xmm1           \n\t"  \
+    "subps         %%xmm1, %%xmm0           \n\t"  \
+    "mulps  " #b "(%2,%0), %%xmm2           \n\t"  \
+    "subps         %%xmm2, %%xmm4           \n\t"  \
+
+    __asm__ volatile(
+            "1:                                   \n\t"
+            "xorps       %%xmm0, %%xmm0           \n\t"
+            "xorps       %%xmm4, %%xmm4           \n\t"
+
+            MULT(   0,   0)
+            MULT( 256,  64)
+            MULT( 512, 128)
+            MULT( 768, 192)
+            MULT(1024, 256)
+            MULT(1280, 320)
+            MULT(1536, 384)
+            MULT(1792, 448)
+
+            "movaps      %%xmm0, (%4,%0)          \n\t"
+            "movaps      %%xmm4, (%5,%0)          \n\t"
+            "add            $16,  %0              \n\t"
+            "jl              1b                   \n\t"
+            :"+&r"(count)
+            :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)
+            :"memory" /* the asm loads and stores through its pointer inputs; keep the compiler from reordering or caching those accesses */
+            );
+#undef MULT
+}
+/* Windowing step built on apply_window(): produces 32 outputs at out[0], out[incr], ..., out[31*incr]. */
+static void apply_window_mp3(float *in, float *win, int *unused, float *out,
+                             ptrdiff_t incr)
+{
+    LOCAL_ALIGNED_16(float, suma, [17]);
+    LOCAL_ALIGNED_16(float, sumb, [17]);
+    LOCAL_ALIGNED_16(float, sumc, [17]);
+    LOCAL_ALIGNED_16(float, sumd, [17]);
+
+    float sum;
+
+    /* copy to avoid wrap: duplicate the first 32 floats of in[] at in + 512 */
+    __asm__ volatile(
+            "movaps    0(%0), %%xmm0   \n\t" \
+            "movaps   16(%0), %%xmm1   \n\t" \
+            "movaps   32(%0), %%xmm2   \n\t" \
+            "movaps   48(%0), %%xmm3   \n\t" \
+            "movaps   %%xmm0,   0(%1) \n\t" \
+            "movaps   %%xmm1,  16(%1) \n\t" \
+            "movaps   %%xmm2,  32(%1) \n\t" \
+            "movaps   %%xmm3,  48(%1) \n\t" \
+            "movaps   64(%0), %%xmm0   \n\t" \
+            "movaps   80(%0), %%xmm1   \n\t" \
+            "movaps   96(%0), %%xmm2   \n\t" \
+            "movaps  112(%0), %%xmm3   \n\t" \
+            "movaps   %%xmm0,  64(%1) \n\t" \
+            "movaps   %%xmm1,  80(%1) \n\t" \
+            "movaps   %%xmm2,  96(%1) \n\t" \
+            "movaps   %%xmm3, 112(%1) \n\t"
+            ::"r"(in), "r"(in+512)
+            :"memory"
+            );
+
+    apply_window(in + 16, win     , win + 512, suma, sumc, 16);
+    apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);
+
+    SUM8(MACS, suma[0], win + 32, in + 48);
+
+    sumc[ 0] = 0;
+    sumb[16] = 0;
+    sumd[16] = 0;
+
+#define SUMS(suma, sumb, sumc, sumd, out1, out2)               \
+            "movups " #sumd "(%4),       %%xmm0          \n\t" \
+            "shufps         $0x1b,       %%xmm0, %%xmm0  \n\t" \
+            "subps  " #suma "(%1),       %%xmm0          \n\t" \
+            "movaps        %%xmm0," #out1 "(%0)          \n\t" \
+\
+            "movups " #sumc "(%3),       %%xmm0          \n\t" \
+            "shufps         $0x1b,       %%xmm0, %%xmm0  \n\t" \
+            "addps  " #sumb "(%2),       %%xmm0          \n\t" \
+            "movaps        %%xmm0," #out2 "(%0)          \n\t"
+
+    if (incr == 1) {
+        __asm__ volatile(
+            SUMS( 0, 48,  4, 52,  0, 112)
+            SUMS(16, 32, 20, 36, 16,  96)
+            SUMS(32, 16, 36, 20, 32,  80)
+            SUMS(48,  0, 52,  4, 48,  64)
+
+            :"+&r"(out)
+            :"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])
+            :"memory"
+            );
+        out += 16*incr; /* move to output index 16, which is stored at the end */
+    } else {
+        int j;
+        float *out2 = out + 32 * incr;
+        out[0  ]  = -suma[   0];
+        out += incr;
+        out2 -= incr;
+        for(j=1;j<16;j++) {
+            *out  = -suma[   j] + sumd[16-j];
+            *out2 =  sumb[16-j] + sumc[   j];
+            out  += incr;
+            out2 -= incr;
+        }
+    }
+
+    sum = 0;
+    SUM8(MLSS, sum, win + 16 + 32, in + 32);
+    *out = sum; /* output index 16: out was advanced 16 steps in either branch */
+}
+
+#endif /* HAVE_6REGS && HAVE_SSE_INLINE */
+/* DECL_IMDCT_BLOCKS(CPU1, CPU2): define imdct36_blocks_<CPU1>; groups of 4 blocks go through ff_four_imdct36_float_<CPU2>, the remaining count & 3 blocks through ff_imdct36_float_<CPU1>. */
+#if HAVE_X86ASM
+#define DECL_IMDCT_BLOCKS(CPU1, CPU2)                                       \
+static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in,      \
+                               int count, int switch_point, int block_type) \
+{                                                                           \
+    int align_end = count - (count & 3);                                \
+    int j;                                                              \
+    for (j = 0; j < align_end; j+= 4) {                                 \
+        LOCAL_ALIGNED_16(float, tmpbuf, [1024]);                        \
+        float *win = mdct_win_sse[switch_point && j < 4][block_type];   \
+        /* apply window & overlap with previous buffer */               \
+                                                                        \
+        /* select window */                                             \
+        ff_four_imdct36_float_ ## CPU2(out, buf, in, win, tmpbuf);      \
+        in      += 4*18;                                                \
+        buf     += 4*18;                                                \
+        out     += 4;                                                   \
+    }                                                                   \
+    for (; j < count; j++) {                                            \
+        /* apply window & overlap with previous buffer */               \
+                                                                        \
+        /* select window */                                             \
+        int win_idx = (switch_point && j < 2) ? 0 : block_type;         \
+        float *win = ff_mdct_win_float[win_idx + (4 & -(j & 1))];       \
+                                                                        \
+        ff_imdct36_float_ ## CPU1(out, buf, in, win);                   \
+                                                                        \
+        in  += 18;                                                      \
+        buf++;                                                          \
+        out++;                                                          \
+    }                                                                   \
+}
+/* The sse2/sse3/ssse3 wrappers all use the SSE four-block routine; only the single-block routine differs. */
+#if HAVE_SSE
+DECL_IMDCT_BLOCKS(sse2,sse)
+DECL_IMDCT_BLOCKS(sse3,sse)
+DECL_IMDCT_BLOCKS(ssse3,sse)
+#endif
+#if HAVE_AVX_EXTERNAL
+DECL_IMDCT_BLOCKS(avx,avx)
+#endif
+#endif /* HAVE_X86ASM */
+/* Fill mdct_win_sse: coefficient k of each window is expanded into the four lanes [4*k .. 4*k + 3]. */
+av_cold void ff_mpadsp_init_x86_tabs(void)
+{
+    int type, k;
+    for (type = 0; type < 4; type++) {
+        float *plain = mdct_win_sse[0][type]; /* lanes: type, type+4, type, type+4 */
+        float *mixed = mdct_win_sse[1][type]; /* lanes: 0,    4,      type, type+4 */
+        for (k = 0; k < 40; k++) {
+            const float even = ff_mdct_win_float[type    ][k];
+            const float odd  = ff_mdct_win_float[type + 4][k];
+            plain[4*k    ] = plain[4*k + 2] = mixed[4*k + 2] = even;
+            plain[4*k + 1] = plain[4*k + 3] = mixed[4*k + 3] = odd;
+            mixed[4*k    ] = ff_mdct_win_float[0][k];
+            mixed[4*k + 1] = ff_mdct_win_float[4][k];
+        }
+    }
+}
+/* Install the x86 implementations permitted by the CPU flags; checks run in order, so the last match wins. */
+av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
+{
+    av_unused int cpu_flags = av_get_cpu_flags();
+
+#if HAVE_6REGS && HAVE_SSE_INLINE
+    if (INLINE_SSE(cpu_flags)) {
+        s->apply_window_float = apply_window_mp3;
+    }
+#endif /* HAVE_6REGS && HAVE_SSE_INLINE */
+
+#if HAVE_X86ASM
+#if HAVE_SSE
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        s->imdct36_blocks_float = imdct36_blocks_sse2;
+    }
+    if (EXTERNAL_SSE3(cpu_flags)) {
+        s->imdct36_blocks_float = imdct36_blocks_sse3;
+    }
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        s->imdct36_blocks_float = imdct36_blocks_ssse3;
+    }
+#endif
+#if HAVE_AVX_EXTERNAL
+    if (EXTERNAL_AVX(cpu_flags)) {
+        s->imdct36_blocks_float = imdct36_blocks_avx;
+    }
+#endif
+#endif /* HAVE_X86ASM */
+}
diff --git a/media/ffvpx/libavcodec/x86/simple_idct.asm b/media/ffvpx/libavcodec/x86/simple_idct.asm
new file mode 100644
index 0000000000..982b2f0bbb
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/simple_idct.asm
@@ -0,0 +1,871 @@
+;
+; Simple IDCT MMX
+;
+; Copyright (c) 2001, 2002 Michael Niedermayer <michaelni@gmx.at>
+;
+; Conversion from gcc syntax to x264asm syntax with minimal modifications
+; by James Darnley <jdarnley@obe.tv>.
+;
+; This file is part of FFmpeg.
+;
+; FFmpeg is free software; you can redistribute it and/or
+; modify it under the terms of the GNU Lesser General Public
+; License as published by the Free Software Foundation; either
+; version 2.1 of the License, or (at your option) any later version.
+;
+; FFmpeg is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; Lesser General Public License for more details.
+;
+; You should have received a copy of the GNU Lesser General Public
+; License along with FFmpeg; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+%if ARCH_X86_32
+cextern pb_80
+
+wm1010: dw 0, 0xffff, 0, 0xffff
+d40000: dd 4 << 16, 0
+
+; 23170.475006
+; 22725.260826
+; 21406.727617
+; 19265.545870
+; 16384.000000
+; 12872.826198
+; 8866.956905
+; 4520.335430
+
+%define C0 23170 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+%define C1 22725 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+%define C2 21407 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+%define C3 19266 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+%define C4 16383 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
+%define C5 12873 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+%define C6 8867  ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+%define C7 4520  ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+
+%define ROW_SHIFT 11
+%define COL_SHIFT 20 ; 6
+
+coeffs:                                 ; trailing notes give the byte offset of each row
+    dw 1 << (ROW_SHIFT - 1), 0          ; coeffs +   0
+    dw 1 << (ROW_SHIFT - 1), 0
+    dw 1 << (ROW_SHIFT - 1), 1          ; coeffs +   8
+    dw 1 << (ROW_SHIFT - 1), 0
+
+    dw C4,  C4,  C4,  C4                ; coeffs +  16
+    dw C4, -C4,  C4, -C4                ; coeffs +  24
+
+    dw C2,  C6,  C2,  C6                ; coeffs +  32
+    dw C6, -C2,  C6, -C2                ; coeffs +  40
+
+    dw C1,  C3,  C1,  C3                ; coeffs +  48
+    dw C5,  C7,  C5,  C7                ; coeffs +  56
+
+    dw C3, -C7,  C3, -C7                ; coeffs +  64
+    dw -C1, -C5, -C1, -C5               ; coeffs +  72
+
+    dw C5, -C1,  C5, -C1                ; coeffs +  80
+    dw C7,  C3,  C7,  C3                ; coeffs +  88
+
+    dw C7, -C5,  C7, -C5                ; coeffs +  96
+    dw C3, -C1,  C3, -C1                ; coeffs + 104
+
+SECTION .text
+; DC_COND_IDCT: %1-%4 = byte offsets into blockq, %5 = destination, %7 = psrad count (%6 is not referenced); stores 4 qwords at %5.
+%macro DC_COND_IDCT 7
+    movq            mm0, [blockq + %1]  ; R4     R0      r4      r0
+    movq            mm1, [blockq + %2]  ; R6     R2      r6      r2
+    movq            mm2, [blockq + %3]  ; R3     R1      r3      r1
+    movq            mm3, [blockq + %4]  ; R7     R5      r7      r5
+    movq            mm4, [wm1010]       ; word mask: keeps r4/R4, drops r0/R0
+    pand            mm4, mm0
+    por             mm4, mm1
+    por             mm4, mm2
+    por             mm4, mm3
+    packssdw        mm4, mm4
+    movd            t0d, mm4
+    or              t0d, t0d
+    jz              %%1                 ; everything except r0/R0 is zero: DC-only path
+    movq            mm4, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm4, mm0            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm5, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm0, mm5            ; -C4R4+C4R0     -C4r4+C4r0
+    movq            mm5, [coeffs + 32]  ; C6     C2      C6      C2
+    pmaddwd         mm5, mm1            ; C6R6+C2R2      C6r6+C2r2
+    movq            mm6, [coeffs + 40]  ; -C2    C6      -C2     C6
+    pmaddwd         mm1, mm6            ; -C2R6+C6R2     -C2r6+C6r2
+    movq            mm7, [coeffs + 48]  ; C3     C1      C3      C1
+    pmaddwd         mm7, mm2            ; C3R3+C1R1      C3r3+C1r1
+    paddd           mm4, [coeffs + 8]
+    movq            mm6, mm4            ; C4R4+C4R0      C4r4+C4r0
+    paddd           mm4, mm5            ; A0             a0
+    psubd           mm6, mm5            ; A3             a3
+    movq            mm5, [coeffs + 56]  ; C7     C5      C7      C5
+    pmaddwd         mm5, mm3            ; C7R7+C5R5      C7r7+C5r5
+    paddd           mm0, [coeffs + 8]
+    paddd           mm1, mm0            ; A1             a1
+    paddd           mm0, mm0
+    psubd           mm0, mm1            ; A2             a2
+    pmaddwd         mm2, [coeffs + 64]  ; -C7R3+C3R1     -C7r3+C3r1
+    paddd           mm7, mm5            ; B0             b0
+    movq            mm5, [coeffs + 72]  ; -C5    -C1     -C5     -C1
+    pmaddwd         mm5, mm3            ; -C5R7-C1R5     -C5r7-C1r5
+    paddd           mm7, mm4            ; A0+B0          a0+b0
+    paddd           mm4, mm4            ; 2A0            2a0
+    psubd           mm4, mm7            ; A0-B0          a0-b0
+    paddd           mm5, mm2            ; B1             b1
+    psrad           mm7, %7
+    psrad           mm4, %7
+    movq            mm2, mm1            ; A1             a1
+    paddd           mm1, mm5            ; A1+B1          a1+b1
+    psubd           mm2, mm5            ; A1-B1          a1-b1
+    psrad           mm1, %7
+    psrad           mm2, %7
+    packssdw        mm7, mm1            ; A1+B1  a1+b1   A0+B0   a0+b0
+    packssdw        mm2, mm4            ; A0-B0  a0-b0   A1-B1   a1-b1
+    movq           [%5], mm7
+    movq            mm1, [blockq + %3]  ; R3     R1      r3      r1
+    movq            mm4, [coeffs + 80]  ; -C1    C5      -C1     C5
+    movq      [24 + %5], mm2
+    pmaddwd         mm4, mm1            ; -C1R3+C5R1     -C1r3+C5r1
+    movq            mm7, [coeffs + 88]  ; C3     C7      C3      C7
+    pmaddwd         mm1, [coeffs + 96]  ; -C5R3+C7R1     -C5r3+C7r1
+    pmaddwd         mm7, mm3            ; C3R7+C7R5      C3r7+C7r5
+    movq            mm2, mm0            ; A2             a2
+    pmaddwd         mm3, [coeffs + 104] ; -C1R7+C3R5     -C1r7+C3r5
+    paddd           mm4, mm7            ; B2             b2
+    paddd           mm2, mm4            ; A2+B2          a2+b2
+    psubd           mm0, mm4            ; a2-B2          a2-b2
+    psrad           mm2, %7
+    psrad           mm0, %7
+    movq            mm4, mm6            ; A3             a3
+    paddd           mm3, mm1            ; B3             b3
+    paddd           mm6, mm3            ; A3+B3          a3+b3
+    psubd           mm4, mm3            ; a3-B3          a3-b3
+    psrad           mm6, %7
+    packssdw        mm2, mm6            ; A3+B3  a3+b3   A2+B2   a2+b2
+    movq       [8 + %5], mm2
+    psrad           mm4, %7
+    packssdw        mm4, mm0            ; A2-B2  a2-b2   A3-B3   a3-b3
+    movq      [16 + %5], mm4
+    jmp             %%2                 ; full path done: skip the DC-only code
+%%1:                                    ; DC-only path: result derived from mm0 (r0/R0) alone
+    pslld           mm0, 16
+    paddd           mm0, [d40000]
+    psrad           mm0, 13
+    packssdw        mm0, mm0
+    movq           [%5], mm0            ; the same packed value fills all four output qwords
+    movq       [8 + %5], mm0
+    movq      [16 + %5], mm0
+    movq      [24 + %5], mm0
+%%2:
+%endmacro
+; Z_COND_IDCT: %1-%4 = byte offsets into blockq, %5 = destination, %7 = psrad count, %8 = label taken when all four loads are zero (%6 is not referenced).
+%macro Z_COND_IDCT 8
+    movq            mm0, [blockq + %1]  ; R4     R0      r4      r0
+    movq            mm1, [blockq + %2]  ; R6     R2      r6      r2
+    movq            mm2, [blockq + %3]  ; R3     R1      r3      r1
+    movq            mm3, [blockq + %4]  ; R7     R5      r7      r5
+    movq            mm4, mm0
+    por             mm4, mm1
+    por             mm4, mm2
+    por             mm4, mm3
+    packssdw        mm4, mm4
+    movd            t0d, mm4
+    or              t0d, t0d
+    jz               %8                 ; all loaded coefficients are zero: leave without storing
+    movq            mm4, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm4, mm0            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm5, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm0, mm5            ; -C4R4+C4R0     -C4r4+C4r0
+    movq            mm5, [coeffs + 32]  ; C6     C2      C6      C2
+    pmaddwd         mm5, mm1            ; C6R6+C2R2      C6r6+C2r2
+    movq            mm6, [coeffs + 40]  ; -C2    C6      -C2     C6
+    pmaddwd         mm1, mm6            ; -C2R6+C6R2     -C2r6+C6r2
+    movq            mm7, [coeffs + 48]  ; C3     C1      C3      C1
+    pmaddwd         mm7, mm2            ; C3R3+C1R1      C3r3+C1r1
+    paddd           mm4, [coeffs]
+    movq            mm6, mm4            ; C4R4+C4R0      C4r4+C4r0
+    paddd           mm4, mm5            ; A0             a0
+    psubd           mm6, mm5            ; A3             a3
+    movq            mm5, [coeffs + 56]  ; C7     C5      C7      C5
+    pmaddwd         mm5, mm3            ; C7R7+C5R5      C7r7+C5r5
+    paddd           mm0, [coeffs]
+    paddd           mm1, mm0            ; A1             a1
+    paddd           mm0, mm0
+    psubd           mm0, mm1            ; A2             a2
+    pmaddwd         mm2, [coeffs + 64]  ; -C7R3+C3R1     -C7r3+C3r1
+    paddd           mm7, mm5            ; B0             b0
+    movq            mm5, [coeffs + 72]  ; -C5    -C1     -C5     -C1
+    pmaddwd         mm5, mm3            ; -C5R7-C1R5     -C5r7-C1r5
+    paddd           mm7, mm4            ; A0+B0          a0+b0
+    paddd           mm4, mm4            ; 2A0            2a0
+    psubd           mm4, mm7            ; A0-B0          a0-b0
+    paddd           mm5, mm2            ; B1             b1
+    psrad           mm7, %7
+    psrad           mm4, %7
+    movq            mm2, mm1            ; A1             a1
+    paddd           mm1, mm5            ; A1+B1          a1+b1
+    psubd           mm2, mm5            ; A1-B1          a1-b1
+    psrad           mm1, %7
+    psrad           mm2, %7
+    packssdw        mm7, mm1            ; A1+B1  a1+b1   A0+B0   a0+b0
+    packssdw        mm2, mm4            ; A0-B0  a0-b0   A1-B1   a1-b1
+    movq           [%5], mm7
+    movq            mm1, [blockq + %3]  ; R3     R1      r3      r1
+    movq            mm4, [coeffs + 80]  ; -C1    C5      -C1     C5
+    movq      [24 + %5], mm2
+    pmaddwd         mm4, mm1            ; -C1R3+C5R1     -C1r3+C5r1
+    movq            mm7, [coeffs + 88]  ; C3     C7      C3      C7
+    pmaddwd         mm1, [coeffs + 96]  ; -C5R3+C7R1     -C5r3+C7r1
+    pmaddwd         mm7, mm3            ; C3R7+C7R5      C3r7+C7r5
+    movq            mm2, mm0            ; A2             a2
+    pmaddwd         mm3, [coeffs + 104] ; -C1R7+C3R5     -C1r7+C3r5
+    paddd           mm4, mm7            ; B2             b2
+    paddd           mm2, mm4            ; A2+B2          a2+b2
+    psubd           mm0, mm4            ; a2-B2          a2-b2
+    psrad           mm2, %7
+    psrad           mm0, %7
+    movq            mm4, mm6            ; A3             a3
+    paddd           mm3, mm1            ; B3             b3
+    paddd           mm6, mm3            ; A3+B3          a3+b3
+    psubd           mm4, mm3            ; a3-B3          a3-b3
+    psrad           mm6, %7
+    packssdw        mm2, mm6            ; A3+B3  a3+b3   A2+B2   a2+b2
+    movq       [8 + %5], mm2
+    psrad           mm4, %7
+    packssdw        mm4, mm0            ; A2-B2  a2-b2   A3-B3   a3-b3
+    movq      [16 + %5], mm4
+%endmacro
+; IDCT1: %1-%4 = source operands, %5 = destination, %6 = psrad count; stores eight dwords at %5 + 16*k, k = 0..7.
+%macro IDCT1 6
+    movq            mm0, %1             ; R4     R0      r4      r0
+    movq            mm1, %2             ; R6     R2      r6      r2
+    movq            mm2, %3             ; R3     R1      r3      r1
+    movq            mm3, %4             ; R7     R5      r7      r5
+    movq            mm4, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm4, mm0            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm5, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm0, mm5            ; -C4R4+C4R0     -C4r4+C4r0
+    movq            mm5, [coeffs + 32]  ; C6     C2      C6      C2
+    pmaddwd         mm5, mm1            ; C6R6+C2R2      C6r6+C2r2
+    movq            mm6, [coeffs + 40]  ; -C2    C6      -C2     C6
+    pmaddwd         mm1, mm6            ; -C2R6+C6R2     -C2r6+C6r2
+    movq            mm6, mm4            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm7, [coeffs + 48]  ; C3     C1      C3      C1
+    pmaddwd         mm7, mm2            ; C3R3+C1R1      C3r3+C1r1
+    paddd           mm4, mm5            ; A0             a0
+    psubd           mm6, mm5            ; A3             a3
+    movq            mm5, mm0            ; -C4R4+C4R0     -C4r4+C4r0
+    paddd           mm0, mm1            ; A1             a1
+    psubd           mm5, mm1            ; A2             a2
+    movq            mm1, [coeffs + 56]  ; C7     C5      C7      C5
+    pmaddwd         mm1, mm3            ; C7R7+C5R5      C7r7+C5r5
+    pmaddwd         mm2, [coeffs + 64]  ; -C7R3+C3R1     -C7r3+C3r1
+    paddd           mm7, mm1            ; B0             b0
+    movq            mm1, [coeffs + 72]  ; -C5    -C1     -C5     -C1
+    pmaddwd         mm1, mm3            ; -C5R7-C1R5     -C5r7-C1r5
+    paddd           mm7, mm4            ; A0+B0          a0+b0
+    paddd           mm4, mm4            ; 2A0            2a0
+    psubd           mm4, mm7            ; A0-B0          a0-b0
+    paddd           mm1, mm2            ; B1             b1
+    psrad           mm7, %6
+    psrad           mm4, %6
+    movq            mm2, mm0            ; A1             a1
+    paddd           mm0, mm1            ; A1+B1          a1+b1
+    psubd           mm2, mm1            ; A1-B1          a1-b1
+    psrad           mm0, %6
+    psrad           mm2, %6
+    packssdw        mm7, mm7            ; A0+B0  a0+b0
+    movd           [%5], mm7
+    packssdw        mm0, mm0            ; A1+B1  a1+b1
+    movd      [16 + %5], mm0
+    packssdw        mm2, mm2            ; A1-B1  a1-b1
+    movd      [96 + %5], mm2
+    packssdw        mm4, mm4            ; A0-B0  a0-b0
+    movd     [112 + %5], mm4
+    movq            mm0, %3             ; R3     R1      r3      r1
+    movq            mm4, [coeffs + 80]  ; -C1    C5      -C1     C5
+    pmaddwd         mm4, mm0            ; -C1R3+C5R1     -C1r3+C5r1
+    movq            mm7, [coeffs + 88]  ; C3     C7      C3      C7
+    pmaddwd         mm0, [coeffs + 96]  ; -C5R3+C7R1     -C5r3+C7r1
+    pmaddwd         mm7, mm3            ; C3R7+C7R5      C3r7+C7r5
+    movq            mm2, mm5            ; A2             a2
+    pmaddwd         mm3, [coeffs + 104] ; -C1R7+C3R5     -C1r7+C3r5
+    paddd           mm4, mm7            ; B2             b2
+    paddd           mm2, mm4            ; A2+B2          a2+b2
+    psubd           mm5, mm4            ; a2-B2          a2-b2
+    psrad           mm2, %6
+    psrad           mm5, %6
+    movq            mm4, mm6            ; A3             a3
+    paddd           mm3, mm0            ; B3             b3
+    paddd           mm6, mm3            ; A3+B3          a3+b3
+    psubd           mm4, mm3            ; a3-B3          a3-b3
+    psrad           mm6, %6
+    psrad           mm4, %6
+    packssdw        mm2, mm2            ; A2+B2  a2+b2
+    packssdw        mm6, mm6            ; A3+B3  a3+b3
+    movd      [32 + %5], mm2
+    packssdw        mm4, mm4            ; A3-B3  a3-b3
+    packssdw        mm5, mm5            ; A2-B2  a2-b2
+    movd      [48 + %5], mm6
+    movd      [64 + %5], mm4
+    movd      [80 + %5], mm5
+%endmacro
+
+%macro IDCT2 6 ; %1=R4/R0, %2=R6/R2, %3 is not read, %4=R7/R5, %5=dst, %6=shift count
+    movq            mm0, %1             ; R4     R0      r4      r0
+    movq            mm1, %2             ; R6     R2      r6      r2
+    movq            mm3, %4             ; R7     R5      r7      r5
+    movq            mm4, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm4, mm0            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm5, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm0, mm5            ; -C4R4+C4R0     -C4r4+C4r0
+    movq            mm5, [coeffs + 32]  ; C6     C2      C6      C2
+    pmaddwd         mm5, mm1            ; C6R6+C2R2      C6r6+C2r2
+    movq            mm6, [coeffs + 40]  ; -C2    C6      -C2     C6
+    pmaddwd         mm1, mm6            ; -C2R6+C6R2     -C2r6+C6r2
+    movq            mm6, mm4            ; C4R4+C4R0      C4r4+C4r0
+    paddd           mm4, mm5            ; A0             a0
+    psubd           mm6, mm5            ; A3             a3
+    movq            mm5, mm0            ; -C4R4+C4R0     -C4r4+C4r0
+    paddd           mm0, mm1            ; A1             a1
+    psubd           mm5, mm1            ; A2             a2
+    movq            mm1, [coeffs + 56]  ; C7     C5      C7      C5
+    pmaddwd         mm1, mm3            ; B0 = C7R7+C5R5 (only the %4 input feeds the B terms here)
+    movq            mm7, [coeffs + 72]  ; -C5    -C1     -C5     -C1
+    pmaddwd         mm7, mm3            ; B1 = -C5R7-C1R5
+    paddd           mm1, mm4            ; A0+B0          a0+b0
+    paddd           mm4, mm4            ; 2A0            2a0
+    psubd           mm4, mm1            ; A0-B0          a0-b0
+    psrad           mm1, %6
+    psrad           mm4, %6
+    movq            mm2, mm0            ; A1             a1
+    paddd           mm0, mm7            ; A1+B1          a1+b1
+    psubd           mm2, mm7            ; A1-B1          a1-b1
+    psrad           mm0, %6
+    psrad           mm2, %6
+    packssdw        mm1, mm1            ; A0+B0  a0+b0
+    movd           [%5], mm1
+    packssdw        mm0, mm0            ; A1+B1  a1+b1
+    movd      [16 + %5], mm0
+    packssdw        mm2, mm2            ; A1-B1  a1-b1
+    movd      [96 + %5], mm2
+    packssdw        mm4, mm4            ; A0-B0  a0-b0
+    movd     [112 + %5], mm4
+    movq            mm1, [coeffs + 88]  ; C3     C7      C3      C7
+    pmaddwd         mm1, mm3            ; B2 = C3R7+C7R5
+    movq            mm2, mm5            ; A2             a2
+    pmaddwd         mm3, [coeffs + 104] ; B3 = -C1R7+C3R5
+    paddd           mm2, mm1            ; A2+B2          a2+b2
+    psubd           mm5, mm1            ; A2-B2          a2-b2
+    psrad           mm2, %6
+    psrad           mm5, %6
+    movq            mm1, mm6            ; A3             a3
+    paddd           mm6, mm3            ; A3+B3          a3+b3
+    psubd           mm1, mm3            ; A3-B3          a3-b3
+    psrad           mm6, %6
+    psrad           mm1, %6
+    packssdw        mm2, mm2            ; A2+B2  a2+b2
+    packssdw        mm6, mm6            ; A3+B3  a3+b3
+    movd      [32 + %5], mm2
+    packssdw        mm1, mm1            ; A3-B3  a3-b3
+    packssdw        mm5, mm5            ; A2-B2  a2-b2
+    movd      [48 + %5], mm6
+    movd      [64 + %5], mm1
+    movd      [80 + %5], mm5
+%endmacro
+
+%macro IDCT3 6 ; %1=R4/R0, %2 and %3 are not read, %4=R7/R5, %5=dst, %6=shift count
+    movq            mm0, %1             ; R4     R0      r4      r0
+    movq            mm3, %4             ; R7     R5      r7      r5
+    movq            mm4, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm4, mm0            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm5, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm0, mm5            ; -C4R4+C4R0     -C4r4+C4r0
+    movq            mm6, mm4            ; A3 = A0 = C4R4+C4R0 (no R6/R2 term in this variant)
+    movq            mm5, mm0            ; A2 = A1 = -C4R4+C4R0
+    movq            mm1, [coeffs + 56]  ; C7     C5      C7      C5
+    pmaddwd         mm1, mm3            ; B0 = C7R7+C5R5
+    movq            mm7, [coeffs + 72]  ; -C5    -C1     -C5     -C1
+    pmaddwd         mm7, mm3            ; B1 = -C5R7-C1R5
+    paddd           mm1, mm4            ; A0+B0          a0+b0
+    paddd           mm4, mm4            ; 2A0            2a0
+    psubd           mm4, mm1            ; A0-B0          a0-b0
+    psrad           mm1, %6
+    psrad           mm4, %6
+    movq            mm2, mm0            ; A1             a1
+    paddd           mm0, mm7            ; A1+B1          a1+b1
+    psubd           mm2, mm7            ; A1-B1          a1-b1
+    psrad           mm0, %6
+    psrad           mm2, %6
+    packssdw        mm1, mm1            ; A0+B0  a0+b0
+    movd           [%5], mm1
+    packssdw        mm0, mm0            ; A1+B1  a1+b1
+    movd      [16 + %5], mm0
+    packssdw        mm2, mm2            ; A1-B1  a1-b1
+    movd      [96 + %5], mm2
+    packssdw        mm4, mm4            ; A0-B0  a0-b0
+    movd     [112 + %5], mm4
+    movq            mm1, [coeffs + 88]  ; C3     C7      C3      C7
+    pmaddwd         mm1, mm3            ; B2 = C3R7+C7R5
+    movq            mm2, mm5            ; A2             a2
+    pmaddwd         mm3, [coeffs + 104] ; B3 = -C1R7+C3R5
+    paddd           mm2, mm1            ; A2+B2          a2+b2
+    psubd           mm5, mm1            ; A2-B2          a2-b2
+    psrad           mm2, %6
+    psrad           mm5, %6
+    movq            mm1, mm6            ; A3             a3
+    paddd           mm6, mm3            ; A3+B3          a3+b3
+    psubd           mm1, mm3            ; A3-B3          a3-b3
+    psrad           mm6, %6
+    psrad           mm1, %6
+    packssdw        mm2, mm2            ; A2+B2  a2+b2
+    packssdw        mm6, mm6            ; A3+B3  a3+b3
+    movd      [32 + %5], mm2
+    packssdw        mm1, mm1            ; A3-B3  a3-b3
+    packssdw        mm5, mm5            ; A2-B2  a2-b2
+    movd      [48 + %5], mm6
+    movd      [64 + %5], mm1
+    movd      [80 + %5], mm5
+%endmacro
+
+%macro IDCT4 6 ; %1=R4/R0, %2 is not read, %3=R3/R1, %4=R7/R5, %5=dst, %6=shift count
+    movq            mm0, %1             ; R4     R0      r4      r0
+    movq            mm2, %3             ; R3     R1      r3      r1
+    movq            mm3, %4             ; R7     R5      r7      r5
+    movq            mm4, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm4, mm0            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm5, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm0, mm5            ; -C4R4+C4R0     -C4r4+C4r0
+    movq            mm6, mm4            ; A3 = A0 = C4R4+C4R0 (no R6/R2 term in this variant)
+    movq            mm7, [coeffs + 48]  ; C3     C1      C3      C1
+    pmaddwd         mm7, mm2            ; C3R3+C1R1      C3r3+C1r1
+    movq            mm5, mm0            ; A2 = A1 = -C4R4+C4R0
+    movq            mm1, [coeffs + 56]  ; C7     C5      C7      C5
+    pmaddwd         mm1, mm3            ; C7R7+C5R5      C7r7+C5r5
+    pmaddwd         mm2, [coeffs + 64]  ; -C7R3+C3R1     -C7r3+C3r1
+    paddd           mm7, mm1            ; B0             b0
+    movq            mm1, [coeffs + 72]  ; -C5    -C1     -C5     -C1
+    pmaddwd         mm1, mm3            ; -C5R7-C1R5     -C5r7-C1r5
+    paddd           mm7, mm4            ; A0+B0          a0+b0
+    paddd           mm4, mm4            ; 2A0            2a0
+    psubd           mm4, mm7            ; A0-B0          a0-b0
+    paddd           mm1, mm2            ; B1             b1
+    psrad           mm7, %6
+    psrad           mm4, %6
+    movq            mm2, mm0            ; A1             a1
+    paddd           mm0, mm1            ; A1+B1          a1+b1
+    psubd           mm2, mm1            ; A1-B1          a1-b1
+    psrad           mm0, %6
+    psrad           mm2, %6
+    packssdw        mm7, mm7            ; A0+B0  a0+b0
+    movd           [%5], mm7
+    packssdw        mm0, mm0            ; A1+B1  a1+b1
+    movd      [16 + %5], mm0
+    packssdw        mm2, mm2            ; A1-B1  a1-b1
+    movd      [96 + %5], mm2
+    packssdw        mm4, mm4            ; A0-B0  a0-b0
+    movd     [112 + %5], mm4
+    movq            mm0, %3             ; R3     R1      r3      r1
+    movq            mm4, [coeffs + 80]  ; -C1    C5      -C1     C5
+    pmaddwd         mm4, mm0            ; -C1R3+C5R1     -C1r3+C5r1
+    movq            mm7, [coeffs + 88]  ; C3     C7      C3      C7
+    pmaddwd         mm0, [coeffs + 96]  ; -C5R3+C7R1     -C5r3+C7r1
+    pmaddwd         mm7, mm3            ; C3R7+C7R5      C3r7+C7r5
+    movq            mm2, mm5            ; A2             a2
+    pmaddwd         mm3, [coeffs + 104] ; -C1R7+C3R5     -C1r7+C3r5
+    paddd           mm4, mm7            ; B2             b2
+    paddd           mm2, mm4            ; A2+B2          a2+b2
+    psubd           mm5, mm4            ; A2-B2          a2-b2
+    psrad           mm2, %6
+    psrad           mm5, %6
+    movq            mm4, mm6            ; A3             a3
+    paddd           mm3, mm0            ; B3             b3
+    paddd           mm6, mm3            ; A3+B3          a3+b3
+    psubd           mm4, mm3            ; A3-B3          a3-b3
+    psrad           mm6, %6
+    psrad           mm4, %6
+    packssdw        mm2, mm2            ; A2+B2  a2+b2
+    packssdw        mm6, mm6            ; A3+B3  a3+b3
+    movd      [32 + %5], mm2
+    packssdw        mm4, mm4            ; A3-B3  a3-b3
+    packssdw        mm5, mm5            ; A2-B2  a2-b2
+    movd      [48 + %5], mm6
+    movd      [64 + %5], mm4
+    movd      [80 + %5], mm5
+%endmacro
+
+%macro IDCT5 6 ; %1=R4/R0, %2 and %4 are not read, %3=R3/R1, %5=dst, %6=shift count
+    movq            mm0, %1             ; R4     R0      r4      r0
+    movq            mm2, %3             ; R3     R1      r3      r1
+    movq            mm4, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm4, mm0            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm5, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm0, mm5            ; -C4R4+C4R0     -C4r4+C4r0
+    movq            mm6, mm4            ; A3 = A0 = C4R4+C4R0 (no R6/R2 term in this variant)
+    movq            mm7, [coeffs + 48]  ; C3     C1      C3      C1
+    pmaddwd         mm7, mm2            ; B0 = C3R3+C1R1 (only the %3 input feeds the B terms here)
+    movq            mm5, mm0            ; A2 = A1 = -C4R4+C4R0
+    movq            mm3, [coeffs + 64]  ; -C7    C3      -C7     C3
+    pmaddwd         mm3, mm2            ; B1 = -C7R3+C3R1
+    paddd           mm7, mm4            ; A0+B0          a0+b0
+    paddd           mm4, mm4            ; 2A0            2a0
+    psubd           mm4, mm7            ; A0-B0          a0-b0
+    psrad           mm7, %6
+    psrad           mm4, %6
+    movq            mm1, mm0            ; A1             a1
+    paddd           mm0, mm3            ; A1+B1          a1+b1
+    psubd           mm1, mm3            ; A1-B1          a1-b1
+    psrad           mm0, %6
+    psrad           mm1, %6
+    packssdw        mm7, mm7            ; A0+B0  a0+b0
+    movd           [%5], mm7
+    packssdw        mm0, mm0            ; A1+B1  a1+b1
+    movd      [16 + %5], mm0
+    packssdw        mm1, mm1            ; A1-B1  a1-b1
+    movd      [96 + %5], mm1
+    packssdw        mm4, mm4            ; A0-B0  a0-b0
+    movd     [112 + %5], mm4
+    movq            mm4, [coeffs + 80]  ; -C1    C5      -C1     C5
+    pmaddwd         mm4, mm2            ; B2 = -C1R3+C5R1
+    pmaddwd         mm2, [coeffs + 96]  ; B3 = -C5R3+C7R1
+    movq            mm1, mm5            ; A2             a2
+    paddd           mm1, mm4            ; A2+B2          a2+b2
+    psubd           mm5, mm4            ; A2-B2          a2-b2
+    psrad           mm1, %6
+    psrad           mm5, %6
+    movq            mm4, mm6            ; A3             a3
+    paddd           mm6, mm2            ; A3+B3          a3+b3
+    psubd           mm4, mm2            ; A3-B3          a3-b3
+    psrad           mm6, %6
+    psrad           mm4, %6
+    packssdw        mm1, mm1            ; A2+B2  a2+b2
+    packssdw        mm6, mm6            ; A3+B3  a3+b3
+    movd      [32 + %5], mm1
+    packssdw        mm4, mm4            ; A3-B3  a3-b3
+    packssdw        mm5, mm5            ; A2-B2  a2-b2
+    movd      [48 + %5], mm6
+    movd      [64 + %5], mm4
+    movd      [80 + %5], mm5
+%endmacro
+
+%macro IDCT6 6 ; %1, %2 = addresses (unbracketed) of the R4/R0 and R6/R2 data; %3, %4 are not read; %5=dst, %6=shift count
+    movq            mm0, [%1]           ; R4     R0      r4      r0
+    movq            mm1, [%2]           ; R6     R2      r6      r2
+    movq            mm4, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm4, mm0            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm5, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm0, mm5            ; -C4R4+C4R0     -C4r4+C4r0
+    movq            mm5, [coeffs + 32]  ; C6     C2      C6      C2
+    pmaddwd         mm5, mm1            ; C6R6+C2R2      C6r6+C2r2
+    movq            mm6, [coeffs + 40]  ; -C2    C6      -C2     C6
+    pmaddwd         mm1, mm6            ; -C2R6+C6R2     -C2r6+C6r2
+    movq            mm6, mm4            ; C4R4+C4R0      C4r4+C4r0
+    paddd           mm4, mm5            ; A0             a0
+    psubd           mm6, mm5            ; A3             a3
+    movq            mm5, mm0            ; -C4R4+C4R0     -C4r4+C4r0
+    paddd           mm0, mm1            ; A1             a1
+    psubd           mm5, mm1            ; A2             a2
+    movq            mm2, [8 + %1]       ; R4     R0      r4      r0   (second quadword, 8 bytes after %1)
+    movq            mm3, [8 + %2]       ; R6     R2      r6      r2   (second quadword, 8 bytes after %2)
+    movq            mm1, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm1, mm2            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm7, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm2, mm7            ; -C4R4+C4R0     -C4r4+C4r0
+    movq            mm7, [coeffs + 32]  ; C6     C2      C6      C2
+    pmaddwd         mm7, mm3            ; C6R6+C2R2      C6r6+C2r2
+    pmaddwd         mm3, [coeffs + 40]  ; -C2R6+C6R2     -C2r6+C6r2
+    paddd           mm7, mm1            ; A0             a0
+    paddd           mm1, mm1            ; 2C0            2c0
+    psubd           mm1, mm7            ; A3             a3
+    paddd           mm3, mm2            ; A1             a1
+    paddd           mm2, mm2            ; 2C1            2c1
+    psubd           mm2, mm3            ; A2             a2
+    psrad           mm4, %6
+    psrad           mm7, %6
+    psrad           mm3, %6
+    packssdw        mm4, mm7            ; A0     a0
+    movq           [%5], mm4
+    psrad           mm0, %6
+    packssdw        mm0, mm3            ; A1     a1
+    movq      [16 + %5], mm0
+    movq      [96 + %5], mm0            ; same value as [16 + %5]: no B terms are computed in this variant
+    movq     [112 + %5], mm4            ; same value as [%5]
+    psrad           mm5, %6
+    psrad           mm6, %6
+    psrad           mm2, %6
+    packssdw        mm5, mm2            ; A2     a2
+    movq      [32 + %5], mm5
+    psrad           mm1, %6
+    packssdw        mm6, mm1            ; A3     a3
+    movq      [48 + %5], mm6
+    movq      [64 + %5], mm6            ; same value as [48 + %5]
+    movq      [80 + %5], mm5            ; same value as [32 + %5]
+%endmacro
+
+%macro IDCT7 6 ; %1=R4/R0, %2=R6/R2, %3=R3/R1, %4 is not read, %5=dst, %6=shift count
+    movq            mm0, %1             ; R4     R0      r4      r0
+    movq            mm1, %2             ; R6     R2      r6      r2
+    movq            mm2, %3             ; R3     R1      r3      r1
+    movq            mm4, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm4, mm0            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm5, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm0, mm5            ; -C4R4+C4R0     -C4r4+C4r0
+    movq            mm5, [coeffs + 32]  ; C6     C2      C6      C2
+    pmaddwd         mm5, mm1            ; C6R6+C2R2      C6r6+C2r2
+    movq            mm6, [coeffs + 40]  ; -C2    C6      -C2     C6
+    pmaddwd         mm1, mm6            ; -C2R6+C6R2     -C2r6+C6r2
+    movq            mm6, mm4            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm7, [coeffs + 48]  ; C3     C1      C3      C1
+    pmaddwd         mm7, mm2            ; B0 = C3R3+C1R1 (only the %3 input feeds the B terms here)
+    paddd           mm4, mm5            ; A0             a0
+    psubd           mm6, mm5            ; A3             a3
+    movq            mm5, mm0            ; -C4R4+C4R0     -C4r4+C4r0
+    paddd           mm0, mm1            ; A1             a1
+    psubd           mm5, mm1            ; A2             a2
+    movq            mm1, [coeffs + 64]  ; -C7    C3      -C7     C3
+    pmaddwd         mm1, mm2            ; B1 = -C7R3+C3R1
+    paddd           mm7, mm4            ; A0+B0          a0+b0
+    paddd           mm4, mm4            ; 2A0            2a0
+    psubd           mm4, mm7            ; A0-B0          a0-b0
+    psrad           mm7, %6
+    psrad           mm4, %6
+    movq            mm3, mm0            ; A1             a1
+    paddd           mm0, mm1            ; A1+B1          a1+b1
+    psubd           mm3, mm1            ; A1-B1          a1-b1
+    psrad           mm0, %6
+    psrad           mm3, %6
+    packssdw        mm7, mm7            ; A0+B0  a0+b0
+    movd           [%5], mm7
+    packssdw        mm0, mm0            ; A1+B1  a1+b1
+    movd      [16 + %5], mm0
+    packssdw        mm3, mm3            ; A1-B1  a1-b1
+    movd      [96 + %5], mm3
+    packssdw        mm4, mm4            ; A0-B0  a0-b0
+    movd     [112 + %5], mm4
+    movq            mm4, [coeffs + 80]  ; -C1    C5      -C1     C5
+    pmaddwd         mm4, mm2            ; B2 = -C1R3+C5R1
+    pmaddwd         mm2, [coeffs + 96]  ; B3 = -C5R3+C7R1
+    movq            mm3, mm5            ; A2             a2
+    paddd           mm3, mm4            ; A2+B2          a2+b2
+    psubd           mm5, mm4            ; A2-B2          a2-b2
+    psrad           mm3, %6
+    psrad           mm5, %6
+    movq            mm4, mm6            ; A3             a3
+    paddd           mm6, mm2            ; A3+B3          a3+b3
+    psubd           mm4, mm2            ; A3-B3          a3-b3
+    psrad           mm6, %6
+    packssdw        mm3, mm3            ; A2+B2  a2+b2
+    movd      [32 + %5], mm3
+    psrad           mm4, %6
+    packssdw        mm6, mm6            ; A3+B3  a3+b3
+    movd      [48 + %5], mm6
+    packssdw        mm4, mm4            ; A3-B3  a3-b3
+    packssdw        mm5, mm5            ; A2-B2  a2-b2
+    movd      [64 + %5], mm4
+    movd      [80 + %5], mm5
+%endmacro
+
+%macro IDCT8 6 ; %1 = address (unbracketed) of the R4/R0 data; %2, %3, %4 are not read; %5=dst, %6=shift count
+    movq            mm0, [%1]           ; R4     R0      r4      r0
+    movq            mm4, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm4, mm0            ; A0 = C4R4+C4R0
+    movq            mm5, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm0, mm5            ; A1 = -C4R4+C4R0
+    psrad           mm4, %6
+    psrad           mm0, %6
+    movq            mm2, [8 + %1]       ; R4     R0      r4      r0   (second quadword, 8 bytes after %1)
+    movq            mm1, [coeffs + 16]  ; C4     C4      C4      C4
+    pmaddwd         mm1, mm2            ; C4R4+C4R0      C4r4+C4r0
+    movq            mm7, [coeffs + 24]  ; -C4    C4      -C4     C4
+    pmaddwd         mm2, mm7            ; -C4R4+C4R0     -C4r4+C4r0
+    movq            mm7, [coeffs + 32]  ; C6     C2      C6      C2   (NOTE(review): mm7 is not read again in this macro)
+    psrad           mm1, %6
+    packssdw        mm4, mm1            ; A0     a0
+    movq           [%5], mm4
+    psrad           mm2, %6
+    packssdw        mm0, mm2            ; A1     a1
+    movq      [16 + %5], mm0
+    movq      [96 + %5], mm0            ; only two distinct results exist here: the A1 pack (mm0) ...
+    movq     [112 + %5], mm4            ; ... and the A0 pack (mm4); every store below reuses one of them
+    movq      [32 + %5], mm0
+    movq      [48 + %5], mm4
+    movq      [64 + %5], mm4
+    movq      [80 + %5], mm0
+%endmacro
+
+%macro IDCT 0 ; stage 1 (shift 11) is given rsp+N operands, stage 2 (IDCT1..IDCT8, shift 20) reads rsp+N and stores to blockq
+    DC_COND_IDCT  0,   8,  16,  24, rsp +  0, null, 11
+    Z_COND_IDCT  32,  40,  48,  56, rsp + 32, null, 11, %%4 ; last argument is a local label; presumably a skip target for all-zero input - TODO confirm in Z_COND_IDCT
+    Z_COND_IDCT  64,  72,  80,  88, rsp + 64, null, 11, %%2
+    Z_COND_IDCT  96, 104, 112, 120, rsp + 96, null, 11, %%1
+
+    IDCT1 [rsp +  0], [rsp + 64], [rsp + 32], [rsp +  96], blockq +  0, 20
+    IDCT1 [rsp +  8], [rsp + 72], [rsp + 40], [rsp + 104], blockq +  4, 20
+    IDCT1 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq +  8, 20
+    IDCT1 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
+    jmp %%9
+
+    ALIGN 16
+    %%4: ; label passed to the rsp+32 Z_COND_IDCT above
+    Z_COND_IDCT 64,  72,  80,  88, rsp + 64, null, 11, %%6
+    Z_COND_IDCT 96, 104, 112, 120, rsp + 96, null, 11, %%5
+
+    IDCT2 [rsp +  0], [rsp + 64], [rsp + 32], [rsp +  96], blockq +  0, 20
+    IDCT2 [rsp +  8], [rsp + 72], [rsp + 40], [rsp + 104], blockq +  4, 20
+    IDCT2 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq +  8, 20
+    IDCT2 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
+    jmp %%9
+
+    ALIGN 16
+    %%6: ; label passed to the rsp+64 Z_COND_IDCT under %%4
+    Z_COND_IDCT 96, 104, 112, 120, rsp + 96, null, 11, %%7
+
+    IDCT3 [rsp +  0], [rsp + 64], [rsp + 32], [rsp +  96], blockq +  0, 20
+    IDCT3 [rsp +  8], [rsp + 72], [rsp + 40], [rsp + 104], blockq +  4, 20
+    IDCT3 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq +  8, 20
+    IDCT3 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
+    jmp %%9
+
+    ALIGN 16
+    %%2: ; label passed to the rsp+64 Z_COND_IDCT in the main path
+    Z_COND_IDCT 96, 104, 112, 120, rsp + 96, null, 11, %%3
+
+    IDCT4 [rsp +  0], [rsp + 64], [rsp + 32], [rsp +  96], blockq +  0, 20
+    IDCT4 [rsp +  8], [rsp + 72], [rsp + 40], [rsp + 104], blockq +  4, 20
+    IDCT4 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq +  8, 20
+    IDCT4 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
+    jmp %%9
+
+    ALIGN 16
+    %%3: ; label passed to the rsp+96 Z_COND_IDCT under %%2
+
+    IDCT5 [rsp +  0], [rsp + 64], [rsp + 32], [rsp +  96], blockq +  0, 20
+    IDCT5 [rsp +  8], [rsp + 72], [rsp + 40], [rsp + 104], blockq +  4, 20
+    IDCT5 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq +  8, 20
+    IDCT5 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
+    jmp %%9
+
+    ALIGN 16
+    %%5: ; label passed to the rsp+96 Z_COND_IDCT under %%4
+
+    IDCT6 rsp +  0, rsp + 64, rsp + 32, rsp +  96, blockq +  0, 20
+    IDCT6 rsp + 16, rsp + 80, rsp + 48, rsp + 112, blockq +  8, 20
+    jmp %%9
+
+    ALIGN 16
+    %%1: ; label passed to the rsp+96 Z_COND_IDCT in the main path
+
+    IDCT7 [rsp +  0], [rsp + 64], [rsp + 32], [rsp +  96], blockq +  0, 20
+    IDCT7 [rsp +  8], [rsp + 72], [rsp + 40], [rsp + 104], blockq +  4, 20
+    IDCT7 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq +  8, 20
+    IDCT7 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
+    jmp %%9
+
+    ALIGN 16
+    %%7: ; label passed to the rsp+96 Z_COND_IDCT under %%6
+
+    IDCT8 rsp +  0, rsp + 64, rsp + 32, rsp +  96, blockq +  0, 20
+    IDCT8 rsp + 16, rsp + 80, rsp + 48, rsp + 112, blockq +  8, 20
+
+    %%9: ; common exit for every stage-2 variant
+%endmacro
+
+%macro PUT_PIXELS_CLAMPED_HALF 1 ; %1 = byte offset into blockq; writes four 8-byte rows at pixelsq (lsize3q must hold 3*lsizeq)
+    mova     m0, [blockq+mmsize*0+%1]
+    mova     m1, [blockq+mmsize*2+%1]
+%if mmsize == 8
+    mova     m2, [blockq+mmsize*4+%1]
+    mova     m3, [blockq+mmsize*6+%1]
+%endif
+    packuswb m0, [blockq+mmsize*1+%1] ; words -> bytes with unsigned saturation
+    packuswb m1, [blockq+mmsize*3+%1]
+%if mmsize == 8
+    packuswb m2, [blockq+mmsize*5+%1]
+    packuswb m3, [blockq+mmsize*7+%1]
+    movq           [pixelsq], m0 ; 8-byte registers: one register per output row
+    movq    [lsizeq+pixelsq], m1
+    movq  [2*lsizeq+pixelsq], m2
+    movq   [lsize3q+pixelsq], m3
+%else
+    movq           [pixelsq], m0 ; 16-byte registers: low 8 bytes are one row ...
+    movhps  [lsizeq+pixelsq], m0 ; ... high 8 bytes are the next row
+    movq  [2*lsizeq+pixelsq], m1
+    movhps [lsize3q+pixelsq], m1
+%endif
+%endmacro
+
+%macro ADD_PIXELS_CLAMPED 1 ; %1 = byte offset into blockq; updates two 8-byte rows at pixelsq; m4 must be zero on entry
+    mova       m0, [blockq+mmsize*0+%1]
+    mova       m1, [blockq+mmsize*1+%1]
+%if mmsize == 8
+    mova       m5, [blockq+mmsize*2+%1]
+    mova       m6, [blockq+mmsize*3+%1]
+%endif
+    movq       m2, [pixelsq]
+    movq       m3, [pixelsq+lsizeq]
+%if mmsize == 8
+    mova       m7, m2
+    punpcklbw  m2, m4 ; interleave with the zero register: bytes -> words
+    punpckhbw  m7, m4
+    paddsw     m0, m2 ; signed saturating add of block words and pixel words
+    paddsw     m1, m7
+    mova       m7, m3
+    punpcklbw  m3, m4
+    punpckhbw  m7, m4
+    paddsw     m5, m3
+    paddsw     m6, m7
+%else
+    punpcklbw  m2, m4 ; interleave with the zero register: bytes -> words
+    punpcklbw  m3, m4
+    paddsw     m0, m2 ; signed saturating add of block words and pixel words
+    paddsw     m1, m3
+%endif
+    packuswb   m0, m1 ; words -> bytes with unsigned saturation
+%if mmsize == 8
+    packuswb   m5, m6
+    movq       [pixelsq], m0
+    movq       [pixelsq+lsizeq], m5
+%else
+    movq       [pixelsq], m0 ; low 8 bytes: first row
+    movhps     [pixelsq+lsizeq], m0 ; high 8 bytes: second row
+%endif
+%endmacro
+
+INIT_MMX mmx
+
+cglobal simple_idct, 1, 2, 8, 128, block, t0 ; 128 bytes of stack: the rsp+0 .. rsp+127 scratch area addressed by IDCT
+    IDCT ; result is stored back through blockq
+RET
+
+INIT_XMM sse2
+
+cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0 ; 128 bytes of stack scratch for IDCT
+    IDCT ; result is stored back through blockq
+    lea lsize3q, [lsizeq*3] ; 3 * line size, needed by PUT_PIXELS_CLAMPED_HALF for its fourth row
+    PUT_PIXELS_CLAMPED_HALF 0 ; block bytes 0..63 -> first four rows
+    lea pixelsq, [pixelsq+lsizeq*4] ; advance the destination by four rows
+    PUT_PIXELS_CLAMPED_HALF 64 ; block bytes 64..127 -> last four rows
+RET
+
+cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0 ; 128 bytes of stack scratch for IDCT
+    IDCT ; result is stored back through blockq
+    pxor       m4, m4 ; zero register required by ADD_PIXELS_CLAMPED
+    ADD_PIXELS_CLAMPED 0 ; block bytes 0..31 -> rows 0-1
+    lea        pixelsq, [pixelsq+lsizeq*2] ; advance the destination by two rows
+    ADD_PIXELS_CLAMPED 32 ; rows 2-3
+    lea        pixelsq, [pixelsq+lsizeq*2]
+    ADD_PIXELS_CLAMPED 64 ; rows 4-5
+    lea        pixelsq, [pixelsq+lsizeq*2]
+    ADD_PIXELS_CLAMPED 96 ; rows 6-7
+RET
+%endif
diff --git a/media/ffvpx/libavcodec/x86/simple_idct.h b/media/ffvpx/libavcodec/x86/simple_idct.h
new file mode 100644
index 0000000000..9b64cfe9bc
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/simple_idct.h
@@ -0,0 +1,53 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_SIMPLE_IDCT_H
+#define AVCODEC_X86_SIMPLE_IDCT_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_simple_idct_mmx(int16_t *block); /* in-place IDCT on block */
+void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block); /* NOTE(review): simple_idct.asm's INIT_MMX section only emits simple_idct; confirm where this is defined */
+void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block); /* NOTE(review): same as above */
+
+void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block); /* IDCT, then saturating add into dest */
+void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block); /* IDCT, then saturating store to dest */
+
+void ff_simple_idct8_sse2(int16_t *block); /* simple_idct8/10/12*: simple_idct10.asm, assembled only when ARCH_X86_64 */
+void ff_simple_idct8_avx(int16_t *block); /* *_avx variants additionally require HAVE_AVX_EXTERNAL */
+
+void ff_simple_idct8_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct8_put_avx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+
+void ff_simple_idct8_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct8_add_avx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+
+void ff_simple_idct10_sse2(int16_t *block);
+void ff_simple_idct10_avx(int16_t *block);
+
+void ff_simple_idct10_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct10_put_avx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+
+void ff_simple_idct12_sse2(int16_t *block);
+void ff_simple_idct12_avx(int16_t *block);
+
+void ff_simple_idct12_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct12_put_avx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+
+#endif /* AVCODEC_X86_SIMPLE_IDCT_H */
diff --git a/media/ffvpx/libavcodec/x86/simple_idct10.asm b/media/ffvpx/libavcodec/x86/simple_idct10.asm
new file mode 100644
index 0000000000..069bb61378
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/simple_idct10.asm
@@ -0,0 +1,205 @@
+;******************************************************************************
+;* x86-SIMD-optimized IDCT for prores
+;* this is identical to "simple" IDCT written by Michael Niedermayer
+;* except for the clip range
+;*
+;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
+;* Copyright (c) 2015 Christophe Gisquet
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+%if ARCH_X86_64
+
+SECTION_RODATA
+
+cextern pw_2 ; word constants defined in another object file
+cextern pw_16
+cextern pw_32
+cextern pw_1023
+cextern pw_4095
+pd_round_11: times 4 dd 1<<(11-1) ; four dwords, each half of 1<<11
+pd_round_12: times 4 dd 1<<(12-1) ; four dwords, each half of 1<<12
+pd_round_15: times 4 dd 1<<(15-1) ; four dwords, each half of 1<<15
+pd_round_19: times 4 dd 1<<(19-1) ; four dwords, each half of 1<<19
+pd_round_20: times 4 dd 1<<(20-1) ; four dwords, each half of 1<<20
+
+%macro CONST_DEC  3 ; %1 = constant name, %2/%3 = the two word values of the repeated pair
+const %1
+times 4 dw %2, %3 ; word pair (%2, %3) repeated four times = 16 bytes
+%endmacro
+
+%define W1sh2 22725 ; W1 = 90901 = 22725<<2 + 1
+%define W2sh2 21407 ; W2 = 85627 = 21407<<2 - 1
+%define W3sh2 19265 ; W3 = 77062 = 19265<<2 + 2
+%define W4sh2 16384 ; W4 = 65535 = 16384<<2 - 1
+%define W3sh2_lo 19266 ; W3sh2 + 1, used by the *_lo tables
+%define W4sh2_lo 16383 ; W4sh2 - 1, used by the *_lo tables
+%define W5sh2 12873 ; W5 = 51491 = 12873<<2 - 1
+%define W6sh2  8867 ; W6 = 35468 =  8867<<2
+%define W7sh2  4520 ; W7 = 18081 =  4520<<2 + 1
+
+CONST_DEC  w4_plus_w2_hi,   W4sh2, +W2sh2 ; *_hi set: built from W4sh2 / W3sh2
+CONST_DEC  w4_min_w2_hi,    W4sh2, -W2sh2
+CONST_DEC  w4_plus_w6_hi,   W4sh2, +W6sh2
+CONST_DEC  w4_min_w6_hi,    W4sh2, -W6sh2
+CONST_DEC  w1_plus_w3_hi,   W1sh2, +W3sh2
+CONST_DEC  w3_min_w1_hi,    W3sh2, -W1sh2
+CONST_DEC  w7_plus_w3_hi,   W7sh2, +W3sh2
+CONST_DEC  w3_min_w7_hi,    W3sh2, -W7sh2
+CONST_DEC  w1_plus_w5,   W1sh2, +W5sh2 ; no _hi/_lo variants: these four use neither W3 nor W4
+CONST_DEC  w5_min_w1,    W5sh2, -W1sh2
+CONST_DEC  w5_plus_w7,   W5sh2, +W7sh2
+CONST_DEC  w7_min_w5,    W7sh2, -W5sh2
+CONST_DEC  w4_plus_w2_lo,   W4sh2_lo, +W2sh2 ; *_lo set: same pairs with W4sh2_lo / W3sh2_lo substituted
+CONST_DEC  w4_min_w2_lo,    W4sh2_lo, -W2sh2
+CONST_DEC  w4_plus_w6_lo,   W4sh2_lo, +W6sh2
+CONST_DEC  w4_min_w6_lo,    W4sh2_lo, -W6sh2
+CONST_DEC  w1_plus_w3_lo,   W1sh2,    +W3sh2_lo
+CONST_DEC  w3_min_w1_lo,    W3sh2_lo, -W1sh2
+CONST_DEC  w7_plus_w3_lo,   W7sh2,    +W3sh2_lo
+CONST_DEC  w3_min_w7_lo,    W3sh2_lo, -W7sh2
+
+%include "libavcodec/x86/simple_idct10_template.asm"
+
+SECTION .text
+
+%macro STORE_HI_LO 12 ; %1-%8 = eight memory destinations, %9-%12 = four source registers (each feeds two destinations)
+    movq   %1, %9 ; low 8 bytes of each source go to %1, %3, %5, %7
+    movq   %3, %10
+    movq   %5, %11
+    movq   %7, %12
+    movhps %2, %9 ; high 8 bytes of each source go to %2, %4, %6, %8
+    movhps %4, %10
+    movhps %6, %11
+    movhps %8, %12
+%endmacro
+
+%macro LOAD_ZXBW_8 16 ; %1-%8 = destination registers, %9-%16 = matching sources; zero-extends bytes to words
+    pmovzxbw %1, %9 ; the only invocation in this file is guarded by cpuflag(sse4)
+    pmovzxbw %2, %10
+    pmovzxbw %3, %11
+    pmovzxbw %4, %12
+    pmovzxbw %5, %13
+    pmovzxbw %6, %14
+    pmovzxbw %7, %15
+    pmovzxbw %8, %16
+%endmacro
+
+%macro LOAD_ZXBW_4 9 ; %1-%4 = destination registers, %5-%8 = matching sources, %9 = register holding zero
+    movh %1, %5
+    movh %2, %6
+    movh %3, %7
+    movh %4, %8
+    punpcklbw %1, %9 ; interleave with the zero register: bytes -> words
+    punpcklbw %2, %9
+    punpcklbw %3, %9
+    punpcklbw %4, %9
+%endmacro
+
+%define PASS4ROWS(base, stride, stride3) \
+    [base], [base + stride], [base + 2*stride], [base + stride3] ; four memory operands; stride3 is passed in rather than computed here
+
+%macro idct_fn 0 ; emits every simple_idct8/10/12 entry point for the current INIT_XMM instruction set
+
+define_constants _lo ; the simple_idct8* functions below use the *_lo coefficient tables
+
+cglobal simple_idct8, 1, 1, 16, 32, block
+    IDCT_FN    "", 11, pw_32, 20, "store" ; IDCT_FN comes from simple_idct10_template.asm
+RET
+
+cglobal simple_idct8_put, 3, 4, 16, 32, pixels, lsize, block
+    IDCT_FN    "", 11, pw_32, 20
+    lea       r3, [3*lsizeq]
+    lea       r2, [pixelsq + r3] ; r2 (the block argument register) is reused as pixels + 3*lsize
+    packuswb  m8, m0 ; words -> bytes with unsigned saturation, two result registers per output register
+    packuswb  m1, m2
+    packuswb  m4, m11
+    packuswb  m9, m10
+    STORE_HI_LO PASS8ROWS(pixelsq, r2, lsizeq, r3), m8, m1, m4, m9
+RET
+
+cglobal simple_idct8_add, 3, 4, 16, 32, pixels, lsize, block
+    IDCT_FN    "", 11, pw_32, 20
+    lea r2, [3*lsizeq] ; r2 (the block argument register) is reused as 3*lsize
+    %if cpuflag(sse4)
+        lea r3, [pixelsq + r2]
+        LOAD_ZXBW_8 m3, m5, m6, m7, m12, m13, m14, m15, PASS8ROWS(pixelsq, r3, lsizeq, r2)
+        paddsw m8, m3 ; signed saturating add of the loaded pixels into the IDCT result registers
+        paddsw m0, m5
+        paddsw m1, m6
+        paddsw m2, m7
+        paddsw m4, m12
+        paddsw m11, m13
+        paddsw m9, m14
+        paddsw m10, m15
+    %else
+        pxor m12, m12 ; zero register for LOAD_ZXBW_4
+        LOAD_ZXBW_4 m3, m5, m6, m7, PASS4ROWS(pixelsq, lsizeq, r2), m12
+        paddsw m8, m3
+        paddsw m0, m5
+        paddsw m1, m6
+        paddsw m2, m7
+        lea r3, [pixelsq + 4*lsizeq] ; temporary base for the second group of four rows
+        LOAD_ZXBW_4 m3, m5, m6, m7, PASS4ROWS(r3, lsizeq, r2), m12
+        paddsw m4, m3
+        paddsw m11, m5
+        paddsw m9, m6
+        paddsw m10, m7
+        lea r3, [pixelsq + r2] ; r3 = pixels + 3*lsize, same value the sse4 path set up
+    %endif
+    packuswb  m8, m0
+    packuswb  m1, m2
+    packuswb  m4, m11
+    packuswb  m9, m10
+    STORE_HI_LO PASS8ROWS(pixelsq, r3, lsizeq, r2), m8, m1, m4, m9
+RET
+
+define_constants _hi ; the simple_idct10* and simple_idct12* functions below use the *_hi coefficient tables
+
+cglobal simple_idct10, 1, 1, 16, block
+    IDCT_FN    "", 12, "", 19, "store"
+    RET
+
+cglobal simple_idct10_put, 3, 3, 16, pixels, lsize, block
+    IDCT_FN    "", 12, "", 19, "put", 0, pw_1023
+    RET
+
+cglobal simple_idct12, 1, 1, 16, block
+    ; coeffs are already 15bits, adding the offset would cause
+    ; overflow in the input
+    IDCT_FN    "", 15, pw_2, 16, "store"
+    RET
+
+cglobal simple_idct12_put, 3, 3, 16, pixels, lsize, block
+    ; range isn't known, so the C simple_idct range is used
+    ; Also, using a bias on input overflows, so use the bias
+    ; on output of the first butterfly instead
+    IDCT_FN    "", 15, pw_2, 16, "put", 0, pw_4095
+    RET
+%endmacro
+
+INIT_XMM sse2
+idct_fn ; expand all entry points for sse2
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+idct_fn ; expand all entry points again for avx
+%endif
+
+%endif
diff --git a/media/ffvpx/libavcodec/x86/simple_idct10_template.asm b/media/ffvpx/libavcodec/x86/simple_idct10_template.asm
new file mode 100644
index 0000000000..0d04a9818a
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/simple_idct10_template.asm
@@ -0,0 +1,369 @@
+;******************************************************************************
+;* x86-SIMD-optimized IDCT for prores
+;* this is identical to "simple" IDCT written by Michael Niedermayer
+;* except for the clip range
+;*
+;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+; add SECTION_RODATA and proper include before including this file!
+
+%if ARCH_X86_64
+
+%macro define_constants 1 ; %1 = suffix appended to each coefficient-table name below (e.g. _hi); re-aliases the generic names
+    %undef w4_plus_w2
+    %undef w4_min_w2
+    %undef w4_plus_w6
+    %undef w4_min_w6
+    %undef w1_plus_w3
+    %undef w3_min_w1
+    %undef w7_plus_w3
+    %undef w3_min_w7
+    %define w4_plus_w2 w4_plus_w2%1
+    %define w4_min_w2  w4_min_w2%1
+    %define w4_plus_w6 w4_plus_w6%1
+    %define w4_min_w6  w4_min_w6%1
+    %define w1_plus_w3 w1_plus_w3%1
+    %define w3_min_w1  w3_min_w1%1
+    %define w7_plus_w3 w7_plus_w3%1
+    %define w3_min_w7  w3_min_w7%1
+%endmacro
+
+; interleave data while maintaining source
+; %1=type, %2=dstlo, %3=dsthi, %4=src, %5=interleave
+%macro SBUTTERFLY3 5 ; 3-operand interleave: m%4/m%5 stay intact as long as the destinations are different registers
+    punpckl%1   m%2, m%4, m%5 ; dstlo = low halves of src and interleave, interleaved
+    punpckh%1   m%3, m%4, m%5 ; dsthi = high halves of src and interleave, interleaved
+%endmacro
+
+; %1/%2=src1/dst1, %3/%4=dst2, %5/%6=src2, %7=shift
+; action: %3/%4 = %1/%2 - %5/%6; %1/%2 += %5/%6
+;         %1/%2/%3/%4 >>= %7; dword -> word (in %1/%3)
+%macro SUMSUB_SHPK 7 ; dword butterfly on two register pairs, arithmetic shift by %7, pack to words
+    psubd       %3,  %1,  %5       ; { a0 - b0 }[0-3]
+    psubd       %4,  %2,  %6       ; { a0 - b0 }[4-7]
+    paddd       %1,  %5            ; { a0 + b0 }[0-3]
+    paddd       %2,  %6            ; { a0 + b0 }[4-7]
+    psrad       %1,  %7            ; sign-preserving shift of all four dword vectors
+    psrad       %2,  %7
+    psrad       %3,  %7
+    psrad       %4,  %7
+    packssdw    %1,  %2            ; row[0]: sums, signed-saturated to 8 words in %1
+    packssdw    %3,  %4            ; row[7]: differences, signed-saturated to 8 words in %3
+%endmacro
+
+; %1 = initial bias ("" if nop)
+; %2 = number of bits to shift at the end
+; %3 = qmat (for prores)
+%macro IDCT_1D 2-3 ; in: rows 0/2/4/6 in m10/m8/m13/m12, rows 1/3/5/7 at blockq+16/48/80/112; out: rows 0-7 in m8,m0,m1,m2,m4,m11,m9,m10
+    ; a0 = (W4 * row[0]) + (1 << (15 - 1));
+    ; a1 = a0;
+    ; a2 = a0;
+    ; a3 = a0;
+    ; a0 += W2 * row[2];
+    ; a1 += W6 * row[2];
+    ; a2 -= W6 * row[2];
+    ; a3 -= W2 * row[2];
+%ifstr %1
+    mova        m15, [pd_round_ %+ %2] ; string bias: load the dword rounding vector, added after the multiplies below
+%else
+    paddw       m10, [%1]              ; otherwise add the word bias to row[0] before multiplying
+%endif
+    SBUTTERFLY3 wd,  0,  1, 10,  8 ; { row[0], row[2] }[0-3]/[4-7]
+    pmaddwd     m2,  m0, [w4_plus_w6]
+    pmaddwd     m3,  m1, [w4_plus_w6]
+    pmaddwd     m4,  m0, [w4_min_w6]
+    pmaddwd     m5,  m1, [w4_min_w6]
+    pmaddwd     m6,  m0, [w4_min_w2]
+    pmaddwd     m7,  m1, [w4_min_w2]
+    pmaddwd     m0, [w4_plus_w2]
+    pmaddwd     m1, [w4_plus_w2]
+%ifstr %1
+    ; Adding 1<<(%2-1) for >=15 bits values
+    paddd       m2, m15
+    paddd       m3, m15
+    paddd       m4, m15
+    paddd       m5, m15
+    paddd       m6, m15
+    paddd       m7, m15
+    paddd       m0, m15
+    paddd       m1, m15
+%endif
+
+    ; a0: -1*row[0]-1*row[2]
+    ; a1: -1*row[0]
+    ; a2: -1*row[0]
+    ; a3: -1*row[0]+1*row[2]
+
+    ; a0 +=   W4*row[4] + W6*row[6]; i.e. -1*row[4]
+    ; a1 -=   W4*row[4] + W2*row[6]; i.e. -1*row[4]-1*row[6]
+    ; a2 -=   W4*row[4] - W2*row[6]; i.e. -1*row[4]+1*row[6]
+    ; a3 +=   W4*row[4] - W6*row[6]; i.e. -1*row[4]
+    SBUTTERFLY3 wd,  8,  9, 13, 12 ; { row[4], row[6] }[0-3]/[4-7]
+    pmaddwd     m10, m8, [w4_plus_w6]
+    pmaddwd     m11, m9, [w4_plus_w6]
+    paddd       m0,  m10            ; a0[0-3]
+    paddd       m1,  m11            ; a0[4-7]
+    pmaddwd     m10, m8, [w4_min_w6]
+    pmaddwd     m11, m9, [w4_min_w6]
+    paddd       m6,  m10           ; a3[0-3]
+    paddd       m7,  m11           ; a3[4-7]
+    pmaddwd     m10, m8, [w4_min_w2]
+    pmaddwd     m11, m9, [w4_min_w2]
+    pmaddwd     m8, [w4_plus_w2]
+    pmaddwd     m9, [w4_plus_w2]
+    psubd       m4,  m10           ; a2[0-3] intermediate
+    psubd       m5,  m11           ; a2[4-7] intermediate
+    psubd       m2,  m8            ; a1[0-3] intermediate
+    psubd       m3,  m9            ; a1[4-7] intermediate
+
+    ; spill a0-a3 into the block; the odd rows are fetched before their slots are overwritten
+    mova   [blockq+  0], m0
+    mova   [blockq+ 32], m2
+    mova   [blockq+ 64], m4
+    mova   [blockq+ 96], m6
+    mova        m10,[blockq+ 16]       ; { row[1] }[0-7]
+    mova        m8, [blockq+ 48]       ; { row[3] }[0-7]
+    mova        m13,[blockq+ 80]       ; { row[5] }[0-7]
+    mova        m14,[blockq+112]       ; { row[7] }[0-7]
+    mova   [blockq+ 16], m1
+    mova   [blockq+ 48], m3
+    mova   [blockq+ 80], m5
+    mova   [blockq+112], m7
+%if %0 == 3
+    pmullw      m10,[%3+ 16]           ; qmat given: scale each odd row by the matching qmat row
+    pmullw      m8, [%3+ 48]
+    pmullw      m13,[%3+ 80]
+    pmullw      m14,[%3+112]
+%endif
+
+    ; b0 = MUL(W1, row[1]);
+    ; MAC(b0, W3, row[3]);
+    ; b1 = MUL(W3, row[1]);
+    ; MAC(b1, -W7, row[3]);
+    ; b2 = MUL(W5, row[1]);
+    ; MAC(b2, -W1, row[3]);
+    ; b3 = MUL(W7, row[1]);
+    ; MAC(b3, -W5, row[3]);
+    SBUTTERFLY3 wd,  0,  1, 10, 8  ; { row[1], row[3] }[0-3]/[4-7]
+    pmaddwd     m2,  m0, [w3_min_w7]
+    pmaddwd     m3,  m1, [w3_min_w7]
+    pmaddwd     m4,  m0, [w5_min_w1]
+    pmaddwd     m5,  m1, [w5_min_w1]
+    pmaddwd     m6,  m0, [w7_min_w5]
+    pmaddwd     m7,  m1, [w7_min_w5]
+    pmaddwd     m0, [w1_plus_w3]
+    pmaddwd     m1, [w1_plus_w3]
+
+    ; b0: +1*row[1]+2*row[3]
+    ; b1: +2*row[1]-1*row[3]
+    ; b2: -1*row[1]-1*row[3]
+    ; b3: +1*row[1]+1*row[3]
+
+    ; MAC(b0,  W5, row[5]);
+    ; MAC(b0,  W7, row[7]);
+    ; MAC(b1, -W1, row[5]);
+    ; MAC(b1, -W5, row[7]);
+    ; MAC(b2,  W7, row[5]);
+    ; MAC(b2,  W3, row[7]);
+    ; MAC(b3,  W3, row[5]);
+    ; MAC(b3, -W1, row[7]);
+    SBUTTERFLY3 wd,  8,  9, 13, 14 ; { row[5], row[7] }[0-3]/[4-7]
+
+    ; b0: -1*row[5]+1*row[7]
+    ; b1: -1*row[5]+1*row[7]
+    ; b2: +1*row[5]+2*row[7]
+    ; b3: +2*row[5]-1*row[7]
+
+    pmaddwd     m10, m8, [w1_plus_w5]
+    pmaddwd     m11, m9, [w1_plus_w5]
+    pmaddwd     m12, m8, [w5_plus_w7]
+    pmaddwd     m13, m9, [w5_plus_w7]
+    psubd       m2,  m10           ; b1[0-3]
+    psubd       m3,  m11           ; b1[4-7]
+    paddd       m0,  m12            ; b0[0-3]
+    paddd       m1,  m13            ; b0[4-7]
+    pmaddwd     m12, m8, [w7_plus_w3]
+    pmaddwd     m13, m9, [w7_plus_w3]
+    pmaddwd     m8, [w3_min_w1]
+    pmaddwd     m9, [w3_min_w1]
+    paddd       m4,  m12           ; b2[0-3]
+    paddd       m5,  m13           ; b2[4-7]
+    paddd       m6,  m8            ; b3[0-3]
+    paddd       m7,  m9            ; b3[4-7]
+
+    ; row[0] = (a0 + b0) >> 15;
+    ; row[7] = (a0 - b0) >> 15;
+    ; row[1] = (a1 + b1) >> 15;
+    ; row[6] = (a1 - b1) >> 15;
+    ; row[2] = (a2 + b2) >> 15;
+    ; row[5] = (a2 - b2) >> 15;
+    ; row[3] = (a3 + b3) >> 15;
+    ; row[4] = (a3 - b3) >> 15;
+    mova        m8, [blockq+ 0]        ; a0[0-3]
+    mova        m9, [blockq+16]        ; a0[4-7]
+    SUMSUB_SHPK m8,  m9,  m10, m11, m0,  m1,  %2 ; m8  = row[0], m10 = row[7]
+    mova        m0, [blockq+32]        ; a1[0-3]
+    mova        m1, [blockq+48]        ; a1[4-7]
+    SUMSUB_SHPK m0,  m1,  m9,  m11, m2,  m3,  %2 ; m0  = row[1], m9  = row[6]
+    mova        m1, [blockq+64]        ; a2[0-3]
+    mova        m2, [blockq+80]        ; a2[4-7]
+    SUMSUB_SHPK m1,  m2,  m11, m3,  m4,  m5,  %2 ; m1  = row[2], m11 = row[5]
+    mova        m2, [blockq+96]        ; a3[0-3]
+    mova        m3, [blockq+112]       ; a3[4-7]
+    SUMSUB_SHPK m2,  m3,  m4,  m5,  m6,  m7,  %2 ; m2  = row[3], m4  = row[4]
+%endmacro
+
+; void ff_prores_idct_put_10_<opt>(uint8_t *pixels, ptrdiff_t stride,
+;                                  int16_t *block, const int16_t *qmat);
+
+; %1 = row bias macro ("" to round with pd_round_<row shift> instead)
+; %2 = row shift
+; %3 = column bias macro ("" to round with pd_round_<column shift> instead)
+; %4 = column shift
+; %5 = final action (nothing, "store", "put", "add")
+; %6 = min pixel value
+; %7 = max pixel value
+; %8 = qmat (for prores)
+
+%macro IDCT_FN 4-8 ; 8x8 IDCT of the block at blockq: first IDCT_1D pass, transpose, second pass, then optional store/put
+    ; for (i = 0; i < 8; i++)
+    ;     idctRowCondDC(block + i*8);
+    mova        m10,[blockq+ 0]        ; { row[0] }[0-7]
+    mova        m8, [blockq+32]        ; { row[2] }[0-7]
+    mova        m13,[blockq+64]        ; { row[4] }[0-7]
+    mova        m12,[blockq+96]        ; { row[6] }[0-7]
+
+%if %0 == 8
+    pmullw      m10,[%8+ 0]            ; qmat given: scale the even rows here, IDCT_1D scales the odd ones
+    pmullw      m8, [%8+32]
+    pmullw      m13,[%8+64]
+    pmullw      m12,[%8+96]
+
+    IDCT_1D     %1, %2, %8
+%elif %2 == 11
+    ; This copies the DC-only shortcut.  When there is only a DC coefficient the
+    ; C shifts the value and splats it to all coeffs rather than multiplying and
+    ; doing the full IDCT.  This causes a difference on 8-bit because the
+    ; coefficient is 16383 rather than 16384 (which you can get with shifting).
+    por      m1,  m8, m13           ; OR together every input vector except row[0]
+    por      m1,  m12
+    por      m1, [blockq+ 16]       ; { row[1] }[0-7]
+    por      m1, [blockq+ 48]       ; { row[3] }[0-7]
+    por      m1, [blockq+ 80]       ; { row[5] }[0-7]
+    por      m1, [blockq+112]       ; { row[7] }[0-7]
+    pxor     m2,  m2
+    pcmpeqw  m1,  m2                ; m1 = all-ones in word lanes where rows 1-7 are all zero
+    psllw    m2,  m10, 3            ; shortcut value: row[0] << 3
+    pand     m2,  m1                ; keep the shortcut value only in those DC-only lanes
+    pcmpeqb  m3,  m3
+    pxor     m1,  m3                ; m1 = inverted mask (lanes that take the real IDCT result)
+    mova    [rsp],    m1            ; park mask and shortcut across IDCT_1D; assumes the caller reserved 32 aligned stack bytes - TODO confirm
+    mova    [rsp+16], m2
+
+    IDCT_1D  %1,  %2
+
+    mova     m5, [rsp]
+    mova     m6, [rsp+16]
+    pand     m8,  m5                ; per lane: keep the IDCT result where the mask is set...
+    por      m8,  m6                ; ...and insert the shifted DC elsewhere (same for all 8 output rows)
+    pand     m0,  m5
+    por      m0,  m6
+    pand     m1,  m5
+    por      m1,  m6
+    pand     m2,  m5
+    por      m2,  m6
+    pand     m4,  m5
+    por      m4,  m6
+    pand     m11, m5
+    por      m11, m6
+    pand     m9,  m5
+    por      m9,  m6
+    pand     m10, m5
+    por      m10, m6
+%else
+    IDCT_1D     %1, %2
+%endif
+
+    ; transpose for second part of IDCT
+    TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3
+    mova   [blockq+ 16], m0            ; rows 1/3/5/7 go back to memory, where IDCT_1D reloads them
+    mova   [blockq+ 48], m2
+    mova   [blockq+ 80], m11
+    mova   [blockq+112], m10
+    SWAP         8,  10                ; row[0] is now named m10, as IDCT_1D expects
+    SWAP         1,   8                ; row[2] -> m8
+    SWAP         4,  13                ; row[4] -> m13
+    SWAP         9,  12                ; row[6] -> m12
+
+    ; for (i = 0; i < 8; i++)
+    ;     idctSparseColAdd(dest + i, line_size, block + i);
+    IDCT_1D     %3, %4
+
+    ; clip/store
+%if %0 >= 5
+%ifidn %5,"store"
+    ; No clamping, means pure idct
+    mova  [blockq+  0], m8
+    mova  [blockq+ 16], m0
+    mova  [blockq+ 32], m1
+    mova  [blockq+ 48], m2
+    mova  [blockq+ 64], m4
+    mova  [blockq+ 80], m11
+    mova  [blockq+ 96], m9
+    mova  [blockq+112], m10
+%elifidn %5,"put"
+%ifidn %6, 0
+    pxor        m3, m3                 ; minimum of 0: synthesize the zero vector
+%else
+    mova        m3, [%6]               ; otherwise load the minimum from memory
+%endif ; ifidn %6, 0
+    mova        m5, [%7]               ; maximum pixel value
+    pmaxsw      m8,  m3                ; clamp all eight rows from below...
+    pmaxsw      m0,  m3
+    pmaxsw      m1,  m3
+    pmaxsw      m2,  m3
+    pmaxsw      m4,  m3
+    pmaxsw      m11, m3
+    pmaxsw      m9,  m3
+    pmaxsw      m10, m3
+    pminsw      m8,  m5                ; ...and from above
+    pminsw      m0,  m5
+    pminsw      m1,  m5
+    pminsw      m2,  m5
+    pminsw      m4,  m5
+    pminsw      m11, m5
+    pminsw      m9,  m5
+    pminsw      m10, m5
+
+    lea         r2, [r1*3]             ; r2 = 3 * r1, offset of the fourth row in each group of four
+    mova  [r0     ], m8
+    mova  [r0+r1  ], m0
+    mova  [r0+r1*2], m1
+    mova  [r0+r2  ], m2
+    lea         r0, [r0+r1*4]          ; advance r0 to rows 4-7
+    mova  [r0     ], m4
+    mova  [r0+r1  ], m11
+    mova  [r0+r1*2], m9
+    mova  [r0+r2  ], m10
+%endif ; %5 action
+%endif; if %0 >= 5
+%endmacro
+
+%endif
diff --git a/media/ffvpx/libavcodec/x86/videodsp.asm b/media/ffvpx/libavcodec/x86/videodsp.asm
new file mode 100644
index 0000000000..3cc07878d3
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/videodsp.asm
@@ -0,0 +1,436 @@
+;******************************************************************************
+;* Core video DSP functions
+;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+; slow vertical extension loop function. Works with variable-width, and
+; does per-line reading/writing of source data
+
+%macro V_COPY_ROW 2 ; type (top/body/bottom), h
+.%1_y_loop:                                     ; do {
+    mov              wq, r7mp                   ;   initialize w (r7mp = wmp)
+.%1_x_loop:                                     ;   do {
+    movu             m0, [srcq+wq]              ;     m0 = read($mmsize)
+    movu      [dstq+wq], m0                     ;     write(m0, $mmsize)
+    add              wq, mmsize                 ;     w += $mmsize (w is a negative offset)
+    cmp              wq, -mmsize                ;   } while (w < -$mmsize);
+    jl .%1_x_loop
+    movu             m0, [srcq-mmsize]          ;     m0 = read(last $mmsize bytes before src, may overlap)
+    movu  [dstq-mmsize], m0                     ;     write(m0, last $mmsize bytes before dst)
+%ifidn %1, body                                 ;   if ($type == body) {
+    add            srcq, src_strideq            ;     src += src_stride
+%endif                                          ;   }
+    add            dstq, dst_strideq            ;   dst += dst_stride
+    dec              %2                         ; } while (--$h);
+    jnz .%1_y_loop
+%endmacro
+
+; .----. <- zero
+; |    |    <- top is copied from first line in body of source
+; |----| <- start_y
+; |    |    <- body is copied verbatim (line-by-line) from source
+; |----| <- end_y
+; |    |    <- bottom is copied from last line in body of source
+; '----' <- bh
+INIT_XMM sse
+%if ARCH_X86_64
+cglobal emu_edge_vvar, 7, 8, 1, dst, dst_stride, src, src_stride, \
+                                start_y, end_y, bh, w
+%else ; x86-32
+cglobal emu_edge_vvar, 1, 6, 1, dst, src, start_y, end_y, bh, w
+%define src_strideq r3mp
+%define dst_strideq r1mp
+    mov            srcq, r2mp
+    mov        start_yq, r4mp
+    mov          end_yq, r5mp
+    mov             bhq, r6mp
+%endif
+    sub             bhq, end_yq                 ; bh    -= end_y   (rows below the body)
+    sub          end_yq, start_yq               ; end_y -= start_y (rows in the body)
+    add            srcq, r7mp                   ; src += w (r7mp = wmp)
+    add            dstq, r7mp                   ; dst += w (r7mp = wmp)
+    neg            r7mp                         ; w = -w, rows are then indexed with negative offsets (r7mp = wmp)
+    test       start_yq, start_yq               ; if (start_y) {
+    jz .body
+    V_COPY_ROW      top, start_yq               ;   v_copy_row(top, start_yq)
+.body:                                          ; }
+    V_COPY_ROW     body, end_yq                 ; v_copy_row(body, end_yq)
+    test            bhq, bhq                    ; if (bh) {
+    jz .end
+    sub            srcq, src_strideq            ;   src -= src_stride (back to the last body row)
+    V_COPY_ROW   bottom, bhq                    ;   v_copy_row(bottom, bh)
+.end:                                           ; }
+    RET
+
+%macro hvar_fn 0 ; emits emu_edge_hvar for the current INIT_* instruction set
+cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, n_words, h, w
+    lea            dstq, [dstq+n_wordsq*2]      ; dst += 2 * n_words (end of the area to fill)
+    neg        n_wordsq                         ; n_words = -n_words
+    lea        start_xq, [start_xq+n_wordsq*2]  ; start_x -= 2 * |n_words|, so dst+start_x is unchanged
+.y_loop:                                        ; do {
+%if cpuflag(avx2)
+    vpbroadcastb     m0, [dstq+start_xq]        ;   m0 = splat(read(1))
+    mov              wq, n_wordsq               ;   initialize w
+%else
+    movzx            wd, byte [dstq+start_xq]   ;   w = read(1)
+    imul             wd, 0x01010101             ;   w *= 0x01010101
+    movd             m0, wd
+    mov              wq, n_wordsq               ;   initialize w
+    pshufd           m0, m0, q0000              ;   splat
+%endif ; avx2
+.x_loop:                                        ;   do {
+    movu    [dstq+wq*2], m0                     ;     write($reg, $mmsize)
+    add              wq, mmsize/2               ;     w += $mmsize/2 (w is a negative word count)
+    cmp              wq, -(mmsize/2)            ;   } while (w < -$mmsize/2)
+    jl .x_loop
+    movu  [dstq-mmsize], m0                     ;   write($reg, last $mmsize bytes before dst, may overlap)
+    add            dstq, dst_strideq            ;   dst += dst_stride
+    dec              hq                         ; } while (--h)
+    jnz .y_loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+hvar_fn
+
+%if HAVE_AVX2_EXTERNAL
+INIT_XMM avx2
+hvar_fn
+%endif
+
+; macro to read/write a horizontal number of pixels (%2) to/from registers
+; on sse, - fills xmm0-15 for consecutive sets of 16 pixels
+;         - if (%2 & 8)  fills 8 bytes into xmm$next
+;         - if (%2 & 4)  fills 4 bytes into xmm$next
+;         - if (%2 & 3)  fills 1, 2 or 4 bytes in eax
+; on mmx, - fills mm0-7 for consecutive sets of 8 pixels
+;         - if (%2 & 4)  fills 4 bytes into mm$next
+;         - if (%2 & 3)  fills 1, 2 or 4 bytes in eax
+; writing data out is in the same way
+%macro READ_NUM_BYTES 2 ; type (top/body/bottom), n = bytes to read from srcq
+%assign %%off 0     ; offset in source buffer
+%assign %%mmx_idx 0 ; mmx register index
+%assign %%xmm_idx 0 ; xmm register index
+
+%rep %2/mmsize
+%if mmsize == 16
+    movu   xmm %+ %%xmm_idx, [srcq+%%off]
+%assign %%xmm_idx %%xmm_idx+1
+%else ; mmx
+    movu    mm %+ %%mmx_idx, [srcq+%%off]
+%assign %%mmx_idx %%mmx_idx+1
+%endif
+%assign %%off %%off+mmsize
+%endrep ; %2/mmsize
+
+%if mmsize == 16
+%if (%2-%%off) >= 8
+%if %2 > 16 && (%2-%%off) > 8
+    movu   xmm %+ %%xmm_idx, [srcq+%2-16] ; overlapping load of the last 16 bytes covers the whole remainder
+%assign %%xmm_idx %%xmm_idx+1
+%assign %%off %2
+%else
+    movq    mm %+ %%mmx_idx, [srcq+%%off]
+%assign %%mmx_idx %%mmx_idx+1
+%assign %%off %%off+8
+%endif
+%endif ; (%2-%%off) >= 8
+%endif
+
+%if (%2-%%off) >= 4
+%if %2 > 8 && (%2-%%off) > 4
+    movq    mm %+ %%mmx_idx, [srcq+%2-8]  ; overlapping load of the last 8 bytes covers the whole remainder
+%assign %%off %2
+%else
+    movd    mm %+ %%mmx_idx, [srcq+%%off]
+%assign %%off %%off+4
+%endif
+%assign %%mmx_idx %%mmx_idx+1
+%endif ; (%2-%%off) >= 4
+
+%if (%2-%%off) >= 1
+%if %2 >= 4
+    movd mm %+ %%mmx_idx, [srcq+%2-4]     ; overlapping load of the last 4 bytes
+%elif (%2-%%off) == 1
+    mov            valb, [srcq+%2-1]
+%elif (%2-%%off) == 2
+    mov            valw, [srcq+%2-2]
+%else
+    mov            valb, [srcq+%2-1]      ; 3 bytes left: fetch the last byte...
+    ror            vald, 16               ; ...park it in bits 16-23 of val...
+    mov            valw, [srcq+%2-3]      ; ...and put the first two bytes in bits 0-15
+%endif
+%endif ; (%2-%%off) >= 1
+%endmacro ; READ_NUM_BYTES
+
+%macro WRITE_NUM_BYTES 2 ; type (top/body/bottom), n = bytes to write to dstq
+%assign %%off 0     ; offset in destination buffer
+%assign %%mmx_idx 0 ; mmx register index
+%assign %%xmm_idx 0 ; xmm register index
+
+%rep %2/mmsize
+%if mmsize == 16
+    movu   [dstq+%%off], xmm %+ %%xmm_idx
+%assign %%xmm_idx %%xmm_idx+1
+%else ; mmx
+    movu   [dstq+%%off], mm %+ %%mmx_idx
+%assign %%mmx_idx %%mmx_idx+1
+%endif
+%assign %%off %%off+mmsize
+%endrep ; %2/mmsize
+
+%if mmsize == 16
+%if (%2-%%off) >= 8
+%if %2 > 16 && (%2-%%off) > 8
+    movu   [dstq+%2-16], xmm %+ %%xmm_idx ; overlapping store of the last 16 bytes covers the whole remainder
+%assign %%xmm_idx %%xmm_idx+1
+%assign %%off %2
+%else
+    movq   [dstq+%%off], mm %+ %%mmx_idx
+%assign %%mmx_idx %%mmx_idx+1
+%assign %%off %%off+8
+%endif
+%endif ; (%2-%%off) >= 8
+%endif
+
+%if (%2-%%off) >= 4
+%if %2 > 8 && (%2-%%off) > 4
+    movq    [dstq+%2-8], mm %+ %%mmx_idx  ; overlapping store of the last 8 bytes covers the whole remainder
+%assign %%off %2
+%else
+    movd   [dstq+%%off], mm %+ %%mmx_idx
+%assign %%off %%off+4
+%endif
+%assign %%mmx_idx %%mmx_idx+1
+%endif ; (%2-%%off) >= 4
+
+%if (%2-%%off) >= 1
+%if %2 >= 4
+    movd    [dstq+%2-4], mm %+ %%mmx_idx  ; overlapping store of the last 4 bytes
+%elif (%2-%%off) == 1
+    mov     [dstq+%2-1], valb
+%elif (%2-%%off) == 2
+    mov     [dstq+%2-2], valw
+%else
+    mov     [dstq+%2-3], valw             ; 3 bytes left: first two bytes come from bits 0-15 of val
+    ror            vald, 16               ; bring the byte parked in bits 16-23 down to valb
+    mov     [dstq+%2-1], valb
+%ifnidn %1, body
+    ror            vald, 16               ; top/bottom: undo the rotate, val is stored again on the next loop pass
+%endif
+%endif
+%endif ; (%2-%%off) >= 1
+%endmacro ; WRITE_NUM_BYTES
+
+; vertical top/bottom extend and body copy fast loops
+; these are function pointers to set-width line copy functions, i.e.
+; they read a fixed number of pixels into set registers, and write
+; those out into the destination buffer
+%macro VERTICAL_EXTEND 2 ; first n, last n: emits one emu_edge_vfix<n> function for every width in that range
+%assign %%n %1
+%rep 1+%2-%1
+%if %%n <= 3
+%if ARCH_X86_64
+cglobal emu_edge_vfix %+ %%n, 6, 8, 0, dst, dst_stride, src, src_stride, \
+                                       start_y, end_y, val, bh
+    mov             bhq, r6mp                   ; r6mp = bhmp
+%else ; x86-32
+cglobal emu_edge_vfix %+ %%n, 0, 6, 0, val, dst, src, start_y, end_y, bh
+    mov            dstq, r0mp
+    mov            srcq, r2mp
+    mov        start_yq, r4mp
+    mov          end_yq, r5mp
+    mov             bhq, r6mp
+%define dst_strideq r1mp
+%define src_strideq r3mp
+%endif ; x86-64/32
+%else
+%if ARCH_X86_64
+cglobal emu_edge_vfix %+ %%n, 7, 7, 1, dst, dst_stride, src, src_stride, \
+                                       start_y, end_y, bh
+%else ; x86-32
+cglobal emu_edge_vfix %+ %%n, 1, 5, 1, dst, src, start_y, end_y, bh
+    mov            srcq, r2mp
+    mov        start_yq, r4mp
+    mov          end_yq, r5mp
+    mov             bhq, r6mp
+%define dst_strideq r1mp
+%define src_strideq r3mp
+%endif ; x86-64/32
+%endif
+    ; FIXME move this to c wrapper?
+    sub             bhq, end_yq                 ; bh    -= end_y
+    sub          end_yq, start_yq               ; end_y -= start_y
+
+    ; extend pixels above body
+    test       start_yq, start_yq               ; if (start_y) {
+    jz .body_loop
+    READ_NUM_BYTES  top, %%n                    ;   $variable_regs = read($n), once for all top rows
+.top_loop:                                      ;   do {
+    WRITE_NUM_BYTES top, %%n                    ;     write($variable_regs, $n)
+    add            dstq, dst_strideq            ;     dst += linesize
+    dec        start_yq                         ;   } while (--start_y)
+    jnz .top_loop                               ; }
+
+    ; copy body pixels
+.body_loop:                                     ; do {
+    READ_NUM_BYTES  body, %%n                   ;   $variable_regs = read($n)
+    WRITE_NUM_BYTES body, %%n                   ;   write($variable_regs, $n)
+    add            dstq, dst_strideq            ;   dst += dst_stride
+    add            srcq, src_strideq            ;   src += src_stride
+    dec          end_yq                         ; } while (--end_y)
+    jnz .body_loop
+
+    ; copy bottom pixels
+    test            bhq, bhq                    ; if (bh) {
+    jz .end
+    sub            srcq, src_strideq            ;   src -= linesize (back to the last body row)
+    READ_NUM_BYTES  bottom, %%n                 ;   $variable_regs = read($n), once for all bottom rows
+.bottom_loop:                                   ;   do {
+    WRITE_NUM_BYTES bottom, %%n                 ;     write($variable_regs, $n)
+    add            dstq, dst_strideq            ;     dst += linesize
+    dec             bhq                         ;   } while (--bh)
+    jnz .bottom_loop                            ; }
+
+.end:
+    RET
+%assign %%n %%n+1
+%endrep ; 1+%2-%1
+%endmacro ; VERTICAL_EXTEND
+
+INIT_MMX mmx
+VERTICAL_EXTEND 1, 15
+
+INIT_XMM sse
+VERTICAL_EXTEND 16, 22
+
+; left/right (horizontal) fast extend functions
+; these are essentially identical to the vertical extend ones above,
+; just left/right separated because number of pixels to extend is
+; obviously not the same on both sides.
+
+%macro READ_V_PIXEL 2 ; n = width to fill, src = memory operand holding the byte to replicate
+%if cpuflag(avx2)
+    vpbroadcastb     m0, %2                     ; m0 = source byte in every lane
+%else
+    movzx          vald, byte %2                ; val = source byte, zero-extended
+    imul           vald, 0x01010101             ; replicate it into all four bytes of val
+%if %1 >= 8
+    movd             m0, vald                   ; widths >= 8 are written from m0, so splat val into it
+%if mmsize == 16
+    pshufd           m0, m0, q0000
+%else
+    punpckldq        m0, m0
+%endif ; mmsize == 16
+%endif ; %1 >= 8
+%endif ; avx2
+%endmacro ; READ_V_PIXEL
+
+%macro WRITE_V_PIXEL 2 ; n = bytes to write, dst = base address expression
+%assign %%off 0
+
+%if %1 >= 8
+
+%rep %1/mmsize
+    movu     [%2+%%off], m0
+%assign %%off %%off+mmsize
+%endrep ; %1/mmsize
+
+%if mmsize == 16
+%if %1-%%off >= 8
+%if %1 > 16 && %1-%%off > 8
+    movu     [%2+%1-16], m0                     ; overlapping store of the last 16 bytes covers the whole remainder
+%assign %%off %1
+%else
+    movq     [%2+%%off], m0
+%assign %%off %%off+8
+%endif
+%endif ; %1-%%off >= 8
+%endif ; mmsize == 16
+
+%if %1-%%off >= 4
+%if %1 > 8 && %1-%%off > 4
+    movq      [%2+%1-8], m0                     ; overlapping store of the last 8 bytes covers the whole remainder
+%assign %%off %1
+%else
+    movd     [%2+%%off], m0
+%assign %%off %%off+4
+%endif
+%endif ; %1-%%off >= 4
+
+%else ; %1 < 8
+
+%rep %1/4
+    mov      [%2+%%off], vald
+%assign %%off %%off+4
+%endrep ; %1/4
+
+%endif ; %1 >=/< 8
+
+%if %1-%%off == 2
+%if cpuflag(avx2)
+    movd     [%2+%%off-2], m0                   ; 4-byte store starting 2 bytes early; rewrites 2 bytes with the same value
+%else
+    mov      [%2+%%off], valw
+%endif ; avx2
+%endif ; %1-%%off == 2
+%endmacro ; WRITE_V_PIXEL
+
+%macro H_EXTEND 2 ; first n, last n: emits one emu_edge_hfix<n> function per even step in that range
+%assign %%n %1
+%rep 1+(%2-%1)/2
+%if cpuflag(avx2)
+cglobal emu_edge_hfix %+ %%n, 4, 4, 1, dst, dst_stride, start_x, bh
+%else
+cglobal emu_edge_hfix %+ %%n, 4, 5, 1, dst, dst_stride, start_x, bh, val
+%endif
+.loop_y:                                        ; do {
+    READ_V_PIXEL    %%n, [dstq+start_xq]        ;   $variable_regs = splat(byte at dst[start_x])
+    WRITE_V_PIXEL   %%n, dstq                   ;   write($variable_regs, $n) at dst
+    add            dstq, dst_strideq            ;   dst += dst_stride
+    dec             bhq                         ; } while (--bh)
+    jnz .loop_y
+    RET
+%assign %%n %%n+2
+%endrep ; 1+(%2-%1)/2
+%endmacro ; H_EXTEND
+
+INIT_MMX mmx
+H_EXTEND 2, 14
+
+INIT_XMM sse2
+H_EXTEND 16, 22
+
+%if HAVE_AVX2_EXTERNAL
+INIT_XMM avx2
+H_EXTEND 8, 22
+%endif
+
+INIT_MMX mmxext
+cglobal prefetch, 3, 3, 0, buf, stride, h
+.loop:                          ; do {
+    prefetcht0 [bufq]           ;   issue a prefetcht0 hint for the line at buf
+    add      bufq, strideq      ;   buf += stride
+    dec        hd               ;   --h (32-bit)
+    jg .loop                    ; } while (h > 0)
+    RET
diff --git a/media/ffvpx/libavcodec/x86/videodsp_init.c b/media/ffvpx/libavcodec/x86/videodsp_init.c
new file mode 100644
index 0000000000..ae9db95624
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/videodsp_init.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (C) 2002-2012 Michael Niedermayer
+ * Copyright (C) 2012 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/videodsp.h"
+
+#if HAVE_X86ASM
+typedef void emu_edge_vfix_func(uint8_t *dst, x86_reg dst_stride,
+                                const uint8_t *src, x86_reg src_stride,
+                                x86_reg start_y, x86_reg end_y, x86_reg bh); /* no width argument: the width is implied by which function is selected */
+typedef void emu_edge_vvar_func(uint8_t *dst, x86_reg dst_stride,
+                                const uint8_t *src, x86_reg src_stride,
+                                x86_reg start_y, x86_reg end_y, x86_reg bh,
+                                x86_reg w); /* same arguments plus a runtime width w */
+
+extern emu_edge_vfix_func ff_emu_edge_vfix1_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix2_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix3_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix4_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix5_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix6_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix7_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix8_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix9_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix10_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix11_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix12_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix13_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix14_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix15_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix16_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix17_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix18_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix19_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix20_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix21_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix22_sse;
+static emu_edge_vfix_func * const vfixtbl_sse[22] = { /* entry i handles width i + 1; emulated_edge_mc indexes it as [w - 1] for w <= 22 */
+    ff_emu_edge_vfix1_mmx,  ff_emu_edge_vfix2_mmx,  ff_emu_edge_vfix3_mmx,
+    ff_emu_edge_vfix4_mmx,  ff_emu_edge_vfix5_mmx,  ff_emu_edge_vfix6_mmx,
+    ff_emu_edge_vfix7_mmx,  ff_emu_edge_vfix8_mmx,  ff_emu_edge_vfix9_mmx,
+    ff_emu_edge_vfix10_mmx, ff_emu_edge_vfix11_mmx, ff_emu_edge_vfix12_mmx,
+    ff_emu_edge_vfix13_mmx, ff_emu_edge_vfix14_mmx, ff_emu_edge_vfix15_mmx,
+    ff_emu_edge_vfix16_sse, ff_emu_edge_vfix17_sse, ff_emu_edge_vfix18_sse,
+    ff_emu_edge_vfix19_sse, ff_emu_edge_vfix20_sse, ff_emu_edge_vfix21_sse,
+    ff_emu_edge_vfix22_sse
+};
+extern emu_edge_vvar_func ff_emu_edge_vvar_sse; /* used by emulated_edge_mc when the width exceeds 22 */
+
+typedef void emu_edge_hfix_func(uint8_t *dst, x86_reg dst_stride,
+                                x86_reg start_x, x86_reg bh); /* fixed width; the source pixel is read at dst + start_x */
+typedef void emu_edge_hvar_func(uint8_t *dst, x86_reg dst_stride,
+                                x86_reg start_x, x86_reg n_words, x86_reg bh); /* runtime width, given as a count of 2-byte words */
+
+extern emu_edge_hfix_func ff_emu_edge_hfix2_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix4_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix6_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix8_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix10_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix12_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix14_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix16_sse2;
+extern emu_edge_hfix_func ff_emu_edge_hfix18_sse2;
+extern emu_edge_hfix_func ff_emu_edge_hfix20_sse2;
+extern emu_edge_hfix_func ff_emu_edge_hfix22_sse2;
+static emu_edge_hfix_func * const hfixtbl_sse2[11] = { /* even widths only: entry i handles width 2 * (i + 1); indexed as [(n - 1) >> 1] */
+    ff_emu_edge_hfix2_mmx,  ff_emu_edge_hfix4_mmx,  ff_emu_edge_hfix6_mmx,
+    ff_emu_edge_hfix8_mmx,  ff_emu_edge_hfix10_mmx, ff_emu_edge_hfix12_mmx,
+    ff_emu_edge_hfix14_mmx, ff_emu_edge_hfix16_sse2, ff_emu_edge_hfix18_sse2,
+    ff_emu_edge_hfix20_sse2, ff_emu_edge_hfix22_sse2
+};
+extern emu_edge_hvar_func ff_emu_edge_hvar_sse2; /* used by emulated_edge_mc when the fill width exceeds 22 */
+#if HAVE_AVX2_EXTERNAL /* AVX2 variants exist for widths 8..22 only */
+extern emu_edge_hfix_func ff_emu_edge_hfix8_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix10_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix12_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix14_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix16_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix18_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix20_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix22_avx2;
+static emu_edge_hfix_func * const hfixtbl_avx2[11] = { /* same layout as hfixtbl_sse2; widths 2, 4 and 6 fall back to the MMX functions */
+    ff_emu_edge_hfix2_mmx,  ff_emu_edge_hfix4_mmx,  ff_emu_edge_hfix6_mmx,
+    ff_emu_edge_hfix8_avx2,  ff_emu_edge_hfix10_avx2, ff_emu_edge_hfix12_avx2,
+    ff_emu_edge_hfix14_avx2, ff_emu_edge_hfix16_avx2, ff_emu_edge_hfix18_avx2,
+    ff_emu_edge_hfix20_avx2, ff_emu_edge_hfix22_avx2
+};
+extern emu_edge_hvar_func ff_emu_edge_hvar_avx2;
+#endif
+/* Shared body of the edge emulators: computes the part of the block_w x block_h block that overlaps the w x h source, then calls a vertical kernel for that column range and horizontal kernels for the columns left and right of it. The kernels are passed in by the caller. */
+static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src,
+                                              ptrdiff_t dst_stride,
+                                              ptrdiff_t src_stride,
+                                              x86_reg block_w, x86_reg block_h,
+                                              x86_reg src_x, x86_reg src_y,
+                                              x86_reg w, x86_reg h,
+                                              emu_edge_vfix_func * const *vfix_tbl,
+                                              emu_edge_vvar_func *v_extend_var,
+                                              emu_edge_hfix_func * const *hfix_tbl,
+                                              emu_edge_hvar_func *h_extend_var)
+{
+    x86_reg start_y, start_x, end_y, end_x, src_y_add = 0, p;
+
+    if (!w || !h) // empty source picture: nothing is written
+        return;
+
+    av_assert2(block_w <= FFABS(dst_stride));
+
+    if (src_y >= h) { // block starts at or below row h: clamp it to start on row h - 1
+        src -= src_y*src_stride; // undo the src_y row offset; src_y_add rows are added back below
+        src_y_add = h - 1;
+        src_y     = h - 1;
+    } else if (src_y <= -block_h) { // block ends above row 0: clamp so its last row is row 0
+        src -= src_y*src_stride;
+        src_y_add = 1 - block_h;
+        src_y     = 1 - block_h;
+    }
+    if (src_x >= w) { // same clamping horizontally: block starts at or right of column w
+        src   += w - 1 - src_x;
+        src_x  = w - 1;
+    } else if (src_x <= -block_w) { // block ends left of column 0
+        src   += 1 - block_w - src_x;
+        src_x  = 1 - block_w;
+    }
+
+    start_y = FFMAX(0, -src_y); // [start_y, end_y) x [start_x, end_x): block-relative range that lies inside the source
+    start_x = FFMAX(0, -src_x);
+    end_y   = FFMIN(block_h, h-src_y);
+    end_x   = FFMIN(block_w, w-src_x);
+    av_assert2(start_x < end_x && block_w > 0);
+    av_assert2(start_y < end_y && block_h > 0);
+
+    // fill in the to-be-copied part plus all above/below
+    src += (src_y_add + start_y) * src_stride + start_x;
+    w = end_x - start_x; // w is reused: from here on it is the width of the in-source column range
+    if (w <= 22) {
+        vfix_tbl[w - 1](dst + start_x, dst_stride, src, src_stride, // one fixed-width kernel per width 1..22
+                        start_y, end_y, block_h);
+    } else {
+        v_extend_var(dst + start_x, dst_stride, src, src_stride,
+                     start_y, end_y, block_h, w);
+    }
+
+    // fill left
+    if (start_x) {
+        if (start_x <= 22) {
+            hfix_tbl[(start_x - 1) >> 1](dst, dst_stride, start_x, block_h); // table holds even widths only, so an odd start_x selects the next even kernel
+        } else {
+            h_extend_var(dst, dst_stride,
+                         start_x, (start_x + 1) >> 1, block_h); // width passed as a count of 2-byte words, rounded up
+        }
+    }
+
+    // fill right
+    p = block_w - end_x; // number of columns right of the in-source range
+    if (p) {
+        if (p <= 22) {
+            hfix_tbl[(p - 1) >> 1](dst + end_x - (p & 1), dst_stride, // odd p: start one byte earlier, on column end_x - 1
+                                   -!(p & 1), block_h); // start_x argument is 0 for odd p and -1 for even p, i.e. column end_x - 1 either way
+        } else {
+            h_extend_var(dst + end_x - (p & 1), dst_stride,
+                         -!(p & 1), (p + 1) >> 1, block_h);
+        }
+    }
+}
+/* SSE2 entry point: forwards to emulated_edge_mc() with the MMX/SSE vertical kernels and the MMX/SSE2 horizontal kernels. */
+static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src,
+                                              ptrdiff_t buf_stride,
+                                              ptrdiff_t src_stride,
+                                              int block_w, int block_h,
+                                              int src_x, int src_y, int w,
+                                              int h)
+{
+    emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h, // the int arguments are converted to x86_reg by the call
+                     src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
+                     hfixtbl_sse2, &ff_emu_edge_hvar_sse2);
+}
+/* AVX2 entry point: same vertical kernels as the SSE2 variant; only the horizontal kernels differ. */
+#if HAVE_AVX2_EXTERNAL
+static av_noinline void emulated_edge_mc_avx2(uint8_t *buf, const uint8_t *src,
+                                              ptrdiff_t buf_stride,
+                                              ptrdiff_t src_stride,
+                                              int block_w, int block_h,
+                                              int src_x, int src_y, int w,
+                                              int h)
+{
+    emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
+                     src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse, // vertical path reuses the SSE table and kernel
+                     hfixtbl_avx2, &ff_emu_edge_hvar_avx2);
+}
+#endif /* HAVE_AVX2_EXTERNAL */
+#endif /* HAVE_X86ASM */
+/* Declared outside the HAVE_X86ASM guard, but only referenced inside it (in ff_videodsp_init_x86). */
+void ff_prefetch_mmxext(const uint8_t *buf, ptrdiff_t stride, int h);
+/* Install x86 implementations into ctx according to the runtime CPU flags. The edge emulators are installed only when bpc <= 8. Does nothing when built without HAVE_X86ASM. */
+av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc)
+{
+#if HAVE_X86ASM
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        ctx->prefetch = ff_prefetch_mmxext;
+    }
+    if (EXTERNAL_SSE2(cpu_flags) && bpc <= 8) {
+        ctx->emulated_edge_mc = emulated_edge_mc_sse2;
+    }
+#if HAVE_AVX2_EXTERNAL
+    if (EXTERNAL_AVX2(cpu_flags) && bpc <= 8) {
+        ctx->emulated_edge_mc = emulated_edge_mc_avx2; // tested after SSE2, so it replaces that choice when both apply
+    }
+#endif
+#endif /* HAVE_X86ASM */
+}
diff --git a/media/ffvpx/libavcodec/x86/vp56_arith.h b/media/ffvpx/libavcodec/x86/vp56_arith.h
new file mode 100644
index 0000000000..9f7639980c
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp56_arith.h
@@ -0,0 +1,53 @@
+/**
+ * VP5 and VP6 compatible video decoder (arith decoder)
+ *
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2010  Eli Friedman
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_VP56_ARITH_H
+#define AVCODEC_X86_VP56_ARITH_H
+
+#if HAVE_INLINE_ASM && HAVE_FAST_CMOV && HAVE_6REGS
+#include "libavutil/attributes.h"
+/* Inline-asm vp56_rac_get_prob(): decodes one bit with probability prob and updates c->high and c->code_word. NOTE(review): the self-referential #define presumably lets generic code detect this override - confirm. */
+#define vp56_rac_get_prob vp56_rac_get_prob
+static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
+{
+    unsigned int code_word = vp56_rac_renorm(c);
+    unsigned int low = 1 + (((c->high - 1) * prob) >> 8);
+    unsigned int low_shift = low << 16;
+    int bit = 0; /* must start at 0: setae below writes only the low byte */
+    c->code_word = code_word;
+    /* Operands: %0 = bit, %1 = c->high, %2 = c->code_word, %3 = low_shift, %4 = low, %5 = code_word (unmodified copy). */
+    __asm__(
+        "subl  %4, %1      \n\t" /* c->high -= low */
+        "subl  %3, %2      \n\t" /* c->code_word -= low_shift; carry set if it was below low_shift */
+        "setae %b0         \n\t" /* bit = (code_word >= low_shift) */
+        "cmovb %4, %1      \n\t" /* if it was below: c->high = low */
+        "cmovb %5, %2      \n\t" /* if it was below: restore c->code_word */
+        : "+q"(bit), "+&r"(c->high), "+&r"(c->code_word)
+        : "r"(low_shift), "r"(low), "r"(code_word)
+    );
+    /* Result: bit == 1 keeps the two subtractions; bit == 0 leaves high = low and code_word unchanged. */
+    return bit;
+}
+#endif
+
+#endif /* AVCODEC_X86_VP56_ARITH_H */
diff --git a/media/ffvpx/libavcodec/x86/vp8dsp.asm b/media/ffvpx/libavcodec/x86/vp8dsp.asm
new file mode 100644
index 0000000000..6ac5a7721b
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp8dsp.asm
@@ -0,0 +1,1116 @@
+;******************************************************************************
+;* VP8 MMXEXT optimizations
+;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
+;* Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+; 4-tap coefficients as word pairs: each 16-byte row is one pair repeated 4x, two rows (taps 0/1, taps 2/3) per filter.
+fourtap_filter_hw_m: times 4 dw  -6, 123
+                     times 4 dw  12,  -1
+                     times 4 dw  -9,  93
+                     times 4 dw  50,  -6
+                     times 4 dw  -6,  50
+                     times 4 dw  93,  -9
+                     times 4 dw  -1,  12
+                     times 4 dw 123,  -6
+; 6-tap coefficients as word pairs: three 16-byte rows (taps 0/1, 2/3, 4/5) per filter.
+sixtap_filter_hw_m:  times 4 dw   2, -11
+                     times 4 dw 108,  36
+                     times 4 dw  -8,   1
+                     times 4 dw   3, -16
+                     times 4 dw  77,  77
+                     times 4 dw -16,   3
+                     times 4 dw   1,  -8
+                     times 4 dw  36, 108
+                     times 4 dw -11,   2
+; Same 4-tap coefficients as byte pairs (16-byte rows, pair repeated 8x); the SSSE3 code feeds them to pmaddubsw.
+fourtap_filter_hb_m: times 8 db  -6, 123
+                     times 8 db  12,  -1
+                     times 8 db  -9,  93
+                     times 8 db  50,  -6
+                     times 8 db  -6,  50
+                     times 8 db  93,  -9
+                     times 8 db  -1,  12
+                     times 8 db 123,  -6
+; 6-tap coefficients as byte pairs, paired as taps (0,5), (1,2), (3,4) to match how the SSSE3 code interleaves its inputs.
+sixtap_filter_hb_m:  times 8 db   2,   1
+                     times 8 db -11, 108
+                     times 8 db  36,  -8
+                     times 8 db   3,   3
+                     times 8 db -16,  77
+                     times 8 db  77, -16
+                     times 8 db   1,   2
+                     times 8 db  -8,  36
+                     times 8 db 108, -11
+; 4-tap coefficients, one tap per 16-byte row (word repeated 8x), four rows per filter; used with pmullw.
+fourtap_filter_v_m:  times 8 dw  -6
+                     times 8 dw 123
+                     times 8 dw  12
+                     times 8 dw  -1
+                     times 8 dw  -9
+                     times 8 dw  93
+                     times 8 dw  50
+                     times 8 dw  -6
+                     times 8 dw  -6
+                     times 8 dw  50
+                     times 8 dw  93
+                     times 8 dw  -9
+                     times 8 dw  -1
+                     times 8 dw  12
+                     times 8 dw 123
+                     times 8 dw  -6
+; 6-tap coefficients, one tap per 16-byte row, six rows per filter; used with pmullw.
+sixtap_filter_v_m:   times 8 dw   2
+                     times 8 dw -11
+                     times 8 dw 108
+                     times 8 dw  36
+                     times 8 dw  -8
+                     times 8 dw   1
+                     times 8 dw   3
+                     times 8 dw -16
+                     times 8 dw  77
+                     times 8 dw  77
+                     times 8 dw -16
+                     times 8 dw   3
+                     times 8 dw   1
+                     times 8 dw  -8
+                     times 8 dw  36
+                     times 8 dw 108
+                     times 8 dw -11
+                     times 8 dw   2
+; Bilinear weights 1..7 as words, one 16-byte row per weight.
+bilinear_filter_vw_m: times 8 dw 1
+                      times 8 dw 2
+                      times 8 dw 3
+                      times 8 dw 4
+                      times 8 dw 5
+                      times 8 dw 6
+                      times 8 dw 7
+; Bilinear weights as byte pairs (8-k, k) for k = 1..7, one 16-byte row per k.
+bilinear_filter_vb_m: times 8 db 7, 1
+                      times 8 db 6, 2
+                      times 8 db 5, 3
+                      times 8 db 4, 4
+                      times 8 db 3, 5
+                      times 8 db 2, 6
+                      times 8 db 1, 7
+; Table-name indirection: under PIC every table name expands to picregq (each function first loads the wanted *_m address into it with lea); otherwise the names alias the *_m labels directly.
+%ifdef PIC
+%define fourtap_filter_hw  picregq
+%define sixtap_filter_hw   picregq
+%define fourtap_filter_hb  picregq
+%define sixtap_filter_hb   picregq
+%define fourtap_filter_v   picregq
+%define sixtap_filter_v    picregq
+%define bilinear_filter_vw picregq
+%define bilinear_filter_vb picregq
+%define npicregs 1
+%else ; non-PIC: no extra register is needed, so npicregs is 0
+%define fourtap_filter_hw  fourtap_filter_hw_m
+%define sixtap_filter_hw   sixtap_filter_hw_m
+%define fourtap_filter_hb  fourtap_filter_hb_m
+%define sixtap_filter_hb   sixtap_filter_hb_m
+%define fourtap_filter_v   fourtap_filter_v_m
+%define sixtap_filter_v    sixtap_filter_v_m
+%define bilinear_filter_vw bilinear_filter_vw_m
+%define bilinear_filter_vb bilinear_filter_vb_m
+%define npicregs 0
+%endif
+; pshufb masks that build adjacent byte pairs for pmaddubsw in the SSSE3 horizontal filters.
+filter_h2_shuf:  db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,  6, 6,  7,  7,  8 ; pairs (i, i+1)
+filter_h4_shuf:  db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,  8, 8,  9,  9, 10 ; pairs (i+2, i+3)
+
+filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11,  7, 12 ; pairs (i, i+5): outermost taps
+filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,  7, 7,  8,  8,  9 ; pairs (i+1, i+2)
+filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,  9, 9, 10, 10, 11 ; pairs (i+3, i+4)
+
+pw_20091: times 4 dw 20091 ; four words of 20091 (not referenced in the code that follows in this part of the file)
+pw_17734: times 4 dw 17734 ; four words of 17734
+
+cextern pw_3   ; word constants defined outside this file
+cextern pw_4
+cextern pw_64  ; rounding term added before the >> 7 in the MMXEXT/SSE2 filters
+cextern pw_256 ; pmulhrsw multiplier in the SSSE3 filters
+
+SECTION .text
+
+;-------------------------------------------------------------------------------
+; subpel MC functions:
+;
+; void ff_put_vp8_epel<size>_h<htap>v<vtap>_<opt>(uint8_t *dst, ptrdiff_t deststride,
+;                                                 const uint8_t *src, ptrdiff_t srcstride,
+;                                                 int height,   int mx, int my);
+;-------------------------------------------------------------------------------
+; FILTER_SSSE3 width: emits put_vp8_epel<width>_{h6,h4,v4,v6}; all four apply byte-pair coefficients with pmaddubsw and round with pmulhrsw by pw_256, which equals (x + 64) >> 7.
+%macro FILTER_SSSE3 1
+cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, height, mx, picreg
+    lea      mxd, [mxq*3]                   ; mx *= 3, so mxq*8 below steps 24 bytes per mx
+    mova      m3, [filter_h6_shuf2]
+    mova      m4, [filter_h6_shuf3]
+%ifdef PIC
+    lea  picregq, [sixtap_filter_hb_m]
+%endif
+    mova      m5, [sixtap_filter_hb+mxq*8-48] ; set up 6tap filter in bytes
+    mova      m6, [sixtap_filter_hb+mxq*8-32]
+    mova      m7, [sixtap_filter_hb+mxq*8-16]
+
+.nextrow:
+    movu      m0, [srcq-2]                  ; row data starting two pixels left of the output position
+    mova      m1, m0
+    mova      m2, m0
+%if mmsize == 8
+; For epel4, we need 9 bytes, but only 8 get loaded; to compensate, do the
+; shuffle with a memory operand
+    punpcklbw m0, [srcq+3]                  ; interleaves src[-2+i] with src[3+i], the same (i, i+5) pairing as filter_h6_shuf1
+%else
+    pshufb    m0, [filter_h6_shuf1]
+%endif
+    pshufb    m1, m3                        ; pairs (i+1, i+2)
+    pshufb    m2, m4                        ; pairs (i+3, i+4)
+    pmaddubsw m0, m5                        ; each pmaddubsw yields a two-tap partial sum per output pixel
+    pmaddubsw m1, m6
+    pmaddubsw m2, m7
+    paddsw    m0, m1
+    paddsw    m0, m2
+    pmulhrsw  m0, [pw_256]                  ; (sum + 64) >> 7
+    packuswb  m0, m0                        ; clip to 0..255 and narrow to bytes
+    movh  [dstq], m0        ; store
+
+    ; go to next line
+    add     dstq, dststrideq
+    add     srcq, srcstrideq
+    dec  heightd            ; next row
+    jg .nextrow
+    RET
+
+cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
+    shl      mxd, 4                         ; mx *= 16 (one coefficient row)
+    mova      m2, [pw_256]
+    mova      m3, [filter_h2_shuf]
+    mova      m4, [filter_h4_shuf]
+%ifdef PIC
+    lea  picregq, [fourtap_filter_hb_m]
+%endif
+    mova      m5, [fourtap_filter_hb+mxq-16] ; set up 4tap filter in bytes
+    mova      m6, [fourtap_filter_hb+mxq]
+
+.nextrow:
+    movu      m0, [srcq-1]                  ; row data starting one pixel left of the output position
+    mova      m1, m0
+    pshufb    m0, m3                        ; pairs (i, i+1) of the loaded bytes
+    pshufb    m1, m4                        ; pairs (i+2, i+3)
+    pmaddubsw m0, m5
+    pmaddubsw m1, m6
+    paddsw    m0, m1
+    pmulhrsw  m0, m2                        ; (sum + 64) >> 7
+    packuswb  m0, m0
+    movh  [dstq], m0        ; store
+
+    ; go to next line
+    add     dstq, dststrideq
+    add     srcq, srcstrideq
+    dec  heightd            ; next row
+    jg .nextrow
+    RET
+
+cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
+    shl      myd, 4                         ; my *= 16 (one coefficient row)
+%ifdef PIC
+    lea  picregq, [fourtap_filter_hb_m]
+%endif
+    mova      m5, [fourtap_filter_hb+myq-16]
+    mova      m6, [fourtap_filter_hb+myq]
+    mova      m7, [pw_256]
+
+    ; read 3 lines
+    sub     srcq, srcstrideq
+    movh      m0, [srcq]                    ; row -1 relative to the first output row
+    movh      m1, [srcq+  srcstrideq]       ; row 0
+    movh      m2, [srcq+2*srcstrideq]       ; row +1
+    add     srcq, srcstrideq                ; srcq is back on row 0
+
+.nextrow:
+    movh      m3, [srcq+2*srcstrideq]      ; read new row
+    mova      m4, m0
+    mova      m0, m1                        ; slide the row window down by one
+    punpcklbw m4, m1                        ; interleave rows -1 and 0
+    mova      m1, m2
+    punpcklbw m2, m3                        ; interleave rows +1 and +2
+    pmaddubsw m4, m5
+    pmaddubsw m2, m6
+    paddsw    m4, m2
+    mova      m2, m3
+    pmulhrsw  m4, m7                        ; (sum + 64) >> 7
+    packuswb  m4, m4
+    movh  [dstq], m4
+
+    ; go to next line
+    add      dstq, dststrideq
+    add      srcq, srcstrideq
+    dec   heightd                          ; next row
+    jg .nextrow
+    RET
+
+cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
+    lea      myd, [myq*3]
+%ifdef PIC
+    lea  picregq, [sixtap_filter_hb_m]
+%endif
+    lea      myq, [sixtap_filter_hb+myq*8]  ; myq becomes a pointer just past this filter's three coefficient rows
+
+    ; read 5 lines
+    sub     srcq, srcstrideq
+    sub     srcq, srcstrideq
+    movh      m0, [srcq]                    ; row -2 relative to the first output row
+    movh      m1, [srcq+srcstrideq]         ; row -1
+    movh      m2, [srcq+srcstrideq*2]       ; row 0
+    lea     srcq, [srcq+srcstrideq*2]
+    add     srcq, srcstrideq                ; srcq is now on row +1
+    movh      m3, [srcq]                    ; row +1
+    movh      m4, [srcq+srcstrideq]         ; row +2
+
+.nextrow:
+    movh      m5, [srcq+2*srcstrideq]      ; read new row
+    mova      m6, m0
+    punpcklbw m6, m5                        ; interleave the oldest and newest rows (taps 0 and 5)
+    mova      m0, m1
+    punpcklbw m1, m2                        ; taps 1 and 2
+    mova      m7, m3
+    punpcklbw m7, m4                        ; taps 3 and 4
+    pmaddubsw m6, [myq-48]
+    pmaddubsw m1, [myq-32]
+    pmaddubsw m7, [myq-16]
+    paddsw    m6, m1
+    paddsw    m6, m7
+    mova      m1, m2                        ; slide the row window down by one
+    mova      m2, m3
+    pmulhrsw  m6, [pw_256]                  ; (sum + 64) >> 7
+    mova      m3, m4
+    packuswb  m6, m6
+    mova      m4, m5
+    movh  [dstq], m6
+
+    ; go to next line
+    add      dstq, dststrideq
+    add      srcq, srcstrideq
+    dec   heightd                          ; next row
+    jg .nextrow
+    RET
+%endmacro
+; Instantiate the SSSE3 filters for both block widths.
+INIT_MMX ssse3
+FILTER_SSSE3 4 ; width 4, 8-byte registers (mmsize == 8)
+INIT_XMM ssse3
+FILTER_SSSE3 8 ; width 8, 16-byte registers
+
+; 4x4 block, H-only 4-tap filter (word coefficients, two output pixels per pmaddwd)
+INIT_MMX mmxext
+cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg
+    shl       mxd, 4                       ; mx *= 16 (one coefficient row)
+%ifdef PIC
+    lea   picregq, [fourtap_filter_hw_m]
+%endif
+    movq      mm4, [fourtap_filter_hw+mxq-16] ; set up 4tap filter in words
+    movq      mm5, [fourtap_filter_hw+mxq]
+    movq      mm7, [pw_64]                 ; rounding constant
+    pxor      mm6, mm6                     ; zero, used for byte->word unpacking
+
+.nextrow:
+    movq      mm1, [srcq-1]                ; (ABCDEFGH) load 8 horizontal pixels
+
+    ; first set of 2 pixels
+    movq      mm2, mm1                     ; byte ABCD..
+    punpcklbw mm1, mm6                     ; byte->word ABCD
+    pshufw    mm0, mm2, 9                  ; byte CDEF..
+    punpcklbw mm0, mm6                     ; byte->word CDEF
+    pshufw    mm3, mm1, 0x94               ; word ABBC
+    pshufw    mm1, mm0, 0x94               ; word CDDE
+    pmaddwd   mm3, mm4                     ; multiply 2px with F0/F1
+    movq      mm0, mm1                     ; backup for second set of pixels
+    pmaddwd   mm1, mm5                     ; multiply 2px with F2/F3
+    paddd     mm3, mm1                     ; finish 1st 2px
+
+    ; second set of 2 pixels, use backup of above
+    punpckhbw mm2, mm6                     ; byte->word EFGH
+    pmaddwd   mm0, mm4                     ; multiply backed up 2px with F0/F1
+    pshufw    mm1, mm2, 0x94               ; word EFFG
+    pmaddwd   mm1, mm5                     ; multiply 2px with F2/F3
+    paddd     mm0, mm1                     ; finish 2nd 2px
+
+    ; merge two sets of 2 pixels into one set of 4, round/clip/store
+    packssdw  mm3, mm0                     ; merge dword->word (4px)
+    paddsw    mm3, mm7                     ; rounding
+    psraw     mm3, 7                       ; scale back down by 128
+    packuswb  mm3, mm6                     ; clip and word->bytes
+    movd   [dstq], mm3                     ; store
+
+    ; go to next line
+    add      dstq, dststrideq
+    add      srcq, srcstrideq
+    dec   heightd                          ; next row
+    jg .nextrow
+    RET
+
+; 4x4 block, H-only 6-tap filter (word coefficients, two output pixels per pmaddwd)
+INIT_MMX mmxext
+cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg
+    lea       mxd, [mxq*3]                 ; mx *= 3, so mxq*8 below steps 24 bytes per mx
+%ifdef PIC
+    lea   picregq, [sixtap_filter_hw_m]
+%endif
+    movq      mm4, [sixtap_filter_hw+mxq*8-48] ; set up 6tap filter in words
+    movq      mm5, [sixtap_filter_hw+mxq*8-32]
+    movq      mm6, [sixtap_filter_hw+mxq*8-16]
+    movq      mm7, [pw_64]                 ; rounding constant
+    pxor      mm3, mm3                     ; zero for unpacking; mm3 is reused as scratch and re-zeroed below
+
+.nextrow:
+    movq      mm1, [srcq-2]                ; (ABCDEFGH) load 8 horizontal pixels
+
+    ; first set of 2 pixels
+    movq      mm2, mm1                     ; byte ABCD..
+    punpcklbw mm1, mm3                     ; byte->word ABCD
+    pshufw    mm0, mm2, 0x9                ; byte CDEF..
+    punpckhbw mm2, mm3                     ; byte->word EFGH
+    punpcklbw mm0, mm3                     ; byte->word CDEF
+    pshufw    mm1, mm1, 0x94               ; word ABBC
+    pshufw    mm2, mm2, 0x94               ; word EFFG
+    pmaddwd   mm1, mm4                     ; multiply 2px with F0/F1
+    pshufw    mm3, mm0, 0x94               ; word CDDE
+    movq      mm0, mm3                     ; backup for second set of pixels
+    pmaddwd   mm3, mm5                     ; multiply 2px with F2/F3
+    paddd     mm1, mm3                     ; add to 1st 2px cache
+    movq      mm3, mm2                     ; backup for second set of pixels
+    pmaddwd   mm2, mm6                     ; multiply 2px with F4/F5
+    paddd     mm1, mm2                     ; finish 1st 2px
+
+    ; second set of 2 pixels, use backup of above
+    movd      mm2, [srcq+3]                ; byte FGHI (prevent overreads)
+    pmaddwd   mm0, mm4                     ; multiply 1st backed up 2px with F0/F1
+    pmaddwd   mm3, mm5                     ; multiply 2nd backed up 2px with F2/F3
+    paddd     mm0, mm3                     ; add to 2nd 2px cache
+    pxor      mm3, mm3                     ; mm3 is zero again (needed here and at the top of the next row)
+    punpcklbw mm2, mm3                     ; byte->word FGHI
+    pshufw    mm2, mm2, 0xE9               ; word GHHI
+    pmaddwd   mm2, mm6                     ; multiply 2px with F4/F5
+    paddd     mm0, mm2                     ; finish 2nd 2px
+
+    ; merge two sets of 2 pixels into one set of 4, round/clip/store
+    packssdw  mm1, mm0                     ; merge dword->word (4px)
+    paddsw    mm1, mm7                     ; rounding
+    psraw     mm1, 7                       ; scale back down by 128
+    packuswb  mm1, mm3                     ; clip and word->bytes
+    movd   [dstq], mm1                     ; store
+
+    ; go to next line
+    add      dstq, dststrideq
+    add      srcq, srcstrideq
+    dec   heightd                          ; next row
+    jg .nextrow
+    RET
+; 8-wide block, H-only 4-tap filter: one shifted 8-pixel load per tap, each multiplied by that tap's broadcast word from fourtap_filter_v.
+INIT_XMM sse2
+cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg
+    shl      mxd, 5                 ; mx *= 32
+%ifdef PIC
+    lea  picregq, [fourtap_filter_v_m]
+%endif
+    lea      mxq, [fourtap_filter_v+mxq-32] ; mxq now points at this filter's four 16-byte tap rows
+    pxor      m7, m7                ; zero for byte->word unpacking
+    mova      m4, [pw_64]           ; rounding constant
+    mova      m5, [mxq+ 0]          ; tap 0
+    mova      m6, [mxq+16]          ; tap 1
+%ifdef m8
+    mova      m8, [mxq+32]          ; taps 2 and 3 are kept in registers only when m8/m9 are defined
+    mova      m9, [mxq+48]
+%endif
+.nextrow:
+    movq      m0, [srcq-1]          ; 8 pixels at each of the four tap offsets -1..+2
+    movq      m1, [srcq-0]
+    movq      m2, [srcq+1]
+    movq      m3, [srcq+2]
+    punpcklbw m0, m7
+    punpcklbw m1, m7
+    punpcklbw m2, m7
+    punpcklbw m3, m7
+    pmullw    m0, m5
+    pmullw    m1, m6
+%ifdef m8
+    pmullw    m2, m8
+    pmullw    m3, m9
+%else
+    pmullw    m2, [mxq+32]          ; fallback: multiply straight from memory
+    pmullw    m3, [mxq+48]
+%endif
+    paddsw    m0, m1
+    paddsw    m2, m3
+    paddsw    m0, m2
+    paddsw    m0, m4                ; + 64 for rounding
+    psraw     m0, 7
+    packuswb  m0, m7                ; clip and word->bytes
+    movh  [dstq], m0        ; store
+
+    ; go to next line
+    add     dstq, dststrideq
+    add     srcq, srcstrideq
+    dec  heightd            ; next row
+    jg .nextrow
+    RET
+; 8-wide block, H-only 6-tap filter: one shifted 8-pixel load per tap, each multiplied by that tap's broadcast word from sixtap_filter_v.
+INIT_XMM sse2
+cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg
+    lea      mxd, [mxq*3]
+    shl      mxd, 4                 ; mx *= 48 in total
+%ifdef PIC
+    lea  picregq, [sixtap_filter_v_m]
+%endif
+    lea      mxq, [sixtap_filter_v+mxq-96] ; mxq now points at this filter's six 16-byte tap rows
+    pxor      m7, m7                ; zero for byte->word unpacking
+    mova      m6, [pw_64]           ; rounding constant
+%ifdef m8
+    mova      m8, [mxq+ 0]          ; all six taps are kept in registers when m8..m13 are defined
+    mova      m9, [mxq+16]
+    mova     m10, [mxq+32]
+    mova     m11, [mxq+48]
+    mova     m12, [mxq+64]
+    mova     m13, [mxq+80]
+%endif
+.nextrow:
+    movq      m0, [srcq-2]          ; 8 pixels at each of the six tap offsets -2..+3
+    movq      m1, [srcq-1]
+    movq      m2, [srcq-0]
+    movq      m3, [srcq+1]
+    movq      m4, [srcq+2]
+    movq      m5, [srcq+3]
+    punpcklbw m0, m7
+    punpcklbw m1, m7
+    punpcklbw m2, m7
+    punpcklbw m3, m7
+    punpcklbw m4, m7
+    punpcklbw m5, m7
+%ifdef m8
+    pmullw    m0, m8
+    pmullw    m1, m9
+    pmullw    m2, m10
+    pmullw    m3, m11
+    pmullw    m4, m12
+    pmullw    m5, m13
+%else
+    pmullw    m0, [mxq+ 0]          ; fallback: multiply straight from memory
+    pmullw    m1, [mxq+16]
+    pmullw    m2, [mxq+32]
+    pmullw    m3, [mxq+48]
+    pmullw    m4, [mxq+64]
+    pmullw    m5, [mxq+80]
+%endif
+    paddsw    m1, m4                ; saturating adds; this order is part of the result when saturation occurs
+    paddsw    m0, m5
+    paddsw    m1, m2
+    paddsw    m0, m3
+    paddsw    m0, m1
+    paddsw    m0, m6                ; + 64 for rounding
+    psraw     m0, 7
+    packuswb  m0, m7                ; clip and word->bytes
+    movh  [dstq], m0        ; store
+
+    ; go to next line
+    add     dstq, dststrideq
+    add     srcq, srcstrideq
+    dec  heightd            ; next row
+    jg .nextrow
+    RET
+; FILTER_V width: emits put_vp8_epel<width>_v4 and _v6; source rows are kept unpacked as words and slid down by one row per output row.
+%macro FILTER_V 1
+; %1-wide block, V-only 4-tap filter
+cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
+    shl      myd, 5                        ; my *= 32
+%ifdef PIC
+    lea  picregq, [fourtap_filter_v_m]
+%endif
+    lea      myq, [fourtap_filter_v+myq-32] ; myq now points at this filter's four 16-byte tap rows
+    mova      m6, [pw_64]                  ; rounding constant
+    pxor      m7, m7                       ; zero for byte->word unpacking
+    mova      m5, [myq+48]                 ; tap 3 stays in a register; taps 0..2 are read from memory in the loop
+
+    ; read 3 lines
+    sub     srcq, srcstrideq
+    movh      m0, [srcq]                   ; row -1 relative to the first output row
+    movh      m1, [srcq+  srcstrideq]      ; row 0
+    movh      m2, [srcq+2*srcstrideq]      ; row +1
+    add     srcq, srcstrideq               ; srcq is back on row 0
+    punpcklbw m0, m7
+    punpcklbw m1, m7
+    punpcklbw m2, m7
+
+.nextrow:
+    ; first calculate negative taps (to prevent losing positive overflows)
+    movh      m4, [srcq+2*srcstrideq]      ; read new row
+    punpcklbw m4, m7
+    mova      m3, m4                       ; keep an unscaled copy of the new row for the next iteration
+    pmullw    m0, [myq+0]                  ; tap 0 * oldest row
+    pmullw    m4, m5                       ; tap 3 * newest row
+    paddsw    m4, m0
+
+    ; then calculate positive taps
+    mova      m0, m1                       ; slide the window: the copy is taken before the row is scaled
+    pmullw    m1, [myq+16]
+    paddsw    m4, m1
+    mova      m1, m2
+    pmullw    m2, [myq+32]
+    paddsw    m4, m2
+    mova      m2, m3
+
+    ; round/clip/store
+    paddsw    m4, m6
+    psraw     m4, 7
+    packuswb  m4, m7
+    movh  [dstq], m4
+
+    ; go to next line
+    add     dstq, dststrideq
+    add     srcq, srcstrideq
+    dec  heightd                           ; next row
+    jg .nextrow
+    RET
+
+
+; %1-wide block, V-only 6-tap filter
+cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
+    shl      myd, 4
+    lea      myq, [myq*3]                  ; my *= 48 in total
+%ifdef PIC
+    lea  picregq, [sixtap_filter_v_m]
+%endif
+    lea      myq, [sixtap_filter_v+myq-96] ; myq now points at this filter's six 16-byte tap rows
+    pxor      m7, m7                       ; zero for byte->word unpacking
+
+    ; read 5 lines
+    sub     srcq, srcstrideq
+    sub     srcq, srcstrideq
+    movh      m0, [srcq]                   ; row -2 relative to the first output row
+    movh      m1, [srcq+srcstrideq]        ; row -1
+    movh      m2, [srcq+srcstrideq*2]      ; row 0
+    lea     srcq, [srcq+srcstrideq*2]
+    add     srcq, srcstrideq               ; srcq is now on row +1
+    movh      m3, [srcq]                   ; row +1
+    movh      m4, [srcq+srcstrideq]        ; row +2
+    punpcklbw m0, m7
+    punpcklbw m1, m7
+    punpcklbw m2, m7
+    punpcklbw m3, m7
+    punpcklbw m4, m7
+
+.nextrow:
+    ; first calculate negative taps (to prevent losing positive overflows)
+    mova      m5, m1
+    pmullw    m5, [myq+16]                 ; tap 1 * row -1
+    mova      m6, m4
+    pmullw    m6, [myq+64]                 ; tap 4 * row +2
+    paddsw    m6, m5
+
+    ; then calculate positive taps
+    movh      m5, [srcq+2*srcstrideq]      ; read new row
+    punpcklbw m5, m7
+    pmullw    m0, [myq+0]
+    paddsw    m6, m0
+    mova      m0, m1                       ; slide the window down by one row as each row is consumed
+    mova      m1, m2
+    pmullw    m2, [myq+32]
+    paddsw    m6, m2
+    mova      m2, m3
+    pmullw    m3, [myq+48]
+    paddsw    m6, m3
+    mova      m3, m4
+    mova      m4, m5
+    pmullw    m5, [myq+80]
+    paddsw    m6, m5
+
+    ; round/clip/store
+    paddsw    m6, [pw_64]
+    psraw     m6, 7
+    packuswb  m6, m7
+    movh  [dstq], m6
+
+    ; go to next line
+    add     dstq, dststrideq
+    add     srcq, srcstrideq
+    dec  heightd                           ; next row
+    jg .nextrow
+    RET
+%endmacro
+
+INIT_MMX mmxext
+FILTER_V 4
+INIT_XMM sse2
+FILTER_V 8
+
+%macro FILTER_BILINEAR 1
+%if cpuflag(ssse3)
+cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
+    shl      myd, 4                        ; my *= 16 (table entries are 16 bytes apart)
+%ifdef PIC
+    lea  picregq, [bilinear_filter_vb_m]   ; the 6th argument (mx in the C prototype) is not needed here, so its register is reused
+%endif
+    pxor      m4, m4                       ; m4 = 0, second operand of the rounding pavgw below
+    mova      m3, [bilinear_filter_vb+myq-16] ; table entry my - 1: byte coefficients for pmaddubsw
+.nextrow:
+    movh      m0, [srcq+srcstrideq*0]
+    movh      m1, [srcq+srcstrideq*1]
+    movh      m2, [srcq+srcstrideq*2]
+    punpcklbw m0, m1                       ; interleave the bytes of rows 0 and 1
+    punpcklbw m1, m2                       ; interleave the bytes of rows 1 and 2
+    pmaddubsw m0, m3                       ; weighted sum of each vertical pixel pair -> output row 0
+    pmaddubsw m1, m3                       ; same for output row 1
+    psraw     m0, 2
+    psraw     m1, 2
+    pavgw     m0, m4                       ; (x + 1) >> 1; with the psraw above this equals (sum + 4) >> 3 for non-negative sums
+    pavgw     m1, m4
+%if mmsize==8
+    packuswb  m0, m0
+    packuswb  m1, m1
+    movh   [dstq+dststrideq*0], m0
+    movh   [dstq+dststrideq*1], m1
+%else
+    packuswb  m0, m1                       ; output row 0 in the low half, row 1 in the high half
+    movh   [dstq+dststrideq*0], m0
+    movhps [dstq+dststrideq*1], m0         ; stores the high half
+%endif
+%else ; cpuflag(ssse3)
+cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
+    shl      myd, 4                        ; my *= 16 (table entries are 16 bytes apart)
+%ifdef PIC
+    lea  picregq, [bilinear_filter_vw_m]   ; the 6th argument (mx in the C prototype) is not needed here, so its register is reused
+%endif
+    pxor      m6, m6                       ; m6 = 0: unpack helper and second operand of pavgw
+    mova      m5, [bilinear_filter_vw+myq-1*16] ; table entry my - 1: multiplies the lower row of each pair
+    neg      myq
+    mova      m4, [bilinear_filter_vw+myq+7*16] ; table entry 7 - my: multiplies the upper row of each pair
+.nextrow:
+    movh      m0, [srcq+srcstrideq*0]
+    movh      m1, [srcq+srcstrideq*1]
+    movh      m3, [srcq+srcstrideq*2]
+    punpcklbw m0, m6
+    punpcklbw m1, m6
+    punpcklbw m3, m6
+    mova      m2, m1                       ; row 1 is used twice: lower row for output 0, upper row for output 1
+    pmullw    m0, m4
+    pmullw    m1, m5
+    pmullw    m2, m4
+    pmullw    m3, m5
+    paddsw    m0, m1                       ; output row 0 (unscaled)
+    paddsw    m2, m3                       ; output row 1 (unscaled)
+    psraw     m0, 2
+    psraw     m2, 2
+    pavgw     m0, m6                       ; (x + 1) >> 1; with the psraw above this equals (sum + 4) >> 3 for non-negative sums
+    pavgw     m2, m6
+%if mmsize == 8
+    packuswb  m0, m0
+    packuswb  m2, m2
+    movh   [dstq+dststrideq*0], m0
+    movh   [dstq+dststrideq*1], m2
+%else
+    packuswb  m0, m2                       ; output row 0 in the low half, row 1 in the high half
+    movh   [dstq+dststrideq*0], m0
+    movhps [dstq+dststrideq*1], m0         ; stores the high half
+%endif
+%endif ; cpuflag(ssse3)
+
+    lea     dstq, [dstq+dststrideq*2]      ; two output rows are produced per iteration
+    lea     srcq, [srcq+srcstrideq*2]
+    sub  heightd, 2
+    jg .nextrow                            ; loop while rows remain
+    RET
+
+%if cpuflag(ssse3)
+cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
+    shl      mxd, 4                        ; mx *= 16 (table entries are 16 bytes apart)
+%ifdef PIC
+    lea  picregq, [bilinear_filter_vb_m]   ; same byte table as the vertical ssse3 filter
+%endif
+    pxor      m4, m4                       ; m4 = 0, second operand of the rounding pavgw below
+    mova      m2, [filter_h2_shuf]         ; pshufb mask (defined outside this chunk; presumably pairs each pixel with its right neighbour)
+    mova      m3, [bilinear_filter_vb+mxq-16] ; table entry mx - 1: byte coefficients for pmaddubsw
+.nextrow:
+    movu      m0, [srcq+srcstrideq*0]      ; unaligned full-register loads of two source rows
+    movu      m1, [srcq+srcstrideq*1]
+    pshufb    m0, m2
+    pshufb    m1, m2
+    pmaddubsw m0, m3                       ; weighted sum of each shuffled byte pair -> output row 0
+    pmaddubsw m1, m3                       ; same for output row 1
+    psraw     m0, 2
+    psraw     m1, 2
+    pavgw     m0, m4                       ; (x + 1) >> 1; with the psraw above this equals (sum + 4) >> 3 for non-negative sums
+    pavgw     m1, m4
+%if mmsize==8
+    packuswb  m0, m0
+    packuswb  m1, m1
+    movh   [dstq+dststrideq*0], m0
+    movh   [dstq+dststrideq*1], m1
+%else
+    packuswb  m0, m1                       ; output row 0 in the low half, row 1 in the high half
+    movh   [dstq+dststrideq*0], m0
+    movhps [dstq+dststrideq*1], m0         ; stores the high half
+%endif
+%else ; cpuflag(ssse3)
+cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
+    shl      mxd, 4                        ; mx *= 16 (table entries are 16 bytes apart)
+%ifdef PIC
+    lea  picregq, [bilinear_filter_vw_m]   ; same word table as the vertical non-ssse3 filter
+%endif
+    pxor      m6, m6                       ; m6 = 0: unpack helper and second operand of pavgw
+    mova      m5, [bilinear_filter_vw+mxq-1*16] ; table entry mx - 1: multiplies the right-hand pixel
+    neg      mxq
+    mova      m4, [bilinear_filter_vw+mxq+7*16] ; table entry 7 - mx: multiplies the left-hand pixel
+.nextrow:
+    movh      m0, [srcq+srcstrideq*0+0]    ; row 0
+    movh      m1, [srcq+srcstrideq*0+1]    ; row 0 shifted by one pixel
+    movh      m2, [srcq+srcstrideq*1+0]    ; row 1
+    movh      m3, [srcq+srcstrideq*1+1]    ; row 1 shifted by one pixel
+    punpcklbw m0, m6
+    punpcklbw m1, m6
+    punpcklbw m2, m6
+    punpcklbw m3, m6
+    pmullw    m0, m4
+    pmullw    m1, m5
+    pmullw    m2, m4
+    pmullw    m3, m5
+    paddsw    m0, m1                       ; output row 0 (unscaled)
+    paddsw    m2, m3                       ; output row 1 (unscaled)
+    psraw     m0, 2
+    psraw     m2, 2
+    pavgw     m0, m6                       ; (x + 1) >> 1; with the psraw above this equals (sum + 4) >> 3 for non-negative sums
+    pavgw     m2, m6
+%if mmsize == 8
+    packuswb  m0, m0
+    packuswb  m2, m2
+    movh   [dstq+dststrideq*0], m0
+    movh   [dstq+dststrideq*1], m2
+%else
+    packuswb  m0, m2                       ; output row 0 in the low half, row 1 in the high half
+    movh   [dstq+dststrideq*0], m0
+    movhps [dstq+dststrideq*1], m0         ; stores the high half
+%endif
+%endif ; cpuflag(ssse3)
+
+    lea     dstq, [dstq+dststrideq*2]      ; rows are always processed in pairs, so an odd height writes one extra row
+    lea     srcq, [srcq+srcstrideq*2]
+    sub  heightd, 2
+    jg .nextrow                            ; loop while rows remain
+    RET
+%endmacro
+
+INIT_MMX mmxext
+FILTER_BILINEAR 4
+INIT_XMM sse2
+FILTER_BILINEAR 8
+INIT_MMX ssse3
+FILTER_BILINEAR 4
+INIT_XMM ssse3
+FILTER_BILINEAR 8
+
+INIT_MMX mmx
+cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
+.nextrow:                                  ; plain copy, two 8-byte rows per iteration; only 5 arguments are loaded (mx/my of the C prototype are ignored)
+    movq    mm0, [srcq+srcstrideq*0]
+    movq    mm1, [srcq+srcstrideq*1]
+    lea    srcq, [srcq+srcstrideq*2]
+    movq [dstq+dststrideq*0], mm0
+    movq [dstq+dststrideq*1], mm1
+    lea    dstq, [dstq+dststrideq*2]
+    sub heightd, 2
+    jg .nextrow                            ; loop while height > 0
+    RET
+
+INIT_XMM sse
+cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
+.nextrow:                                  ; plain copy, two 16-byte rows per iteration; only 5 arguments are loaded (mx/my of the C prototype are ignored)
+    movups xmm0, [srcq+srcstrideq*0]       ; unaligned loads: src may have any alignment
+    movups xmm1, [srcq+srcstrideq*1]
+    lea    srcq, [srcq+srcstrideq*2]
+    movaps [dstq+dststrideq*0], xmm0       ; aligned stores: every dst row must be 16-byte aligned
+    movaps [dstq+dststrideq*1], xmm1
+    lea    dstq, [dstq+dststrideq*2]
+    sub heightd, 2
+    jg .nextrow                            ; loop while height > 0
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+;-----------------------------------------------------------------------------
+
+%macro ADD_DC 4 ; args: 1 = bytes to add, 2 = bytes to subtract, 3 = byte offset into each row, 4 = load/store instruction
+    %4        m2, [dst1q+%3]               ; load four rows: dst1, dst1+stride, dst2, dst2+stride
+    %4        m3, [dst1q+strideq+%3]
+    %4        m4, [dst2q+%3]
+    %4        m5, [dst2q+strideq+%3]
+    paddusb   m2, %1                       ; unsigned saturating add
+    paddusb   m3, %1
+    paddusb   m4, %1
+    paddusb   m5, %1
+    psubusb   m2, %2                       ; unsigned saturating subtract
+    psubusb   m3, %2
+    psubusb   m4, %2
+    psubusb   m5, %2
+    %4 [dst1q+%3], m2                      ; write the four rows back; m2-m5 are clobbered
+    %4 [dst1q+strideq+%3], m3
+    %4 [dst2q+%3], m4
+    %4 [dst2q+strideq+%3], m5
+%endmacro
+
+%macro VP8_IDCT_DC_ADD 0 ; instantiated below for sse2 and sse4
+cglobal vp8_idct_dc_add, 3, 3, 6, dst, block, stride
+    ; load data
+    movd       m0, [blockq]                ; 32-bit load; the low word is block[0]
+    pxor       m1, m1                      ; m1 = 0: clears the block and unpacks bytes below
+
+    ; calculate DC
+    paddw      m0, [pw_4]
+    movd [blockq], m1                      ; zero the first 4 bytes of block
+    DEFINE_ARGS dst1, dst2, stride
+    lea     dst2q, [dst1q+strideq*2]       ; dst2 = two rows below dst1 (takes over the register that held block)
+    movd       m2, [dst1q]                 ; load four rows of four pixels
+    movd       m3, [dst1q+strideq]
+    movd       m4, [dst2q]
+    movd       m5, [dst2q+strideq]
+    psraw      m0, 3                       ; low word = (block[0] + pw_4 word) >> 3
+    pshuflw    m0, m0, 0                   ; broadcast word 0 to the low four words
+    punpcklqdq m0, m0                      ; ... and on to all eight words
+    punpckldq  m2, m3                      ; rows 0 and 1 side by side
+    punpckldq  m4, m5                      ; rows 2 and 3 side by side
+    punpcklbw  m2, m1                      ; bytes -> words
+    punpcklbw  m4, m1
+    paddw      m2, m0
+    paddw      m4, m0
+    packuswb   m2, m4                      ; saturate to bytes: rows 0..3 as the four dwords of m2
+    movd   [dst1q], m2
+%if cpuflag(sse4)
+    pextrd [dst1q+strideq], m2, 1          ; store dwords 1..3 directly
+    pextrd [dst2q], m2, 2
+    pextrd [dst2q+strideq], m2, 3
+%else
+    psrldq     m2, 4                       ; shift the next row into the low dword
+    movd [dst1q+strideq], m2
+    psrldq     m2, 4
+    movd [dst2q], m2
+    psrldq     m2, 4
+    movd [dst2q+strideq], m2
+%endif
+    RET
+%endmacro
+
+INIT_XMM sse2
+VP8_IDCT_DC_ADD
+INIT_XMM sse4
+VP8_IDCT_DC_ADD
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_idct_dc_add4y_<opt>(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
+;-----------------------------------------------------------------------------
+
+INIT_XMM sse2
+cglobal vp8_idct_dc_add4y, 3, 3, 6, dst, block, stride
+    ; load data (first coefficient of four blocks that are 32 bytes apart)
+    movd      m0, [blockq+32*0] ; A
+    movd      m1, [blockq+32*2] ; C
+    punpcklwd m0, [blockq+32*1] ; A B
+    punpcklwd m1, [blockq+32*3] ; C D
+    punpckldq m0, m1        ; A B C D
+    pxor      m1, m1
+
+    ; calculate DC
+    paddw     m0, [pw_4]
+    movd [blockq+32*0], m1      ; zero the first 4 bytes of each of the four blocks
+    movd [blockq+32*1], m1
+    movd [blockq+32*2], m1
+    movd [blockq+32*3], m1
+    psraw     m0, 3
+    psubw     m1, m0            ; m1 = negated DCs
+    packuswb  m0, m0            ; unsigned saturation keeps only the positive DCs
+    packuswb  m1, m1            ; ... and here only the magnitudes of the negative DCs
+    punpcklbw m0, m0            ; duplicate each byte
+    punpcklbw m1, m1
+    punpcklbw m0, m0            ; duplicate again: AAAABBBBCCCCDDDD
+    punpcklbw m1, m1
+
+    ; add DC
+    DEFINE_ARGS dst1, dst2, stride
+    lea    dst2q, [dst1q+strideq*2] ; dst2 = two rows below dst1 (takes over the register that held block)
+    ADD_DC    m0, m1, 0, mova   ; four rows of 16 pixels; mova requires aligned dst rows
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_idct_dc_add4uv_<opt>(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmx
+cglobal vp8_idct_dc_add4uv, 3, 3, 0, dst, block, stride
+    ; load data (first coefficient of four blocks that are 32 bytes apart)
+    movd      m0, [blockq+32*0] ; A
+    movd      m1, [blockq+32*2] ; C
+    punpcklwd m0, [blockq+32*1] ; A B
+    punpcklwd m1, [blockq+32*3] ; C D
+    punpckldq m0, m1        ; A B C D
+    pxor      m6, m6
+
+    ; calculate DC
+    paddw     m0, [pw_4]
+    movd [blockq+32*0], m6      ; zero the first 4 bytes of each of the four blocks
+    movd [blockq+32*1], m6
+    movd [blockq+32*2], m6
+    movd [blockq+32*3], m6
+    psraw     m0, 3
+    psubw     m6, m0            ; m6 = negated DCs
+    packuswb  m0, m0            ; unsigned saturation keeps only the positive DCs
+    packuswb  m6, m6            ; ... and here only the magnitudes of the negative DCs
+    punpcklbw m0, m0 ; AABBCCDD
+    punpcklbw m6, m6 ; AABBCCDD
+    movq      m1, m0
+    movq      m7, m6
+    punpcklbw m0, m0 ; AAAABBBB
+    punpckhbw m1, m1 ; CCCCDDDD
+    punpcklbw m6, m6 ; AAAABBBB
+    punpckhbw m7, m7 ; CCCCDDDD
+
+    ; add DC
+    DEFINE_ARGS dst1, dst2, stride
+    lea    dst2q, [dst1q+strideq*2] ; dst2 = two rows below dst1 (takes over the register that held block)
+    ADD_DC    m0, m6, 0, mova   ; rows 0-3, 8 pixels wide: blocks A and B side by side
+    lea    dst1q, [dst1q+strideq*4] ; move both row pointers four rows down
+    lea    dst2q, [dst2q+strideq*4]
+    ADD_DC    m1, m7, 0, mova   ; rows 4-7: blocks C and D side by side
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_idct_add_<opt>(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+;-----------------------------------------------------------------------------
+
+; calculate %1=mul_35468(%1)-mul_20091(%2); %2=mul_20091(%1)+mul_35468(%2)
+;           this macro assumes that m6/m7 have words for 20091/17734 loaded
+%macro VP8_MULTIPLY_SUMSUB 4 ; args 1, 2 = inputs and outputs; args 3, 4 = scratch registers
+    mova      %3, %1
+    mova      %4, %2
+    pmulhw    %3, m6 ;20091(1)
+    pmulhw    %4, m6 ;20091(2)
+    paddw     %3, %1 ; mul_20091(x) = x + ((x * 20091) >> 16)
+    paddw     %4, %2
+    paddw     %1, %1 ; double the input so that the 17734 in m7 acts as 35468
+    paddw     %2, %2
+    pmulhw    %1, m7 ;35468(1)
+    pmulhw    %2, m7 ;35468(2)
+    psubw     %1, %4 ; arg1 = mul_35468(arg1) - mul_20091(arg2)
+    paddw     %2, %3 ; arg2 = mul_20091(arg1) + mul_35468(arg2)
+%endmacro
+
+; calculate x0=%1+%3; x1=%1-%3
+;           x2=mul_35468(%2)-mul_20091(%4); x3=mul_20091(%2)+mul_35468(%4)
+;           %1=x0+x3 (tmp0); %2=x1+x2 (tmp1); %3=x1-x2 (tmp2); %4=x0-x3 (tmp3)
+;           %5/%6 are temporary registers
+;           we assume m6/m7 have constant words 20091/17734 loaded in them
+%macro VP8_IDCT_TRANSFORM4x4_1D 6 ; args 1-4 = data register indexes, args 5-6 = scratch register indexes
+    SUMSUB_BA         w, %3,  %1,  %5     ;t0, t1
+    VP8_MULTIPLY_SUMSUB m%2, m%4, m%5,m%6 ;t2, t3
+    SUMSUB_BA         w, %4,  %3,  %5     ;tmp0, tmp3
+    SUMSUB_BA         w, %2,  %1,  %5     ;tmp1, tmp2
+    SWAP                 %4,  %1          ; rename registers (SWAP is defined outside this chunk) ...
+    SWAP                 %4,  %3          ; ... presumably so tmp0..tmp3 end up under args 1..4 as listed above
+%endmacro
+
+INIT_MMX sse
+cglobal vp8_idct_add, 3, 3, 0, dst, block, stride
+    ; load block data (four rows of four 16-bit coefficients)
+    movq         m0, [blockq+ 0]
+    movq         m1, [blockq+ 8]
+    movq         m2, [blockq+16]
+    movq         m3, [blockq+24]
+    movq         m6, [pw_20091]            ; constants expected in m6/m7 by VP8_MULTIPLY_SUMSUB
+    movq         m7, [pw_17734]
+    xorps      xmm0, xmm0                  ; SSE register, separate from the MMX m0 loaded above
+    movaps [blockq+ 0], xmm0               ; clear all 32 bytes of block (aligned stores)
+    movaps [blockq+16], xmm0
+
+    ; actual IDCT
+    VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5
+    TRANSPOSE4x4W            0, 1, 2, 3, 4
+    paddw        m0, [pw_4]                ; rounding term added before the second pass
+    VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5
+    TRANSPOSE4x4W            0, 1, 2, 3, 4
+
+    ; store
+    pxor         m4, m4
+    DEFINE_ARGS dst1, dst2, stride
+    lea       dst2q, [dst1q+2*strideq]     ; dst2 = two rows below dst1 (takes over the register that held block)
+    STORE_DIFFx2 m0, m1, m6, m7, m4, 3, dst1q, strideq ; rows 0-1 (macro defined outside this chunk)
+    STORE_DIFFx2 m2, m3, m6, m7, m4, 3, dst2q, strideq ; rows 2-3
+
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_luma_dc_wht(int16_t block[4][4][16], int16_t dc[16])
+;-----------------------------------------------------------------------------
+
+%macro SCATTER_WHT 3 ; args 1, 2 = mm register indexes holding four words each; arg 3 = index of the first target block
+    movd dc1d, m%1                         ; words 0 and 1 of each register into a GPR
+    movd dc2d, m%2
+    mov [blockq+2*16*(0+%3)], dc1w         ; word 0 -> first coefficient of a block (blocks are 2*16 bytes apart)
+    mov [blockq+2*16*(1+%3)], dc2w
+    shr  dc1d, 16                          ; bring word 1 down
+    shr  dc2d, 16
+    psrlq m%1, 32                          ; move words 2 and 3 into the low half for the next movd
+    psrlq m%2, 32
+    mov [blockq+2*16*(4+%3)], dc1w         ; word 1 -> four blocks further on
+    mov [blockq+2*16*(5+%3)], dc2w
+    movd dc1d, m%1
+    movd dc2d, m%2
+    mov [blockq+2*16*(8+%3)], dc1w         ; word 2
+    mov [blockq+2*16*(9+%3)], dc2w
+    shr  dc1d, 16
+    shr  dc2d, 16
+    mov [blockq+2*16*(12+%3)], dc1w        ; word 3
+    mov [blockq+2*16*(13+%3)], dc2w
+%endmacro
+
+%macro HADAMARD4_1D 4 ; args 1-4 = register indexes of the four input vectors
+    SUMSUB_BADC w, %2, %1, %4, %3          ; first butterfly stage (SUMSUB_BADC is defined outside this chunk)
+    SUMSUB_BADC w, %4, %2, %3, %1          ; second butterfly stage
+    SWAP %1, %4, %3                        ; rename registers; presumably restores output order, confirm against SWAP in x86inc
+%endmacro
+
+INIT_MMX sse
+cglobal vp8_luma_dc_wht, 2, 3, 0, block, dc1, dc2
+    movq          m0, [dc1q]               ; dc1 is the dc[] argument: load its 16 words as four rows
+    movq          m1, [dc1q+8]
+    movq          m2, [dc1q+16]
+    movq          m3, [dc1q+24]
+    xorps      xmm0, xmm0                  ; SSE register, separate from the MMX m0 loaded above
+    movaps [dc1q+ 0], xmm0                 ; clear all 32 bytes of dc[] (aligned stores)
+    movaps [dc1q+16], xmm0
+    HADAMARD4_1D  0, 1, 2, 3
+    TRANSPOSE4x4W 0, 1, 2, 3, 4
+    paddw         m0, [pw_3]               ; rounding term added before the second pass
+    HADAMARD4_1D  0, 1, 2, 3
+    psraw         m0, 3
+    psraw         m1, 3
+    psraw         m2, 3
+    psraw         m3, 3
+    SCATTER_WHT   0, 1, 0                  ; from here on dc1/dc2 are scratch GPRs; writes blocks 0,4,8,12 and 1,5,9,13
+    SCATTER_WHT   2, 3, 2                  ; writes blocks 2,6,10,14 and 3,7,11,15
+    RET
diff --git a/media/ffvpx/libavcodec/x86/vp8dsp_init.c b/media/ffvpx/libavcodec/x86/vp8dsp_init.c
new file mode 100644
index 0000000000..bd20da1fc9
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp8dsp_init.c
@@ -0,0 +1,383 @@
+/*
+ * VP8 DSP functions x86-optimized
+ * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/vp8dsp.h"
+
+#if HAVE_X86ASM
+
+/*
+ * MC functions
+ */
+void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+
+void ff_put_vp8_epel8_h4_sse2  (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel8_h6_sse2  (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel8_v4_sse2  (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel8_v6_sse2  (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+
+void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel8_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                const uint8_t *src, ptrdiff_t srcstride,
+                                int height, int mx, int my);
+
+void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, ptrdiff_t dststride,
+                                   const uint8_t *src, ptrdiff_t srcstride,
+                                   int height, int mx, int my);
+void ff_put_vp8_bilinear8_h_sse2  (uint8_t *dst, ptrdiff_t dststride,
+                                   const uint8_t *src, ptrdiff_t srcstride,
+                                   int height, int mx, int my);
+void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                   const uint8_t *src, ptrdiff_t srcstride,
+                                   int height, int mx, int my);
+void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                   const uint8_t *src, ptrdiff_t srcstride,
+                                   int height, int mx, int my);
+
+void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, ptrdiff_t dststride,
+                                   const uint8_t *src, ptrdiff_t srcstride,
+                                   int height, int mx, int my);
+void ff_put_vp8_bilinear8_v_sse2  (uint8_t *dst, ptrdiff_t dststride,
+                                   const uint8_t *src, ptrdiff_t srcstride,
+                                   int height, int mx, int my);
+void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                   const uint8_t *src, ptrdiff_t srcstride,
+                                   int height, int mx, int my);
+void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+                                   const uint8_t *src, ptrdiff_t srcstride,
+                                   int height, int mx, int my);
+
+
+void ff_put_vp8_pixels8_mmx (uint8_t *dst, ptrdiff_t dststride,
+                             const uint8_t *src, ptrdiff_t srcstride,
+                             int height, int mx, int my);
+void ff_put_vp8_pixels16_sse(uint8_t *dst, ptrdiff_t dststride,
+                             const uint8_t *src, ptrdiff_t srcstride,
+                             int height, int mx, int my);
+
+#define TAP_W16(ISA, KIND, TAPS) /* 16-wide MC built from two 8-wide calls */ \
+static void ff_put_vp8_##KIND##16_##TAPS##_##ISA(uint8_t *dst, \
+    ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride, \
+    int height, int mx, int my) \
+{ \
+    /* left 8 columns */ \
+    ff_put_vp8_##KIND##8_##TAPS##_##ISA(dst, dststride, src, srcstride, height, mx, my); \
+    /* right 8 columns */ \
+    ff_put_vp8_##KIND##8_##TAPS##_##ISA(dst + 8, dststride, src + 8, srcstride, height, mx, my); \
+}
+#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) /* 8-wide MC built from two 4-wide calls */ \
+static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
+    uint8_t *dst,  ptrdiff_t dststride, const uint8_t *src, /* const, like every other MC function here */ \
+    ptrdiff_t srcstride, int height, int mx, int my) \
+{ \
+    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( /* left 4 columns */ \
+        dst,     dststride, src,     srcstride, height, mx, my); \
+    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( /* right 4 columns */ \
+        dst + 4, dststride, src + 4, srcstride, height, mx, my); \
+}
+
+TAP_W16(sse2,  epel, h6)
+TAP_W16(sse2,  epel, v6)
+TAP_W16(sse2,  bilinear, h)
+TAP_W16(sse2,  bilinear, v)
+
+TAP_W16(ssse3, epel, h6)
+TAP_W16(ssse3, epel, v6)
+TAP_W16(ssse3, bilinear, h)
+TAP_W16(ssse3, bilinear, v)
+
+#define HVTAP(ISA, ALIGN, HTAPS, VTAPS, W, MAXH) /* 2-D epel: H pass into a scratch buffer, then V pass */ \
+static void ff_put_vp8_epel##W##_h##HTAPS##v##VTAPS##_##ISA( \
+    uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
+    ptrdiff_t srcstride, int height, int mx, int my) \
+{ \
+    LOCAL_ALIGNED(ALIGN, uint8_t, hbuf, [W * (MAXH + VTAPS - 1)]); \
+    /* H pass: start VTAPS / 2 - 1 rows above the block and emit height + VTAPS - 1 rows */ \
+    ff_put_vp8_epel##W##_h##HTAPS##_##ISA(hbuf, W, src - srcstride * (VTAPS / 2 - 1), \
+                                          srcstride, height + VTAPS - 1, mx, my); \
+    /* V pass: read the scratch (pitch W) from the row that lines up with the block's first row */ \
+    ff_put_vp8_epel##W##_v##VTAPS##_##ISA(dst, dststride, hbuf + W * (VTAPS / 2 - 1), \
+                                          W, height, mx, my); \
+}
+
+#define HVTAPMMX(x, y) \
+HVTAP(mmxext, 8, x, y,  4,  8)
+
+HVTAPMMX(4, 4)
+HVTAPMMX(4, 6)
+HVTAPMMX(6, 4)
+HVTAPMMX(6, 6)
+
+#define HVTAPSSE2(x, y, w) \
+HVTAP(sse2,  16, x, y, w, 16) \
+HVTAP(ssse3, 16, x, y, w, 16)
+
+HVTAPSSE2(4, 4, 8)
+HVTAPSSE2(4, 6, 8)
+HVTAPSSE2(6, 4, 8)
+HVTAPSSE2(6, 6, 8)
+HVTAPSSE2(6, 6, 16)
+
+HVTAP(ssse3, 16, 4, 4, 4, 8)
+HVTAP(ssse3, 16, 4, 6, 4, 8)
+HVTAP(ssse3, 16, 6, 4, 4, 8)
+HVTAP(ssse3, 16, 6, 6, 4, 8)
+
+#define HVBILIN(ISA, ALIGN, W, MAXH) /* 2-D bilinear: H pass into a scratch buffer, then V pass */ \
+static void ff_put_vp8_bilinear##W##_hv_##ISA( \
+    uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
+    ptrdiff_t srcstride, int height, int mx, int my) \
+{ \
+    LOCAL_ALIGNED(ALIGN, uint8_t, hbuf, [W * (MAXH + 2)]); \
+    /* H pass: one row more than the output, because the V pass also reads the row below */ \
+    ff_put_vp8_bilinear##W##_h_##ISA(hbuf, W, src, srcstride, height + 1, mx, my); \
+    /* V pass over the scratch buffer, whose pitch is W bytes */ \
+    ff_put_vp8_bilinear##W##_v_##ISA(dst, dststride, hbuf, W, height, mx, my); \
+}
+
+HVBILIN(mmxext,  8,  4,  8)
+HVBILIN(sse2,  8,  8, 16)
+HVBILIN(sse2,  8, 16, 16)
+HVBILIN(ssse3, 8,  4,  8)
+HVBILIN(ssse3, 8,  8, 16)
+HVBILIN(ssse3, 8, 16, 16)
+
+void ff_vp8_idct_dc_add_sse2(uint8_t *dst, int16_t block[16],
+                             ptrdiff_t stride);
+void ff_vp8_idct_dc_add_sse4(uint8_t *dst, int16_t block[16],
+                             ptrdiff_t stride);
+void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, int16_t block[4][16],
+                               ptrdiff_t stride);
+void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, int16_t block[4][16],
+                               ptrdiff_t stride); /* reads and clears the DC of four blocks */
+void ff_vp8_luma_dc_wht_sse(int16_t block[4][4][16], int16_t dc[16]);
+void ff_vp8_idct_add_sse(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+
+#define DECLARE_LOOP_FILTER(NAME)                                       \
+void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst,                 \
+                                          ptrdiff_t stride,             \
+                                          int flim);                    \
+void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst,                 \
+                                          ptrdiff_t stride,             \
+                                          int flim);                    \
+void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst,              \
+                                             ptrdiff_t stride,          \
+                                             int e, int i, int hvt);    \
+void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst,              \
+                                             ptrdiff_t stride,          \
+                                             int e, int i, int hvt);    \
+void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU,             \
+                                             uint8_t *dstV,             \
+                                             ptrdiff_t s,               \
+                                             int e, int i, int hvt);    \
+void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU,             \
+                                             uint8_t *dstV,             \
+                                             ptrdiff_t s,               \
+                                             int e, int i, int hvt);    \
+void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst,              \
+                                             ptrdiff_t stride,          \
+                                             int e, int i, int hvt);    \
+void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst,              \
+                                             ptrdiff_t stride,          \
+                                             int e, int i, int hvt);    \
+void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU,             \
+                                             uint8_t *dstV,             \
+                                             ptrdiff_t s,               \
+                                             int e, int i, int hvt);    \
+void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU,             \
+                                             uint8_t *dstV,             \
+                                             ptrdiff_t s,               \
+                                             int e, int i, int hvt);
+
+DECLARE_LOOP_FILTER(sse2)
+DECLARE_LOOP_FILTER(ssse3)
+DECLARE_LOOP_FILTER(sse4)
+
+#endif /* HAVE_X86ASM */
+
+#define VP8_LUMA_MC_FUNC(TAB, W, ISA) /* six-tap entries; indexes are [TAB][vertical][horizontal], 2 = six-tap */ \
+    c->put_vp8_epel_pixels_tab[TAB][2][2] = ff_put_vp8_epel##W##_h6v6_##ISA; \
+    c->put_vp8_epel_pixels_tab[TAB][2][0] = ff_put_vp8_epel##W##_v6_##ISA; \
+    c->put_vp8_epel_pixels_tab[TAB][0][2] = ff_put_vp8_epel##W##_h6_##ISA
+
+#define VP8_MC_FUNC(TAB, W, ISA) /* all epel entries except [0][0]; 1 = four-tap, 2 = six-tap */ \
+    VP8_LUMA_MC_FUNC(TAB, W, ISA); \
+    c->put_vp8_epel_pixels_tab[TAB][2][1] = ff_put_vp8_epel##W##_h4v6_##ISA; \
+    c->put_vp8_epel_pixels_tab[TAB][1][2] = ff_put_vp8_epel##W##_h6v4_##ISA; \
+    c->put_vp8_epel_pixels_tab[TAB][1][1] = ff_put_vp8_epel##W##_h4v4_##ISA; \
+    c->put_vp8_epel_pixels_tab[TAB][1][0] = ff_put_vp8_epel##W##_v4_##ISA; \
+    c->put_vp8_epel_pixels_tab[TAB][0][1] = ff_put_vp8_epel##W##_h4_##ISA
+
+#define VP8_BILINEAR_MC_FUNC(TAB, W, ISA) /* indexes 1 and 2 share one function per direction */ \
+    c->put_vp8_bilinear_pixels_tab[TAB][0][1] = ff_put_vp8_bilinear##W##_h_##ISA; \
+    c->put_vp8_bilinear_pixels_tab[TAB][0][2] = ff_put_vp8_bilinear##W##_h_##ISA; \
+    c->put_vp8_bilinear_pixels_tab[TAB][1][0] = ff_put_vp8_bilinear##W##_v_##ISA; \
+    c->put_vp8_bilinear_pixels_tab[TAB][2][0] = ff_put_vp8_bilinear##W##_v_##ISA; \
+    c->put_vp8_bilinear_pixels_tab[TAB][1][1] = ff_put_vp8_bilinear##W##_hv_##ISA; \
+    c->put_vp8_bilinear_pixels_tab[TAB][1][2] = ff_put_vp8_bilinear##W##_hv_##ISA; \
+    c->put_vp8_bilinear_pixels_tab[TAB][2][1] = ff_put_vp8_bilinear##W##_hv_##ISA; \
+    c->put_vp8_bilinear_pixels_tab[TAB][2][2] = ff_put_vp8_bilinear##W##_hv_##ISA
+
+
+av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
+{
+#if HAVE_X86ASM
+    const int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_MMX(cpu_flags)) { /* unfiltered 8-wide copy for both tables */
+        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
+        c->put_vp8_epel_pixels_tab[1][0][0]     = ff_put_vp8_pixels8_mmx;
+    }
+
+    /* There are no 4-tap functions for width 16: that width is only used
+     * for luma, and luma is always either a copy or six-tap. */
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
+        VP8_MC_FUNC(2, 4, mmxext);
+    }
+
+    if (EXTERNAL_SSE(cpu_flags)) { /* unfiltered 16-wide copy for both tables */
+        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
+        c->put_vp8_epel_pixels_tab[0][0][0]     = ff_put_vp8_pixels16_sse;
+    }
+
+    if (EXTERNAL_SSE2_SLOW(cpu_flags)) {
+        VP8_BILINEAR_MC_FUNC(1, 8, sse2);
+        VP8_BILINEAR_MC_FUNC(0, 16, sse2);
+        VP8_MC_FUNC(1, 8, sse2);
+        VP8_LUMA_MC_FUNC(0, 16, sse2);
+    }
+
+    if (EXTERNAL_SSSE3(cpu_flags)) { /* tested last, so it replaces the entries set above */
+        VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
+        VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
+        VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
+        VP8_MC_FUNC(2, 4, ssse3);
+        VP8_MC_FUNC(1, 8, ssse3);
+        VP8_LUMA_MC_FUNC(0, 16, ssse3);
+    }
+#endif /* HAVE_X86ASM */
+}
+
+av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
+{
+#if HAVE_X86ASM
+    const int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_MMX(cpu_flags)) {
+        c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
+    }
+
+    if (EXTERNAL_SSE(cpu_flags)) {
+        c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
+        c->vp8_idct_add    = ff_vp8_idct_add_sse;
+    }
+
+    if (EXTERNAL_SSE2_SLOW(cpu_flags)) { /* only the vertical loop filters */
+        /* macroblock-edge filters */
+        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_sse2;
+        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_sse2;
+        /* inner-edge filters */
+        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
+        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
+        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_sse2;
+    }
+
+    if (EXTERNAL_SSE2(cpu_flags)) { /* DC-only IDCTs and the horizontal loop filters */
+        /* macroblock-edge filters */
+        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;
+        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
+        /* inner-edge filters */
+        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
+        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
+        /* simple filter */
+        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse2;
+        /* DC-only inverse transforms */
+        c->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_sse2;
+        c->vp8_idct_dc_add            = ff_vp8_idct_dc_add_sse2;
+    }
+
+    if (EXTERNAL_SSSE3(cpu_flags)) { /* replaces every loop filter set above */
+        /* macroblock-edge filters */
+        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
+        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
+        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
+        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
+        /* inner-edge filters */
+        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
+        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
+        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
+        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
+        /* simple filters */
+        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_ssse3;
+        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_ssse3;
+    }
+
+    if (EXTERNAL_SSE4(cpu_flags)) { /* tested last: replaces these four entries again */
+        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse4;
+        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse4;
+        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse4;
+        c->vp8_idct_dc_add            = ff_vp8_idct_dc_add_sse4;
+    }
+#endif /* HAVE_X86ASM */
+}
diff --git a/media/ffvpx/libavcodec/x86/vp8dsp_loopfilter.asm b/media/ffvpx/libavcodec/x86/vp8dsp_loopfilter.asm
new file mode 100644
index 0000000000..ef397efd3e
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp8dsp_loopfilter.asm
@@ -0,0 +1,1234 @@
+;******************************************************************************
+;* VP8 MMXEXT optimizations
+;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
+;* Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pw_27:    times 8 dw 27            ; 8 words of 27: pmullw multiplier in the non-SSSE3 mbedge path
+pw_63:    times 8 dw 63            ; 8 words of 63: added before the >>7 in that same path
+
+pb_4:     times 16 db 4            ; 16 bytes of 4: the "+4" added to a to form f1<<3
+pb_F8:    times 16 db 0xF8         ; per-byte mask clearing the low 3 bits ahead of a psrlq by 3
+pb_FE:    times 16 db 0xFE         ; per-byte mask clearing bit 0 ahead of a psrlq by 1
+pb_27_63: times 8 db 27, 63        ; byte pairs (27, 63): pmaddubsw coefficients in the SSSE3 mbedge path
+pb_18_63: times 8 db 18, 63        ; byte pairs (18, 63): loaded in the SSSE3 mbedge path
+pb_9_63:  times 8 db  9, 63        ; byte pairs (9, 63): presumably the last mbedge tap -- TODO confirm at use site
+
+cextern pb_1                       ; external constant (not defined in this file); used with psubusb/pavgb and for SSSE3 rounding
+cextern pb_3                       ; external constant; the "+3" added to a to form f2<<3
+cextern pw_9                       ; external constant; presumably used by the mbedge filter -- TODO confirm
+cextern pw_18                      ; external constant; presumably used by the mbedge filter -- TODO confirm
+cextern pb_80                      ; external constant; xor-ed into pixels before signed arithmetic (presumably 0x80 per byte)
+
+SECTION .text
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, ptrdiff_t stride, int flim);
+;-----------------------------------------------------------------------------
+
+; READ_16x4_INTERLEAVED: load 16 rows (A-P) of 4 pixels each and byte-interleave them ahead of a transpose.
+; %1-%7 are mm register indexes: %1-%4 return A/B/I/J, C/D/K/L, E/F/M/N and G/H/O/P; %5-%7 are scratch.
+; %8/%9 are row pointers, %10/%11 row offsets (the caller in this file passes -stride/+stride), and %12 is
+; a scratch pointer for the bottom 8 rows; it is seeded from r0/r2 and ends up at r0+8*r2+%11.
+%macro READ_16x4_INTERLEAVED 12
+    ; transpose 16 (A-P) rows of 4 pixels each
+    lea           %12, [r0+8*r2]    ; NOTE: hard-coded r0/r2 rather than %8/%11, so %8 must be r0
+
+    ; read (and interleave) those addressable by %8 (=r0), A/C/D/E/I/K/L/M
+    movd          m%1, [%8+%10*4]   ; A0-3
+    movd          m%3, [%12+%10*4]  ; I0-3
+    movd          m%2, [%8+%10*2]   ; C0-3
+    movd          m%4, [%12+%10*2]  ; K0-3
+    movd          m%6, [%8+%10]     ; D0-3
+    movd          m%5, [%12+%10]    ; L0-3
+    movd          m%7, [%12]        ; M0-3
+    add           %12, %11          ; %12 now lines up with %9 for the remaining rows
+    punpcklbw     m%1, m%3          ; A/I
+    movd          m%3, [%8]         ; E0-3
+    punpcklbw     m%2, m%4          ; C/K
+    punpcklbw     m%6, m%5          ; D/L
+    punpcklbw     m%3, m%7          ; E/M
+    punpcklbw     m%2, m%6          ; C/D/K/L interleaved
+
+    ; read (and interleave) those addressable by %9 (=r4), B/F/G/H/J/N/O/P
+    movd         m%5, [%9+%10*4]   ; B0-3
+    movd         m%4, [%12+%10*4]  ; J0-3
+    movd         m%7, [%9]         ; F0-3
+    movd         m%6, [%12]        ; N0-3
+    punpcklbw    m%5, m%4          ; B/J
+    punpcklbw    m%7, m%6          ; F/N
+    punpcklbw    m%1, m%5          ; A/B/I/J interleaved
+    punpcklbw    m%3, m%7          ; E/F/M/N interleaved
+    movd         m%4, [%9+%11]     ; G0-3
+    movd         m%6, [%12+%11]    ; O0-3
+    movd         m%5, [%9+%11*2]   ; H0-3
+    movd         m%7, [%12+%11*2]  ; P0-3
+    punpcklbw    m%4, m%6          ; G/O
+    punpcklbw    m%5, m%7          ; H/P
+    punpcklbw    m%4, m%5          ; G/H/O/P interleaved
+%endmacro
+
+; WRITE_4x4D: write 4 xmm registers of 4 dwords each, one dword per row (16 rows in total).
+; %1-%4 are the xmm register indexes; the 5 regular registers %5-%9 contain
+; buf+4*stride, buf+5*stride, buf+12*stride, -stride and +stride.
+; We add 1*stride (%9) to the third regular register (%7) in the process.
+; The 10th argument is 16 if it's a Y filter (i.e. all regular registers cover the
+; same memory region), or 8 if they cover two separate buffers (third one points to
+; a different memory region than the first two), allowing for more optimal code for
+; the 16-width case. In the 8 case %5 is also clobbered (it parks one dword of m%3).
+%macro WRITE_4x4D 10
+    ; write out (4 dwords per register), start with dwords zero
+    movd    [%5+%8*4], m%1
+    movd         [%5], m%2
+    movd    [%7+%8*4], m%3
+    movd         [%7], m%4
+
+    ; store dwords 1
+    psrldq        m%1, 4
+    psrldq        m%2, 4
+    psrldq        m%3, 4
+    psrldq        m%4, 4
+    movd    [%6+%8*4], m%1
+    movd         [%6], m%2
+%if %10 == 16
+    movd    [%6+%9*4], m%3          ; single buffer: this row is reachable from %6
+%endif
+    movd      [%7+%9], m%4
+
+    ; write dwords 2
+    psrldq        m%1, 4
+    psrldq        m%2, 4
+%if %10 == 8
+    movd    [%5+%8*2], m%1          ; last use of %5 as a pointer in the 8 case
+    movd          %5d, m%3          ; park dword 1 of m%3 in %5 until %7 has been advanced
+%endif
+    psrldq        m%3, 4
+    psrldq        m%4, 4
+%if %10 == 16
+    movd    [%5+%8*2], m%1
+%endif
+    movd      [%6+%9], m%2
+    movd    [%7+%8*2], m%3
+    movd    [%7+%9*2], m%4
+    add            %7, %9           ; third pointer += stride (documented side effect)
+
+    ; store dwords 3
+    psrldq        m%1, 4
+    psrldq        m%2, 4
+    psrldq        m%3, 4
+    psrldq        m%4, 4
+%if %10 == 8
+    mov     [%7+%8*4], %5d          ; store the dword parked above, now addressable via the advanced %7
+    movd    [%6+%8*2], m%1
+%else
+    movd      [%5+%8], m%1
+%endif
+    movd    [%6+%9*2], m%2
+    movd    [%7+%8*2], m%3
+    movd    [%7+%9*2], m%4
+%endmacro
+
+%macro WRITE_8W 5 ; store the 8 words of xmm register %1, one word per row; %4/%5 are row offsets (callers pass -stride/+stride)
+%if cpuflag(sse4)                   ; SSE4: %2 and %3 are both row pointers, pextrw stores straight to memory
+    pextrw    [%3+%4*4], %1, 0
+    pextrw    [%2+%4*4], %1, 1
+    pextrw    [%3+%4*2], %1, 2
+    pextrw    [%3+%4  ], %1, 3
+    pextrw    [%3     ], %1, 4
+    pextrw    [%2     ], %1, 5
+    pextrw    [%2+%5  ], %1, 6
+    pextrw    [%2+%5*2], %1, 7
+%else                               ; pre-SSE4: %2 is a scratch GPR (clobbered), %3 the only row pointer, %1 is shifted away
+    movd            %2d, %1         ; words 0-1 into the scratch GPR
+    psrldq           %1, 4
+    mov       [%3+%4*4], %2w
+    shr              %2, 16
+    add              %3, %5         ; step %3 by %5 to reach the row for word 1
+    mov       [%3+%4*4], %2w
+
+    movd            %2d, %1         ; words 2-3
+    psrldq           %1, 4
+    add              %3, %4         ; step %3 by %4 (undoes the previous step when %4 == -%5)
+    mov       [%3+%4*2], %2w
+    shr              %2, 16
+    mov       [%3+%4  ], %2w
+
+    movd            %2d, %1         ; words 4-5
+    psrldq           %1, 4
+    mov       [%3     ], %2w
+    shr              %2, 16
+    mov       [%3+%5  ], %2w
+
+    movd            %2d, %1         ; words 6-7
+    add              %3, %5         ; %3 is left modified: net change is 2*%5+%4
+    mov       [%3+%5  ], %2w
+    shr              %2, 16
+    mov       [%3+%5*2], %2w
+%endif
+%endmacro
+
+%macro SIMPLE_LOOPFILTER 2 ; emits vp8_<%1>_loop_filter_simple; %1 = v or h (filter direction), %2 = GPR count handed to cglobal
+cglobal vp8_%1_loop_filter_simple, 3, %2, 8, dst, stride, flim, cntr
+%if cpuflag(ssse3)
+    pxor           m0, m0           ; zero m0; it is passed to SPLATB_REG as its third argument
+%endif
+    SPLATB_REG     m7, flim, m0     ; splat "flim" into register
+
+    ; set up indexes to address 4 rows
+    DEFINE_ARGS dst1, mstride, stride, dst3, dst2
+    mov       strideq, mstrideq     ; "mstride" is the old stride argument; +stride goes into flim's old register
+    neg      mstrideq               ; mstride = -stride
+%ifidn %1, h
+    lea         dst1q, [dst1q+4*strideq-2] ; h: move dst1 4 rows down and 2 bytes left
+%endif
+
+%ifidn %1, v
+    ; read 4 half/full rows of pixels
+    mova           m0, [dst1q+mstrideq*2]    ; p1
+    mova           m1, [dst1q+mstrideq]      ; p0
+    mova           m2, [dst1q]               ; q0
+    mova           m3, [dst1q+ strideq]      ; q1
+%else ; h
+    lea         dst2q, [dst1q+ strideq]      ; dst2 = dst1 + 1 row
+
+    READ_16x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, dst1q, dst2q, mstrideq, strideq, dst3q
+    TRANSPOSE4x4W         0, 1, 2, 3, 4
+%endif
+
+    ; simple_limit (m0-m3 = p1, p0, q0, q1 as labelled in the v loads; the h path is expected to match after the transpose)
+    mova           m5, m2           ; m5=backup of q0
+    mova           m6, m1           ; m6=backup of p0
+    psubusb        m1, m2           ; p0-q0
+    psubusb        m2, m6           ; q0-p0
+    por            m1, m2           ; FFABS(p0-q0)
+    paddusb        m1, m1           ; m1=FFABS(p0-q0)*2
+
+    mova           m4, m3           ; m4=backup of q1
+    mova           m2, m0           ; m2=backup of p1
+    psubusb        m3, m0           ; q1-p1
+    psubusb        m0, m4           ; p1-q1
+    por            m3, m0           ; FFABS(p1-q1)
+    mova           m0, [pb_80]      ; sign bias (pb_80 is external, presumably 0x80 in every byte)
+    pxor           m2, m0           ; p1 -> signed range
+    pxor           m4, m0           ; q1 -> signed range
+    psubsb         m2, m4           ; m2=p1-q1 (signed) backup for below
+    pand           m3, [pb_FE]      ; clear bit 0 per byte so the qword shift cannot pull bits across bytes
+    psrlq          m3, 1            ; m3=FFABS(p1-q1)/2, this can be used signed
+    paddusb        m3, m1           ; m3=abs(p0-q0)*2+abs(p1-q1)/2 (saturating)
+    psubusb        m3, m7           ; minus flim: saturates to 0 exactly when the sum <= flim
+    pxor           m1, m1
+    pcmpeqb        m3, m1           ; abs(p0-q0)*2+abs(p1-q1)/2<=flim mask(0xff/0x0)
+
+    ; filter_common (use m2/p1-q1, m4=q0, m6=p0, m5/q0-p0 and m3/mask)
+    mova           m4, m5           ; m4=q0, unsigned copy kept for the final update
+    pxor           m5, m0           ; q0 -> signed range
+    pxor           m0, m6           ; p0 -> signed range (consumes the bias constant in m0)
+    psubsb         m5, m0           ; q0-p0 (signed)
+    paddsb         m2, m5
+    paddsb         m2, m5
+    paddsb         m2, m5           ; a=(p1-q1) + 3*(q0-p0)
+    pand           m2, m3           ; apply filter mask (m3)
+
+    mova           m3, [pb_F8]
+    mova           m1, m2
+    paddsb         m2, [pb_4]       ; f1<<3=a+4
+    paddsb         m1, [pb_3]       ; f2<<3=a+3
+    pand           m2, m3           ; clear the low 3 bits per byte ahead of the psrlq by 3 below
+    pand           m1, m3           ; cache f2<<3
+
+    pxor           m0, m0
+    pxor           m3, m3
+    pcmpgtb        m0, m2           ; which values are <0?
+    psubb          m3, m2           ; -f1<<3
+    psrlq          m2, 3            ; +f1
+    psrlq          m3, 3            ; -f1
+    pand           m3, m0           ; keep -f1 (a magnitude) only in the negative lanes
+    pandn          m0, m2           ; keep +f1 only in the non-negative lanes
+    psubusb        m4, m0           ; unsigned saturating update of q0, positive part
+    paddusb        m4, m3           ; q0-f1
+
+    pxor           m0, m0
+    pxor           m3, m3
+    pcmpgtb        m0, m1           ; which values are <0?
+    psubb          m3, m1           ; -f2<<3
+    psrlq          m1, 3            ; +f2
+    psrlq          m3, 3            ; -f2
+    pand           m3, m0           ; keep -f2 (a magnitude) only in the negative lanes
+    pandn          m0, m1           ; keep +f2 only in the non-negative lanes
+    paddusb        m6, m0           ; unsigned saturating update of p0, positive part
+    psubusb        m6, m3           ; p0+f2
+
+    ; store
+%ifidn %1, v
+    mova      [dst1q], m4           ; q0 row
+    mova [dst1q+mstrideq], m6       ; p0 row
+%else ; h
+    inc        dst1q                ; from 2 bytes left of the original dst to 1 byte left
+    SBUTTERFLY    bw, 6, 4, 0       ; interleave p0 (m6) and q0 (m4) bytes, m0 as scratch
+
+%if cpuflag(sse4)
+    inc         dst2q               ; SSE4 WRITE_8W uses dst2 as a pointer; otherwise it is only scratch
+%endif
+    WRITE_8W       m6, dst2q, dst1q, mstrideq, strideq
+    lea         dst2q, [dst3q+mstrideq+1] ; dst3 was left at r0+8*r2+stride by the read macro; back up one row, step one byte right
+%if cpuflag(sse4)
+    inc         dst3q               ; same one-byte step for the pointer SSE4 WRITE_8W takes as %2
+%endif
+    WRITE_8W       m4, dst3q, dst2q, mstrideq, strideq
+%endif
+
+    RET
+%endmacro
+
+INIT_XMM sse2                   ; build the simple filter for sse2
+SIMPLE_LOOPFILTER v, 3          ; v variant, 3 GPRs
+SIMPLE_LOOPFILTER h, 5          ; h variant, 5 GPRs (also uses dst3/dst2)
+INIT_XMM ssse3                  ; same pair again for ssse3
+SIMPLE_LOOPFILTER v, 3
+SIMPLE_LOOPFILTER h, 5
+INIT_XMM sse4                   ; sse4-specific code appears only on the h store path (WRITE_8W), so only h is built
+SIMPLE_LOOPFILTER h, 5
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] ptrdiff_t stride,
+;                                               int flimE, int flimI, int hev_thr);
+;-----------------------------------------------------------------------------
+
+%macro INNER_LOOPFILTER 2 ; emits the *_inner filter; %1 = v or h (filter direction), %2 = 16 (luma, one buffer) or 8 (chroma, dst + dst8)
+%define stack_size 0
+%ifndef m8   ; stack layout: [0]=E, [1]=I, [2]=hev_thr
+%ifidn %1, v ;               [3]=hev() result
+%define stack_size mmsize * -4
+%else ; h    ; extra storage space for transposes
+%define stack_size mmsize * -5
+%endif
+%endif
+
+%if %2 == 8 ; chroma
+cglobal vp8_%1_loop_filter8uv_inner, 6, 6, 13, stack_size, dst, dst8, stride, flimE, flimI, hevthr
+%else ; luma
+cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, stack_size, dst, stride, flimE, flimI, hevthr
+%endif
+
+%if cpuflag(ssse3)
+    pxor             m7, m7           ; zero m7; it is passed to SPLATB_REG as its third argument
+%endif
+
+%ifndef m8
+    ; splat function arguments
+    SPLATB_REG       m0, flimEq, m7   ; E
+    SPLATB_REG       m1, flimIq, m7   ; I
+    SPLATB_REG       m2, hevthrq, m7  ; hev_thresh
+
+%define m_flimE    [rsp]
+%define m_flimI    [rsp+mmsize]
+%define m_hevthr   [rsp+mmsize*2]
+%define m_maskres  [rsp+mmsize*3]
+%define m_p0backup [rsp+mmsize*3]
+%define m_q0backup [rsp+mmsize*4]
+
+    mova        m_flimE, m0           ; no m8 available: keep the splatted thresholds in their stack slots
+    mova        m_flimI, m1
+    mova       m_hevthr, m2
+%else
+%define m_flimE    m9
+%define m_flimI    m10
+%define m_hevthr   m11
+%define m_maskres  m12
+%define m_p0backup m12
+%define m_q0backup m8
+
+    ; splat function arguments
+    SPLATB_REG  m_flimE, flimEq, m7   ; E
+    SPLATB_REG  m_flimI, flimIq, m7   ; I
+    SPLATB_REG m_hevthr, hevthrq, m7  ; hev_thresh
+%endif
+
+%if %2 == 8 ; chroma
+    DEFINE_ARGS dst1, dst8, mstride, stride, dst2
+%else
+    DEFINE_ARGS dst1, mstride, stride, dst2, dst8
+%endif
+    mov         strideq, mstrideq     ; "mstride" is the old stride argument; +stride goes into flimE's old register
+    neg        mstrideq               ; mstride = -stride
+%ifidn %1, h
+    lea           dst1q, [dst1q+strideq*4-4] ; h: move dst1 4 rows down and 4 bytes left
+%if %2 == 8 ; chroma
+    lea           dst8q, [dst8q+strideq*4-4] ; same adjustment for the second chroma buffer
+%endif
+%endif
+
+    ; read
+    lea           dst2q, [dst1q+strideq] ; dst2 = dst1 + 1 row
+%ifidn %1, v
+%if %2 == 8 && mmsize == 16
+%define movrow movh
+%else
+%define movrow mova
+%endif
+    movrow           m0, [dst1q+mstrideq*4] ; p3
+    movrow           m1, [dst2q+mstrideq*4] ; p2
+    movrow           m2, [dst1q+mstrideq*2] ; p1
+    movrow           m5, [dst2q]            ; q1
+    movrow           m6, [dst2q+ strideq*1] ; q2
+    movrow           m7, [dst2q+ strideq*2] ; q3
+%if mmsize == 16 && %2 == 8
+    movhps           m0, [dst8q+mstrideq*4] ; chroma: the dst8 buffer fills the high 8 bytes of each row register
+    movhps           m2, [dst8q+mstrideq*2]
+    add           dst8q, strideq            ; temporarily +1 row to reach the remaining rows
+    movhps           m1, [dst8q+mstrideq*4]
+    movhps           m5, [dst8q]
+    movhps           m6, [dst8q+ strideq  ]
+    movhps           m7, [dst8q+ strideq*2]
+    add           dst8q, mstrideq           ; restore dst8
+%endif
+%else ; h
+%if %2 == 16
+    lea           dst8q, [dst1q+ strideq*8] ; luma: dst8 = 8 rows below dst1 in the same buffer
+%endif
+
+    ; read 16 rows of 8px each, interleave
+    movh             m0, [dst1q+mstrideq*4]
+    movh             m1, [dst8q+mstrideq*4]
+    movh             m2, [dst1q+mstrideq*2]
+    movh             m5, [dst8q+mstrideq*2]
+    movh             m3, [dst1q+mstrideq  ]
+    movh             m6, [dst8q+mstrideq  ]
+    movh             m4, [dst1q]
+    movh             m7, [dst8q]
+    punpcklbw        m0, m1          ; A/I
+    punpcklbw        m2, m5          ; C/K
+    punpcklbw        m3, m6          ; D/L
+    punpcklbw        m4, m7          ; E/M
+
+    add           dst8q, strideq     ; dst8 += 1 row so it pairs with dst2; undone just before the store
+    movh             m1, [dst2q+mstrideq*4]
+    movh             m6, [dst8q+mstrideq*4]
+    movh             m5, [dst2q]
+    movh             m7, [dst8q]
+    punpcklbw        m1, m6          ; B/J
+    punpcklbw        m5, m7          ; F/N
+    movh             m6, [dst2q+ strideq  ]
+    movh             m7, [dst8q+ strideq  ]
+    punpcklbw        m6, m7          ; G/O
+
+    ; 8x16 transpose
+    TRANSPOSE4x4B     0, 1, 2, 3, 7
+%ifdef m8
+    SWAP              1, 8
+%else
+    mova     m_q0backup, m1          ; free m1 for the second half of the transpose
+%endif
+    movh             m7, [dst2q+ strideq*2]
+    movh             m1, [dst8q+ strideq*2]
+    punpcklbw        m7, m1          ; H/P
+    TRANSPOSE4x4B     4, 5, 6, 7, 1
+    SBUTTERFLY       dq, 0, 4, 1     ; p3/p2
+    SBUTTERFLY       dq, 2, 6, 1     ; q0/q1
+    SBUTTERFLY       dq, 3, 7, 1     ; q2/q3
+%ifdef m8
+    SWAP              1, 8
+    SWAP              2, 8
+%else
+    mova             m1, m_q0backup  ; bring back the register parked above
+    mova     m_q0backup, m2          ; store q0
+%endif
+    SBUTTERFLY       dq, 1, 5, 2     ; p1/p0
+%ifdef m12
+    SWAP              5, 12
+%else
+    mova     m_p0backup, m5          ; store p0
+%endif
+    SWAP              1, 4
+    SWAP              2, 4
+    SWAP              6, 3
+    SWAP              5, 3
+%endif
+
+    ; normal_limit for p3-p2, p2-p1, q3-q2 and q2-q1
+    mova             m4, m1          ; copy p2; the SWAP below only exchanges the two register names
+    SWAP              4, 1
+    psubusb          m4, m0          ; p2-p3
+    psubusb          m0, m1          ; p3-p2
+    por              m0, m4          ; abs(p3-p2)
+
+    mova             m4, m2          ; same copy-and-rename pattern for p1
+    SWAP              4, 2
+    psubusb          m4, m1          ; p1-p2
+    psubusb          m1, m2          ; p2-p1
+    por              m1, m4          ; abs(p2-p1)
+
+    mova             m4, m6          ; same pattern for q2
+    SWAP              4, 6
+    psubusb          m4, m7          ; q2-q3
+    psubusb          m7, m6          ; q3-q2
+    por              m7, m4          ; abs(q3-q2)
+
+    mova             m4, m5          ; same pattern for q1
+    SWAP              4, 5
+    psubusb          m4, m6          ; q1-q2
+    psubusb          m6, m5          ; q2-q1
+    por              m6, m4          ; abs(q2-q1)
+
+    pmaxub           m0, m1          ; m0 = max of the four abs differences computed above
+    pmaxub           m6, m7
+    pmaxub           m0, m6
+
+    ; normal_limit and high_edge_variance for p1-p0, q1-q0
+    SWAP              7, 3           ; now m7 is zero
+%ifidn %1, v
+    movrow           m3, [dst1q+mstrideq  ] ; p0
+%if mmsize == 16 && %2 == 8
+    movhps           m3, [dst8q+mstrideq  ]
+%endif
+%elifdef m12
+    SWAP              3, 12
+%else
+    mova             m3, m_p0backup  ; reload p0 (m_maskres reuses this slot further down)
+%endif
+
+    mova             m1, m2
+    SWAP              1, 2
+    mova             m6, m3
+    SWAP              3, 6
+    psubusb          m1, m3          ; p1-p0
+    psubusb          m6, m2          ; p0-p1
+    por              m1, m6          ; abs(p1-p0)
+    pmaxub           m0, m1          ; max_I
+    SWAP              1, 4           ; max_hev_thresh
+
+    SWAP              6, 4           ; now m6 is I
+%ifidn %1, v
+    movrow           m4, [dst1q]     ; q0
+%if mmsize == 16 && %2 == 8
+    movhps           m4, [dst8q]
+%endif
+%elifdef m8
+    SWAP              4, 8
+%else
+    mova             m4, m_q0backup  ; reload q0 saved during the transpose
+%endif
+    mova             m1, m4
+    SWAP              1, 4
+    mova             m7, m5
+    SWAP              7, 5
+    psubusb          m1, m5          ; q0-q1
+    psubusb          m7, m4          ; q1-q0
+    por              m1, m7          ; abs(q1-q0)
+    pxor             m7, m7
+    pmaxub           m0, m1
+    pmaxub           m6, m1
+    psubusb          m0, m_flimI     ; saturates to 0 where the max is <= I
+    psubusb          m6, m_hevthr    ; saturates to 0 where the max is <= hev_thresh
+    pcmpeqb          m0, m7          ; max(abs(..)) <= I
+    pcmpeqb          m6, m7          ; !(max(abs..) > thresh)
+%ifdef m12
+    SWAP              6, 12
+%else
+    mova      m_maskres, m6          ; !(abs(p1-p0) > hev_t || abs(q1-q0) > hev_t)
+%endif
+
+    ; simple_limit
+    mova             m1, m3
+    SWAP              1, 3
+    mova             m6, m4          ; keep copies of p0/q0 around for later use
+    SWAP              6, 4
+    psubusb          m1, m4          ; p0-q0
+    psubusb          m6, m3          ; q0-p0
+    por              m1, m6          ; abs(q0-p0)
+    paddusb          m1, m1          ; m1=2*abs(q0-p0)
+
+    mova             m7, m2
+    SWAP              7, 2
+    mova             m6, m5
+    SWAP              6, 5
+    psubusb          m7, m5          ; p1-q1
+    psubusb          m6, m2          ; q1-p1
+    por              m7, m6          ; abs(q1-p1)
+    pxor             m6, m6
+    pand             m7, [pb_FE]     ; clear bit 0 per byte so the qword shift cannot pull bits across bytes
+    psrlq            m7, 1           ; abs(q1-p1)/2
+    paddusb          m7, m1          ; abs(q0-p0)*2+abs(q1-p1)/2
+    psubusb          m7, m_flimE     ; saturates to 0 where the sum is <= E
+    pcmpeqb          m7, m6          ; abs(q0-p0)*2+abs(q1-p1)/2 <= E
+    pand             m0, m7          ; normal_limit result
+
+    ; filter_common; at this point, m2-m5=p1-q1 and m0 is filter_mask
+%ifdef m8 ; x86-64 && sse2
+    mova             m8, [pb_80]
+%define m_pb_80 m8
+%else ; x86-32 or mmx/mmxext
+%define m_pb_80 [pb_80]
+%endif
+    mova             m1, m4
+    mova             m7, m3
+    pxor             m1, m_pb_80     ; q0 -> signed range
+    pxor             m7, m_pb_80     ; p0 -> signed range
+    psubsb           m1, m7          ; (signed) q0-p0
+    mova             m6, m2
+    mova             m7, m5
+    pxor             m6, m_pb_80     ; p1 -> signed range
+    pxor             m7, m_pb_80     ; q1 -> signed range
+    psubsb           m6, m7          ; (signed) p1-q1
+    mova             m7, m_maskres
+    pandn            m7, m6          ; keep (p1-q1) only where the !hev mask is clear, i.e. in hev lanes
+    paddsb           m7, m1
+    paddsb           m7, m1
+    paddsb           m7, m1          ; 3*(q0-p0)+is4tap?(p1-q1)
+
+    pand             m7, m0          ; apply filter_mask
+    mova             m1, [pb_F8]
+    mova             m6, m7
+    paddsb           m7, [pb_3]      ; a+3 (f2 before the shift)
+    paddsb           m6, [pb_4]      ; a+4 (f1 before the shift)
+    pand             m7, m1          ; clear the low 3 bits per byte ahead of the psrlq by 3 below
+    pand             m6, m1
+
+    pxor             m1, m1
+    pxor             m0, m0
+    pcmpgtb          m1, m7          ; mask of negative lanes
+    psubb            m0, m7          ; negated value
+    psrlq            m7, 3           ; +f2
+    psrlq            m0, 3           ; -f2
+    pand             m0, m1          ; -f2 (a magnitude) only in the negative lanes
+    pandn            m1, m7          ; +f2 only in the non-negative lanes
+    psubusb          m3, m0
+    paddusb          m3, m1          ; p0+f2
+
+    pxor             m1, m1
+    pxor             m0, m0
+    pcmpgtb          m0, m6          ; mask of negative lanes
+    psubb            m1, m6          ; negated value
+    psrlq            m6, 3           ; +f1
+    psrlq            m1, 3           ; -f1
+    pand             m1, m0          ; -f1 (a magnitude) only in the negative lanes
+    pandn            m0, m6          ; +f1 only in the non-negative lanes
+    psubusb          m4, m0
+    paddusb          m4, m1          ; q0-f1
+
+%ifdef m12
+    SWAP              6, 12
+%else
+    mova             m6, m_maskres
+%endif
+    pxor             m7, m7
+    pand             m0, m6          ; the p1/q1 adjustment applies only in !hev lanes
+    pand             m1, m6
+    psubusb          m1, [pb_1]      ; pavgb with 0 computes (x+1)>>1; this -1 offsets it on the negated side
+    pavgb            m0, m7          ; a
+    pavgb            m1, m7          ; -a
+    psubusb          m5, m0
+    psubusb          m2, m1
+    paddusb          m5, m1          ; q1-a
+    paddusb          m2, m0          ; p1+a
+
+    ; store
+%ifidn %1, v
+    movrow [dst1q+mstrideq*2], m2
+    movrow [dst1q+mstrideq  ], m3
+    movrow      [dst1q], m4
+    movrow [dst1q+ strideq  ], m5
+%if mmsize == 16 && %2 == 8
+    movhps [dst8q+mstrideq*2], m2
+    movhps [dst8q+mstrideq  ], m3
+    movhps      [dst8q], m4
+    movhps [dst8q+ strideq  ], m5
+%endif
+%else ; h
+    add           dst1q, 2           ; skip the 2 leftmost of the 8 columns read; only 4 columns (m2-m5) are written back
+    add           dst2q, 2
+
+    ; 4x8/16 transpose
+    TRANSPOSE4x4B     2, 3, 4, 5, 6
+
+    lea           dst8q, [dst8q+mstrideq  +2] ; undo the +1 row from the read stage and apply the same 2-byte step
+    WRITE_4x4D        2, 3, 4, 5, dst1q, dst2q, dst8q, mstrideq, strideq, %2
+%endif
+
+    RET
+%endmacro
+
+INIT_XMM sse2                   ; build the four inner filters for sse2
+INNER_LOOPFILTER v, 16          ; vp8_v_loop_filter16y_inner
+INNER_LOOPFILTER h, 16          ; vp8_h_loop_filter16y_inner
+INNER_LOOPFILTER v,  8          ; vp8_v_loop_filter8uv_inner
+INNER_LOOPFILTER h,  8          ; vp8_h_loop_filter8uv_inner
+
+INIT_XMM ssse3                  ; same four again for ssse3
+INNER_LOOPFILTER v, 16
+INNER_LOOPFILTER h, 16
+INNER_LOOPFILTER v,  8
+INNER_LOOPFILTER h,  8
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_h/v_loop_filter<size>_mbedge_<opt>(uint8_t *dst, [uint8_t *v,] ptrdiff_t stride,
+;                                                int flimE, int flimI, int hev_thr);
+;-----------------------------------------------------------------------------
+
+%macro MBEDGE_LOOPFILTER 2
+%define stack_size 0
+%ifndef m8       ; stack layout: [0]=E, [1]=I, [2]=hev_thr
+%if mmsize == 16 ;               [3]=hev() result
+                 ;               [4]=filter tmp result
+                 ;               [5]/[6] = p2/q2 backup
+                 ;               [7]=lim_res sign result
+%define stack_size mmsize * -7
+%else ; 8        ; extra storage space for transposes
+%define stack_size mmsize * -8
+%endif
+%endif
+
+%if %2 == 8 ; chroma
+cglobal vp8_%1_loop_filter8uv_mbedge, 6, 6, 15, stack_size, dst1, dst8, stride, flimE, flimI, hevthr
+%else ; luma
+cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, stack_size, dst1, stride, flimE, flimI, hevthr
+%endif
+
+%if cpuflag(ssse3)
+    pxor             m7, m7
+%endif
+
+%ifndef m8
+    ; splat function arguments
+    SPLATB_REG       m0, flimEq, m7   ; E
+    SPLATB_REG       m1, flimIq, m7   ; I
+    SPLATB_REG       m2, hevthrq, m7  ; hev_thresh
+
+%define m_flimE    [rsp]
+%define m_flimI    [rsp+mmsize]
+%define m_hevthr   [rsp+mmsize*2]
+%define m_maskres  [rsp+mmsize*3]
+%define m_limres   [rsp+mmsize*4]
+%define m_p0backup [rsp+mmsize*3]
+%define m_q0backup [rsp+mmsize*4]
+%define m_p2backup [rsp+mmsize*5]
+%define m_q2backup [rsp+mmsize*6]
+%if mmsize == 16
+%define m_limsign  [rsp]
+%else
+%define m_limsign  [rsp+mmsize*7]
+%endif
+
+    mova        m_flimE, m0
+    mova        m_flimI, m1
+    mova       m_hevthr, m2
+%else ; sse2 on x86-64
+%define m_flimE    m9
+%define m_flimI    m10
+%define m_hevthr   m11
+%define m_maskres  m12
+%define m_limres   m8
+%define m_p0backup m12
+%define m_q0backup m8
+%define m_p2backup m13
+%define m_q2backup m14
+%define m_limsign  m9
+
+    ; splat function arguments
+    SPLATB_REG  m_flimE, flimEq, m7   ; E
+    SPLATB_REG  m_flimI, flimIq, m7   ; I
+    SPLATB_REG m_hevthr, hevthrq, m7  ; hev_thresh
+%endif
+
+%if %2 == 8 ; chroma
+    DEFINE_ARGS dst1, dst8, mstride, stride, dst2
+%else
+    DEFINE_ARGS dst1, mstride, stride, dst2, dst8
+%endif
+    mov         strideq, mstrideq
+    neg        mstrideq
+%ifidn %1, h
+    lea           dst1q, [dst1q+strideq*4-4]
+%if %2 == 8 ; chroma
+    lea           dst8q, [dst8q+strideq*4-4]
+%endif
+%endif
+
+    ; read
+    lea           dst2q, [dst1q+ strideq  ]
+%ifidn %1, v
+%if %2 == 8 && mmsize == 16
+%define movrow movh
+%else
+%define movrow mova
+%endif
+    movrow           m0, [dst1q+mstrideq*4] ; p3
+    movrow           m1, [dst2q+mstrideq*4] ; p2
+    movrow           m2, [dst1q+mstrideq*2] ; p1
+    movrow           m5, [dst2q]            ; q1
+    movrow           m6, [dst2q+ strideq  ] ; q2
+    movrow           m7, [dst2q+ strideq*2] ; q3
+%if mmsize == 16 && %2 == 8
+    movhps           m0, [dst8q+mstrideq*4]
+    movhps           m2, [dst8q+mstrideq*2]
+    add           dst8q, strideq
+    movhps           m1, [dst8q+mstrideq*4]
+    movhps           m5, [dst8q]
+    movhps           m6, [dst8q+ strideq  ]
+    movhps           m7, [dst8q+ strideq*2]
+    add           dst8q, mstrideq
+%endif
+%else ; h
+%if %2 == 16
+    lea           dst8q, [dst1q+ strideq*8  ]
+%endif
+
+    ; read 16 rows of 8px each, interleave
+    movh             m0, [dst1q+mstrideq*4]
+    movh             m1, [dst8q+mstrideq*4]
+    movh             m2, [dst1q+mstrideq*2]
+    movh             m5, [dst8q+mstrideq*2]
+    movh             m3, [dst1q+mstrideq  ]
+    movh             m6, [dst8q+mstrideq  ]
+    movh             m4, [dst1q]
+    movh             m7, [dst8q]
+    punpcklbw        m0, m1          ; A/I
+    punpcklbw        m2, m5          ; C/K
+    punpcklbw        m3, m6          ; D/L
+    punpcklbw        m4, m7          ; E/M
+
+    add           dst8q, strideq
+    movh             m1, [dst2q+mstrideq*4]
+    movh             m6, [dst8q+mstrideq*4]
+    movh             m5, [dst2q]
+    movh             m7, [dst8q]
+    punpcklbw        m1, m6          ; B/J
+    punpcklbw        m5, m7          ; F/N
+    movh             m6, [dst2q+ strideq  ]
+    movh             m7, [dst8q+ strideq  ]
+    punpcklbw        m6, m7          ; G/O
+
+    ; 8x16 transpose
+    TRANSPOSE4x4B     0, 1, 2, 3, 7
+%ifdef m8
+    SWAP              1, 8
+%else
+    mova     m_q0backup, m1
+%endif
+    movh             m7, [dst2q+ strideq*2]
+    movh             m1, [dst8q+ strideq*2]
+    punpcklbw        m7, m1          ; H/P
+    TRANSPOSE4x4B     4, 5, 6, 7, 1
+    SBUTTERFLY       dq, 0, 4, 1     ; p3/p2
+    SBUTTERFLY       dq, 2, 6, 1     ; q0/q1
+    SBUTTERFLY       dq, 3, 7, 1     ; q2/q3
+%ifdef m8
+    SWAP              1, 8
+    SWAP              2, 8
+%else
+    mova             m1, m_q0backup
+    mova     m_q0backup, m2          ; store q0
+%endif
+    SBUTTERFLY       dq, 1, 5, 2     ; p1/p0
+%ifdef m12
+    SWAP              5, 12
+%else
+    mova     m_p0backup, m5          ; store p0
+%endif
+    SWAP              1, 4
+    SWAP              2, 4
+    SWAP              6, 3
+    SWAP              5, 3
+%endif
+
+    ; normal_limit for p3-p2, p2-p1, q3-q2 and q2-q1
+    mova             m4, m1
+    SWAP              4, 1
+    psubusb          m4, m0          ; p2-p3
+    psubusb          m0, m1          ; p3-p2
+    por              m0, m4          ; abs(p3-p2)
+
+    mova             m4, m2
+    SWAP              4, 2
+    psubusb          m4, m1          ; p1-p2
+    mova     m_p2backup, m1
+    psubusb          m1, m2          ; p2-p1
+    por              m1, m4          ; abs(p2-p1)
+
+    mova             m4, m6
+    SWAP              4, 6
+    psubusb          m4, m7          ; q2-q3
+    psubusb          m7, m6          ; q3-q2
+    por              m7, m4          ; abs(q3-q2)
+
+    mova             m4, m5
+    SWAP              4, 5
+    psubusb          m4, m6          ; q1-q2
+    mova     m_q2backup, m6
+    psubusb          m6, m5          ; q2-q1
+    por              m6, m4          ; abs(q2-q1)
+
+    pmaxub           m0, m1
+    pmaxub           m6, m7
+    pmaxub           m0, m6
+
+    ; normal_limit and high_edge_variance for p1-p0, q1-q0
+    SWAP              7, 3           ; now m7 is zero
+%ifidn %1, v
+    movrow           m3, [dst1q+mstrideq  ] ; p0
+%if mmsize == 16 && %2 == 8
+    movhps           m3, [dst8q+mstrideq  ]
+%endif
+%elifdef m12
+    SWAP              3, 12
+%else
+    mova             m3, m_p0backup
+%endif
+
+    mova             m1, m2
+    SWAP              1, 2
+    mova             m6, m3
+    SWAP              3, 6
+    psubusb          m1, m3          ; p1-p0
+    psubusb          m6, m2          ; p0-p1
+    por              m1, m6          ; abs(p1-p0)
+    pmaxub           m0, m1          ; max_I
+    SWAP              1, 4           ; max_hev_thresh
+
+    SWAP              6, 4           ; now m6 is I
+%ifidn %1, v
+    movrow           m4, [dst1q]     ; q0
+%if mmsize == 16 && %2 == 8
+    movhps           m4, [dst8q]
+%endif
+%elifdef m8
+    SWAP              4, 8
+%else
+    mova             m4, m_q0backup
+%endif
+    mova             m1, m4
+    SWAP              1, 4
+    mova             m7, m5
+    SWAP              7, 5
+    psubusb          m1, m5          ; q0-q1
+    psubusb          m7, m4          ; q1-q0
+    por              m1, m7          ; abs(q1-q0)
+    pxor             m7, m7
+    pmaxub           m0, m1
+    pmaxub           m6, m1
+    psubusb          m0, m_flimI
+    psubusb          m6, m_hevthr
+    pcmpeqb          m0, m7          ; max(abs(..)) <= I
+    pcmpeqb          m6, m7          ; !(max(abs..) > thresh)
+%ifdef m12
+    SWAP              6, 12
+%else
+    mova      m_maskres, m6          ; !(abs(p1-p0) > hev_t || abs(q1-q0) > hev_t)
+%endif
+
+    ; simple_limit
+    mova             m1, m3
+    SWAP              1, 3
+    mova             m6, m4          ; keep copies of p0/q0 around for later use
+    SWAP              6, 4
+    psubusb          m1, m4          ; p0-q0
+    psubusb          m6, m3          ; q0-p0
+    por              m1, m6          ; abs(q0-p0)
+    paddusb          m1, m1          ; m1=2*abs(q0-p0)
+
+    mova             m7, m2
+    SWAP              7, 2
+    mova             m6, m5
+    SWAP              6, 5
+    psubusb          m7, m5          ; p1-q1
+    psubusb          m6, m2          ; q1-p1
+    por              m7, m6          ; abs(q1-p1)
+    pxor             m6, m6
+    pand             m7, [pb_FE]
+    psrlq            m7, 1           ; abs(q1-p1)/2
+    paddusb          m7, m1          ; abs(q0-p0)*2+abs(q1-p1)/2
+    psubusb          m7, m_flimE
+    pcmpeqb          m7, m6          ; abs(q0-p0)*2+abs(q1-p1)/2 <= E
+    pand             m0, m7          ; normal_limit result
+
+    ; filter_common; at this point, m2-m5=p1-q1 and m0 is filter_mask
+%ifdef m8 ; x86-64 && sse2
+    mova             m8, [pb_80]
+%define m_pb_80 m8
+%else ; x86-32 or mmx/mmxext
+%define m_pb_80 [pb_80]
+%endif
+    mova             m1, m4
+    mova             m7, m3
+    pxor             m1, m_pb_80
+    pxor             m7, m_pb_80
+    psubsb           m1, m7          ; (signed) q0-p0
+    mova             m6, m2
+    mova             m7, m5
+    pxor             m6, m_pb_80
+    pxor             m7, m_pb_80
+    psubsb           m6, m7          ; (signed) p1-q1
+    mova             m7, m_maskres
+    paddsb           m6, m1
+    paddsb           m6, m1
+    paddsb           m6, m1
+    pand             m6, m0
+%ifdef m8
+    mova       m_limres, m6          ; 3*(qp-p0)+(p1-q1) masked for filter_mbedge
+    pand       m_limres, m7
+%else
+    mova             m0, m6
+    pand             m0, m7
+    mova       m_limres, m0
+%endif
+    pandn            m7, m6          ; 3*(q0-p0)+(p1-q1) masked for filter_common
+
+    mova             m1, [pb_F8]
+    mova             m6, m7
+    paddsb           m7, [pb_3]
+    paddsb           m6, [pb_4]
+    pand             m7, m1
+    pand             m6, m1
+
+    pxor             m1, m1
+    pxor             m0, m0
+    pcmpgtb          m1, m7
+    psubb            m0, m7
+    psrlq            m7, 3           ; +f2
+    psrlq            m0, 3           ; -f2
+    pand             m0, m1
+    pandn            m1, m7
+    psubusb          m3, m0
+    paddusb          m3, m1          ; p0+f2
+
+    pxor             m1, m1
+    pxor             m0, m0
+    pcmpgtb          m0, m6
+    psubb            m1, m6
+    psrlq            m6, 3           ; +f1
+    psrlq            m1, 3           ; -f1
+    pand             m1, m0
+    pandn            m0, m6
+    psubusb          m4, m0
+    paddusb          m4, m1          ; q0-f1
+
+    ; filter_mbedge (m2-m5 = p1-q1; lim_res carries w)
+%if cpuflag(ssse3)
+    mova             m7, [pb_1]
+%else
+    mova             m7, [pw_63]
+%endif
+%ifdef m8
+    SWAP              1, 8
+%else
+    mova             m1, m_limres
+%endif
+    pxor             m0, m0
+    mova             m6, m1
+    pcmpgtb          m0, m1         ; which are negative
+%if cpuflag(ssse3)
+    punpcklbw        m6, m7         ; interleave with "1" for rounding
+    punpckhbw        m1, m7
+%else
+    punpcklbw        m6, m0         ; signed byte->word
+    punpckhbw        m1, m0
+%endif
+    mova      m_limsign, m0
+%if cpuflag(ssse3)
+    mova             m7, [pb_27_63]
+%ifndef m8
+    mova       m_limres, m1
+%endif
+%ifdef m10
+    SWAP              0, 10         ; don't lose lim_sign copy
+%endif
+    mova             m0, m7
+    pmaddubsw        m7, m6
+    SWAP              6, 7
+    pmaddubsw        m0, m1
+    SWAP              1, 0
+%ifdef m10
+    SWAP              0, 10
+%else
+    mova             m0, m_limsign
+%endif
+%else
+    mova      m_maskres, m6         ; backup for later in filter
+    mova       m_limres, m1
+    pmullw          m6, [pw_27]
+    pmullw          m1, [pw_27]
+    paddw           m6, m7
+    paddw           m1, m7
+%endif
+    psraw           m6, 7
+    psraw           m1, 7
+    packsswb        m6, m1          ; a0
+    pxor            m1, m1
+    psubb           m1, m6
+    pand            m1, m0          ; -a0
+    pandn           m0, m6          ; +a0
+%if cpuflag(ssse3)
+    mova            m6, [pb_18_63]  ; pipelining
+%endif
+    psubusb         m3, m1
+    paddusb         m4, m1
+    paddusb         m3, m0          ; p0+a0
+    psubusb         m4, m0          ; q0-a0
+
+%if cpuflag(ssse3)
+    SWAP             6, 7
+%ifdef m10
+    SWAP             1, 10
+%else
+    mova            m1, m_limres
+%endif
+    mova            m0, m7
+    pmaddubsw       m7, m6
+    SWAP             6, 7
+    pmaddubsw       m0, m1
+    SWAP             1, 0
+%ifdef m10
+    SWAP             0, 10
+%endif
+    mova            m0, m_limsign
+%else
+    mova            m6, m_maskres
+    mova            m1, m_limres
+    pmullw          m6, [pw_18]
+    pmullw          m1, [pw_18]
+    paddw           m6, m7
+    paddw           m1, m7
+%endif
+    mova            m0, m_limsign
+    psraw           m6, 7
+    psraw           m1, 7
+    packsswb        m6, m1          ; a1
+    pxor            m1, m1
+    psubb           m1, m6
+    pand            m1, m0          ; -a1
+    pandn           m0, m6          ; +a1
+%if cpuflag(ssse3)
+    mova            m6, [pb_9_63]
+%endif
+    psubusb         m2, m1
+    paddusb         m5, m1
+    paddusb         m2, m0          ; p1+a1
+    psubusb         m5, m0          ; q1-a1
+
+%if cpuflag(ssse3)
+    SWAP             6, 7
+%ifdef m10
+    SWAP             1, 10
+%else
+    mova            m1, m_limres
+%endif
+    mova            m0, m7
+    pmaddubsw       m7, m6
+    SWAP             6, 7
+    pmaddubsw       m0, m1
+    SWAP             1, 0
+%else
+%ifdef m8
+    SWAP             6, 12
+    SWAP             1, 8
+%else
+    mova            m6, m_maskres
+    mova            m1, m_limres
+%endif
+    pmullw          m6, [pw_9]
+    pmullw          m1, [pw_9]
+    paddw           m6, m7
+    paddw           m1, m7
+%endif
+%ifdef m9
+    SWAP             7, 9
+%else
+    mova            m7, m_limsign
+%endif
+    psraw           m6, 7
+    psraw           m1, 7
+    packsswb        m6, m1          ; a1
+    pxor            m0, m0
+    psubb           m0, m6
+    pand            m0, m7          ; -a1
+    pandn           m7, m6          ; +a1
+%ifdef m8
+    SWAP             1, 13
+    SWAP             6, 14
+%else
+    mova            m1, m_p2backup
+    mova            m6, m_q2backup
+%endif
+    psubusb         m1, m0
+    paddusb         m6, m0
+    paddusb         m1, m7          ; p1+a1
+    psubusb         m6, m7          ; q1-a1
+
+    ; store
+%ifidn %1, v
+    movrow [dst2q+mstrideq*4], m1
+    movrow [dst1q+mstrideq*2], m2
+    movrow [dst1q+mstrideq  ], m3
+    movrow     [dst1q], m4
+    movrow     [dst2q], m5
+    movrow [dst2q+ strideq  ], m6
+%if mmsize == 16 && %2 == 8
+    add           dst8q, mstrideq
+    movhps [dst8q+mstrideq*2], m1
+    movhps [dst8q+mstrideq  ], m2
+    movhps     [dst8q], m3
+    add          dst8q, strideq
+    movhps     [dst8q], m4
+    movhps [dst8q+ strideq  ], m5
+    movhps [dst8q+ strideq*2], m6
+%endif
+%else ; h
+    inc          dst1q
+    inc          dst2q
+
+    ; 4x8/16 transpose
+    TRANSPOSE4x4B    1, 2, 3, 4, 0
+    SBUTTERFLY      bw, 5, 6, 0
+
+    lea          dst8q, [dst8q+mstrideq+1]
+    WRITE_4x4D       1, 2, 3, 4, dst1q, dst2q, dst8q, mstrideq, strideq, %2
+    lea          dst1q, [dst2q+mstrideq+4]
+    lea          dst8q, [dst8q+mstrideq+4]
+%if cpuflag(sse4)
+    add          dst2q, 4
+%endif
+    WRITE_8W        m5, dst2q, dst1q,  mstrideq, strideq
+%if cpuflag(sse4)
+    lea          dst2q, [dst8q+ strideq  ]
+%endif
+    WRITE_8W        m6, dst2q, dst8q, mstrideq, strideq
+%endif
+
+    RET
+%endmacro
+
+INIT_XMM sse2                ; selects the instruction set seen by cpuflag() in the macro body
+MBEDGE_LOOPFILTER v, 16      ; %1 = v or h (picks the store path); %2 = 16 or 8
+MBEDGE_LOOPFILTER h, 16
+MBEDGE_LOOPFILTER v,  8      ; with %2 == 8 and mmsize == 16 the high halves are stored through dst8q
+MBEDGE_LOOPFILTER h,  8
+
+INIT_XMM ssse3
+MBEDGE_LOOPFILTER v, 16
+MBEDGE_LOOPFILTER h, 16
+MBEDGE_LOOPFILTER v,  8
+MBEDGE_LOOPFILTER h,  8
+
+INIT_XMM sse4                ; h only: the cpuflag(sse4) code near the macro end is all in the h store path (presumably why)
+MBEDGE_LOOPFILTER h, 16
+MBEDGE_LOOPFILTER h,  8
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init.c b/media/ffvpx/libavcodec/x86/vp9dsp_init.c
new file mode 100644
index 0000000000..8d11dbc348
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init.c
@@ -0,0 +1,415 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "libavcodec/x86/vp9dsp_init.h"
+
+#if HAVE_X86ASM
+
+decl_fpel_func(put,  4,   , mmx); /* args: op, block width, bpp suffix (empty for put), instruction set */
+decl_fpel_func(put,  8,   , mmx);
+decl_fpel_func(put, 16,   , sse);
+decl_fpel_func(put, 32,   , sse);
+decl_fpel_func(put, 64,   , sse);
+decl_fpel_func(avg,  4, _8, mmxext);
+decl_fpel_func(avg,  8, _8, mmxext);
+decl_fpel_func(avg, 16, _8, sse2);
+decl_fpel_func(avg, 32, _8, sse2);
+decl_fpel_func(avg, 64, _8, sse2);
+decl_fpel_func(put, 32,   , avx); /* 32/64-wide put also exist for avx, avg for avx2 */
+decl_fpel_func(put, 64,   , avx);
+decl_fpel_func(avg, 32, _8, avx2);
+decl_fpel_func(avg, 64, _8, avx2);
+
+decl_mc_funcs(4, mmxext, int16_t, 8, 8); /* args: width, instruction set, coefficient type, f_sz, bpp */
+decl_mc_funcs(8, sse2, int16_t,  8, 8);
+decl_mc_funcs(4, ssse3, int8_t, 32, 8);
+decl_mc_funcs(8, ssse3, int8_t, 32, 8);
+#if ARCH_X86_64
+decl_mc_funcs(16, ssse3, int8_t, 32, 8);
+decl_mc_funcs(32, avx2, int8_t, 32, 8);
+#endif /* ARCH_X86_64 */
+
+mc_rep_funcs(16,  8,  8,  sse2, int16_t,  8, 8) /* wider functions built from two half-width calls */
+#if ARCH_X86_32 /* the 16-wide ssse3 asm is only declared under ARCH_X86_64 above, so compose it here */
+mc_rep_funcs(16,  8,  8, ssse3, int8_t,  32, 8)
+#endif /* ARCH_X86_32 */
+mc_rep_funcs(32, 16, 16, sse2,  int16_t,  8, 8)
+mc_rep_funcs(32, 16, 16, ssse3, int8_t,  32, 8)
+mc_rep_funcs(64, 32, 32, sse2,  int16_t,  8, 8)
+mc_rep_funcs(64, 32, 32, ssse3, int8_t,  32, 8)
+#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+mc_rep_funcs(64, 32, 32, avx2,  int8_t,  32, 8)
+#endif /* ARCH_X86_64 && HAVE_AVX2_EXTERNAL */
+
+extern const int8_t ff_filters_ssse3[3][15][4][32]; /* indexed [filter type][mx or my, minus 1] by the wrappers below */
+extern const int16_t ff_filters_sse2[3][15][8][8];
+
+filters_8tap_2d_fn2(put, 16, 8, 1, mmxext, sse2, sse2) /* 4-wide uses mmxext asm, 8..64-wide use sse2 */
+filters_8tap_2d_fn2(avg, 16, 8, 1, mmxext, sse2, sse2)
+filters_8tap_2d_fn2(put, 16, 8, 1, ssse3, ssse3, ssse3)
+filters_8tap_2d_fn2(avg, 16, 8, 1, ssse3, ssse3, ssse3)
+#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+filters_8tap_2d_fn(put, 64, 32, 8, 1, avx2, ssse3) /* the avx2 wrappers read the ssse3 coefficient table */
+filters_8tap_2d_fn(put, 32, 32, 8, 1, avx2, ssse3)
+filters_8tap_2d_fn(avg, 64, 32, 8, 1, avx2, ssse3)
+filters_8tap_2d_fn(avg, 32, 32, 8, 1, avx2, ssse3)
+#endif /* ARCH_X86_64 && HAVE_AVX2_EXTERNAL */
+
+filters_8tap_1d_fn3(put, 8, mmxext, sse2, sse2) /* args: op, bpp, 4-wide instruction set, 8..64-wide one, table */
+filters_8tap_1d_fn3(avg, 8, mmxext, sse2, sse2)
+filters_8tap_1d_fn3(put, 8, ssse3, ssse3, ssse3)
+filters_8tap_1d_fn3(avg, 8, ssse3, ssse3, ssse3)
+#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+filters_8tap_1d_fn2(put, 64, 8, avx2, ssse3)
+filters_8tap_1d_fn2(put, 32, 8, avx2, ssse3)
+filters_8tap_1d_fn2(avg, 64, 8, avx2, ssse3)
+filters_8tap_1d_fn2(avg, 32, 8, avx2, ssse3)
+#endif /* ARCH_X86_64 && HAVE_AVX2_EXTERNAL */
+
+#define itxfm_func(typea, typeb, size, opt) /* prototype of one asm routine stored in dsp->itxfm_add[][] */ \
+void ff_vp9_##typea##_##typeb##_##size##x##size##_add_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                            int16_t *block, int eob)
+#define itxfm_funcs(size, opt) /* all four idct/iadst combinations for one size */ \
+itxfm_func(idct,  idct,  size, opt); \
+itxfm_func(iadst, idct,  size, opt); \
+itxfm_func(idct,  iadst, size, opt); \
+itxfm_func(iadst, iadst, size, opt)
+
+itxfm_func(idct,  idct,  4, mmxext); /* 4x4: idct_idct is mmxext, the adst combinations are sse2 */
+itxfm_func(idct,  iadst, 4, sse2);
+itxfm_func(iadst, idct,  4, sse2);
+itxfm_func(iadst, iadst, 4, sse2);
+itxfm_funcs(4, ssse3);
+itxfm_funcs(8, sse2);
+itxfm_funcs(8, ssse3);
+itxfm_funcs(8, avx);
+itxfm_funcs(16, sse2);
+itxfm_funcs(16, ssse3);
+itxfm_funcs(16, avx);
+itxfm_func(idct, idct, 32, sse2); /* 32x32 has only idct_idct; the init code installs it for every transform type */
+itxfm_func(idct, idct, 32, ssse3);
+itxfm_func(idct, idct, 32, avx);
+itxfm_func(iwht, iwht, 4, mmx); /* installed into itxfm_add[4] (the "lossless" row) by the init code */
+itxfm_funcs(16, avx2);
+itxfm_func(idct, idct, 32, avx2);
+
+#undef itxfm_func
+#undef itxfm_funcs
+
+#define lpf_funcs(size1, size2, opt) /* declares the v and h loop filters of one size / instruction-set pair */ \
+void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                    int E, int I, int H); \
+void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                    int E, int I, int H)
+
+lpf_funcs(4, 8, mmxext); /* 4_8 and 8_8 are installed into dsp->loop_filter_8[][] */
+lpf_funcs(8, 8, mmxext);
+lpf_funcs(16, 16, sse2); /* 16_16 is installed into dsp->loop_filter_16[] */
+lpf_funcs(16, 16, ssse3);
+lpf_funcs(16, 16, avx);
+lpf_funcs(44, 16, sse2); /* 44/48/84/88 are installed into dsp->loop_filter_mix2[][][] */
+lpf_funcs(44, 16, ssse3);
+lpf_funcs(44, 16, avx);
+lpf_funcs(84, 16, sse2);
+lpf_funcs(84, 16, ssse3);
+lpf_funcs(84, 16, avx);
+lpf_funcs(48, 16, sse2);
+lpf_funcs(48, 16, ssse3);
+lpf_funcs(48, 16, avx);
+lpf_funcs(88, 16, sse2);
+lpf_funcs(88, 16, ssse3);
+lpf_funcs(88, 16, avx);
+
+#undef lpf_funcs
+
+#define ipred_func(size, type, opt) /* prototype of one asm routine stored in dsp->intra_pred[][] */ \
+void ff_vp9_ipred_##type##_##size##x##size##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                   const uint8_t *l, const uint8_t *a)
+
+ipred_func(8, v, mmx);
+
+#define ipred_dc_funcs(size, opt) /* dc, dc_left and dc_top */ \
+ipred_func(size, dc, opt); \
+ipred_func(size, dc_left, opt); \
+ipred_func(size, dc_top, opt)
+
+ipred_dc_funcs(4, mmxext);
+ipred_dc_funcs(8, mmxext);
+
+#define ipred_dir_tm_funcs(size, opt) /* tm plus the six directional predictors, without h */ \
+ipred_func(size, tm, opt); \
+ipred_func(size, dl, opt); \
+ipred_func(size, dr, opt); \
+ipred_func(size, hd, opt); \
+ipred_func(size, hu, opt); \
+ipred_func(size, vl, opt); \
+ipred_func(size, vr, opt)
+
+ipred_dir_tm_funcs(4, mmxext);
+
+ipred_func(16, v, sse);
+ipred_func(32, v, sse);
+
+ipred_dc_funcs(16, sse2);
+ipred_dc_funcs(32, sse2);
+
+#define ipred_dir_tm_h_funcs(size, opt) /* ipred_dir_tm_funcs plus h */ \
+ipred_dir_tm_funcs(size, opt); \
+ipred_func(size, h, opt)
+
+ipred_dir_tm_h_funcs(8, sse2);
+ipred_dir_tm_h_funcs(16, sse2);
+ipred_dir_tm_h_funcs(32, sse2);
+
+ipred_func(4, h, sse2);
+
+#define ipred_all_funcs(size, opt) /* every predictor declared above except v */ \
+ipred_dc_funcs(size, opt); \
+ipred_dir_tm_h_funcs(size, opt)
+
+// FIXME hd/vl_4x4_ssse3 do not exist; ff_vp9dsp_init_x86() #defines both names to the mmxext versions
+ipred_all_funcs(4, ssse3);
+ipred_all_funcs(8, ssse3);
+ipred_all_funcs(16, ssse3);
+ipred_all_funcs(32, ssse3);
+
+ipred_dir_tm_h_funcs(8, avx);
+ipred_dir_tm_h_funcs(16, avx);
+ipred_dir_tm_h_funcs(32, avx);
+
+ipred_func(32, v, avx);
+
+ipred_dc_funcs(32, avx2);
+ipred_func(32, h, avx2);
+ipred_func(32, tm, avx2);
+
+#undef ipred_func
+#undef ipred_dir_tm_h_funcs
+#undef ipred_dir_tm_funcs
+#undef ipred_dc_funcs /* NOTE(review): ipred_all_funcs is not #undef'd here and stays defined */
+
+#endif /* HAVE_X86ASM */
+
+av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
+{
+#if HAVE_X86ASM /* without external x86 asm the function body is empty and dsp is left untouched */
+    int cpu_flags;
+
+    if (bpp == 10) { /* 10 and 12 bpp are delegated to their own init functions */
+        ff_vp9dsp_init_10bpp_x86(dsp, bitexact);
+        return;
+    } else if (bpp == 12) {
+        ff_vp9dsp_init_12bpp_x86(dsp, bitexact);
+        return;
+    }
+
+    cpu_flags = av_get_cpu_flags();
+
+#define init_lpf(opt) do { \
+    dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
+    dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
+    dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
+    dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
+    dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
+    dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
+    dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
+    dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
+    dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
+    dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
+} while (0)
+
+#define init_ipred(sz, opt, t, e) \
+    dsp->intra_pred[TX_##sz##X##sz][e##_PRED] = ff_vp9_ipred_##t##_##sz##x##sz##_##opt
+
+#define ff_vp9_ipred_hd_4x4_ssse3 ff_vp9_ipred_hd_4x4_mmxext /* no ssse3 hd/vl 4x4 exists: init_all_ipred(4, ssse3) reuses mmxext */
+#define ff_vp9_ipred_vl_4x4_ssse3 ff_vp9_ipred_vl_4x4_mmxext
+#define init_dir_tm_ipred(sz, opt) do { \
+    init_ipred(sz, opt, dl, DIAG_DOWN_LEFT); \
+    init_ipred(sz, opt, dr, DIAG_DOWN_RIGHT); \
+    init_ipred(sz, opt, hd, HOR_DOWN); \
+    init_ipred(sz, opt, vl, VERT_LEFT); \
+    init_ipred(sz, opt, hu, HOR_UP); \
+    init_ipred(sz, opt, tm, TM_VP8); \
+    init_ipred(sz, opt, vr, VERT_RIGHT); \
+} while (0)
+#define init_dir_tm_h_ipred(sz, opt) do { \
+    init_dir_tm_ipred(sz, opt); \
+    init_ipred(sz, opt, h,  HOR); \
+} while (0)
+#define init_dc_ipred(sz, opt) do { \
+    init_ipred(sz, opt, dc,      DC); \
+    init_ipred(sz, opt, dc_left, LEFT_DC); \
+    init_ipred(sz, opt, dc_top,  TOP_DC); \
+} while (0)
+#define init_all_ipred(sz, opt) do { \
+    init_dc_ipred(sz, opt); \
+    init_dir_tm_h_ipred(sz, opt); \
+} while (0)
+
+    if (EXTERNAL_MMX(cpu_flags)) { /* tiers are tested oldest-first; a later tier overwrites pointers set by an earlier one */
+        init_fpel_func(4, 0,  4, put, , mmx);
+        init_fpel_func(3, 0,  8, put, , mmx);
+        if (!bitexact) { /* the mmx WHT is only installed when bit-exact output is not requested */
+            dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
+            dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
+            dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
+            dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
+        }
+        init_ipred(8, mmx, v, VERT);
+    }
+
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        dsp->loop_filter_8[0][0] = ff_vp9_loop_filter_h_4_8_mmxext;
+        dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_mmxext;
+        dsp->loop_filter_8[1][0] = ff_vp9_loop_filter_h_8_8_mmxext;
+        dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_mmxext;
+        init_subpel2(4, 0, 4, put, 8, mmxext);
+        init_subpel2(4, 1, 4, avg, 8, mmxext);
+        init_fpel_func(4, 1,  4, avg, _8, mmxext);
+        init_fpel_func(3, 1,  8, avg, _8, mmxext);
+        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext;
+        init_dc_ipred(4, mmxext);
+        init_dc_ipred(8, mmxext);
+        init_dir_tm_ipred(4, mmxext);
+    }
+
+    if (EXTERNAL_SSE(cpu_flags)) {
+        init_fpel_func(2, 0, 16, put, , sse);
+        init_fpel_func(1, 0, 32, put, , sse);
+        init_fpel_func(0, 0, 64, put, , sse);
+        init_ipred(16, sse, v, VERT);
+        init_ipred(32, sse, v, VERT);
+    }
+
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        init_subpel3_8to64(0, put, 8, sse2);
+        init_subpel3_8to64(1, avg, 8, sse2);
+        init_fpel_func(2, 1, 16, avg,  _8, sse2);
+        init_fpel_func(1, 1, 32, avg,  _8, sse2);
+        init_fpel_func(0, 1, 64, avg,  _8, sse2);
+        init_lpf(sse2);
+        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_sse2;
+        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_sse2;
+        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2;
+        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2;
+        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_sse2;
+        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_sse2;
+        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2;
+        dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_sse2;
+        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_sse2;
+        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_sse2;
+        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2;
+        dsp->itxfm_add[TX_32X32][ADST_ADST] =
+        dsp->itxfm_add[TX_32X32][ADST_DCT] =
+        dsp->itxfm_add[TX_32X32][DCT_ADST] =
+        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2;
+        init_dc_ipred(16, sse2);
+        init_dc_ipred(32, sse2);
+        init_dir_tm_h_ipred(8, sse2);
+        init_dir_tm_h_ipred(16, sse2);
+        init_dir_tm_h_ipred(32, sse2);
+        init_ipred(4, sse2, h, HOR);
+    }
+
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        init_subpel3(0, put, 8, ssse3);
+        init_subpel3(1, avg, 8, ssse3);
+        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
+        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_ssse3;
+        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_ssse3;
+        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
+        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
+        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_ssse3;
+        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_ssse3;
+        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
+        dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_ssse3;
+        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_ssse3;
+        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_ssse3;
+        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
+        dsp->itxfm_add[TX_32X32][ADST_ADST] =
+        dsp->itxfm_add[TX_32X32][ADST_DCT] =
+        dsp->itxfm_add[TX_32X32][DCT_ADST] =
+        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
+        init_lpf(ssse3);
+        init_all_ipred(4, ssse3);
+        init_all_ipred(8, ssse3);
+        init_all_ipred(16, ssse3);
+        init_all_ipred(32, ssse3);
+    }
+
+    if (EXTERNAL_AVX(cpu_flags)) {
+        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
+        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_avx;
+        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_avx;
+        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
+        dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
+        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_avx;
+        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_avx;
+        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
+        dsp->itxfm_add[TX_32X32][ADST_ADST] =
+        dsp->itxfm_add[TX_32X32][ADST_DCT] =
+        dsp->itxfm_add[TX_32X32][DCT_ADST] =
+        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
+        init_lpf(avx);
+        init_dir_tm_h_ipred(8, avx);
+        init_dir_tm_h_ipred(16, avx);
+        init_dir_tm_h_ipred(32, avx);
+    }
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
+        init_fpel_func(1, 0, 32, put, , avx);
+        init_fpel_func(0, 0, 64, put, , avx);
+        init_ipred(32, avx, v, VERT);
+    }
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        init_fpel_func(1, 1, 32, avg, _8, avx2);
+        init_fpel_func(0, 1, 64, avg, _8, avx2);
+        if (ARCH_X86_64) {
+#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+            dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx2;
+            dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_avx2;
+            dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_avx2;
+            dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx2;
+            dsp->itxfm_add[TX_32X32][ADST_ADST] =
+            dsp->itxfm_add[TX_32X32][ADST_DCT] =
+            dsp->itxfm_add[TX_32X32][DCT_ADST] =
+            dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx2;
+            init_subpel3_32_64(0, put, 8, avx2);
+            init_subpel3_32_64(1, avg, 8, avx2);
+#endif
+        }
+        init_dc_ipred(32, avx2);
+        init_ipred(32, avx2, h,  HOR);
+        init_ipred(32, avx2, tm, TM_VP8);
+    }
+
+#undef init_fpel_func /* was "init_fpel", which names no macro; the header macro is init_fpel_func */
+#undef init_subpel1
+#undef init_subpel2
+#undef init_subpel3
+
+#endif /* HAVE_X86ASM */
+}
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init.h b/media/ffvpx/libavcodec/x86/vp9dsp_init.h
new file mode 100644
index 0000000000..fc1e0557fa
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init.h
@@ -0,0 +1,192 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_VP9DSP_INIT_H
+#define AVCODEC_X86_VP9DSP_INIT_H
+
+#include "libavutil/attributes.h"
+#include "libavutil/mem_internal.h"
+
+#include "libavcodec/vp9dsp.h"
+
+// hack to force-expand BPC: the bpp argument is macro-expanded while being passed to cat(), before ## pastes it
+#define cat(a, bpp, b) a##bpp##b
+
+#define decl_fpel_func(avg, sz, bpp, opt) /* declares ff_vp9_<avg><sz><bpp>_<opt>(); bpp is pasted as given and may be empty */ \
+void ff_vp9_##avg##sz##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                   const uint8_t *src, ptrdiff_t src_stride, \
+                                   int h, int mx, int my)
+
+#define decl_mc_func(avg, sz, dir, opt, type, f_sz, bpp) /* 1-D 8-tap prototype; filter points at rows of f_sz coefficients */ \
+void ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                         const uint8_t *src, ptrdiff_t src_stride, \
+                                                         int h, const type (*filter)[f_sz])
+
+#define decl_mc_funcs(sz, opt, type, fsz, bpp) /* put/avg x h/v for one width */ \
+decl_mc_func(put, sz, h, opt, type, fsz, bpp); \
+decl_mc_func(avg, sz, h, opt, type, fsz, bpp); \
+decl_mc_func(put, sz, v, opt, type, fsz, bpp); \
+decl_mc_func(avg, sz, v, opt, type, fsz, bpp)
+
+#define decl_ipred_fn(type, sz, bpp, opt) /* unlike ipred_func in vp9dsp_init.c, the name carries a bpp component */ \
+void ff_vp9_ipred_##type##_##sz##x##sz##_##bpp##_##opt(uint8_t *dst, \
+                                                       ptrdiff_t stride, \
+                                                       const uint8_t *l, \
+                                                       const uint8_t *a)
+
+#define decl_ipred_fns(type, bpp, opt4, opt8_16_32) /* 4x4 may use a different instruction set than 8/16/32 */ \
+decl_ipred_fn(type,  4, bpp, opt4); \
+decl_ipred_fn(type,  8, bpp, opt8_16_32); \
+decl_ipred_fn(type, 16, bpp, opt8_16_32); \
+decl_ipred_fn(type, 32, bpp, opt8_16_32)
+
+#define decl_itxfm_func(typea, typeb, size, bpp, opt) /* goes through cat() so bpp may itself be a macro */ \
+void cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt)(uint8_t *dst, \
+                                                                         ptrdiff_t stride, \
+                                                                         int16_t *block, \
+                                                                         int eob)
+
+#define decl_itxfm_funcs(size, bpp, opt) /* all four idct/iadst combinations for one size */ \
+decl_itxfm_func(idct,  idct,  size, bpp, opt); \
+decl_itxfm_func(iadst, idct,  size, bpp, opt); \
+decl_itxfm_func(idct,  iadst, size, bpp, opt); \
+decl_itxfm_func(iadst, iadst, size, bpp, opt)
+
+#define mc_rep_func(avg, sz, hsz, hszb, dir, opt, type, f_sz, bpp) /* defines the sz-wide function as two hsz-wide calls, hszb bytes apart */ \
+static av_always_inline void \
+ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                    const uint8_t *src, ptrdiff_t src_stride, \
+                                                    int h, const type (*filter)[f_sz]) \
+{ \
+    ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst,        dst_stride, src, \
+                                                         src_stride, h, filter); \
+    ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst + hszb, dst_stride, src + hszb, \
+                                                         src_stride, h, filter); \
+}
+
+#define mc_rep_funcs(sz, hsz, hszb, opt, type, fsz, bpp) /* put/avg x h/v; expands to definitions, so no trailing ';' at the use site */ \
+mc_rep_func(put, sz, hsz, hszb, h, opt, type, fsz, bpp) \
+mc_rep_func(avg, sz, hsz, hszb, h, opt, type, fsz, bpp) \
+mc_rep_func(put, sz, hsz, hszb, v, opt, type, fsz, bpp) \
+mc_rep_func(avg, sz, hsz, hszb, v, opt, type, fsz, bpp)
+
+#define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, bpp, opt) /* adapts the (h, mx, my) signature to the asm one taking a coefficient row */ \
+static void op##_8tap_##fname##_##sz##dir##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                          const uint8_t *src, ptrdiff_t src_stride, \
+                                                          int h, int mx, int my) \
+{ \
+    ff_vp9_##op##_8tap_1d_##dir##_##sz##_##bpp##_##opt(dst, dst_stride, src, src_stride, \
+                                                       h, ff_filters_##f_opt[f][dvar - 1]); /* dvar is mx or my; presumably never 0 here - TODO confirm */ \
+}
+
+#define filters_8tap_1d_fn(op, sz, dir, dvar, bpp, opt, f_opt) /* one wrapper per filter type */ \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, dir, dvar, bpp, opt) \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_SHARP,   f_opt, sharp,   dir, dvar, bpp, opt) \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_SMOOTH,  f_opt, smooth,  dir, dvar, bpp, opt)
+
+#define filters_8tap_1d_fn2(op, sz, bpp, opt, f_opt) /* h wrappers index by mx, v wrappers by my */ \
+filters_8tap_1d_fn(op, sz, h, mx, bpp, opt, f_opt) \
+filters_8tap_1d_fn(op, sz, v, my, bpp, opt, f_opt)
+
+#define filters_8tap_1d_fn3(op, bpp, opt4, opt8, f_opt) /* widths 64..8 use opt8, width 4 uses opt4 */ \
+filters_8tap_1d_fn2(op, 64, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 32, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 16, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 8, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 4, bpp, opt4, f_opt)
+
+#define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, bpp, bytes, opt) /* 2-D filter: h pass (always "put") into temp, then v pass with op */ \
+static void op##_8tap_##fname##_##sz##hv_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                       const uint8_t *src, ptrdiff_t src_stride, \
+                                                       int h, int mx, int my) \
+{ \
+    LOCAL_ALIGNED_##align(uint8_t, temp, [71 * 64 * bytes]); /* 71 rows of 64 * bytes: room for the h + 7 rows written below when h <= 64 */ \
+    ff_vp9_put_8tap_1d_h_##sz##_##bpp##_##opt(temp, 64 * bytes, src - 3 * src_stride, /* starts 3 rows above the block */ \
+                                              src_stride,  h + 7, \
+                                              ff_filters_##f_opt[f][mx - 1]); \
+    ff_vp9_##op##_8tap_1d_v_##sz##_##bpp##_##opt(dst, dst_stride, temp + 3 * bytes * 64, /* skips those 3 leading rows */ \
+                                                 64 * bytes, h, \
+                                                 ff_filters_##f_opt[f][my - 1]); \
+}
+
+#define filters_8tap_2d_fn(op, sz, align, bpp, bytes, opt, f_opt) /* one wrapper per filter type */ \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, align, bpp, bytes, opt) \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_SHARP,   f_opt, sharp, align, bpp, bytes, opt) \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_SMOOTH,  f_opt, smooth, align, bpp, bytes, opt)
+
+#define filters_8tap_2d_fn2(op, align, bpp, bytes, opt4, opt8, f_opt) /* widths 64..8 use opt8, width 4 uses opt4 */ \
+filters_8tap_2d_fn(op, 64, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 32, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 16, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 8, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
+
+#define init_fpel_func(idx1, idx2, sz, type, bpp, opt) /* fills the [0][0] (neither h nor v) slot of all four filter types with one function */ \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##bpp##_##opt
+
+#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, bpp, opt) \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = \
+        type##_8tap_smooth_##sz##dir##_##bpp##_##opt; \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = \
+        type##_8tap_regular_##sz##dir##_##bpp##_##opt; \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = \
+        type##_8tap_sharp_##sz##dir##_##bpp##_##opt
+
+#define init_subpel2(idx1, idx2, sz, type, bpp, opt) \
+    init_subpel1(idx1, idx2, 1, 1, sz, hv, type, bpp, opt); \
+    init_subpel1(idx1, idx2, 0, 1, sz, v,  type, bpp, opt); \
+    init_subpel1(idx1, idx2, 1, 0, sz, h,  type, bpp, opt)
+
+#define init_subpel3_32_64(idx, type, bpp, opt) \
+    init_subpel2(0, idx, 64, type, bpp, opt); \
+    init_subpel2(1, idx, 32, type, bpp, opt)
+
+#define init_subpel3_8to64(idx, type, bpp, opt) \
+    init_subpel3_32_64(idx, type, bpp, opt); \
+    init_subpel2(2, idx, 16, type, bpp, opt); \
+    init_subpel2(3, idx,  8, type, bpp, opt)
+
+#define init_subpel3(idx, type, bpp, opt) \
+    init_subpel3_8to64(idx, type, bpp, opt); \
+    init_subpel2(4, idx,  4, type, bpp, opt)
+
+#define init_ipred_func(type, enum, sz, bpp, opt) \
+    dsp->intra_pred[TX_##sz##X##sz][enum##_PRED] = \
+        cat(ff_vp9_ipred_##type##_##sz##x##sz##_, bpp, _##opt)
+
+#define init_8_16_32_ipred_funcs(type, enum, bpp, opt) \
+    init_ipred_func(type, enum,  8, bpp, opt); \
+    init_ipred_func(type, enum, 16, bpp, opt); \
+    init_ipred_func(type, enum, 32, bpp, opt)
+
+#define init_ipred_funcs(type, enum, bpp, opt) \
+    init_ipred_func(type, enum,  4, bpp, opt); \
+    init_8_16_32_ipred_funcs(type, enum, bpp, opt)
+
+void ff_vp9dsp_init_10bpp_x86(VP9DSPContext *dsp, int bitexact); /* called by ff_vp9dsp_init_x86() when bpp == 10 */
+void ff_vp9dsp_init_12bpp_x86(VP9DSPContext *dsp, int bitexact); /* called by ff_vp9dsp_init_x86() when bpp == 12 */
+void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp); /* NOTE(review): takes no bitexact argument, unlike the two above */
+
+#endif /* AVCODEC_X86_VP9DSP_INIT_H */
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init_10bpp.c b/media/ffvpx/libavcodec/x86/vp9dsp_init_10bpp.c
new file mode 100644
index 0000000000..2694c06cb2
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init_10bpp.c
@@ -0,0 +1,25 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/* Instantiate the shared template with BPC = 10; the generated function is named by INIT_FUNC. */
+#define BPC 10
+#define INIT_FUNC ff_vp9dsp_init_10bpp_x86
+#include "vp9dsp_init_16bpp_template.c"
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init_12bpp.c b/media/ffvpx/libavcodec/x86/vp9dsp_init_12bpp.c
new file mode 100644
index 0000000000..5da3bc1840
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init_12bpp.c
@@ -0,0 +1,25 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/* Instantiate the shared template with BPC = 12; the generated function is named by INIT_FUNC. */
+#define BPC 12
+#define INIT_FUNC ff_vp9dsp_init_12bpp_x86
+#include "vp9dsp_init_16bpp_template.c"
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp.c b/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp.c
new file mode 100644
index 0000000000..e5afea1512
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp.c
@@ -0,0 +1,152 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "libavcodec/x86/vp9dsp_init.h"
+/* Everything down to the matching #endif is declared only when HAVE_X86ASM is set; the decl_* macros are defined outside this view. */
+#if HAVE_X86ASM
+/* fpel put/avg routines, one per size argument (8..128) and instruction set; the third argument is empty for put and _16 for avg. */
+decl_fpel_func(put,   8,    , mmx);
+decl_fpel_func(avg,   8, _16, mmxext);
+decl_fpel_func(put,  16,    , sse);
+decl_fpel_func(put,  32,    , sse);
+decl_fpel_func(put,  64,    , sse);
+decl_fpel_func(put, 128,    , sse);
+decl_fpel_func(avg,  16, _16, sse2);
+decl_fpel_func(avg,  32, _16, sse2);
+decl_fpel_func(avg,  64, _16, sse2);
+decl_fpel_func(avg, 128, _16, sse2);
+decl_fpel_func(put,  32,    , avx);
+decl_fpel_func(put,  64,    , avx);
+decl_fpel_func(put, 128,    , avx);
+decl_fpel_func(avg,  32, _16, avx2);
+decl_fpel_func(avg,  64, _16, avx2);
+decl_fpel_func(avg, 128, _16, avx2);
+/* Intra predictors. NOTE(review): decl_ipred_fns presumably takes the 4x4 opt first and the 8/16/32 opt second, matching the init code below - confirm against the macro. */
+decl_ipred_fns(v,       16, mmx,    sse);
+decl_ipred_fns(h,       16, mmxext, sse2);
+decl_ipred_fns(dc,      16, mmxext, sse2);
+decl_ipred_fns(dc_top,  16, mmxext, sse2);
+decl_ipred_fns(dc_left, 16, mmxext, sse2);
+decl_ipred_fn(dl,       16,     16, avx2);
+decl_ipred_fn(dl,       32,     16, avx2);
+decl_ipred_fn(dr,       16,     16, avx2);
+decl_ipred_fn(dr,       32,     16, avx2);
+decl_ipred_fn(vl,       16,     16, avx2);
+decl_ipred_fn(hd,       16,     16, avx2);
+/* Directional predictors: each of dl, dr, vl, vr, hu and hd is declared for sse2, ssse3 and avx. */
+#define decl_ipred_dir_funcs(type) \
+decl_ipred_fns(type, 16, sse2,  sse2); \
+decl_ipred_fns(type, 16, ssse3, ssse3); \
+decl_ipred_fns(type, 16, avx,   avx)
+
+decl_ipred_dir_funcs(dl);
+decl_ipred_dir_funcs(dr);
+decl_ipred_dir_funcs(vl);
+decl_ipred_dir_funcs(vr);
+decl_ipred_dir_funcs(hu);
+decl_ipred_dir_funcs(hd);
+#endif /* HAVE_X86ASM */
+/* Install the fpel and intra-prediction pointers that do not depend on BPC; called at the end of the init function generated from vp9dsp_init_16bpp_template.c. */
+av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
+{
+#if HAVE_X86ASM
+    int cpu_flags = av_get_cpu_flags();
+    /* Instruction-set tiers are tested in ascending order, so a later tier overwrites pointers set by an earlier one. */
+    if (EXTERNAL_MMX(cpu_flags)) {
+        init_fpel_func(4, 0,   8, put, , mmx);
+        init_ipred_func(v, VERT, 4, 16, mmx);
+    }
+
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        init_fpel_func(4, 1,   8, avg, _16, mmxext);
+        init_ipred_func(h, HOR, 4, 16, mmxext);
+        init_ipred_func(dc, DC, 4, 16, mmxext);
+        init_ipred_func(dc_top,  TOP_DC,  4, 16, mmxext);
+        init_ipred_func(dc_left, LEFT_DC, 4, 16, mmxext);
+    }
+
+    if (EXTERNAL_SSE(cpu_flags)) {
+        init_fpel_func(3, 0,  16, put, , sse);
+        init_fpel_func(2, 0,  32, put, , sse);
+        init_fpel_func(1, 0,  64, put, , sse);
+        init_fpel_func(0, 0, 128, put, , sse);
+        init_8_16_32_ipred_funcs(v, VERT, 16, sse);
+    }
+
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        init_fpel_func(3, 1,  16, avg, _16, sse2);
+        init_fpel_func(2, 1,  32, avg, _16, sse2);
+        init_fpel_func(1, 1,  64, avg, _16, sse2);
+        init_fpel_func(0, 1, 128, avg, _16, sse2);
+        init_8_16_32_ipred_funcs(h, HOR, 16, sse2);
+        init_8_16_32_ipred_funcs(dc, DC, 16, sse2);
+        init_8_16_32_ipred_funcs(dc_top,  TOP_DC,  16, sse2);
+        init_8_16_32_ipred_funcs(dc_left, LEFT_DC, 16, sse2);
+        init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, sse2);
+        init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, sse2);
+        init_ipred_funcs(vl, VERT_LEFT, 16, sse2);
+        init_ipred_funcs(vr, VERT_RIGHT, 16, sse2);
+        init_ipred_funcs(hu, HOR_UP, 16, sse2);
+        init_ipred_funcs(hd, HOR_DOWN, 16, sse2);
+    }
+    /* SSSE3 and AVX replace all four sizes of the six directional predictors installed by SSE2. */
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, ssse3);
+        init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, ssse3);
+        init_ipred_funcs(vl, VERT_LEFT, 16, ssse3);
+        init_ipred_funcs(vr, VERT_RIGHT, 16, ssse3);
+        init_ipred_funcs(hu, HOR_UP, 16, ssse3);
+        init_ipred_funcs(hd, HOR_DOWN, 16, ssse3);
+    }
+
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
+        init_fpel_func(2, 0,  32, put, , avx);
+        init_fpel_func(1, 0,  64, put, , avx);
+        init_fpel_func(0, 0, 128, put, , avx);
+        init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, avx);
+        init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, avx);
+        init_ipred_funcs(vl, VERT_LEFT, 16, avx);
+        init_ipred_funcs(vr, VERT_RIGHT, 16, avx);
+        init_ipred_funcs(hu, HOR_UP, 16, avx);
+        init_ipred_funcs(hd, HOR_DOWN, 16, avx);
+    }
+    /* AVX2 only covers the avg fpel sizes 32..128 and a handful of individual predictor sizes. */
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        init_fpel_func(2, 1,  32, avg, _16, avx2);
+        init_fpel_func(1, 1,  64, avg, _16, avx2);
+        init_fpel_func(0, 1, 128, avg, _16, avx2);
+        init_ipred_func(dl, DIAG_DOWN_LEFT, 16, 16, avx2);
+        init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2);
+        init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2);
+        init_ipred_func(vl, VERT_LEFT, 16, 16, avx2);
+        init_ipred_func(hd, HOR_DOWN, 16, 16, avx2);
+#if ARCH_X86_64
+        init_ipred_func(dr, DIAG_DOWN_RIGHT, 32, 16, avx2);
+#endif /* ARCH_X86_64 */
+    }
+
+#endif /* HAVE_X86ASM */
+}
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp_template.c b/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp_template.c
new file mode 100644
index 0000000000..f93ea2468e
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp_template.c
@@ -0,0 +1,239 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "libavcodec/x86/vp9dsp_init.h"
+/* This file is compiled once per including file, which must define BPC and INIT_FUNC before the #include. */
+#if HAVE_X86ASM
+/* Filter table defined in another translation unit. */
+extern const int16_t ff_filters_16bpp[3][15][4][16];
+/* decl_mc_funcs and mc_rep_funcs are defined outside this view. */
+decl_mc_funcs(4, sse2, int16_t, 16, BPC);
+decl_mc_funcs(8, sse2, int16_t, 16, BPC);
+decl_mc_funcs(16, avx2, int16_t, 16, BPC);
+/* NOTE(review): mc_rep_funcs(N, M, ...) presumably builds the size-N functions out of the size-M ones - confirm against the macro. */
+mc_rep_funcs(16,  8, 16, sse2, int16_t, 16, BPC)
+mc_rep_funcs(32, 16, 32, sse2, int16_t, 16, BPC)
+mc_rep_funcs(64, 32, 64, sse2, int16_t, 16, BPC)
+#if HAVE_AVX2_EXTERNAL
+mc_rep_funcs(32, 16, 32, avx2, int16_t, 16, BPC)
+mc_rep_funcs(64, 32, 64, avx2, int16_t, 16, BPC)
+#endif /* HAVE_AVX2_EXTERNAL */
+/* 2-D 8-tap put/avg instantiations for sse2, plus avx2 ones for sizes 64, 32 and 16 when HAVE_AVX2_EXTERNAL is set; the filters_8tap_2d_* macros are defined outside this view. */
+filters_8tap_2d_fn2(put, 16, BPC, 2, sse2, sse2, 16bpp)
+filters_8tap_2d_fn2(avg, 16, BPC, 2, sse2, sse2, 16bpp)
+#if HAVE_AVX2_EXTERNAL
+filters_8tap_2d_fn(put, 64, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 64, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(put, 32, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 32, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(put, 16, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 16, 32, BPC, 2, avx2, 16bpp)
+#endif /* HAVE_AVX2_EXTERNAL */
+/* 1-D 8-tap put/avg instantiations, same sse2 / optional avx2 (64, 32, 16) split as the 2-D ones; the filters_8tap_1d_* macros are defined outside this view. */
+filters_8tap_1d_fn3(put, BPC, sse2, sse2, 16bpp)
+filters_8tap_1d_fn3(avg, BPC, sse2, sse2, 16bpp)
+#if HAVE_AVX2_EXTERNAL
+filters_8tap_1d_fn2(put, 64, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 64, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(put, 32, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 32, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(put, 16, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 16, BPC, avx2, 16bpp)
+#endif /* HAVE_AVX2_EXTERNAL */
+/* Prototype of one external loop filter: ff_vp9_loop_filter_<dir>_<wd>_<bpp>_<opt>(dst, stride, E, I, H). */
+#define decl_lpf_func(dir, wd, bpp, opt) \
+void ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                     int E, int I, int H)
+/* One prototype per instruction set: sse2, ssse3 and avx. */
+#define decl_lpf_funcs(dir, wd, bpp) \
+decl_lpf_func(dir, wd, bpp, sse2); \
+decl_lpf_func(dir, wd, bpp, ssse3); \
+decl_lpf_func(dir, wd, bpp, avx)
+/* One set per wd value (4, 8 and 16) at the current BPC. */
+#define decl_lpf_funcs_wd(dir) \
+decl_lpf_funcs(dir,  4, BPC); \
+decl_lpf_funcs(dir,  8, BPC); \
+decl_lpf_funcs(dir, 16, BPC)
+
+decl_lpf_funcs_wd(h);
+decl_lpf_funcs_wd(v);
+/* Defines static loop_filter_<dir>_16_<bpp>_<opt>(): calls the external _16 filter twice, at dst and at dst + off (dst is uint8_t *, so off counts bytes). */
+#define lpf_16_wrapper(dir, off, bpp, opt) \
+static void loop_filter_##dir##_16_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                 int E, int I, int H) \
+{ \
+    ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst,       stride, E, I, H); \
+    ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst + off, stride, E, I, H); \
+}
+/* The second call is offset by 8 * stride for h and by 16 for v. */
+#define lpf_16_wrappers(bpp, opt) \
+lpf_16_wrapper(h, 8 * stride, bpp, opt) \
+lpf_16_wrapper(v, 16,         bpp, opt)
+
+lpf_16_wrappers(BPC, sse2)
+lpf_16_wrappers(BPC, ssse3)
+lpf_16_wrappers(BPC, avx)
+/* Defines static loop_filter_<dir>_<wd1><wd2>_*(): E, I and H each carry two values; the low 8 bits go to the wd1 filter at dst, the value shifted right by 8 to the wd2 filter at dst + off. */
+#define lpf_mix2_wrapper(dir, off, wd1, wd2, bpp, opt) \
+static void loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                           int E, int I, int H) \
+{ \
+    ff_vp9_loop_filter_##dir##_##wd1##_##bpp##_##opt(dst,       stride, \
+                                                     E & 0xff, I & 0xff, H & 0xff); \
+    ff_vp9_loop_filter_##dir##_##wd2##_##bpp##_##opt(dst + off, stride, \
+                                                     E >> 8,   I >> 8,   H >> 8); \
+}
+/* Same offsets as the _16 wrappers: 8 * stride for h, 16 for v. */
+#define lpf_mix2_wrappers(wd1, wd2, bpp, opt) \
+lpf_mix2_wrapper(h, 8 * stride, wd1, wd2, bpp, opt) \
+lpf_mix2_wrapper(v, 16,         wd1, wd2, bpp, opt)
+/* All four width pairs: 4/4, 4/8, 8/4, 8/8. The last line ends in a backslash, so the blank line after it still belongs to the macro. */
+#define lpf_mix2_wrappers_set(bpp, opt) \
+lpf_mix2_wrappers(4, 4, bpp, opt) \
+lpf_mix2_wrappers(4, 8, bpp, opt) \
+lpf_mix2_wrappers(8, 4, bpp, opt) \
+lpf_mix2_wrappers(8, 8, bpp, opt) \
+
+lpf_mix2_wrappers_set(BPC, sse2)
+lpf_mix2_wrappers_set(BPC, ssse3)
+lpf_mix2_wrappers_set(BPC, avx)
+/* tm predictor and inverse-transform declarations; the 4x4 idct_idct is declared for mmxext (plus a full ssse3 4x4 set) when BPC == 10 and for sse2 otherwise. */
+decl_ipred_fns(tm, BPC, mmxext, sse2);
+
+decl_itxfm_func(iwht, iwht, 4, BPC, mmxext);
+#if BPC == 10
+decl_itxfm_func(idct,  idct,  4, BPC, mmxext);
+decl_itxfm_funcs(4, BPC, ssse3);
+#else
+decl_itxfm_func(idct,  idct,  4, BPC, sse2);
+#endif /* BPC == 10 */
+decl_itxfm_func(idct,  iadst, 4, BPC, sse2);
+decl_itxfm_func(iadst, idct,  4, BPC, sse2);
+decl_itxfm_func(iadst, iadst, 4, BPC, sse2);
+decl_itxfm_funcs(8, BPC, sse2);
+decl_itxfm_funcs(16, BPC, sse2);
+decl_itxfm_func(idct,  idct, 32, BPC, sse2);
+#endif /* HAVE_X86ASM */
+/* Per-bit-depth init (its name comes from INIT_FUNC): installs the BPC-specific mc, loop-filter, tm-ipred and itxfm pointers, then runs the shared 16bpp init. */
+av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact)
+{
+#if HAVE_X86ASM
+    int cpu_flags = av_get_cpu_flags();
+    /* Local helper macros: each pastes its arguments into the name of a routine declared or defined earlier in this file and stores it in dsp. */
+#define init_lpf_8_func(idx1, idx2, dir, wd, bpp, opt) \
+    dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt
+#define init_lpf_16_func(idx, dir, bpp, opt) \
+    dsp->loop_filter_16[idx] = loop_filter_##dir##_16_##bpp##_##opt
+#define init_lpf_mix2_func(idx1, idx2, idx3, dir, wd1, wd2, bpp, opt) \
+    dsp->loop_filter_mix2[idx1][idx2][idx3] = loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt
+    /* Sets the loop_filter_8, loop_filter_16 and loop_filter_mix2 entries listed below for one instruction set; the last index is 0 for h and 1 for v. */
+#define init_lpf_funcs(bpp, opt) \
+    init_lpf_8_func(0, 0, h,  4, bpp, opt); \
+    init_lpf_8_func(0, 1, v,  4, bpp, opt); \
+    init_lpf_8_func(1, 0, h,  8, bpp, opt); \
+    init_lpf_8_func(1, 1, v,  8, bpp, opt); \
+    init_lpf_8_func(2, 0, h, 16, bpp, opt); \
+    init_lpf_8_func(2, 1, v, 16, bpp, opt); \
+    init_lpf_16_func(0, h, bpp, opt); \
+    init_lpf_16_func(1, v, bpp, opt); \
+    init_lpf_mix2_func(0, 0, 0, h, 4, 4, bpp, opt); \
+    init_lpf_mix2_func(0, 1, 0, h, 4, 8, bpp, opt); \
+    init_lpf_mix2_func(1, 0, 0, h, 8, 4, bpp, opt); \
+    init_lpf_mix2_func(1, 1, 0, h, 8, 8, bpp, opt); \
+    init_lpf_mix2_func(0, 0, 1, v, 4, 4, bpp, opt); \
+    init_lpf_mix2_func(0, 1, 1, v, 4, 8, bpp, opt); \
+    init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
+    init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)
+    /* init_itx_func carries its own trailing ';', so its users expand to an extra (harmless) empty statement. init_itx_func_one puts one routine in all four type slots. */
+#define init_itx_func(idxa, idxb, typea, typeb, size, bpp, opt) \
+    dsp->itxfm_add[idxa][idxb] = \
+        cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt);
+#define init_itx_func_one(idx, typea, typeb, size, bpp, opt) \
+    init_itx_func(idx, DCT_DCT,   typea, typeb, size, bpp, opt); \
+    init_itx_func(idx, ADST_DCT,  typea, typeb, size, bpp, opt); \
+    init_itx_func(idx, DCT_ADST,  typea, typeb, size, bpp, opt); \
+    init_itx_func(idx, ADST_ADST, typea, typeb, size, bpp, opt)
+#define init_itx_funcs(idx, size, bpp, opt) \
+    init_itx_func(idx, DCT_DCT,   idct,  idct,  size, bpp, opt); \
+    init_itx_func(idx, ADST_DCT,  idct,  iadst, size, bpp, opt); \
+    init_itx_func(idx, DCT_ADST,  iadst, idct,  size, bpp, opt); \
+    init_itx_func(idx, ADST_ADST, iadst, iadst, size, bpp, opt); \
+
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        init_ipred_func(tm, TM_VP8, 4, BPC, mmxext);
+        if (!bitexact) { /* the transforms in this branch are only installed when bitexact is 0 */
+            init_itx_func_one(4 /* lossless */, iwht, iwht, 4, BPC, mmxext);
+#if BPC == 10
+            init_itx_func(TX_4X4, DCT_DCT, idct, idct, 4, 10, mmxext);
+#endif /* BPC == 10 */
+        }
+    }
+    /* SSE2 tier: subpel mc for all sizes, loop filters, tm prediction for 8/16/32 and most of the inverse transforms. */
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        init_subpel3(0, put, BPC, sse2);
+        init_subpel3(1, avg, BPC, sse2);
+        init_lpf_funcs(BPC, sse2);
+        init_8_16_32_ipred_funcs(tm, TM_VP8, BPC, sse2);
+#if BPC == 10
+        if (!bitexact) {
+            init_itx_func(TX_4X4, ADST_DCT,  idct,  iadst, 4, 10, sse2);
+            init_itx_func(TX_4X4, DCT_ADST,  iadst, idct,  4, 10, sse2);
+            init_itx_func(TX_4X4, ADST_ADST, iadst, iadst, 4, 10, sse2);
+        }
+#else
+        init_itx_funcs(TX_4X4, 4, 12, sse2); /* non-10-bit build: installed regardless of bitexact */
+#endif /* BPC == 10 */
+        init_itx_funcs(TX_8X8, 8, BPC, sse2);
+        init_itx_funcs(TX_16X16, 16, BPC, sse2);
+        init_itx_func_one(TX_32X32, idct, idct, 32, BPC, sse2);
+    }
+    /* SSSE3 replaces the loop filters and, for non-bitexact 10-bit only, all four 4x4 transforms. */
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        init_lpf_funcs(BPC, ssse3);
+#if BPC == 10
+        if (!bitexact) {
+            init_itx_funcs(TX_4X4, 4, BPC, ssse3);
+        }
+#endif /* BPC == 10 */
+    }
+    /* AVX only replaces the loop filters. */
+    if (EXTERNAL_AVX(cpu_flags)) {
+        init_lpf_funcs(BPC, avx);
+    }
+    /* AVX2 mc: sizes 64 and 32 through init_subpel3_32_64, size 16 (first index 2) through init_subpel2. */
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+#if HAVE_AVX2_EXTERNAL
+        init_subpel3_32_64(0,  put, BPC, avx2);
+        init_subpel3_32_64(1,  avg, BPC, avx2);
+        init_subpel2(2, 0, 16, put, BPC, avx2);
+        init_subpel2(2, 1, 16, avg, BPC, avx2);
+#endif /* HAVE_AVX2_EXTERNAL */
+    }
+
+#endif /* HAVE_X86ASM */
+    /* Outside the HAVE_X86ASM guard, so the shared init is always called. */
+    ff_vp9dsp_init_16bpp_x86(dsp);
+}
diff --git a/media/ffvpx/libavcodec/x86/vp9intrapred.asm b/media/ffvpx/libavcodec/x86/vp9intrapred.asm
new file mode 100644
index 0000000000..31f7d449fd
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9intrapred.asm
@@ -0,0 +1,2044 @@
+;******************************************************************************
+;* VP9 Intra prediction SIMD optimizations
+;*
+;* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* Parts based on:
+;* H.264 intra prediction asm optimizations
+;* Copyright (c) 2010 Fiona Glaser
+;* Copyright (c) 2010 Holger Lubitz
+;* Copyright (c) 2010 Loren Merritt
+;* Copyright (c) 2010 Ronald S. Bultje
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+; pw_* are 16-bit word constants; "m" in a name stands for minus.
+pw_m256: times 16 dw -256
+pw_m255: times 16 dw -255
+pw_4096: times 8 dw 4096
+; pb_* byte tables are mostly named after their contents: "AtoB" is an ascending run, "NxV" is V repeated N times, "m1" is -1, hex digits stand for 10..15.
+pb_4x3_4x2_4x1_4x0: times 4 db 3
+                    times 4 db 2
+                    times 4 db 1
+                    times 4 db 0
+pb_8x1_8x0:   times 8 db 1
+              times 8 db 0
+pb_8x3_8x2:   times 8 db 3
+              times 8 db 2
+pb_0to5_2x7:  db 0, 1, 2, 3, 4, 5, 7, 7
+              times 8 db -1
+pb_0to6_9x7:  db 0, 1, 2, 3, 4, 5, 6
+              times 9 db 7
+pb_1to6_10x7: db 1, 2, 3, 4, 5, 6
+              times 10 db 7
+pb_2to6_3x7:                                    ; no data of its own: same address as pb_2to6_11x7 below
+pb_2to6_11x7: db 2, 3, 4, 5, 6
+              times 11 db 7
+pb_1toE_2xF:  db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15
+pb_2toE_3xF:  db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15
+pb_13456_3xm1: db 1, 3, 4, 5, 6
+               times 3 db -1
+pb_6012_4xm1: db 6, 0, 1, 2
+              times 4 db -1
+pb_6xm1_246_8toE: times 6 db -1
+                  db 2, 4, 6, 8, 9, 10, 11, 12, 13, 14
+pb_6xm1_BDF_0to6: times 6 db -1
+                  db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6
+pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
+
+pb_15x0_1xm1: times 15 db 0
+              db -1
+pb_0to2_5x3: db 0, 1, 2
+             times 5 db 3
+pb_6xm1_2x0: times 6 db -1
+             times 2 db 0
+pb_6x0_2xm1: times 6 db 0
+             times 2 db -1
+; Constants below are not defined in this file; cextern imports them from elsewhere.
+cextern pb_1
+cextern pb_2
+cextern pb_3
+cextern pb_15
+cextern pw_2
+cextern pw_4
+cextern pw_8
+cextern pw_16
+cextern pw_32
+cextern pw_255
+cextern pw_512
+cextern pw_1024
+cextern pw_2048
+cextern pw_8192
+
+SECTION .text
+
+; dc_NxN(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
+; Fills the NxN block at dst with one byte value: the rounded mean of the N bytes at l and the N bytes at a.
+%macro DC_4to8_FUNCS 0
+cglobal vp9_ipred_dc_4x4, 4, 4, 0, dst, stride, l, a
+    movd                    m0, [lq]
+    punpckldq               m0, [aq]            ; low 8 bytes = 4 bytes from l, then 4 bytes from a
+    pxor                    m1, m1
+    psadbw                  m0, m1              ; sum of those 8 bytes
+%if cpuflag(ssse3)
+    pmulhrsw                m0, [pw_4096]       ; equals (sum + 4) >> 3
+    pshufb                  m0, m1              ; all-zero mask: replicate byte 0
+%else
+    paddw                   m0, [pw_4]
+    psraw                   m0, 3
+    punpcklbw               m0, m0
+    pshufw                  m0, m0, q0000
+%endif
+    movd      [dstq+strideq*0], m0
+    movd      [dstq+strideq*1], m0
+    lea                   dstq, [dstq+strideq*2]
+    movd      [dstq+strideq*0], m0
+    movd      [dstq+strideq*1], m0
+    RET
+
+cglobal vp9_ipred_dc_8x8, 4, 4, 0, dst, stride, l, a
+    movq                    m0, [lq]
+    movq                    m1, [aq]
+    DEFINE_ARGS dst, stride, stride3            ; l has been loaded; its register now holds stride3
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2
+    psadbw                  m1, m2
+    paddw                   m0, m1              ; sum of 16 edge bytes
+%if cpuflag(ssse3)
+    pmulhrsw                m0, [pw_2048]       ; equals (sum + 8) >> 4
+    pshufb                  m0, m2
+%else
+    paddw                   m0, [pw_8]
+    psraw                   m0, 4
+    punpcklbw               m0, m0
+    pshufw                  m0, m0, q0000
+%endif
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    RET
+%endmacro
+; Emit both functions twice: once for mmxext and once for ssse3.
+INIT_MMX mmxext
+DC_4to8_FUNCS
+INIT_MMX ssse3
+DC_4to8_FUNCS
+; 16x16 and 32x32 dc using 16-byte registers; psadbw leaves one partial sum per 64-bit half, folded together with movhlps before rounding.
+%macro DC_16to32_FUNCS 0
+cglobal vp9_ipred_dc_16x16, 4, 4, 3, dst, stride, l, a
+    mova                    m0, [lq]
+    mova                    m1, [aq]
+    DEFINE_ARGS dst, stride, stride3, cnt       ; l and a have been loaded; their registers now hold stride3 and cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2
+    psadbw                  m1, m2
+    paddw                   m0, m1
+    movhlps                 m1, m0
+    paddw                   m0, m1              ; low word = sum of 32 edge bytes
+%if cpuflag(ssse3)
+    pmulhrsw                m0, [pw_1024]       ; equals (sum + 16) >> 5
+    pshufb                  m0, m2
+%else
+    paddw                   m0, [pw_16]
+    psraw                   m0, 5
+    punpcklbw               m0, m0
+    pshuflw                 m0, m0, q0000
+    punpcklqdq              m0, m0
+%endif
+    mov                   cntd, 4               ; 4 iterations x 4 rows
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_dc_32x32, 4, 4, 5, dst, stride, l, a
+    mova                    m0, [lq]
+    mova                    m1, [lq+16]
+    mova                    m2, [aq]
+    mova                    m3, [aq+16]
+    DEFINE_ARGS dst, stride, stride3, cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m4, m4
+    psadbw                  m0, m4
+    psadbw                  m1, m4
+    psadbw                  m2, m4
+    psadbw                  m3, m4
+    paddw                   m0, m1
+    paddw                   m2, m3
+    paddw                   m0, m2
+    movhlps                 m1, m0
+    paddw                   m0, m1              ; low word = sum of 64 edge bytes
+%if cpuflag(ssse3)
+    pmulhrsw                m0, [pw_512]        ; equals (sum + 32) >> 6
+    pshufb                  m0, m4
+%else
+    paddw                   m0, [pw_32]
+    psraw                   m0, 6
+    punpcklbw               m0, m0
+    pshuflw                 m0, m0, q0000
+    punpcklqdq              m0, m0
+%endif
+    mov                   cntd, 8               ; 8 iterations x 4 rows, two 16-byte stores per row
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m0
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+; Emit both functions twice: once for sse2 and once for ssse3.
+INIT_XMM sse2
+DC_16to32_FUNCS
+INIT_XMM ssse3
+DC_16to32_FUNCS
+; 32x32 dc with 32-byte registers: one load per edge, then 4 iterations of 8 full-row stores. Only assembled when HAVE_AVX2_EXTERNAL is set.
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_ipred_dc_32x32, 4, 4, 3, dst, stride, l, a
+    mova                    m0, [lq]
+    mova                    m1, [aq]
+    DEFINE_ARGS dst, stride, stride3, cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2
+    psadbw                  m1, m2
+    paddw                   m0, m1
+    vextracti128           xm1, m0, 1           ; fold the upper 128 bits onto the lower
+    paddw                  xm0, xm1
+    movhlps                xm1, xm0
+    paddw                  xm0, xm1             ; low word = sum of 64 edge bytes
+    pmulhrsw               xm0, [pw_512]        ; equals (sum + 32) >> 6
+    vpbroadcastb            m0, xm0
+    mov                   cntd, 4
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+%endif ; HAVE_AVX2_EXTERNAL
+
+; dc_top/left_NxN(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
+; Like dc_NxN but averaging a single edge: %1 names the function (top or left), %2 selects the pointer that is read (a or l).
+%macro DC_1D_4to8_FUNCS 2 ; dir (top or left), arg (a or l)
+cglobal vp9_ipred_dc_%1_4x4, 4, 4, 0, dst, stride, l, a
+    movd                    m0, [%2q]
+    pxor                    m1, m1
+    psadbw                  m0, m1              ; sum of 4 edge bytes
+%if cpuflag(ssse3)
+    pmulhrsw                m0, [pw_8192]       ; equals (sum + 2) >> 2
+    pshufb                  m0, m1
+%else
+    paddw                   m0, [pw_2]
+    psraw                   m0, 2
+    punpcklbw               m0, m0
+    pshufw                  m0, m0, q0000
+%endif
+    movd      [dstq+strideq*0], m0
+    movd      [dstq+strideq*1], m0
+    lea                   dstq, [dstq+strideq*2]
+    movd      [dstq+strideq*0], m0
+    movd      [dstq+strideq*1], m0
+    RET
+
+cglobal vp9_ipred_dc_%1_8x8, 4, 4, 0, dst, stride, l, a
+    movq                    m0, [%2q]
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+    pxor                    m1, m1
+    psadbw                  m0, m1              ; sum of 8 edge bytes
+%if cpuflag(ssse3)
+    pmulhrsw                m0, [pw_4096]       ; equals (sum + 4) >> 3
+    pshufb                  m0, m1
+%else
+    paddw                   m0, [pw_4]
+    psraw                   m0, 3
+    punpcklbw               m0, m0
+    pshufw                  m0, m0, q0000
+%endif
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    RET
+%endmacro
+; Emit top and left variants for mmxext and for ssse3.
+INIT_MMX mmxext
+DC_1D_4to8_FUNCS top,  a
+DC_1D_4to8_FUNCS left, l
+INIT_MMX ssse3
+DC_1D_4to8_FUNCS top,  a
+DC_1D_4to8_FUNCS left, l
+; Single-edge dc for 16x16 and 32x32 with 16-byte registers; %2 selects the edge pointer as in the 4/8 versions.
+%macro DC_1D_16to32_FUNCS 2; dir (top or left), arg (a or l)
+cglobal vp9_ipred_dc_%1_16x16, 4, 4, 3, dst, stride, l, a
+    mova                    m0, [%2q]
+    DEFINE_ARGS dst, stride, stride3, cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2
+    movhlps                 m1, m0
+    paddw                   m0, m1              ; low word = sum of 16 edge bytes
+%if cpuflag(ssse3)
+    pmulhrsw                m0, [pw_2048]       ; equals (sum + 8) >> 4
+    pshufb                  m0, m2
+%else
+    paddw                   m0, [pw_8]
+    psraw                   m0, 4
+    punpcklbw               m0, m0
+    pshuflw                 m0, m0, q0000
+    punpcklqdq              m0, m0
+%endif
+    mov                   cntd, 4
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_dc_%1_32x32, 4, 4, 3, dst, stride, l, a
+    mova                    m0, [%2q]
+    mova                    m1, [%2q+16]
+    DEFINE_ARGS dst, stride, stride3, cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2
+    psadbw                  m1, m2
+    paddw                   m0, m1
+    movhlps                 m1, m0
+    paddw                   m0, m1              ; low word = sum of 32 edge bytes
+%if cpuflag(ssse3)
+    pmulhrsw                m0, [pw_1024]       ; equals (sum + 16) >> 5
+    pshufb                  m0, m2
+%else
+    paddw                   m0, [pw_16]
+    psraw                   m0, 5
+    punpcklbw               m0, m0
+    pshuflw                 m0, m0, q0000
+    punpcklqdq              m0, m0
+%endif
+    mov                   cntd, 8
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m0
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+; Emit top and left variants for sse2 and for ssse3.
+INIT_XMM sse2
+DC_1D_16to32_FUNCS top,  a
+DC_1D_16to32_FUNCS left, l
+INIT_XMM ssse3
+DC_1D_16to32_FUNCS top,  a
+DC_1D_16to32_FUNCS left, l
+; Single-edge 32x32 dc with 32-byte registers. The whole macro body is guarded, so it expands to nothing without HAVE_AVX2_EXTERNAL.
+%macro DC_1D_AVX2_FUNCS 2 ; dir (top or left), arg (a or l)
+%if HAVE_AVX2_EXTERNAL
+cglobal vp9_ipred_dc_%1_32x32, 4, 4, 3, dst, stride, l, a
+    mova                    m0, [%2q]
+    DEFINE_ARGS dst, stride, stride3, cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2
+    vextracti128           xm1, m0, 1
+    paddw                  xm0, xm1
+    movhlps                xm1, xm0
+    paddw                  xm0, xm1             ; low word = sum of 32 edge bytes
+    pmulhrsw               xm0, [pw_1024]       ; equals (sum + 16) >> 5
+    vpbroadcastb            m0, xm0
+    mov                   cntd, 4               ; 4 iterations x 8 rows
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+%endif
+%endmacro
+
+INIT_YMM avx2
+DC_1D_AVX2_FUNCS top,  a
+DC_1D_AVX2_FUNCS left, l
+
+; v
+; Vertical prediction: every row of the block is a copy of the bytes at a; l is never read. 8x8: one 8-byte load, 8 row stores.
+INIT_MMX mmx
+cglobal vp9_ipred_v_8x8, 4, 4, 0, dst, stride, l, a
+    movq                    m0, [aq]
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    RET
+; 16x16 vertical prediction: one 16-byte load from a, stored to 16 rows (4 iterations x 4 rows).
+INIT_XMM sse
+cglobal vp9_ipred_v_16x16, 4, 4, 1, dst, stride, l, a
+    mova                    m0, [aq]
+    DEFINE_ARGS dst, stride, stride3, cnt
+    lea               stride3q, [strideq*3]
+    mov                   cntd, 4
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+
+INIT_XMM sse  ; 128-bit registers, SSE
+cglobal vp9_ipred_v_32x32, 4, 4, 2, dst, stride, l, a  ; 32x32 vertical prediction: copy the 32 bytes at [a] into each of 32 rows
+    mova                    m0, [aq]  ; left 16 pixels of the row
+    mova                    m1, [aq+16]  ; right 16 pixels of the row
+    DEFINE_ARGS dst, stride, stride3, cnt  ; l/a registers are reused as stride3/cnt
+    lea               stride3q, [strideq*3]
+    mov                   cntd, 8  ; 8 iterations x 4 rows = 32 rows
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m1
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m1
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m1
+    lea                   dstq, [dstq+strideq*4]  ; advance 4 rows
+    dec                   cntd
+    jg .loop  ; repeat while the counter is still positive
+    RET
+
+INIT_YMM avx  ; 256-bit registers, AVX
+cglobal vp9_ipred_v_32x32, 4, 4, 1, dst, stride, l, a  ; 32x32 vertical prediction with one 32-byte register per row
+    mova                    m0, [aq]  ; aligned load of the full 32-pixel row
+    DEFINE_ARGS dst, stride, stride3, cnt  ; l/a registers are reused as stride3/cnt
+    lea               stride3q, [strideq*3]
+    mov                   cntd, 4  ; 4 iterations x 8 rows = 32 rows
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]  ; advance 4 rows
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]  ; advance 4 more rows
+    dec                   cntd
+    jg .loop  ; repeat while the counter is still positive
+    RET
+
+; h (horizontal prediction: each row is filled with one pixel from [l])
+
+%macro H_XMM_FUNCS 2  ; args: xmm register count for 8x8, xmm register count for 16x16 and 32x32
+%if notcpuflag(avx)  ; the 4x4 version is not emitted for the avx instantiation
+cglobal vp9_ipred_h_4x4, 3, 4, 1, dst, stride, l, stride3  ; 4x4 horizontal prediction; only 3 arguments are loaded (a is unused)
+    movd                    m0, [lq]  ; 4 left pixels l[0..3]
+%if cpuflag(ssse3)
+    pshufb                  m0, [pb_4x3_4x2_4x1_4x0]  ; table defined outside this view; presumably 4x byte3, 4x byte2, 4x byte1, 4x byte0
+%else
+    punpcklbw               m0, m0  ; bytes 0011 2233
+    pshuflw                 m0, m0, q0123  ; reverse the 4 low words: 3322 1100
+    punpcklwd               m0, m0  ; 3333 2222 1111 0000
+%endif
+    lea               stride3q, [strideq*3]
+    movd      [dstq+strideq*0], m0  ; row 0 is filled with l[3] (sse2 path; the left array is consumed last-to-first)
+    psrldq                  m0, 4
+    movd      [dstq+strideq*1], m0
+    psrldq                  m0, 4
+    movd      [dstq+strideq*2], m0
+    psrldq                  m0, 4
+    movd      [dstq+stride3q ], m0
+    RET
+%endif
+
+cglobal vp9_ipred_h_8x8, 3, 5, %1, dst, stride, l, stride3, cnt  ; 8x8 horizontal prediction, 4 rows per iteration
+%if cpuflag(ssse3)
+    mova                    m2, [pb_8x1_8x0]  ; shuffle tables defined outside this view
+    mova                    m3, [pb_8x3_8x2]
+%endif
+    lea               stride3q, [strideq*3]
+    mov                   cntq, 1  ; counts 1 -> 0: two iterations
+.loop:
+    movd                    m0, [lq+cntq*4]  ; 4 left pixels for this group of rows
+%if cpuflag(ssse3)
+    pshufb                  m1, m0, m3
+    pshufb                  m0, m2
+%else
+    punpcklbw               m0, m0
+    punpcklwd               m0, m0  ; each of the 4 bytes replicated 4x: 0000 1111 2222 3333
+    pshufd                  m1, m0, q2233  ; low qword = 8x byte3, high qword = 8x byte2
+    pshufd                  m0, m0, q0011  ; low qword = 8x byte1, high qword = 8x byte0
+%endif
+    movq      [dstq+strideq*0], m1
+    movhps    [dstq+strideq*1], m1
+    movq      [dstq+strideq*2], m0
+    movhps    [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]  ; advance 4 rows
+    dec                   cntq
+    jge .loop  ; loop until the counter goes negative
+    RET
+
+cglobal vp9_ipred_h_16x16, 3, 5, %2, dst, stride, l, stride3, cnt  ; 16x16 horizontal prediction, 4 rows per iteration
+%if cpuflag(ssse3)
+    mova                    m5, [pb_1]  ; pb_N constants are defined outside this view; used here as pshufb byte-broadcast masks
+    mova                    m6, [pb_2]
+    mova                    m7, [pb_3]
+    pxor                    m4, m4  ; all-zero mask: pshufb broadcasts byte 0
+%endif
+    lea               stride3q, [strideq*3]
+    mov                   cntq, 3  ; counts 3 -> 0: four iterations
+.loop:
+    movd                    m3, [lq+cntq*4]  ; 4 left pixels for this group of rows
+%if cpuflag(ssse3)
+    pshufb                  m0, m3, m7
+    pshufb                  m1, m3, m6
+%else
+    punpcklbw               m3, m3
+    punpcklwd               m3, m3  ; dword k = byte k replicated 4x
+    pshufd                  m0, m3, q3333  ; 16x byte3
+    pshufd                  m1, m3, q2222  ; 16x byte2
+%endif
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+%if cpuflag(ssse3)
+    pshufb                  m2, m3, m5
+    pshufb                  m3, m4
+%else
+    pshufd                  m2, m3, q1111  ; 16x byte1
+    pshufd                  m3, m3, q0000  ; 16x byte0
+%endif
+    mova      [dstq+strideq*2], m2
+    mova      [dstq+stride3q ], m3
+    lea                   dstq, [dstq+strideq*4]  ; advance 4 rows
+    dec                   cntq
+    jge .loop  ; loop until the counter goes negative
+    RET
+
+cglobal vp9_ipred_h_32x32, 3, 5, %2, dst, stride, l, stride3, cnt  ; 32x32 horizontal prediction, 4 rows per iteration, two 16-byte stores per row
+%if cpuflag(ssse3)
+    mova                    m5, [pb_1]
+    mova                    m6, [pb_2]
+    mova                    m7, [pb_3]
+    pxor                    m4, m4  ; all-zero mask: pshufb broadcasts byte 0
+%endif
+    lea               stride3q, [strideq*3]
+    mov                   cntq, 7  ; counts 7 -> 0: eight iterations
+.loop:
+    movd                    m3, [lq+cntq*4]  ; 4 left pixels for this group of rows
+%if cpuflag(ssse3)
+    pshufb                  m0, m3, m7
+    pshufb                  m1, m3, m6
+%else
+    punpcklbw               m3, m3
+    punpcklwd               m3, m3  ; dword k = byte k replicated 4x
+    pshufd                  m0, m3, q3333  ; 16x byte3
+    pshufd                  m1, m3, q2222  ; 16x byte2
+%endif
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*1+ 0], m1
+    mova   [dstq+strideq*1+16], m1
+%if cpuflag(ssse3)
+    pshufb                  m2, m3, m5
+    pshufb                  m3, m4
+%else
+    pshufd                  m2, m3, q1111  ; 16x byte1
+    pshufd                  m3, m3, q0000  ; 16x byte0
+%endif
+    mova   [dstq+strideq*2+ 0], m2
+    mova   [dstq+strideq*2+16], m2
+    mova   [dstq+stride3q + 0], m3
+    mova   [dstq+stride3q +16], m3
+    lea                   dstq, [dstq+strideq*4]  ; advance 4 rows
+    dec                   cntq
+    jge .loop  ; loop until the counter goes negative
+    RET
+%endmacro
+
+INIT_XMM sse2
+H_XMM_FUNCS 2, 4  ; sse2 paths need fewer xmm registers
+INIT_XMM ssse3
+H_XMM_FUNCS 4, 8  ; ssse3 paths keep the shuffle masks in extra registers
+INIT_XMM avx
+H_XMM_FUNCS 4, 8  ; avx build; h_4x4 is skipped by the notcpuflag(avx) guard
+
+%if HAVE_AVX2_EXTERNAL  ; only assembled when HAVE_AVX2_EXTERNAL is non-zero
+INIT_YMM avx2
+cglobal vp9_ipred_h_32x32, 3, 5, 8, dst, stride, l, stride3, cnt  ; 32x32 horizontal prediction, one 32-byte store per row
+    mova                    m5, [pb_1]  ; pb_N constants are defined outside this view; used here as pshufb byte-broadcast masks
+    mova                    m6, [pb_2]
+    mova                    m7, [pb_3]
+    pxor                    m4, m4  ; all-zero mask: pshufb broadcasts byte 0
+    lea               stride3q, [strideq*3]
+    mov                   cntq, 7  ; counts 7 -> 0: eight iterations x 4 rows
+.loop:
+    movd                   xm3, [lq+cntq*4]  ; 4 left pixels for this group of rows
+    vinserti128             m3, m3, xm3, 1  ; copy the low lane into the high lane (pshufb shuffles within each lane)
+    pshufb                  m0, m3, m7
+    pshufb                  m1, m3, m6
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+    pshufb                  m2, m3, m5
+    pshufb                  m3, m4
+    mova      [dstq+strideq*2], m2
+    mova      [dstq+stride3q ], m3
+    lea                   dstq, [dstq+strideq*4]  ; advance 4 rows
+    dec                   cntq
+    jge .loop  ; loop until the counter goes negative
+    RET
+%endif
+
+; tm (each pixel = left + top - topleft, clamped by unsigned saturation)
+
+%macro TM_MMX_FUNCS 0  ; emits vp9_ipred_tm_4x4 for the current MMX-width instruction set
+cglobal vp9_ipred_tm_4x4, 4, 4, 0, dst, stride, l, a  ; 4x4: pixel = sat_u8(left + top - topleft), 2 rows per iteration
+    pxor                    m1, m1  ; zero, used for byte -> word unpacking
+    movd                    m0, [aq]  ; top pixels a[0..3]
+    pinsrw                  m2, [aq-1], 0  ; low word = bytes a[-1] (top-left) and a[0]
+    punpcklbw               m0, m1  ; top pixels widened to words
+    DEFINE_ARGS dst, stride, l, cnt  ; a is dead from here on: its register becomes cnt
+%if cpuflag(ssse3)
+    mova                    m3, [pw_m256]  ; shuffle masks defined outside this view; presumably select byte 0 / byte 1 zero-extended into every word
+    mova                    m1, [pw_m255]
+    pshufb                  m2, m3  ; top-left broadcast as words
+%else
+    punpcklbw               m2, m1
+    pshufw                  m2, m2, q0000  ; top-left broadcast as words
+%endif
+    psubw                   m0, m2  ; m0 = top - topleft (signed words), loop invariant
+    mov                   cntq, 1  ; counts 1 -> 0: two iterations x 2 rows
+.loop:
+    pinsrw                  m2, [lq+cntq*2], 0  ; two left pixels for this row pair
+%if cpuflag(ssse3)
+    pshufb                  m4, m2, m1
+    pshufb                  m2, m3
+%else
+    punpcklbw               m2, m1  ; m1 is still zero on this path
+    pshufw                  m4, m2, q1111  ; second left pixel broadcast as words
+    pshufw                  m2, m2, q0000  ; first left pixel broadcast as words
+%endif
+    paddw                   m4, m0
+    paddw                   m2, m0
+    packuswb                m4, m4  ; back to bytes with unsigned saturation
+    packuswb                m2, m2
+    movd      [dstq+strideq*0], m4  ; upper row of the pair uses the higher-addressed left pixel
+    movd      [dstq+strideq*1], m2
+    lea                   dstq, [dstq+strideq*2]  ; advance 2 rows
+    dec                   cntq
+    jge .loop  ; loop until the counter goes negative
+    RET
+%endmacro
+
+INIT_MMX mmxext
+TM_MMX_FUNCS  ; mmxext build (pshufw path)
+INIT_MMX ssse3
+TM_MMX_FUNCS  ; ssse3 build (pshufb path)
+
+%macro TM_XMM_FUNCS 0  ; emits the 8x8, 16x16 and 32x32 tm predictors for the current XMM instruction set
+cglobal vp9_ipred_tm_8x8, 4, 4, 5, dst, stride, l, a  ; 8x8: pixel = sat_u8(left + top - topleft), 2 rows per iteration
+    pxor                    m1, m1  ; zero, used for byte -> word unpacking
+    movh                    m0, [aq]  ; top pixels a[0..7]
+    pinsrw                  m2, [aq-1], 0  ; low word = bytes a[-1] (top-left) and a[0]
+    punpcklbw               m0, m1  ; top pixels widened to words
+    DEFINE_ARGS dst, stride, l, cnt  ; a is dead from here on: its register becomes cnt
+%if cpuflag(ssse3)
+    mova                    m3, [pw_m256]  ; shuffle masks defined outside this view; presumably select byte 0 / byte 1 zero-extended into every word
+    mova                    m1, [pw_m255]
+    pshufb                  m2, m3  ; top-left broadcast as words
+%else
+    punpcklbw               m2, m1
+    punpcklwd               m2, m2
+    pshufd                  m2, m2, q0000  ; top-left broadcast as words
+%endif
+    psubw                   m0, m2  ; m0 = top - topleft (signed words), loop invariant
+    mov                   cntq, 3  ; counts 3 -> 0: four iterations x 2 rows
+.loop:
+    pinsrw                  m2, [lq+cntq*2], 0  ; two left pixels for this row pair
+%if cpuflag(ssse3)
+    pshufb                  m4, m2, m1
+    pshufb                  m2, m3
+%else
+    punpcklbw               m2, m1  ; m1 is still zero on this path
+    punpcklwd               m2, m2
+    pshufd                  m4, m2, q1111  ; second left pixel broadcast as words
+    pshufd                  m2, m2, q0000  ; first left pixel broadcast as words
+%endif
+    paddw                   m4, m0
+    paddw                   m2, m0
+    packuswb                m4, m2  ; low 8 bytes = upper row, high 8 bytes = lower row (unsigned saturation)
+    movh      [dstq+strideq*0], m4
+    movhps    [dstq+strideq*1], m4
+    lea                   dstq, [dstq+strideq*2]  ; advance 2 rows
+    dec                   cntq
+    jge .loop  ; loop until the counter goes negative
+    RET
+
+cglobal vp9_ipred_tm_16x16, 4, 4, 8, dst, stride, l, a  ; 16x16 version of the same predictor, 2 rows per iteration
+    pxor                    m3, m3  ; zero, used for byte -> word unpacking
+    mova                    m0, [aq]  ; top pixels a[0..15]
+    pinsrw                  m2, [aq-1], 0  ; low word = bytes a[-1] (top-left) and a[0]
+    punpckhbw               m1, m0, m3  ; top pixels 8..15 as words
+    punpcklbw               m0, m3  ; top pixels 0..7 as words
+    DEFINE_ARGS dst, stride, l, cnt  ; a is dead from here on: its register becomes cnt
+%if cpuflag(ssse3)
+    mova                    m4, [pw_m256]  ; shuffle masks defined outside this view (see tm_8x8)
+    mova                    m3, [pw_m255]
+    pshufb                  m2, m4  ; top-left broadcast as words
+%else
+    punpcklbw               m2, m3
+    punpcklwd               m2, m2
+    pshufd                  m2, m2, q0000  ; top-left broadcast as words
+%endif
+    psubw                   m1, m2  ; m1:m0 = top - topleft, loop invariant
+    psubw                   m0, m2
+    mov                   cntq, 7  ; counts 7 -> 0: eight iterations x 2 rows
+.loop:
+    pinsrw                  m7, [lq+cntq*2], 0  ; two left pixels for this row pair
+%if cpuflag(ssse3)
+    pshufb                  m5, m7, m3
+    pshufb                  m7, m4
+%else
+    punpcklbw               m7, m3  ; m3 is still zero on this path
+    punpcklwd               m7, m7
+    pshufd                  m5, m7, q1111  ; second left pixel broadcast as words
+    pshufd                  m7, m7, q0000  ; first left pixel broadcast as words
+%endif
+    paddw                   m2, m5, m0  ; upper row, columns 0..7
+    paddw                   m5, m1  ; upper row, columns 8..15
+    paddw                   m6, m7, m0  ; lower row, columns 0..7
+    paddw                   m7, m1  ; lower row, columns 8..15
+    packuswb                m2, m5  ; back to bytes with unsigned saturation
+    packuswb                m6, m7
+    mova      [dstq+strideq*0], m2
+    mova      [dstq+strideq*1], m6
+    lea                   dstq, [dstq+strideq*2]  ; advance 2 rows
+    dec                   cntq
+    jge .loop  ; loop until the counter goes negative
+    RET
+
+%if ARCH_X86_64  ; mem = stack scratch size: none on x86-64, 64 bytes otherwise (four 16-byte spills, see below)
+%define mem 0
+%else
+%define mem 64
+%endif
+cglobal vp9_ipred_tm_32x32, 4, 4, 14, mem, dst, stride, l, a  ; 32x32 version of the same predictor, 2 rows per iteration
+    pxor                    m5, m5  ; zero, used for byte -> word unpacking
+    pinsrw                  m4, [aq-1], 0  ; low word = bytes a[-1] (top-left) and a[0]
+    mova                    m0, [aq]  ; top pixels a[0..15]
+    mova                    m2, [aq+16]  ; top pixels a[16..31]
+    DEFINE_ARGS dst, stride, l, cnt  ; a is dead from here on: its register becomes cnt
+%if cpuflag(ssse3)
+%if ARCH_X86_64
+    mova                   m12, [pw_m256]  ; x86-64: keep both shuffle masks in registers
+    mova                   m13, [pw_m255]
+%define pw_m256_reg m12
+%define pw_m255_reg m13
+%else
+%define pw_m256_reg [pw_m256]
+%define pw_m255_reg [pw_m255]
+%endif
+    pshufb                  m4, pw_m256_reg  ; top-left broadcast as words
+%else
+    punpcklbw               m4, m5
+    punpcklwd               m4, m4
+    pshufd                  m4, m4, q0000  ; top-left broadcast as words
+%endif
+    punpckhbw               m1, m0,  m5  ; widen the 32 top pixels to words in m0..m3
+    punpckhbw               m3, m2,  m5
+    punpcklbw               m0, m5
+    punpcklbw               m2, m5
+    psubw                   m1, m4  ; m0..m3 = top - topleft, loop invariant
+    psubw                   m0, m4
+    psubw                   m3, m4
+    psubw                   m2, m4
+%if ARCH_X86_64
+    SWAP                     0, 8  ; x86-64: rename the invariants to m8..m11 so m0..m7 are free in the loop
+    SWAP                     1, 9
+    SWAP                     2, 10
+    SWAP                     3, 11
+%else
+    mova            [rsp+0*16], m0  ; otherwise spill the four invariants to the stack scratch area
+    mova            [rsp+1*16], m1
+    mova            [rsp+2*16], m2
+    mova            [rsp+3*16], m3
+%endif
+    mov                   cntq, 15  ; counts 15 -> 0: sixteen iterations x 2 rows
+.loop:
+    pinsrw                  m3, [lq+cntq*2], 0  ; two left pixels for this row pair
+%if cpuflag(ssse3)
+    pshufb                  m7, m3, pw_m255_reg
+    pshufb                  m3, pw_m256_reg
+%else
+    pxor                    m7, m7  ; m7 is overwritten every iteration, so re-zero it for the unpack
+    punpcklbw               m3, m7
+    punpcklwd               m3, m3
+    pshufd                  m7, m3, q1111  ; second left pixel broadcast as words
+    pshufd                  m3, m3, q0000  ; first left pixel broadcast as words
+%endif
+%if ARCH_X86_64
+    paddw                   m4, m7, m8  ; upper row in m4..m7
+    paddw                   m5, m7, m9
+    paddw                   m6, m7, m10
+    paddw                   m7, m11
+    paddw                   m0, m3, m8  ; lower row in m0..m3
+    paddw                   m1, m3, m9
+    paddw                   m2, m3, m10
+    paddw                   m3, m11
+%else
+    paddw                   m4, m7, [rsp+0*16]  ; upper row in m4..m7
+    paddw                   m5, m7, [rsp+1*16]
+    paddw                   m6, m7, [rsp+2*16]
+    paddw                   m7, [rsp+3*16]
+    paddw                   m0, m3, [rsp+0*16]  ; lower row in m0..m3
+    paddw                   m1, m3, [rsp+1*16]
+    paddw                   m2, m3, [rsp+2*16]
+    paddw                   m3, [rsp+3*16]
+%endif
+    packuswb                m4, m5  ; back to bytes with unsigned saturation
+    packuswb                m6, m7
+    packuswb                m0, m1
+    packuswb                m2, m3
+    mova   [dstq+strideq*0+ 0], m4
+    mova   [dstq+strideq*0+16], m6
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m2
+    lea                   dstq, [dstq+strideq*2]  ; advance 2 rows
+    dec                   cntq
+    jge .loop  ; loop until the counter goes negative
+    RET
+%undef pw_m256_reg
+%undef pw_m255_reg
+%undef mem
+%endmacro
+
+INIT_XMM sse2
+TM_XMM_FUNCS  ; sse2 build (unpack/pshufd paths)
+INIT_XMM ssse3
+TM_XMM_FUNCS  ; ssse3 build (pshufb paths)
+INIT_XMM avx
+TM_XMM_FUNCS  ; avx build
+
+%if HAVE_AVX2_EXTERNAL  ; only assembled when HAVE_AVX2_EXTERNAL is non-zero
+INIT_YMM avx2
+cglobal vp9_ipred_tm_32x32, 4, 4, 8, dst, stride, l, a  ; 32x32: pixel = sat_u8(left + top - topleft), 2 rows per iteration
+    pxor                    m3, m3  ; zero, used for byte -> word unpacking
+    pinsrw                 xm2, [aq-1], 0  ; low word = bytes a[-1] (top-left) and a[0]
+    vinserti128             m2, m2, xm2, 1  ; copy the low lane into the high lane (pshufb shuffles within each lane)
+    mova                    m0, [aq]  ; top pixels a[0..31]
+    DEFINE_ARGS dst, stride, l, cnt  ; a is dead from here on: its register becomes cnt
+    mova                    m4, [pw_m256]  ; shuffle masks defined outside this view; presumably select byte 0 / byte 1 zero-extended into every word
+    mova                    m5, [pw_m255]
+    pshufb                  m2, m4  ; top-left broadcast as words
+    punpckhbw               m1, m0, m3  ; per-lane unpack: m1 = pixels 8..15 and 24..31 as words
+    punpcklbw               m0, m3  ; m0 = pixels 0..7 and 16..23 as words
+    psubw                   m1, m2  ; m1:m0 = top - topleft, loop invariant
+    psubw                   m0, m2
+    mov                   cntq, 15  ; counts 15 -> 0: sixteen iterations x 2 rows
+.loop:
+    pinsrw                 xm7, [lq+cntq*2], 0  ; two left pixels for this row pair
+    vinserti128             m7, m7, xm7, 1  ; duplicate into both lanes
+    pshufb                  m3, m7, m5
+    pshufb                  m7, m4
+    paddw                   m2, m3, m0
+    paddw                   m3, m1
+    paddw                   m6, m7, m0
+    paddw                   m7, m1
+    packuswb                m2, m3  ; per-lane pack restores pixel order; unsigned saturation
+    packuswb                m6, m7
+    mova      [dstq+strideq*0], m2
+    mova      [dstq+strideq*1], m6
+    lea                   dstq, [dstq+strideq*2]  ; advance 2 rows
+    dec                   cntq
+    jge .loop  ; loop until the counter goes negative
+    RET
+%endif
+
+; dl (diagonal down-left: built from the low-pass filtered pixels at [a] only)
+
+%macro LOWPASS 4 ; left [dst], center, right, tmp
+    pxor                   m%4, m%1, m%3  ; tmp = left ^ right
+    pand                   m%4, [pb_1]  ; keep bit 0: 1 where pavgb below will have rounded up (pb_1 presumably all bytes = 1)
+    pavgb                  m%1, m%3  ; (left + right + 1) >> 1
+    psubusb                m%1, m%4  ; undo the rounding: (left + right) >> 1
+    pavgb                  m%1, m%2  ; average with center: overall (left + 2*center + right + 2) >> 2 per byte
+%endmacro
+
+%macro DL_MMX_FUNCS 0  ; emits vp9_ipred_dl_4x4 for the current MMX-width instruction set
+cglobal vp9_ipred_dl_4x4, 4, 4, 0, dst, stride, l, a  ; 4x4 down-left prediction from the 8 bytes at [a]
+    movq                    m1, [aq]  ; pixels 0..7
+%if cpuflag(ssse3)
+    pshufb                  m0, m1, [pb_0to5_2x7]  ; tables defined outside this view; presumably yield 01234577 and 23456777 like the mmxext path
+    pshufb                  m2, m1, [pb_2to6_3x7]
+%else
+    punpckhbw               m3, m1, m1              ; 44556677
+    pand                    m0, m1, [pb_6xm1_2x0]   ; 012345__
+    pand                    m3, [pb_6x0_2xm1]       ; ______77
+    psrlq                   m2, m1, 16              ; 234567__
+    por                     m0, m3                  ; 01234577
+    por                     m2, m3                  ; 23456777
+%endif
+    psrlq                   m1, 8  ; 1234567_ becomes the filter center
+    LOWPASS                  0, 1, 2, 3  ; m0 = 3-tap filtered row
+
+    pshufw                  m1, m0, q3321  ; m1 = m0 moved down by one word (2 pixels), top word repeated
+    movd      [dstq+strideq*0], m0  ; row 0
+    movd      [dstq+strideq*2], m1  ; row 2 = row 0 shifted by 2 pixels
+    psrlq                   m0, 8  ; shift both by one pixel for the odd rows
+    psrlq                   m1, 8
+    add                   dstq, strideq
+    movd      [dstq+strideq*0], m0  ; row 1
+    movd      [dstq+strideq*2], m1  ; row 3
+    RET
+%endmacro
+
+INIT_MMX mmxext
+DL_MMX_FUNCS  ; mmxext build (mask/or path)
+INIT_MMX ssse3
+DL_MMX_FUNCS  ; ssse3 build (pshufb path)
+
+%macro DL_XMM_FUNCS 0  ; emits the 8x8, 16x16 and 32x32 down-left predictors for the current XMM instruction set
+cglobal vp9_ipred_dl_8x8, 4, 4, 4, dst, stride, stride5, a  ; 8x8 down-left; the third argument register is used as stride5, not as l
+    movq                    m0, [aq]  ; pixels 0..7
+    lea               stride5q, [strideq*5]
+%if cpuflag(ssse3)
+    pshufb                  m1, m0, [pb_1to6_10x7]  ; table defined outside this view; presumably pixels 1..6 followed by ten copies of pixel 7
+%else
+    punpcklbw               m1, m0, m0              ; 0011223344556677
+    punpckhwd               m1, m1                  ; 4x4,4x5,4x6,4x7
+%endif
+    shufps                  m0, m1, q3310  ; keep pixels 0..7 in the low half, fill the high half from m1's top dword
+%if notcpuflag(ssse3)
+    psrldq                  m1, m0, 1  ; sse2: build the shifted-by-one vector from m0
+    shufps                  m1, m0, q3210  ; low half from the shifted copy, high half from m0
+%endif
+    psrldq                  m2, m1, 1  ; shifted by one more pixel
+    LOWPASS                  0, 1, 2, 3  ; m0 = 3-tap filtered row
+
+    pshufd                  m1, m0, q3321  ; m1 = m0 shifted down by 4 pixels, top dword repeated
+    movq      [dstq+strideq*0], m0  ; row 0
+    movq      [dstq+strideq*4], m1  ; row 4
+    psrldq                  m0, 1
+    psrldq                  m1, 1
+    movq      [dstq+strideq*1], m0  ; row 1
+    movq      [dstq+stride5q ], m1  ; row 5
+    lea                   dstq, [dstq+strideq*2]  ; advance 2 rows
+    psrldq                  m0, 1
+    psrldq                  m1, 1
+    movq      [dstq+strideq*0], m0  ; row 2
+    movq      [dstq+strideq*4], m1  ; row 6
+    psrldq                  m0, 1
+    psrldq                  m1, 1
+    movq      [dstq+strideq*1], m0  ; row 3
+    movq      [dstq+stride5q ], m1  ; row 7
+    RET
+
+cglobal vp9_ipred_dl_16x16, 4, 4, 6, dst, stride, l, a  ; 16x16 down-left, 2 row pairs (i and i+8) per iteration
+    mova                    m0, [aq]  ; pixels 0..15 (0..F in the comments below)
+%if cpuflag(ssse3)
+    mova                    m5, [pb_1toE_2xF]  ; shuffle mask defined outside this view; presumably shift down one byte repeating the last
+    pshufb                  m1, m0, m5
+    pshufb                  m2, m1, m5
+    pshufb                  m4, m0, [pb_15]  ; m4 = last pixel in every byte (pb_15 presumably all bytes = 15)
+%else
+    pand                    m5, m0, [pb_15x0_1xm1]      ; _______________F
+    psrldq                  m1, m0, 1                   ; 123456789ABCDEF_
+    por                     m1, m5                      ; 123456789ABCDEFF
+    psrldq                  m2, m1, 1                   ; 23456789ABCDEFF_
+    por                     m2, m5                      ; 23456789ABCDEFFF
+    pshufhw                 m4, m1, q3333               ; xxxxxxxxFFFFFFFF
+%endif
+    LOWPASS                  0, 1, 2, 3  ; m0 = 3-tap filtered row
+    DEFINE_ARGS dst, stride, cnt, stride9  ; l and a are dead: their registers become cnt and stride9
+    lea               stride9q, [strideq+strideq*8]
+    mov                   cntd, 4  ; 4 iterations, each writing rows i, i+1, i+8, i+9
+
+.loop:
+    movhlps                 m4, m0  ; m4 = high half of m0 followed by 8x last pixel (row i+8)
+    mova      [dstq+strideq*0], m0
+%if cpuflag(ssse3)
+    pshufb                  m0, m5  ; shift the row by one pixel, repeating the last pixel
+%else
+    psrldq                  m0, 1  ; shift the row by one pixel ...
+    por                     m0, m5  ; ... and re-insert the last pixel at the top
+%endif
+    mova      [dstq+strideq*8], m4
+    movhlps                 m4, m0  ; same again for rows i+1 and i+9
+    mova      [dstq+strideq*1], m0
+%if cpuflag(ssse3)
+    pshufb                  m0, m5
+%else
+    psrldq                  m0, 1
+    por                     m0, m5
+%endif
+    mova      [dstq+stride9q ], m4
+    lea                   dstq, [dstq+strideq*2]  ; advance 2 rows
+    dec                   cntd
+    jg .loop  ; repeat while the counter is still positive
+    RET
+
+cglobal vp9_ipred_dl_32x32, 4, 5, 8, dst, stride, cnt, a, dst16  ; 32x32 down-left; rows i, i+8, i+16, i+24 are written per iteration
+    mova                    m0, [aq]  ; pixels 0..15
+    mova                    m1, [aq+16]  ; pixels 16..31
+    PALIGNR                 m2, m1, m0, 1, m4  ; pixels 1..16
+    PALIGNR                 m3, m1, m0, 2, m4  ; pixels 2..17
+    LOWPASS                  0, 2, 3, 4  ; m0 = filtered columns 0..15
+%if cpuflag(ssse3)
+    mova                    m5, [pb_1toE_2xF]  ; shuffle mask defined outside this view; presumably shift down one byte repeating the last
+    pshufb                  m2, m1, m5
+    pshufb                  m3, m2, m5
+    pshufb                  m6, m1, [pb_15]  ; m6 = last pixel in every byte (pb_15 presumably all bytes = 15)
+    mova                    m7, m6
+%else
+    pand                    m5, m1, [pb_15x0_1xm1]      ; _______________F
+    psrldq                  m2, m1, 1                   ; 123456789ABCDEF_
+    por                     m2, m5                      ; 123456789ABCDEFF
+    psrldq                  m3, m2, 1                   ; 23456789ABCDEFF_
+    por                     m3, m5                      ; 23456789ABCDEFFF
+    pshufhw                 m7, m2, q3333               ; xxxxxxxxFFFFFFFF
+    pshufd                  m6, m7, q3333  ; m6 = last pixel in every byte
+%endif
+    LOWPASS                  1, 2, 3, 4  ; m1 = filtered columns 16..31
+    lea                 dst16q, [dstq  +strideq*8]
+    mov                   cntd, 8  ; 8 iterations x 4 rows = 32 rows
+    lea                 dst16q, [dst16q+strideq*8]  ; dst16 = dst + 16 rows
+.loop:
+    movhlps                 m7, m1  ; m7 = high half of m1 followed by 8x last pixel
+    mova [dstq  +strideq*0+ 0], m0  ; row i
+    mova [dstq  +strideq*0+16], m1
+    movhps [dstq+strideq*8+ 0], m0  ; row i+8 = row i shifted by 8 pixels
+    movq [dstq  +strideq*8+ 8], m1
+    mova [dstq  +strideq*8+16], m7
+    mova [dst16q+strideq*0+ 0], m1  ; row i+16 = row i shifted by 16 pixels
+    mova [dst16q+strideq*0+16], m6
+    mova [dst16q+strideq*8+ 0], m7  ; row i+24 = row i shifted by 24 pixels
+    mova [dst16q+strideq*8+16], m6
+%if cpuflag(avx)
+    vpalignr                m0, m1, m0, 1  ; shift the 32-pixel row m1:m0 by one pixel
+    pshufb                  m1, m5
+%elif cpuflag(ssse3)
+    palignr                 m2, m1, m0, 1  ; same shift using a temporary (destructive 2-operand palignr)
+    pshufb                  m1, m5
+    mova                    m0, m2
+%else
+    mova                    m4, m1  ; sse2: emulate the cross-register shift with shifts and ORs
+    psrldq                  m0, 1
+    pslldq                  m4, 15
+    psrldq                  m1, 1
+    por                     m0, m4
+    por                     m1, m5  ; re-insert the last pixel at the top
+%endif
+    add                   dstq, strideq  ; advance 1 row
+    add                 dst16q, strideq
+    dec                   cntd
+    jg .loop  ; repeat while the counter is still positive
+    RET
+%endmacro
+
+INIT_XMM sse2
+DL_XMM_FUNCS  ; sse2 build
+INIT_XMM ssse3
+DL_XMM_FUNCS  ; ssse3 build
+INIT_XMM avx
+DL_XMM_FUNCS  ; avx build
+
+; dr (diagonal down-right: built from the low-pass filtered pixels at [l], [a-1] and [a])
+
+%macro DR_MMX_FUNCS 0  ; emits vp9_ipred_dr_4x4 for the current MMX-width instruction set
+cglobal vp9_ipred_dr_4x4, 4, 4, 0, dst, stride, l, a  ; 4x4 down-right prediction from l[0..3], a[-1] and a[0..3]
+    movd                    m0, [lq]  ; low dword = l[0..3]
+    punpckldq               m0, [aq-1]  ; high dword = a[-1..2]
+    movd                    m1, [aq+3]  ; next pixels starting at a[3]
+    DEFINE_ARGS dst, stride, stride3  ; l's register is reused as stride3
+    lea               stride3q, [strideq*3]
+    PALIGNR                 m1, m0, 1, m3  ; edge shifted by one pixel (filter center)
+    psrlq                   m2, m1, 8  ; edge shifted by two pixels
+    LOWPASS                  0, 1, 2, 3  ; m0 = 3-tap filtered edge
+
+    movd      [dstq+stride3q ], m0  ; bottom row first; each row above is shifted by one more pixel
+    psrlq                   m0, 8
+    movd      [dstq+strideq*2], m0
+    psrlq                   m0, 8
+    movd      [dstq+strideq*1], m0
+    psrlq                   m0, 8
+    movd      [dstq+strideq*0], m0
+    RET
+%endmacro
+
+INIT_MMX mmxext
+DR_MMX_FUNCS  ; mmxext build
+INIT_MMX ssse3
+DR_MMX_FUNCS  ; ssse3 build (affects how PALIGNR expands)
+
+%macro DR_XMM_FUNCS 0  ; emits the 8x8, 16x16 and 32x32 down-right predictors for the current XMM instruction set
+cglobal vp9_ipred_dr_8x8, 4, 4, 4, dst, stride, l, a  ; 8x8 down-right prediction
+    movq                    m1, [lq]  ; low half = l[0..7]
+    movhps                  m1, [aq-1]  ; high half = a[-1..6]
+    movd                    m2, [aq+7]  ; next pixels starting at a[7]
+    DEFINE_ARGS dst, stride, stride3  ; l's register is reused as stride3
+    lea               stride3q, [strideq*3]
+    pslldq                  m0, m1, 1  ; edge shifted up by one byte (zero enters at the bottom)
+    PALIGNR                 m2, m1, 1, m3  ; edge shifted down by one byte, a[7] entering at the top
+    LOWPASS                  0, 1, 2, 3  ; m0 = 3-tap filtered edge
+
+    movhps    [dstq+strideq*0], m0  ; row 0 = high 8 bytes; each following row slides one byte towards the left-edge part
+    pslldq                  m0, 1
+    movhps    [dstq+strideq*1], m0
+    pslldq                  m0, 1
+    movhps    [dstq+strideq*2], m0
+    pslldq                  m0, 1
+    movhps    [dstq+stride3q ], m0
+    pslldq                  m0, 1
+    lea                   dstq, [dstq+strideq*4]  ; advance 4 rows
+    movhps    [dstq+strideq*0], m0
+    pslldq                  m0, 1
+    movhps    [dstq+strideq*1], m0
+    pslldq                  m0, 1
+    movhps    [dstq+strideq*2], m0
+    pslldq                  m0, 1
+    movhps    [dstq+stride3q ], m0
+    RET
+
+cglobal vp9_ipred_dr_16x16, 4, 4, 6, dst, stride, l, a  ; 16x16 down-right, rows i, i+1, i+8, i+9 per iteration
+    mova                    m1, [lq]  ; l[0..15]
+    movu                    m2, [aq-1]  ; a[-1..14] (unaligned)
+    movd                    m4, [aq+15]  ; next pixels starting at a[15]
+    DEFINE_ARGS dst, stride, stride9, cnt  ; l and a are dead: their registers become stride9 and cnt
+    lea               stride9q, [strideq *3]
+    mov                   cntd, 4  ; 4 iterations x 4 rows = 16 rows
+    lea               stride9q, [stride9q*3]  ; 3 * 3 * stride = 9 * stride
+    PALIGNR                 m4, m2, 1, m5  ; a[0..15]
+    PALIGNR                 m3, m2, m1, 15, m5  ; l[15], a[-1..13]
+    LOWPASS                  3,  2, 4, 5  ; m3 = filtered top part (row 0 of the block)
+    pslldq                  m0, m1, 1  ; l shifted up by one byte
+    PALIGNR                 m2, m1, 1, m4  ; l[1..15], a[-1]
+    LOWPASS                  0,  1, 2, 4  ; m0 = filtered left part
+
+.loop:
+    mova    [dstq+strideq*0  ], m3  ; row i
+    movhps  [dstq+strideq*8+0], m0  ; row i+8: 8 pixels from the left part ...
+    movq    [dstq+strideq*8+8], m3  ; ... followed by the first 8 pixels of row i
+    PALIGNR                 m3, m0, 15, m1  ; slide the 32-pixel window m3:m0 by one pixel
+    pslldq                  m0, 1
+    mova    [dstq+strideq*1  ], m3  ; row i+1
+    movhps  [dstq+stride9q +0], m0  ; row i+9
+    movq    [dstq+stride9q +8], m3
+    PALIGNR                 m3, m0, 15, m1  ; slide the window again
+    pslldq                  m0, 1
+    lea                   dstq, [dstq+strideq*2]  ; advance 2 rows
+    dec                   cntd
+    jg .loop  ; repeat while the counter is still positive
+    RET
+
+cglobal vp9_ipred_dr_32x32, 4, 4, 8, dst, stride, l, a  ; 32x32 down-right, rows i and i+16 per iteration
+    mova                    m1, [lq]  ; l[0..15]
+    mova                    m2, [lq+16]  ; l[16..31]
+    movu                    m3, [aq-1]  ; a[-1..14] (unaligned)
+    movu                    m4, [aq+15]  ; a[15..30] (unaligned)
+    movd                    m5, [aq+31]  ; next pixels starting at a[31]
+    DEFINE_ARGS dst, stride, stride8, cnt  ; l and a are dead: their registers become stride8 and cnt
+    lea               stride8q, [strideq*8]
+    PALIGNR                 m5, m4, 1, m7  ; each LOWPASS below filters one 16-pixel segment of the l | a[-1] | a edge
+    PALIGNR                 m6, m4, m3, 15, m7
+    LOWPASS                  5,  4,  6,  7
+    PALIGNR                 m4, m3, 1, m7
+    PALIGNR                 m6, m3, m2, 15, m7
+    LOWPASS                  4,  3,  6,  7
+    PALIGNR                 m3, m2, 1, m7
+    PALIGNR                 m6, m2, m1, 15, m7
+    LOWPASS                  3,  2,  6,  7
+    PALIGNR                 m2, m1, 1, m6
+    pslldq                  m0, m1, 1
+    LOWPASS                  2,  1,  0,  6
+    mov                   cntd, 16  ; 16 iterations x 2 rows = 32 rows
+
+    ; out=m2/m3/m4/m5
+.loop:
+    mova  [dstq+stride8q*0+ 0], m4  ; row i
+    mova  [dstq+stride8q*0+16], m5
+    mova  [dstq+stride8q*2+ 0], m3  ; row i+16 (stride8*2 = 16 rows)
+    mova  [dstq+stride8q*2+16], m4
+    PALIGNR                 m5, m4, 15, m6  ; slide the 64-pixel window m5:m4:m3:m2 by one pixel
+    PALIGNR                 m4, m3, 15, m6
+    PALIGNR                 m3, m2, 15, m6
+    pslldq                  m2, 1
+    add                   dstq, strideq  ; advance 1 row
+    dec                   cntd
+    jg .loop  ; repeat while the counter is still positive
+    RET
+%endmacro
+
+INIT_XMM sse2
+DR_XMM_FUNCS  ; sse2 build
+INIT_XMM ssse3
+DR_XMM_FUNCS  ; ssse3 build
+INIT_XMM avx
+DR_XMM_FUNCS  ; avx build
+
+; vl (vertical-left: rows alternate between 2-tap averaged and 3-tap filtered pixels from [a])
+
+INIT_MMX mmxext  ; 64-bit MMX registers, mmxext instructions
+cglobal vp9_ipred_vl_4x4, 4, 4, 0, dst, stride, l, a  ; 4x4 vertical-left prediction from the 8 bytes at [a]
+    movq                    m0, [aq]  ; pixels 0..7
+    psrlq                   m1, m0, 8  ; pixels shifted by one
+    psrlq                   m2, m1, 8  ; pixels shifted by two
+    LOWPASS                  2,  1, 0, 3  ; m2 = 3-tap filtered row
+    pavgb                   m1, m0  ; m1 = 2-tap average of neighbouring pixels
+    movd      [dstq+strideq*0], m1  ; row 0: averaged
+    movd      [dstq+strideq*1], m2  ; row 1: filtered
+    lea                   dstq, [dstq+strideq*2]  ; advance 2 rows
+    psrlq                   m1, 8  ; shift both by one pixel for the next row pair
+    psrlq                   m2, 8
+    movd      [dstq+strideq*0], m1  ; row 2
+    movd      [dstq+strideq*1], m2  ; row 3
+    RET
+
+%macro VL_XMM_FUNCS 0  ; emits the 8x8, 16x16 and 32x32 vertical-left predictors for the current XMM instruction set
+cglobal vp9_ipred_vl_8x8, 4, 4, 4, dst, stride, l, a  ; 8x8 vertical-left prediction
+    movq                    m0, [aq]  ; pixels 0..7
+%if cpuflag(ssse3)
+    pshufb                  m0, [pb_0to6_9x7]  ; table defined outside this view; presumably pixels 0..6 followed by nine copies of pixel 7
+%else
+    punpcklbw               m1, m0, m0  ; sse2: extend pixel 7 into the high half by unpack + shuffle
+    punpckhwd               m1, m1
+    shufps                  m0, m1, q3310
+%endif
+    DEFINE_ARGS dst, stride, stride3  ; l's register is reused as stride3
+    lea               stride3q, [strideq*3]
+    psrldq                  m1, m0, 1  ; pixels shifted by one
+    psrldq                  m2, m0, 2  ; pixels shifted by two
+    LOWPASS                  2,  1,  0,  3  ; m2 = 3-tap filtered row (odd rows)
+    pavgb                   m1, m0  ; m1 = 2-tap average (even rows)
+
+    movq      [dstq+strideq*0], m1
+    movq      [dstq+strideq*1], m2
+    psrldq                  m1, 1  ; shift by one pixel after every row pair
+    psrldq                  m2, 1
+    movq      [dstq+strideq*2], m1
+    movq      [dstq+stride3q ], m2
+    lea                   dstq, [dstq+strideq*4]  ; advance 4 rows
+    psrldq                  m1, 1
+    psrldq                  m2, 1
+    movq      [dstq+strideq*0], m1
+    movq      [dstq+strideq*1], m2
+    psrldq                  m1, 1
+    psrldq                  m2, 1
+    movq      [dstq+strideq*2], m1
+    movq      [dstq+stride3q ], m2
+    RET
+
+cglobal vp9_ipred_vl_16x16, 4, 4, 5, dst, stride, l, a  ; 16x16 vertical-left prediction, 4 rows per iteration
+    mova                    m0, [aq]  ; pixels 0..15 (0..F in the comments below)
+    DEFINE_ARGS dst, stride, stride3, cnt  ; l and a are dead: their registers become stride3 and cnt
+    lea               stride3q, [strideq*3]
+%if cpuflag(ssse3)
+    mova                    m4, [pb_1toE_2xF]  ; shuffle mask defined outside this view; presumably shift down one byte repeating the last
+    pshufb                  m1, m0, m4
+    pshufb                  m2, m1, m4
+%else
+    pand                    m4, m0, [pb_15x0_1xm1]  ; _______________F
+    psrldq                  m1, m0, 1               ; 123456789ABCDEF_
+    por                     m1, m4                  ; 123456789ABCDEFF
+    psrldq                  m2, m1, 1               ; 23456789ABCDEFF_
+    por                     m2, m4                  ; 23456789ABCDEFFF
+%endif
+    LOWPASS                  2,  1,  0, 3  ; m2 = 3-tap filtered row (odd rows)
+    pavgb                   m1, m0  ; m1 = 2-tap average (even rows)
+    mov                   cntd, 4  ; 4 iterations x 4 rows = 16 rows
+.loop:
+    mova      [dstq+strideq*0], m1
+    mova      [dstq+strideq*1], m2
+%if cpuflag(ssse3)
+    pshufb                  m1, m4  ; shift by one pixel, repeating the last pixel
+    pshufb                  m2, m4
+%else
+    psrldq                  m1, 1  ; shift by one pixel ...
+    psrldq                  m2, 1
+    por                     m1, m4  ; ... and re-insert the last pixel at the top
+    por                     m2, m4
+%endif
+    mova      [dstq+strideq*2], m1
+    mova      [dstq+stride3q ], m2
+%if cpuflag(ssse3)
+    pshufb                  m1, m4
+    pshufb                  m2, m4
+%else
+    psrldq                  m1, 1
+    psrldq                  m2, 1
+    por                     m1, m4
+    por                     m2, m4
+%endif
+    lea                   dstq, [dstq+strideq*4]  ; advance 4 rows
+    dec                   cntd
+    jg .loop  ; repeat while the counter is still positive
+    RET
+
+cglobal vp9_ipred_vl_32x32, 4, 4, 7, dst, stride, l, a  ; 32x32 vertical-left prediction; rows i, i+1, i+16, i+17 per iteration
+    mova                    m0, [aq]  ; pixels 0..15
+    mova                    m5, [aq+16]  ; pixels 16..31
+    DEFINE_ARGS dst, stride, dst16, cnt  ; l and a are dead: their registers become dst16 and cnt
+    PALIGNR                 m2, m5, m0, 1, m4  ; pixels 1..16
+    PALIGNR                 m3, m5, m0, 2, m4  ; pixels 2..17
+    lea                 dst16q, [dstq  +strideq*8]
+    LOWPASS                  3,  2,  0, 6  ; m3 = filtered columns 0..15 (odd rows)
+    pavgb                   m2, m0  ; m2 = averaged columns 0..15 (even rows)
+%if cpuflag(ssse3)
+    mova                    m4, [pb_1toE_2xF]  ; shuffle mask defined outside this view; presumably shift down one byte repeating the last
+    pshufb                  m0, m5, m4
+    pshufb                  m1, m0, m4
+%else
+    pand                    m4, m5, [pb_15x0_1xm1]  ; _______________F
+    psrldq                  m0, m5, 1               ; 123456789ABCDEF_
+    por                     m0, m4                  ; 123456789ABCDEFF
+    psrldq                  m1, m0, 1               ; 23456789ABCDEFF_
+    por                     m1, m4                  ; 23456789ABCDEFFF
+%endif
+    lea                 dst16q, [dst16q+strideq*8]  ; dst16 = dst + 16 rows
+    LOWPASS                  1,  0,  5, 6  ; m1 = filtered columns 16..31 (odd rows)
+    pavgb                   m0, m5  ; m0 = averaged columns 16..31 (even rows)
+%if cpuflag(ssse3)
+    pshufb                  m5, [pb_15]  ; m5 = last pixel in every byte (pb_15 presumably all bytes = 15)
+%else
+    punpckhbw               m5, m4, m4  ; sse2: spread the isolated last pixel over the whole register
+    pshufhw                 m5, m5, q3333
+    punpckhqdq              m5, m5
+%endif
+    mov                   cntd, 8  ; 8 iterations x 4 rows = 32 rows
+
+.loop:
+%macro %%write 3  ; args: stride suffix selecting the row, register with columns 0..15, register with columns 16..31
+    mova    [dstq+stride%1+ 0], %2  ; row in the upper half
+    mova    [dstq+stride%1+16], %3
+    movhps  [dst16q+stride%1 ], %2  ; matching row 16 lines below = same row shifted by 8 pixels
+    movu  [dst16q+stride%1+ 8], %3
+    movq  [dst16q+stride%1+24], m5  ; padded with the last pixel
+%if cpuflag(avx)
+    palignr                 %2, %3, %2, 1  ; shift the 32-pixel row by one pixel
+    pshufb                  %3, m4
+%elif cpuflag(ssse3)
+    palignr                 m6, %3, %2, 1  ; same shift using m6 as a temporary
+    pshufb                  %3, m4
+    mova                    %2, m6
+%else
+    pslldq                  m6, %3, 15  ; sse2: emulate the cross-register shift with shifts and ORs
+    psrldq                  %3, 1
+    psrldq                  %2, 1
+    por                     %3, m4
+    por                     %2, m6
+%endif
+%endmacro
+
+    %%write                q*0, m2, m0  ; even row: averaged pixels
+    %%write                q*1, m3, m1  ; odd row: filtered pixels
+    lea                   dstq, [dstq  +strideq*2]  ; advance 2 rows
+    lea                 dst16q, [dst16q+strideq*2]
+    dec                   cntd
+    jg .loop  ; repeat while the counter is still positive
+    RET
+%endmacro
+
+INIT_XMM sse2
+VL_XMM_FUNCS  ; sse2 build
+INIT_XMM ssse3
+VL_XMM_FUNCS  ; ssse3 build
+INIT_XMM avx
+VL_XMM_FUNCS  ; avx build
+
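+; vr (vertical-right: rows alternate between averaged and filtered pixels, shifted in from the left edge)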
+
+%macro VR_MMX_FUNCS 0  ; emits vp9_ipred_vr_4x4 for the current MMX-width instruction set
+cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a  ; 4x4 vertical-right prediction from l[0..3], a[-1] and a[0..3]
+    movq                    m1, [aq-1]  ; a[-1..6]
+    punpckldq               m2, [lq]  ; high dword = l[0..3]; the low dword is not used below
+    movd                    m0, [aq]  ; a[0..3]
+    DEFINE_ARGS dst, stride, stride3  ; l's register is reused as stride3
+    lea               stride3q, [strideq*3]
+    pavgb                   m0, m1  ; 2-tap average of a[-1..2] and a[0..3] (row 0)
+    PALIGNR                 m1, m2, 5, m3  ; l[1..3] followed by a[-1..3]
+    psrlq                   m2, m1, 8  ; edge shifted down by one byte
+    psllq                   m3, m1, 8  ; edge shifted up by one byte
+    LOWPASS                  2,  1, 3, 4  ; m2 = 3-tap filtered edge
+
+    ; ABCD <- for the following predictor:
+    ; EFGH
+    ; IABC  | m0 contains ABCDxxxx
+    ; JEFG  | m2 contains xJIEFGHx
+
+%if cpuflag(ssse3)
+    punpckldq               m0, m2  ; pack both vectors into one register for the byte shuffles below
+    pshufb                  m2, [pb_13456_3xm1]  ; shuffle tables defined outside this view
+    movd      [dstq+strideq*0], m0
+    pshufb                  m0, [pb_6012_4xm1]
+    movd      [dstq+stride3q ], m2
+    psrlq                   m2, 8
+    movd      [dstq+strideq*2], m0
+    movd      [dstq+strideq*1], m2
+%else
+    psllq                   m1, m2, 40  ; mmxext: move the left-edge bytes to the top of m1 so PALIGNR can shift them in
+    psrlq                   m2, 24
+    movd      [dstq+strideq*0], m0
+    movd      [dstq+strideq*1], m2
+    PALIGNR                 m0, m1, 7, m3
+    psllq                   m1, 8
+    PALIGNR                 m2, m1, 7, m3
+    movd      [dstq+strideq*2], m0
+    movd      [dstq+stride3q ], m2
+%endif
+    RET
+%endmacro
+
+INIT_MMX mmxext
+VR_MMX_FUNCS  ; mmxext build (shift/PALIGNR path)
+INIT_MMX ssse3
+VR_MMX_FUNCS  ; ssse3 build (pshufb path)
+
+%macro VR_XMM_FUNCS 1 ; %1 = number of XMM registers declared by vp9_ipred_vr_16x16 (varies per instruction set)
+cglobal vp9_ipred_vr_8x8, 4, 4, 5, dst, stride, l, a ; writes eight 8-byte rows to dst from bytes read at l and a
+    movu                    m1, [aq-1] ; unaligned 16-byte load starting one byte before a
+    movhps                  m2, [lq] ; 8 bytes at l go into the high half of m2
+    movq                    m0, [aq] ; 8 bytes at a
+    DEFINE_ARGS dst, stride, stride3 ; l and a are not referenced again; the third register becomes stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    pavgb                   m0, m1 ; rounded byte average of a[i-1] and a[i]
+    PALIGNR                 m1, m2, 9, m3
+    pslldq                  m2, m1, 1
+    pslldq                  m3, m1, 2
+    LOWPASS                  1,  2, 3, 4 ; LOWPASS is defined outside this section; per the layout note below its result is used from m1
+
+    ; ABCDEFGH <- for the following predictor:
+    ; IJKLMNOP
+    ; QABCDEFG  | m0 contains ABCDEFGHxxxxxxxx
+    ; RIJKLMNO  | m1 contains xxVUTSRQIJKLMNOP
+    ; SQABCDEF
+    ; TRIJKLMN
+    ; USQABCDE
+    ; VTRIJKLM
+
+%if cpuflag(ssse3)
+    punpcklqdq              m0, m1 ; ABCDEFGHxxVUTSRQ
+%endif
+    movq      [dstq+strideq*0], m0 ; row 0 = low half of m0
+    movhps    [dstq+strideq*1], m1 ; row 1 = high half of m1
+%if cpuflag(ssse3)
+    pshufb                  m0, [pb_6xm1_BDF_0to6]  ; xxxxxxUSQABCDEFG
+    pshufb                  m1, [pb_6xm1_246_8toE]  ; xxxxxxVTRIJKLMNO
+%else
+    psrlw                   m2, m1, 8               ; x_U_S_Q_xxxxxxxx
+    pand                    m3, m1, [pw_255]        ; x_V_T_R_xxxxxxxx
+    packuswb                m3, m2                  ; xVTRxxxxxUSQxxxx
+    pslldq                  m3, 4                   ; xxxxxVTRxxxxxUSQ
+    PALIGNR                 m0, m3, 7, m4           ; xxxxxxUSQABCDEFG
+    psrldq                  m1, 8
+    pslldq                  m3, 8
+    PALIGNR                 m1, m3, 7, m4           ; xxxxxxVTRIJKLMNO
+%endif
+    movhps    [dstq+strideq*2], m0 ; rows 2 and 3 come from the high halves of m0 and m1
+    movhps    [dstq+stride3q ], m1
+    lea                   dstq, [dstq+strideq*4] ; advance dst by 4 * stride
+    pslldq                  m0, 1 ; shift both registers up one byte before storing the next two rows
+    pslldq                  m1, 1
+    movhps    [dstq+strideq*0], m0 ; rows 4 and 5
+    movhps    [dstq+strideq*1], m1
+    pslldq                  m0, 1
+    pslldq                  m1, 1
+    movhps    [dstq+strideq*2], m0 ; rows 6 and 7
+    movhps    [dstq+stride3q ], m1
+    RET
+
+cglobal vp9_ipred_vr_16x16, 4, 4, %1, dst, stride, l, a ; writes sixteen 16-byte rows to dst; %1 (macro argument) is the XMM register count
+    mova                    m0, [aq] ; aligned 16-byte load of a
+    movu                    m1, [aq-1] ; unaligned 16-byte load starting one byte before a
+    mova                    m2, [lq] ; aligned 16-byte load of l
+    DEFINE_ARGS dst, stride, stride3, cnt ; l and a are not referenced again; their registers become stride3 and cnt
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    PALIGNR                 m3, m1, m2, 15, m6
+    LOWPASS                  3,  1,  0,  4 ; LOWPASS is defined outside this section
+    pavgb                   m0, m1 ; rounded byte average of a[i-1] and a[i]
+    PALIGNR                 m1, m2,  1, m6
+    pslldq                  m4, m2,  1
+    LOWPASS                  1,  2,  4,  5
+%if cpuflag(ssse3)
+    pshufb                  m1, [pb_02468ACE_13579BDF] ; both branches split m1 into even-indexed bytes (low half) and odd-indexed bytes (high half)
+%else
+    psrlw                   m5, m1, 8
+    pand                    m1, [pw_255]
+    packuswb                m1, m5
+%endif
+    mov                   cntd, 4 ; 4 iterations, 4 rows stored per iteration
+
+.loop:
+    movlhps                 m2, m1 ; copy the low half of m1 into the high half of m2
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m3
+    PALIGNR                 m4, m0, m1, 15, m6
+    PALIGNR                 m5, m3, m2, 15, m6
+    mova      [dstq+strideq*2], m4
+    mova      [dstq+stride3q ], m5
+    lea                   dstq, [dstq+strideq*4] ; advance dst by 4 * stride
+    PALIGNR                 m0, m1, 14, m6
+    PALIGNR                 m3, m2, 14, m6
+    pslldq                  m1, 2
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_vr_32x32, 4, 4, 9, dst, stride, l, a ; writes thirty-two 32-byte rows to dst from bytes read at l and a
+    mova                    m0, [aq] ; a[0..15]
+    mova                    m2, [aq+16] ; a[16..31]
+    movu                    m1, [aq-1] ; a[-1..14], unaligned
+    PALIGNR                 m3, m2, m0, 15, m6
+    PALIGNR                 m4, m2, m0, 14, m6
+    LOWPASS                  4,  3,  2,  5 ; LOWPASS is defined outside this section
+    pavgb                   m3, m2
+    mova                    m2, [lq+16] ; l[16..31]
+    PALIGNR                 m5, m1, m2, 15, m6
+    LOWPASS                  5,  1,  0,  6
+    pavgb                   m0, m1
+    mova                    m6, [lq] ; l[0..15]
+%if ARCH_X86_64
+    SWAP                     0, 8 ; x86-64: park the m0 value in m8 while m0 is used as scratch below
+%else
+    mova                [dstq], m0 ; x86-32: spill m0 to the first 16 bytes of dst instead (reloaded before the loop stores anything)
+%endif
+    PALIGNR                 m1, m2,  1, m0
+    PALIGNR                 m7, m2, m6, 15, m0
+    LOWPASS                  1,  2,  7,  0
+    PALIGNR                 m2, m6,  1, m0
+    pslldq                  m7, m6,  1
+    LOWPASS                  2,  6,  7,  0
+%if cpuflag(ssse3)
+    pshufb                  m1, [pb_02468ACE_13579BDF] ; both branches split m1 and m2 into even-indexed bytes (low half) and odd-indexed bytes (high half)
+    pshufb                  m2, [pb_02468ACE_13579BDF]
+%else
+    psrlw                   m0, m1, 8
+    psrlw                   m6, m2, 8
+    pand                    m1, [pw_255]
+    pand                    m2, [pw_255]
+    packuswb                m1, m0
+    packuswb                m2, m6
+%endif
+    DEFINE_ARGS dst, stride, dst16, cnt ; l and a are not referenced again; their registers become dst16 and cnt
+    lea                 dst16q, [dstq  +strideq*8]
+    lea                 dst16q, [dst16q+strideq*8] ; dst16 = dst + 16 * stride
+    SBUTTERFLY             qdq,  2,  1,  6
+%if ARCH_X86_64
+    SWAP                     0, 8 ; restore the value parked in m8 above
+%else
+    mova                    m0, [dstq] ; reload the value spilled to dst above
+%endif
+    mov                   cntd, 8 ; 8 iterations; each stores 2 rows via dst and 2 rows via dst16
+
+.loop:
+    ; even lines (0, 2, 4, ...): m1 | m0, m3
+    ;  odd lines (1, 3, 5, ...): m2 | m5, m4
+%macro %%write 4 ; %%write row_offset, rA, rB, rC: store one row via dst and one via dst16, then shift the three registers by one byte
+    mova    [dstq+stride%1+ 0], %3 ; dst row: %3 followed by %4
+    mova    [dstq+stride%1+16], %4
+    movhps  [dst16q+stride%1 ], %2 ; dst16 row: high half of %2, all of %3, low half of %4
+    movu  [dst16q+stride%1+ 8], %3
+    movq  [dst16q+stride%1+24], %4
+    PALIGNR                 %4, %3, 15, m6
+    PALIGNR                 %3, %2, 15, m6
+    pslldq                  %2,  1
+%endmacro
+
+    %%write                q*0, m1, m0, m3
+    %%write                q*1, m2, m5, m4
+    lea                   dstq, [dstq  +strideq*2] ; advance both row pointers by 2 * stride
+    lea                 dst16q, [dst16q+strideq*2]
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+VR_XMM_FUNCS 7 ; 7 XMM regs for vr_16x16: m6 is passed to PALIGNR as its last operand (presumably only touched by the non-ssse3 PALIGNR)
+INIT_XMM ssse3
+VR_XMM_FUNCS 6
+INIT_XMM avx
+VR_XMM_FUNCS 6
+
+; hd
+
+INIT_MMX mmxext
+cglobal vp9_ipred_hd_4x4, 4, 4, 0, dst, stride, l, a ; writes four 4-byte rows to dst from bytes read at l and a
+    movd                    m0, [lq] ; 4 bytes at l in the low dword
+    punpckldq               m0, [aq-1] ; 4 bytes starting at a-1 in the high dword
+    DEFINE_ARGS dst, stride, stride3 ; l and a are not referenced again; the third register becomes stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    psrlq                   m1, m0, 8 ; m0 shifted down by one byte
+    psrlq                   m2, m1, 8 ; m0 shifted down by two bytes
+    LOWPASS                  2,  1, 0,  3 ; LOWPASS is defined outside this section
+    pavgb                   m1, m0 ; rounded byte average of each byte of m0 with its successor
+
+    ; DHIJ <- for the following predictor:
+    ; CGDH
+    ; BFCG  | m1 contains ABCDxxxx
+    ; AEBF  | m2 contains EFGHIJxx
+
+    punpcklbw               m1, m2
+    punpckhdq               m0, m1, m2
+
+    ; m1 contains AEBFCGDH
+    ; m0 contains CGDHIJxx
+
+    movd      [dstq+stride3q ], m1 ; row 3
+    movd      [dstq+strideq*1], m0 ; row 1
+    psrlq                   m1, 16 ; drop the two lowest bytes of each register
+    psrlq                   m0, 16
+    movd      [dstq+strideq*2], m1 ; row 2
+    movd      [dstq+strideq*0], m0 ; row 0
+    RET
+
+%macro HD_XMM_FUNCS 0 ; defines vp9_ipred_hd_8x8, _16x16 and _32x32; expanded below for sse2, ssse3 and avx
+cglobal vp9_ipred_hd_8x8, 4, 4, 5, dst, stride, l, a ; writes eight 8-byte rows to dst from bytes read at l and a
+    movq                    m0, [lq] ; 8 bytes at l in the low half
+    movhps                  m0, [aq-1] ; 8 bytes starting at a-1 in the high half
+    DEFINE_ARGS dst, stride, stride3, dst4 ; l and a are not referenced again; their registers become stride3 and dst4
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    lea                  dst4q, [dstq+strideq*4] ; dst4 = dst + 4 * stride
+    psrldq                  m1, m0, 1 ; m0 shifted down by one byte
+    psrldq                  m2, m1, 1 ; m0 shifted down by two bytes
+    LOWPASS                  2,  1,  0,  3 ; LOWPASS is defined outside this section
+    pavgb                   m1, m0 ; rounded byte average of each byte of m0 with its successor
+
+    ; HPQRSTUV <- for the following predictor
+    ; GOHPQRST
+    ; FNGOHPQR  | m1 contains ABCDEFGHxxxxxxxx
+    ; EMFNGOHP  | m2 contains IJKLMNOPQRSTUVxx
+    ; DLEMFNGO
+    ; CKDLEMFN
+    ; BJCKDLEM
+    ; AIBJCKDL
+
+    punpcklbw               m1, m2
+    movhlps                 m2, m2
+
+    ; m1 contains AIBJCKDLEMFNGOHP
+    ; m2 contains QRSTUVxxxxxxxxxx
+
+    movhps   [dstq +stride3q ], m1 ; row 3 = high half, row 7 = low half
+    movq     [dst4q+stride3q ], m1
+    PALIGNR                 m3, m2, m1, 2, m4
+    movhps   [dstq +strideq*2], m3 ; rows 2 and 6
+    movq     [dst4q+strideq*2], m3
+    PALIGNR                 m3, m2, m1, 4, m4
+    movhps   [dstq +strideq*1], m3 ; rows 1 and 5
+    movq     [dst4q+strideq*1], m3
+    PALIGNR                 m2, m1, 6, m4
+    movhps   [dstq +strideq*0], m2 ; rows 0 and 4
+    movq     [dst4q+strideq*0], m2
+    RET
+
+cglobal vp9_ipred_hd_16x16, 4, 6, 7, dst, stride, l, a ; writes sixteen 16-byte rows to dst from bytes read at l and a
+    mova                    m0, [lq] ; aligned 16-byte load of l
+    movu                    m3, [aq-1] ; unaligned 16-byte load starting one byte before a
+    DEFINE_ARGS dst, stride, stride4, dst4, dst8, dst12 ; l and a are not referenced again
+    lea               stride4q, [strideq*4] ; stride4 = 4 * stride; it is also counted down as the loop variable
+    lea                  dst4q, [dstq +stride4q] ; dst4, dst8, dst12 = dst + 4, 8, 12 * stride
+    lea                  dst8q, [dst4q+stride4q]
+    lea                 dst12q, [dst8q+stride4q]
+    psrldq                  m4, m3,  1
+    psrldq                  m5, m3,  2
+    LOWPASS                  5,  4,  3,  6 ; LOWPASS is defined outside this section
+    PALIGNR                 m1, m3, m0,  1, m6
+    PALIGNR                 m2, m3, m0,  2, m6
+    LOWPASS                  2,  1,  0,  6
+    pavgb                   m1, m0
+    SBUTTERFLY              bw,  1,  2,  6
+
+    ; I PROBABLY INVERTED L0 and L16 here
+    ; m1, m2, m5
+.loop:
+    sub               stride4q, strideq ; row offset steps 3*stride, 2*stride, stride, 0; the flags set here are consumed by the jg below
+    movhps [dstq +stride4q +0], m2 ; one row in each of the four 4-row groups per iteration
+    movq   [dstq +stride4q +8], m5
+    mova   [dst4q+stride4q   ], m2
+    movhps [dst8q+stride4q +0], m1
+    movq   [dst8q+stride4q +8], m2
+    mova  [dst12q+stride4q   ], m1
+%if cpuflag(avx)
+    palignr                 m1, m2, m1, 2 ; all three branches shift the m1:m2:m5 chain down by two bytes
+    palignr                 m2, m5, m2, 2
+%elif cpuflag(ssse3)
+    palignr                 m3, m2, m1, 2
+    palignr                 m0, m5, m2, 2
+    mova                    m1, m3
+    mova                    m2, m0
+%else
+    ; slightly modified version of PALIGNR
+    mova                    m6, m2
+    mova                    m4, m5
+    pslldq                  m6, 14
+    pslldq                  m4, 14
+    psrldq                  m1, 2
+    psrldq                  m2, 2
+    por                     m1, m6
+    por                     m2, m4
+%endif
+    psrldq                  m5, 2
+    jg .loop ; tests the flags from the sub at the top (nothing in between writes EFLAGS); loops while stride4 > 0, i.e. 4 passes for a positive stride
+    RET
+
+cglobal vp9_ipred_hd_32x32, 4, 6, 8, dst, stride, l, a ; writes thirty-two 32-byte rows to dst from bytes read at l and a
+    mova                    m0, [lq] ; l[0..15]
+    mova                    m1, [lq+16] ; l[16..31]
+    movu                    m2, [aq-1] ; a[-1..14], unaligned
+    movu                    m3, [aq+15] ; a[15..30], unaligned
+    DEFINE_ARGS dst, stride, stride8, dst8, dst16, dst24 ; l and a are not referenced again
+    lea               stride8q, [strideq*8] ; stride8 = 8 * stride; it is also counted down as the loop variable
+    lea                  dst8q, [dstq  +stride8q] ; dst8, dst16, dst24 = dst + 8, 16, 24 * stride
+    lea                 dst16q, [dst8q +stride8q]
+    lea                 dst24q, [dst16q+stride8q]
+    psrldq                  m4, m3,  1
+    psrldq                  m5, m3,  2
+    LOWPASS                  5,  4,  3,  6 ; LOWPASS is defined outside this section
+    PALIGNR                 m4, m3, m2,  2, m6
+    PALIGNR                 m3, m2,  1, m6
+    LOWPASS                  4,  3,  2,  6
+    PALIGNR                 m3, m2, m1,  2, m6
+    PALIGNR                 m2, m1,  1, m6
+    LOWPASS                  3,  2,  1,  6
+    pavgb                   m2, m1
+    PALIGNR                 m6, m1, m0,  1, m7
+    PALIGNR                 m1, m0,  2, m7
+    LOWPASS                  1,  6,  0,  7
+    pavgb                   m0, m6
+    SBUTTERFLY              bw,  2,  3,  6
+    SBUTTERFLY              bw,  0,  1,  6
+
+    ; m0, m1, m2, m3, m4, m5
+.loop:
+    sub               stride8q, strideq ; row offset steps 7*stride .. 0; the flags set here are consumed by the jg below
+    mova  [dstq  +stride8q+ 0], m3 ; one 32-byte row in each of the four 8-row groups per iteration
+    mova  [dstq  +stride8q+16], m4
+    mova  [dst8q +stride8q+ 0], m2
+    mova  [dst8q +stride8q+16], m3
+    mova  [dst16q+stride8q+ 0], m1
+    mova  [dst16q+stride8q+16], m2
+    mova  [dst24q+stride8q+ 0], m0
+    mova  [dst24q+stride8q+16], m1
+%if cpuflag(avx)
+    palignr                 m0, m1, m0, 2 ; all three branches shift the m0..m5 chain down by two bytes
+    palignr                 m1, m2, m1, 2
+    palignr                 m2, m3, m2, 2
+    palignr                 m3, m4, m3, 2
+    palignr                 m4, m5, m4, 2
+    psrldq                  m5, 2
+%elif cpuflag(ssse3)
+    psrldq                  m6, m5, 2
+    palignr                 m5, m4, 2
+    palignr                 m4, m3, 2
+    palignr                 m3, m2, 2
+    palignr                 m2, m1, 2
+    palignr                 m1, m0, 2
+    mova                    m0, m1
+    mova                    m1, m2
+    mova                    m2, m3
+    mova                    m3, m4
+    mova                    m4, m5
+    mova                    m5, m6
+%else
+    ; sort of a half-integrated version of PALIGNR
+    pslldq                  m7, m4, 14
+    pslldq                  m6, m5, 14
+    psrldq                  m4, 2
+    psrldq                  m5, 2
+    por                     m4, m6
+    pslldq                  m6, m3, 14
+    psrldq                  m3, 2
+    por                     m3, m7
+    pslldq                  m7, m2, 14
+    psrldq                  m2, 2
+    por                     m2, m6
+    pslldq                  m6, m1, 14
+    psrldq                  m1, 2
+    por                     m1, m7
+    psrldq                  m0, 2
+    por                     m0, m6
+%endif
+    jg .loop ; tests the flags from the sub at the top (nothing in between writes EFLAGS); loops while stride8 > 0, i.e. 8 passes for a positive stride
+    RET
+%endmacro
+
+INIT_XMM sse2
+HD_XMM_FUNCS
+INIT_XMM ssse3
+HD_XMM_FUNCS
+INIT_XMM avx
+HD_XMM_FUNCS
+
+%macro HU_MMX_FUNCS 0 ; defines vp9_ipred_hu_4x4; expanded below for mmxext and ssse3
+cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l ; writes four 4-byte rows to dst from the 4 bytes at l (takes no 'a' argument)
+    movd                    m0, [lq]
+%if cpuflag(ssse3)
+    pshufb                  m0, [pb_0to2_5x3] ; table defined outside this section; presumably yields the same 01233333 layout as the branch below
+%else
+    punpcklbw               m1, m0, m0          ; 00112233
+    pshufw                  m1, m1, q3333       ; 33333333
+    punpckldq               m0, m1              ; 01233333
+%endif
+    psrlq                   m1, m0, 8 ; m0 shifted down by one byte
+    psrlq                   m2, m1, 8 ; m0 shifted down by two bytes
+    LOWPASS                  2,  1, 0, 3 ; LOWPASS is defined outside this section
+    pavgb                   m1, m0 ; rounded byte average of each byte of m0 with its successor
+    DEFINE_ARGS dst, stride, stride3 ; l is not referenced again; its register becomes stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    SBUTTERFLY              bw,  1, 2, 0
+    PALIGNR                 m2, m1, 2, m0
+    movd      [dstq+strideq*0], m1 ; row 0
+    movd      [dstq+strideq*1], m2 ; row 1
+    punpckhdq               m1, m1 ; bring each register's high dword down to the low dword
+    punpckhdq               m2, m2
+    movd      [dstq+strideq*2], m1 ; row 2
+    movd      [dstq+stride3q ], m2 ; row 3
+    RET
+%endmacro
+
+INIT_MMX mmxext
+HU_MMX_FUNCS
+INIT_MMX ssse3
+HU_MMX_FUNCS
+
+%macro HU_XMM_FUNCS 1 ; %1 = number of XMM registers declared by vp9_ipred_hu_32x32 (varies per instruction set)
+cglobal vp9_ipred_hu_8x8, 3, 4, 4, dst, stride, l ; writes eight 8-byte rows to dst from the 8 bytes at l (takes no 'a' argument)
+    movq                    m0, [lq]
+%if cpuflag(ssse3)
+    pshufb                  m0, [pb_0to6_9x7] ; table defined outside this section; presumably yields the same 0123456777777777 layout as the branch below
+%else
+    punpcklbw               m1, m0, m0          ; 0011223344556677
+    punpckhwd               m1, m1              ; 4444555566667777
+    shufps                  m0, m1, q3310       ; 0123456777777777
+%endif
+    psrldq                  m1, m0, 1 ; m0 shifted down by one byte
+    psrldq                  m2, m1, 1 ; m0 shifted down by two bytes
+    LOWPASS                  2,  1, 0, 3 ; LOWPASS is defined outside this section
+    pavgb                   m1, m0 ; rounded byte average of each byte of m0 with its successor
+    DEFINE_ARGS dst, stride, stride3, dst4 ; l is not referenced again; its register becomes stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    lea                  dst4q, [dstq+strideq*4] ; dst4 = dst + 4 * stride
+    SBUTTERFLY              bw,  1, 2, 0
+    movq     [dstq +strideq*0], m1 ; row 0 = low half, row 4 = high half
+    movhps   [dst4q+strideq*0], m1
+    PALIGNR                 m0, m2, m1, 2, m3
+    movq     [dstq +strideq*1], m0 ; rows 1 and 5
+    movhps   [dst4q+strideq*1], m0
+    PALIGNR                 m0, m2, m1, 4, m3
+    movq     [dstq +strideq*2], m0 ; rows 2 and 6
+    movhps   [dst4q+strideq*2], m0
+    PALIGNR                 m2, m1, 6, m3
+    movq     [dstq +stride3q ], m2 ; rows 3 and 7
+    movhps   [dst4q+stride3q ], m2
+    RET
+
+cglobal vp9_ipred_hu_16x16, 3, 4, 5, dst, stride, l ; writes sixteen 16-byte rows to dst from the 16 bytes at l (takes no 'a' argument)
+    mova                    m0, [lq] ; aligned 16-byte load of l
+%if cpuflag(ssse3)
+    mova                    m3, [pb_2toE_3xF] ; ssse3: shuffle masks come from tables defined outside this section; m3 is reused inside the loop
+    pshufb                  m1, m0, [pb_1toE_2xF]
+    pshufb                  m2, m0, m3
+%else
+    pand                    m3, m0, [pb_15x0_1xm1] ; non-ssse3: m1 and m2 are built with byte shifts and OR instead
+    psrldq                  m1, m0, 1
+    por                     m1, m3
+    punpckhbw               m3, m3
+    psrldq                  m2, m0, 2
+    por                     m2, m3
+%endif
+    LOWPASS                  2,  1,  0,  4 ; LOWPASS is defined outside this section
+    pavgb                   m1, m0
+    DEFINE_ARGS dst, stride, stride9, cnt ; l is not referenced again; its register becomes stride9
+    lea                stride9q, [strideq*8+strideq] ; stride9 = 9 * stride
+    mov                   cntd,  4 ; 4 iterations; each stores rows r, r+1, r+8 and r+9 relative to the current dst
+    SBUTTERFLY              bw,  1,  2,  0
+
+.loop:
+    mova      [dstq+strideq*0], m1
+    mova      [dstq+strideq*8], m2
+    PALIGNR                 m0, m2, m1, 2, m4
+%if cpuflag(ssse3)
+    pshufb                  m2, m3
+%else
+    psrldq                  m2, 2
+    por                     m2, m3
+%endif
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+stride9q ], m2
+    PALIGNR                 m1, m2, m0, 2, m4
+%if cpuflag(ssse3)
+    pshufb                  m2, m3
+%else
+    psrldq                  m2, 2
+    por                     m2, m3
+%endif
+    lea                   dstq, [dstq+strideq*2] ; advance dst by 2 * stride
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_hu_32x32, 3, 7, %1, dst, stride, l ; writes thirty-two 32-byte rows to dst from the 32 bytes at l; %1 (macro argument) is the XMM register count
+    mova                    m1, [lq] ; l[0..15]
+    mova                    m0, [lq+16] ; l[16..31]
+    PALIGNR                 m2, m0, m1,  1, m5
+    PALIGNR                 m3, m0, m1,  2, m5
+    LOWPASS                  3,  2,  1,  5 ; LOWPASS is defined outside this section
+    pavgb                   m2, m1
+%if cpuflag(ssse3)
+    mova                    m4, [pb_2toE_3xF] ; ssse3: shuffle masks come from tables defined outside this section; m4 is reused inside the loop
+    pshufb                  m5, m0, [pb_1toE_2xF]
+    pshufb                  m1, m0, m4
+%else
+    pand                    m4, m0, [pb_15x0_1xm1] ; non-ssse3: m5 and m1 are built with byte shifts and OR instead
+    psrldq                  m5, m0, 1
+    por                     m5, m4
+    punpckhbw               m4, m4
+    psrldq                  m1, m0, 2
+    por                     m1, m4
+%endif
+    LOWPASS                  1,  5,  0,  6
+    pavgb                   m0, m5
+    DEFINE_ARGS dst, stride, cnt, stride0, dst8, dst16, dst24 ; l is not referenced again; its register becomes cnt
+    mov                   cntd,  8 ; 8 iterations; each stores one row in each of the four 8-row groups
+    xor               stride0q, stride0q ; stride0 = running row offset, starts at 0
+    lea                  dst8q, [dstq  +strideq*8] ; dst8, dst16, dst24 = dst + 8, 16, 24 * stride
+    lea                 dst16q, [dst8q +strideq*8]
+    lea                 dst24q, [dst16q+strideq*8]
+    SBUTTERFLY              bw,  0,  1,  5
+    SBUTTERFLY              bw,  2,  3,  5
+%if cpuflag(ssse3)
+    pshufb                  m6, m1, [pb_15] ; m6 is never modified in the loop: it is stored as the right half of every row in the last group
+%else
+    pshufhw                 m6, m4, q3333
+    punpckhqdq              m6, m6
+%endif
+
+.loop:
+    mova  [dstq  +stride0q+ 0], m2
+    mova  [dstq  +stride0q+16], m3
+    mova  [dst8q +stride0q+ 0], m3
+    mova  [dst8q +stride0q+16], m0
+    mova  [dst16q+stride0q+ 0], m0
+    mova  [dst16q+stride0q+16], m1
+    mova  [dst24q+stride0q+ 0], m1
+    mova  [dst24q+stride0q+16], m6
+%if cpuflag(avx)
+    palignr                 m2, m3, m2, 2 ; all three branches shift the m2:m3:m0:m1 chain down by two bytes
+    palignr                 m3, m0, m3, 2
+    palignr                 m0, m1, m0, 2
+    pshufb                  m1, m4
+%elif cpuflag(ssse3)
+    pshufb                  m5, m1, m4
+    palignr                 m1, m0, 2
+    palignr                 m0, m3, 2
+    palignr                 m3, m2, 2
+    mova                    m2, m3
+    mova                    m3, m0
+    mova                    m0, m1
+    mova                    m1, m5
+%else
+    ; half-integrated version of PALIGNR
+    pslldq                  m5, m1, 14
+    pslldq                  m7, m0, 14
+    psrldq                  m1, 2
+    psrldq                  m0, 2
+    por                     m1, m4
+    por                     m0, m5
+    pslldq                  m5, m3, 14
+    psrldq                  m3, 2
+    por                     m3, m7
+    psrldq                  m2, 2
+    por                     m2, m5
+%endif
+    add               stride0q, strideq ; next row offset
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+HU_XMM_FUNCS 8 ; 8 XMM regs for hu_32x32: the non-ssse3 loop branch also uses m7
+INIT_XMM ssse3
+HU_XMM_FUNCS 7
+INIT_XMM avx
+HU_XMM_FUNCS 7
+
+; FIXME 127, 128, 129 ?
diff --git a/media/ffvpx/libavcodec/x86/vp9intrapred_16bpp.asm b/media/ffvpx/libavcodec/x86/vp9intrapred_16bpp.asm
new file mode 100644
index 0000000000..808056a809
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9intrapred_16bpp.asm
@@ -0,0 +1,2497 @@
+;******************************************************************************
+;* VP9 Intra prediction SIMD optimizations
+;*
+;* Copyright (c) 2015 Ronald S. Bultje <rsbultje gmail com>
+;* Copyright (c) 2015 Henrik Gramner <henrik gramner com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32 ; read-only constants, 32-byte aligned
+
+pd_2: times 8 dd 2 ; eight dwords each; pd_2/pd_4/pd_8 are added as rounding terms before psrad in the dc functions
+pd_4: times 8 dd 4
+pd_8: times 8 dd 8
+
+pb_2to15_14_15: db 2,3,4,5,6,7,8,9,10,11,12,13,14,15,14,15 ; 16 byte indices per table (presumably pshufb control masks; confirm at the use sites)
+pb_4_5_8to13_8x0: db 4,5,8,9,10,11,12,13,0,0,0,0,0,0,0,0
+pb_0to7_67x4: db 0,1,2,3,4,5,6,7,6,7,6,7,6,7,6,7
+
+cextern pw_1 ; the cextern symbols below are constants defined in other object files
+cextern pw_1023
+cextern pw_4095
+cextern pd_16
+cextern pd_32
+cextern pd_65535
+
+; FIXME most top-only functions (ddl, vl, v, dc_top) can be modified to take
+; only 3 registers on x86-32, which would make it one cycle faster, but that
+; would make the code quite a bit uglier...
+
+SECTION .text
+
+%macro SCRATCH 3-4 ; SCRATCH reg, spare_reg, mem[, name]: set the value in m<reg> aside; with a name, reg_<name> is defined as its new location
+%if ARCH_X86_64
+    SWAP                %1, %2 ; x86-64: swap the register numbers, so the value is now addressed as m<spare_reg>
+%if %0 == 4
+%define reg_%4 m%2
+%endif
+%else
+    mova              [%3], m%1 ; otherwise: store the value to the memory operand instead
+%if %0 == 4
+%define reg_%4 [%3]
+%endif
+%endif
+%endmacro
+
+%macro UNSCRATCH 3-4 ; UNSCRATCH reg, spare_reg, mem[, name]: bring back a value set aside by SCRATCH; with a name, reg_<name> is undefined again
+%if ARCH_X86_64
+    SWAP                %1, %2 ; x86-64: swap the register numbers back
+%else
+    mova               m%1, [%3] ; otherwise: reload the value from the memory operand
+%endif
+%if %0 == 4
+%undef reg_%4
+%endif
+%endmacro
+
+%macro PRELOAD 2-3 ; PRELOAD reg, mem[, name]: on x86-64 load [mem] into m<reg>; otherwise emit no instruction
+%if ARCH_X86_64
+    mova               m%1, [%2] ; with a name, reg_<name> becomes m<reg> here and the memory operand [mem] in the non-x86-64 case
+%if %0 == 3
+%define reg_%3 m%1
+%endif
+%elif %0 == 3
+%define reg_%3 [%2]
+%endif
+%endmacro
+
+INIT_MMX mmx
+cglobal vp9_ipred_v_4x4_16, 2, 4, 1, dst, stride, l, a ; copies the 8 bytes at a into each of 4 rows of dst; l is unused
+    movifnidn               aq, amp ; cglobal only auto-loads the first 2 arguments, so fetch a unless it is already in its register
+    mova                    m0, [aq] ; one 8-byte MMX register
+    DEFINE_ARGS dst, stride, stride3 ; a is not referenced again; the third register becomes stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse
+cglobal vp9_ipred_v_8x8_16, 2, 4, 1, dst, stride, l, a ; copies the 16 bytes at a into each of 8 rows of dst; l is unused
+    movifnidn               aq, amp ; cglobal only auto-loads the first 2 arguments, so fetch a unless it is already in its register
+    mova                    m0, [aq] ; one 16-byte XMM register, aligned load
+    DEFINE_ARGS dst, stride, stride3 ; a is not referenced again; the third register becomes stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    mova      [dstq+strideq*0], m0 ; rows 0..3
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4] ; advance dst by 4 * stride
+    mova      [dstq+strideq*0], m0 ; rows 4..7
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse
+cglobal vp9_ipred_v_16x16_16, 2, 4, 2, dst, stride, l, a ; copies the 32 bytes at a into each of 16 rows of dst; l is unused
+    movifnidn               aq, amp ; cglobal only auto-loads the first 2 arguments, so fetch a unless it is already in its register
+    mova                    m0, [aq] ; bytes 0..15 of a
+    mova                    m1, [aq+mmsize] ; bytes 16..31 of a
+    DEFINE_ARGS dst, stride, stride3, cnt ; a is not referenced again
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    mov                   cntd, 4 ; 4 iterations, 4 rows per iteration
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m1
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m1
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m1
+    lea                   dstq, [dstq+strideq*4] ; advance dst by 4 * stride
+    dec               cntd
+    jg .loop
+    RET
+
+INIT_XMM sse
+cglobal vp9_ipred_v_32x32_16, 2, 4, 4, dst, stride, l, a ; copies the 64 bytes at a into each of 32 rows of dst; l is unused
+    movifnidn               aq, amp ; cglobal only auto-loads the first 2 arguments, so fetch a unless it is already in its register
+    mova                    m0, [aq+mmsize*0] ; four consecutive 16-byte loads from a
+    mova                    m1, [aq+mmsize*1]
+    mova                    m2, [aq+mmsize*2]
+    mova                    m3, [aq+mmsize*3]
+    DEFINE_ARGS dst, stride, cnt ; a is not referenced again; the third register becomes cnt
+    mov                   cntd, 16 ; 16 iterations, 2 rows per iteration
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*0+32], m2
+    mova   [dstq+strideq*0+48], m3
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m1
+    mova   [dstq+strideq*1+32], m2
+    mova   [dstq+strideq*1+48], m3
+    lea                   dstq, [dstq+strideq*2] ; advance dst by 2 * stride
+    dec               cntd
+    jg .loop
+    RET
+
+INIT_MMX mmxext
+cglobal vp9_ipred_h_4x4_16, 3, 3, 4, dst, stride, l, a ; fills each of 4 rows of dst (8 bytes) with one 16-bit word from l; a is named but never loaded or used
+    mova                    m3, [lq] ; 4 words from l
+    DEFINE_ARGS dst, stride, stride3 ; l is not referenced again; its register becomes stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    pshufw                  m0, m3, q3333 ; broadcast word 3 (used for row 0)
+    pshufw                  m1, m3, q2222 ; broadcast word 2 (row 1)
+    pshufw                  m2, m3, q1111 ; broadcast word 1 (row 2)
+    pshufw                  m3, m3, q0000 ; broadcast word 0 (row 3)
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+    mova      [dstq+strideq*2], m2
+    mova      [dstq+stride3q ], m3
+    RET
+
+INIT_XMM sse2
+cglobal vp9_ipred_h_8x8_16, 3, 3, 4, dst, stride, l, a ; fills each of 8 rows of dst (16 bytes) with one 16-bit word from l; a is named but never loaded or used
+    mova                    m2, [lq] ; 8 words from l
+    DEFINE_ARGS dst, stride, stride3 ; l is not referenced again; its register becomes stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    punpckhwd               m3, m2, m2 ; words 4..7, each duplicated into a dword
+    pshufd                  m0, m3, q3333 ; word 7 broadcast (row 0)
+    pshufd                  m1, m3, q2222 ; word 6 broadcast (row 1)
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+    pshufd                  m0, m3, q1111 ; word 5 (row 2)
+    pshufd                  m1, m3, q0000 ; word 4 (row 3)
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m1
+    lea                   dstq, [dstq+strideq*4] ; advance dst by 4 * stride
+    punpcklwd               m2, m2 ; words 0..3, each duplicated into a dword
+    pshufd                  m0, m2, q3333 ; word 3 (row 4)
+    pshufd                  m1, m2, q2222 ; word 2 (row 5)
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+    pshufd                  m0, m2, q1111 ; word 1 (row 6)
+    pshufd                  m1, m2, q0000 ; word 0 (row 7)
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m1
+    RET
+
+INIT_XMM sse2
+cglobal vp9_ipred_h_16x16_16, 3, 5, 4, dst, stride, l, stride3, cnt ; fills each of 16 rows of dst (32 bytes) with one 16-bit word from l
+    mov                   cntd, 3 ; cnt runs 3, 2, 1, 0: l is consumed from its last group of 4 words to its first
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+.loop:
+    movh                    m3, [lq+cntq*8] ; 4 words (8 bytes) at l + 8 * cnt
+    punpcklwd               m3, m3 ; each word duplicated into a dword
+    pshufd                  m0, m3, q3333 ; highest of the 4 words goes to the first row of this group
+    pshufd                  m1, m3, q2222
+    pshufd                  m2, m3, q1111
+    pshufd                  m3, m3, q0000
+    mova    [dstq+strideq*0+ 0], m0
+    mova    [dstq+strideq*0+16], m0
+    mova    [dstq+strideq*1+ 0], m1
+    mova    [dstq+strideq*1+16], m1
+    mova    [dstq+strideq*2+ 0], m2
+    mova    [dstq+strideq*2+16], m2
+    mova    [dstq+stride3q + 0], m3
+    mova    [dstq+stride3q +16], m3
+    lea                   dstq, [dstq+strideq*4] ; advance dst by 4 * stride
+    dec                   cntd
+    jge .loop ; continue until cnt goes negative (4 iterations)
+    RET
+
+INIT_XMM sse2
+cglobal vp9_ipred_h_32x32_16, 3, 5, 4, dst, stride, l, stride3, cnt ; fills each of 32 rows of dst (64 bytes) with one 16-bit word from l
+    mov                   cntd, 7 ; cnt runs 7..0: l is consumed from its last group of 4 words to its first
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+.loop:
+    movh                    m3, [lq+cntq*8] ; 4 words (8 bytes) at l + 8 * cnt
+    punpcklwd               m3, m3 ; each word duplicated into a dword
+    pshufd                  m0, m3, q3333 ; highest of the 4 words goes to the first row of this group
+    pshufd                  m1, m3, q2222
+    pshufd                  m2, m3, q1111
+    pshufd                  m3, m3, q0000
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*0+32], m0
+    mova   [dstq+strideq*0+48], m0
+    mova   [dstq+strideq*1+ 0], m1
+    mova   [dstq+strideq*1+16], m1
+    mova   [dstq+strideq*1+32], m1
+    mova   [dstq+strideq*1+48], m1
+    mova   [dstq+strideq*2+ 0], m2
+    mova   [dstq+strideq*2+16], m2
+    mova   [dstq+strideq*2+32], m2
+    mova   [dstq+strideq*2+48], m2
+    mova   [dstq+stride3q + 0], m3
+    mova   [dstq+stride3q +16], m3
+    mova   [dstq+stride3q +32], m3
+    mova   [dstq+stride3q +48], m3
+    lea                   dstq, [dstq+strideq*4] ; advance dst by 4 * stride
+    dec                   cntd
+    jge .loop ; continue until cnt goes negative (8 iterations)
+    RET
+
+INIT_MMX mmxext
+cglobal vp9_ipred_dc_4x4_16, 4, 4, 2, dst, stride, l, a ; fills 4 rows of dst (8 bytes each) with one 16-bit value derived from the 4 words at l and the 4 words at a
+    mova                    m0, [lq]
+    paddw                   m0, [aq] ; word-wise l + a
+    DEFINE_ARGS dst, stride, stride3 ; l and a are not referenced again; the third register becomes stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    pmaddwd                 m0, [pw_1] ; multiply-add against pw_1 (external; presumably all-ones words), summing adjacent word pairs into dwords
+    pshufw                  m1, m0, q3232 ; high dword copied down to the low dword
+    paddd                   m0, [pd_4] ; rounding term for the shift by 3
+    paddd                   m0, m1 ; low dword = sum of both dwords + 4
+    psrad                   m0, 3
+    pshufw                  m0, m0, q0000 ; broadcast the low word to all 4 words
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse2
+cglobal vp9_ipred_dc_8x8_16, 4, 4, 2, dst, stride, l, a ; fills 8 rows of dst (16 bytes each) with one 16-bit value derived from the 8 words at l and the 8 words at a
+    mova                    m0, [lq]
+    paddw                   m0, [aq] ; word-wise l + a
+    DEFINE_ARGS dst, stride, stride3 ; l and a are not referenced again; the third register becomes stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    pmaddwd                 m0, [pw_1] ; multiply-add against pw_1 (external; presumably all-ones words), summing adjacent word pairs into dwords
+    pshufd                  m1, m0, q3232 ; fold the high qword onto the low qword
+    paddd                   m0, m1
+    pshufd                  m1, m0, q1111 ; then fold dword 1 onto dword 0
+    paddd                   m0, [pd_8] ; rounding term for the shift by 4
+    paddd                   m0, m1
+    psrad                   m0, 4
+    pshuflw                 m0, m0, q0000 ; broadcast the low word across the low 4 words
+    punpcklqdq              m0, m0 ; and duplicate them into the high half
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4] ; advance dst by 4 * stride
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse2
+cglobal vp9_ipred_dc_16x16_16, 4, 4, 2, dst, stride, l, a ; fills 16 rows of dst (32 bytes each) with one 16-bit value derived from the 16 words at l and the 16 words at a
+    mova                    m0, [lq]
+    paddw                   m0, [lq+mmsize] ; word-wise sum of both 16-byte halves of l and of a
+    paddw                   m0, [aq]
+    paddw                   m0, [aq+mmsize]
+    DEFINE_ARGS dst, stride, stride3, cnt ; l and a are not referenced again; their registers become stride3 and cnt
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    mov                   cntd, 4 ; 4 iterations, 4 rows per iteration
+    pmaddwd                 m0, [pw_1] ; multiply-add against pw_1 (external; presumably all-ones words), summing adjacent word pairs into dwords
+    pshufd                  m1, m0, q3232 ; fold the high qword onto the low qword
+    paddd                   m0, m1
+    pshufd                  m1, m0, q1111 ; then fold dword 1 onto dword 0
+    paddd                   m0, [pd_16] ; rounding term (external constant) for the shift by 5
+    paddd                   m0, m1
+    psrad                   m0, 5
+    pshuflw                 m0, m0, q0000 ; broadcast the low word across the low 4 words
+    punpcklqdq              m0, m0 ; and duplicate them into the high half
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m0
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m0
+    lea                   dstq, [dstq+strideq*4] ; advance dst by 4 * stride
+    dec                   cntd
+    jg .loop
+    RET
+
+INIT_XMM sse2
+cglobal vp9_ipred_dc_32x32_16, 4, 4, 2, dst, stride, l, a ; fills 32 rows of dst (64 bytes each) with one 16-bit value derived from the 32 words at l and the 32 words at a
+    mova                    m0, [lq+mmsize*0] ; word-wise sum of the four 16-byte pieces of l and of a
+    paddw                   m0, [lq+mmsize*1]
+    paddw                   m0, [lq+mmsize*2]
+    paddw                   m0, [lq+mmsize*3]
+    paddw                   m0, [aq+mmsize*0]
+    paddw                   m0, [aq+mmsize*1]
+    paddw                   m0, [aq+mmsize*2]
+    paddw                   m0, [aq+mmsize*3]
+    DEFINE_ARGS dst, stride, stride3, cnt ; l and a are not referenced again; their registers become stride3 and cnt
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride (not used by the loop below)
+    mov                   cntd, 16 ; 16 iterations, 2 rows per iteration
+    pmaddwd                 m0, [pw_1] ; multiply-add against pw_1 (external; presumably all-ones words), summing adjacent word pairs into dwords
+    pshufd                  m1, m0, q3232 ; fold the high qword onto the low qword
+    paddd                   m0, m1
+    pshufd                  m1, m0, q1111 ; then fold dword 1 onto dword 0
+    paddd                   m0, [pd_32] ; rounding term (external constant) for the shift by 6
+    paddd                   m0, m1
+    psrad                   m0, 6
+    pshuflw                 m0, m0, q0000 ; broadcast the low word across the low 4 words
+    punpcklqdq              m0, m0 ; and duplicate them into the high half
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*0+32], m0
+    mova   [dstq+strideq*0+48], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*1+32], m0
+    mova   [dstq+strideq*1+48], m0
+    lea                   dstq, [dstq+strideq*2] ; advance dst by 2 * stride
+    dec                   cntd
+    jg .loop
+    RET
+
+%macro DC_1D_FNS 2
+INIT_MMX mmxext
+cglobal vp9_ipred_dc_%1_4x4_16, 4, 4, 2, dst, stride, l, a ; fills 4 rows of dst (8 bytes each) with one 16-bit value derived from the 4 words at the macro's %2 address
+    mova                    m0, [%2] ; %2 is the second macro argument, used as the source address expression
+    DEFINE_ARGS dst, stride, stride3 ; the third register becomes stride3; %2 is not read again
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    pmaddwd                 m0, [pw_1] ; multiply-add against pw_1 (external; presumably all-ones words), summing adjacent word pairs into dwords
+    pshufw                  m1, m0, q3232 ; high dword copied down to the low dword
+    paddd                   m0, [pd_2] ; rounding term for the shift by 2
+    paddd                   m0, m1 ; low dword = sum of both dwords + 2
+    psrad                   m0, 2
+    pshufw                  m0, m0, q0000 ; broadcast the low word to all 4 words
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse2
+cglobal vp9_ipred_dc_%1_8x8_16, 4, 4, 2, dst, stride, l, a
+    mova                    m0, [%2]
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+    pmaddwd                 m0, [pw_1]
+    pshufd                  m1, m0, q3232
+    paddd                   m0, m1
+    pshufd                  m1, m0, q1111
+    paddd                   m0, [pd_4]
+    paddd                   m0, m1
+    psrad                   m0, 3
+    pshuflw                 m0, m0, q0000
+    punpcklqdq              m0, m0
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse2
+cglobal vp9_ipred_dc_%1_16x16_16, 4, 4, 2, dst, stride, l, a
+    mova                    m0, [%2]
+    paddw                   m0, [%2+mmsize]
+    DEFINE_ARGS dst, stride, stride3, cnt
+    lea               stride3q, [strideq*3]
+    mov                   cntd, 4
+    pmaddwd                 m0, [pw_1]
+    pshufd                  m1, m0, q3232
+    paddd                   m0, m1
+    pshufd                  m1, m0, q1111
+    paddd                   m0, [pd_8]
+    paddd                   m0, m1
+    psrad                   m0, 4
+    pshuflw                 m0, m0, q0000
+    punpcklqdq              m0, m0
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m0
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+
+INIT_XMM sse2
+cglobal vp9_ipred_dc_%1_32x32_16, 4, 4, 2, dst, stride, l, a
+    mova                    m0, [%2+mmsize*0]
+    paddw                   m0, [%2+mmsize*1]
+    paddw                   m0, [%2+mmsize*2]
+    paddw                   m0, [%2+mmsize*3]
+    DEFINE_ARGS dst, stride, cnt
+    mov                   cntd, 16
+    pmaddwd                 m0, [pw_1]
+    pshufd                  m1, m0, q3232
+    paddd                   m0, m1
+    pshufd                  m1, m0, q1111
+    paddd                   m0, [pd_16]
+    paddd                   m0, m1
+    psrad                   m0, 5
+    pshuflw                 m0, m0, q0000
+    punpcklqdq              m0, m0
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*0+32], m0
+    mova   [dstq+strideq*0+48], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*1+32], m0
+    mova   [dstq+strideq*1+48], m0
+    lea                   dstq, [dstq+strideq*2]
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+DC_1D_FNS top,  aq
+DC_1D_FNS left, lq
+
+INIT_MMX mmxext
+cglobal vp9_ipred_tm_4x4_10, 4, 4, 6, dst, stride, l, a  ; TM predictor, 4x4 words: dst[y][x] = clip(left + above[x] - topleft, 0, m5)
+    mova                    m5, [pw_1023]           ; upper clip bound of this variant
+.body:                                              ; shared entry: the _12 variant jumps here with its own bound in m5
+    mova                    m4, [aq]                ; 4 above words
+    mova                    m3, [lq]                ; 4 left words
+    movd                    m0, [aq-4]              ; the two words stored just before the above row
+    pshufw                  m0, m0, q1111           ; broadcast the word at aq-2 (top-left)
+    psubw                   m4, m0                  ; above[x] - topleft
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+    pshufw                  m0, m3, q3333           ; row 0 uses left word 3 ...
+    pshufw                  m1, m3, q2222
+    pshufw                  m2, m3, q1111
+    pshufw                  m3, m3, q0000           ; ... row 3 uses left word 0
+    paddw                   m0, m4
+    paddw                   m1, m4
+    paddw                   m2, m4
+    paddw                   m3, m4
+    pxor                    m4, m4                  ; m4 is now the lower clip bound (0)
+    pmaxsw                  m0, m4
+    pmaxsw                  m1, m4
+    pmaxsw                  m2, m4
+    pmaxsw                  m3, m4
+    pminsw                  m0, m5
+    pminsw                  m1, m5
+    pminsw                  m2, m5
+    pminsw                  m3, m5
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+    mova      [dstq+strideq*2], m2
+    mova      [dstq+stride3q ], m3
+    RET
+
+cglobal vp9_ipred_tm_4x4_12, 4, 4, 6, dst, stride, l, a  ; same prologue arguments as the _10 variant; only the upper clip bound differs
+    mova                    m5, [pw_4095]           ; upper clip bound, then tail-jump into the _10 function's .body
+    jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_4x4_10 %+ SUFFIX).body
+
+INIT_XMM sse2
+cglobal vp9_ipred_tm_8x8_10, 4, 5, 7, dst, stride, l, a  ; TM predictor, 8x8 words: dst[y][x] = clip(left + above[x] - topleft, 0, m4)
+    mova                    m4, [pw_1023]           ; upper clip bound of this variant
+.body:                                              ; shared entry: the _12 variant jumps here with its own bound in m4
+    pxor                    m6, m6                  ; lower clip bound (0)
+    mova                    m5, [aq]                ; 8 above words
+    movd                    m0, [aq-4]              ; the two words stored just before the above row
+    pshuflw                 m0, m0, q1111           ; broadcast the word at aq-2 (top-left) ...
+    punpcklqdq              m0, m0                  ; ... to all 8 lanes
+    psubw                   m5, m0                  ; above[x] - topleft
+    DEFINE_ARGS dst, stride, l, stride3, cnt
+    lea               stride3q, [strideq*3]
+    mov                   cntd, 1                   ; two iterations (cnt = 1, 0), 4 rows each
+.loop:
+    movh                    m3, [lq+cntq*8]         ; 4 left words; the first iteration reads words 4-7
+    punpcklwd               m3, m3                  ; duplicate each word so every dword holds one left value twice
+    pshufd                  m0, m3, q3333           ; first row of this group uses the highest of the 4 left words
+    pshufd                  m1, m3, q2222
+    pshufd                  m2, m3, q1111
+    pshufd                  m3, m3, q0000
+    paddw                   m0, m5
+    paddw                   m1, m5
+    paddw                   m2, m5
+    paddw                   m3, m5
+    pmaxsw                  m0, m6
+    pmaxsw                  m1, m6
+    pmaxsw                  m2, m6
+    pmaxsw                  m3, m6
+    pminsw                  m0, m4
+    pminsw                  m1, m4
+    pminsw                  m2, m4
+    pminsw                  m3, m4
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+    mova      [dstq+strideq*2], m2
+    mova      [dstq+stride3q ], m3
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jge .loop                                       ; runs again for cnt == 0, exits once cnt goes negative
+    RET
+
+cglobal vp9_ipred_tm_8x8_12, 4, 5, 7, dst, stride, l, a  ; same prologue arguments as the _10 variant; only the upper clip bound differs
+    mova                    m4, [pw_4095]           ; upper clip bound, then tail-jump into the _10 function's .body
+    jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_8x8_10 %+ SUFFIX).body
+
+INIT_XMM sse2
+cglobal vp9_ipred_tm_16x16_10, 4, 4, 8, dst, stride, l, a  ; TM predictor, 16x16 words: dst[y][x] = clip(left + above[x] - topleft, 0, m7)
+    mova                    m7, [pw_1023]           ; upper clip bound of this variant
+.body:                                              ; shared entry: the _12 variant jumps here with its own bound in m7
+    pxor                    m6, m6                  ; lower clip bound (0)
+    mova                    m4, [aq]                ; above words 0-7
+    mova                    m5, [aq+mmsize]         ; above words 8-15
+    movd                    m0, [aq-4]              ; the two words stored just before the above row
+    pshuflw                 m0, m0, q1111           ; broadcast the word at aq-2 (top-left) ...
+    punpcklqdq              m0, m0                  ; ... to all 8 lanes
+    psubw                   m4, m0                  ; above[x] - topleft, both halves
+    psubw                   m5, m0
+    DEFINE_ARGS dst, stride, l, cnt
+    mov                   cntd, 7                   ; 8 iterations (cnt = 7..0), 2 rows each
+.loop:
+    movd                    m3, [lq+cntq*4]         ; 2 left words for this row pair
+    punpcklwd               m3, m3                  ; duplicate each word into a full dword
+    pshufd                  m2, m3, q1111           ; first row of the pair: the higher-indexed left word
+    pshufd                  m3, m3, q0000           ; second row of the pair: the lower-indexed left word
+    paddw                   m0, m2, m4              ; first row, left half
+    paddw                   m2, m5                  ; first row, right half
+    paddw                   m1, m3, m4              ; second row, left half
+    paddw                   m3, m5                  ; second row, right half
+    pmaxsw                  m0, m6
+    pmaxsw                  m2, m6
+    pmaxsw                  m1, m6
+    pmaxsw                  m3, m6
+    pminsw                  m0, m7
+    pminsw                  m2, m7
+    pminsw                  m1, m7
+    pminsw                  m3, m7
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m2
+    mova   [dstq+strideq*1+ 0], m1
+    mova   [dstq+strideq*1+16], m3
+    lea                   dstq, [dstq+strideq*2]
+    dec                   cntd
+    jge .loop                                       ; runs again for cnt == 0, exits once cnt goes negative
+    RET
+
+cglobal vp9_ipred_tm_16x16_12, 4, 4, 8, dst, stride, l, a  ; same prologue arguments as the _10 variant; only the upper clip bound differs
+    mova                    m7, [pw_4095]           ; upper clip bound, then tail-jump into the _10 function's .body
+    jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_16x16_10 %+ SUFFIX).body
+
+INIT_XMM sse2
+cglobal vp9_ipred_tm_32x32_10, 4, 4, 10, 32 * -ARCH_X86_32, dst, stride, l, a  ; TM predictor, 32x32 words; the stack-size expression is 0 unless ARCH_X86_32 (see x86inc for the meaning of a negative size)
+    mova                    m0, [pw_1023]           ; upper clip bound of this variant
+.body:                                              ; shared entry: the _12 variant jumps here with its own bound in m0
+    pxor                    m1, m1                  ; lower clip bound (0)
+%if ARCH_X86_64
+    SWAP                     0, 8                   ; rename so the bounds live in m8/m9 and m0/m1 are free again
+    SWAP                     1, 9
+%define reg_min m9
+%define reg_max m8
+%else
+    mova              [rsp+ 0], m0                  ; keep the bounds in the two reserved stack slots instead
+    mova              [rsp+16], m1
+%define reg_min [rsp+16]
+%define reg_max [rsp+ 0]
+%endif
+
+    mova                    m4, [aq+mmsize*0]       ; above words 0-7
+    mova                    m5, [aq+mmsize*1]       ; above words 8-15
+    mova                    m6, [aq+mmsize*2]       ; above words 16-23
+    mova                    m7, [aq+mmsize*3]       ; above words 24-31
+    movd                    m0, [aq-4]              ; the two words stored just before the above row
+    pshuflw                 m0, m0, q1111           ; broadcast the word at aq-2 (top-left) ...
+    punpcklqdq              m0, m0                  ; ... to all 8 lanes
+    psubw                   m4, m0                  ; above[x] - topleft, all four quarters
+    psubw                   m5, m0
+    psubw                   m6, m0
+    psubw                   m7, m0
+    DEFINE_ARGS dst, stride, l, cnt
+    mov                   cntd, 31                  ; 32 iterations (cnt = 31..0), one row each
+.loop:
+    pinsrw                  m3, [lq+cntq*2], 0      ; one left word into lane 0; the other lanes are overwritten by the broadcast below
+    punpcklwd               m3, m3
+    pshufd                  m3, m3, q0000           ; broadcast that left word to all 8 lanes
+    paddw                   m0, m3, m4
+    paddw                   m1, m3, m5
+    paddw                   m2, m3, m6
+    paddw                   m3, m7
+    pmaxsw                  m0, reg_min
+    pmaxsw                  m1, reg_min
+    pmaxsw                  m2, reg_min
+    pmaxsw                  m3, reg_min
+    pminsw                  m0, reg_max
+    pminsw                  m1, reg_max
+    pminsw                  m2, reg_max
+    pminsw                  m3, reg_max
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*0+32], m2
+    mova   [dstq+strideq*0+48], m3
+    add                   dstq, strideq
+    dec                   cntd
+    jge .loop                                       ; runs again for cnt == 0, exits once cnt goes negative
+    RET
+
+cglobal vp9_ipred_tm_32x32_12, 4, 4, 10, 32 * -ARCH_X86_32, dst, stride, l, a  ; same prologue arguments (including stack size) as the _10 variant; only the upper clip bound differs
+    mova                    m0, [pw_4095]           ; upper clip bound, then tail-jump into the _10 function's .body
+    jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_32x32_10 %+ SUFFIX).body
+
+; Directional intra prediction functions
+;
+; in the functions below, 'abcdefgh' refers to above data (sometimes simply
+; abbreviated as a[N-M]). 'stuvwxyz' refers to left data (sometimes simply
+; abbreviated as l[N-M]). * is top-left data. ABCDEFG or A[N-M] is filtered
+; above data, STUVWXYZ or L[N-M] is filtered left data, and # is filtered
+; top-left data.
+
+; left=(left+2*center+right+2)>>2
+%macro LOWPASS 3 ; left [dst], center, right
+    paddw                  m%1, m%3                 ; left + right
+    psraw                  m%1, 1                   ; (left + right) >> 1; arithmetic shift, so the word sum must stay below 0x8000
+    pavgw                  m%1, m%2                 ; (((left + right) >> 1) + center + 1) >> 1, which equals the formula above
+%endmacro
+
+; abcdefgh (src) -> bcdefghh (dst)
+; dst/src can be the same register
+; the optional third operand is only used by the ssse3+ path; it defaults to
+; the memory constant pb_2to15_14_15 and may be a register preloaded with it
+%macro SHIFT_RIGHT 2-3 [pb_2to15_14_15] ; dst, src, [ssse3_shift_reg]
+%if cpuflag(ssse3)
+    pshufb                  %1, %2, %3              ; abcdefgh -> bcdefghh
+%else
+    psrldq                  %1, %2, 2               ; abcdefgh -> bcdefgh.
+    pshufhw                 %1, %1, q2210           ; bcdefgh. -> bcdefghh
+%endif
+%endmacro
+
+; abcdefgh (src) -> bcdefghh (dst1) and cdefghhh (dst2)
+; on ssse3+ dst2 is derived from dst1 (the shuffle is applied twice); otherwise
+; both are derived from src, which is read after dst1 is written
+%macro SHIFT_RIGHTx2 3-4 [pb_2to15_14_15] ; dst1, dst2, src, [ssse3_shift_reg]
+%if cpuflag(ssse3)
+    pshufb                  %1, %3, %4              ; abcdefgh -> bcdefghh
+    pshufb                  %2, %1, %4              ; bcdefghh -> cdefghhh
+%else
+    psrldq                  %1, %3, 2               ; abcdefgh -> bcdefgh.
+    psrldq                  %2, %3, 4               ; abcdefgh -> cdefgh..
+    pshufhw                 %1, %1, q2210           ; bcdefgh. -> bcdefghh
+    pshufhw                 %2, %2, q1110           ; cdefgh.. -> cdefghhh
+%endif
+%endmacro
+
+%macro DL_FUNCS 0 ; emits the four 'dl' predictors for the currently selected instruction set; they read only the above edge
+cglobal vp9_ipred_dl_4x4_16, 2, 4, 3, dst, stride, l, a  ; 4x4: each row is the low-pass filtered above row, advanced by one word per row
+    movifnidn               aq, amp                 ; only 2 arguments are auto-loaded, so fetch 'a' if it is not already in its register
+    movu                    m1, [aq]                ; abcdefgh
+    pshufhw                 m0, m1, q3310           ; abcdefhh
+    SHIFT_RIGHT             m1, m1                  ; bcdefghh
+    psrldq                  m2, m1, 2               ; cdefghh.
+    LOWPASS                  0,  1,  2              ; BCDEFGh.
+    pshufd                  m1, m0, q3321           ; DEFGh...
+    movh      [dstq+strideq*0], m0                  ; row 0
+    movh      [dstq+strideq*2], m1                  ; row 2
+    add                   dstq, strideq
+    psrldq                  m0, 2                   ; CDEFGh..
+    psrldq                  m1, 2                   ; EFGh....
+    movh      [dstq+strideq*0], m0                  ; row 1
+    movh      [dstq+strideq*2], m1                  ; row 3
+    RET
+
+cglobal vp9_ipred_dl_8x8_16, 2, 4, 5, dst, stride, l, a  ; 8x8: rows 0-7 are the filtered above row advanced by 0-7 words, padded with 'h'
+    movifnidn               aq, amp
+    mova                    m0, [aq]                ; abcdefgh
+%if cpuflag(ssse3)
+    mova                    m4, [pb_2to15_14_15]    ; keep the shuffle mask in a register for the SHIFT_RIGHT* calls below
+%endif
+    SHIFT_RIGHTx2           m1, m2, m0, m4          ; bcdefghh/cdefghhh
+    LOWPASS                  0,  1,  2              ; BCDEFGHh
+    shufps                  m1, m0, m2, q3332       ; FGHhhhhh
+    shufps                  m3, m0, m1, q2121       ; DEFGHhhh
+    DEFINE_ARGS dst, stride, stride5
+    lea               stride5q, [strideq*5]
+
+    mova      [dstq+strideq*0], m0                  ; row 0
+    mova      [dstq+strideq*4], m1                  ; row 4
+    SHIFT_RIGHT             m0, m0, m4              ; CDEFGHhh
+    pshuflw                 m1, m1, q3321           ; GHhhhhhh
+    pshufd                  m2, m0, q3321           ; EFGHhhhh
+    mova      [dstq+strideq*1], m0                  ; row 1
+    mova      [dstq+stride5q ], m1                  ; row 5
+    lea                   dstq, [dstq+strideq*2]
+    pshuflw                 m1, m1, q3321           ; Hhhhhhhh
+    mova      [dstq+strideq*0], m3                  ; row 2
+    mova      [dstq+strideq*4], m1                  ; row 6
+    pshuflw                 m1, m1, q3321           ; hhhhhhhh
+    mova      [dstq+strideq*1], m2                  ; row 3
+    mova      [dstq+stride5q ], m1                  ; row 7
+    RET
+
+cglobal vp9_ipred_dl_16x16_16, 2, 4, 5, dst, stride, l, a  ; 16x16: each loop iteration stores rows i and i+8, then advances the data by one word
+    movifnidn               aq, amp
+    mova                    m0, [aq]                ; abcdefgh
+    mova                    m3, [aq+mmsize]         ; ijklmnop
+    PALIGNR                 m1, m3, m0, 2, m4       ; bcdefghi
+    PALIGNR                 m2, m3, m0, 4, m4       ; cdefghij
+    LOWPASS                  0,  1,  2              ; BCDEFGHI
+%if cpuflag(ssse3)
+    mova                    m4, [pb_2to15_14_15]
+%endif
+    SHIFT_RIGHTx2           m2, m1, m3, m4          ; jklmnopp/klmnoppp
+    LOWPASS                  1,  2,  3              ; JKLMNOPp
+    pshufd                  m2, m2, q3333           ; pppppppp
+    DEFINE_ARGS dst, stride, cnt
+    mov                   cntd, 8
+
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*8+ 0], m1                  ; row i+8 starts with what row i has in its right half
+    mova   [dstq+strideq*8+16], m2
+    add                   dstq, strideq
+%if cpuflag(avx)
+    vpalignr                m0, m1, m0, 2
+%else
+    PALIGNR                 m3, m1, m0, 2, m4       ; m3 is free here; build the shifted value there, then copy back
+    mova                    m0, m3
+%endif
+    SHIFT_RIGHT             m1, m1, m4
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_dl_32x32_16, 2, 5, 7, dst, stride, l, a  ; 32x32: each loop iteration stores rows i, i+8, i+16 and i+24, then advances the data by one word
+    movifnidn               aq, amp
+    mova                    m0, [aq+mmsize*0]       ; abcdefgh
+    mova                    m1, [aq+mmsize*1]       ; ijklmnop
+    mova                    m2, [aq+mmsize*2]       ; qrstuvwx
+    mova                    m3, [aq+mmsize*3]       ; yz012345
+    PALIGNR                 m4, m1, m0, 2, m6
+    PALIGNR                 m5, m1, m0, 4, m6
+    LOWPASS                  0,  4,  5              ; BCDEFGHI
+    PALIGNR                 m4, m2, m1, 2, m6
+    PALIGNR                 m5, m2, m1, 4, m6
+    LOWPASS                  1,  4,  5              ; JKLMNOPQ
+    PALIGNR                 m4, m3, m2, 2, m6
+    PALIGNR                 m5, m3, m2, 4, m6
+    LOWPASS                  2,  4,  5              ; RSTUVWXY
+%if cpuflag(ssse3)
+    mova                    m6, [pb_2to15_14_15]
+%endif
+    SHIFT_RIGHTx2           m4, m5, m3, m6
+    LOWPASS                  3,  4,  5              ; Z0123455
+    pshufd                  m4, m4, q3333           ; 55555555
+    DEFINE_ARGS dst, stride, stride8, stride24, cnt
+    mov                   cntd, 8
+    lea               stride8q, [strideq*8]
+    lea              stride24q, [stride8q*3]
+
+.loop:
+    mova  [dstq+stride8q*0+ 0], m0
+    mova  [dstq+stride8q*0+16], m1
+    mova  [dstq+stride8q*0+32], m2
+    mova  [dstq+stride8q*0+48], m3
+    mova  [dstq+stride8q*1+ 0], m1                  ; each later row group starts one register further along
+    mova  [dstq+stride8q*1+16], m2
+    mova  [dstq+stride8q*1+32], m3
+    mova  [dstq+stride8q*1+48], m4
+    mova  [dstq+stride8q*2+ 0], m2
+    mova  [dstq+stride8q*2+16], m3
+    mova  [dstq+stride8q*2+32], m4
+    mova  [dstq+stride8q*2+48], m4
+    mova  [dstq+stride24q + 0], m3
+    mova  [dstq+stride24q +16], m4
+    mova  [dstq+stride24q +32], m4
+    mova  [dstq+stride24q +48], m4
+    add                   dstq, strideq
+%if cpuflag(avx)
+    vpalignr                m0, m1, m0, 2
+    vpalignr                m1, m2, m1, 2
+    vpalignr                m2, m3, m2, 2
+%else
+    PALIGNR                 m5, m1, m0, 2, m6       ; m5 is free here; build each shifted value there, then copy back
+    mova                    m0, m5
+    PALIGNR                 m5, m2, m1, 2, m6
+    mova                    m1, m5
+    PALIGNR                 m5, m3, m2, 2, m6
+    mova                    m2, m5
+%endif
+    SHIFT_RIGHT             m3, m3, m6
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+DL_FUNCS
+INIT_XMM ssse3
+DL_FUNCS
+INIT_XMM avx
+DL_FUNCS
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_ipred_dl_16x16_16, 2, 4, 5, dst, stride, l, a  ; AVX2 'dl' 16x16: one ymm register per row, 8 rows per loop iteration
+    movifnidn               aq, amp                    ; only 2 arguments are auto-loaded, so fetch 'a' if it is not already in its register
+    mova                    m0, [aq]                   ; abcdefghijklmnop
+    vpbroadcastw           xm1, [aq+30]                ; pppppppp
+    vperm2i128              m2, m0, m1, q0201          ; ijklmnoppppppppp
+    vpalignr                m3, m2, m0, 2              ; bcdefghijklmnopp
+    vpalignr                m4, m2, m0, 4              ; cdefghijklmnoppp
+    LOWPASS                  0,  3,  4                 ; BCDEFGHIJKLMNOPp
+    vperm2i128              m2, m0, m1, q0201          ; JKLMNOPppppppppp
+    DEFINE_ARGS dst, stride, stride3, cnt
+    mov                   cntd, 2
+    lea               stride3q, [strideq*3]
+
+.loop:
+    mova      [dstq+strideq*0], m0                     ; row 8*i + 0; following rows advance by one word (2 bytes) each
+    vpalignr                m3, m2, m0, 2
+    vpalignr                m4, m2, m0, 4
+    mova      [dstq+strideq*1], m3
+    mova      [dstq+strideq*2], m4
+    vpalignr                m3, m2, m0, 6
+    vpalignr                m4, m2, m0, 8
+    mova      [dstq+stride3q ], m3
+    lea                   dstq, [dstq+strideq*4]
+    mova      [dstq+strideq*0], m4
+    vpalignr                m3, m2, m0, 10
+    vpalignr                m4, m2, m0, 12
+    mova      [dstq+strideq*1], m3
+    mova      [dstq+strideq*2], m4
+    vpalignr                m3, m2, m0, 14
+    mova      [dstq+stride3q ], m3
+    lea                   dstq, [dstq+strideq*4]
+    mova                    m0, m2                     ; second pass starts 8 words further along
+    vperm2i128              m2, m2, m2, q0101          ; pppppppppppppppp
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_dl_32x32_16, 2, 6, 7, dst, stride, l, a  ; AVX2 'dl' 32x32: two ymm registers per row, 8 rows per loop iteration; NOTE(review): 6 GPRs are requested but only 4 are named below
+    movifnidn               aq, amp                    ; only 2 arguments are auto-loaded, so fetch 'a' if it is not already in its register
+    mova                    m0, [aq+mmsize*0+ 0]       ; abcdefghijklmnop
+    mova                    m1, [aq+mmsize*1+ 0]       ; qrstuvwxyz012345
+    vpbroadcastw           xm4, [aq+mmsize*1+30]       ; 55555555
+    vperm2i128              m5, m0, m1, q0201          ; ijklmnopqrstuvwx
+    vpalignr                m2, m5, m0, 2              ; bcdefghijklmnopq
+    vpalignr                m3, m5, m0, 4              ; cdefghijklmnopqr
+    LOWPASS                  0,  2,  3                 ; BCDEFGHIJKLMNOPQ
+    vperm2i128              m5, m1, m4, q0201          ; yz01234555555555
+    vpalignr                m2, m5, m1, 2              ; rstuvwxyz0123455
+    vpalignr                m3, m5, m1, 4              ; stuvwxyz01234555
+    LOWPASS                  1,  2,  3                 ; RSTUVWXYZ......5
+    vperm2i128              m2, m1, m4, q0201          ; Z......555555555
+    vperm2i128              m5, m0, m1, q0201          ; JKLMNOPQRSTUVWXY
+    DEFINE_ARGS dst, stride, stride3, cnt
+    lea               stride3q, [strideq*3]
+    mov                   cntd, 4
+
+.loop:
+    mova   [dstq+strideq*0 + 0], m0                    ; row 8*i + 0; following rows advance by one word (2 bytes) each
+    mova   [dstq+strideq*0 +32], m1
+    vpalignr                 m3, m5, m0, 2
+    vpalignr                 m4, m2, m1, 2
+    mova   [dstq+strideq*1 + 0], m3
+    mova   [dstq+strideq*1 +32], m4
+    vpalignr                 m3, m5, m0, 4
+    vpalignr                 m4, m2, m1, 4
+    mova   [dstq+strideq*2 + 0], m3
+    mova   [dstq+strideq*2 +32], m4
+    vpalignr                 m3, m5, m0, 6
+    vpalignr                 m4, m2, m1, 6
+    mova   [dstq+stride3q*1+ 0], m3
+    mova   [dstq+stride3q*1+32], m4
+    lea                    dstq, [dstq+strideq*4]
+    vpalignr                 m3, m5, m0, 8
+    vpalignr                 m4, m2, m1, 8
+    mova   [dstq+strideq*0 + 0], m3
+    mova   [dstq+strideq*0 +32], m4
+    vpalignr                 m3, m5, m0, 10
+    vpalignr                 m4, m2, m1, 10
+    mova   [dstq+strideq*1 + 0], m3
+    mova   [dstq+strideq*1 +32], m4
+    vpalignr                 m3, m5, m0, 12
+    vpalignr                 m4, m2, m1, 12
+    mova   [dstq+strideq*2+ 0], m3
+    mova   [dstq+strideq*2+32], m4
+    vpalignr                 m3, m5, m0, 14
+    vpalignr                 m4, m2, m1, 14
+    mova   [dstq+stride3q+  0], m3
+    mova   [dstq+stride3q+ 32], m4
+    vpalignr                 m3, m5, m0, 16            ; a 16-byte shift yields the first source per lane, so m3 = m5 ...
+    vpalignr                 m4, m2, m1, 16            ; ... and m4 = m2
+    vperm2i128               m5, m3, m4, q0201         ; rebuild the 8-words-ahead helpers for the next pass
+    vperm2i128               m2, m4, m4, q0101
+    mova                     m0, m3                    ; next pass starts 8 words further along
+    mova                     m1, m4
+    lea                    dstq, [dstq+strideq*4]
+    dec                    cntd
+    jg .loop
+    RET
+%endif
+
+%macro DR_FUNCS 1 ; stack_mem_for_32x32_32bit_function
+cglobal vp9_ipred_dr_4x4_16, 4, 4, 3, dst, stride, l, a  ; 'dr' 4x4: filter the left/top-left/above sequence once, then store 4-word windows from the bottom row upwards
+    movh                    m0, [lq]                ; wxyz....
+    movhps                  m0, [aq-2]              ; wxyz*abc
+    movd                    m1, [aq+6]              ; d.......
+    PALIGNR                 m1, m0, 2, m2           ; xyz*abcd
+    psrldq                  m2, m1, 2               ; yz*abcd.
+    LOWPASS                  0, 1, 2                ; XYZ#ABC.
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+
+    movh      [dstq+stride3q ], m0                  ; row 3
+    psrldq                  m0, 2                   ; YZ#ABC..
+    movh      [dstq+strideq*2], m0                  ; row 2
+    psrldq                  m0, 2                   ; Z#ABC...
+    movh      [dstq+strideq*1], m0                  ; row 1
+    psrldq                  m0, 2                   ; #ABC....
+    movh      [dstq+strideq*0], m0                  ; row 0
+    RET
+
+cglobal vp9_ipred_dr_8x8_16, 4, 4, 5, dst, stride, l, a  ; 'dr' 8x8: rows 3..0 and 7..4 are stored in pairs through dst and dst4
+    mova                    m0, [lq]                ; stuvwxyz
+    movu                    m1, [aq-2]              ; *abcdefg
+    mova                    m2, [aq]                ; abcdefgh
+    psrldq                  m3, m2, 2               ; bcdefgh.
+    LOWPASS                  3,  2, 1               ; ABCDEFG.
+    PALIGNR                 m1, m0, 2, m4           ; tuvwxyz*
+    PALIGNR                 m2, m1, 2, m4           ; uvwxyz*a
+    LOWPASS                  2,  1, 0               ; TUVWXYZ#
+    DEFINE_ARGS dst, stride, dst4, stride3
+    lea               stride3q, [strideq*3]
+    lea                  dst4q, [dstq+strideq*4]    ; dst4 addresses rows 4-7
+
+    movhps [dstq +stride3q +0], m2                  ; row 3: high half of the filtered left data ...
+    movh   [dstq+ stride3q +8], m3                  ; ... followed by the low half of the filtered above data
+    mova   [dst4q+stride3q +0], m2                  ; row 7: filtered left data only
+    PALIGNR                 m1, m3, m2, 2, m0       ; advance the left/above window by one word
+    psrldq                  m3, 2
+    movhps [dstq +strideq*2+0], m1                  ; rows 2 and 6
+    movh   [dstq+ strideq*2+8], m3
+    mova   [dst4q+strideq*2+0], m1
+    PALIGNR                 m2, m3, m1, 2, m0
+    psrldq                  m3, 2
+    movhps [dstq +strideq*1+0], m2                  ; rows 1 and 5
+    movh   [dstq+ strideq*1+8], m3
+    mova   [dst4q+strideq*1+0], m2
+    PALIGNR                 m1, m3, m2, 2, m0
+    psrldq                  m3, 2
+    movhps [dstq +strideq*0+0], m1                  ; rows 0 and 4
+    movh   [dstq+ strideq*0+8], m3
+    mova   [dst4q+strideq*0+0], m1
+    RET
+
+cglobal vp9_ipred_dr_16x16_16, 4, 4, 7, dst, stride, l, a  ; 'dr' 16x16: each loop iteration stores rows 7-i and 15-i, then advances the data by one word
+    mova                    m0, [lq]                ; klmnopqr
+    mova                    m1, [lq+mmsize]         ; stuvwxyz
+    movu                    m2, [aq-2]              ; *abcdefg
+    movu                    m3, [aq+mmsize-2]       ; hijklmno
+    mova                    m4, [aq]                ; abcdefgh
+    mova                    m5, [aq+mmsize]         ; ijklmnop
+    psrldq                  m6, m5, 2               ; jklmnop.
+    LOWPASS                  6,  5, 3               ; IJKLMNO.
+    PALIGNR                 m5, m4, 2, m3           ; bcdefghi
+    LOWPASS                  5,  4, 2               ; ABCDEFGH
+    PALIGNR                 m2, m1, 2, m3           ; tuvwxyz*
+    PALIGNR                 m4, m2, 2, m3           ; uvwxyz*a
+    LOWPASS                  4,  2, 1               ; TUVWXYZ#
+    PALIGNR                 m1, m0, 2, m3           ; lmnopqrs
+    PALIGNR                 m2, m1, 2, m3           ; mnopqrst
+    LOWPASS                  2, 1, 0                ; LMNOPQRS
+    DEFINE_ARGS dst, stride, dst8, cnt
+    lea                  dst8q, [dstq+strideq*8]
+    mov                   cntd, 8
+
+.loop:
+    sub                  dst8q, strideq             ; walk upwards: the first pass addresses rows 7 and 15
+    mova  [dst8q+strideq*0+ 0], m4
+    mova  [dst8q+strideq*0+16], m5
+    mova  [dst8q+strideq*8+ 0], m2
+    mova  [dst8q+strideq*8+16], m4
+%if cpuflag(avx)
+    vpalignr                m2, m4, m2, 2
+    vpalignr                m4, m5, m4, 2
+    vpalignr                m5, m6, m5, 2
+%else
+    PALIGNR                 m0, m4, m2, 2, m1       ; m0/m1 are free here; build each shifted value in m0, then copy back
+    mova                    m2, m0
+    PALIGNR                 m0, m5, m4, 2, m1
+    mova                    m4, m0
+    PALIGNR                 m0, m6, m5, 2, m1
+    mova                    m5, m0
+%endif
+    psrldq                  m6, 2
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_dr_32x32_16, 4, 5, 10 + notcpuflag(ssse3), \
+                               %1 * ARCH_X86_32 * -mmsize, dst, stride, l, a
+    mova                    m0, [aq+mmsize*3]       ; a[24-31]
+    movu                    m1, [aq+mmsize*3-2]     ; a[23-30]
+    psrldq                  m2, m0, 2               ; a[25-31].
+    LOWPASS                  2,  0, 1               ; A[24-30].
+    mova                    m1, [aq+mmsize*2]       ; a[16-23]
+    movu                    m3, [aq+mmsize*2-2]     ; a[15-22]
+    PALIGNR                 m0, m1, 2, m4           ; a[17-24]
+    LOWPASS                  0,  1, 3               ; A[16-23]
+    mova                    m3, [aq+mmsize*1]       ; a[8-15]
+    movu                    m4, [aq+mmsize*1-2]     ; a[7-14]
+    PALIGNR                 m1, m3, 2, m5           ; a[9-16]
+    LOWPASS                  1,  3, 4               ; A[8-15]
+    mova                    m4, [aq+mmsize*0]       ; a[0-7]
+    movu                    m5, [aq+mmsize*0-2]     ; *a[0-6]
+    PALIGNR                 m3, m4, 2, m6           ; a[1-8]
+    LOWPASS                  3,  4, 5               ; A[0-7]
+    SCRATCH                  1,  8, rsp+0*mmsize    ; SCRATCH/UNSCRATCH are defined outside this chunk; presumably they park a register in m8+ or in the given stack slot - confirm
+    SCRATCH                  3,  9, rsp+1*mmsize
+%if notcpuflag(ssse3)
+    SCRATCH                  0, 10, rsp+2*mmsize    ; pre-ssse3 only: m0 is handed to PALIGNR as its last operand below, so A[16-23] is parked first
+%endif
+    mova                    m6, [lq+mmsize*3]       ; l[24-31]
+    PALIGNR                 m5, m6, 2, m0           ; l[25-31]*
+    PALIGNR                 m4, m5, 2, m0           ; l[26-31]*a
+    LOWPASS                  4,  5, 6               ; L[25-31]#
+    mova                    m7, [lq+mmsize*2]       ; l[16-23]
+    PALIGNR                 m6, m7, 2, m0           ; l[17-24]
+    PALIGNR                 m5, m6, 2, m0           ; l[18-25]
+    LOWPASS                  5,  6, 7               ; L[17-24]
+    mova                    m1, [lq+mmsize*1]       ; l[8-15]
+    PALIGNR                 m7, m1, 2, m0           ; l[9-16]
+    PALIGNR                 m6, m7, 2, m0           ; l[10-17]
+    LOWPASS                  6,  7, 1               ; L[9-16]
+    mova                    m3, [lq+mmsize*0]       ; l[0-7]
+    PALIGNR                 m1, m3, 2, m0           ; l[1-8]
+    PALIGNR                 m7, m1, 2, m0           ; l[2-9]
+    LOWPASS                  7,  1, 3               ; L[1-8]
+%if cpuflag(ssse3)
+%if cpuflag(avx)
+    UNSCRATCH                1,  8, rsp+0*mmsize    ; avx: restored once here; the other paths restore it at the top of every loop pass
+%endif
+    UNSCRATCH                3,  9, rsp+1*mmsize
+%else
+    UNSCRATCH                0, 10, rsp+2*mmsize
+%endif
+    DEFINE_ARGS dst8, stride, stride8, stride24, cnt  ; the dst register is renamed dst8; l and a are no longer needed
+    lea               stride8q, [strideq*8]
+    lea              stride24q, [stride8q*3]
+    lea                  dst8q, [dst8q+strideq*8]
+    mov                   cntd, 8
+
+.loop:
+    sub                  dst8q, strideq             ; walk upwards: the first pass addresses rows 7, 15, 23 and 31
+%if notcpuflag(avx)
+    UNSCRATCH                1,  8, rsp+0*mmsize
+%if notcpuflag(ssse3)
+    UNSCRATCH                3,  9, rsp+1*mmsize
+%endif
+%endif
+    mova [dst8q+stride8q*0+ 0], m4
+    mova [dst8q+stride8q*0+16], m3
+    mova [dst8q+stride8q*0+32], m1
+    mova [dst8q+stride8q*0+48], m0
+    mova [dst8q+stride8q*1+ 0], m5                  ; each lower row group starts one register earlier in the m7..m0 sequence
+    mova [dst8q+stride8q*1+16], m4
+    mova [dst8q+stride8q*1+32], m3
+    mova [dst8q+stride8q*1+48], m1
+    mova [dst8q+stride8q*2+ 0], m6
+    mova [dst8q+stride8q*2+16], m5
+    mova [dst8q+stride8q*2+32], m4
+    mova [dst8q+stride8q*2+48], m3
+    mova [dst8q+stride24q + 0], m7
+    mova [dst8q+stride24q +16], m6
+    mova [dst8q+stride24q +32], m5
+    mova [dst8q+stride24q +48], m4
+%if cpuflag(avx)
+    vpalignr                m7, m6, m7, 2           ; advance the whole m7,m6,m5,m4,m3,m1,m0,m2 chain by one word
+    vpalignr                m6, m5, m6, 2
+    vpalignr                m5, m4, m5, 2
+    vpalignr                m4, m3, m4, 2
+    vpalignr                m3, m1, m3, 2
+    vpalignr                m1, m0, m1, 2
+    vpalignr                m0, m2, m0, 2
+%else
+    SCRATCH                  2,  8, rsp+0*mmsize    ; free m2 (and m0 pre-ssse3) so they can serve as destination/temporary below
+%if notcpuflag(ssse3)
+    SCRATCH                  0,  9, rsp+1*mmsize
+%endif
+    PALIGNR                 m2, m6, m7, 2, m0
+    mova                    m7, m2
+    PALIGNR                 m2, m5, m6, 2, m0
+    mova                    m6, m2
+    PALIGNR                 m2, m4, m5, 2, m0
+    mova                    m5, m2
+    PALIGNR                 m2, m3, m4, 2, m0
+    mova                    m4, m2
+    PALIGNR                 m2, m1, m3, 2, m0
+    mova                    m3, m2
+%if notcpuflag(ssse3)
+    UNSCRATCH                0,  9, rsp+1*mmsize
+    SCRATCH                  3,  9, rsp+1*mmsize
+%endif
+    PALIGNR                 m2, m0, m1, 2, m3
+    mova                    m1, m2
+    UNSCRATCH                2,  8, rsp+0*mmsize
+    SCRATCH                  1,  8, rsp+0*mmsize
+    PALIGNR                 m1, m2, m0, 2, m3
+    mova                    m0, m1
+%endif
+    psrldq                  m2, 2
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+DR_FUNCS 3                                          ; %1 = 3: the 32x32 function scratches up to rsp+2*mmsize in this build
+INIT_XMM ssse3
+DR_FUNCS 2                                          ; %1 = 2: only rsp+0*mmsize and rsp+1*mmsize are used
+INIT_XMM avx
+DR_FUNCS 2
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_ipred_dr_16x16_16, 4, 5, 6, dst, stride, l, a  ; AVX2 'dr' 16x16: fully unrolled, one ymm store per row; the trailing numbers are row indices
+    mova                    m0, [lq]                   ; klmnopqrstuvwxyz
+    movu                    m1, [aq-2]                 ; *abcdefghijklmno
+    mova                    m2, [aq]                   ; abcdefghijklmnop
+    vperm2i128              m4, m2, m2, q2001          ; ijklmnop........
+    vpalignr                m5, m4, m2, 2              ; bcdefghijklmnop.
+    vperm2i128              m3, m0, m1, q0201          ; stuvwxyz*abcdefg
+    LOWPASS                  1,  2,  5                 ; ABCDEFGHIJKLMNO.
+    vpalignr                m4, m3, m0, 2              ; lmnopqrstuvwxyz*
+    vpalignr                m5, m3, m0, 4              ; mnopqrstuvwxyz*a
+    LOWPASS                  0,  4,  5                 ; LMNOPQRSTUVWXYZ#
+    vperm2i128              m5, m0, m1, q0201          ; TUVWXYZ#ABCDEFGH
+    DEFINE_ARGS dst, stride, stride3, stride5, dst3
+    lea                  dst3q, [dstq+strideq*4]       ; dst + 4 rows for now; becomes dst + 3 rows after the sub below
+    lea               stride3q, [strideq*3]
+    lea               stride5q, [stride3q+strideq*2]
+
+    vpalignr                m3, m5, m0, 2
+    vpalignr                m4, m1, m5, 2
+    mova    [dst3q+stride5q*2], m3                     ; 14
+    mova    [ dstq+stride3q*2], m4                     ; 6
+    vpalignr                m3, m5, m0, 4
+    vpalignr                m4, m1, m5, 4
+    sub                  dst3q, strideq                ; dst3 = dst + 3 rows from here on
+    mova    [dst3q+stride5q*2], m3                     ; 13
+    mova    [dst3q+strideq*2 ], m4                     ; 5
+    mova    [dst3q+stride3q*4], m0                     ; 15
+    vpalignr                m3, m5, m0, 6
+    vpalignr                m4, m1, m5, 6
+    mova     [dstq+stride3q*4], m3                     ; 12
+    mova     [dst3q+strideq*1], m4                     ; 4
+    vpalignr                m3, m5, m0, 8
+    vpalignr                m4, m1, m5, 8
+    mova     [dst3q+strideq*8], m3                     ; 11
+    mova     [dst3q+strideq*0], m4                     ; 3
+    vpalignr                m3, m5, m0, 10
+    vpalignr                m4, m1, m5, 10
+    mova     [dstq+stride5q*2], m3                     ; 10
+    mova     [dstq+strideq*2 ], m4                     ; 2
+    vpalignr                m3, m5, m0, 12
+    vpalignr                m4, m1, m5, 12
+    mova    [dst3q+stride3q*2], m3                     ; 9
+    mova     [dstq+strideq*1 ], m4                     ; 1
+    vpalignr                m3, m5, m0, 14
+    vpalignr                m4, m1, m5, 14
+    mova      [dstq+strideq*8], m3                     ; 8
+    mova      [dstq+strideq*0], m4                     ; 0
+    mova     [dst3q+strideq*4], m5                     ; 7
+    RET
+
+cglobal vp9_ipred_vl_16x16_16, 4, 5, 7, dst, stride, l, a ; AVX2 16x16 "vl" predictor (presumably vertical-left), 16-bit pixels; only the a edge is read
+    movifnidn               aq, amp                    ; make sure the a pointer is in aq
+    mova                    m0, [aq]                   ; abcdefghijklmnop
+    vpbroadcastw           xm1, [aq+30]                ; pppppppp  (byte offset 30 = pixel 15, the last one)
+    vperm2i128              m2, m0, m1, q0201          ; ijklmnoppppppppp
+    vpalignr                m3, m2, m0, 2              ; bcdefghijklmnopp
+    vperm2i128              m4, m3, m1, q0201          ; jklmnopppppppppp
+    vpalignr                m5, m2, m0, 4              ; cdefghijklmnoppp
+    vperm2i128              m6, m5, m1, q0201          ; klmnoppppppppppp
+    LOWPASS                  5,  3,  0                 ; BCDEFGHIJKLMNOPP
+    LOWPASS                  6,  4,  2                 ; JKLMNOPPPPPPPPPP
+    pavgw                   m3, m0                     ; abcdefghijklmnop
+    pavgw                   m4, m2                     ; ijklmnoppppppppp
+    DEFINE_ARGS dst, stride, stride3, stride5, dst4    ; edge pointers are consumed; rename the registers for the store phase
+    lea                  dst4q, [dstq+strideq*4]       ; dst4 = dst + 4*stride
+    lea               stride3q, [strideq*3]
+    lea               stride5q, [stride3q+strideq*2]
+
+    mova      [dstq+strideq*0], m3                     ; 0  abcdefghijklmnop
+    mova      [dstq+strideq*1], m5                     ; 1  BCDEFGHIJKLMNOPP
+    vpalignr                m0, m4, m3, 2              ; even rows: pavgw results (m4:m3) advanced one pixel per row pair
+    vpalignr                m1, m6, m5, 2              ; odd rows: LOWPASS results (m6:m5) advanced one pixel per row pair
+    mova     [dstq+strideq*2 ], m0                     ; 2  bcdefghijklmnopp
+    mova     [dstq+stride3q*1], m1                     ; 3  CDEFGHIJKLMNOPPP
+    vpalignr                m0, m4, m3, 4
+    vpalignr                m1, m6, m5, 4
+    mova     [dst4q+strideq*0], m0                     ; 4  cdefghijklmnoppp
+    mova     [dstq+stride5q*1], m1                     ; 5  DEFGHIJKLMNOPPPP
+    vpalignr                m0, m4, m3, 6
+    vpalignr                m1, m6, m5, 6
+    mova    [ dstq+stride3q*2], m0                     ; 6  defghijklmnopppp
+    mova    [dst4q+stride3q*1], m1                     ; 7  EFGHIJKLMNOPPPPP
+    vpalignr                m0, m4, m3, 8
+    vpalignr                m1, m6, m5, 8
+    mova    [  dstq+strideq*8], m0                     ; 8  efghijklmnoppppp
+    mova    [dst4q+stride5q*1], m1                     ; 9  FGHIJKLMNOPPPPPP
+    vpalignr                m0, m4, m3, 10
+    mova     [dstq+stride5q*2], m0                     ; 10 fghijklmnopppppp
+    vpalignr                m0, m4, m3, 12
+    mova     [dst4q+strideq*8], m0                     ; 12 ghijklmnoppppppp
+    vpalignr                m0, m4, m3, 14
+    mova    [dst4q+stride5q*2], m0                     ; 14 hijklmnopppppppp
+    sub                  dst4q, strideq                ; dst4 = dst + 3*stride, so the remaining odd rows are addressable
+    vpalignr                m1, m6, m5, 10
+    mova     [dst4q+strideq*8], m1                     ; 11 GHIJKLMNOPPPPPPP
+    vpalignr                m1, m6, m5, 12
+    mova    [dst4q+stride5q*2], m1                     ; 13 HIJKLMNOPPPPPPPP
+    vpalignr                m1, m6, m5, 14
+    mova    [dst4q+stride3q*4], m1                     ; 15 IJKLMNOPPPPPPPPP
+    RET
+
+cglobal vp9_ipred_hd_16x16_16, 4, 5, 7, dst, stride, l, a ; AVX2 16x16 "hd" predictor (presumably horizontal-down), 16-bit pixels: one ymm = one 16-pixel row
+    movu                    m0, [aq-2]                 ; *abcdefghijklmno
+    mova                    m1, [lq]                   ; klmnopqrstuvwxyz
+    vperm2i128              m2, m1, m0, q0201          ; stuvwxyz*abcdefg
+    vpalignr                m3, m2, m1, 2              ; lmnopqrstuvwxyz*
+    vpalignr                m4, m2, m1, 4              ; mnopqrstuvwxyz*a
+    LOWPASS                  4,  3,  1                 ; LMNOPQRSTUVWXYZ#
+    pavgw                   m3, m1                     ; klmnopqrstuvwxyz
+    mova                    m1, [aq]                   ; abcdefghijklmnop
+    movu                    m2, [aq+2]                 ; bcdefghijklmnop.
+    LOWPASS                  2,  1,  0                 ; ABCDEFGHIJKLMNO.
+    vpunpcklwd              m0, m3, m4                 ; kLlMmNnOsTtUuVvW
+    vpunpckhwd              m1, m3, m4                 ; oPpQqRrSwXxYyZz#
+    vperm2i128              m3, m1, m0, q0002          ; kLlMmNnOoPpQqRrS
+    vperm2i128              m4, m0, m1, q0301          ; sTtUuVvWwXxYyZz#
+    vperm2i128              m0, m4, m2, q0201          ; wXxYyZz#ABCDEFGH
+    vperm2i128              m1, m3, m4, q0201          ; oPpQqRrSsTtUuVvW
+    DEFINE_ARGS dst, stride, stride3, stride5, dst5    ; edge pointers are consumed; rename the registers for the store phase
+    lea               stride3q, [strideq*3]
+    lea               stride5q, [stride3q+strideq*2]
+    lea                  dst5q, [dstq+stride5q]        ; dst5 = dst + 5*stride
+
+    mova    [dst5q+stride5q*2], m3                     ; 15 kLlMmNnOoPpQqRrS
+    mova    [dst5q+stride3q*2], m1                     ; 11 oPpQqRrSsTtUuVvW
+    mova     [dst5q+strideq*2], m4                     ; 7  sTtUuVvWwXxYyZz#
+    mova     [dstq+stride3q*1], m0                     ; 3  wXxYyZz#ABCDEFGH
+    vpalignr                m5, m4, m1, 4              ; each row up is the row below advanced by 2 words (one interleaved pair)
+    mova     [dstq+stride5q*2], m5                     ; 10 pQqRrSsTtUuVvWwX
+    vpalignr                m5, m0, m4, 4
+    vpalignr                m6, m2, m0, 4
+    mova     [dstq+stride3q*2], m5                     ; 6  tUuVvWwXxYyZz#AB
+    mova      [dstq+strideq*2], m6                     ; 2  xYyZz#ABCDEFGHIJ
+    vpalignr                m5, m4, m1, 8
+    mova     [dst5q+strideq*4], m5                     ; 9  qRrSsTtUuVvWwXxY
+    vpalignr                m5, m0, m4, 8
+    vpalignr                m6, m2, m0, 8
+    mova     [dstq+stride5q*1], m5                     ; 5  uVvWwXxYyZz#ABCD
+    mova      [dstq+strideq*1], m6                     ; 1  yZz#ABCDEFGHIJKL
+    vpalignr                m5, m1, m3, 12
+    vpalignr                m6, m4, m1, 12
+    mova     [dstq+stride3q*4], m5                     ; 12 nOoPpQqRrSsTtUuV
+    mova      [dst5q+stride3q], m6                     ; 8  rSsTtUuVvWwXxYyZ
+    vpalignr                m5, m0, m4, 12
+    vpalignr                m6, m2, m0, 12
+    mova      [dstq+strideq*4], m5                     ; 4  vWwXxYyZz#ABCDEF
+    mova      [dstq+strideq*0], m6                     ; 0  z#ABCDEFGHIJKLMN
+    sub                  dst5q, strideq                ; dst5 = dst + 4*stride
+    vpalignr                m5, m1, m3, 4
+    mova    [dst5q+stride5q*2], m5                     ; 14 lMmNnOoPpQqRrSsT
+    sub                  dst5q, strideq                ; dst5 = dst + 3*stride
+    vpalignr                m5, m1, m3, 8
+    mova    [dst5q+stride5q*2], m5                    ; 13 mNnOoPpQqRrSsTtU
+    RET
+
+%if ARCH_X86_64
+cglobal vp9_ipred_dr_32x32_16, 4, 7, 10, dst, stride, l, a ; AVX2 32x32 "dr" predictor, 16-bit pixels: each row is two ymm stores (+0 and +32 bytes)
+    mova                    m0, [lq+mmsize*0+0]        ; l[0-15]
+    mova                    m1, [lq+mmsize*1+0]        ; l[16-31]
+    movu                    m2, [aq+mmsize*0-2]        ; *abcdefghijklmno
+    mova                    m3, [aq+mmsize*0+0]        ; abcdefghijklmnop
+    mova                    m4, [aq+mmsize*1+0]        ; qrstuvwxyz012345
+    vperm2i128              m5, m0, m1, q0201          ; lmnopqrstuvwxyz0
+    vpalignr                m6, m5, m0, 2              ; mnopqrstuvwxyz01
+    vpalignr                m7, m5, m0, 4              ; nopqrstuvwxyz012
+    LOWPASS                  0,  6,  7                 ; L[0-15]
+    vperm2i128              m7, m1, m2, q0201          ; stuvwxyz*abcdefg
+    vpalignr                m5, m7, m1, 2              ; lmnopqrstuvwxyz*
+    vpalignr                m6, m7, m1, 4              ; mnopqrstuvwxyz*a
+    LOWPASS                  1,  5,  6                 ; L[16-31]#
+    vperm2i128              m5, m3, m4, q0201          ; ijklmnopqrstuvwx
+    vpalignr                m6, m5, m3, 2              ; bcdefghijklmnopq
+    LOWPASS                  2,  3,  6                 ; A[0-15]
+    movu                    m3, [aq+mmsize*1-2]        ; pqrstuvwxyz01234
+    vperm2i128              m6, m4, m4, q2001          ; yz012345........
+    vpalignr                m7, m6, m4, 2              ; rstuvwxyz012345.
+    LOWPASS                  3,  4,  7                 ; A[16-31].
+    vperm2i128              m4, m1, m2, q0201          ; TUVWXYZ#ABCDEFGH
+    vperm2i128              m5, m0, m1, q0201          ; L[8-15]L[16-23]
+    vperm2i128              m8, m2, m3, q0201          ; IJKLMNOPQRSTUVWX
+    DEFINE_ARGS dst8, stride, stride3, stride7, stride5, dst24, cnt ; edge pointers are consumed; rename the registers for the store loop
+    lea               stride3q, [strideq*3]
+    lea               stride5q, [stride3q+strideq*2]
+    lea               stride7q, [strideq*4+stride3q]
+    lea                 dst24q, [dst8q+stride3q*8]     ; dst24 = dst + 24*stride
+    lea                  dst8q, [dst8q+strideq*8]      ; dst8  = dst +  8*stride
+    mov                   cntd, 2                      ; two passes: rows 24-31 and 8-15, then rows 16-23 and 0-7
+
+.loop:
+    mova  [dst24q+stride7q+0 ], m0                     ; 31 23 15 7
+    mova  [dst24q+stride7q+32], m1
+    mova    [dst8q+stride7q+0], m1
+    mova   [dst8q+stride7q+32], m2
+    vpalignr                m6, m4, m1, 2              ; m6 = m4:m1, m7 = m5:m0, m9 = m8:m2, each advanced by 1..7 pixels per lane
+    vpalignr                m7, m5, m0, 2
+    vpalignr                m9, m8, m2, 2
+    mova [dst24q+stride3q*2+0], m7                     ; 30 22 14 6
+    mova [dst24q+stride3q*2+32], m6
+    mova  [dst8q+stride3q*2+0], m6
+    mova [dst8q+stride3q*2+32], m9
+    vpalignr                m6, m4, m1, 4
+    vpalignr                m7, m5, m0, 4
+    vpalignr                m9, m8, m2, 4
+    mova   [dst24q+stride5q+0], m7                     ; 29 21 13 5
+    mova  [dst24q+stride5q+32], m6
+    mova    [dst8q+stride5q+0], m6
+    mova   [dst8q+stride5q+32], m9
+    vpalignr                m6, m4, m1, 6
+    vpalignr                m7, m5, m0, 6
+    vpalignr                m9, m8, m2, 6
+    mova [dst24q+strideq*4+0 ], m7                     ; 28 20 12 4
+    mova [dst24q+strideq*4+32], m6
+    mova   [dst8q+strideq*4+0], m6
+    mova  [dst8q+strideq*4+32], m9
+    vpalignr                m6, m4, m1, 8
+    vpalignr                m7, m5, m0, 8
+    vpalignr                m9, m8, m2, 8
+    mova  [dst24q+stride3q+0 ], m7                     ; 27 19 11 3
+    mova  [dst24q+stride3q+32], m6
+    mova    [dst8q+stride3q+0], m6
+    mova   [dst8q+stride3q+32], m9
+    vpalignr                m6, m4, m1, 10
+    vpalignr                m7, m5, m0, 10
+    vpalignr                m9, m8, m2, 10
+    mova [dst24q+strideq*2+0 ], m7                     ; 26 18 10 2
+    mova [dst24q+strideq*2+32], m6
+    mova   [dst8q+strideq*2+0], m6
+    mova  [dst8q+strideq*2+32], m9
+    vpalignr                m6, m4, m1, 12
+    vpalignr                m7, m5, m0, 12
+    vpalignr                m9, m8, m2, 12
+    mova   [dst24q+strideq+0 ], m7                     ; 25 17 9 1
+    mova   [dst24q+strideq+32], m6
+    mova     [dst8q+strideq+0], m6
+    mova    [dst8q+strideq+32], m9
+    vpalignr                m6, m4, m1, 14
+    vpalignr                m7, m5, m0, 14
+    vpalignr                m9, m8, m2, 14
+    mova [dst24q+strideq*0+0 ], m7                     ; 24 16 8 0
+    mova [dst24q+strideq*0+32], m6
+    mova   [dst8q+strideq*0+0], m6
+    mova  [dst8q+strideq*0+32], m9
+    mova                    m0, m5                     ; slide every source register 8 pixels (one 128-bit lane) along the edge for the next pass
+    mova                    m5, m1
+    mova                    m1, m4
+    mova                    m4, m2
+    mova                    m2, m8
+    mova                    m8, m3
+    sub                 dst24q, stride7q               ; both row pointers move up by 8 rows (7*stride + stride)
+    sub                 dst24q, strideq
+    sub                  dst8q, stride7q
+    sub                  dst8q, strideq
+    dec                   cntd
+    jg .loop
+    RET
+%endif
+%endif
+
+%macro VL_FUNCS 1 ; stack_mem_for_32x32_32bit_function
+cglobal vp9_ipred_vl_4x4_16, 2, 4, 3, dst, stride, l, a ; 4x4 "vl" predictor (presumably vertical-left), 16-bit pixels; only the a edge is read
+    movifnidn               aq, amp                 ; make sure the a pointer is in aq
+    movu                    m0, [aq]                ; abcdefgh
+    psrldq                  m1, m0, 2               ; bcdefgh.
+    psrldq                  m2, m0, 4               ; cdefgh..
+    LOWPASS                  2,  1, 0               ; BCDEFGH.
+    pavgw                   m1, m0                  ; ABCDEFG.
+    DEFINE_ARGS dst, stride, stride3                ; the third argument register is renamed to hold 3*stride
+    lea               stride3q, [strideq*3]
+
+    movh      [dstq+strideq*0], m1                  ; row 0: first 4 pixels of the pavgw result
+    movh      [dstq+strideq*1], m2                  ; row 1: first 4 pixels of the LOWPASS result
+    psrldq                  m1, 2                   ; advance both vectors by one pixel
+    psrldq                  m2, 2
+    movh      [dstq+strideq*2], m1                  ; row 2: pavgw result, one pixel further along
+    movh      [dstq+stride3q ], m2                  ; row 3: LOWPASS result, one pixel further along
+    RET
+
+cglobal vp9_ipred_vl_8x8_16, 2, 4, 4, dst, stride, l, a ; 8x8 "vl" predictor, 16-bit pixels: one xmm = one 8-pixel row; only the a edge is read
+    movifnidn               aq, amp                 ; make sure the a pointer is in aq
+    mova                    m0, [aq]                ; abcdefgh
+%if cpuflag(ssse3)
+    mova                    m3, [pb_2to15_14_15]    ; shuffle mask handed to SHIFT_RIGHT* below (SSSE3 and later only)
+%endif
+    SHIFT_RIGHTx2           m1, m2, m0, m3          ; bcdefghh/cdefghhh
+    LOWPASS                  2,  1, 0               ; BCDEFGHh
+    pavgw                   m1, m0                  ; ABCDEFGh
+    DEFINE_ARGS dst, stride, stride3                ; the third argument register is renamed to hold 3*stride
+    lea               stride3q, [strideq*3]
+
+    mova      [dstq+strideq*0], m1                  ; even rows take the pavgw vector
+    mova      [dstq+strideq*1], m2                  ; odd rows take the LOWPASS vector
+    SHIFT_RIGHT             m1, m1, m3              ; advance one pixel per row pair (last pixel replicated, per the pattern noted above)
+    SHIFT_RIGHT             m2, m2, m3
+    mova      [dstq+strideq*2], m1
+    mova      [dstq+stride3q ], m2
+    lea                   dstq, [dstq+strideq*4]    ; continue with rows 4-7
+    SHIFT_RIGHT             m1, m1, m3
+    SHIFT_RIGHT             m2, m2, m3
+    mova      [dstq+strideq*0], m1
+    mova      [dstq+strideq*1], m2
+    SHIFT_RIGHT             m1, m1, m3
+    SHIFT_RIGHT             m2, m2, m3
+    mova      [dstq+strideq*2], m1
+    mova      [dstq+stride3q ], m2
+    RET
+
+cglobal vp9_ipred_vl_16x16_16, 2, 4, 6, dst, stride, l, a ; 16x16 "vl" predictor, 16-bit pixels: each row is two xmm stores (+0 and +16 bytes)
+    movifnidn               aq, amp                 ; make sure the a pointer is in aq
+    mova                    m0, [aq]                ; a[0-7]
+    mova                    m1, [aq+mmsize]         ; a[8-15]
+    PALIGNR                 m2, m1, m0, 2, m3       ; a[1-8]
+    PALIGNR                 m3, m1, m0, 4, m4       ; a[2-9]
+    LOWPASS                  3,  2,  0              ; m3 = LOWPASS vector, left half
+    pavgw                   m2, m0                  ; m2 = pavgw vector, left half
+%if cpuflag(ssse3)
+    mova                    m4, [pb_2to15_14_15]    ; shuffle mask handed to SHIFT_RIGHT* below (SSSE3 and later only)
+%endif
+    SHIFT_RIGHTx2           m5, m0, m1, m4          ; m5 / m0 = a[8-15] advanced by one / two pixels
+    LOWPASS                  0,  5,  1              ; m0 = LOWPASS vector, right half
+    pavgw                   m1, m5                  ; m1 = pavgw vector, right half
+    DEFINE_ARGS dst, stride, cnt                    ; the third argument register becomes the loop counter
+    mov                   cntd, 8                   ; 8 iterations x 2 rows
+
+.loop:
+    mova   [dstq+strideq*0+ 0], m2                  ; even row: pavgw halves
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*1+ 0], m3                  ; odd row: LOWPASS halves
+    mova   [dstq+strideq*1+16], m0
+    lea                   dstq, [dstq+strideq*2]
+%if cpuflag(avx)
+    vpalignr                m2, m1, m2, 2           ; left halves pull their next pixel from the right halves
+    vpalignr                m3, m0, m3, 2
+%else
+    PALIGNR                 m5, m1, m2, 2, m4       ; same shift without a three-operand form: go through m5
+    mova                    m2, m5
+    PALIGNR                 m5, m0, m3, 2, m4
+    mova                    m3, m5
+%endif
+    SHIFT_RIGHT             m1, m1, m4              ; right halves advance one pixel
+    SHIFT_RIGHT             m0, m0, m4
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_vl_32x32_16, 2, 5, 11, %1 * mmsize * ARCH_X86_32, dst, stride, l, a ; 32x32 "vl" predictor, 16-bit pixels; stack slots (count = macro argument) are reserved on x86-32 only
+    movifnidn               aq, amp                 ; make sure the a pointer is in aq
+    mova                    m0, [aq+mmsize*0]       ; a[0-7]
+    mova                    m1, [aq+mmsize*1]       ; a[8-15]
+    mova                    m2, [aq+mmsize*2]       ; a[16-23]
+    PALIGNR                 m6, m1, m0, 2, m5
+    PALIGNR                 m7, m1, m0, 4, m5
+    LOWPASS                  7,  6,  0              ; m7 = LOWPASS vector, column block 0
+    pavgw                   m6, m0                  ; m6 = pavgw vector, column block 0
+    SCRATCH                  6,  8, rsp+0*mmsize    ; NOTE(review): SCRATCH/UNSCRATCH are defined outside this chunk; they appear to park/restore a register via m8.. on x86-64 or the given stack slot on x86-32
+    PALIGNR                 m4, m2, m1, 2, m0
+    PALIGNR                 m5, m2, m1, 4, m0
+    LOWPASS                  5,  4,  1              ; m5 = LOWPASS vector, column block 1
+    pavgw                   m4, m1                  ; m4 = pavgw vector, column block 1
+    mova                    m0, [aq+mmsize*3]       ; a[24-31]
+    PALIGNR                 m1, m0, m2, 2, m6
+    PALIGNR                 m3, m0, m2, 4, m6
+    LOWPASS                  3,  1,  2              ; m3 = LOWPASS vector, column block 2
+    pavgw                   m2, m1                  ; m2 = pavgw vector, column block 2
+%if cpuflag(ssse3)
+    PRELOAD                 10, pb_2to15_14_15, shuf
+%endif
+    SHIFT_RIGHTx2           m6, m1, m0, reg_shuf    ; m6 / m1 = a[24-31] advanced by one / two pixels
+    LOWPASS                  1,  6,  0              ; m1 = LOWPASS vector, column block 3
+    pavgw                   m0, m6                  ; m0 = pavgw vector, column block 3
+%if ARCH_X86_64
+    pshufd                  m9, m6, q3333           ; m9 = last dword of the shifted edge replicated: fills the right block of rows 16-31
+%endif
+%if cpuflag(avx)
+    UNSCRATCH                6,  8, rsp+0*mmsize    ; AVX keeps m6 live across the loop; other builds reload it every iteration
+%endif
+    DEFINE_ARGS dst, stride, cnt, stride16, stride17 ; edge pointers are consumed; rename the registers for the store loop
+    mov              stride16q, strideq
+    mov                   cntd, 8                   ; 8 iterations, each writing rows 2i, 2i+1, 2i+16 and 2i+17
+    shl              stride16q, 4                   ; stride16 = 16*stride
+    lea              stride17q, [stride16q+strideq]
+
+    ; FIXME m8 is unused for avx, so we could save one register here for win64
+.loop:
+%if notcpuflag(avx)
+    UNSCRATCH                6,  8, rsp+0*mmsize    ; bring back the column block 0 pavgw vector parked at the end of the previous iteration
+%endif
+    mova   [dstq+strideq*0+ 0], m6                  ; row 2i: pavgw blocks 0-3
+    mova   [dstq+strideq*0+16], m4
+    mova   [dstq+strideq*0+32], m2
+    mova   [dstq+strideq*0+48], m0
+    mova   [dstq+strideq*1+ 0], m7                  ; row 2i+1: LOWPASS blocks 0-3
+    mova   [dstq+strideq*1+16], m5
+    mova   [dstq+strideq*1+32], m3
+    mova   [dstq+strideq*1+48], m1
+    mova   [dstq+stride16q+ 0], m4                  ; row 2i+16: same data one block (8 pixels) further along
+    mova   [dstq+stride16q+16], m2
+    mova   [dstq+stride16q+32], m0
+%if ARCH_X86_64
+    mova   [dstq+stride16q+48], m9                  ; x86-32 has no m9; it fills this column after the loop instead
+%endif
+    mova   [dstq+stride17q+ 0], m5                  ; row 2i+17
+    mova   [dstq+stride17q+16], m3
+    mova   [dstq+stride17q+32], m1
+%if ARCH_X86_64
+    mova   [dstq+stride17q+48], m9
+%endif
+    lea                   dstq, [dstq+strideq*2]
+%if cpuflag(avx)
+    vpalignr                m6, m4, m6, 2           ; blocks 0-2 each pull their next pixel from the block to their right
+    vpalignr                m4, m2, m4, 2
+    vpalignr                m2, m0, m2, 2
+    vpalignr                m7, m5, m7, 2
+    vpalignr                m5, m3, m5, 2
+    vpalignr                m3, m1, m3, 2
+%else
+    SCRATCH                  3,  8, rsp+0*mmsize    ; free m3 to act as PALIGNR destination
+%if notcpuflag(ssse3)
+    SCRATCH                  1, 10, rsp+1*mmsize    ; SSE2 only: m1 is also needed as the PALIGNR temporary
+%endif
+    PALIGNR                 m3, m4, m6, 2, m1
+    mova                    m6, m3
+    PALIGNR                 m3, m2, m4, 2, m1
+    mova                    m4, m3
+    PALIGNR                 m3, m0, m2, 2, m1
+    mova                    m2, m3
+    PALIGNR                 m3, m5, m7, 2, m1
+    mova                    m7, m3
+    UNSCRATCH                3,  8, rsp+0*mmsize    ; restore the block 2 LOWPASS vector
+    SCRATCH                  6,  8, rsp+0*mmsize    ; park the updated block 0 pavgw vector; m6 becomes the destination below
+%if notcpuflag(ssse3)
+    UNSCRATCH                1, 10, rsp+1*mmsize    ; restore m1 and give up m7 as the temporary instead
+    SCRATCH                  7, 10, rsp+1*mmsize
+%endif
+    PALIGNR                 m6, m3, m5, 2, m7
+    mova                    m5, m6
+    PALIGNR                 m6, m1, m3, 2, m7
+    mova                    m3, m6
+%if notcpuflag(ssse3)
+    UNSCRATCH                7, 10, rsp+1*mmsize
+%endif
+%endif
+    SHIFT_RIGHT             m1, m1, reg_shuf        ; block 3 vectors advance one pixel
+    SHIFT_RIGHT             m0, m0, reg_shuf
+    dec                   cntd
+    jg .loop
+
+%if ARCH_X86_32
+    DEFINE_ARGS dst, stride, stride3                ; dst now points at row 16; fill the +48 column of rows 16-31 skipped in the loop
+    lea               stride3q, [strideq*3]
+%assign %%n 0
+%rep 4
+    mova   [dstq+strideq*0+48], m0                  ; m0 after the 8 SHIFT_RIGHT steps of the loop
+    mova   [dstq+strideq*1+48], m0
+    mova   [dstq+strideq*2+48], m0
+    mova   [dstq+stride3q +48], m0
+%if %%n < 3
+    lea                   dstq, [dstq+strideq*4]    ; skipped after the last group of four rows
+%endif
+%assign %%n (%%n+1)
+%endrep
+%endif
+    RET
+%endmacro
+
+INIT_XMM sse2                                         ; SSE2 build of the VL_FUNCS predictors
+VL_FUNCS 2                                            ; two stack slots: the SSE2 path of the 32x32 loop also uses rsp+1*mmsize
+INIT_XMM ssse3                                        ; SSSE3 build
+VL_FUNCS 1                                            ; one stack slot: only rsp+0*mmsize is used
+INIT_XMM avx                                          ; AVX build
+VL_FUNCS 1
+
+%macro VR_FUNCS 0
+cglobal vp9_ipred_vr_4x4_16, 4, 4, 3, dst, stride, l, a ; 4x4 "vr" predictor (presumably vertical-right), 16-bit pixels
+    movu                    m0, [aq-2]              ; *abcdefg ('*' = the pixel just before a[0])
+    movhps                  m1, [lq]                ; four l pixels into the upper half of m1
+    PALIGNR                 m0, m1, 10, m2          ; xyz*abcd
+    pslldq                  m1, m0, 2               ; .xyz*abc
+    pslldq                  m2, m0, 4               ; ..xyz*ab
+    LOWPASS                  2,  1, 0               ; ..YZ#ABC
+    pavgw                   m1, m0                  ; ....#ABC
+    DEFINE_ARGS dst, stride, stride3                ; the third argument register is renamed to hold 3*stride
+    lea               stride3q, [strideq*3]
+
+    movhps    [dstq+strideq*0], m1                  ; row 0: upper half of the pavgw vector
+    movhps    [dstq+strideq*1], m2                  ; row 1: upper half of the LOWPASS vector
+    shufps                  m0, m2, m1, q3210       ; m0 = lower half of m2 followed by upper half of m1
+%if cpuflag(ssse3)
+    pshufb                  m2, [pb_4_5_8to13_8x0]  ; row 3 source: word 2 followed by words 4-6 of the LOWPASS vector
+%else
+    pshuflw                 m2, m2, q2222           ; same selection without pshufb: replicate word 2, then shift it next to words 4-6
+    psrldq                  m2, 6
+%endif
+    psrldq                  m0, 6                   ; row 2 source: word 3 of the LOWPASS vector followed by words 4-6 of the pavgw vector
+    movh      [dstq+strideq*2], m0
+    movh      [dstq+stride3q ], m2
+    RET
+
+cglobal vp9_ipred_vr_8x8_16, 4, 4, 5, dst, stride, l, a ; 8x8 "vr" predictor, 16-bit pixels: one xmm = one 8-pixel row
+    movu                    m1, [aq-2]              ; *abcdefg
+    movu                    m2, [lq]                ; stuvwxyz
+    mova                    m0, [aq]                ; abcdefgh
+    PALIGNR                 m3, m1, m2, 14, m4      ; z*abcdef
+    LOWPASS                  3,  1,  0              ; m3 = LOWPASS vector of the top edge (odd rows)
+    pavgw                   m0, m1                  ; m0 = pavgw vector of the top edge (even rows)
+    PALIGNR                 m1, m2,  2, m4          ; tuvwxyz*
+    pslldq                  m4, m2,  2              ; .stuvwxy
+    LOWPASS                  4,  2,  1              ; m4 = LOWPASS vector of the left edge, shifted into the rows below
+    DEFINE_ARGS dst, stride, stride3                ; the third argument register is renamed to hold 3*stride
+    lea               stride3q, [strideq*3]
+
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m3
+    PALIGNR                 m0, m4, 14, m1          ; shift the row right by one pixel, taking the new first pixel from the top word of m4
+    pslldq                  m4, 2                   ; expose the next left-edge pixel
+    PALIGNR                 m3, m4, 14, m1
+    pslldq                  m4, 2
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m3
+    lea                   dstq, [dstq+strideq*4]    ; continue with rows 4-7
+    PALIGNR                 m0, m4, 14, m1
+    pslldq                  m4, 2
+    PALIGNR                 m3, m4, 14, m1
+    pslldq                  m4, 2
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m3
+    PALIGNR                 m0, m4, 14, m1
+    pslldq                  m4, 2
+    PALIGNR                 m3, m4, 14, m4          ; last use of m4, so it is also passed as the temporary
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m3
+    RET
+
+cglobal vp9_ipred_vr_16x16_16, 4, 4, 8, dst, stride, l, a ; 16x16 "vr" predictor, 16-bit pixels: each row is two xmm stores (+0 and +16 bytes)
+    movu                    m1, [aq-2]              ; *abcdefg
+    movu                    m2, [aq+mmsize-2]       ; hijklmno
+    mova                    m3, [aq]                ; abcdefgh
+    mova                    m4, [aq+mmsize]         ; ijklmnop
+    mova                    m5, [lq+mmsize]         ; stuvwxyz
+    PALIGNR                 m0, m1, m5, 14, m6      ; z*abcdef
+    movu                    m6, [aq+mmsize-4]       ; ghijklmn
+    LOWPASS                  6,  2,  4              ; m6 = LOWPASS vector, right half (odd rows)
+    pavgw                   m2, m4                  ; m2 = pavgw vector, right half (even rows)
+    LOWPASS                  0,  1,  3              ; m0 = LOWPASS vector, left half (odd rows)
+    pavgw                   m3, m1                  ; m3 = pavgw vector, left half (even rows)
+    PALIGNR                 m1, m5,  2, m7          ; tuvwxyz*
+    movu                    m7, [lq+mmsize-2]       ; rstuvwxy
+    LOWPASS                  1,  5,  7              ; m1 = filtered second half of the left edge
+    movu                    m5, [lq+2]              ; lmnopqrs
+    pslldq                  m4, m5,  2              ; .lmnopqr
+    pslldq                  m7, m5,  4              ; ..lmnopq
+    LOWPASS                  5,  4,  7              ; m5 = filtered first half of the left edge
+    psrld                   m4, m1, 16              ; split the filtered left edge into its odd words (psrld) ...
+    psrld                   m7, m5, 16
+    pand                    m1, [pd_65535]          ; ... and its even words (pand)
+    pand                    m5, [pd_65535]
+    packssdw                m7, m4                  ; m7 = odd words; NOTE(review): signed saturating pack, so pixel values are presumably below 32768 - confirm
+    packssdw                m5, m1                  ; m5 = even words
+    DEFINE_ARGS dst, stride, cnt                    ; the third argument register becomes the loop counter
+    mov                   cntd, 8                   ; 8 iterations x 2 rows
+
+.loop:
+    mova   [dstq+strideq*0+ 0], m3                  ; even row: pavgw halves
+    mova   [dstq+strideq*0+16], m2
+    mova   [dstq+strideq*1+ 0], m0                  ; odd row: LOWPASS halves
+    mova   [dstq+strideq*1+16], m6
+    lea                   dstq, [dstq+strideq*2]
+    PALIGNR                 m2, m3, 14, m4          ; right half shifts right one pixel, taking the top word of the left half
+    PALIGNR                 m3, m7, 14, m4          ; left half shifts right one pixel, taking the top word of m7
+    pslldq                  m7, 2                   ; expose the next pixel of m7
+    PALIGNR                 m6, m0, 14, m4          ; same for the odd row, fed from m5
+    PALIGNR                 m0, m5, 14, m4
+    pslldq                  m5, 2
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_vr_32x32_16, 4, 5, 14, 6 * mmsize * ARCH_X86_32, dst, stride, l, a ; 32x32 "vr" predictor, 16-bit pixels; six mmsize stack slots are reserved on x86-32 only
+    movu                    m0, [aq+mmsize*0-2]     ; *a[0-6]
+    movu                    m1, [aq+mmsize*1-2]     ; a[7-14]
+    movu                    m2, [aq+mmsize*2-2]     ; a[15-22]
+    movu                    m3, [aq+mmsize*3-2]     ; a[23-30]
+    mova                    m4, [aq+mmsize*3+0]     ; a[24-31]
+    movu                    m5, [aq+mmsize*3-4]     ; a[22-29]
+    LOWPASS                  5,  3,  4              ; A[23-30]
+    SCRATCH                  5,  8, rsp+0*mmsize    ; NOTE(review): SCRATCH/UNSCRATCH are defined outside this chunk; they appear to park/restore a register via m8.. on x86-64 or the given stack slot on x86-32
+    pavgw                   m3, m4                  ; m3 = pavgw vector, column block 3 (even rows)
+    mova                    m4, [aq+mmsize*2+0]     ; a[16-23]
+    movu                    m6, [aq+mmsize*2-4]     ; a[14-21]
+    LOWPASS                  6,  2,  4              ; A[15-22]
+    SCRATCH                  6,  9, rsp+1*mmsize
+    pavgw                   m2, m4                  ; m2 = pavgw vector, column block 2
+    mova                    m4, [aq+mmsize*1+0]     ; a[8-15]
+    movu                    m7, [aq+mmsize*1-4]     ; a[6-13]
+    LOWPASS                  7,  1,  4              ; A[7-14]
+    SCRATCH                  7, 10, rsp+2*mmsize
+    pavgw                   m1, m4                  ; m1 = pavgw vector, column block 1
+    mova                    m4, [aq+mmsize*0+0]     ; a[0-7]
+    mova                    m5, [lq+mmsize*3+0]     ; l[24-31]
+    PALIGNR                 m6, m0, m5, 14, m7      ; l[31]*a[0-5]
+    LOWPASS                  6,  0,  4              ; #A[0-6]
+    SCRATCH                  6, 11, rsp+3*mmsize
+    pavgw                   m4, m0                  ; m4 = pavgw vector, column block 0
+    PALIGNR                 m0, m5,  2, m7          ; l[25-31]*
+    movu                    m7, [lq+mmsize*3-2]     ; l[23-30]
+    LOWPASS                  0,  5,  7              ; L[24-31]
+    movu                    m5, [lq+mmsize*2-2]     ; l[15-22]
+    mova                    m7, [lq+mmsize*2+0]     ; l[16-23]
+    movu                    m6, [lq+mmsize*2+2]     ; l[17-24]
+    LOWPASS                  5,  7,  6              ; L[16-23]
+    psrld                   m7, m0, 16              ; split L[16-31] into its odd words (psrld) ...
+    psrld                   m6, m5, 16
+    pand                    m0, [pd_65535]          ; ... and its even words (pand)
+    pand                    m5, [pd_65535]
+    packssdw                m6, m7                  ; m6 = odd words of L[16-31]; NOTE(review): signed saturating pack, so pixel values are presumably below 32768 - confirm
+    packssdw                m5, m0                  ; m5 = even words of L[16-31]
+    SCRATCH                  5, 12, rsp+4*mmsize
+    SCRATCH                  6, 13, rsp+5*mmsize
+    movu                    m6, [lq+mmsize*1-2]     ; l[7-14]
+    mova                    m0, [lq+mmsize*1+0]     ; l[8-15]
+    movu                    m5, [lq+mmsize*1+2]     ; l[9-16]
+    LOWPASS                  6,  0,  5              ; L[8-15]
+    movu                    m0, [lq+mmsize*0+2]     ; l[1-8]
+    pslldq                  m5, m0,  2              ; .l[1-7]
+    pslldq                  m7, m0,  4              ; ..l[1-6]
+    LOWPASS                  0,  5,  7
+    psrld                   m5, m6, 16              ; same odd/even split for the first half of the filtered left edge
+    psrld                   m7, m0, 16
+    pand                    m6, [pd_65535]
+    pand                    m0, [pd_65535]
+    packssdw                m7, m5                  ; m7 = odd words
+    packssdw                m0, m6                  ; m0 = even words
+    UNSCRATCH                6, 13, rsp+5*mmsize    ; m6 = odd words of L[16-31] again
+    DEFINE_ARGS dst, stride, stride16, cnt, stride17 ; edge pointers are consumed; rename the registers for the store loops
+    mov              stride16q, strideq
+    mov                   cntd, 8                   ; 8 iterations, each handling rows 2i, 2i+1, 2i+16 and 2i+17
+    shl              stride16q, 4                   ; stride16 = 16*stride
+%if ARCH_X86_64
+    lea              stride17q, [stride16q+strideq]
+%endif
+
+.loop:
+    mova   [dstq+strideq*0+ 0], m4                  ; row 2i: pavgw blocks 0-3
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*0+32], m2
+    mova   [dstq+strideq*0+48], m3
+%if ARCH_X86_64
+    mova   [dstq+strideq*1+ 0], m11                 ; row 2i+1: LOWPASS blocks 0-3 (x86-32 writes the odd rows in .loop2)
+    mova   [dstq+strideq*1+16], m10
+    mova   [dstq+strideq*1+32], m9
+    mova   [dstq+strideq*1+48], m8
+%endif
+    mova   [dstq+stride16q+ 0], m6                  ; row 2i+16: left-edge block followed by pavgw blocks 0-2
+    mova   [dstq+stride16q+16], m4
+    mova   [dstq+stride16q+32], m1
+    mova   [dstq+stride16q+48], m2
+%if ARCH_X86_64
+    mova   [dstq+stride17q+ 0], m12                 ; row 2i+17
+    mova   [dstq+stride17q+16], m11
+    mova   [dstq+stride17q+32], m10
+    mova   [dstq+stride17q+48], m9
+%endif
+    lea                   dstq, [dstq+strideq*2]
+    PALIGNR                 m3, m2,  14, m5         ; every block shifts right one pixel, taking the top word of the block to its left
+    PALIGNR                 m2, m1,  14, m5
+    PALIGNR                 m1, m4,  14, m5
+    PALIGNR                 m4, m6,  14, m5
+    PALIGNR                 m6, m7,  14, m5
+    pslldq                  m7, 2                   ; expose the next pixel of m7
+%if ARCH_X86_64
+    PALIGNR                 m8, m9,  14, m5         ; same chain for the odd rows, fed from m0
+    PALIGNR                 m9, m10, 14, m5
+    PALIGNR                m10, m11, 14, m5
+    PALIGNR                m11, m12, 14, m5
+    PALIGNR                m12, m0,  14, m5
+    pslldq                  m0, 2
+%endif
+    dec                   cntd
+    jg .loop
+
+%if ARCH_X86_32
+    UNSCRATCH                5, 12, rsp+4*mmsize    ; second pass on x86-32: reload the parked odd-row vectors
+    UNSCRATCH                4, 11, rsp+3*mmsize
+    UNSCRATCH                3, 10, rsp+2*mmsize
+    UNSCRATCH                2,  9, rsp+1*mmsize
+    UNSCRATCH                1,  8, rsp+0*mmsize
+    mov                   dstq, dstm                ; restart from the original dst argument ...
+    mov                   cntd, 8
+    add                   dstq, strideq             ; ... one row down, i.e. at row 1
+.loop2:
+    mova   [dstq+strideq*0+ 0], m4                  ; row 2i+1
+    mova   [dstq+strideq*0+16], m3
+    mova   [dstq+strideq*0+32], m2
+    mova   [dstq+strideq*0+48], m1
+    mova   [dstq+stride16q+ 0], m5                  ; row 2i+17
+    mova   [dstq+stride16q+16], m4
+    mova   [dstq+stride16q+32], m3
+    mova   [dstq+stride16q+48], m2
+    lea                   dstq, [dstq+strideq*2]
+    PALIGNR                 m1, m2,  14, m6         ; same right-shift chain as the x86-64 odd-row path, fed from m0
+    PALIGNR                 m2, m3,  14, m6
+    PALIGNR                 m3, m4,  14, m6
+    PALIGNR                 m4, m5,  14, m6
+    PALIGNR                 m5, m0,  14, m6
+    pslldq                  m0, 2
+    dec                   cntd
+    jg .loop2
+%endif
+    RET
+%endmacro
+
+INIT_XMM sse2                                         ; SSE2 build of the VR_FUNCS predictors
+VR_FUNCS
+INIT_XMM ssse3                                        ; SSSE3 build
+VR_FUNCS
+INIT_XMM avx                                          ; AVX build
+VR_FUNCS
+
+%macro HU_FUNCS 1 ; stack_mem_for_32x32_32bit_function
+cglobal vp9_ipred_hu_4x4_16, 3, 3, 3, dst, stride, l, a ; 4x4 "hu" predictor (presumably horizontal-up), 16-bit pixels; only the l edge is read
+    movh                    m0, [lq]                ; abcd
+%if cpuflag(ssse3)
+    pshufb                  m0, [pb_0to7_67x4]      ; abcddddd
+%else
+    punpcklqdq              m0, m0
+    pshufhw                 m0, m0, q3333           ; abcddddd
+%endif
+    psrldq                  m1, m0,  2              ; bcddddd.
+    psrldq                  m2, m0,  4              ; cddddd..
+    LOWPASS                  2,  1,  0              ; BCDddd..
+    pavgw                   m1, m0                  ; abcddddd
+    SBUTTERFLY          wd,  1,  2,  0              ; aBbCcDdd, dddddddd
+    PALIGNR                 m2, m1,  4, m0          ; bCcDdddd
+    DEFINE_ARGS dst, stride, stride3                ; the l register is renamed to hold 3*stride
+    lea               stride3q, [strideq*3]
+
+    movh      [dstq+strideq*0], m1                  ; aBbC
+    movh      [dstq+strideq*1], m2                  ; bCcD
+    movhps    [dstq+strideq*2], m1                  ; cDdd
+    movhps    [dstq+stride3q ], m2                  ; dddd
+    RET
+
+cglobal vp9_ipred_hu_8x8_16, 3, 3, 4, dst, stride, l, a ; 8x8 "hu" predictor, 16-bit pixels: one xmm = one 8-pixel row; only the l edge is read
+    mova                    m0, [lq]                ; l[0-7]
+%if cpuflag(ssse3)
+    mova                    m3, [pb_2to15_14_15]    ; shuffle mask handed to SHIFT_RIGHTx2 (SSSE3 and later only)
+%endif
+    SHIFT_RIGHTx2           m1, m2, m0, m3          ; m1 / m2 = the edge advanced by one / two pixels
+    LOWPASS                  2,  1,  0              ; m2 = LOWPASS vector
+    pavgw                   m1, m0                  ; m1 = pavgw vector
+    SBUTTERFLY          wd,  1,  2,  0              ; interleave the two word-wise: m1 = low half, m2 = high half
+    shufps                  m0, m1, m2, q1032       ; m0 = upper half of m1 followed by lower half of m2
+    pshufd                  m3, m2, q3332           ; m3 = upper half of m2 followed by its last dword repeated
+    DEFINE_ARGS dst, stride, stride3                ; the l register is renamed to hold 3*stride
+    lea               stride3q, [strideq*3]
+
+    mova     [dstq+strideq *0], m1                  ; rows 0, 2, 4, 6: each one is 4 words further along the interleaved sequence
+    mova     [dstq+strideq *2], m0
+    mova     [dstq+strideq *4], m2
+    mova     [dstq+stride3q*2], m3
+    add                   dstq, strideq             ; switch to the odd rows
+%if cpuflag(avx)
+    vpalignr                m1, m2, m1, 4           ; advance the sequence by 2 words (one interleaved pair)
+%else
+    PALIGNR                 m0, m2, m1, 4, m3       ; same shift through m0 when no three-operand form is available
+    mova                    m1, m0
+%endif
+    pshufd                  m2, m2, q3321           ; advance m2 by one dword, repeating its last dword
+    shufps                  m0, m1, m2, q1032
+    pshufd                  m3, m2, q3332
+    mova     [dstq+strideq *0], m1                  ; rows 1, 3, 5, 7
+    mova     [dstq+strideq *2], m0
+    mova     [dstq+strideq *4], m2
+    mova     [dstq+stride3q*2], m3
+    RET
+
+cglobal vp9_ipred_hu_16x16_16, 3, 4, 6 + notcpuflag(ssse3), dst, stride, l, a   ; "hu" predictor, 16x16; one extra XMM register is declared without ssse3
+    mova                    m0, [lq]                ; l samples 0-7
+    mova                    m3, [lq+mmsize]         ; l samples 8-15
+    movu                    m1, [lq+2]              ; l samples 1-8
+    movu                    m2, [lq+4]              ; l samples 2-9
+    LOWPASS                  2,  1,  0              ; filtered values in m2 (LOWPASS is defined outside this chunk)
+    pavgw                   m1, m0                  ; rounded averages of neighbouring samples
+    SBUTTERFLY           wd, 1,  2,  0              ; interleave averaged and filtered words -> m1, m2
+%if cpuflag(ssse3)
+    mova                    m5, [pb_2to15_14_15]    ; constant handed to SHIFT_RIGHTx2 (ssse3 and newer only)
+%endif
+    SHIFT_RIGHTx2           m0, m4, m3, m5          ; macro defined outside this chunk; presumably m0/m4 = m3 advanced by 1/2 samples - TODO confirm
+    LOWPASS                  4,  0,  3
+    pavgw                   m3, m0
+    SBUTTERFLY           wd, 3,  4,  5              ; second half of the interleaved data -> m3, m4
+    pshufd                  m0, m0, q3333           ; broadcast the top dword of m0; used as the vector shifted in at the end of the chain
+    DEFINE_ARGS dst, stride, stride3, cnt           ; l's register becomes stride3, a fourth register becomes the loop counter
+    lea               stride3q, [strideq*3]
+    mov                   cntd, 4                   ; 4 iterations, each writing 4 rows of 32 bytes
+
+.loop:
+    mova  [dstq+strideq *0+ 0], m1                  ; row r (r = iteration index 0..3)
+    mova  [dstq+strideq *0+16], m2
+    mova  [dstq+strideq *4+ 0], m2                  ; row r+4
+    mova  [dstq+strideq *4+16], m3
+    mova  [dstq+strideq *8+ 0], m3                  ; row r+8
+    mova  [dstq+strideq *8+16], m4
+    mova  [dstq+stride3q*4+ 0], m4                  ; row r+12
+    mova  [dstq+stride3q*4+16], m0
+    add                   dstq, strideq
+%if cpuflag(avx)
+    vpalignr                m1, m2, m1, 4           ; advance the m1..m4 chain by 4 bytes, pulling from the next register
+    vpalignr                m2, m3, m2, 4
+    vpalignr                m3, m4, m3, 4
+    vpalignr                m4, m0, m4, 4
+%else
+    PALIGNR                 m5, m2, m1, 4, m6       ; same chain shift via temporary m5 (m6 is passed as scratch)
+    mova                    m1, m5
+    PALIGNR                 m5, m3, m2, 4, m6
+    mova                    m2, m5
+    PALIGNR                 m5, m4, m3, 4, m6
+    mova                    m3, m5
+    PALIGNR                 m5, m0, m4, 4, m6
+    mova                    m4, m5
+%endif
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_hu_32x32_16, 3, 7, 10 + notcpuflag(ssse3), \
+                               %1 * -mmsize * ARCH_X86_32, dst, stride, l, a    ; stack-size argument scales with the macro argument on x86-32 and is 0 on x86-64
+    mova                    m2, [lq+mmsize*0+0]     ; l samples 0-7
+    movu                    m1, [lq+mmsize*0+2]
+    movu                    m0, [lq+mmsize*0+4]
+    LOWPASS                  0,  1,  2              ; filtered values in m0 (LOWPASS is defined outside this chunk)
+    pavgw                   m1, m2                  ; rounded averages of neighbouring samples
+    SBUTTERFLY           wd, 1,  0,  2              ; interleaved data -> m1, m0
+    SCRATCH                  1,  8, rsp+0*mmsize    ; park m1 (this file's SCRATCH is defined outside this chunk; presumably m8 on x86-64, stack slot 0 on x86-32)
+    mova                    m4, [lq+mmsize*1+0]     ; l samples 8-15
+    movu                    m3, [lq+mmsize*1+2]
+    movu                    m2, [lq+mmsize*1+4]
+    LOWPASS                  2,  3,  4
+    pavgw                   m3, m4
+    SBUTTERFLY           wd, 3,  2,  4              ; interleaved data -> m3, m2
+    mova                    m6, [lq+mmsize*2+0]     ; l samples 16-23
+    movu                    m5, [lq+mmsize*2+2]
+    movu                    m4, [lq+mmsize*2+4]
+    LOWPASS                  4,  5,  6
+    pavgw                   m5, m6
+    SBUTTERFLY           wd, 5,  4,  6              ; interleaved data -> m5, m4
+    mova                    m7, [lq+mmsize*3+0]     ; l samples 24-31
+    SCRATCH                  0,  9, rsp+1*mmsize    ; park m0 so register 0 is free for the shuffle constant
+%if cpuflag(ssse3)
+    mova                    m0, [pb_2to15_14_15]    ; constant handed to SHIFT_RIGHTx2 (ssse3 and newer only)
+%endif
+    SHIFT_RIGHTx2           m1, m6, m7, m0          ; macro defined outside this chunk; presumably m1/m6 = m7 advanced by 1/2 samples - TODO confirm
+    LOWPASS                  6,  1,  7
+    pavgw                   m7, m1
+    SBUTTERFLY           wd, 7,  6,  0              ; interleaved data -> m7, m6
+    pshufd                  m1, m1, q3333           ; broadcast the top dword of m1; used to fill the tail of the last rows
+    UNSCRATCH                0,  9, rsp+1*mmsize    ; bring the parked m0 back
+    DEFINE_ARGS dst, stride, cnt, stride3, stride4, stride20, stride28
+    lea               stride3q, [strideq*3]
+    lea               stride4q, [strideq*4]
+    lea              stride28q, [stride4q*8]        ; 32*stride for now ...
+    lea              stride20q, [stride4q*5]
+    sub              stride28q, stride4q            ; ... minus 4*stride = 28*stride
+    mov                   cntd, 4                   ; 4 iterations, each writing 8 rows of 64 bytes
+
+.loop:
+%if ARCH_X86_64
+    SWAP                     1,  8                  ; m1 <-> the parked first vector; the broadcast filler moves to m8
+%else
+    mova        [rsp+1*mmsize], m1                  ; x86-32: keep the broadcast filler in slot 1 ...
+    mova                    m1, [rsp+0*mmsize]      ; ... and reload the first vector from slot 0
+%endif
+    mova  [dstq+strideq *0+ 0], m1                  ; row r (r = iteration index 0..3)
+    mova  [dstq+strideq *0+16], m0
+    mova  [dstq+strideq *0+32], m3
+    mova  [dstq+strideq *0+48], m2
+    mova  [dstq+stride4q*1+ 0], m0                  ; row r+4
+    mova  [dstq+stride4q*1+16], m3
+    mova  [dstq+stride4q*1+32], m2
+    mova  [dstq+stride4q*1+48], m5
+    mova  [dstq+stride4q*2+ 0], m3                  ; row r+8
+    mova  [dstq+stride4q*2+16], m2
+    mova  [dstq+stride4q*2+32], m5
+    mova  [dstq+stride4q*2+48], m4
+%if cpuflag(avx)
+    vpalignr                m1, m0, m1, 4           ; advance the first three vectors of the chain by 4 bytes
+    vpalignr                m0, m3, m0, 4
+    vpalignr                m3, m2, m3, 4
+%else
+    SCRATCH                  6,  9, rsp+2*mmsize    ; free m6 to serve as the PALIGNR destination temporary
+%if notcpuflag(ssse3)
+    SCRATCH                  7, 10, rsp+3*mmsize    ; without ssse3, m7 is also freed (it is passed as PALIGNR scratch)
+%endif
+    PALIGNR                 m6, m0, m1, 4, m7
+    mova                    m1, m6
+    PALIGNR                 m6, m3, m0, 4, m7
+    mova                    m0, m6
+    PALIGNR                 m6, m2, m3, 4, m7
+    mova                    m3, m6
+    UNSCRATCH                6,  9, rsp+2*mmsize
+    SCRATCH                  0,  9, rsp+2*mmsize    ; now park m0 instead: it becomes the temporary for the second half
+%if notcpuflag(ssse3)
+    UNSCRATCH                7, 10, rsp+3*mmsize
+    SCRATCH                  3, 10, rsp+3*mmsize    ; and m3 is parked to serve as scratch
+%endif
+%endif
+%if ARCH_X86_64
+    SWAP                     1,  8                  ; swap back: m1 is the broadcast filler again
+%else
+    mova        [rsp+0*mmsize], m1                  ; x86-32: store the updated first vector to slot 0 ...
+    mova                    m1, [rsp+1*mmsize]      ; ... and reload the filler
+%endif
+    mova  [dstq+stride3q*4+ 0], m2                  ; row r+12
+    mova  [dstq+stride3q*4+16], m5
+    mova  [dstq+stride3q*4+32], m4
+    mova  [dstq+stride3q*4+48], m7
+    mova  [dstq+stride4q*4+ 0], m5                  ; row r+16
+    mova  [dstq+stride4q*4+16], m4
+    mova  [dstq+stride4q*4+32], m7
+    mova  [dstq+stride4q*4+48], m6
+    mova  [dstq+stride20q + 0], m4                  ; row r+20
+    mova  [dstq+stride20q +16], m7
+    mova  [dstq+stride20q +32], m6
+    mova  [dstq+stride20q +48], m1
+    mova  [dstq+stride3q*8+ 0], m7                  ; row r+24
+    mova  [dstq+stride3q*8+16], m6
+    mova  [dstq+stride3q*8+32], m1
+    mova  [dstq+stride3q*8+48], m1
+    mova  [dstq+stride28q + 0], m6                  ; row r+28
+    mova  [dstq+stride28q +16], m1
+    mova  [dstq+stride28q +32], m1
+    mova  [dstq+stride28q +48], m1
+%if cpuflag(avx)
+    vpalignr                m2, m5, m2, 4           ; advance the remaining vectors of the chain by 4 bytes
+    vpalignr                m5, m4, m5, 4
+    vpalignr                m4, m7, m4, 4
+    vpalignr                m7, m6, m7, 4
+    vpalignr                m6, m1, m6, 4           ; the last vector pulls from the broadcast filler
+%else
+    PALIGNR                 m0, m5, m2, 4, m3       ; same chain shift via temporary m0 (m3 is passed as scratch)
+    mova                    m2, m0
+    PALIGNR                 m0, m4, m5, 4, m3
+    mova                    m5, m0
+    PALIGNR                 m0, m7, m4, 4, m3
+    mova                    m4, m0
+    PALIGNR                 m0, m6, m7, 4, m3
+    mova                    m7, m0
+    PALIGNR                 m0, m1, m6, 4, m3
+    mova                    m6, m0
+    UNSCRATCH                0,  9, rsp+2*mmsize    ; restore the parked m0
+%if notcpuflag(ssse3)
+    UNSCRATCH                3, 10, rsp+3*mmsize    ; and m3
+%endif
+%endif
+    add                   dstq, strideq
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+HU_FUNCS 4                                     ; sse2 build; the 32x32 function addresses stack slots rsp+0..3*mmsize on x86-32
+INIT_XMM ssse3
+HU_FUNCS 3                                     ; ssse3 build; slots rsp+0..2*mmsize
+INIT_XMM avx
+HU_FUNCS 2                                     ; avx build; slots rsp+0..1*mmsize
+
+%macro HD_FUNCS 0
+cglobal vp9_ipred_hd_4x4_16, 4, 4, 4, dst, stride, l, a     ; "hd" predictor, 4x4 block of 16-bit samples; reads both l and a
+    movh                    m0, [lq]                ; low half: four samples from l
+    movhps                  m0, [aq-2]              ; high half: four samples starting one sample before a
+    psrldq                  m1, m0, 2               ; m0 advanced by one sample
+    psrldq                  m2, m0, 4               ; m0 advanced by two samples
+    LOWPASS                  2,  1,  0              ; filtered values in m2 (LOWPASS is defined outside this chunk)
+    pavgw                   m1, m0                  ; rounded averages of neighbouring samples
+    punpcklwd               m1, m2                  ; interleave the low averaged and filtered words
+    DEFINE_ARGS dst, stride, stride3                ; l's register is reused as stride3; a is no longer named
+    lea               stride3q, [strideq*3]
+
+    movh      [dstq+stride3q ], m1                  ; row 3
+    movhps    [dstq+strideq*1], m1                  ; row 1
+    movhlps                 m2, m2                  ; move the high qword of m2 into its low qword
+    PALIGNR                 m2, m1, 4, m0           ; m2 = (m2:m1) >> 4 bytes
+    movh      [dstq+strideq*2], m2                  ; row 2
+    movhps    [dstq+strideq*0], m2                  ; row 0
+    RET
+
+cglobal vp9_ipred_hd_8x8_16, 4, 4, 5, dst, stride, l, a     ; "hd" predictor, 8x8 block of 16-bit samples
+    mova                    m0, [lq]                ; eight samples from l
+    movu                    m1, [aq-2]              ; eight samples starting one sample before a
+    PALIGNR                 m2, m1, m0, 2, m3       ; (m1:m0) advanced by one sample
+    PALIGNR                 m3, m1, m0, 4, m4       ; (m1:m0) advanced by two samples
+    LOWPASS                  3,  2,  0              ; filtered values in m3 (LOWPASS is defined outside this chunk)
+    pavgw                   m2, m0                  ; rounded averages of neighbouring samples
+    SBUTTERFLY           wd, 2,  3,  0              ; interleave averaged and filtered words -> m2, m3
+    psrldq                  m0, m1,  2
+    psrldq                  m4, m1,  4
+    LOWPASS                  1,  0,  4              ; filtered values of the a-side samples in m1
+    DEFINE_ARGS dst8, mstride, cnt                  ; rename: dst -> dst8, stride -> mstride, l's register -> cnt
+    lea                  dst8q, [dst8q+mstrideq*8]  ; dst8 = dst + 8*stride
+    neg               mstrideq                      ; mstride = -stride: rows are written bottom-up
+    mov                   cntd, 4
+
+.loop:
+    add                  dst8q, mstrideq            ; step one row up
+    mova    [dst8q+mstrideq*0], m2                  ; row 7-i (i = iteration index 0..3)
+    mova    [dst8q+mstrideq*4], m3                  ; row 3-i
+%if cpuflag(avx)
+    vpalignr                m2, m3, m2, 4           ; advance the m2 <- m3 <- m1 chain by 4 bytes
+    vpalignr                m3, m1, m3, 4
+%else
+    PALIGNR                 m0, m3, m2, 4, m4       ; same chain shift via temporary m0 (m4 is passed as scratch)
+    mova                    m2, m0
+    PALIGNR                 m0, m1, m3, 4, m4
+    mova                    m3, m0
+%endif
+    psrldq                  m1, 4                   ; consume 4 bytes of the a-side vector
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_hd_16x16_16, 4, 4, 8, dst, stride, l, a   ; "hd" predictor, 16x16 block of 16-bit samples
+    mova                    m2, [lq]                ; l samples 0-7
+    movu                    m1, [lq+2]
+    movu                    m0, [lq+4]
+    LOWPASS                  0,  1,  2              ; filtered values in m0 (LOWPASS is defined outside this chunk)
+    pavgw                   m1, m2                  ; rounded averages of neighbouring samples
+    mova                    m4, [lq+mmsize]         ; l samples 8-15
+    movu                    m5, [aq-2]              ; eight samples starting one sample before a
+    PALIGNR                 m3, m5, m4, 2, m6       ; (m5:m4) advanced by one sample
+    PALIGNR                 m2, m5, m4, 4, m6       ; (m5:m4) advanced by two samples
+    LOWPASS                  2,  3,  4
+    pavgw                   m3, m4
+    SBUTTERFLY           wd, 1,  0,  4              ; interleaved l-side data -> m1, m0
+    SBUTTERFLY           wd, 3,  2,  4              ; ... and -> m3, m2
+    mova                    m6, [aq]
+    movu                    m4, [aq+2]
+    LOWPASS                  4,  6,  5              ; filtered a samples in m4
+    movu                    m5, [aq+mmsize-2]
+    psrldq                  m6, m5,  2
+    psrldq                  m7, m5,  4
+    LOWPASS                  5,  6,  7              ; filtered values of the following a samples in m5
+    DEFINE_ARGS dst, mstride, mstride3, cnt         ; rename: stride -> mstride, l's register -> mstride3, a's register -> cnt
+    lea                   dstq, [dstq+mstrideq*8]
+    lea                   dstq, [dstq+mstrideq*8]   ; two steps: dst += 16*stride
+    neg               mstrideq                      ; mstride = -stride: rows are written bottom-up
+    lea              mstride3q, [mstrideq*3]
+    mov                   cntd, 4                   ; 4 iterations, each writing 4 rows of 32 bytes
+
+.loop:
+    add                  dstq, mstrideq             ; step one row up
+    mova [dstq+mstride3q*4+ 0], m2                  ; row 3-i (i = iteration index 0..3)
+    mova [dstq+mstride3q*4+16], m4
+    mova [dstq+mstrideq *8+ 0], m3                  ; row 7-i
+    mova [dstq+mstrideq *8+16], m2
+    mova [dstq+mstrideq *4+ 0], m0                  ; row 11-i
+    mova [dstq+mstrideq *4+16], m3
+    mova [dstq+mstrideq *0+ 0], m1                  ; row 15-i
+    mova [dstq+mstrideq *0+16], m0
+%if cpuflag(avx)
+    vpalignr                m1, m0, m1, 4           ; advance the m1 <- m0 <- m3 <- m2 <- m4 <- m5 chain by 4 bytes
+    vpalignr                m0, m3, m0, 4
+    vpalignr                m3, m2, m3, 4
+    vpalignr                m2, m4, m2, 4
+    vpalignr                m4, m5, m4, 4
+%else
+    PALIGNR                 m6, m0, m1, 4, m7       ; same chain shift via temporary m6 (m7 is passed as scratch)
+    mova                    m1, m6
+    PALIGNR                 m6, m3, m0, 4, m7
+    mova                    m0, m6
+    PALIGNR                 m6, m2, m3, 4, m7
+    mova                    m3, m6
+    PALIGNR                 m6, m4, m2, 4, m7
+    mova                    m2, m6
+    PALIGNR                 m6, m5, m4, 4, m7
+    mova                    m4, m6
+%endif
+    psrldq                  m5, 4                   ; consume 4 bytes of the last vector
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_hd_32x32_16, 4, 4 + 3 * ARCH_X86_64, 14, \
+                               10 * -mmsize * ARCH_X86_32, dst, stride, l, a    ; 7 GPRs on x86-64, 4 on x86-32; stack-size argument is non-zero on x86-32 only
+    mova                    m2, [lq+mmsize*0+0]     ; l samples 0-7
+    movu                    m1, [lq+mmsize*0+2]
+    movu                    m0, [lq+mmsize*0+4]
+    LOWPASS                  0,  1,  2              ; filtered values in m0 (LOWPASS is defined outside this chunk)
+    pavgw                   m1, m2                  ; rounded averages of neighbouring samples
+    SBUTTERFLY           wd, 1,  0,  2              ; interleaved data -> m1, m0
+    mova                    m4, [lq+mmsize*1+0]     ; l samples 8-15
+    movu                    m3, [lq+mmsize*1+2]
+    movu                    m2, [lq+mmsize*1+4]
+    LOWPASS                  2,  3,  4
+    pavgw                   m3, m4
+    SBUTTERFLY           wd, 3,  2,  4              ; interleaved data -> m3, m2
+    SCRATCH                  0,  8, rsp+0*mmsize    ; park m0-m3 (this file's SCRATCH is defined outside this chunk; presumably m8-m11 on x86-64, stack slots 0-3 on x86-32)
+    SCRATCH                  1,  9, rsp+1*mmsize
+    SCRATCH                  2, 10, rsp+2*mmsize
+    SCRATCH                  3, 11, rsp+3*mmsize
+    mova                    m6, [lq+mmsize*2+0]     ; l samples 16-23
+    movu                    m5, [lq+mmsize*2+2]
+    movu                    m4, [lq+mmsize*2+4]
+    LOWPASS                  4,  5,  6
+    pavgw                   m5, m6
+    SBUTTERFLY           wd, 5,  4,  6              ; interleaved data -> m5, m4
+    mova                    m0, [lq+mmsize*3+0]     ; l samples 24-31
+    movu                    m1, [aq+mmsize*0-2]     ; eight samples starting one sample before a
+    PALIGNR                 m7, m1, m0, 2, m2       ; (m1:m0) advanced by one sample
+    PALIGNR                 m6, m1, m0, 4, m2       ; (m1:m0) advanced by two samples
+    LOWPASS                  6,  7,  0
+    pavgw                   m7, m0
+    SBUTTERFLY           wd, 7,  6,  0              ; interleaved data -> m7, m6
+    mova                    m2, [aq+mmsize*0+0]
+    movu                    m0, [aq+mmsize*0+2]
+    LOWPASS                  0,  2,  1              ; filtered a samples -> m0
+    movu                    m1, [aq+mmsize*1-2]
+    mova                    m2, [aq+mmsize*1+0]
+    movu                    m3, [aq+mmsize*1+2]
+    LOWPASS                  1,  2,  3              ; next filtered a vector -> m1
+    SCRATCH                  6, 12, rsp+6*mmsize    ; park m6/m7 so they can be used as temporaries below
+    SCRATCH                  7, 13, rsp+7*mmsize
+    movu                    m2, [aq+mmsize*2-2]
+    mova                    m3, [aq+mmsize*2+0]
+    movu                    m6, [aq+mmsize*2+2]
+    LOWPASS                  2,  3,  6              ; next filtered a vector -> m2
+    movu                    m3, [aq+mmsize*3-2]
+    psrldq                  m6, m3,  2
+    psrldq                  m7, m3,  4
+    LOWPASS                  3,  6,  7              ; last filtered a vector -> m3
+    UNSCRATCH                6, 12, rsp+6*mmsize
+    UNSCRATCH                7, 13, rsp+7*mmsize
+%if ARCH_X86_32
+    mova        [rsp+4*mmsize], m4                  ; keep copies of m4/m5 for the second loop (.loop_2)
+    mova        [rsp+5*mmsize], m5
+    ; we already backed up m6/m7 earlier on x86-32 in SCRATCH, so we don't need
+    ; to do it again here
+%endif
+    DEFINE_ARGS dst, stride, cnt, stride3, stride4, stride20, stride28
+    mov                   cntd, 4
+    lea               stride3q, [strideq*3]
+%if ARCH_X86_64
+    lea               stride4q, [strideq*4]
+    lea              stride28q, [stride4q*8]        ; 32*stride for now ...
+    lea              stride20q, [stride4q*5]
+    sub              stride28q, stride4q            ; ... minus 4*stride = 28*stride
+%endif
+    add                   dstq, stride3q            ; start at row 3; the loop walks upwards one row per iteration
+
+    ; x86-32 doesn't have enough registers, so on that platform, we split
+    ; the loop in 2... Otherwise you spend most of the loop (un)scratching
+.loop:
+%if ARCH_X86_64
+    mova  [dstq+stride28q + 0], m9                  ; row 31-i (i = iteration index 0..3)
+    mova  [dstq+stride28q +16], m8
+    mova  [dstq+stride28q +32], m11
+    mova  [dstq+stride28q +48], m10
+    mova  [dstq+stride3q*8+ 0], m8                  ; row 27-i
+    mova  [dstq+stride3q*8+16], m11
+    mova  [dstq+stride3q*8+32], m10
+    mova  [dstq+stride3q*8+48], m5
+    mova  [dstq+stride20q + 0], m11                 ; row 23-i
+    mova  [dstq+stride20q +16], m10
+    mova  [dstq+stride20q +32], m5
+    mova  [dstq+stride20q +48], m4
+    mova  [dstq+stride4q*4+ 0], m10                 ; row 19-i
+    mova  [dstq+stride4q*4+16], m5
+    mova  [dstq+stride4q*4+32], m4
+    mova  [dstq+stride4q*4+48], m7
+%endif
+    mova  [dstq+stride3q*4+ 0], m5                  ; row 15-i
+    mova  [dstq+stride3q*4+16], m4
+    mova  [dstq+stride3q*4+32], m7
+    mova  [dstq+stride3q*4+48], m6
+    mova  [dstq+strideq* 8+ 0], m4                  ; row 11-i
+    mova  [dstq+strideq* 8+16], m7
+    mova  [dstq+strideq* 8+32], m6
+    mova  [dstq+strideq* 8+48], m0
+    mova  [dstq+strideq* 4+ 0], m7                  ; row 7-i
+    mova  [dstq+strideq* 4+16], m6
+    mova  [dstq+strideq* 4+32], m0
+    mova  [dstq+strideq* 4+48], m1
+    mova  [dstq+strideq* 0+ 0], m6                  ; row 3-i
+    mova  [dstq+strideq* 0+16], m0
+    mova  [dstq+strideq* 0+32], m1
+    mova  [dstq+strideq* 0+48], m2
+    sub                   dstq, strideq             ; step one row up
+%if cpuflag(avx)
+%if ARCH_X86_64
+    vpalignr                m9, m8,  m9,  4         ; advance the whole register chain by 4 bytes, each pulling from its successor
+    vpalignr                m8, m11, m8,  4
+    vpalignr               m11, m10, m11, 4
+    vpalignr               m10, m5,  m10, 4
+%endif
+    vpalignr                m5, m4,  m5,  4
+    vpalignr                m4, m7,  m4,  4
+    vpalignr                m7, m6,  m7,  4
+    vpalignr                m6, m0,  m6,  4
+    vpalignr                m0, m1,  m0,  4
+    vpalignr                m1, m2,  m1,  4
+    vpalignr                m2, m3,  m2,  4
+%else
+%if ARCH_X86_64
+    PALIGNR                m12, m8,  m9,  4, m13    ; same chain shift via temporary m12 (m13 is passed as scratch)
+    mova                    m9, m12
+    PALIGNR                m12, m11, m8,  4, m13
+    mova                    m8, m12
+    PALIGNR                m12, m10, m11, 4, m13
+    mova                   m11, m12
+    PALIGNR                m12, m5,  m10, 4, m13
+    mova                   m10, m12
+%endif
+    SCRATCH                  3, 12, rsp+8*mmsize, sh    ; park m3; the 4-argument form presumably defines reg_sh as its new location - TODO confirm
+%if notcpuflag(ssse3)
+    SCRATCH                  2, 13, rsp+9*mmsize    ; without ssse3, m2 is also parked (it is passed as PALIGNR scratch)
+%endif
+    PALIGNR                 m3, m4,  m5,  4, m2
+    mova                    m5, m3
+    PALIGNR                 m3, m7,  m4,  4, m2
+    mova                    m4, m3
+    PALIGNR                 m3, m6,  m7,  4, m2
+    mova                    m7, m3
+    PALIGNR                 m3, m0,  m6,  4, m2
+    mova                    m6, m3
+    PALIGNR                 m3, m1,  m0,  4, m2
+    mova                    m0, m3
+%if notcpuflag(ssse3)
+    UNSCRATCH                2, 13, rsp+9*mmsize
+    SCRATCH                  0, 13, rsp+9*mmsize    ; switch the scratch register from m2 to m0
+%endif
+    PALIGNR                 m3, m2,  m1,  4, m0
+    mova                    m1, m3
+    PALIGNR                 m3, reg_sh,  m2,  4, m0 ; pull from the parked copy of m3
+    mova                    m2, m3
+%if notcpuflag(ssse3)
+    UNSCRATCH                0, 13, rsp+9*mmsize
+%endif
+    UNSCRATCH                3, 12, rsp+8*mmsize, sh
+%endif
+    psrldq                  m3, 4                   ; consume 4 bytes of the last vector
+    dec                   cntd
+    jg .loop
+
+%if ARCH_X86_32
+    UNSCRATCH                0,  8, rsp+0*mmsize    ; reload the eight vectors saved before the first loop
+    UNSCRATCH                1,  9, rsp+1*mmsize
+    UNSCRATCH                2, 10, rsp+2*mmsize
+    UNSCRATCH                3, 11, rsp+3*mmsize
+    mova                    m4, [rsp+4*mmsize]
+    mova                    m5, [rsp+5*mmsize]
+    mova                    m6, [rsp+6*mmsize]
+    mova                    m7, [rsp+7*mmsize]
+    DEFINE_ARGS dst, stride, stride5, stride3       ; temporarily name the third register stride5
+    lea               stride5q, [strideq*5]
+    lea                   dstq, [dstq+stride5q*4]   ; dst += 20*stride (from row -1 to row 19)
+    DEFINE_ARGS dst, stride, cnt, stride3           ; third register becomes the counter again
+    mov                   cntd, 4
+.loop_2:
+    mova  [dstq+stride3q*4+ 0], m1                  ; row 31-i (i = iteration index 0..3)
+    mova  [dstq+stride3q*4+16], m0
+    mova  [dstq+stride3q*4+32], m3
+    mova  [dstq+stride3q*4+48], m2
+    mova  [dstq+strideq* 8+ 0], m0                  ; row 27-i
+    mova  [dstq+strideq* 8+16], m3
+    mova  [dstq+strideq* 8+32], m2
+    mova  [dstq+strideq* 8+48], m5
+    mova  [dstq+strideq* 4+ 0], m3                  ; row 23-i
+    mova  [dstq+strideq* 4+16], m2
+    mova  [dstq+strideq* 4+32], m5
+    mova  [dstq+strideq* 4+48], m4
+    mova  [dstq+strideq* 0+ 0], m2                  ; row 19-i
+    mova  [dstq+strideq* 0+16], m5
+    mova  [dstq+strideq* 0+32], m4
+    mova  [dstq+strideq* 0+48], m7
+    sub                   dstq, strideq             ; step one row up
+%if cpuflag(avx)
+    vpalignr                m1, m0,  m1,  4         ; advance the m1 <- m0 <- m3 <- m2 <- m5 <- m4 <- m7 <- m6 chain by 4 bytes
+    vpalignr                m0, m3,  m0,  4
+    vpalignr                m3, m2,  m3,  4
+    vpalignr                m2, m5,  m2,  4
+    vpalignr                m5, m4,  m5,  4
+    vpalignr                m4, m7,  m4,  4
+    vpalignr                m7, m6,  m7,  4
+%else
+    SCRATCH                  6, 12, rsp+8*mmsize, sh    ; park m6 so it can serve as the PALIGNR destination temporary
+%if notcpuflag(ssse3)
+    SCRATCH                  7, 13, rsp+9*mmsize    ; without ssse3, m7 is also parked (it is passed as PALIGNR scratch)
+%endif
+    PALIGNR                 m6, m0,  m1,  4, m7
+    mova                    m1, m6
+    PALIGNR                 m6, m3,  m0,  4, m7
+    mova                    m0, m6
+    PALIGNR                 m6, m2,  m3,  4, m7
+    mova                    m3, m6
+    PALIGNR                 m6, m5,  m2,  4, m7
+    mova                    m2, m6
+    PALIGNR                 m6, m4,  m5,  4, m7
+    mova                    m5, m6
+%if notcpuflag(ssse3)
+    UNSCRATCH                7, 13, rsp+9*mmsize
+    SCRATCH                  5, 13, rsp+9*mmsize    ; switch the scratch register from m7 to m5
+%endif
+    PALIGNR                 m6, m7,  m4,  4, m5
+    mova                    m4, m6
+    PALIGNR                 m6, reg_sh,  m7,  4, m5 ; pull from the parked copy of m6
+    mova                    m7, m6
+%if notcpuflag(ssse3)
+    UNSCRATCH                5, 13, rsp+9*mmsize
+%endif
+    UNSCRATCH                6, 12, rsp+8*mmsize, sh
+%endif
+    psrldq                  m6, 4                   ; consume 4 bytes of the last vector
+    dec                   cntd
+    jg .loop_2
+%endif
+    RET
+%endmacro
+
+INIT_XMM sse2
+HD_FUNCS                                       ; expand the four hd predictors for sse2
+INIT_XMM ssse3
+HD_FUNCS                                       ; ... again for ssse3
+INIT_XMM avx
+HD_FUNCS                                       ; ... and for avx (takes the vpalignr branches)
diff --git a/media/ffvpx/libavcodec/x86/vp9itxfm.asm b/media/ffvpx/libavcodec/x86/vp9itxfm.asm
new file mode 100644
index 0000000000..2c63fe514a
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9itxfm.asm
@@ -0,0 +1,3197 @@
+;******************************************************************************
+;* VP9 IDCT SIMD optimizations
+;*
+;* Copyright (C) 2013 Clément Bœsch <u pkh me>
+;* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+%include "vp9itxfm_template.asm"
+
+SECTION_RODATA 32
+
+%macro VP9_IDCT_COEFFS 2-3 0 ; coef1, coef2, [1 = also emit the extra sign/order variants]
+const pw_m%1_%2
+times 8 dw -%1,  %2        ; 8 word pairs (-coef1, coef2)
+const pw_%2_%1
+times 8 dw  %2,  %1        ; 8 word pairs (coef2, coef1)
+
+%if %3 == 1
+const pw_m%2_m%1
+times 8 dw -%2, -%1        ; 8 word pairs (-coef2, -coef1)
+%if %1 != %2
+const pw_m%2_%1
+times 8 dw -%2,  %1        ; 8 word pairs (-coef2, coef1)
+const pw_%1_%2
+times 8 dw  %1,  %2        ; 8 word pairs (coef1, coef2)
+%endif
+%endif
+
+%if %1 < 11585
+pw_m%1x2:   times 16 dw -%1*2      ; coef1 below 11585: only the negated doubled value is emitted (plain label, not declared via const)
+%elif %1 > 11585
+pw_%1x2:    times 16 dw  %1*2      ; coef1 above 11585: doubled value, plain label
+%else
+const pw_%1x2
+times 16 dw %1*2           ; coef1 equal to 11585: doubled value, declared via const
+%endif
+
+%if %2 != %1
+pw_%2x2:    times 16 dw  %2*2      ; doubled coef2, skipped when it would duplicate coef1
+%endif
+%endmacro
+
+VP9_IDCT_COEFFS 16364,   804      ; each line emits the pw_* word tables for one coefficient pair
+VP9_IDCT_COEFFS 16305,  1606
+VP9_IDCT_COEFFS 16069,  3196, 1   ; third argument 1: also emit the extra sign/order variants
+VP9_IDCT_COEFFS 15893,  3981
+VP9_IDCT_COEFFS 15137,  6270, 1
+VP9_IDCT_COEFFS 14811,  7005
+VP9_IDCT_COEFFS 14449,  7723
+VP9_IDCT_COEFFS 13160,  9760
+VP9_IDCT_COEFFS 11585, 11585, 1   ; equal pair: the macro skips the tables that would be duplicates
+VP9_IDCT_COEFFS 11003, 12140
+VP9_IDCT_COEFFS 10394, 12665
+VP9_IDCT_COEFFS  9102, 13623, 1
+VP9_IDCT_COEFFS  8423, 14053
+VP9_IDCT_COEFFS  5520, 15426
+VP9_IDCT_COEFFS  4756, 15679
+VP9_IDCT_COEFFS  2404, 16207
+
+const pw_5283_13377
+times 4 dw 5283, 13377             ; the tables below hold 4 word pairs (16 bytes) each
+const pw_9929_13377
+times 4 dw 9929, 13377
+const pw_15212_m13377
+times 4 dw 15212, -13377
+const pw_15212_9929
+times 4 dw 15212, 9929
+const pw_m5283_m15212
+times 4 dw -5283, -15212
+const pw_13377x2
+times 8 dw 13377*2                 ; 8 copies of the doubled value
+const pw_m13377_13377
+times 4 dw -13377, 13377
+const pw_13377_0
+times 4 dw 13377, 0
+
+cextern pw_8
+cextern pw_16
+cextern pw_32
+cextern pw_512
+cextern pw_1024
+cextern pw_2048
+cextern pw_m1
+cextern pd_8192
+
+SECTION .text
+
+%macro VP9_UNPACK_MULSUB_2D_4X 6 ; dst1 [src1], dst2 [src2], dst3, dst4, mul1, mul2
+    punpckhwd          m%4, m%2, m%1                ; interleave the high words of src2 and src1
+    punpcklwd          m%2, m%1                     ; interleave the low words of src2 and src1
+    pmaddwd            m%3, m%4, [pw_m%5_%6]        ; dst3 = src1*mul2 - src2*mul1 (32-bit, high words)
+    pmaddwd            m%4, [pw_%6_%5]              ; dst4 = src1*mul1 + src2*mul2 (32-bit, high words)
+    pmaddwd            m%1, m%2, [pw_m%5_%6]        ; dst1 = src1*mul2 - src2*mul1 (32-bit, low words)
+    pmaddwd            m%2, [pw_%6_%5]              ; dst2 = src1*mul1 + src2*mul2 (32-bit, low words)
+%endmacro
+
+%macro VP9_RND_SH_SUMSUB_BA 6 ; dst1 [src1], dst2 [src2], src3, src4, tmp, round
+    SUMSUB_BA            d, %1, %2, %5              ; dword sum/difference of the first register pair (SUMSUB_BA comes from the included x86util)
+    SUMSUB_BA            d, %3, %4, %5              ; same for the second pair
+    paddd              m%1, %6                      ; add the rounding operand to all four results
+    paddd              m%2, %6
+    paddd              m%3, %6
+    paddd              m%4, %6
+    psrad              m%1, 14                      ; arithmetic shift right by 14
+    psrad              m%2, 14
+    psrad              m%3, 14
+    psrad              m%4, 14
+    packssdw           m%1, m%3                     ; pack dwords back to signed-saturated words: dst1 = (arg1 : arg3)
+    packssdw           m%2, m%4                     ; dst2 = (arg2 : arg4)
+%endmacro
+
+%macro VP9_STORE_2X 5-6 dstq ; reg1, reg2, tmp1, tmp2, zero, dst
+%if mmsize == 32
+    pmovzxbw           m%3, [%6]                    ; load row 0 bytes, zero-extended to words
+    pmovzxbw           m%4, [%6+strideq]            ; load row 1 bytes, zero-extended to words
+%else
+    movh               m%3, [%6]                    ; load row 0 bytes
+    movh               m%4, [%6+strideq]            ; load row 1 bytes
+    punpcklbw          m%3, m%5                     ; widen bytes to words by interleaving with the zero register
+    punpcklbw          m%4, m%5
+%endif
+    paddw              m%3, m%1                     ; row 0 += reg1
+    paddw              m%4, m%2                     ; row 1 += reg2
+%if mmsize == 32
+    packuswb           m%3, m%4                     ; back to bytes with unsigned saturation (packs within each 128-bit lane)
+    ; Intel...
+    vpermq             m%3, m%3, q3120              ; reorder qwords so each row's bytes are contiguous
+    mova              [%6], xm%3                    ; low 128 bits -> row 0
+    vextracti128 [%6+strideq], m%3, 1               ; high 128 bits -> row 1
+%elif mmsize == 16
+    packuswb           m%3, m%4                     ; row 0 in the low qword, row 1 in the high qword
+    movh              [%6], m%3
+    movhps    [%6+strideq], m%3
+%else
+    packuswb           m%3, m%5                     ; remaining register size: pack each row on its own
+    packuswb           m%4, m%5
+    movh              [%6], m%3
+    movh      [%6+strideq], m%4
+%endif
+%endmacro
+
+%macro ZERO_BLOCK 4 ; mem, stride, nnzcpl, zero_reg
+%assign %%y 0
+%rep %3
+%assign %%x 0
+%rep %3*2/mmsize
+    mova      [%1+%%y+%%x], %4                      ; store one register of zero_reg at byte offset y+x
+%assign %%x (%%x+mmsize)
+%endrep
+%assign %%y (%%y+%2)
+%endrep
+%endmacro                                           ; net effect: nnzcpl rows of nnzcpl*2 bytes each, row pitch = stride bytes
+
+;-------------------------------------------------------------------------------------------
+; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+INIT_MMX mmx
+cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob     ; only dst/stride/block are loaded; eob is named but not read
+    mova                m0, [blockq+0*8]            ; four 8-byte groups of coefficients (4 words each)
+    mova                m1, [blockq+1*8]
+    mova                m2, [blockq+2*8]
+    mova                m3, [blockq+3*8]
+    psraw               m0, 2                       ; arithmetic shift right by 2 before transforming
+    psraw               m1, 2
+    psraw               m2, 2
+    psraw               m3, 2
+
+    VP9_IWHT4_1D                                    ; first 1D pass (macro defined outside this chunk)
+    TRANSPOSE4x4W        0, 1, 2, 3, 4
+    VP9_IWHT4_1D                                    ; second 1D pass on the transposed data
+
+    pxor                m4, m4                      ; zero register for VP9_STORE_2X and ZERO_BLOCK
+    VP9_STORE_2X         0, 1, 5, 6, 4              ; add m0/m1 to destination rows 0 and 1
+    lea               dstq, [dstq+strideq*2]
+    VP9_STORE_2X         2, 3, 5, 6, 4              ; add m2/m3 to destination rows 2 and 3
+    ZERO_BLOCK      blockq, 8, 4, m4                ; clear the 32-byte coefficient block
+    RET
+
+;-------------------------------------------------------------------------------------------
+; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+; 2x2 top left corner
+%macro VP9_IDCT4_2x2_1D 0 ; in: m0, m1; multipliers expected in m5, m6, m7; out via VP9_IDCT4_1D_FINALIZE
+    pmulhrsw            m0, m5                              ; m0=t1
+    mova                m2, m0                              ; m2=t0
+    mova                m3, m1
+    pmulhrsw            m1, m6                              ; m1=t2
+    pmulhrsw            m3, m7                              ; m3=t3
+    VP9_IDCT4_1D_FINALIZE                                   ; macro defined outside this chunk
+%endmacro
+
+%macro VP9_IDCT4_WRITEOUT 0 ; in: m0-m3 = four result rows, m4 = zero; uses m5-m7; advances dstq by two rows
+%if cpuflag(ssse3)
+    mova                m5, [pw_2048]
+    pmulhrsw            m0, m5              ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
+    pmulhrsw            m1, m5
+%else
+    mova                m5, [pw_8]          ; same rounding done with add + shift
+    paddw               m0, m5
+    paddw               m1, m5
+    psraw               m0, 4
+    psraw               m1, 4
+%endif
+    VP9_STORE_2X         0,  1,  6,  7,  4  ; add rows 0 and 1 to the destination
+    lea               dstq, [dstq+2*strideq]
+%if cpuflag(ssse3)
+    pmulhrsw            m2, m5
+    pmulhrsw            m3, m5
+%else
+    paddw               m2, m5
+    paddw               m3, m5
+    psraw               m2, 4
+    psraw               m3, 4
+%endif
+    VP9_STORE_2X         2,  3,  6,  7,  4  ; add rows 2 and 3 to the destination
+%endmacro
+
+%macro IDCT_4x4_FN 1 ; opt (instruction-set name handed to INIT_MMX)
+INIT_MMX %1
+cglobal vp9_idct_idct_4x4_add, 4, 4, 0, dst, stride, block, eob
+
+%if cpuflag(ssse3)
+    cmp eobd, 4 ; 2x2 or smaller
+    jg .idctfull
+
+    cmp eobd, 1 ; faster path for when only DC is set
+    jne .idct2x2
+%else
+    cmp eobd, 1 ; without ssse3 there is no 2x2 path: anything but DC-only goes to the full transform
+    jg .idctfull
+%endif
+
+%if cpuflag(ssse3)
+    movd                m0, [blockq]
+    mova                m5, [pw_11585x2]
+    pmulhrsw            m0, m5              ; DC scaled once per 1D pass
+    pmulhrsw            m0, m5
+%else
+    DEFINE_ARGS dst, stride, block, coef    ; eob's register is reused for the scalar DC value
+    movsx            coefd, word [blockq]
+    imul             coefd, 11585
+    add              coefd, 8192
+    sar              coefd, 14              ; first pass: (dc*11585 + 8192) >> 14
+    imul             coefd, 11585
+    add              coefd, (8 << 14) + 8192    ; second pass rounding plus the final +8, pre-scaled by 1<<14
+    sar              coefd, 14 + 4          ; second-pass shift and the final >>4 in one step
+    movd                m0, coefd
+%endif
+    pshufw              m0, m0, 0           ; broadcast the DC word to all four lanes
+    pxor                m4, m4
+    movh          [blockq], m4              ; clear the start of the coefficient block
+%if cpuflag(ssse3)
+    pmulhrsw            m0, [pw_2048]       ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
+%endif
+    VP9_STORE_2X         0,  0,  6,  7,  4  ; add the DC value to rows 0 and 1
+    lea               dstq, [dstq+2*strideq]
+    VP9_STORE_2X         0,  0,  6,  7,  4  ; ... and to rows 2 and 3
+    RET
+
+%if cpuflag(ssse3)
+; faster path for when only top left 2x2 block is set
+.idct2x2:
+    movd                m0, [blockq+0]
+    movd                m1, [blockq+8]
+    mova                m5, [pw_11585x2]    ; multipliers consumed by VP9_IDCT4_2x2_1D
+    mova                m6, [pw_6270x2]
+    mova                m7, [pw_15137x2]
+    VP9_IDCT4_2x2_1D
+    ; partial 2x4 transpose
+    punpcklwd           m0, m1
+    punpcklwd           m2, m3
+    SBUTTERFLY          dq, 0, 2, 1
+    SWAP                1, 2
+    VP9_IDCT4_2x2_1D
+    pxor                m4, m4  ; used for the block reset, and VP9_STORE_2X
+    movh       [blockq+ 0], m4
+    movh       [blockq+ 8], m4
+    VP9_IDCT4_WRITEOUT
+    RET
+%endif
+
+.idctfull: ; generic full 4x4 idct/idct
+    mova                m0, [blockq+ 0]
+    mova                m1, [blockq+ 8]
+    mova                m2, [blockq+16]
+    mova                m3, [blockq+24]
+%if cpuflag(ssse3)
+    mova                m6, [pw_11585x2]
+%endif
+    mova                m7, [pd_8192]       ; rounding
+    VP9_IDCT4_1D                            ; first 1D pass (macro defined outside this chunk)
+    TRANSPOSE4x4W  0, 1, 2, 3, 4
+    VP9_IDCT4_1D                            ; second 1D pass on the transposed data
+    pxor                m4, m4  ; used for the block reset, and VP9_STORE_2X
+    mova       [blockq+ 0], m4
+    mova       [blockq+ 8], m4
+    mova       [blockq+16], m4
+    mova       [blockq+24], m4
+    VP9_IDCT4_WRITEOUT
+    RET
+%endmacro
+
+IDCT_4x4_FN mmxext                             ; build without ssse3: DC-only and full paths
+IDCT_4x4_FN ssse3                              ; build with ssse3: adds the 2x2 fast path
+
+;-------------------------------------------------------------------------------------------
+; void vp9_iadst_iadst_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+%macro IADST4_FN 5 ; name1, 1D macro for the first pass, name2, 1D macro for the second pass, opt
+INIT_MMX %5
+cglobal vp9_%1_%3_4x4_add, 3, 3, 0, dst, stride, block, eob
+%if WIN64 && notcpuflag(ssse3)
+    WIN64_SPILL_XMM 8                       ; x86inc helper, only invoked on WIN64 builds without ssse3
+%endif
+    movdqa            xmm5, [pd_8192]       ; explicit XMM register even though the function is set up with INIT_MMX
+    mova                m0, [blockq+ 0]
+    mova                m1, [blockq+ 8]
+    mova                m2, [blockq+16]
+    mova                m3, [blockq+24]
+%if cpuflag(ssse3)
+    mova                m6, [pw_11585x2]
+%endif
+%ifnidn %1%3, iadstiadst
+    movdq2q             m7, xmm5            ; copy the low 64 bits of xmm5 into m7, skipped for the iadst/iadst variant
+%endif
+    VP9_%2_1D                               ; first 1D pass (macro defined outside this chunk)
+    TRANSPOSE4x4W  0, 1, 2, 3, 4
+    VP9_%4_1D                               ; second 1D pass on the transposed data
+    pxor                m4, m4  ; used for the block reset, and VP9_STORE_2X
+    mova       [blockq+ 0], m4
+    mova       [blockq+ 8], m4
+    mova       [blockq+16], m4
+    mova       [blockq+24], m4
+    VP9_IDCT4_WRITEOUT
+    RET
+%endmacro
+
+IADST4_FN idct,  IDCT4,  iadst, IADST4, sse2   ; emits vp9_idct_iadst_4x4_add
+IADST4_FN iadst, IADST4, idct,  IDCT4,  sse2   ; emits vp9_iadst_idct_4x4_add
+IADST4_FN iadst, IADST4, iadst, IADST4, sse2   ; emits vp9_iadst_iadst_4x4_add
+
+IADST4_FN idct,  IDCT4,  iadst, IADST4, ssse3  ; the same three functions built for ssse3
+IADST4_FN iadst, IADST4, idct,  IDCT4,  ssse3
+IADST4_FN iadst, IADST4, iadst, IADST4, ssse3
+
+%macro SCRATCH 3 ; reg, x86-64 spill reg, x86-32 spill address
+%if ARCH_X86_64
+    SWAP                %1, %2              ; x86-64: swap the two register numbers
+%else
+    mova              [%3], m%1             ; x86-32: store the register to the given memory address
+%endif
+%endmacro
+
+%macro UNSCRATCH 3 ; reg, x86-64 spill reg, x86-32 spill address
+%if ARCH_X86_64
+    SWAP                %1, %2              ; x86-64: swap the two register numbers back
+%else
+    mova               m%1, [%3]            ; x86-32: reload the register from the given memory address
+%endif
+%endmacro
+
+;-------------------------------------------------------------------------------------------
+; void vp9_idct_idct_8x8_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+%macro VP9_IDCT8_1D_FINALIZE 0 ; final butterflies, invoked by VP9_IDCT8_1D and VP9_IDCT8_4x4_1D
+    SUMSUB_BA            w,  3,  6, 5                       ; m3=t0+t7, m6=t0-t7
+    SUMSUB_BA            w,  1,  2, 5                       ; m1=t1+t6, m2=t1-t6
+    SUMSUB_BA            w,  7,  0, 5                       ; m7=t2+t5, m0=t2-t5
+
+    UNSCRATCH            5, 8, blockq+ 0                    ; m5=t4, set aside by the invoking macro
+    SCRATCH              2, 8, blockq+ 0                    ; set m2 (t1-t6) aside; m2 is the last operand below
+
+    SUMSUB_BA            w,  5,  4, 2                       ; m5=t3+t4, m4=t3-t4
+    SWAP                 7,  6,  2
+    SWAP                 3,  5,  0
+
+%if ARCH_X86_64
+    SWAP                 6, 8                               ; x86-64 only; on x86-32 this output stays at [blockq+0]
+%endif
+%endmacro
+
+; x86-32
+; - in: m0/m4 is in mem
+; - out: m6 is in mem
+; x86-64:
+; - everything is in registers (m0-7)
+%macro VP9_IDCT8_1D 0 ; one 8-point IDCT pass; expects D_8192_REG (and W_11585x2_REG on ssse3) to be defined
+%if ARCH_X86_64
+    SWAP                 0, 8                               ; x86-64: IN(0) set aside in m8
+    SWAP                 4, 9                               ; x86-64: IN(4) set aside in m9
+%endif
+
+    VP9_UNPACK_MULSUB_2W_4X 5,  3,  9102, 13623, D_8192_REG, 0, 4  ; m5=t5a, m3=t6a
+    VP9_UNPACK_MULSUB_2W_4X 1,  7, 16069,  3196, D_8192_REG, 0, 4  ; m1=t4a, m7=t7a
+    SUMSUB_BA            w,  5,  1, 0                       ; m5=t4a+t5a (t4), m1=t4a-t5a (t5a)
+    SUMSUB_BA            w,  3,  7, 0                       ; m3=t7a+t6a (t7), m7=t7a-t6a (t6a)
+%if cpuflag(ssse3)
+    SUMSUB_BA            w,  1,  7, 0                       ; m1=t6a+t5a (t6), m7=t6a-t5a (t5)
+    pmulhrsw            m1, W_11585x2_REG                   ; m1=t6
+    pmulhrsw            m7, W_11585x2_REG                   ; m7=t5
+%else
+    VP9_UNPACK_MULSUB_2W_4X 7,  1, 11585, 11585, D_8192_REG, 0, 4  ; non-ssse3 form of the branch above: m7=t5, m1=t6
+%endif
+    VP9_UNPACK_MULSUB_2W_4X 2,  6, 15137,  6270, D_8192_REG, 0, 4  ; m2=t2a, m6=t3a
+
+    UNSCRATCH            0, 8, blockq+ 0    ; IN(0)
+    UNSCRATCH            4, 9, blockq+64    ; IN(4)
+    SCRATCH              5, 8, blockq+ 0    ; set t4 aside; VP9_IDCT8_1D_FINALIZE fetches it again
+
+%if cpuflag(ssse3)
+    SUMSUB_BA            w, 4, 0, 5                         ; m4=IN(0)+IN(4) m0=IN(0)-IN(4)
+    pmulhrsw            m4, W_11585x2_REG                   ; m4=t0a
+    pmulhrsw            m0, W_11585x2_REG                   ; m0=t1a
+%else
+    SCRATCH              7, 9, blockq+64                    ; m7 is one of the trailing operands of the next line
+    VP9_UNPACK_MULSUB_2W_4X 0,  4, 11585, 11585, D_8192_REG, 5, 7
+    UNSCRATCH            7, 9, blockq+64
+%endif
+    SUMSUB_BA            w,  6,  4, 5                       ; m6=t0a+t3a (t0), m4=t0a-t3a (t3)
+    SUMSUB_BA            w,  2,  0, 5                       ; m2=t1a+t2a (t1), m0=t1a-t2a (t2)
+
+    VP9_IDCT8_1D_FINALIZE
+%endmacro
+
+%macro VP9_IDCT8_4x4_1D 0 ; 8-point IDCT pass that reads only m0-m3 as input; pmulhrsw throughout
+    pmulhrsw            m0, W_11585x2_REG                   ; m0=t1a/t0a
+    pmulhrsw            m6, m2, [pw_15137x2]                ; m6=t3a
+    pmulhrsw            m2, [pw_6270x2]                     ; m2=t2a
+    pmulhrsw            m7, m1, [pw_16069x2]                ; m7=t7a
+    pmulhrsw            m1, [pw_3196x2]                     ; m1=t4a
+    pmulhrsw            m5, m3, [pw_m9102x2]                ; m5=t5a
+    pmulhrsw            m3, [pw_13623x2]                    ; m3=t6a
+    SUMSUB_BA            w,  5,  1, 4                       ; m5=t4a+t5a (t4), m1=t4a-t5a (t5a)
+    SUMSUB_BA            w,  3,  7, 4                       ; m3=t7a+t6a (t7), m7=t7a-t6a (t6a)
+    SUMSUB_BA            w,  1,  7, 4                       ; m1=t6a+t5a (t6), m7=t6a-t5a (t5)
+    pmulhrsw            m1, W_11585x2_REG                   ; m1=t6
+    pmulhrsw            m7, W_11585x2_REG                   ; m7=t5
+    psubw               m4, m0, m6                          ; m4=t0a-t3a (t3)
+    paddw               m6, m0                              ; m6=t0a+t3a (t0)
+    SCRATCH              5,  8, blockq+ 0                   ; set t4 aside; VP9_IDCT8_1D_FINALIZE fetches it again
+    SUMSUB_BA            w,  2,  0, 5                       ; m2=t1a+t2a (t1), m0=t1a-t2a (t2)
+    VP9_IDCT8_1D_FINALIZE
+%endmacro
+
+%macro VP9_IDCT8_2x2_1D 1 ; pass (invoked with 1, then 2); reads only m0 and m1 as input
+    pmulhrsw            m0, W_11585x2_REG                   ; m0=t0
+    pmulhrsw            m3, m1, W_16069x2_REG               ; m3=t7
+    pmulhrsw            m1, W_3196x2_REG                    ; m1=t4
+    psubw               m7, m3, m1                          ; t5 = t7a - t4a
+    paddw               m5, m3, m1                          ; t6 = t7a + t4a
+    pmulhrsw            m7, W_11585x2_REG                   ; m7=t5
+    pmulhrsw            m5, W_11585x2_REG                   ; m5=t6
+    SWAP                 5,  1
+    ; merged VP9_IDCT8_1D_FINALIZE to make register-sharing w/ avx easier
+    psubw               m6, m0, m3                          ; m6=t0-t7
+    paddw               m3, m0                              ; m3=t0+t7
+    psubw               m2, m0, m1                          ; m2=t1-t6
+    paddw               m1, m0                              ; m1=t1+t6
+%if %1 == 1
+    punpcklwd           m3, m1                              ; pass 1: first interleave of the transpose is done here
+%define SCRATCH_REG 1
+%elif ARCH_X86_32
+    mova       [blockq+ 0], m2                              ; pass 2, x86-32: spill m2 so it can be the temp register
+%define SCRATCH_REG 2
+%else
+%define SCRATCH_REG 8
+%endif
+    psubw               m4, m0, m5                          ; m4=t3-t4
+    paddw               m5, m0                              ; m5=t3+t4
+    SUMSUB_BA            w,  7,  0, SCRATCH_REG             ; m7=t2+t5, m0=t2-t5
+    SWAP                 7,  6,  2
+    SWAP                 3,  5,  0
+%undef SCRATCH_REG
+%endmacro
+
+%macro VP9_IDCT8_WRITEx2 6-8 5 ; line1, line2, tmp1, tmp2, zero, pw_1024/pw_16, shift (default 5), extra VP9_STORE_2X argument
+%if cpuflag(ssse3)
+    pmulhrsw           m%1, %6              ; (x*1024 + (1<<14))>>15 <=> (x+16)>>5
+    pmulhrsw           m%2, %6
+%else
+    paddw              m%1, %6              ; no pmulhrsw: add the rounding constant ...
+    paddw              m%2, %6
+    psraw              m%1, %7              ; ... then shift right arithmetically by the shift argument
+    psraw              m%2, %7
+%endif
+%if %0 <= 7
+    VP9_STORE_2X        %1, %2, %3, %4, %5
+%else
+    VP9_STORE_2X        %1, %2, %3, %4, %5, %8      ; eighth argument is forwarded unchanged
+%endif
+%endmacro
+
+; x86-32:
+; - m6 is in mem
+; x86-64:
+; - m8 holds m6 (SWAP)
+; m6 holds zero
+%macro VP9_IDCT8_WRITEOUT 0 ; round the eight result rows and hand them, two at a time, to VP9_STORE_2X
+%if ARCH_X86_64
+%if cpuflag(ssse3)
+    mova                m9, [pw_1024]
+%else
+    mova                m9, [pw_16]
+%endif
+%define ROUND_REG m9
+%else
+%if cpuflag(ssse3)
+%define ROUND_REG [pw_1024]
+%else
+%define ROUND_REG [pw_16]
+%endif
+%endif
+    SCRATCH              5, 10, blockq+16           ; set rows 5 and 7 aside: m5/m7 are the temps of the next two writes
+    SCRATCH              7, 11, blockq+32
+    VP9_IDCT8_WRITEx2    0,  1, 5, 7, 6, ROUND_REG
+    lea               dstq, [dstq+2*strideq]        ; advance dst by two rows
+    VP9_IDCT8_WRITEx2    2,  3, 5, 7, 6, ROUND_REG
+    lea               dstq, [dstq+2*strideq]
+    UNSCRATCH            5, 10, blockq+16
+    UNSCRATCH            7, 11, blockq+32
+    VP9_IDCT8_WRITEx2    4,  5, 0, 1, 6, ROUND_REG  ; rows 0-3 are written, so m0/m1 serve as temps from here on
+    lea               dstq, [dstq+2*strideq]
+    UNSCRATCH            5, 8, blockq+ 0            ; fetch the row that was kept in m8 / at [blockq+0]
+    VP9_IDCT8_WRITEx2    5,  7, 0, 1, 6, ROUND_REG
+
+%undef ROUND_REG
+%endmacro
+
+%macro VP9_IDCT_IDCT_8x8_ADD_XMM 2 ; opt, xmm register count handed to cglobal
+INIT_XMM %1
+cglobal vp9_idct_idct_8x8_add, 4, 4, %2, dst, stride, block, eob
+
+%if cpuflag(ssse3)
+%if ARCH_X86_64
+    mova               m12, [pw_11585x2]    ; often used
+%define W_11585x2_REG m12
+%else
+%define W_11585x2_REG [pw_11585x2]
+%endif
+
+    cmp eobd, 12 ; top left half or less
+    jg .idctfull
+
+    cmp eobd, 3  ; top left corner or less
+    jg .idcthalf
+
+    cmp eobd, 1 ; faster path for when only DC is set
+    jne .idcttopleftcorner
+%else
+    cmp eobd, 1                             ; without ssse3 the only shortcut is the DC-only path below
+    jg .idctfull
+%endif
+
+%if cpuflag(ssse3)
+    movd                m0, [blockq]
+    pmulhrsw            m0, W_11585x2_REG   ; DC is scaled by pw_11585x2 twice
+    pmulhrsw            m0, W_11585x2_REG
+%else
+    DEFINE_ARGS dst, stride, block, coef    ; fourth argument name changes from eob to coef
+    movsx            coefd, word [blockq]
+    imul             coefd, 11585
+    add              coefd, 8192
+    sar              coefd, 14              ; coef = (dc*11585 + 8192) >> 14
+    imul             coefd, 11585
+    add              coefd, (16 << 14) + 8192
+    sar              coefd, 14 + 5          ; second scaling and the final (x+16)>>5 folded into one shift
+    movd                m0, coefd
+%endif
+    SPLATW              m0, m0, 0
+    pxor                m4, m4
+    movd          [blockq], m4              ; 4-byte store of zero over the DC coefficient
+%if cpuflag(ssse3)
+    pmulhrsw            m0, [pw_1024]       ; (x*1024 + (1<<14))>>15 <=> (x+16)>>5
+%endif
+%rep 3
+    VP9_STORE_2X         0,  0,  6,  7,  4
+    lea               dstq, [dstq+2*strideq]
+%endrep
+    VP9_STORE_2X         0,  0,  6,  7,  4
+    RET
+
+%if cpuflag(ssse3)
+; faster path for when only left corner is set (3 input: DC, right to DC, below
+; to DC). Note: also working with a 2x2 block
+.idcttopleftcorner:
+    movd                m0, [blockq+0]      ; 4 bytes from row 0
+    movd                m1, [blockq+16]     ; 4 bytes from row 1
+%if ARCH_X86_64
+    mova               m10, [pw_3196x2]
+    mova               m11, [pw_16069x2]
+%define W_3196x2_REG m10
+%define W_16069x2_REG m11
+%else
+%define W_3196x2_REG [pw_3196x2]
+%define W_16069x2_REG [pw_16069x2]
+%endif
+    VP9_IDCT8_2x2_1D 1
+    ; partial 2x8 transpose
+    ; punpcklwd m0, m1 already done inside idct
+    punpcklwd           m2, m3
+    punpcklwd           m4, m5
+    punpcklwd           m6, m7
+    punpckldq           m0, m2
+    punpckldq           m4, m6
+    SBUTTERFLY         qdq, 0, 4, 1
+    SWAP                 1, 4
+    VP9_IDCT8_2x2_1D 2
+%if ARCH_X86_64
+    SWAP                 6, 8               ; keep that row in m8: m6 becomes the zero register next
+%endif
+    pxor                m6, m6  ; used for the block reset, and VP9_STORE_2X
+    VP9_IDCT8_WRITEOUT
+%if ARCH_X86_64
+    movd       [blockq+ 0], m6              ; x86-64: clear only the 4 bytes read from each of the two rows
+    movd       [blockq+16], m6
+%else
+    mova       [blockq+ 0], m6              ; x86-32: these three rows were also used as spill slots, clear 16 bytes each
+    mova       [blockq+16], m6
+    mova       [blockq+32], m6
+%endif
+    RET
+
+.idcthalf:
+    movh                m0, [blockq + 0]    ; only the low 8 bytes of rows 0-3 are read
+    movh                m1, [blockq +16]
+    movh                m2, [blockq +32]
+    movh                m3, [blockq +48]
+    VP9_IDCT8_4x4_1D
+    ; partial 4x8 transpose
+%if ARCH_X86_32
+    mova                m6, [blockq+ 0]     ; x86-32: the 1-D macro left this output in memory
+%endif
+    punpcklwd           m0, m1
+    punpcklwd           m2, m3
+    punpcklwd           m4, m5
+    punpcklwd           m6, m7
+    SBUTTERFLY          dq, 0, 2, 1
+    SBUTTERFLY          dq, 4, 6, 5
+    SBUTTERFLY         qdq, 0, 4, 1
+    SBUTTERFLY         qdq, 2, 6, 5
+    SWAP                 1, 4
+    SWAP                 3, 6
+    VP9_IDCT8_4x4_1D
+%if ARCH_X86_64
+    SWAP                 6, 8               ; keep that row in m8: m6 becomes the zero register next
+%endif
+    pxor                m6, m6              ; zero for the block reset and VP9_STORE_2X
+    VP9_IDCT8_WRITEOUT
+%if ARCH_X86_64
+    movh       [blockq+ 0], m6              ; x86-64: clear only the 8 bytes that were read per row
+    movh       [blockq+16], m6
+    movh       [blockq+32], m6
+%else
+    mova       [blockq+ 0], m6              ; x86-32: these three rows were also used as spill slots, clear 16 bytes each
+    mova       [blockq+16], m6
+    mova       [blockq+32], m6
+%endif
+    movh       [blockq+48], m6
+    RET
+%endif
+
+.idctfull: ; generic full 8x8 idct/idct
+%if ARCH_X86_64
+    mova                m0, [blockq+  0]    ; IN(0)
+%endif
+    mova                m1, [blockq+ 16]    ; IN(1)
+    mova                m2, [blockq+ 32]    ; IN(2)
+    mova                m3, [blockq+ 48]    ; IN(3)
+%if ARCH_X86_64
+    mova                m4, [blockq+ 64]    ; IN(4)
+%endif
+    mova                m5, [blockq+ 80]    ; IN(5)
+    mova                m6, [blockq+ 96]    ; IN(6)
+    mova                m7, [blockq+112]    ; IN(7)
+%if ARCH_X86_64
+    mova               m11, [pd_8192]       ; rounding
+%define D_8192_REG m11
+%else
+%define D_8192_REG [pd_8192]
+%endif
+    VP9_IDCT8_1D
+%if ARCH_X86_64
+    TRANSPOSE8x8W  0, 1, 2, 3, 4, 5, 6, 7, 8
+%else
+    TRANSPOSE8x8W  0, 1, 2, 3, 4, 5, 6, 7, [blockq+0], [blockq+64], 1
+    mova        [blockq+0], m0              ; x86-32: the second pass reads IN(0) from memory
+%endif
+    VP9_IDCT8_1D
+
+%if ARCH_X86_64
+    SWAP                 6, 8               ; keep that row in m8: m6 becomes the zero register next
+%endif
+    pxor                m6, m6  ; used for the block reset, and VP9_STORE_2X
+    VP9_IDCT8_WRITEOUT
+    ZERO_BLOCK      blockq, 16, 8, m6
+    RET
+%undef W_11585x2_REG
+%endmacro
+
+VP9_IDCT_IDCT_8x8_ADD_XMM sse2, 12
+VP9_IDCT_IDCT_8x8_ADD_XMM ssse3, 13
+VP9_IDCT_IDCT_8x8_ADD_XMM avx, 13
+
+;---------------------------------------------------------------------------------------------
+; void vp9_iadst_iadst_8x8_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+
+; x86-32:
+; - in: m0/3/4/7 are in mem [blockq+N*16]
+; - out: m6 is in mem [blockq+0]
+; x86-64:
+; - everything is in registers
+%macro VP9_IADST8_1D 0 ; input/output=m0/1/2/3/4/5/6/7
+%if ARCH_X86_64
+    SWAP                     0, 8           ; x86-64: inputs 0/3/4/7 are set aside in m8-m11 until needed
+    SWAP                     3, 9
+    SWAP                     4, 10
+    SWAP                     7, 11
+%endif
+
+    VP9_UNPACK_MULSUB_2D_4X  5,  2,  0,  3, 14449,  7723    ; m5/0=t3[d], m2/3=t2[d]
+    VP9_UNPACK_MULSUB_2D_4X  1,  6,  4,  7,  4756, 15679    ; m1/4=t7[d], m6/7=t6[d]
+    SCRATCH                  4, 12, blockq+1*16
+    VP9_RND_SH_SUMSUB_BA     6,  2,  7,  3, 4, D_8192_REG  ; m6=t2[w], m2=t6[w]
+    UNSCRATCH                4, 12, blockq+1*16
+    VP9_RND_SH_SUMSUB_BA     1,  5,  4,  0, 3, D_8192_REG  ; m1=t3[w], m5=t7[w]
+
+    UNSCRATCH                0,  8, blockq+16*0
+    UNSCRATCH                3,  9, blockq+16*3
+    UNSCRATCH                4, 10, blockq+16*4
+    UNSCRATCH                7, 11, blockq+16*7
+    SCRATCH                  1,  8, blockq+16*1
+    SCRATCH                  2,  9, blockq+16*2
+    SCRATCH                  5, 10, blockq+16*5
+    SCRATCH                  6, 11, blockq+16*6
+
+    VP9_UNPACK_MULSUB_2D_4X  7,  0,  1,  2, 16305,  1606    ; m7/1=t1[d], m0/2=t0[d]
+    VP9_UNPACK_MULSUB_2D_4X  3,  4,  5,  6, 10394, 12665    ; m3/5=t5[d], m4/6=t4[d]
+    SCRATCH                  1, 12, blockq+ 0*16
+    VP9_RND_SH_SUMSUB_BA     4,  0,  6,  2, 1, D_8192_REG  ; m4=t0[w], m0=t4[w]
+    UNSCRATCH                1, 12, blockq+ 0*16
+    VP9_RND_SH_SUMSUB_BA     3,  7,  5,  1, 2, D_8192_REG  ; m3=t1[w], m7=t5[w]
+
+    UNSCRATCH                2,  9, blockq+16*2
+    UNSCRATCH                5, 10, blockq+16*5
+    SCRATCH                  3,  9, blockq+16*3
+    SCRATCH                  4, 10, blockq+16*4
+
+    ; m4=t0, m3=t1, m6=t2, m1=t3, m0=t4, m7=t5, m2=t6, m5=t7
+
+    VP9_UNPACK_MULSUB_2D_4X  0,  7,  1,  3, 15137,  6270    ; m0/1=t5[d], m7/3=t4[d]
+    VP9_UNPACK_MULSUB_2D_4X  5,  2,  4,  6,  6270, 15137    ; m5/4=t6[d], m2/6=t7[d]
+    SCRATCH                  1, 12, blockq+ 0*16
+    VP9_RND_SH_SUMSUB_BA     5,  7,  4,  3, 1, D_8192_REG
+    UNSCRATCH                1, 12, blockq+ 0*16
+    PSIGNW                  m5, W_M1_REG                    ; m5=out1[w], m7=t6[w]
+    VP9_RND_SH_SUMSUB_BA     2,  0,  6,  1, 3, D_8192_REG   ; m2=out6[w], m0=t7[w]
+
+    UNSCRATCH                1,  8, blockq+16*1
+    UNSCRATCH                3,  9, blockq+16*3
+    UNSCRATCH                4, 10, blockq+16*4
+    UNSCRATCH                6, 11, blockq+16*6
+    SCRATCH                  2,  8, blockq+16*0             ; out6 is set aside (m8 / [blockq+0]) until the end
+
+    SUMSUB_BA                w,  6,  4, 2                   ; m6=out0[w], m4=t2[w]
+    SUMSUB_BA                w,  1,  3, 2
+    PSIGNW                  m1, W_M1_REG                    ; m1=out7[w], m3=t3[w]
+
+    ; m6=out0, m5=out1, m4=t2, m3=t3, m7=t6, m0=t7, m2=out6, m1=out7
+
+    ; unfortunately, the code below overflows in some cases
+%if 0; cpuflag(ssse3)
+    SUMSUB_BA                w,  3,  4,  2
+    SUMSUB_BA                w,  0,  7,  2
+    pmulhrsw                m3, W_11585x2_REG
+    pmulhrsw                m7, W_11585x2_REG
+    pmulhrsw                m4, W_11585x2_REG               ; out4
+    pmulhrsw                m0, W_11585x2_REG               ; out2
+%else
+    SCRATCH                  5,  9, blockq+16*1
+    VP9_UNPACK_MULSUB_2W_4X  4, 3, 11585, 11585, D_8192_REG, 2, 5
+    VP9_UNPACK_MULSUB_2W_4X  7, 0, 11585, 11585, D_8192_REG, 2, 5
+    UNSCRATCH                5,  9, blockq+16*1
+%endif
+    PSIGNW                  m3, W_M1_REG                    ; out3
+    PSIGNW                  m7, W_M1_REG                    ; out5
+
+    ; m6=out0, m5=out1, m0=out2, m3=out3, m4=out4, m7=out5, m2=out6, m1=out7
+
+%if ARCH_X86_64
+    SWAP                     2, 8                           ; x86-64: out6 back into m2 (x86-32 leaves it at [blockq+0])
+%endif
+    SWAP                     0, 6, 2
+    SWAP                     7, 1, 5
+%endmacro
+
+%macro IADST8_FN 6 ; type1, 1d_macro1, type2, 1d_macro2, opt, xmm register count handed to cglobal
+INIT_XMM %5
+cglobal vp9_%1_%3_8x8_add, 3, 3, %6, dst, stride, block, eob
+
+%ifidn %1, idct
+%define first_is_idct 1
+%else
+%define first_is_idct 0
+%endif
+
+%ifidn %3, idct
+%define second_is_idct 1
+%else
+%define second_is_idct 0
+%endif
+
+%if ARCH_X86_64
+    mova                m0, [blockq+  0]    ; IN(0)
+%endif
+    mova                m1, [blockq+ 16]    ; IN(1)
+    mova                m2, [blockq+ 32]    ; IN(2)
+%if ARCH_X86_64 || first_is_idct
+    mova                m3, [blockq+ 48]    ; IN(3)
+%endif
+%if ARCH_X86_64
+    mova                m4, [blockq+ 64]    ; IN(4)
+%endif
+    mova                m5, [blockq+ 80]    ; IN(5)
+    mova                m6, [blockq+ 96]    ; IN(6)
+%if ARCH_X86_64 || first_is_idct
+    mova                m7, [blockq+112]    ; IN(7)
+%endif
+%if ARCH_X86_64
+%if cpuflag(ssse3)
+    mova               m15, [pw_11585x2]    ; often used
+%endif
+    mova               m13, [pd_8192]       ; rounding
+    mova               m14, [pw_m1]
+%define W_11585x2_REG m15
+%define D_8192_REG m13
+%define W_M1_REG m14
+%else
+%define W_11585x2_REG [pw_11585x2]
+%define D_8192_REG [pd_8192]
+%define W_M1_REG [pw_m1]
+%endif
+
+    ; note different calling conventions for idct8 vs. iadst8 on x86-32
+    VP9_%2_1D
+%if ARCH_X86_64
+    TRANSPOSE8x8W  0, 1, 2, 3, 4, 5, 6, 7, 8
+%else
+    TRANSPOSE8x8W  0, 1, 2, 3, 4, 5, 6, 7, [blockq+0], [blockq+64], 1
+    mova      [blockq+  0], m0              ; x86-32: both 1-D macros read input 0 from memory
+%if second_is_idct == 0
+    mova      [blockq+ 48], m3              ; x86-32: the iadst macro also reads inputs 3 and 7 from memory
+    mova      [blockq+112], m7
+%endif
+%endif
+    VP9_%4_1D
+
+%if ARCH_X86_64
+    SWAP                 6, 8               ; keep that row in m8: m6 becomes the zero register next
+%endif
+    pxor                m6, m6  ; used for the block reset, and VP9_STORE_2X
+    VP9_IDCT8_WRITEOUT
+    ZERO_BLOCK      blockq, 16, 8, m6
+    RET
+
+%undef W_11585x2_REG
+%undef first_is_idct
+%undef second_is_idct
+
+%endmacro
+
+IADST8_FN idct,  IDCT8,  iadst, IADST8, sse2, 15
+IADST8_FN iadst, IADST8, idct,  IDCT8,  sse2, 15
+IADST8_FN iadst, IADST8, iadst, IADST8, sse2, 15
+IADST8_FN idct,  IDCT8,  iadst, IADST8, ssse3, 16
+IADST8_FN idct,  IDCT8,  iadst, IADST8, avx, 16
+IADST8_FN iadst, IADST8, idct,  IDCT8,  ssse3, 16
+IADST8_FN iadst, IADST8, idct,  IDCT8,  avx, 16
+IADST8_FN iadst, IADST8, iadst, IADST8, ssse3, 16
+IADST8_FN iadst, IADST8, iadst, IADST8, avx, 16
+
+;---------------------------------------------------------------------------------------------
+; void vp9_idct_idct_16x16_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+
+; x86-64:
+; at the end of this macro, m7 is stored in [%4+15*%5]
+; everything else (t0-6 and t8-15) is stored in m0-6 and m8-15
+; the following sumsubs have not been done yet:
+;    SUMSUB_BA            w,  6,  9, 15      ; t6, t9
+;    SUMSUB_BA            w,  7,  8, 15      ; t7, t8
+; or (x86-32) t0-t5 are in m0-m5, t10-t15 are in x11/9/7/5/3/1,
+; and the following sumsubs have not been done yet:
+;    SUMSUB_BA            w, x13, x14, 7       ; t6, t9
+;    SUMSUB_BA            w, x15, x12, 7       ; t7, t8
+
+%macro VP9_IDCT16_1D_START 6 ; src, nnzc, stride, scratch, scratch_stride, is_iadst
+%if %2 <= 4                                 ; nnzc <= 4: the odd half reads only IN(1) and IN(3)
+    mova                m3, [%1+ 1*%3]      ; IN(1)
+    mova                m0, [%1+ 3*%3]      ; IN(3)
+
+    pmulhrsw            m4, m3,  [pw_16305x2]       ; t14-15
+    pmulhrsw            m3, [pw_1606x2]             ; t8-9
+    pmulhrsw            m7, m0,  [pw_m4756x2]       ; t10-11
+    pmulhrsw            m0, [pw_15679x2]            ; t12-13
+
+    ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m14=t5, m13=t6, m15=t7
+    ; m3=t8, m5=t9, m1=t10, m7=t11, m0=t12, m6=t13, m2=t14, m4=t15
+
+    VP9_UNPACK_MULSUB_2W_4X 2, 5, 4, 3, 15137,  6270, [pd_8192], 1, 6 ; t9,  t14
+    SCRATCH              4, 10, %4+ 1*%5
+    SCRATCH              5, 11, %4+ 7*%5
+    VP9_UNPACK_MULSUB_2W_4X 6, 1, 0, 7, 6270, m15137, [pd_8192], 4, 5 ; t10, t13
+    UNSCRATCH            5, 11, %4+ 7*%5
+
+    ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7
+    ; m7=t8, m6=t9, m2=t10, m3=t11, m4=t12, m5=t13, m1=t14, m0=t15
+%else                                       ; nnzc > 4
+    mova                m5, [%1+ 1*%3]      ; IN(1)
+    mova                m4, [%1+ 7*%3]      ; IN(7)
+%if %2 <= 8                                 ; nnzc <= 8: IN(9) and IN(15) are not read
+    pmulhrsw            m2, m5,  [pw_16305x2]       ; t15
+    pmulhrsw            m5, [pw_1606x2]             ; t8
+    pmulhrsw            m3, m4,  [pw_m10394x2]      ; t9
+    pmulhrsw            m4, [pw_12665x2]            ; t14
+%else
+    mova                m3, [%1+ 9*%3]      ; IN(9)
+    mova                m2, [%1+15*%3]      ; IN(15)
+
+    ; m10=in0, m5=in1, m14=in2, m6=in3, m9=in4, m7=in5, m15=in6, m4=in7
+    ; m11=in8, m3=in9, m12=in10 m0=in11, m8=in12, m1=in13, m13=in14, m2=in15
+
+    VP9_UNPACK_MULSUB_2W_4X   5,   2, 16305,  1606, [pd_8192], 0, 1 ; t8,  t15
+    VP9_UNPACK_MULSUB_2W_4X   3,   4, 10394, 12665, [pd_8192], 0, 1 ; t9,  t14
+%endif
+
+    SUMSUB_BA            w,  3,  5, 0       ; t8,  t9
+    SUMSUB_BA            w,  4,  2, 0       ; t15, t14
+
+    VP9_UNPACK_MULSUB_2W_4X   2,   5, 15137,  6270, [pd_8192], 0, 1 ; t9,  t14
+
+    SCRATCH              4, 10, %4+ 1*%5
+    SCRATCH              5, 11, %4+ 7*%5
+
+    mova                m6, [%1+ 3*%3]      ; IN(3)
+    mova                m7, [%1+ 5*%3]      ; IN(5)
+%if %2 <= 8                                 ; nnzc <= 8: IN(11) and IN(13) are not read
+    pmulhrsw            m0, m7,  [pw_14449x2]       ; t13
+    pmulhrsw            m7, [pw_7723x2]             ; t10
+    pmulhrsw            m1, m6,  [pw_m4756x2]       ; t11
+    pmulhrsw            m6, [pw_15679x2]            ; t12
+%else
+    mova                m0, [%1+11*%3]      ; IN(11)
+    mova                m1, [%1+13*%3]      ; IN(13)
+
+    VP9_UNPACK_MULSUB_2W_4X   7,   0, 14449,  7723, [pd_8192], 4, 5 ; t10, t13
+    VP9_UNPACK_MULSUB_2W_4X   1,   6,  4756, 15679, [pd_8192], 4, 5 ; t11, t12
+%endif
+
+    ; m11=t0, m10=t1, m9=t2, m8=t3, m14=t4, m12=t5, m15=t6, m13=t7
+    ; m5=t8, m3=t9, m7=t10, m1=t11, m6=t12, m0=t13, m4=t14, m2=t15
+
+    SUMSUB_BA            w,  7,  1, 4       ; t11, t10
+    SUMSUB_BA            w,  0,  6, 4       ; t12, t13
+
+    ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m14=t5, m13=t6, m15=t7
+    ; m3=t8, m5=t9, m1=t10, m7=t11, m0=t12, m6=t13, m2=t14, m4=t15
+
+    VP9_UNPACK_MULSUB_2W_4X   6,   1, 6270, m15137, [pd_8192], 4, 5 ; t10, t13
+
+    UNSCRATCH            5, 11, %4+ 7*%5
+%endif
+
+    ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m13=t5, m14=t6, m15=t7
+    ; m3=t8, m2=t9, m6=t10, m7=t11, m0=t12, m1=t13, m5=t14, m4=t15
+
+    SUMSUB_BA            w,  7,  3, 4       ; t8,  t11
+
+    ; backup first register
+    mova        [%4+15*%5], m7
+
+    SUMSUB_BA            w,  6,  2, 7       ; t9,  t10
+    UNSCRATCH            4, 10, %4+ 1*%5
+    SUMSUB_BA            w,  0,  4, 7       ; t15, t12
+    SUMSUB_BA            w,  1,  5, 7       ; t14, t13
+
+    ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7
+    ; m7=t8, m6=t9, m2=t10, m3=t11, m4=t12, m5=t13, m1=t14, m0=t15
+
+%if cpuflag(ssse3) && %6 == 0               ; pmulhrsw form only when is_iadst is 0
+    SUMSUB_BA            w,  2,  5, 7
+    SUMSUB_BA            w,  3,  4, 7
+    pmulhrsw            m5, [pw_11585x2]    ; t10
+    pmulhrsw            m4, [pw_11585x2]    ; t11
+    pmulhrsw            m3, [pw_11585x2]    ; t12
+    pmulhrsw            m2, [pw_11585x2]    ; t13
+%else
+    SCRATCH              6, 10, %4+ 1*%5
+    VP9_UNPACK_MULSUB_2W_4X   5,   2, 11585, 11585, [pd_8192], 6, 7 ; t10, t13
+    VP9_UNPACK_MULSUB_2W_4X   4,   3, 11585, 11585, [pd_8192], 6, 7 ; t11, t12
+    UNSCRATCH            6, 10, %4+ 1*%5
+%endif
+
+    ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7
+    ; m7=t8, m6=t9, m5=t10, m4=t11, m3=t12, m2=t13, m1=t14, m0=t15
+
+    SCRATCH              0,  8, %4+ 1*%5    ; set t15..t9 aside (m8-m14, or odd scratch rows on x86-32)
+    SCRATCH              1,  9, %4+ 3*%5
+    SCRATCH              2, 10, %4+ 5*%5
+    SCRATCH              3, 11, %4+ 7*%5
+    SCRATCH              4, 12, %4+ 9*%5
+    SCRATCH              5, 13, %4+11*%5
+    SCRATCH              6, 14, %4+13*%5
+
+    ; even (tx8x8)
+%if %2 <= 4                                 ; nnzc <= 4: the even half reads only IN(0) and IN(2)
+    mova                m3, [%1+ 0*%3]      ; IN(0)
+    mova                m4, [%1+ 2*%3]      ; IN(2)
+
+    pmulhrsw            m3, [pw_11585x2]    ; t0-t3
+    pmulhrsw            m7, m4, [pw_16069x2]        ; t6-7
+    pmulhrsw            m4, [pw_3196x2]             ; t4-5
+
+%if 0 ; overflows :(
+    paddw               m6, m7, m4
+    psubw               m5, m7, m4
+    pmulhrsw            m5, [pw_11585x2]            ; t5
+    pmulhrsw            m6, [pw_11585x2]            ; t6
+%else
+    VP9_UNPACK_MULSUB_2W_4X  5, 6, 7, 4, 11585, 11585, [pd_8192], 0, 1 ; t5,  t6
+%endif
+
+    psubw               m0, m3, m7
+    paddw               m7, m3
+    psubw               m1, m3, m6
+    paddw               m6, m3
+    psubw               m2, m3, m5
+    paddw               m5, m3
+
+%if ARCH_X86_32
+    SWAP                 0, 7
+%endif
+    SCRATCH              7, 15, %4+12*%5
+%else                                       ; nnzc > 4
+    mova                m6, [%1+ 2*%3]      ; IN(2)
+    mova                m1, [%1+ 4*%3]      ; IN(4)
+    mova                m7, [%1+ 6*%3]      ; IN(6)
+%if %2 <= 8                                 ; nnzc <= 8: IN(10), IN(12) and IN(14) are not read
+    pmulhrsw            m0, m1,  [pw_15137x2]       ; t3
+    pmulhrsw            m1, [pw_6270x2]             ; t2
+    pmulhrsw            m5, m6, [pw_16069x2]        ; t7
+    pmulhrsw            m6, [pw_3196x2]             ; t4
+    pmulhrsw            m4, m7, [pw_m9102x2]        ; t5
+    pmulhrsw            m7, [pw_13623x2]            ; t6
+%else
+    mova                m4, [%1+10*%3]      ; IN(10)
+    mova                m0, [%1+12*%3]      ; IN(12)
+    mova                m5, [%1+14*%3]      ; IN(14)
+
+    VP9_UNPACK_MULSUB_2W_4X   1,   0, 15137,  6270, [pd_8192], 2, 3 ; t2,  t3
+    VP9_UNPACK_MULSUB_2W_4X   6,   5, 16069,  3196, [pd_8192], 2, 3 ; t4,  t7
+    VP9_UNPACK_MULSUB_2W_4X   4,   7,  9102, 13623, [pd_8192], 2, 3 ; t5,  t6
+%endif
+
+    SUMSUB_BA            w,  4,  6, 2       ; t4,  t5
+    SUMSUB_BA            w,  7,  5, 2       ; t7,  t6
+
+%if cpuflag(ssse3) && %6 == 0               ; pmulhrsw form only when is_iadst is 0
+    SUMSUB_BA            w,  6,  5, 2
+    pmulhrsw            m5, [pw_11585x2]                              ; t5
+    pmulhrsw            m6, [pw_11585x2]                              ; t6
+%else
+    VP9_UNPACK_MULSUB_2W_4X  5,  6, 11585, 11585, [pd_8192], 2, 3 ; t5,  t6
+%endif
+
+    SCRATCH              5, 15, %4+10*%5
+    mova                m2, [%1+ 0*%3]      ; IN(0)
+%if %2 <= 8                                 ; nnzc <= 8: IN(8) is not read
+    pmulhrsw            m2, [pw_11585x2]    ; t0 and t1
+    psubw               m3, m2, m0
+    paddw               m0, m2
+
+    SUMSUB_BA            w,  7,  0, 5       ; t0,  t7
+%else
+    mova                m3, [%1+ 8*%3]      ; IN(8)
+
+    ; from 3 stages back
+%if cpuflag(ssse3) && %6 == 0               ; pmulhrsw form only when is_iadst is 0
+    SUMSUB_BA            w,  3,  2, 5
+    pmulhrsw            m3, [pw_11585x2]    ; t0
+    pmulhrsw            m2, [pw_11585x2]    ; t1
+%else
+    mova        [%1+ 0*%3], m0              ; IN(0) is already in m2, so its slot holds m0 for a moment
+    VP9_UNPACK_MULSUB_2W_4X  2,  3, 11585,  11585, [pd_8192], 5, 0 ; t0, t1
+    mova                m0, [%1+ 0*%3]
+%endif
+
+    ; from 2 stages back
+    SUMSUB_BA            w,  0,  3, 5      ; t0,  t3
+
+    SUMSUB_BA            w,  7,  0, 5      ; t0,  t7
+%endif
+    UNSCRATCH            5, 15, %4+10*%5
+%if ARCH_X86_32
+    SWAP                 0, 7
+%endif
+    SCRATCH              7, 15, %4+12*%5
+    SUMSUB_BA            w,  1,  2, 7       ; t1,  t2
+
+    ; from 1 stage back
+    SUMSUB_BA            w,  6,  1, 7       ; t1,  t6
+    SUMSUB_BA            w,  5,  2, 7       ; t2,  t5
+%endif
+    SUMSUB_BA            w,  4,  3, 7       ; t3,  t4
+
+%if ARCH_X86_64
+    SWAP                 0, 8
+    SWAP                 1, 9
+    SWAP                 2, 10
+    SWAP                 3, 11
+    SWAP                 4, 12
+    SWAP                 5, 13
+    SWAP                 6, 14
+
+    SUMSUB_BA            w,  0, 15, 7       ; t0, t15
+    SUMSUB_BA            w,  1, 14, 7       ; t1, t14
+    SUMSUB_BA            w,  2, 13, 7       ; t2, t13
+    SUMSUB_BA            w,  3, 12, 7       ; t3, t12
+    SUMSUB_BA            w,  4, 11, 7       ; t4, t11
+    SUMSUB_BA            w,  5, 10, 7       ; t5, t10
+%else
+    SWAP                 1, 6
+    SWAP                 2, 5
+    SWAP                 3, 4
+    mova        [%4+14*%5], m6
+
+%macro %%SUMSUB_BA_STORE 5 ; reg, from_mem, to_mem, scratch, scratch_stride
+    mova                m6, [%4+%2*%5]      ; load the value from scratch row from_mem
+    SUMSUB_BA            w,  6, %1, 7
+    SWAP                %1, 6
+    mova        [%4+%3*%5], m6              ; store the register now called m6 to scratch row to_mem
+%endmacro
+
+    %%SUMSUB_BA_STORE    0,  1,  1, %4, %5  ; t0, t15
+    %%SUMSUB_BA_STORE    1,  3,  3, %4, %5  ; t1, t14
+    %%SUMSUB_BA_STORE    2,  5,  5, %4, %5  ; t2, t13
+    %%SUMSUB_BA_STORE    3,  7,  7, %4, %5  ; t3, t12
+    %%SUMSUB_BA_STORE    4,  9,  9, %4, %5  ; t4, t11
+    %%SUMSUB_BA_STORE    5, 11, 11, %4, %5  ; t5, t10
+%endif
+%endmacro
+
+%macro VP9_IDCT16_1D 2-4 16, 1 ; src, pass, nnzc (default 16), is_iadst (default 1)
+%if %2 == 1                                ; pass 1: transform, transpose, keep the result in the tmpq buffer
+    VP9_IDCT16_1D_START %1, %3, 32, tmpq, 16, %4
+
+%if ARCH_X86_64
+    ; backup a different register
+    mova                m7, [tmpq+15*16]
+    mova      [tmpq+ 1*16], m15
+
+    SUMSUB_BA            w,  6,  9, 15      ; t6, t9
+    SUMSUB_BA            w,  7,  8, 15      ; t7, t8
+
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, 15
+    mova        [tmpq+  0], m0              ; first transposed half goes to the even 16-byte slots
+    mova        [tmpq+ 32], m1
+    mova        [tmpq+ 64], m2
+    mova        [tmpq+ 96], m3
+    mova        [tmpq+128], m4
+    mova        [tmpq+160], m5
+    mova        [tmpq+192], m6
+    mova        [tmpq+224], m7
+
+    mova               m15, [tmpq+ 1*16]
+    TRANSPOSE8x8W        8, 9, 10, 11, 12, 13, 14, 15, 0
+    mova        [tmpq+ 16], m8              ; second transposed half goes to the odd 16-byte slots
+    mova        [tmpq+ 48], m9
+    mova        [tmpq+ 80], m10
+    mova        [tmpq+112], m11
+    mova        [tmpq+144], m12
+    mova        [tmpq+176], m13
+    mova        [tmpq+208], m14
+    mova        [tmpq+240], m15
+%else
+    mova                m6, [tmpq+13*16]
+    mova                m7, [tmpq+14*16]
+    SUMSUB_BA            w, 6, 7                ; t6, t9
+    mova      [tmpq+14*16], m6
+    mova      [tmpq+13*16], m7
+    mova                m7, [tmpq+15*16]
+    mova                m6, [tmpq+12*16]
+    SUMSUB_BA            w, 7, 6                ; t7, t8
+    mova      [tmpq+15*16], m6
+
+    TRANSPOSE8x8W       0, 1, 2, 3, 4, 5, 6, 7, [tmpq+14*16], [tmpq+ 8*16], 1
+    mova     [tmpq+ 0*16], m0
+    mova     [tmpq+ 2*16], m1
+    mova     [tmpq+ 4*16], m2
+    mova     [tmpq+ 6*16], m3
+    mova     [tmpq+10*16], m5
+    mova     [tmpq+12*16], m6
+    mova     [tmpq+14*16], m7
+
+    mova                m0, [tmpq+15*16]
+    mova                m1, [tmpq+13*16]
+    mova                m2, [tmpq+11*16]
+    mova                m3, [tmpq+ 9*16]
+    mova                m4, [tmpq+ 7*16]
+    mova                m5, [tmpq+ 5*16]
+    mova                m7, [tmpq+ 1*16]
+    TRANSPOSE8x8W       0, 1, 2, 3, 4, 5, 6, 7, [tmpq+ 3*16], [tmpq+ 9*16], 1
+    mova     [tmpq+ 1*16], m0
+    mova     [tmpq+ 3*16], m1
+    mova     [tmpq+ 5*16], m2
+    mova     [tmpq+ 7*16], m3
+    mova     [tmpq+11*16], m5
+    mova     [tmpq+13*16], m6
+    mova     [tmpq+15*16], m7
+%endif
+%else ; %2 == 2
+    VP9_IDCT16_1D_START %1, %3, 32, %1, 32, %4
+
+%if cpuflag(ssse3)
+%define ROUND_REG [pw_512]
+%else
+%define ROUND_REG [pw_32]
+%endif
+
+    pxor                m7, m7              ; zero register handed to VP9_IDCT8_WRITEx2
+%if ARCH_X86_64
+    ; backup more registers
+    mova        [%1+ 2*32], m8
+    mova        [%1+ 3*32], m9
+
+    VP9_IDCT8_WRITEx2    0,  1, 8, 9, 7, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2    2,  3, 8, 9, 7, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2    4,  5, 8, 9, 7, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+
+    ; restore from cache
+    SWAP                 0, 7               ; move zero from m7 to m0
+    mova                m7, [%1+15*32]
+    mova                m8, [%1+ 2*32]
+    mova                m9, [%1+ 3*32]
+
+    SUMSUB_BA            w,  6,  9, 3       ; t6, t9
+    SUMSUB_BA            w,  7,  8, 3       ; t7, t8
+
+    VP9_IDCT8_WRITEx2    6,  7, 3, 4, 0, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2    8,  9, 3, 4, 0, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2   10, 11, 1, 2, 0, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2   12, 13, 1, 2, 0, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2   14, 15, 1, 2, 0, ROUND_REG, 6
+%else
+    mova      [tmpq+ 0*32], m5              ; NOTE(review): scratch is addressed via tmpq here although the START macro got the src argument - presumably the same pointer; confirm at the call sites
+
+    VP9_IDCT8_WRITEx2    0,  1, 5, 6, 7, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2    2,  3, 5, 6, 7, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+
+    SWAP                 0, 7               ; move zero from m7 to m0
+    mova                m5, [tmpq+ 0*32]
+
+    VP9_IDCT8_WRITEx2    4,  5, 1, 2, 0, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+
+    mova                m4, [tmpq+13*32]
+    mova                m7, [tmpq+14*32]
+    mova                m5, [tmpq+15*32]
+    mova                m6, [tmpq+12*32]
+    SUMSUB_BADC w, 4, 7, 5, 6, 1            ; the two sumsubs the START macro left undone
+
+    VP9_IDCT8_WRITEx2    4,  5, 1, 2, 0, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2    6,  7, 1, 2, 0, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+
+    mova                m4, [tmpq+11*32]
+    mova                m5, [tmpq+ 9*32]
+    mova                m6, [tmpq+ 7*32]
+    mova                m7, [tmpq+ 5*32]
+
+    VP9_IDCT8_WRITEx2    4,  5, 1, 2, 0, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2    6,  7, 1, 2, 0, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+
+    mova                m4, [tmpq+ 3*32]
+    mova                m5, [tmpq+ 1*32]
+
+    VP9_IDCT8_WRITEx2    4,  5, 1, 2, 0, ROUND_REG, 6
+    lea               dstq, [dstq+strideq*2]
+%endif
+
+%undef ROUND_REG
+%endif ; %2 == 1/2
+%endmacro
+; Adds the word vector m%1 to two full-register rows of bytes at [dstq] and [dstq+%7], saturating the result to unsigned bytes.
+%macro VP9_STORE_2XFULL 6-7 strideq; dc, tmp1, tmp2, tmp3, tmp4, zero, stride
+    mova               m%3, [dstq]              ; row 0
+    mova               m%5, [dstq+%7]           ; row 1
+    punpcklbw          m%2, m%3, m%6            ; interleave row 0 bytes with m%6 (the zero argument): low half as words
+    punpckhbw          m%3, m%6                 ; ... and high half
+    punpcklbw          m%4, m%5, m%6            ; same widening for row 1
+    punpckhbw          m%5, m%6
+    paddw              m%2, m%1                 ; add m%1 (the dc argument) to every word
+    paddw              m%3, m%1
+    paddw              m%4, m%1
+    paddw              m%5, m%1
+    packuswb           m%2, m%3                 ; narrow back to bytes with unsigned saturation
+    packuswb           m%4, m%5
+    mova            [dstq], m%2                 ; write both rows back in place
+    mova         [dstq+%7], m%4
+%endmacro
+; Emits vp9_idct_idct_16x16_add for cpu %1 using XMM registers, with a DC-only path, an 8x8 path (only when cpuflag(ssse3)) and a full path.
+%macro VP9_IDCT_IDCT_16x16_ADD_XMM 1
+INIT_XMM %1
+cglobal vp9_idct_idct_16x16_add, 4, 6, 16, 512, dst, stride, block, eob
+%if cpuflag(ssse3)
+    ; 2x2=eob=3, 4x4=eob=10
+    cmp eobd, 38                                ; eob > 38: take the full path
+    jg .idctfull
+    cmp eobd, 1 ; faster path for when only DC is set
+    jne .idct8x8
+%else
+    cmp eobd, 1 ; faster path for when only DC is set
+    jg .idctfull
+%endif
+
+    ; dc-only
+%if cpuflag(ssse3)
+    movd                m0, [blockq]            ; DC coefficient ends up in word 0
+    mova                m1, [pw_11585x2]
+    pmulhrsw            m0, m1                  ; two rounded multiplies by the same constant
+    pmulhrsw            m0, m1
+%else
+    DEFINE_ARGS dst, stride, block, coef
+    movsx            coefd, word [blockq]       ; sign-extended DC coefficient
+    imul             coefd, 11585               ; first multiply: (x * 11585 + 8192) >> 14
+    add              coefd, 8192
+    sar              coefd, 14
+    imul             coefd, 11585               ; second multiply, with the final (+32) >> 6 folded into it
+    add              coefd, (32 << 14) + 8192
+    sar              coefd, 14 + 6
+    movd                m0, coefd
+%endif
+    SPLATW              m0, m0, q0000
+%if cpuflag(ssse3)
+    pmulhrsw            m0, [pw_512]
+%endif
+    pxor                m5, m5                  ; zero: used to clear the coefficient and as the unpack partner below
+    movd          [blockq], m5                  ; clears the first 4 bytes of block
+%rep 7
+    VP9_STORE_2XFULL    0, 1, 2, 3, 4, 5
+    lea               dstq, [dstq+2*strideq]
+%endrep
+    VP9_STORE_2XFULL    0, 1, 2, 3, 4, 5
+    RET
+    ; for the remaining paths the fourth register (eob above) is reused as cnt
+    DEFINE_ARGS dst, stride, block, cnt, dst_bak, tmp
+%if cpuflag(ssse3)
+.idct8x8:
+    mov               tmpq, rsp
+    VP9_IDCT16_1D   blockq, 1, 8, 0
+    ; a single first-pass call here (the full path runs two), then two second-pass iterations
+    mov               cntd, 2
+    mov           dst_bakq, dstq                ; remember the original dst for the second iteration
+.loop2_8x8:
+    VP9_IDCT16_1D     tmpq, 2, 8, 0
+    lea               dstq, [dst_bakq+8]        ; second iteration starts 8 bytes to the right of the original dst
+    add               tmpq, 16                  ; and reads 16 bytes further into the intermediate buffer
+    dec               cntd
+    jg .loop2_8x8
+
+    ; at the end of the loop, m0 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 32, 8, m0
+    RET
+%endif
+
+.idctfull:
+    mov               cntd, 2
+    mov               tmpq, rsp
+.loop1_full:
+    VP9_IDCT16_1D   blockq, 1, 16, 0
+    add             blockq, 16                  ; step 16 bytes into the block for the second iteration
+    add               tmpq, 256                 ; and 256 bytes into the intermediate buffer
+    dec               cntd
+    jg .loop1_full
+    sub             blockq, 32                  ; undo the two 16-byte advances above
+
+    mov               cntd, 2
+    mov               tmpq, rsp
+    mov           dst_bakq, dstq
+.loop2_full:
+    VP9_IDCT16_1D     tmpq, 2, 16, 0
+    lea               dstq, [dst_bakq+8]        ; second iteration starts 8 bytes to the right of the original dst
+    add               tmpq, 16
+    dec               cntd
+    jg .loop2_full
+
+    ; at the end of the loop, m0 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 32, 16, m0
+    RET
+%endmacro
+; instantiate the XMM vp9_idct_idct_16x16_add for sse2, ssse3 and avx
+VP9_IDCT_IDCT_16x16_ADD_XMM sse2
+VP9_IDCT_IDCT_16x16_ADD_XMM ssse3
+VP9_IDCT_IDCT_16x16_ADD_XMM avx
+; One 16-point IDCT pass. Reads m0 from [blockq+0] and m4 from [blockq+128], all other inputs from registers; spills to [blockq+128] and [blockq+192].
+%macro VP9_IDCT16_YMM_1D 0
+    VP9_UNPACK_MULSUB_2W_4X  1,  15, 16305,  1606, [pd_8192], 0, 4 ; t8,  t15
+    VP9_UNPACK_MULSUB_2W_4X  9,   7, 10394, 12665, [pd_8192], 0, 4 ; t9,  t14
+
+    SUMSUB_BA            w,  9,   1, 0      ; t8,  t9
+    SUMSUB_BA            w,  7,  15, 0      ; t15, t14
+
+    VP9_UNPACK_MULSUB_2W_4X 15,   1, 15137,  6270, [pd_8192], 0, 4 ; t9,  t14
+
+    VP9_UNPACK_MULSUB_2W_4X  5,  11, 14449,  7723, [pd_8192], 0, 4 ; t10, t13
+    VP9_UNPACK_MULSUB_2W_4X 13,   3,  4756, 15679, [pd_8192], 0, 4 ; t11, t12
+
+    SUMSUB_BA            w,  5,  13, 0      ; t11, t10
+    SUMSUB_BA            w, 11,   3, 0      ; t12, t13
+
+    VP9_UNPACK_MULSUB_2W_4X  3,  13, 6270, m15137, [pd_8192], 0, 4 ; t10, t13
+
+    SUMSUB_BA            w,  5,   9, 0      ; t8,  t11
+    SUMSUB_BA            w,  3,  15, 0      ; t9,  t10
+    SUMSUB_BA            w, 11,   7, 0      ; t15, t12
+    SUMSUB_BA            w, 13,   1, 0      ; t14, t13
+
+    SUMSUB_BA            w, 15,   1, 0
+    SUMSUB_BA            w,  9,   7, 0
+    pmulhrsw            m1, [pw_11585x2]    ; t10
+    pmulhrsw            m7, [pw_11585x2]    ; t11
+    pmulhrsw            m9, [pw_11585x2]    ; t12
+    pmulhrsw           m15, [pw_11585x2]    ; t13
+
+    ; even (tx8x8)
+    mova                m4, [blockq+128]    ; m4 was not used above: fetch its input from memory
+    mova      [blockq+128], m5              ; spill m5 so it can serve as scratch below
+    VP9_UNPACK_MULSUB_2W_4X   4,  12, 15137,  6270, [pd_8192], 0, 5 ; t2,  t3
+    VP9_UNPACK_MULSUB_2W_4X   2,  14, 16069,  3196, [pd_8192], 0, 5 ; t4,  t7
+    VP9_UNPACK_MULSUB_2W_4X  10,   6,  9102, 13623, [pd_8192], 0, 5 ; t5,  t6
+    mova                m0, [blockq+  0]    ; m0 was scratch until now: fetch its input from memory
+    SUMSUB_BA            w,   8,   0, 5
+    pmulhrsw            m8, [pw_11585x2]    ; t0
+    pmulhrsw            m0, [pw_11585x2]    ; t1
+
+    SUMSUB_BA            w,  10,   2, 5     ; t4,  t5
+    SUMSUB_BA            w,   6,  14, 5     ; t7,  t6
+    SUMSUB_BA            w,  12,   8, 5     ; t0,  t3
+    SUMSUB_BA            w,   4,   0, 5     ; t1,  t2
+
+    SUMSUB_BA            w,   2,  14, 5
+    pmulhrsw           m14, [pw_11585x2]    ; t5
+    pmulhrsw            m2, [pw_11585x2]    ; t6
+
+    SUMSUB_BA            w,   6,  12, 5     ; t0,  t7
+    SUMSUB_BA            w,   2,   4, 5     ; t1,  t6
+    SUMSUB_BA            w,  14,   0, 5     ; t2,  t5
+    SUMSUB_BA            w,  10,   8, 5     ; t3,  t4
+
+    ; final stage
+    SUMSUB_BA            w, 11,  6,  5      ; out0, out15
+    SUMSUB_BA            w, 13,  2,  5      ; out1, out14
+    SUMSUB_BA            w, 15, 14,  5      ; out2, out13
+    SUMSUB_BA            w,  9, 10,  5      ; out3, out12
+    SUMSUB_BA            w,  7,  8,  5      ; out4, out11
+    SUMSUB_BA            w,  1,  0,  5      ; out5, out10
+    SUMSUB_BA            w,  3,  4,  5      ; out6, out9
+    mova                m5, [blockq+128]    ; reload the m5 spilled at the start of the even half
+    mova      [blockq+192], m3              ; store m3 (out6 per the line above) so m3 can be the scratch register below
+    SUMSUB_BA            w,  5, 12,  3      ; out7, out8
+    ; rename registers; NOTE(review): presumably this puts outN in mN, with out6 left in [blockq+192] - confirm
+    SWAP  0, 11,  8, 12, 10
+    SWAP  1, 13, 14,  2, 15,  6,  3,  9,  4,  7,  5
+%endmacro
+; Adds the word vector m%1 to four 16-byte rows at dstq, dstq+strideq, dstq+strideq*2 and dstq+stride3q.
+; this is almost identical to VP9_STORE_2X, but it does two rows
+; for slightly improved interleaving, and it omits vpermq since the
+; input is DC so all values are identical
+%macro VP9_STORE_YMM_DC_4X 6 ; reg, tmp1, tmp2, tmp3, tmp4, zero
+    mova              xm%2, [dstq]              ; row 0 -> low lane
+    mova              xm%4, [dstq+strideq*2]    ; row 2 -> low lane
+    vinserti128        m%2, m%2, [dstq+strideq], 1  ; row 1 -> high lane
+    vinserti128        m%4, m%4, [dstq+stride3q], 1 ; row at stride3q -> high lane
+    punpckhbw          m%3, m%2, m%6            ; widen bytes to words by interleaving with m%6 (the zero argument)
+    punpcklbw          m%2, m%6
+    punpckhbw          m%5, m%4, m%6
+    punpcklbw          m%4, m%6
+    paddw              m%3, m%1                 ; add m%1 to every word
+    paddw              m%2, m%1
+    paddw              m%5, m%1
+    paddw              m%4, m%1
+    packuswb           m%2, m%3                 ; narrow back to bytes with unsigned saturation
+    packuswb           m%4, m%5
+    mova            [dstq], xm%2                ; low lanes go back to rows 0 and 2
+    mova        [dstq+strideq*2], xm%4
+    vextracti128  [dstq+strideq], m%2, 1        ; high lanes go back to the other two rows
+    vextracti128 [dstq+stride3q], m%4, 1
+%endmacro
+; AVX2 vp9_idct_idct_16x16_add; only assembled when ARCH_X86_64 and HAVE_AVX2_EXTERNAL are set
+%if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_idct_idct_16x16_add, 4, 4, 16, dst, stride, block, eob
+    cmp eobd, 1 ; faster path for when only DC is set
+    jg .idctfull
+
+    ; dc-only
+    mova                m1, [pw_11585x2]
+    vpbroadcastw        m0, [blockq]            ; broadcast the first coefficient word to all lanes
+    pmulhrsw            m0, m1
+    pmulhrsw            m0, m1
+    pxor                m5, m5                  ; zero: used to clear the coefficient and as the unpack partner
+    pmulhrsw            m0, [pw_512]
+    movd          [blockq], xm5                 ; clears the first 4 bytes of block
+    ; block and eob are not used again on this path: their registers become stride3 and cnt
+    DEFINE_ARGS dst, stride, stride3, cnt
+    mov               cntd, 4                   ; 4 iterations of 4 rows each
+    lea           stride3q, [strideq*3]
+.loop_dc:
+    VP9_STORE_YMM_DC_4X  0, 1, 2, 3, 4, 5
+    lea               dstq, [dstq+4*strideq]
+    dec               cntd
+    jg .loop_dc
+    RET
+    ; restore the original argument names for the full path
+    DEFINE_ARGS dst, stride, block, eob
+.idctfull:
+    mova                m1, [blockq+ 32]
+    mova                m2, [blockq+ 64]
+    mova                m3, [blockq+ 96]
+    mova                m5, [blockq+160]
+    mova                m6, [blockq+192]
+    mova                m7, [blockq+224]
+    mova                m8, [blockq+256]
+    mova                m9, [blockq+288]
+    mova               m10, [blockq+320]
+    mova               m11, [blockq+352]
+    mova               m12, [blockq+384]
+    mova               m13, [blockq+416]
+    mova               m14, [blockq+448]
+    mova               m15, [blockq+480]
+    ; m0 and m4 are not loaded here: VP9_IDCT16_YMM_1D fetches them from [blockq+0] and [blockq+128]
+    VP9_IDCT16_YMM_1D
+    TRANSPOSE16x16W      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
+                         [blockq+192], [blockq+128], 1
+    mova      [blockq+  0], m0                  ; the second pass also reads m0 from memory
+    VP9_IDCT16_YMM_1D
+
+    mova      [blockq+224], m7                  ; m6 and m7 are passed as temporaries to the next three stores
+
+    ; store
+    VP9_IDCT8_WRITEx2    0,  1, 6, 7, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2    2,  3, 6, 7, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2    4,  5, 6, 7, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    mova                m6, [blockq+192]        ; reload the two values that were parked in memory
+    mova                m7, [blockq+224]
+    VP9_IDCT8_WRITEx2    6,  7, 1, 2, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2    8,  9, 1, 2, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2   10, 11, 1, 2, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2   12, 13, 1, 2, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2   14, 15, 1, 2, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]    ; NOTE(review): no later line here names dstq, so this looks redundant
+
+    ; the stores above are not given a zero register (they pass unused),
+    ; so clear m0 explicitly and use it to zero out block coefficients
+    pxor                m0, m0
+    ZERO_BLOCK      blockq, 32, 16, m0
+    RET
+%endif
+
+;---------------------------------------------------------------------------------------------
+; void vp9_iadst_iadst_16x16_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+; 16-point inverse ADST pass. Reads rows at [%1+k*32]; pass 1 transposes the result into tmpq, pass 2 rounds it and writes it through dstq.
+%macro VP9_IADST16_1D 2 ; src, pass
+%assign %%str 16*%2
+    mova                m0, [%1+ 0*32]  ; in0
+    mova                m1, [%1+15*32]  ; in15
+    mova                m2, [%1+ 7*32]  ; in7
+    mova                m3, [%1+ 8*32]  ; in8
+
+    VP9_UNPACK_MULSUB_2D_4X  1,  0,  4,  5, 16364,   804    ; m1/4=t1[d], m0/5=t0[d]
+    VP9_UNPACK_MULSUB_2D_4X  2,  3,  7,  6, 11003, 12140    ; m2/7=t9[d], m3/6=t8[d]
+    SCRATCH              4, 8, tmpq+ 0*%%str
+    VP9_RND_SH_SUMSUB_BA     3,  0,  6,  5,  4, [pd_8192]   ; m3=t0[w], m0=t8[w]
+    UNSCRATCH            4, 8, tmpq+ 0*%%str
+    VP9_RND_SH_SUMSUB_BA     2,  1,  7,  4,  5, [pd_8192]   ; m2=t1[w], m1=t9[w]
+    ; NOTE(review): SCRATCH/UNSCRATCH are defined elsewhere; presumably they park a value in the given high register or tmpq slot (the mN|rK notation below) - confirm
+    SCRATCH              0, 10, tmpq+ 0*%%str
+    SCRATCH              1, 11, tmpq+15*%%str
+    mova   [tmpq+ 7*%%str], m2
+    mova   [tmpq+ 8*%%str], m3
+
+    mova                m1, [%1+ 2*32]  ; in2
+    mova                m0, [%1+13*32]  ; in13
+    mova                m3, [%1+ 5*32]  ; in5
+    mova                m2, [%1+10*32]  ; in10
+
+    VP9_UNPACK_MULSUB_2D_4X  0,  1,  6,  7, 15893,  3981    ; m0/6=t3[d], m1/7=t2[d]
+    VP9_UNPACK_MULSUB_2D_4X  3,  2,  4,  5,  8423, 14053    ; m3/4=t11[d], m2/5=t10[d]
+    SCRATCH              4, 12, tmpq+ 2*%%str
+    VP9_RND_SH_SUMSUB_BA     2,  1,  5,  7,  4, [pd_8192]   ; m2=t2[w], m1=t10[w]
+    UNSCRATCH            4, 12, tmpq+ 2*%%str
+    VP9_RND_SH_SUMSUB_BA     3,  0,  4,  6,  5, [pd_8192]   ; m3=t3[w], m0=t11[w]
+
+    SCRATCH              0, 12, tmpq+ 2*%%str
+    SCRATCH              1, 13, tmpq+13*%%str
+    mova   [tmpq+ 5*%%str], m2
+    mova   [tmpq+10*%%str], m3
+
+    mova                m2, [%1+ 4*32]  ; in4
+    mova                m3, [%1+11*32]  ; in11
+    mova                m0, [%1+ 3*32]  ; in3
+    mova                m1, [%1+12*32]  ; in12
+
+    VP9_UNPACK_MULSUB_2D_4X  3,  2,  7,  6, 14811,  7005    ; m3/7=t5[d], m2/6=t4[d]
+    VP9_UNPACK_MULSUB_2D_4X  0,  1,  4,  5,  5520, 15426    ; m0/4=t13[d], m1/5=t12[d]
+    SCRATCH              4, 9, tmpq+ 4*%%str
+    VP9_RND_SH_SUMSUB_BA     1,  2,  5,  6,  4, [pd_8192]   ; m1=t4[w], m2=t12[w]
+    UNSCRATCH            4, 9, tmpq+ 4*%%str
+    VP9_RND_SH_SUMSUB_BA     0,  3,  4,  7,  6, [pd_8192]   ; m0=t5[w], m3=t13[w]
+
+    SCRATCH              0,  8, tmpq+ 4*%%str
+    mova   [tmpq+11*%%str], m1          ; t4:m1->r11
+    UNSCRATCH            0, 10, tmpq+ 0*%%str
+    UNSCRATCH            1, 11, tmpq+15*%%str
+
+    ; round 2 interleaved part 1
+    VP9_UNPACK_MULSUB_2D_4X  0,  1,  6,  7, 16069,  3196    ; m1/7=t8[d], m0/6=t9[d]
+    VP9_UNPACK_MULSUB_2D_4X  3,  2,  5,  4,  3196, 16069    ; m3/5=t12[d], m2/4=t13[d]
+    SCRATCH              4, 9, tmpq+ 3*%%str
+    VP9_RND_SH_SUMSUB_BA     3,  1,  5,  7,  4, [pd_8192]   ; m3=t8[w], m1=t12[w]
+    UNSCRATCH            4, 9, tmpq+ 3*%%str
+    VP9_RND_SH_SUMSUB_BA     2,  0,  4,  6,  5, [pd_8192]   ; m2=t9[w], m0=t13[w]
+
+    SCRATCH              0, 10, tmpq+ 0*%%str
+    SCRATCH              1, 11, tmpq+15*%%str
+    SCRATCH              2, 14, tmpq+ 3*%%str
+    SCRATCH              3, 15, tmpq+12*%%str
+
+    mova                m2, [%1+ 6*32]  ; in6
+    mova                m3, [%1+ 9*32]  ; in9
+    mova                m0, [%1+ 1*32]  ; in1
+    mova                m1, [%1+14*32]  ; in14
+
+    VP9_UNPACK_MULSUB_2D_4X  3,  2,  7,  6, 13160,  9760    ; m3/7=t7[d], m2/6=t6[d]
+    VP9_UNPACK_MULSUB_2D_4X  0,  1,  4,  5,  2404, 16207    ; m0/4=t15[d], m1/5=t14[d]
+    SCRATCH              4, 9, tmpq+ 6*%%str
+    VP9_RND_SH_SUMSUB_BA     1,  2,  5,  6,  4, [pd_8192]   ; m1=t6[w], m2=t14[w]
+    UNSCRATCH            4, 9, tmpq+ 6*%%str
+    VP9_RND_SH_SUMSUB_BA     0,  3,  4,  7,  6, [pd_8192]   ; m0=t7[w], m3=t15[w]
+
+    ; r8=t0, r7=t1, r5=t2, r10=t3, r11=t4, m8|r4=t5, m1=t6, m0=t7
+    ; m10|r0=t8, m11|r15=t9, m13|r13=t10, m12|r2=t11, m14|r3=t12, m15|r12=t13, m2=t14, m3=t15
+
+    UNSCRATCH            4, 12, tmpq+ 2*%%str
+    UNSCRATCH            5, 13, tmpq+13*%%str
+    SCRATCH              0, 12, tmpq+ 1*%%str
+    SCRATCH              1, 13, tmpq+14*%%str
+
+    ; remainder of round 2 (rest of t8-15)
+    VP9_UNPACK_MULSUB_2D_4X  5,  4,  6,  7,  9102, 13623    ; m5/6=t11[d], m4/7=t10[d]
+    VP9_UNPACK_MULSUB_2D_4X  3,  2,  1,  0, 13623,  9102    ; m3/1=t14[d], m2/0=t15[d]
+    SCRATCH              0, 9, tmpq+ 6*%%str
+    VP9_RND_SH_SUMSUB_BA     3,  4,  1,  7,  0, [pd_8192]   ; m3=t10[w], m4=t14[w]
+    UNSCRATCH            0, 9, tmpq+ 6*%%str
+    VP9_RND_SH_SUMSUB_BA     2,  5,  0,  6,  1, [pd_8192]   ; m2=t11[w], m5=t15[w]
+
+    ; m15|r12=t8, m14|r3=t9, m3=t10, m2=t11, m11|r15=t12, m10|r0=t13, m4=t14, m5=t15
+
+    UNSCRATCH            6, 14, tmpq+ 3*%%str
+    UNSCRATCH            7, 15, tmpq+12*%%str
+
+    SUMSUB_BA                w,  3,  7,  1
+    PSIGNW                  m3, [pw_m1]                     ; m3=out1[w], m7=t10[w]
+    SUMSUB_BA                w,  2,  6,  1                  ; m2=out14[w], m6=t11[w]
+
+    ; unfortunately, the code below overflows in some cases, e.g.
+    ; http://downloads.webmproject.org/test_data/libvpx/vp90-2-14-resize-fp-tiles-16-8.webm
+%if 0; cpuflag(ssse3)
+    SUMSUB_BA                w,  7,  6,  1
+    pmulhrsw                m7, [pw_11585x2]                ; m7=out6[w]
+    pmulhrsw                m6, [pw_11585x2]                ; m6=out9[w]
+%else
+    VP9_UNPACK_MULSUB_2W_4X  6,  7, 11585, 11585, [pd_8192], 1, 0 ; always assembled: the %if above is hard-wired to 0
+%endif
+
+    mova       [tmpq+ 3*%%str], m6
+    mova       [tmpq+ 6*%%str], m7
+    UNSCRATCH                6, 10, tmpq+ 0*%%str
+    UNSCRATCH                7, 11, tmpq+15*%%str
+    mova       [tmpq+13*%%str], m2
+    SCRATCH                  3, 11, tmpq+ 9*%%str
+
+    VP9_UNPACK_MULSUB_2D_4X  7,  6,  2,  3, 15137,  6270    ; m6/3=t13[d], m7/2=t12[d]
+    VP9_UNPACK_MULSUB_2D_4X  5,  4,  1,  0,  6270, 15137    ; m5/1=t14[d], m4/0=t15[d]
+    SCRATCH              0, 9, tmpq+ 2*%%str
+    VP9_RND_SH_SUMSUB_BA     5,  6,  1,  3,  0, [pd_8192]   ; m5=out2[w], m6=t14[w]
+    UNSCRATCH            0, 9, tmpq+ 2*%%str
+    VP9_RND_SH_SUMSUB_BA     4,  7,  0,  2,  1, [pd_8192]
+    PSIGNW                  m4, [pw_m1]                     ; m4=out13[w], m7=t15[w]
+
+    ; unfortunately, the code below overflows in some cases
+%if 0; cpuflag(ssse3)
+    SUMSUB_BA                w,  7,  6,  1
+    pmulhrsw                m7, [pw_m11585x2]               ; m7=out5[w]
+    pmulhrsw                m6, [pw_11585x2]                ; m6=out10[w]
+%else
+    PSIGNW                  m7, [pw_m1]
+    VP9_UNPACK_MULSUB_2W_4X  7,  6, 11585, 11585, [pd_8192], 1, 0
+%endif
+
+    ; m11|r13=out1, m5=out2, m7=out5, r15=out6, r3=out9, m6=out10, m4=out13, r2=out14
+
+    mova                    m2, [tmpq+ 8*%%str]
+    mova                    m3, [tmpq+ 7*%%str]
+    mova                    m1, [tmpq+11*%%str]
+    mova       [tmpq+ 7*%%str], m6
+    mova       [tmpq+11*%%str], m4
+    mova                    m4, [tmpq+ 5*%%str]
+    SCRATCH                  5, 14, tmpq+ 5*%%str
+    SCRATCH                  7, 15, tmpq+ 8*%%str
+    UNSCRATCH                6,  8, tmpq+ 4*%%str
+    UNSCRATCH                5, 12, tmpq+ 1*%%str
+    UNSCRATCH                7, 13, tmpq+14*%%str
+
+    ; m2=t0, m3=t1, m9=t2, m0=t3, m1=t4, m8=t5, m13=t6, m12=t7
+    ; m11|r13=out1, m5=out2, m7=out5, r15=out6, r3=out9, r10=out10, r11=out13, r2=out14
+
+    SUMSUB_BA                w,  1,  2, 0                   ; m1=t0[w], m2=t4[w]
+    mova                    m0, [tmpq+10*%%str]
+    SCRATCH                  1, 12, tmpq+ 1*%%str
+    SUMSUB_BA                w,  6,  3, 1                   ; m8=t1[w], m3=t5[w]
+    SCRATCH                  6, 13, tmpq+ 4*%%str
+    SUMSUB_BA                w,  7,  4, 1                   ; m13=t2[w], m9=t6[w]
+    SCRATCH                  7,  8, tmpq+10*%%str
+    SUMSUB_BA                w,  5,  0, 1                   ; m12=t3[w], m0=t7[w]
+    SCRATCH                  5,  9, tmpq+14*%%str
+
+    VP9_UNPACK_MULSUB_2D_4X  2,  3,  7,  5, 15137,  6270    ; m2/6=t5[d], m3/10=t4[d]
+    VP9_UNPACK_MULSUB_2D_4X  0,  4,  1,  6,  6270, 15137    ; m0/14=t6[d], m9/15=t7[d]
+    SCRATCH                  6, 10, tmpq+ 0*%%str
+    VP9_RND_SH_SUMSUB_BA     0,  3,  1,  5,  6, [pd_8192]
+    UNSCRATCH                6, 10, tmpq+ 0*%%str
+    PSIGNW                  m0, [pw_m1]                     ; m0=out3[w], m3=t6[w]
+    VP9_RND_SH_SUMSUB_BA     4,  2,  6,  7,  5, [pd_8192]   ; m9=out12[w], m2=t7[w]
+
+    UNSCRATCH                1,  8, tmpq+10*%%str
+    UNSCRATCH                5,  9, tmpq+14*%%str
+    UNSCRATCH                6, 12, tmpq+ 1*%%str
+    UNSCRATCH                7, 13, tmpq+ 4*%%str
+    SCRATCH                  4,  9, tmpq+14*%%str
+
+    SUMSUB_BA                w,  1,  6,  4                  ; m13=out0[w], m1=t2[w]
+    SUMSUB_BA                w,  5,  7,  4
+    PSIGNW                  m5, [pw_m1]                     ; m12=out15[w], m8=t3[w]
+
+    ; unfortunately, the code below overflows in some cases, e.g.
+    ; http://downloads.webmproject.org/test_data/libvpx/vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm
+%if 0 ; cpuflag(ssse3)
+    SUMSUB_BA               w,   7,  6,  4
+    pmulhrsw                m7, [pw_m11585x2]               ; m8=out7[w]
+    pmulhrsw                m6, [pw_11585x2]                ; m1=out8[w]
+    SWAP                     6,  7
+    SUMSUB_BA                w,  3,  2,  4
+    pmulhrsw                m3, [pw_11585x2]                ; m3=out4[w]
+    pmulhrsw                m2, [pw_11585x2]                ; m2=out11[w]
+%else
+    SCRATCH                  5,  8, tmpq+10*%%str
+    VP9_UNPACK_MULSUB_2W_4X  6,  7, 11585, m11585, [pd_8192],  5,  4
+    VP9_UNPACK_MULSUB_2W_4X  2,  3, 11585, 11585, [pd_8192],  5,  4
+    UNSCRATCH                5,  8, tmpq+10*%%str
+%endif
+
+    ; m13=out0, m0=out3, m3=out4, m8=out7, m1=out8, m2=out11, m9=out12, m12=out15
+    ; m11|r13=out1, m5=out2, m7=out5, r15=out6, r3=out9, r10=out10, r11=out13, r2=out14
+    ; pass 1: transpose two groups of eight registers and store them to tmpq; pass 2: round and write through dstq
+%if %2 == 1
+%if ARCH_X86_64
+    mova                   m13, [tmpq+ 6*%%str]
+    TRANSPOSE8x8W            1, 11, 14, 0, 3, 15, 13, 6, 10
+    mova          [tmpq+ 0*16], m1
+    mova          [tmpq+ 2*16], m11
+    mova          [tmpq+ 4*16], m14
+    mova          [tmpq+ 6*16], m0
+    mova                    m1, [tmpq+ 3*%%str]
+    mova                   m11, [tmpq+ 7*%%str]
+    mova                   m14, [tmpq+11*%%str]
+    mova                    m0, [tmpq+13*%%str]
+    mova          [tmpq+ 8*16], m3
+    mova          [tmpq+10*16], m15
+    mova          [tmpq+12*16], m13
+    mova          [tmpq+14*16], m6
+
+    TRANSPOSE8x8W            7, 1, 11, 2, 9, 14, 0, 5, 10
+    mova          [tmpq+ 1*16], m7
+    mova          [tmpq+ 3*16], m1
+    mova          [tmpq+ 5*16], m11
+    mova          [tmpq+ 7*16], m2
+    mova          [tmpq+ 9*16], m9
+    mova          [tmpq+11*16], m14
+    mova          [tmpq+13*16], m0
+    mova          [tmpq+15*16], m5
+%else
+    mova       [tmpq+12*%%str], m2
+    mova       [tmpq+ 1*%%str], m5
+    mova       [tmpq+15*%%str], m7
+    mova                    m2, [tmpq+ 9*%%str]
+    mova                    m5, [tmpq+ 5*%%str]
+    mova                    m7, [tmpq+ 8*%%str]
+    TRANSPOSE8x8W            1, 2, 5, 0, 3, 7, 4, 6, [tmpq+ 6*%%str], [tmpq+ 8*%%str], 1
+    mova          [tmpq+ 0*16], m1
+    mova          [tmpq+ 2*16], m2
+    mova          [tmpq+ 4*16], m5
+    mova          [tmpq+ 6*16], m0
+    mova          [tmpq+10*16], m7
+    mova                    m3, [tmpq+12*%%str]
+    mova          [tmpq+12*16], m4
+    mova                    m4, [tmpq+14*%%str]
+    mova          [tmpq+14*16], m6
+
+    mova                    m0, [tmpq+15*%%str]
+    mova                    m1, [tmpq+ 3*%%str]
+    mova                    m2, [tmpq+ 7*%%str]
+    mova                    m5, [tmpq+11*%%str]
+    mova                    m7, [tmpq+ 1*%%str]
+    TRANSPOSE8x8W            0, 1, 2, 3, 4, 5, 6, 7, [tmpq+13*%%str], [tmpq+ 9*%%str], 1
+    mova          [tmpq+ 1*16], m0
+    mova          [tmpq+ 3*16], m1
+    mova          [tmpq+ 5*16], m2
+    mova          [tmpq+ 7*16], m3
+    mova          [tmpq+11*16], m5
+    mova          [tmpq+13*16], m6
+    mova          [tmpq+15*16], m7
+%endif
+%else
+    pxor                    m4, m4                          ; zero register handed to VP9_IDCT8_WRITEx2 below
+
+%if cpuflag(ssse3)
+%define ROUND_REG [pw_512]
+%else
+%define ROUND_REG [pw_32]
+%endif
+
+%if ARCH_X86_64
+    mova                   m12, [tmpq+ 6*%%str]
+    VP9_IDCT8_WRITEx2        1, 11, 10,  8,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2       14,  0, 10,  8,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2        3, 15, 10,  8,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2       12,  6, 10,  8,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+
+    mova                    m1, [tmpq+ 3*%%str]
+    mova                   m11, [tmpq+ 7*%%str]
+    mova                   m14, [tmpq+11*%%str]
+    mova                    m0, [tmpq+13*%%str]
+
+    VP9_IDCT8_WRITEx2        7,  1, 10,  8,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2       11,  2, 10,  8,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2        9, 14, 10,  8,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    VP9_IDCT8_WRITEx2        0,  5, 10,  8,  4, ROUND_REG, 6
+%else
+    mova       [tmpq+ 0*%%str], m2
+    mova       [tmpq+ 1*%%str], m5
+    mova       [tmpq+ 2*%%str], m7
+    mova                    m2, [tmpq+ 9*%%str]
+    VP9_IDCT8_WRITEx2        1,  2,  5,  7,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    mova                    m5, [tmpq+ 5*%%str]
+    VP9_IDCT8_WRITEx2        5,  0,  1,  2,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    mova                    m5, [tmpq+ 8*%%str]
+    VP9_IDCT8_WRITEx2        3,  5,  1,  2,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    mova                    m5, [tmpq+ 6*%%str]
+    VP9_IDCT8_WRITEx2        5,  6,  1,  2,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+
+    mova                    m0, [tmpq+ 2*%%str]
+    mova                    m3, [tmpq+ 3*%%str]
+    VP9_IDCT8_WRITEx2        0,  3,  1,  2,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    mova                    m0, [tmpq+ 7*%%str]
+    mova                    m3, [tmpq+ 0*%%str]
+    VP9_IDCT8_WRITEx2        0,  3,  1,  2,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    mova                    m0, [tmpq+14*%%str]
+    mova                    m3, [tmpq+11*%%str]
+    VP9_IDCT8_WRITEx2        0,  3,  1,  2,  4, ROUND_REG, 6
+    lea                   dstq, [dstq+strideq*2]
+    mova                    m0, [tmpq+13*%%str]
+    mova                    m3, [tmpq+ 1*%%str]
+    VP9_IDCT8_WRITEx2        0,  3,  1,  2,  4, ROUND_REG, 6
+%endif
+
+    SWAP                     0,  4 ; zero (the function built from this macro passes m0 to ZERO_BLOCK)
+%undef ROUND_REG
+%endif
+%endmacro
+; Emits vp9_%1_%3_16x16_add for cpu %5: pass 1 runs VP9_%2_1D on blockq, pass 2 runs VP9_%4_1D on the stack buffer.
+%macro IADST16_FN 5
+INIT_XMM %5
+cglobal vp9_%1_%3_16x16_add, 3, 6, 16, 512, dst, stride, block, cnt, dst_bak, tmp
+    mov               cntd, 2                   ; two iterations per pass
+    mov               tmpq, rsp                 ; tmp starts at the stack pointer
+.loop1_full:
+    VP9_%2_1D       blockq, 1
+    add             blockq, 16                  ; step 16 bytes into the block for the second iteration
+    add               tmpq, 256                 ; and 256 bytes into the intermediate buffer
+    dec               cntd
+    jg .loop1_full
+    sub             blockq, 32                  ; undo the two 16-byte advances above
+
+    mov               cntd, 2
+    mov               tmpq, rsp
+    mov           dst_bakq, dstq                ; remember the original dst for the second iteration
+.loop2_full:
+    VP9_%4_1D         tmpq, 2
+    lea               dstq, [dst_bakq+8]        ; second iteration starts 8 bytes to the right of the original dst
+    add               tmpq, 16
+    dec               cntd
+    jg .loop2_full
+
+    ; at the end of the loop, m0 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 32, 16, m0
+    RET
+%endmacro
+; instantiate vp9_idct_iadst, vp9_iadst_idct and vp9_iadst_iadst 16x16 add for sse2, ssse3 and avx
+IADST16_FN idct,  IDCT16,  iadst, IADST16, sse2
+IADST16_FN iadst, IADST16, idct,  IDCT16,  sse2
+IADST16_FN iadst, IADST16, iadst, IADST16, sse2
+IADST16_FN idct,  IDCT16,  iadst, IADST16, ssse3
+IADST16_FN iadst, IADST16, idct,  IDCT16,  ssse3
+IADST16_FN iadst, IADST16, iadst, IADST16, ssse3
+IADST16_FN idct,  IDCT16,  iadst, IADST16, avx
+IADST16_FN iadst, IADST16, idct,  IDCT16,  avx
+IADST16_FN iadst, IADST16, iadst, IADST16, avx
+
+; in: data in m[0-15] except m0/m4, which are in [blockq+0] and [blockq+128]
+; out: m[0-15] except m6, which is in [blockq+192]
+; uses blockq as scratch space
+%macro VP9_IADST16_YMM_1D 0
+    mova          [blockq+ 32], m3              ; spill m3, m7 and m8: they are scratch in the first half
+    mova          [blockq+ 64], m7
+    mova          [blockq+ 96], m8
+
+    ; first half of round 1
+    VP9_UNPACK_MULSUB_2D_4X  9,  6,  0,  3, 13160,  9760    ; m9/x=t7[d], m6/x=t6[d]
+    VP9_UNPACK_MULSUB_2D_4X  1, 14,  4,  7,  2404, 16207    ; m1/x=t15[d], m14/x=t14[d]
+    VP9_RND_SH_SUMSUB_BA    14,  6,  7,  3,  8, [pd_8192]   ; m14=t6[w], m6=t14[w]
+    VP9_RND_SH_SUMSUB_BA     1,  9,  4,  0,  8, [pd_8192]   ; m1=t7[w], m9=t15[w]
+
+    VP9_UNPACK_MULSUB_2D_4X 13,  2,  4,  7, 15893,  3981    ; m13/x=t3[d], m2/x=t2[d]
+    VP9_UNPACK_MULSUB_2D_4X  5, 10,  0,  3,  8423, 14053    ; m5/x=t11[d], m10/x=t10[d]
+    VP9_RND_SH_SUMSUB_BA    10,  2,  3,  7,  8, [pd_8192]   ; m10=t2[w], m2=t10[w]
+    VP9_RND_SH_SUMSUB_BA     5, 13,  0,  4,  8, [pd_8192]   ; m5=t3[w], m13=t11[w]
+
+    ; half of round 2 t8-15
+    VP9_UNPACK_MULSUB_2D_4X  2, 13,  4,  7,  9102, 13623    ; m2/x=t11[d], m13/x=t10[d]
+    VP9_UNPACK_MULSUB_2D_4X  9,  6,  3,  0, 13623,  9102    ; m9/x=t14[d], m6/x=t15[d]
+    VP9_RND_SH_SUMSUB_BA     9, 13,  3,  7,  8, [pd_8192]   ; m9=t10[w], m13=t14[w]
+    VP9_RND_SH_SUMSUB_BA     6,  2,  0,  4,  8, [pd_8192]   ; m6=t11[w], m2=t15[w]
+
+    SUMSUB_BA            w, 14, 10,  8                      ; m14=t2, m10=t6
+    SUMSUB_BA            w,  1,  5,  8                      ; m1=t3, m5=t7
+    ; load the memory-resident inputs and the spilled registers, then park five first-half results in the same slots
+    mova                    m0, [blockq+  0]
+    mova                    m4, [blockq+128]
+    mova                    m3, [blockq+ 32]
+    mova                    m7, [blockq+ 64]
+    mova                    m8, [blockq+ 96]
+    mova          [blockq+  0], m1
+    mova          [blockq+128], m14
+    mova          [blockq+ 32], m6
+    mova          [blockq+ 64], m9
+    mova          [blockq+ 96], m10
+
+    ; second half of round 1
+    VP9_UNPACK_MULSUB_2D_4X 15,  0,  1,  9, 16364,   804    ; m15/x=t1[d], m0/x=t0[d]
+    VP9_UNPACK_MULSUB_2D_4X  7,  8, 10,  6, 11003, 12140    ; m7/x=t9[d], m8/x=t8[d]
+    VP9_RND_SH_SUMSUB_BA     8,  0,  6,  9, 14, [pd_8192]   ; m8=t0[w], m0=t8[w]
+    VP9_RND_SH_SUMSUB_BA     7, 15, 10,  1, 14, [pd_8192]   ; m7=t1[w], m15=t9[w]
+
+    VP9_UNPACK_MULSUB_2D_4X 11,  4, 10,  6, 14811,  7005    ; m11/x=t5[d], m4/x=t4[d]
+    VP9_UNPACK_MULSUB_2D_4X  3, 12,  1,  9,  5520, 15426    ; m3/x=t13[d], m12/x=t12[d]
+    VP9_RND_SH_SUMSUB_BA    12,  4,  9,  6, 14, [pd_8192]   ; m12=t4[w], m4=t12[w]
+    VP9_RND_SH_SUMSUB_BA     3, 11,  1, 10, 14, [pd_8192]   ; m3=t5[w], m11=t13[w]
+
+    ; second half of round 2 t8-15
+    VP9_UNPACK_MULSUB_2D_4X  0, 15,  6, 10, 16069,  3196    ; m15/x=t8[d], m0/x=t9[d]
+    VP9_UNPACK_MULSUB_2D_4X 11,  4,  9,  1,  3196, 16069    ; m11/x=t12[d], m4/x=t13[d]
+    VP9_RND_SH_SUMSUB_BA    11, 15,  9, 10, 14, [pd_8192]   ; m11=t8[w], m15=t12[w]
+    VP9_RND_SH_SUMSUB_BA     4,  0,  1,  6, 14, [pd_8192]   ; m4=t9[w], m0=t13[w]
+
+    SUMSUB_BA            w, 12,  8, 14                      ; m12=t0, m8=t4
+    SUMSUB_BA            w,  3,  7, 14                      ; m3=t1, m7=t5
+
+    mova                   m10, [blockq+ 96]                ; fetch the value parked from m10 above
+    mova          [blockq+ 96], m12                         ; and park m12 in its place (m12 is scratch in round 3)
+
+    ; round 3
+    VP9_UNPACK_MULSUB_2D_4X 15,  0,  9, 12, 15137,  6270    ; m15/x=t13[d], m0/x=t12[d]
+    VP9_UNPACK_MULSUB_2D_4X  2, 13,  1,  6,  6270, 15137    ; m2/x=t14[d], m13/x=t15[d]
+    VP9_RND_SH_SUMSUB_BA     2,  0,  1, 12, 14, [pd_8192]   ; m2=out2[w], m0=t14a[w]
+    VP9_RND_SH_SUMSUB_BA    13, 15,  6,  9, 14, [pd_8192]
+    PSIGNW                 m13, [pw_m1]                     ; m13=out13[w], m15=t15a[w]
+
+    VP9_UNPACK_MULSUB_2D_4X  8,  7, 12,  9, 15137,  6270    ; m8/x=t5[d], m7/x=t4[d]
+    VP9_UNPACK_MULSUB_2D_4X  5, 10,  1,  6,  6270, 15137    ; m5/x=t6[d], m10/x=t7[d]
+    VP9_RND_SH_SUMSUB_BA     5,  7,  1,  9, 14, [pd_8192]
+    PSIGNW                  m5, [pw_m1]                     ; m5=out3[w], m7=t6[w]
+    VP9_RND_SH_SUMSUB_BA    10,  8,  6, 12, 14, [pd_8192]   ; m10=out12[w], m8=t7[w]
+    ; fetch the five parked values, then park m10 (out12) and m5 (out3) in [blockq+0] and [blockq+128]
+    mova                    m1, [blockq+  0]
+    mova                   m14, [blockq+128]
+    mova                    m6, [blockq+ 32]
+    mova                    m9, [blockq+ 64]
+    mova                   m12, [blockq+ 96]
+    mova          [blockq+  0], m10
+    mova          [blockq+128], m5
+
+    SUMSUB_BA            w, 14, 12,  5                      ; m14=out0, m12=t2a
+    SUMSUB_BA            w,  1,  3,  5
+    PSIGNW                  m1, [pw_m1]                     ; m1=out15, m3=t3a
+
+    SUMSUB_BA            w,  9, 11,  5
+    PSIGNW                  m9, [pw_m1]                     ; m9=out1, m11=t10
+    SUMSUB_BA            w,  6,  4,  5                      ; m6=out14, m4=t11
+
+    VP9_UNPACK_MULSUB_2W_4X  4, 11, 11585, 11585, [pd_8192],  5, 10 ; m4=out9, m11=out6
+    mova                    m5, [blockq+128]                ; fetch the value parked from m5 above
+    mova          [blockq+192], m11                         ; out6 is returned in memory, per the out: line above
+    PSIGNW                 m15, [pw_m1]
+    VP9_UNPACK_MULSUB_2W_4X 15,  0, 11585, 11585, [pd_8192], 10, 11 ; m15=out5, m0=out10
+
+    PSIGNW                  m3, [pw_m1]
+    VP9_UNPACK_MULSUB_2W_4X  3, 12, 11585, 11585, [pd_8192], 10, 11 ; m3=out7,m12=out8
+    VP9_UNPACK_MULSUB_2W_4X  8,  7, 11585, 11585, [pd_8192], 10, 11 ; m8=out11,m7=out4
+
+    mova                   m10, [blockq+  0]                ; fetch the value parked from m10 above
+    ; rename registers for the out: layout documented above the macro
+    SWAP                     0, 14,  6, 11,  8, 12, 10
+    SWAP                     1,  9, 15,  4,  7,  3,  5
+    SWAP                     5,  9, 15
+%endmacro
+
+%if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+%macro IADST16_YMM_FN 4 ; %1/%3: transform names used in the symbol, %2/%4: 1D macro run before/after the transpose
+INIT_YMM avx2
+cglobal vp9_%1_%3_16x16_add, 4, 4, 16, dst, stride, block, eob
+    mova                m1, [blockq+ 32] ; note: [blockq+0] (m0) and [blockq+128] (m4) are not loaded here
+    mova                m2, [blockq+ 64]
+    mova                m3, [blockq+ 96]
+    mova                m5, [blockq+160]
+    mova                m6, [blockq+192]
+    mova                m7, [blockq+224]
+    mova                m8, [blockq+256]
+    mova                m9, [blockq+288]
+    mova               m10, [blockq+320]
+    mova               m11, [blockq+352]
+    mova               m12, [blockq+384]
+    mova               m13, [blockq+416]
+    mova               m14, [blockq+448]
+    mova               m15, [blockq+480]
+
+    VP9_%2_YMM_1D
+    TRANSPOSE16x16W      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
+                         [blockq+192], [blockq+128], 1
+    mova      [blockq+  0], m0
+    VP9_%4_YMM_1D
+
+    mova      [blockq+224], m7 ; save m7: m6/m7 are passed in the tmp slots of the first three writes
+
+    ; store
+    VP9_IDCT8_WRITEx2    0,  1, 6, 7, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2    2,  3, 6, 7, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2    4,  5, 6, 7, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    mova                m6, [blockq+192] ; reload m6/m7 before they are written out as rows
+    mova                m7, [blockq+224]
+    VP9_IDCT8_WRITEx2    6,  7, 1, 2, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2    8,  9, 1, 2, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2   10, 11, 1, 2, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2   12, 13, 1, 2, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2   14, 15, 1, 2, unused, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+
+    ; m0 was used for row data above (there is no loop in this function),
+    ; so clear it explicitly and use it to zero out the block coefficients
+    pxor                m0, m0
+    ZERO_BLOCK      blockq, 32, 16, m0
+    RET
+%endmacro
+
+IADST16_YMM_FN idct,  IDCT16,  iadst, IADST16
+IADST16_YMM_FN iadst, IADST16, idct,  IDCT16
+IADST16_YMM_FN iadst, IADST16, iadst, IADST16
+%endif
+
+;---------------------------------------------------------------------------------------------
+; void vp9_idct_idct_32x32_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+
+%macro VP9_IDCT32_1D 2-3 32 ; src, pass (1 or 2), nnzc (optional, defaults to 32)
+%if %2 == 1
+%assign %%str mmsize ; pass 1: tmpq offsets below are scaled by mmsize
+%else
+%assign %%str 64 ; pass 2: tmpq offsets below are scaled by 64
+%endif
+
+    ; first do t0-15, this can be done identically to idct16x16
+    VP9_IDCT16_1D_START %1, %3/2, 64*2, tmpq, 2*%%str, 1
+
+    ; store everything on stack to make space available for t16-31
+    ; we store interleaved with the output of the second half (t16-31)
+    ; so we don't need to allocate extra stack space
+    mova    [tmpq+ 0*%%str], m0     ; t0
+    mova    [tmpq+ 4*%%str], m1     ; t1
+    mova    [tmpq+ 8*%%str], m2     ; t2
+    mova    [tmpq+12*%%str], m3     ; t3
+    mova    [tmpq+16*%%str], m4     ; t4
+    mova    [tmpq+20*%%str], m5     ; t5
+%if ARCH_X86_64
+    mova    [tmpq+22*%%str], m10    ; t10
+    mova    [tmpq+18*%%str], m11    ; t11
+    mova    [tmpq+14*%%str], m12    ; t12
+    mova    [tmpq+10*%%str], m13    ; t13
+    mova    [tmpq+ 6*%%str], m14    ; t14
+    mova    [tmpq+ 2*%%str], m15    ; t15
+%endif
+
+    mova                m0, [tmpq+ 30*%%str]
+    UNSCRATCH            1,  6, tmpq+26*%%str
+    UNSCRATCH            2,  8, tmpq+24*%%str
+    UNSCRATCH            3,  9, tmpq+28*%%str
+    SUMSUB_BA            w,  1,  3, 4       ; t6, t9
+    SUMSUB_BA            w,  0,  2, 4       ; t7, t8
+
+    mova    [tmpq+24*%%str], m1     ; t6
+    mova    [tmpq+28*%%str], m0     ; t7
+    mova    [tmpq+30*%%str], m2     ; t8
+    mova    [tmpq+26*%%str], m3     ; t9
+
+    ; then, secondly, do t16-31
+%if %3 <= 8 ; nnzc <= 8: only inputs 1 and 7 are read for this group
+    mova                 m4, [%1+ 1*64]
+    mova                 m7, [%1+ 7*64]
+
+    pmulhrsw             m1,  m4, [pw_16364x2] ;t31
+    pmulhrsw             m4, [pw_804x2] ;t16
+
+    VP9_UNPACK_MULSUB_2W_4X   5,  0,  1,  4, 16069,  3196, [pd_8192], 6,  2 ; t17, t30
+
+    pmulhrsw             m3,  m7, [pw_m5520x2] ;t19
+    pmulhrsw             m7, [pw_15426x2] ;t28
+
+    SCRATCH               4, 13, tmpq+ 1*%%str
+    SCRATCH               5, 12, tmpq+15*%%str
+
+    VP9_UNPACK_MULSUB_2W_4X   2,  6,  7,  3, 3196, m16069, [pd_8192], 4,  5 ; t18, t29
+%else
+    mova                 m0, [%1+ 1*64]
+    mova                 m1, [%1+15*64]
+%if %3 <= 16 ; nnzc <= 16: inputs 17 and 31 are not read
+    pmulhrsw             m5, m0, [pw_16364x2]
+    pmulhrsw             m0, [pw_804x2]
+    pmulhrsw             m4, m1, [pw_m11003x2]
+    pmulhrsw             m1, [pw_12140x2]
+%else
+    mova                 m4, [%1+17*64]
+    mova                 m5, [%1+31*64]
+
+    VP9_UNPACK_MULSUB_2W_4X   0,  5, 16364,   804, [pd_8192], 2, 3 ; t16, t31
+    VP9_UNPACK_MULSUB_2W_4X   4,  1, 11003, 12140, [pd_8192], 2, 3 ; t17, t30
+%endif
+    SUMSUB_BA             w,  4,  0,  2
+    SUMSUB_BA             w,  1,  5,  2
+
+    VP9_UNPACK_MULSUB_2W_4X   5,  0, 16069,  3196, [pd_8192], 2, 3 ; t17, t30
+
+    SCRATCH               4, 13, tmpq+ 1*%%str
+    SCRATCH               5, 12, tmpq+15*%%str
+
+    mova                 m2, [%1+ 7*64]
+    mova                 m3, [%1+ 9*64]
+%if %3 <= 16 ; nnzc <= 16: inputs 23 and 25 are not read
+    pmulhrsw             m7,  m3, [pw_14811x2]
+    pmulhrsw             m3, [pw_7005x2]
+    pmulhrsw             m6,  m2, [pw_m5520x2]
+    pmulhrsw             m2, [pw_15426x2]
+%else
+    mova                 m7, [%1+23*64]
+    mova                 m6, [%1+25*64]
+
+    VP9_UNPACK_MULSUB_2W_4X   3,  7, 14811,  7005, [pd_8192], 4, 5 ; t18, t29
+    VP9_UNPACK_MULSUB_2W_4X   6,  2,  5520, 15426, [pd_8192], 4, 5 ; t19, t28
+%endif
+    SUMSUB_BA             w,  3,  6,  4
+    SUMSUB_BA             w,  7,  2,  4
+
+    VP9_UNPACK_MULSUB_2W_4X   2,  6, 3196, m16069, [pd_8192], 4, 5 ; t18, t29
+%endif
+
+    UNSCRATCH             5, 12, tmpq+15*%%str
+    SUMSUB_BA             w,  6,  0,  4
+    mova    [tmpq+25*%%str], m6             ; t19
+    UNSCRATCH             4, 13, tmpq+ 1*%%str
+    SUMSUB_BA             w,  7,  1,  6
+    SUMSUB_BA             w,  3,  4,  6
+    mova    [tmpq+23*%%str], m3             ; t16
+    SUMSUB_BA             w,  2,  5,  6
+
+    VP9_UNPACK_MULSUB_2W_4X   0,  5, 15137,  6270, [pd_8192], 6, 3 ; t18, t29
+    VP9_UNPACK_MULSUB_2W_4X   1,  4, 15137,  6270, [pd_8192], 6, 3 ; t19, t28
+
+    SCRATCH               0, 10, tmpq+ 1*%%str
+    SCRATCH               1, 11, tmpq+ 7*%%str
+    SCRATCH               2,  9, tmpq+ 9*%%str
+    SCRATCH               4, 14, tmpq+15*%%str
+    SCRATCH               5, 15, tmpq+17*%%str
+    SCRATCH               7, 13, tmpq+31*%%str
+
+%if %3 <= 8 ; nnzc <= 8: only inputs 5 and 3 are read for this group
+    mova                 m0, [%1+ 5*64]
+    mova                 m3, [%1+ 3*64]
+
+    pmulhrsw             m5,  m0, [pw_15893x2] ;t27
+    pmulhrsw             m0, [pw_3981x2] ;t20
+
+    VP9_UNPACK_MULSUB_2W_4X   1,  4,  5,  0,  9102, 13623, [pd_8192], 7,  2 ; t21, t26
+
+    pmulhrsw             m6,  m3, [pw_m2404x2] ;t23
+    pmulhrsw             m3, [pw_16207x2] ;t24
+
+    SCRATCH               5,  8, tmpq+ 5*%%str
+    SCRATCH               4, 12, tmpq+11*%%str
+
+    VP9_UNPACK_MULSUB_2W_4X   7,  2,  3,  6, 13623, m9102, [pd_8192], 4, 5 ; t22, t25
+%else
+    mova                 m4, [%1+ 5*64]
+    mova                 m5, [%1+11*64]
+%if %3 <= 16 ; nnzc <= 16: inputs 21 and 27 are not read
+    pmulhrsw             m1, m4, [pw_15893x2]
+    pmulhrsw             m4, [pw_3981x2]
+    pmulhrsw             m0, m5, [pw_m8423x2]
+    pmulhrsw             m5, [pw_14053x2]
+%else
+    mova                 m0, [%1+21*64]
+    mova                 m1, [%1+27*64]
+
+    VP9_UNPACK_MULSUB_2W_4X   4,  1, 15893,  3981, [pd_8192], 2, 3 ; t20, t27
+    VP9_UNPACK_MULSUB_2W_4X   0,  5,  8423, 14053, [pd_8192], 2, 3 ; t21, t26
+%endif
+    SUMSUB_BA             w,  0,  4,  2
+    SUMSUB_BA             w,  5,  1,  2
+
+    VP9_UNPACK_MULSUB_2W_4X   1,  4,  9102, 13623, [pd_8192], 2, 3 ; t21, t26
+
+    SCRATCH               5,  8, tmpq+ 5*%%str
+    SCRATCH               4, 12, tmpq+11*%%str
+
+    mova                 m7, [%1+ 3*64]
+    mova                 m6, [%1+13*64]
+%if %3 <= 16 ; nnzc <= 16: inputs 29 and 19 are not read
+    pmulhrsw             m3, m6, [pw_13160x2]
+    pmulhrsw             m6, [pw_9760x2]
+    pmulhrsw             m2, m7, [pw_m2404x2]
+    pmulhrsw             m7, [pw_16207x2]
+%else
+    mova                 m2, [%1+29*64]
+    mova                 m3, [%1+19*64]
+    VP9_UNPACK_MULSUB_2W_4X   6,  3, 13160,  9760, [pd_8192], 4, 5 ; t22, t25
+    VP9_UNPACK_MULSUB_2W_4X   2,  7,  2404, 16207, [pd_8192], 4, 5 ; t23, t24
+%endif
+    SUMSUB_BA             w,  6,  2,  4
+    SUMSUB_BA             w,  3,  7,  4
+
+    VP9_UNPACK_MULSUB_2W_4X   7,  2, 13623, m9102, [pd_8192], 4, 5 ; t22, t25
+%endif
+
+    ; m4=t16, m5=t17, m9=t18, m8=t19, m0=t20, m1=t21, m13=t22, m12=t23,
+    ; m3=t24, m2=t25, m14=t26, m15=t27, m7=t28, m6=t29, m10=t30, m11=t31
+
+    UNSCRATCH             4, 12, tmpq+11*%%str
+    SUMSUB_BA             w,  0,  6, 5
+    SUMSUB_BA             w,  4,  2, 5
+    UNSCRATCH             5,  8, tmpq+ 5*%%str
+    SCRATCH               4,  8, tmpq+11*%%str
+    SUMSUB_BA             w,  1,  7, 4
+    SUMSUB_BA             w,  5,  3, 4
+    SCRATCH               5, 12, tmpq+ 5*%%str
+
+    VP9_UNPACK_MULSUB_2W_4X   3,  6, 6270, m15137, [pd_8192], 4, 5 ; t20, t27
+    VP9_UNPACK_MULSUB_2W_4X   2,  7, 6270, m15137, [pd_8192], 4, 5 ; t21, t26
+
+    ; m8[s]=t16, m9=t17, m5=t18, m4[s]=t19, m12=t20, m13=t21, m1=t22, m0=t23,
+    ; m15=t24, m14=t25, m2=t26, m3=t27, m11=t28, m10=t29, m6=t30, m7=t31
+
+    UNSCRATCH             5,  9, tmpq+ 9*%%str
+    mova                 m4, [tmpq+23*%%str] ; t16
+%if ARCH_X86_64
+    SUMSUB_BA             w,  1,  5,  9
+    SUMSUB_BA             w,  0,  4,  9
+%else
+    SUMSUB_BADC           w,  1,  5,  0,  4
+%endif
+    mova    [tmpq+29*%%str], m1     ; t17
+    mova    [tmpq+21*%%str], m0     ; t16
+    UNSCRATCH             0, 10, tmpq+ 1*%%str
+    UNSCRATCH             1, 11, tmpq+ 7*%%str
+%if ARCH_X86_64
+    SUMSUB_BA             w,  2,  0,  9
+    SUMSUB_BA             w,  3,  1,  9
+%else
+    SUMSUB_BADC           w,  2,  0,  3,  1
+%endif
+    mova    [tmpq+ 9*%%str], m2     ; t18
+    mova    [tmpq+13*%%str], m3     ; t19
+    SCRATCH               0, 10, tmpq+23*%%str
+    SCRATCH               1, 11, tmpq+27*%%str
+
+    UNSCRATCH             2, 14, tmpq+15*%%str
+    UNSCRATCH             3, 15, tmpq+17*%%str
+    SUMSUB_BA             w,  6,  2, 0
+    SUMSUB_BA             w,  7,  3, 0
+    SCRATCH               6, 14, tmpq+ 3*%%str
+    SCRATCH               7, 15, tmpq+ 7*%%str
+
+    UNSCRATCH             0,  8, tmpq+11*%%str
+    mova                 m1, [tmpq+25*%%str] ; t19
+    UNSCRATCH             6, 12, tmpq+ 5*%%str
+    UNSCRATCH             7, 13, tmpq+31*%%str
+%if ARCH_X86_64
+    SUMSUB_BA             w,  0,  1,  9
+    SUMSUB_BA             w,  6,  7,  9
+%else
+    SUMSUB_BADC           w,  0,  1,  6,  7
+%endif
+
+    ; m0=t16, m1=t17, m2=t18, m3=t19, m11=t20, m10=t21, m9=t22, m8=t23,
+    ; m7=t24, m6=t25, m5=t26, m4=t27, m12=t28, m13=t29, m14=t30, m15=t31
+
+%if 0; cpuflag(ssse3) -- disabled by the constant 0: this pmulhrsw branch is never assembled
+%if ARCH_X86_64
+    SUMSUB_BA             w,  4,  7,  8
+    SUMSUB_BA             w,  5,  1,  8
+%else
+    SUMSUB_BADC           w,  4,  7,  5,  1
+%endif
+
+    pmulhrsw             m7, [pw_11585x2]
+    pmulhrsw             m4, [pw_11585x2]
+    pmulhrsw             m1, [pw_11585x2]
+    pmulhrsw             m5, [pw_11585x2]
+
+    mova    [tmpq+ 5*%%str], m7     ; t23
+    SCRATCH               1, 13, tmpq+25*%%str
+    UNSCRATCH             7, 10, tmpq+23*%%str
+    UNSCRATCH             1, 11, tmpq+27*%%str
+
+%if ARCH_X86_64
+    SUMSUB_BA             w,  7,  3, 10
+    SUMSUB_BA             w,  1,  2, 10
+%else
+    SUMSUB_BADC           w,  7,  3,  1,  2
+%endif
+
+    pmulhrsw             m3, [pw_11585x2]
+    pmulhrsw             m7, [pw_11585x2]
+    pmulhrsw             m2, [pw_11585x2]
+    pmulhrsw             m1, [pw_11585x2]
+%else
+    SCRATCH               0,  8, tmpq+15*%%str
+    SCRATCH               6,  9, tmpq+17*%%str
+    VP9_UNPACK_MULSUB_2W_4X  7,  4, 11585, 11585, [pd_8192], 0, 6
+    mova    [tmpq+ 5*%%str], m7     ; t23
+    UNSCRATCH             7, 10, tmpq+23*%%str
+    VP9_UNPACK_MULSUB_2W_4X  1,  5, 11585, 11585, [pd_8192], 0, 6
+    SCRATCH               1, 13, tmpq+25*%%str
+    UNSCRATCH             1, 11, tmpq+27*%%str
+    VP9_UNPACK_MULSUB_2W_4X  3,  7, 11585, 11585, [pd_8192], 0, 6
+    VP9_UNPACK_MULSUB_2W_4X  2,  1, 11585, 11585, [pd_8192], 0, 6
+    UNSCRATCH             0,  8, tmpq+15*%%str
+    UNSCRATCH             6,  9, tmpq+17*%%str
+%endif
+
+    ; m0=t16, m1=t17, m2=t18, m3=t19, m4=t20, m5=t21, m6=t22, m7=t23,
+    ; m8=t24, m9=t25, m10=t26, m11=t27, m12=t28, m13=t29, m14=t30, m15=t31
+
+    ; then do final pass to sumsub+store the two halves
+%if %2 == 1
+    mova    [tmpq+17*%%str], m2     ; t20
+    mova    [tmpq+ 1*%%str], m3     ; t21
+%if ARCH_X86_64
+    mova    [tmpq+25*%%str], m13    ; t22
+
+    mova                 m8, [tmpq+ 0*%%str] ; t0
+    mova                 m9, [tmpq+ 4*%%str] ; t1
+    mova                m12, [tmpq+ 8*%%str] ; t2
+    mova                m11, [tmpq+12*%%str] ; t3
+    mova                 m2, [tmpq+16*%%str] ; t4
+    mova                 m3, [tmpq+20*%%str] ; t5
+    mova                m13, [tmpq+24*%%str] ; t6
+
+    SUMSUB_BA             w,  6,  8, 10
+    mova    [tmpq+ 3*%%str], m8              ; t15
+    SUMSUB_BA             w,  0,  9,  8
+    SUMSUB_BA             w, 15, 12,  8
+    SUMSUB_BA             w, 14, 11,  8
+    SUMSUB_BA             w,  1,  2,  8
+    SUMSUB_BA             w,  7,  3,  8
+    SUMSUB_BA             w,  5, 13,  8
+    mova                m10, [tmpq+28*%%str] ; t7
+    SUMSUB_BA             w,  4, 10,  8
+%if cpuflag(avx2)
+    ; the awkward part of this idct is that the final pass does the outermost
+    ; interleaved sumsubs (t0/31, t1/30, etc) but the tN for the 16x16 need
+    ; to be sequential, which means we need to load/store half of the sumsub
+    ; intermediates back to/from memory to get a 16x16 transpose going...
+    ; This would be easier if we had more (e.g. 32) YMM regs here.
+    mova    [tmpq+ 7*%%str], m9
+    mova    [tmpq+11*%%str], m12
+    mova    [tmpq+15*%%str], m11
+    mova    [tmpq+19*%%str], m2
+    mova    [tmpq+23*%%str], m3
+    mova    [tmpq+27*%%str], m13
+    mova    [tmpq+31*%%str], m10
+    mova    [tmpq+12*%%str], m5
+
+    mova                m13, [tmpq+30*%%str] ; t8
+    mova                m12, [tmpq+26*%%str] ; t9
+    mova                m11, [tmpq+22*%%str] ; t10
+    mova                m10, [tmpq+18*%%str] ; t11
+    mova                 m9, [tmpq+17*%%str] ; t20
+    mova                 m8, [tmpq+ 1*%%str] ; t21
+    mova                 m3, [tmpq+25*%%str] ; t22
+    mova                 m2, [tmpq+ 5*%%str] ; t23
+
+    SUMSUB_BA             w,  9, 10, 5
+    SUMSUB_BA             w,  8, 11, 5
+    SUMSUB_BA             w,  3, 12, 5
+    SUMSUB_BA             w,  2, 13, 5
+    mova    [tmpq+ 1*%%str], m10
+    mova    [tmpq+ 5*%%str], m11
+    mova    [tmpq+17*%%str], m12
+    mova    [tmpq+25*%%str], m13
+
+    mova                m13, [tmpq+14*%%str] ; t12
+    mova                m12, [tmpq+10*%%str] ; t13
+    mova                m11, [tmpq+ 9*%%str] ; t18
+    mova                m10, [tmpq+13*%%str] ; t19
+
+    SUMSUB_BA             w, 11, 12, 5
+    SUMSUB_BA             w, 10, 13, 5
+    mova    [tmpq+ 9*%%str], m13
+    mova    [tmpq+13*%%str], m12
+    mova    [tmpq+10*%%str], m10
+    mova    [tmpq+14*%%str], m11
+
+    mova                m13, [tmpq+ 6*%%str] ; t14
+    mova                m12, [tmpq+ 2*%%str] ; t15
+    mova                m11, [tmpq+21*%%str] ; t16
+    mova                m10, [tmpq+29*%%str] ; t17
+    SUMSUB_BA             w, 11, 12, 5
+    SUMSUB_BA             w, 10, 13, 5
+    mova    [tmpq+21*%%str], m12
+    mova    [tmpq+29*%%str], m13
+    mova                m12, [tmpq+10*%%str]
+    mova                m13, [tmpq+14*%%str]
+
+    TRANSPOSE16x16W       6,  0, 15, 14,  1,  7,  5,  4, \
+                          2,  3,  8,  9, 12, 13, 10, 11, \
+            [tmpq+12*%%str], [tmpq+ 8*%%str], 1
+    mova    [tmpq+ 0*%%str], m6
+    mova    [tmpq+ 2*%%str], m0
+    mova    [tmpq+ 4*%%str], m15
+    mova    [tmpq+ 6*%%str], m14
+    mova    [tmpq+10*%%str], m7
+    mova    [tmpq+12*%%str], m5
+    mova    [tmpq+14*%%str], m4
+    mova    [tmpq+16*%%str], m2
+    mova    [tmpq+18*%%str], m3
+    mova    [tmpq+20*%%str], m8
+    mova    [tmpq+22*%%str], m9
+    mova    [tmpq+24*%%str], m12
+    mova    [tmpq+26*%%str], m13
+    mova    [tmpq+28*%%str], m10
+    mova    [tmpq+30*%%str], m11
+
+    mova                 m0, [tmpq+21*%%str]
+    mova                 m1, [tmpq+29*%%str]
+    mova                 m2, [tmpq+13*%%str]
+    mova                 m3, [tmpq+ 9*%%str]
+    mova                 m4, [tmpq+ 1*%%str]
+    mova                 m5, [tmpq+ 5*%%str]
+    mova                 m7, [tmpq+25*%%str]
+    mova                 m8, [tmpq+31*%%str]
+    mova                 m9, [tmpq+27*%%str]
+    mova                m10, [tmpq+23*%%str]
+    mova                m11, [tmpq+19*%%str]
+    mova                m12, [tmpq+15*%%str]
+    mova                m13, [tmpq+11*%%str]
+    mova                m14, [tmpq+ 7*%%str]
+    mova                m15, [tmpq+ 3*%%str]
+    TRANSPOSE16x16W       0,  1,  2,  3,  4,  5,  6,  7, \
+                          8,  9, 10, 11, 12, 13, 14, 15, \
+            [tmpq+17*%%str], [tmpq+ 9*%%str], 1
+    mova    [tmpq+ 1*%%str], m0
+    mova    [tmpq+ 3*%%str], m1
+    mova    [tmpq+ 5*%%str], m2
+    mova    [tmpq+ 7*%%str], m3
+    mova    [tmpq+11*%%str], m5
+    mova    [tmpq+13*%%str], m6
+    mova    [tmpq+15*%%str], m7
+    mova    [tmpq+17*%%str], m8
+    mova    [tmpq+19*%%str], m9
+    mova    [tmpq+21*%%str], m10
+    mova    [tmpq+23*%%str], m11
+    mova    [tmpq+25*%%str], m12
+    mova    [tmpq+27*%%str], m13
+    mova    [tmpq+29*%%str], m14
+    mova    [tmpq+31*%%str], m15
+%else ; !avx2
+    TRANSPOSE8x8W         6, 0, 15, 14, 1, 7, 5, 4, 8
+    mova    [tmpq+ 0*%%str], m6
+    mova    [tmpq+ 4*%%str], m0
+    mova    [tmpq+ 8*%%str], m15
+    mova    [tmpq+12*%%str], m14
+    mova    [tmpq+16*%%str], m1
+    mova    [tmpq+20*%%str], m7
+    mova    [tmpq+24*%%str], m5
+    mova    [tmpq+28*%%str], m4
+
+    mova                  m8, [tmpq+ 3*%%str] ; t15
+    TRANSPOSE8x8W         10, 13, 3, 2, 11, 12, 9, 8, 0
+    mova    [tmpq+ 3*%%str], m10
+    mova    [tmpq+ 7*%%str], m13
+    mova    [tmpq+11*%%str], m3
+    mova    [tmpq+15*%%str], m2
+    mova    [tmpq+19*%%str], m11
+    mova    [tmpq+23*%%str], m12
+    mova    [tmpq+27*%%str], m9
+    mova    [tmpq+31*%%str], m8
+
+    mova                m15, [tmpq+30*%%str] ; t8
+    mova                m14, [tmpq+26*%%str] ; t9
+    mova                m13, [tmpq+22*%%str] ; t10
+    mova                m12, [tmpq+18*%%str] ; t11
+    mova                m11, [tmpq+14*%%str] ; t12
+    mova                m10, [tmpq+10*%%str] ; t13
+    mova                 m9, [tmpq+ 6*%%str] ; t14
+    mova                 m8, [tmpq+ 2*%%str] ; t15
+    mova                 m7, [tmpq+21*%%str] ; t16
+    mova                 m6, [tmpq+29*%%str] ; t17
+    mova                 m5, [tmpq+ 9*%%str] ; t18
+    mova                 m4, [tmpq+13*%%str] ; t19
+    mova                 m3, [tmpq+17*%%str] ; t20
+    mova                 m2, [tmpq+ 1*%%str] ; t21
+    mova                 m1, [tmpq+25*%%str] ; t22
+
+    SUMSUB_BA             w,  7,  8, 0
+    mova    [tmpq+ 2*%%str], m8
+    mova                 m0, [tmpq+ 5*%%str] ; t23
+    SUMSUB_BA             w,  6,  9, 8
+    SUMSUB_BA             w,  5, 10, 8
+    SUMSUB_BA             w,  4, 11, 8
+    SUMSUB_BA             w,  3, 12, 8
+    SUMSUB_BA             w,  2, 13, 8
+    SUMSUB_BA             w,  1, 14, 8
+    SUMSUB_BA             w,  0, 15, 8
+
+    TRANSPOSE8x8W         0, 1, 2, 3, 4, 5, 6, 7, 8
+    mova    [tmpq+ 1*%%str], m0
+    mova    [tmpq+ 5*%%str], m1
+    mova    [tmpq+ 9*%%str], m2
+    mova    [tmpq+13*%%str], m3
+    mova    [tmpq+17*%%str], m4
+    mova    [tmpq+21*%%str], m5
+    mova    [tmpq+25*%%str], m6
+    mova    [tmpq+29*%%str], m7
+
+    mova                 m8, [tmpq+ 2*%%str]
+    TRANSPOSE8x8W         8, 9, 10, 11, 12, 13, 14, 15, 0
+    mova    [tmpq+ 2*%%str], m8
+    mova    [tmpq+ 6*%%str], m9
+    mova    [tmpq+10*%%str], m10
+    mova    [tmpq+14*%%str], m11
+    mova    [tmpq+18*%%str], m12
+    mova    [tmpq+22*%%str], m13
+    mova    [tmpq+26*%%str], m14
+    mova    [tmpq+30*%%str], m15
+%endif ; avx2
+%else ; !ARCH_X86_64: only m0-m7 are used below, everything else goes through tmpq
+    mova                 m2, [tmpq+24*%%str] ; t6
+    mova                 m3, [tmpq+28*%%str] ; t7
+    SUMSUB_BADC           w,  5,  2,  4,  3
+    mova    [tmpq+24*%%str], m5
+    mova    [tmpq+23*%%str], m2
+    mova    [tmpq+28*%%str], m4
+    mova    [tmpq+19*%%str], m3
+
+    mova                 m2, [tmpq+16*%%str] ; t4
+    mova                 m3, [tmpq+20*%%str] ; t5
+    SUMSUB_BA             w,  1,  2,  5
+    SUMSUB_BA             w,  7,  3,  5
+    mova    [tmpq+15*%%str], m2
+    mova    [tmpq+11*%%str], m3
+
+    mova                 m2, [tmpq+ 0*%%str] ; t0
+    mova                 m3, [tmpq+ 4*%%str] ; t1
+    SUMSUB_BA             w,  6,  2,  5
+    SUMSUB_BA             w,  0,  3,  5
+    mova    [tmpq+31*%%str], m2
+    mova    [tmpq+27*%%str], m3
+
+    mova                 m2, [tmpq+ 8*%%str] ; t2
+    mova                 m3, [tmpq+12*%%str] ; t3
+    mova                 m5, [tmpq+ 7*%%str]
+    mova                 m4, [tmpq+ 3*%%str]
+    SUMSUB_BADC           w,  5,  2,  4,  3
+    mova    [tmpq+ 7*%%str], m2
+    mova    [tmpq+ 3*%%str], m3
+
+    mova                 m3, [tmpq+28*%%str]
+    TRANSPOSE8x8W         6, 0, 5, 4, 1, 7, 2, 3, [tmpq+24*%%str], [tmpq+16*%%str], 1
+    mova    [tmpq+ 0*%%str], m6
+    mova    [tmpq+ 4*%%str], m0
+    mova    [tmpq+ 8*%%str], m5
+    mova    [tmpq+12*%%str], m4
+    mova    [tmpq+20*%%str], m7
+    mova    [tmpq+24*%%str], m2
+    mova    [tmpq+28*%%str], m3
+
+    mova                 m6, [tmpq+19*%%str]
+    mova                 m0, [tmpq+23*%%str]
+    mova                 m5, [tmpq+11*%%str]
+    mova                 m4, [tmpq+15*%%str]
+    mova                 m1, [tmpq+ 3*%%str]
+    mova                 m7, [tmpq+ 7*%%str]
+    mova                 m3, [tmpq+31*%%str]
+    TRANSPOSE8x8W         6, 0, 5, 4, 1, 7, 2, 3, [tmpq+27*%%str], [tmpq+19*%%str], 1
+    mova    [tmpq+ 3*%%str], m6
+    mova    [tmpq+ 7*%%str], m0
+    mova    [tmpq+11*%%str], m5
+    mova    [tmpq+15*%%str], m4
+    mova    [tmpq+23*%%str], m7
+    mova    [tmpq+27*%%str], m2
+    mova    [tmpq+31*%%str], m3
+
+    mova                 m1, [tmpq+ 6*%%str] ; t14
+    mova                 m0, [tmpq+ 2*%%str] ; t15
+    mova                 m7, [tmpq+21*%%str] ; t16
+    mova                 m6, [tmpq+29*%%str] ; t17
+    SUMSUB_BA             w,  7,  0,  2
+    SUMSUB_BA             w,  6,  1,  2
+    mova    [tmpq+29*%%str], m7
+    mova    [tmpq+ 2*%%str], m0
+    mova    [tmpq+21*%%str], m6
+    mova    [tmpq+ 6*%%str], m1
+
+    mova                 m1, [tmpq+14*%%str] ; t12
+    mova                 m0, [tmpq+10*%%str] ; t13
+    mova                 m5, [tmpq+ 9*%%str] ; t18
+    mova                 m4, [tmpq+13*%%str] ; t19
+    SUMSUB_BA             w,  5,  0,  2
+    SUMSUB_BA             w,  4,  1,  2
+    mova     [tmpq+10*%%str], m0
+    mova     [tmpq+14*%%str], m1
+
+    mova                 m1, [tmpq+22*%%str] ; t10
+    mova                 m0, [tmpq+18*%%str] ; t11
+    mova                 m3, [tmpq+17*%%str] ; t20
+    mova                 m2, [tmpq+ 1*%%str] ; t21
+    SUMSUB_BA             w,  3,  0,  6
+    SUMSUB_BA             w,  2,  1,  6
+    mova     [tmpq+18*%%str], m0
+    mova     [tmpq+22*%%str], m1
+
+    mova                 m7, [tmpq+30*%%str] ; t8
+    mova                 m6, [tmpq+26*%%str] ; t9
+    mova                 m1, [tmpq+25*%%str] ; t22
+    mova                 m0, [tmpq+ 5*%%str] ; t23
+    SUMSUB_BADC           w,  1,  6,  0,  7
+    mova     [tmpq+26*%%str], m6
+    mova     [tmpq+30*%%str], m7
+
+    mova                 m7, [tmpq+29*%%str]
+    TRANSPOSE8x8W         0, 1, 2, 3, 4, 5, 6, 7, [tmpq+21*%%str], [tmpq+17*%%str], 1
+    mova    [tmpq+ 1*%%str], m0
+    mova    [tmpq+ 5*%%str], m1
+    mova    [tmpq+ 9*%%str], m2
+    mova    [tmpq+13*%%str], m3
+    mova    [tmpq+21*%%str], m5
+    mova    [tmpq+25*%%str], m6
+    mova    [tmpq+29*%%str], m7
+
+    mova                 m0, [tmpq+ 2*%%str]
+    mova                 m1, [tmpq+ 6*%%str]
+    mova                 m2, [tmpq+10*%%str]
+    mova                 m3, [tmpq+14*%%str]
+    mova                 m4, [tmpq+18*%%str]
+    mova                 m5, [tmpq+22*%%str]
+    mova                 m7, [tmpq+30*%%str]
+    TRANSPOSE8x8W         0, 1, 2, 3, 4, 5, 6, 7, [tmpq+26*%%str], [tmpq+18*%%str], 1
+    mova    [tmpq+ 2*%%str], m0
+    mova    [tmpq+ 6*%%str], m1
+    mova    [tmpq+10*%%str], m2
+    mova    [tmpq+14*%%str], m3
+    mova    [tmpq+22*%%str], m5
+    mova    [tmpq+26*%%str], m6
+    mova    [tmpq+30*%%str], m7
+%endif
+%else ; pass != 1: sumsub the two halves and write them out through dstq/dst_endq
+    ; t0-7 is in [tmpq+{0,4,8,12,16,20,24,28}*%%str]
+    ; t8-15 is in [tmpq+{2,6,10,14,18,22,26,30}*%%str]
+    ; t16-19 and t23 is in [tmpq+{1,5,9,13,29}*%%str]
+    ; t20-22 is in m4-6
+    ; t24-31 is in m8-15
+
+%if cpuflag(ssse3)
+%define ROUND_REG [pw_512]
+%else
+%define ROUND_REG [pw_32]
+%endif
+
+%macro %%STORE_2X2 7-8 1 ; src[1-4], tmp[1-2], zero, inc_dst_ptrs
+    SUMSUB_BA            w, %4, %1, %5
+    SUMSUB_BA            w, %3, %2, %5
+    VP9_IDCT8_WRITEx2   %4, %3, %5, %6, %7, ROUND_REG, 6
+%if %8 == 1
+    add               dstq, stride2q ; dstq steps forward by stride2q
+%endif
+    VP9_IDCT8_WRITEx2   %2, %1, %5, %6, %7, ROUND_REG, 6, dst_endq
+%if %8 == 1
+    sub           dst_endq, stride2q ; dst_endq steps backward by stride2q
+%endif
+%endmacro
+
+%if ARCH_X86_64
+    pxor               m10, m10
+
+    ; store t0-1 and t30-31
+    mova                m8, [tmpq+ 0*%%str]
+    mova                m9, [tmpq+ 4*%%str]
+    %%STORE_2X2          8,  9,  0,  6, 12, 11, 10
+
+    ; store t2-3 and t28-29
+    mova                m8, [tmpq+ 8*%%str]
+    mova                m9, [tmpq+12*%%str]
+    %%STORE_2X2          8,  9, 14, 15, 12, 11, 10
+
+    ; store t4-5 and t26-27
+    mova                m8, [tmpq+16*%%str]
+    mova                m9, [tmpq+20*%%str]
+    %%STORE_2X2          8,  9,  7,  1, 12, 11, 10
+
+    ; store t6-7 and t24-25
+    mova                m8, [tmpq+24*%%str]
+    mova                m9, [tmpq+28*%%str]
+    %%STORE_2X2          8,  9,  4,  5, 12, 11, 10
+
+    ; store t8-9 and t22-23
+    mova                m8, [tmpq+30*%%str]
+    mova                m9, [tmpq+26*%%str]
+    mova                m0, [tmpq+ 5*%%str]
+    %%STORE_2X2          8,  9, 13,  0, 12, 11, 10
+
+    ; store t10-11 and t20-21
+    mova                m8, [tmpq+22*%%str]
+    mova                m9, [tmpq+18*%%str]
+    %%STORE_2X2          8,  9,  2,  3, 12, 11, 10
+
+    ; store t12-13 and t18-19
+    mova                m8, [tmpq+14*%%str]
+    mova                m9, [tmpq+10*%%str]
+    mova                m5, [tmpq+13*%%str]
+    mova                m4, [tmpq+ 9*%%str]
+    %%STORE_2X2          8,  9,  4,  5, 12, 11, 10
+
+    ; store t14-17
+    mova                m8, [tmpq+ 6*%%str]
+    mova                m9, [tmpq+ 2*%%str]
+    mova                m5, [tmpq+29*%%str]
+    mova                m4, [tmpq+21*%%str]
+    %%STORE_2X2          8,  9,  4,  5, 12, 11, 10, 0
+
+    SWAP                 1, 10 ; zero
+%else
+    mova   [tmpq+ 1*%%str], m1
+    mova   [tmpq+11*%%str], m2
+    mova   [tmpq+15*%%str], m3
+    mova   [tmpq+17*%%str], m4
+    mova   [tmpq+19*%%str], m5
+    pxor                m1, m1
+
+    ; store t0-1 and t30-31
+    mova                m2, [tmpq+ 0*%%str]
+    mova                m3, [tmpq+ 4*%%str]
+    %%STORE_2X2          2,  3,  0,  6, 4, 5, 1
+
+    ; store t2-3 and t28-29
+    mova                m2, [tmpq+ 8*%%str]
+    mova                m3, [tmpq+12*%%str]
+    mova                m0, [tmpq+ 3*%%str]
+    mova                m6, [tmpq+ 7*%%str]
+    %%STORE_2X2          2,  3,  0,  6, 4, 5, 1
+
+    ; store t4-5 and t26-27
+    mova                m2, [tmpq+16*%%str]
+    mova                m3, [tmpq+20*%%str]
+    mova                m0, [tmpq+ 1*%%str]
+    %%STORE_2X2          2,  3,  7,  0, 4, 5, 1
+
+    ; store t6-7 and t24-25
+    mova                m2, [tmpq+24*%%str]
+    mova                m3, [tmpq+28*%%str]
+    mova                m0, [tmpq+17*%%str]
+    mova                m6, [tmpq+19*%%str]
+    %%STORE_2X2          2,  3,  0,  6, 4, 5, 1
+
+    ; store t8-9 and t22-23
+    mova                m2, [tmpq+30*%%str]
+    mova                m3, [tmpq+26*%%str]
+    mova                m0, [tmpq+25*%%str]
+    mova                m6, [tmpq+ 5*%%str]
+    %%STORE_2X2          2,  3,  0,  6, 4, 5, 1
+
+    ; store t10-11 and t20-21
+    mova                m2, [tmpq+22*%%str]
+    mova                m3, [tmpq+18*%%str]
+    mova                m0, [tmpq+11*%%str]
+    mova                m6, [tmpq+15*%%str]
+    %%STORE_2X2          2,  3,  0,  6, 4, 5, 1
+
+    ; store t12-13 and t18-19
+    mova                m2, [tmpq+14*%%str]
+    mova                m3, [tmpq+10*%%str]
+    mova                m6, [tmpq+13*%%str]
+    mova                m0, [tmpq+ 9*%%str]
+    %%STORE_2X2          2,  3,  0,  6, 4, 5, 1
+
+    ; store t14-17
+    mova                m2, [tmpq+ 6*%%str]
+    mova                m3, [tmpq+ 2*%%str]
+    mova                m6, [tmpq+29*%%str]
+    mova                m0, [tmpq+21*%%str]
+    %%STORE_2X2          2,  3,  0,  6, 4, 5, 1, 0
+%endif
+%undef ROUND_REG
+%endif
+%endmacro
+
+%macro VP9_IDCT_IDCT_32x32_ADD_XMM 1 ; cpu flavor, forwarded to INIT_XMM
+INIT_XMM %1
+cglobal vp9_idct_idct_32x32_add, 0, 6 + ARCH_X86_64 * 3, 16, 2048, dst, stride, block, eob
+    movifnidn         eobd, dword eobm
+%if cpuflag(ssse3)
+    cmp eobd, 135                           ; ssse3 and up: pick a reduced transform from eob
+    jg .idctfull
+    cmp eobd, 34
+    jg .idct16x16
+    cmp eobd, 1
+    jg .idct8x8
+%else
+    cmp eobd, 1                             ; otherwise only dc-only vs. full transform
+    jg .idctfull
+%endif
+
+    ; dc-only case
+    movifnidn       blockq, blockmp
+    movifnidn         dstq, dstmp
+    movifnidn      strideq, stridemp
+%if cpuflag(ssse3)
+    movd                m0, [blockq]
+    mova                m1, [pw_11585x2]
+    pmulhrsw            m0, m1              ; (dc * 11585 + 8192) >> 14
+    pmulhrsw            m0, m1              ; ... applied a second time
+%else
+    DEFINE_ARGS dst, stride, block, coef
+    movsx            coefd, word [blockq]
+    imul             coefd, 11585
+    add              coefd, 8192
+    sar              coefd, 14
+    imul             coefd, 11585
+    add              coefd, (32 << 14) + 8192   ; rounding for >>14 and for the final >>6 combined
+    sar              coefd, 14 + 6
+    movd                m0, coefd
+%endif
+    SPLATW              m0, m0, q0000
+%if cpuflag(ssse3)
+    pmulhrsw            m0, [pw_512]        ; (x*512 + (1<<14))>>15 <=> (x+32)>>6
+%endif
+    pxor                m5, m5
+    movd          [blockq], m5              ; clear the dc coefficient
+%rep 31
+    VP9_STORE_2XFULL    0, 1, 2, 3, 4, 5, mmsize
+    add               dstq, strideq         ; step dst by one stride (31 repeats + the final store below)
+%endrep
+    VP9_STORE_2XFULL    0, 1, 2, 3, 4, 5, mmsize
+    RET
+
+%if ARCH_X86_64
+    DEFINE_ARGS dst_bak, stride, block, cnt, dst, stride30, dst_end, stride2, tmp
+%else
+%define dst_bakq r0mp
+%endif
+%if cpuflag(ssse3)
+.idct8x8:
+%if ARCH_X86_32
+    DEFINE_ARGS block, u1, u2, u3, u4, tmp
+    mov             blockq, r2mp
+%endif
+    mov               tmpq, rsp             ; intermediate buffer is the stack area requested in cglobal
+    VP9_IDCT32_1D   blockq, 1, 8            ; pass 1 (single call, no loop)
+
+%if ARCH_X86_32
+    DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp
+    mov            strideq, r1mp
+%define cntd dword r3m
+%endif
+    mov          stride30q, strideq         ; stride
+    lea           stride2q, [strideq*2]     ; stride*2
+    shl          stride30q, 5               ; stride*32
+    mov               cntd, 4               ; pass 2 runs 4 times, 8 dst bytes per iteration
+    sub          stride30q, stride2q        ; stride*30
+.loop2_8x8:
+    mov               dstq, dst_bakq
+    lea           dst_endq, [dstq+stride30q]
+    VP9_IDCT32_1D     tmpq, 2, 8
+    add           dst_bakq, 8
+    add               tmpq, 16
+    dec               cntd
+    jg .loop2_8x8
+
+    ; at the end of the loop, m1 should still be zero
+    ; use that to zero out block coefficients
+%if ARCH_X86_32
+    DEFINE_ARGS block
+    mov             blockq, r2mp
+%endif
+    ZERO_BLOCK      blockq, 64,  8, m1
+    RET
+
+.idct16x16:
+%if ARCH_X86_32
+    DEFINE_ARGS block, tmp, cnt
+    mov             blockq, r2mp
+%endif
+    mov               cntd, 2               ; pass 1 runs twice
+    mov               tmpq, rsp
+.loop1_16x16:
+    VP9_IDCT32_1D   blockq, 1, 16
+    add             blockq, 16
+    add               tmpq, 512
+    dec               cntd
+    jg .loop1_16x16
+
+%if ARCH_X86_64
+    sub             blockq, 32              ; undo the 2 * 16 byte advance of pass 1
+%else
+    DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp
+    mov            strideq, r1mp
+%define cntd dword r3m
+%endif
+
+    mov          stride30q, strideq         ; stride
+    lea           stride2q, [strideq*2]     ; stride*2
+    shl          stride30q, 5               ; stride*32
+    mov               cntd, 4
+    mov               tmpq, rsp             ; rewind to the start of the intermediate buffer
+    sub          stride30q, stride2q        ; stride*30
+.loop2_16x16:
+    mov               dstq, dst_bakq
+    lea           dst_endq, [dstq+stride30q]
+    VP9_IDCT32_1D     tmpq, 2, 16
+    add           dst_bakq, 8
+    add               tmpq, 16
+    dec               cntd
+    jg .loop2_16x16
+
+    ; at the end of the loop, m1 should still be zero
+    ; use that to zero out block coefficients
+%if ARCH_X86_32
+    DEFINE_ARGS block
+    mov             blockq, r2mp
+%endif
+    ZERO_BLOCK      blockq, 64, 16, m1
+    RET
+%endif
+
+.idctfull:
+%if ARCH_X86_32
+    DEFINE_ARGS block, tmp, cnt
+    mov             blockq, r2mp
+%endif
+    mov               cntd, 4               ; pass 1 runs 4 times
+    mov               tmpq, rsp
+.loop1_full:
+    VP9_IDCT32_1D   blockq, 1
+    add             blockq, 16
+    add               tmpq, 512
+    dec               cntd
+    jg .loop1_full
+
+%if ARCH_X86_64
+    sub             blockq, 64              ; undo the 4 * 16 byte advance of pass 1
+%else
+    DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp
+    mov            strideq, r1mp
+%define cntd dword r3m
+%endif
+
+    mov          stride30q, strideq         ; stride
+    lea           stride2q, [strideq*2]     ; stride*2
+    shl          stride30q, 5               ; stride*32
+    mov               cntd, 4
+    mov               tmpq, rsp             ; rewind to the start of the intermediate buffer
+    sub          stride30q, stride2q        ; stride*30
+.loop2_full:
+    mov               dstq, dst_bakq
+    lea           dst_endq, [dstq+stride30q]
+    VP9_IDCT32_1D     tmpq, 2
+    add           dst_bakq, 8
+    add               tmpq, 16
+    dec               cntd
+    jg .loop2_full
+
+    ; at the end of the loop, m1 should still be zero
+    ; use that to zero out block coefficients
+%if ARCH_X86_32
+    DEFINE_ARGS block
+    mov             blockq, r2mp
+%endif
+    ZERO_BLOCK      blockq, 64, 32, m1
+    RET
+%endmacro
+
+VP9_IDCT_IDCT_32x32_ADD_XMM sse2
+VP9_IDCT_IDCT_32x32_ADD_XMM ssse3
+VP9_IDCT_IDCT_32x32_ADD_XMM avx
+
+; this is almost identical to VP9_STORE_2X, but it does two rows
+; for slightly improved interleaving, and it omits vpermq since the
+; input is DC so all values are identical
+%macro VP9_STORE_YMM_DC_2X2 6 ; reg, tmp1, tmp2, tmp3, tmp4, zero
+    mova               m%2, [dstq]              ; row 0
+    mova               m%4, [dstq+strideq]      ; row 1
+    punpckhbw          m%3, m%2, m%6            ; widen bytes to words by interleaving with zero
+    punpcklbw          m%2, m%6
+    punpckhbw          m%5, m%4, m%6
+    punpcklbw          m%4, m%6
+    paddw              m%3, m%1                 ; add the value in reg to every pixel
+    paddw              m%2, m%1
+    paddw              m%5, m%1
+    paddw              m%4, m%1
+    packuswb           m%2, m%3                 ; back to bytes with unsigned saturation
+    packuswb           m%4, m%5
+    mova  [dstq+strideq*0], m%2
+    mova  [dstq+strideq*1], m%4
+%endmacro
+
+%if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_idct_idct_32x32_add, 4, 9, 16, 2048, dst, stride, block, eob
+    cmp eobd, 135                           ; eob > 135: full transform
+    jg .idctfull
+    cmp eobd, 1                             ; eob > 1: 16x16 sub-transform (no 8x8 path here)
+    jg .idct16x16
+
+    ; dc-only case
+    mova                m1, [pw_11585x2]
+    vpbroadcastw        m0, [blockq]
+    pmulhrsw            m0, m1              ; (dc * 11585 + 8192) >> 14
+    pmulhrsw            m0, m1              ; ... applied a second time
+    pxor                m5, m5
+    pmulhrsw            m0, [pw_512]        ; (x*512 + (1<<14))>>15 <=> (x+32)>>6
+    movd          [blockq], xm5             ; clear the dc coefficient
+
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 16              ; 16 iterations of 2 rows each
+.loop_dc:
+    VP9_STORE_YMM_DC_2X2 0, 1, 2, 3, 4, 5
+    lea               dstq, [dstq+2*strideq]
+    dec               cntd
+    jg .loop_dc
+    RET
+
+    DEFINE_ARGS dst_bak, stride, block, cnt, dst, stride30, dst_end, stride2, tmp
+.idct16x16:
+    mov               tmpq, rsp             ; intermediate buffer is the stack area requested in cglobal
+    VP9_IDCT32_1D   blockq, 1, 16           ; pass 1 (single call, no loop)
+
+    mov          stride30q, strideq         ; stride
+    lea           stride2q, [strideq*2]     ; stride*2
+    shl          stride30q, 5               ; stride*32
+    mov               cntd, 2               ; pass 2 runs twice, 16 dst bytes per iteration
+    sub          stride30q, stride2q        ; stride*30
+.loop2_16x16:
+    mov               dstq, dst_bakq
+    lea           dst_endq, [dstq+stride30q]
+    VP9_IDCT32_1D     tmpq, 2, 16
+    add           dst_bakq, 16
+    add               tmpq, 32
+    dec               cntd
+    jg .loop2_16x16
+
+    ; at the end of the loop, m1 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 64, 16, m1
+    RET
+
+.idctfull:
+    mov               cntd, 2               ; pass 1 runs twice
+    mov               tmpq, rsp
+.loop1_full:
+    VP9_IDCT32_1D   blockq, 1
+    add             blockq, 32
+    add               tmpq, 1024
+    dec               cntd
+    jg .loop1_full
+
+    sub             blockq, 64              ; undo the 2 * 32 byte advance of pass 1
+
+    mov          stride30q, strideq         ; stride
+    lea           stride2q, [strideq*2]     ; stride*2
+    shl          stride30q, 5               ; stride*32
+    mov               cntd, 2
+    mov               tmpq, rsp             ; rewind to the start of the intermediate buffer
+    sub          stride30q, stride2q        ; stride*30
+.loop2_full:
+    mov               dstq, dst_bakq
+    lea           dst_endq, [dstq+stride30q]
+    VP9_IDCT32_1D     tmpq, 2
+    add           dst_bakq, 16
+    add               tmpq, 32
+    dec               cntd
+    jg .loop2_full
+
+    ; at the end of the loop, m1 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 64, 32, m1
+    RET
+%endif
diff --git a/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm b/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm
new file mode 100644
index 0000000000..902685edf6
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm
@@ -0,0 +1,2044 @@
+;******************************************************************************
+;* VP9 inverse transform x86 SIMD optimizations
+;*
+;* Copyright (C) 2015 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+%include "vp9itxfm_template.asm"
+
+SECTION_RODATA
+
+cextern pw_8
+cextern pw_1023
+cextern pw_2048
+cextern pw_4095
+cextern pw_m1
+cextern pd_1
+cextern pd_16
+cextern pd_32
+cextern pd_8192
+
+pd_8: times 4 dd 8                  ; dword rounding term for the >>4 in the 12bpp 4x4 writeout
+pd_3fff: times 4 dd 0x3fff          ; mask for the low 14 bits of a dword (see SUMSUB_MUL)
+
+cextern pw_11585x2
+
+cextern pw_5283_13377
+cextern pw_9929_13377
+cextern pw_15212_m13377
+cextern pw_15212_9929
+cextern pw_m5283_m15212
+cextern pw_13377x2
+cextern pw_m13377_13377
+cextern pw_13377_0
+
+pw_9929_m5283: times 4 dw 9929, -5283   ; word pair used by IADST4_12BPP_1D
+
+%macro COEF_PAIR 2-3 ; coef1, coef2[, flag]: import the pw_* word-pair constants for this pair
+cextern pw_m%1_%2
+cextern pw_%2_%1
+%if %0 == 3
+cextern pw_m%1_m%2
+%if %1 != %2
+cextern pw_m%2_%1
+cextern pw_%1_%2
+%endif
+%endif
+%endmacro
+
+COEF_PAIR  2404, 16207
+COEF_PAIR  3196, 16069, 1
+COEF_PAIR  4756, 15679
+COEF_PAIR  5520, 15426
+COEF_PAIR  6270, 15137, 1
+COEF_PAIR  8423, 14053
+COEF_PAIR 10394, 12665
+COEF_PAIR 11003, 12140
+COEF_PAIR 11585, 11585, 1
+COEF_PAIR 13160,  9760
+COEF_PAIR 13623,  9102, 1
+COEF_PAIR 14449,  7723
+COEF_PAIR 14811,  7005
+COEF_PAIR 15893,  3981
+COEF_PAIR 16305,  1606
+COEF_PAIR 16364,   804
+
+default_8x8:            ; 64 bytes, read as [default_8x8+eob-1]: number of first-pass iterations
+times 12 db 1
+times 52 db 2
+row_8x8:                ; 64 bytes; presumably the same lookup for another scan order (user not in view)
+times 18 db 1
+times 46 db 2
+col_8x8:                ; 64 bytes; presumably as above (user not in view)
+times 6 db 1
+times 58 db 2
+default_16x16:          ; 256 bytes, values 1-4; presumably the 16x16 equivalent (user not in view)
+times 10 db 1
+times 28 db 2
+times 51 db 3
+times 167 db 4
+row_16x16:              ; 256 bytes
+times 21 db 1
+times 45 db 2
+times 60 db 3
+times 130 db 4
+col_16x16:              ; 256 bytes
+times 5 db 1
+times 12 db 2
+times 25 db 3
+times 214 db 4
+default_32x32:          ; 1024 bytes, values 1-8; presumably the 32x32 equivalent (user not in view)
+times 9 db 1
+times 25 db 2
+times 36 db 3
+times 65 db 4
+times 105 db 5
+times 96 db 6
+times 112 db 7
+times 576 db 8
+
+SECTION .text
+
+%macro VP9_STORE_2X 6-7 dstq ; reg1, reg2, tmp1, tmp2, min, max, dst
+    mova               m%3, [%7]                ; row 0 of the destination
+    mova               m%4, [%7+strideq]        ; row 1 of the destination
+    paddw              m%3, m%1                 ; add the residual words
+    paddw              m%4, m%2
+    pmaxsw             m%3, m%5                 ; clamp to [min, max]
+    pmaxsw             m%4, m%5
+    pminsw             m%3, m%6
+    pminsw             m%4, m%6
+    mova              [%7], m%3
+    mova      [%7+strideq], m%4
+%endmacro
+
+%macro ZERO_BLOCK 4 ; mem, stride, nnzcpl, zero_reg
+%assign %%y 0
+%rep %3
+%assign %%x 0
+%rep %3*4/mmsize
+    mova      [%1+%%y+%%x], %4      ; nnzcpl rows of nnzcpl*4 bytes each, rows are stride bytes apart
+%assign %%x (%%x+mmsize)
+%endrep
+%assign %%y (%%y+%2)
+%endrep
+%endmacro
+
+; the input coefficients are scaled up by 2 bits (which we downscale immediately
+; in the iwht), and are otherwise orthonormally increased by 1 bit per iwht_1d.
+; therefore, a diff of 10-12+sign bit will fit in 12-14+sign bit after scaling,
+; i.e. everything can be done in 15+1bpp words. Since the quant fractional bits
+; add 2 bits, we need to scale before converting to word in 12bpp, since the
+; input will be 16+sign bit which doesn't fit in 15+sign words, but in 10bpp
+; we can scale after converting to words (which is half the instructions),
+; since the input is only 14+sign bit, which fits in 15+sign words directly.
+
+%macro IWHT4_FN 2 ; bpp, max
+cglobal vp9_iwht_iwht_4x4_add_%1, 3, 3, 8, dst, stride, block, eob
+    mova                m7, [pw_%2]             ; upper clamp bound for the stores below
+    mova                m0, [blockq+0*16+0]     ; rows are 16 bytes (4 dwords) apart
+    mova                m1, [blockq+1*16+0]
+%if %1 >= 12
+    mova                m4, [blockq+0*16+8]
+    mova                m5, [blockq+1*16+8]
+    psrad               m0, 2                   ; 12bpp: downscale as dwords, then pack to words
+    psrad               m1, 2
+    psrad               m4, 2
+    psrad               m5, 2
+    packssdw            m0, m4
+    packssdw            m1, m5
+%else
+    packssdw            m0, [blockq+0*16+8]     ; 10bpp: pack to words first, then downscale
+    packssdw            m1, [blockq+1*16+8]
+    psraw               m0, 2
+    psraw               m1, 2
+%endif
+    mova                m2, [blockq+2*16+0]
+    mova                m3, [blockq+3*16+0]
+%if %1 >= 12
+    mova                m4, [blockq+2*16+8]
+    mova                m5, [blockq+3*16+8]
+    psrad               m2, 2
+    psrad               m3, 2
+    psrad               m4, 2
+    psrad               m5, 2
+    packssdw            m2, m4
+    packssdw            m3, m5
+%else
+    packssdw            m2, [blockq+2*16+8]
+    packssdw            m3, [blockq+3*16+8]
+    psraw               m2, 2
+    psraw               m3, 2
+%endif
+
+    VP9_IWHT4_1D
+    TRANSPOSE4x4W        0, 1, 2, 3, 4
+    VP9_IWHT4_1D
+
+    pxor                m6, m6                  ; zero: lower clamp bound and fill value for ZERO_BLOCK
+    VP9_STORE_2X         0, 1, 4, 5, 6, 7
+    lea               dstq, [dstq+strideq*2]
+    VP9_STORE_2X         2, 3, 4, 5, 6, 7
+    ZERO_BLOCK      blockq, 16, 4, m6
+    RET
+%endmacro
+
+INIT_MMX mmxext
+IWHT4_FN 10, 1023
+INIT_MMX mmxext
+IWHT4_FN 12, 4095
+
+%macro VP9_IDCT4_WRITEOUT 0 ; round m0-3 by (x+8)>>4 and add them to 4 dst rows; expects m4 = 0
+%if cpuflag(ssse3)
+    mova                m5, [pw_2048]
+    pmulhrsw            m0, m5                  ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
+    pmulhrsw            m1, m5
+    pmulhrsw            m2, m5
+    pmulhrsw            m3, m5
+%else
+    mova                m5, [pw_8]
+    paddw               m0, m5                  ; (x+8)>>4
+    paddw               m1, m5
+    paddw               m2, m5
+    paddw               m3, m5
+    psraw               m0, 4
+    psraw               m1, 4
+    psraw               m2, 4
+    psraw               m3, 4
+%endif
+    mova                m5, [pw_1023]           ; upper clamp bound (m4 is the lower bound)
+    VP9_STORE_2X         0,  1,  6,  7,  4,  5
+    lea               dstq, [dstq+2*strideq]
+    VP9_STORE_2X         2,  3,  6,  7,  4,  5
+%endmacro
+
+%macro DC_ONLY 2 ; shift, zero
+    mov              coefd, dword [blockq]      ; load the dc coefficient
+    movd          [blockq], %2                  ; ... and clear it in the block
+    imul             coefd, 11585
+    add              coefd, 8192
+    sar              coefd, 14                  ; first (coef * 11585 + 8192) >> 14
+    imul             coefd, 11585
+    add              coefd, ((1 << (%1 - 1)) << 14) + 8192  ; rounding for >>14 and for >>shift combined
+    sar              coefd, 14 + %1
+%endmacro
+
+; 4x4 coefficients are 5+depth+sign bits, so for 10bpp, everything still fits
+; in 15+1 words without additional effort, since the coefficients are 15bpp.
+
+%macro IDCT4_10_FN 0
+cglobal vp9_idct_idct_4x4_add_10, 4, 4, 8, dst, stride, block, eob
+    cmp               eobd, 1
+    jg .idctfull
+
+    ; dc-only
+    pxor                m4, m4                  ; zero: lower clamp bound, also clears the dc coef
+%if cpuflag(ssse3)
+    movd                m0, [blockq]
+    movd          [blockq], m4
+    mova                m5, [pw_11585x2]
+    pmulhrsw            m0, m5                  ; (dc * 11585 + 8192) >> 14
+    pmulhrsw            m0, m5                  ; ... applied a second time
+%else
+    DEFINE_ARGS dst, stride, block, coef
+    DC_ONLY              4, m4
+    movd                m0, coefd
+%endif
+    pshufw              m0, m0, 0               ; broadcast the dc word to all 4 lanes
+    mova                m5, [pw_1023]           ; upper clamp bound
+%if cpuflag(ssse3)
+    pmulhrsw            m0, [pw_2048]       ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
+%endif
+    VP9_STORE_2X         0,  0,  6,  7,  4,  5
+    lea               dstq, [dstq+2*strideq]
+    VP9_STORE_2X         0,  0,  6,  7,  4,  5
+    RET
+
+.idctfull:
+    mova                m0, [blockq+0*16+0]     ; rows are 16 bytes (4 dwords) apart
+    mova                m1, [blockq+1*16+0]
+    packssdw            m0, [blockq+0*16+8]     ; pack the dword coefficients to words
+    packssdw            m1, [blockq+1*16+8]
+    mova                m2, [blockq+2*16+0]
+    mova                m3, [blockq+3*16+0]
+    packssdw            m2, [blockq+2*16+8]
+    packssdw            m3, [blockq+3*16+8]
+
+%if cpuflag(ssse3)
+    mova                m6, [pw_11585x2]
+%endif
+    mova                m7, [pd_8192]       ; rounding
+    VP9_IDCT4_1D
+    TRANSPOSE4x4W  0, 1, 2, 3, 4
+    VP9_IDCT4_1D
+
+    pxor                m4, m4                  ; zero for ZERO_BLOCK and as lower clamp bound
+    ZERO_BLOCK      blockq, 16, 4, m4
+    VP9_IDCT4_WRITEOUT
+    RET
+%endmacro
+
+INIT_MMX mmxext
+IDCT4_10_FN
+INIT_MMX ssse3
+IDCT4_10_FN
+
+%macro IADST4_FN 4 ; type1, 1D macro for pass 1, type2, 1D macro for pass 2
+cglobal vp9_%1_%3_4x4_add_10, 3, 3, 0, dst, stride, block, eob
+%if WIN64 && notcpuflag(ssse3)
+    WIN64_SPILL_XMM 8
+%endif
+    movdqa            xmm5, [pd_8192]           ; rounding constant, loaded through an xmm register
+    mova                m0, [blockq+0*16+0]     ; rows are 16 bytes (4 dwords) apart
+    mova                m1, [blockq+1*16+0]
+    packssdw            m0, [blockq+0*16+8]     ; pack the dword coefficients to words
+    packssdw            m1, [blockq+1*16+8]
+    mova                m2, [blockq+2*16+0]
+    mova                m3, [blockq+3*16+0]
+    packssdw            m2, [blockq+2*16+8]
+    packssdw            m3, [blockq+3*16+8]
+
+%if cpuflag(ssse3)
+    mova                m6, [pw_11585x2]
+%endif
+%ifnidn %1%3, iadstiadst
+    movdq2q             m7, xmm5                ; copy the rounding constant to m7 (skipped for iadst/iadst)
+%endif
+    VP9_%2_1D
+    TRANSPOSE4x4W  0, 1, 2, 3, 4
+    VP9_%4_1D
+
+    pxor                m4, m4                  ; zero for ZERO_BLOCK and as lower clamp bound
+    ZERO_BLOCK      blockq, 16, 4, m4
+    VP9_IDCT4_WRITEOUT
+    RET
+%endmacro
+
+INIT_MMX sse2
+IADST4_FN idct,  IDCT4,  iadst, IADST4
+IADST4_FN iadst, IADST4, idct,  IDCT4
+IADST4_FN iadst, IADST4, iadst, IADST4
+
+INIT_MMX ssse3
+IADST4_FN idct,  IDCT4,  iadst, IADST4
+IADST4_FN iadst, IADST4, idct,  IDCT4
+IADST4_FN iadst, IADST4, iadst, IADST4
+
+; inputs and outputs are dwords, coefficients are words
+;
+; dst1 = src1 * coef1 + src2 * coef2 + rnd >> 14
+; dst2 = src1 * coef2 - src2 * coef1 + rnd >> 14
+%macro SUMSUB_MUL 6-8 [pd_8192], [pd_3fff] ; src/dst 1-2, tmp1-2, coef1-2, rnd, mask
+    pand               m%3, m%1, %8             ; low 14 bits of src1
+    pand               m%4, m%2, %8             ; low 14 bits of src2
+    psrad              m%1, 14                  ; remaining high bits of src1
+    psrad              m%2, 14                  ; remaining high bits of src2
+    packssdw           m%4, m%2                 ; words: low parts of src2 | high parts of src2
+    packssdw           m%3, m%1                 ; words: low parts of src1 | high parts of src1
+    punpckhwd          m%2, m%4, m%3            ; interleaved high parts (src2, src1)
+    punpcklwd          m%4, m%3                 ; interleaved low parts (src2, src1)
+    pmaddwd            m%3, m%4, [pw_%6_%5]     ; dst1 from the low parts
+    pmaddwd            m%1, m%2, [pw_%6_%5]     ; dst1 from the high parts
+    pmaddwd            m%4, [pw_m%5_%6]         ; dst2 from the low parts
+    pmaddwd            m%2, [pw_m%5_%6]         ; dst2 from the high parts
+    paddd              m%3, %7                  ; only the low-part products are rounded and shifted
+    paddd              m%4, %7
+    psrad              m%3, 14
+    psrad              m%4, 14
+    paddd              m%1, m%3                 ; dst1 = high-part product + shifted low-part product
+    paddd              m%2, m%4                 ; dst2 likewise
+%endmacro
+
+%macro IDCT4_12BPP_1D 0-8 [pd_8192], [pd_3fff], 0, 1, 2, 3, 4, 5 ; rnd, mask, in/out0-3, tmp0-1
+    SUMSUB_MUL          %3, %5, %7, %8, 11585, 11585, %1, %2   ; in0/in2 with 11585/11585
+    SUMSUB_MUL          %4, %6, %7, %8, 15137,  6270, %1, %2   ; in1/in3 with 15137/6270
+    SUMSUB_BA        d, %4, %3, %7
+    SUMSUB_BA        d, %6, %5, %7
+    SWAP                %4, %6, %3                             ; rename registers so results sit in in/out0-3 order
+%endmacro
+
+%macro STORE_4x4 6 ; tmp1-2, reg1-2, min, max
+    movh               m%1, [dstq+strideq*0]    ; rows 0/1 go into the low/high half of tmp1
+    movh               m%2, [dstq+strideq*2]    ; rows 2/3 go into the low/high half of tmp2
+    movhps             m%1, [dstq+strideq*1]
+    movhps             m%2, [dstq+stride3q ]    ; stride3q must be set up by the caller
+    paddw              m%1, m%3                 ; add the residual words
+    paddw              m%2, m%4
+    pmaxsw             m%1, %5                  ; clamp to [min, max]
+    pmaxsw             m%2, %5
+    pminsw             m%1, %6
+    pminsw             m%2, %6
+    movh   [dstq+strideq*0], m%1
+    movhps [dstq+strideq*1], m%1
+    movh   [dstq+strideq*2], m%2
+    movhps [dstq+stride3q ], m%2
+%endmacro
+
+%macro ROUND_AND_STORE_4x4 8 ; reg1-4, min, max, rnd, shift
+    paddd              m%1, %7                  ; (x + rnd) >> shift on all four dword rows
+    paddd              m%2, %7
+    paddd              m%3, %7
+    paddd              m%4, %7
+    psrad              m%1, %8
+    psrad              m%2, %8
+    psrad              m%3, %8
+    psrad              m%4, %8
+    packssdw           m%1, m%2                 ; rows 0/1 as words in reg1
+    packssdw           m%3, m%4                 ; rows 2/3 as words in reg3
+    STORE_4x4           %2, %4, %1, %3, %5, %6  ; reg2/reg4 are free now and serve as temporaries
+%endmacro
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_4x4_add_12, 4, 4, 8, dst, stride, block, eob
+    cmp               eobd, 1
+    jg .idctfull
+
+    ; dc-only - this is special, since for 4x4 12bpp, the max coef size is
+    ; 17+sign bpp. Since the multiply is with 11585, which is 14bpp, the
+    ; result of each multiply is 31+sign bit, i.e. it _exactly_ fits in a
+    ; dword. After the final shift (4), the result is 13+sign bits, so we
+    ; don't need any additional processing to fit it in a word
+    DEFINE_ARGS dst, stride, block, coef
+    pxor                m4, m4                  ; zero: clears the dc coef and is the lower clamp bound
+    DC_ONLY              4, m4
+    movd                m0, coefd
+    pshuflw             m0, m0, q0000           ; broadcast the dc word to all 8 lanes
+    punpcklqdq          m0, m0
+    mova                m5, [pw_4095]           ; upper clamp bound
+    DEFINE_ARGS dst, stride, stride3
+    lea           stride3q, [strideq*3]
+    STORE_4x4            1, 3, 0, 0, m4, m5
+    RET
+
+.idctfull:
+    DEFINE_ARGS dst, stride, block, eob
+    mova                m0, [blockq+0*16]       ; one row of 4 dword coefficients per register
+    mova                m1, [blockq+1*16]
+    mova                m2, [blockq+2*16]
+    mova                m3, [blockq+3*16]
+    mova                m6, [pd_8192]           ; rounding
+    mova                m7, [pd_3fff]           ; low-14-bit mask
+
+    IDCT4_12BPP_1D      m6, m7
+    TRANSPOSE4x4D        0, 1, 2, 3, 4
+    IDCT4_12BPP_1D      m6, m7
+
+    pxor                m4, m4                  ; zero for ZERO_BLOCK and as lower clamp bound
+    ZERO_BLOCK      blockq, 16, 4, m4
+
+    ; writeout
+    DEFINE_ARGS dst, stride, stride3
+    lea           stride3q, [strideq*3]
+    mova                m5, [pw_4095]           ; upper clamp bound
+    mova                m6, [pd_8]              ; rounding for the >>4 below
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m4, m5, m6, 4
+    RET
+
+%macro SCRATCH 3-4 ; reg, spare reg (x86-64), memory slot (x86-32)[, alias name]
+%if ARCH_X86_64
+    SWAP                %1, %2                  ; x86-64: park the value by swapping with the spare register
+%if %0 == 4
+%define reg_%4 m%2
+%endif
+%else
+    mova              [%3], m%1                 ; x86-32: spill the value to memory
+%if %0 == 4
+%define reg_%4 [%3]
+%endif
+%endif
+%endmacro
+
+%macro UNSCRATCH 3-4 ; reg, spare reg (x86-64), memory slot (x86-32)[, alias name to undefine]
+%if ARCH_X86_64
+    SWAP                %1, %2                  ; x86-64: swap the parked value back
+%else
+    mova               m%1, [%3]                ; x86-32: reload the value from memory
+%endif
+%if %0 == 4
+%undef reg_%4
+%endif
+%endmacro
+
+%macro PRELOAD 2-3 ; reg (x86-64 only), memory operand[, alias name]
+%if ARCH_X86_64
+    mova               m%1, [%2]                ; x86-64: keep the constant in a register
+%if %0 == 3
+%define reg_%3 m%1
+%endif
+%elif %0 == 3
+%define reg_%3 [%2]
+%endif
+%endmacro
+
+; out0 =  5283 * in0 + 13377 * in1 + 15212 * in2 +  9929 * in3 + rnd >> 14
+; out1 =  9929 * in0 + 13377 * in1 -  5283 * in2 - 15212 * in3 + rnd >> 14
+; out2 = 13377 * in0               - 13377 * in2 + 13377 * in3 + rnd >> 14
+; out3 = 15212 * in0 - 13377 * in1 +  9929 * in2 -  5283 * in3 + rnd >> 14
+%macro IADST4_12BPP_1D 0-2 [pd_8192], [pd_3fff] ; rnd, mask
+    pand                m4, m0, %2                  ; low 14 bits of in0
+    pand                m5, m1, %2                  ; low 14 bits of in1
+    psrad               m0, 14                      ; remaining high bits of in0
+    psrad               m1, 14                      ; remaining high bits of in1
+    packssdw            m5, m1
+    packssdw            m4, m0
+    punpckhwd           m1, m4, m5                  ; interleaved high parts of in0/in1
+    punpcklwd           m4, m5                      ; interleaved low parts of in0/in1
+    pand                m5, m2, %2                  ; same split for in2/in3
+    pand                m6, m3, %2
+    psrad               m2, 14
+    psrad               m3, 14
+    packssdw            m6, m3
+    packssdw            m5, m2
+    punpckhwd           m3, m5, m6                  ; interleaved high parts of in2/in3
+    punpcklwd           m5, m6                      ; interleaved low parts of in2/in3
+    SCRATCH              1,  8, rsp+0*mmsize, a
+    SCRATCH              5,  9, rsp+1*mmsize, b
+
+    ; m1/3 have the high bits of 0,1,2,3
+    ; m4/5 have the low bits of 0,1,2,3
+    ; m0/2/6/7 are free
+
+    mova                m2, [pw_15212_9929]
+    mova                m0, [pw_5283_13377]
+    pmaddwd             m7, m2, reg_b
+    pmaddwd             m6, m4, m0
+    pmaddwd             m2, m3
+    pmaddwd             m0, reg_a
+    paddd               m6, m7
+    paddd               m0, m2
+    mova                m1, [pw_m13377_13377]
+    mova                m5, [pw_13377_0]
+    pmaddwd             m7, m1, reg_b
+    pmaddwd             m2, m4, m5
+    pmaddwd             m1, m3
+    pmaddwd             m5, reg_a
+    paddd               m2, m7
+    paddd               m1, m5
+    paddd               m6, %1                      ; only the low-part sums are rounded and shifted
+    paddd               m2, %1
+    psrad               m6, 14
+    psrad               m2, 14
+    paddd               m0, m6                      ; t0
+    paddd               m2, m1                      ; t2
+
+    mova                m7, [pw_m5283_m15212]
+    mova                m5, [pw_9929_13377]
+    pmaddwd             m1, m7, reg_b
+    pmaddwd             m6, m4, m5
+    pmaddwd             m7, m3
+    pmaddwd             m5, reg_a
+    paddd               m6, m1
+    paddd               m7, m5
+    UNSCRATCH            5,  9, rsp+1*mmsize, b
+    pmaddwd             m5, [pw_9929_m5283]
+    pmaddwd             m4, [pw_15212_m13377]
+    pmaddwd             m3, [pw_9929_m5283]
+    UNSCRATCH            1,  8, rsp+0*mmsize, a
+    pmaddwd             m1, [pw_15212_m13377]
+    paddd               m4, m5
+    paddd               m3, m1
+    paddd               m6, %1
+    paddd               m4, %1
+    psrad               m6, 14
+    psrad               m4, 14
+    paddd               m7, m6                      ; t1
+    paddd               m3, m4                      ; t3
+
+    SWAP                 1, 7                       ; t1 was computed in m7; rename it to m1
+%endmacro
+
+%macro IADST4_12BPP_FN 4 ; type1, 1D macro prefix for pass 1, type2, 1D macro prefix for pass 2
+cglobal vp9_%1_%3_4x4_add_12, 3, 3, 12, 2 * ARCH_X86_32 * mmsize, dst, stride, block, eob
+    mova                m0, [blockq+0*16]       ; one row of 4 dword coefficients per register
+    mova                m1, [blockq+1*16]
+    mova                m2, [blockq+2*16]
+    mova                m3, [blockq+3*16]
+
+    PRELOAD             10, pd_8192, rnd
+    PRELOAD             11, pd_3fff, mask
+    %2_12BPP_1D    reg_rnd, reg_mask
+    TRANSPOSE4x4D        0, 1, 2, 3, 4
+    %4_12BPP_1D    reg_rnd, reg_mask
+
+    pxor                m4, m4                  ; zero for ZERO_BLOCK and as lower clamp bound
+    ZERO_BLOCK      blockq, 16, 4, m4
+
+    ; writeout
+    DEFINE_ARGS dst, stride, stride3
+    lea           stride3q, [strideq*3]
+    mova                m5, [pw_4095]           ; upper clamp bound
+    mova                m6, [pd_8]              ; rounding for the >>4 below
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m4, m5, m6, 4
+    RET
+%endmacro
+
+INIT_XMM sse2
+IADST4_12BPP_FN idct,  IDCT4,  iadst, IADST4
+IADST4_12BPP_FN iadst, IADST4, idct,  IDCT4
+IADST4_12BPP_FN iadst, IADST4, iadst, IADST4
+
+; the following line has not been executed at the end of this macro:
+; UNSCRATCH            6, 8, rsp+%5*mmsize
+%macro IDCT8_1D 1-5 [pd_8192], [pd_3fff], 2 * mmsize, 17 ; src, rnd, mask, src_stride, stack_offset
+    mova                m0, [%1+0*%4]
+    mova                m2, [%1+2*%4]
+    mova                m4, [%1+4*%4]
+    mova                m6, [%1+6*%4]
+    IDCT4_12BPP_1D      %2, %3, 0, 2, 4, 6, 1, 3            ; m0/2/4/6 have t0/1/2/3
+    SCRATCH              4, 8, rsp+(%5+0)*mmsize
+    SCRATCH              6, 9, rsp+(%5+1)*mmsize
+    mova                m1, [%1+1*%4]
+    mova                m3, [%1+3*%4]
+    mova                m5, [%1+5*%4]
+    mova                m7, [%1+7*%4]
+    SUMSUB_MUL           1, 7, 4, 6, 16069,  3196, %2, %3   ; m1=t7a, m7=t4a
+    SUMSUB_MUL           5, 3, 4, 6,  9102, 13623, %2, %3   ; m5=t6a, m3=t5a
+    SUMSUB_BA         d, 3, 7, 4                            ; m3=t4, m7=t5a
+    SUMSUB_BA         d, 5, 1, 4                            ; m5=t7, m1=t6a
+    SUMSUB_MUL           1, 7, 4, 6, 11585, 11585, %2, %3   ; m1=t6, m7=t5
+    SUMSUB_BA         d, 5, 0, 4                            ; m5=out0, m0=out7
+    SUMSUB_BA         d, 1, 2, 4                            ; m1=out1, m2=out6
+    UNSCRATCH            4, 8, rsp+(%5+0)*mmsize
+    UNSCRATCH            6, 9, rsp+(%5+1)*mmsize
+    SCRATCH              2, 8, rsp+(%5+0)*mmsize            ; out6 stays parked here for the caller
+    SUMSUB_BA         d, 7, 4, 2                            ; m7=out2, m4=out5
+    SUMSUB_BA         d, 3, 6, 2                            ; m3=out3, m6=out4
+    SWAP                 0, 5, 4, 6, 2, 7
+%endmacro
+
+%macro STORE_2x8 5-7 dstq, strideq ; tmp1-2, reg, min, max[, dst, stride]
+    mova               m%1, [%6+%7*0]           ; row 0 of the destination
+    mova               m%2, [%6+%7*1]           ; row 1 of the destination
+    paddw              m%1, m%3                 ; the same residual register is added to both rows
+    paddw              m%2, m%3
+    pmaxsw             m%1, %4                  ; clamp to [min, max]
+    pmaxsw             m%2, %4
+    pminsw             m%1, %5
+    pminsw             m%2, %5
+    mova         [%6+%7*0], m%1
+    mova         [%6+%7*1], m%2
+%endmacro
+
+; FIXME we can use the intermediate storage (rsp[0-15]) on x86-32 for temp
+; storage also instead of allocating two more stack spaces. This doesn't
+; matter much but it's something...
+INIT_XMM sse2
+cglobal vp9_idct_idct_8x8_add_10, 4, 6 + ARCH_X86_64, 14, \
+                                  16 * mmsize + 3 * ARCH_X86_32 * mmsize, \
+                                  dst, stride, block, eob
+    mova                m0, [pw_1023]           ; upper clamp bound
+    cmp               eobd, 1
+    jg .idctfull
+
+    ; dc-only - the 10bit version can be done entirely in 32bit, since the max
+    ; coef values are 16+sign bit, and the coef is 14bit, so 30+sign easily
+    ; fits in 32bit
+    DEFINE_ARGS dst, stride, block, coef
+    pxor                m2, m2                  ; zero: clears the dc coef and is the lower clamp bound
+    DC_ONLY              5, m2
+    movd                m1, coefd
+    pshuflw             m1, m1, q0000           ; broadcast the dc word to all 8 lanes
+    punpcklqdq          m1, m1
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 4                   ; 4 iterations of 2 rows each
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0
+    lea               dstq, [dstq+strideq*2]
+    dec               cntd
+    jg .loop_dc
+    RET
+
+.idctfull:
+    SCRATCH              0, 12, rsp+16*mmsize, max
+    DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak
+%if ARCH_X86_64
+    mov            dstbakq, dstq
+    movsxd            cntq, cntd
+%endif
+%ifdef PIC
+    lea               ptrq, [default_8x8]
+    movzx             cntd, byte [ptrq+cntq-1]
+%else
+    movzx             cntd, byte [default_8x8+cntq-1]  ; eob -> number of first-pass iterations
+%endif
+    mov              skipd, 2
+    sub              skipd, cntd                ; number of first-pass iterations that are skipped
+    mov               ptrq, rsp                 ; intermediate buffer on the stack
+    PRELOAD             10, pd_8192, rnd
+    PRELOAD             11, pd_3fff, mask
+    PRELOAD             13, pd_16, srnd
+.loop_1:
+    IDCT8_1D        blockq, reg_rnd, reg_mask
+
+    TRANSPOSE4x4D        0, 1, 2, 3, 6
+    mova  [ptrq+ 0*mmsize], m0
+    mova  [ptrq+ 2*mmsize], m1
+    mova  [ptrq+ 4*mmsize], m2
+    mova  [ptrq+ 6*mmsize], m3
+    UNSCRATCH            6, 8, rsp+17*mmsize    ; fetch the output IDCT8_1D left parked
+    TRANSPOSE4x4D        4, 5, 6, 7, 0
+    mova  [ptrq+ 1*mmsize], m4
+    mova  [ptrq+ 3*mmsize], m5
+    mova  [ptrq+ 5*mmsize], m6
+    mova  [ptrq+ 7*mmsize], m7
+    add               ptrq, 8 * mmsize
+    add             blockq, mmsize
+    dec               cntd
+    jg .loop_1
+
+    ; zero-pad the remainder (skipped cols)
+    test             skipd, skipd
+    jz .end
+    add              skipd, skipd               ; .loop_z clears half of one iteration's output per pass
+    lea             blockq, [blockq+skipq*(mmsize/2)]   ; blockq ends at block+2*mmsize on every path
+    pxor                m0, m0
+.loop_z:
+    mova   [ptrq+mmsize*0], m0
+    mova   [ptrq+mmsize*1], m0
+    mova   [ptrq+mmsize*2], m0
+    mova   [ptrq+mmsize*3], m0
+    add               ptrq, 4 * mmsize
+    dec              skipd
+    jg .loop_z
+.end:
+
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+    lea           stride3q, [strideq*3]
+    mov               cntd, 2                   ; second pass runs twice
+    mov               ptrq, rsp
+.loop_2:
+    IDCT8_1D          ptrq, reg_rnd, reg_mask
+
+    pxor                m6, m6
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m6, reg_max, reg_srnd, 5
+    lea               dstq, [dstq+strideq*4]
+    UNSCRATCH            0, 8, rsp+17*mmsize    ; fetch the output IDCT8_1D left parked
+    UNSCRATCH            1, 12, rsp+16*mmsize, max
+    UNSCRATCH            2, 13, pd_16, srnd
+    ROUND_AND_STORE_4x4  4, 5, 0, 7, m6, m1, m2, 5
+    add               ptrq, 16
+%if ARCH_X86_64
+    lea               dstq, [dstbakq+8]         ; original dst + 8 bytes
+%else
+    mov               dstq, dstm
+    add               dstq, 8
+%endif
+    dec               cntd
+    jg .loop_2
+
+    ; m6 is still zero
+    ZERO_BLOCK blockq-2*mmsize, 32, 8, m6
+    RET
+
+%macro DC_ONLY_64BIT 2 ; shift, zero -- DC-only value: two rounded multiplies by 11585 (>>14 each), then a rounded >>shift; result in coef
+%if ARCH_X86_64
+    movsxd           coefq, dword [blockq]  ; load the first (DC) dword coefficient, sign-extended to 64 bits
+    movd          [blockq], %2  ; overwrite that coefficient with the second macro argument (named zero)
+    imul             coefq, 11585  ; first multiply, done in 64 bits so it cannot overflow
+    add              coefq, 8192  ; rounding term for the >>14 below (8192 = 1 << 13)
+    sar              coefq, 14
+    imul             coefq, 11585  ; second multiply
+    add              coefq, ((1 << (%1 - 1)) << 14) + 8192  ; rounding for both the >>14 and the final >>shift, merged into one add
+    sar              coefq, 14 + %1  ; both shifts merged into one
+%else
+    mov              coefd, dword [blockq]  ; 32-bit path: load the first (DC) dword coefficient
+    movd          [blockq], %2  ; overwrite that coefficient with the second macro argument (named zero)
+    DEFINE_ARGS dst, stride, cnt, coef, coefl  ; block is no longer needed; its slot is renamed cnt
+    mov               cntd, 2  ; two passes, one per multiply by 11585
+.loop_dc_calc:
+    mov             coefld, coefd  ; split the value: coefl gets the low 14 bits, coef the high part
+    sar              coefd, 14
+    and             coefld, 0x3fff
+    imul             coefd, 11585  ; high part times 11585 (already scaled down by 14 bits)
+    imul            coefld, 11585  ; low part times 11585
+    add             coefld, 8192  ; round the low-part product
+    sar             coefld, 14
+    add              coefd, coefld  ; recombine: high product + rounded low product
+    dec               cntd
+    jg .loop_dc_calc
+    add              coefd, 1 << (%1 - 1)  ; rounding term for the final shift
+    sar              coefd, %1
+%endif
+%endmacro
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_8x8_add_12, 4, 6 + ARCH_X86_64, 14, \
+                                  16 * mmsize + 3 * ARCH_X86_32 * mmsize, \
+                                  dst, stride, block, eob
+    mova                m0, [pw_4095]  ; m0 = pw_4095 constant (presumably the 12-bit pixel maximum used for clipping -- confirm against the store macros)
+    cmp               eobd, 1
+    jg mangle(private_prefix %+ _ %+ vp9_idct_idct_8x8_add_10 %+ SUFFIX).idctfull  ; eob > 1: reuse the full-transform path of the _10 function, with m0 already set here
+
+    ; dc-only - unfortunately, this one can overflow, since coefs are 18+sign
+    ; bits, and 18+14+sign does not fit in 32bit, so we do 2-stage multiplies
+    DEFINE_ARGS dst, stride, block, coef, coefl
+    pxor                m2, m2  ; m2 = 0, used both to clear the DC coefficient and as an operand of STORE_2x8
+    DC_ONLY_64BIT        5, m2  ; coef = rounded DC value with a final shift of 5
+    movd                m1, coefd  ; broadcast the low 16 bits of coef to all 8 words of m1
+    pshuflw             m1, m1, q0000
+    punpcklqdq          m1, m1
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 4  ; 4 iterations, dst advances 2 rows each = 8 rows
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0  ; STORE_2x8 is defined outside this view; presumably adds m1 to two rows of 8 pixels and clips to [m2, m0] -- confirm
+    lea               dstq, [dstq+strideq*2]
+    dec               cntd
+    jg .loop_dc
+    RET
+
+; inputs and outputs are dwords, coefficients are words
+; each source dword is split into a low 14-bit part and a high part (>>14) so that the products fit pmaddwd's 16-bit inputs
+; dst1[hi]:dst3[lo] = src1 * coef1 + src2 * coef2
+; dst2[hi]:dst4[lo] = src1 * coef2 - src2 * coef1
+%macro SUMSUB_MUL_D 6-7 [pd_3fff] ; src/dst 1-2, dst3-4, coef1-2, mask
+    pand               m%3, m%1, %7  ; low parts: src1 & mask
+    pand               m%4, m%2, %7  ; low parts: src2 & mask
+    psrad              m%1, 14  ; high parts: src1 >> 14 (arithmetic)
+    psrad              m%2, 14  ; high parts: src2 >> 14 (arithmetic)
+    packssdw           m%4, m%2  ; pack to words: src2 low parts in the low half, src2 high parts in the high half
+    packssdw           m%3, m%1  ; pack to words: src1 low parts in the low half, src1 high parts in the high half
+    punpckhwd          m%2, m%4, m%3  ; interleave (src2, src1) word pairs of the high parts
+    punpcklwd          m%4, m%3  ; interleave (src2, src1) word pairs of the low parts
+    pmaddwd            m%3, m%4, [pw_%6_%5]  ; dst3 = low-part result of the sum formula above (constant table is defined outside this view)
+    pmaddwd            m%1, m%2, [pw_%6_%5]  ; dst1 = high-part result of the sum formula above
+    pmaddwd            m%4, [pw_m%5_%6]  ; dst4 = low-part result of the difference formula above
+    pmaddwd            m%2, [pw_m%5_%6]  ; dst2 = high-part result of the difference formula above
+%endmacro
+
+; dst1 = src2[hi]:src4[lo] + src1[hi]:src3[lo] + rnd >> 14
+; dst2 = src2[hi]:src4[lo] - src1[hi]:src3[lo] + rnd >> 14
+%macro SUMSUB_PACK_D 5-6 [pd_8192] ; src/dst 1-2, src3-4, tmp, rnd -- takes the hi/lo pairs produced by SUMSUB_MUL_D
+    SUMSUB_BA        d, %1, %2, %5  ; butterfly on the high parts (SUMSUB_BA is defined outside this view)
+    SUMSUB_BA        d, %3, %4, %5  ; butterfly on the low parts
+    paddd              m%3, %6  ; only the low parts need rounding and shifting
+    paddd              m%4, %6
+    psrad              m%3, 14
+    psrad              m%4, 14
+    paddd              m%1, m%3  ; recombine: high part + rounded low part
+    paddd              m%2, m%4
+%endmacro
+
+%macro NEGD 1 ; reg -- negates the packed dwords of the given register in place
+%if cpuflag(ssse3)
+    psignd              %1, [pw_m1]  ; psignd negates each dword whose corresponding pw_m1 dword is negative (pw_m1 presumably all-ones words -- defined outside this view)
+%else
+    pxor                %1, [pw_m1]  ; without ssse3: xor with pw_m1 ...
+    paddd               %1, [pd_1]  ; ... then add pd_1; a two-complement negate if pw_m1 is all ones and pd_1 is dword 1
+%endif
+%endmacro
+
+; one 1-D pass of the 8-point IADST on dwords; the following line has not been executed at the end of this macro:
+; UNSCRATCH            6, 8, rsp+17*mmsize
+%macro IADST8_1D 1-3 [pd_8192], [pd_3fff] ; src, rnd, mask
+    mova                m0, [%1+ 0*mmsize]  ; inputs 0, 3, 4 and 7 (rows are 2*mmsize apart)
+    mova                m3, [%1+ 6*mmsize]
+    mova                m4, [%1+ 8*mmsize]
+    mova                m7, [%1+14*mmsize]
+    SUMSUB_MUL_D         7, 0, 1, 2, 16305,  1606, %3   ; m7/1=t0a, m0/2=t1a
+    SUMSUB_MUL_D         3, 4, 5, 6, 10394, 12665, %3   ; m3/5=t4a, m4/6=t5a
+    SCRATCH              0, 8, rsp+17*mmsize  ; free m0 as a temporary for the next SUMSUB_PACK_D
+    SUMSUB_PACK_D        3, 7, 5, 1, 0, %2              ; m3=t0, m7=t4
+    UNSCRATCH            0, 8, rsp+17*mmsize
+    SUMSUB_PACK_D        4, 0, 6, 2, 1, %2              ; m4=t1, m0=t5
+
+    SCRATCH              3, 8, rsp+17*mmsize  ; park t0, t1, t4, t5 while the other four inputs are processed
+    SCRATCH              4, 9, rsp+18*mmsize
+    SCRATCH              7, 10, rsp+19*mmsize
+    SCRATCH              0, 11, rsp+20*mmsize
+
+    mova                m1, [%1+ 2*mmsize]  ; inputs 1, 2, 5 and 6
+    mova                m2, [%1+ 4*mmsize]
+    mova                m5, [%1+10*mmsize]
+    mova                m6, [%1+12*mmsize]
+    SUMSUB_MUL_D         5, 2, 3, 4, 14449,  7723, %3   ; m5/8=t2a, m2/9=t3a
+    SUMSUB_MUL_D         1, 6, 7, 0,  4756, 15679, %3   ; m1/10=t6a, m6/11=t7a
+    SCRATCH              2, 12, rsp+21*mmsize
+    SUMSUB_PACK_D        1, 5, 7, 3, 2, %2              ; m1=t2, m5=t6
+    UNSCRATCH            2, 12, rsp+21*mmsize
+    SUMSUB_PACK_D        6, 2, 0, 4, 3, %2              ; m6=t3, m2=t7
+
+    UNSCRATCH            7, 10, rsp+19*mmsize  ; bring back t4/t5 and park t2/t3 in the same slots
+    UNSCRATCH            0, 11, rsp+20*mmsize
+    SCRATCH              1, 10, rsp+19*mmsize
+    SCRATCH              6, 11, rsp+20*mmsize
+
+    SUMSUB_MUL_D         7, 0, 3, 4, 15137,  6270, %3   ; m7/8=t4a, m0/9=t5a
+    SUMSUB_MUL_D         2, 5, 1, 6,  6270, 15137, %3   ; m2/10=t7a, m5/11=t6a
+    SCRATCH              2, 12, rsp+21*mmsize
+    SUMSUB_PACK_D        5, 7, 6, 3, 2, %2              ; m5=-out1, m7=t6
+    UNSCRATCH            2, 12, rsp+21*mmsize
+    NEGD                m5                              ; m5=out1
+    SUMSUB_PACK_D        2, 0, 1, 4, 3, %2              ; m2=out6, m0=t7
+    SUMSUB_MUL           7, 0, 3, 4, 11585, 11585, %2, %3   ; m7=out2, m0=-out5
+    NEGD                m0                              ; m0=out5
+
+    UNSCRATCH            3, 8, rsp+17*mmsize  ; bring back t0..t3; out6 and out5 take over slots 8 and 9
+    UNSCRATCH            4, 9, rsp+18*mmsize
+    UNSCRATCH            1, 10, rsp+19*mmsize
+    UNSCRATCH            6, 11, rsp+20*mmsize
+    SCRATCH              2, 8, rsp+17*mmsize
+    SCRATCH              0, 9, rsp+18*mmsize
+
+    SUMSUB_BA         d, 1, 3,  2                       ; m1=out0, m3=t2
+    SUMSUB_BA         d, 6, 4,  2                       ; m6=-out7, m4=t3
+    NEGD                m6                              ; m6=out7
+    SUMSUB_MUL           3, 4,  2,  0, 11585, 11585, %2, %3 ; m3=-out3, m4=out4
+    NEGD                m3                              ; m3=out3
+
+    UNSCRATCH            0, 9, rsp+18*mmsize  ; out5 back into m0; out6 stays in slot 8 (see the note above the macro)
+
+    SWAP                 0, 1, 5  ; rename registers into output order (no instructions emitted by SWAP in x86inc -- confirm)
+    SWAP                 2, 7, 6
+%endmacro
+
+%macro IADST8_FN 5 ; pass-1 name, pass-1 macro prefix, pass-2 name, pass-2 macro prefix, eob lookup table prefix
+cglobal vp9_%1_%3_8x8_add_10, 4, 6 + ARCH_X86_64, 16, \
+                              16 * mmsize + ARCH_X86_32 * 6 * mmsize, \
+                              dst, stride, block, eob
+    mova                m0, [pw_1023]  ; m0 = pw_1023 constant (presumably the 10-bit pixel maximum -- confirm)
+
+.body:  ; shared entry point: the _12 variant below jumps here with m0 = pw_4095
+    SCRATCH              0, 13, rsp+16*mmsize, max  ; keep m0 in scratch slot 13 / stack slot 16 under the name max
+    DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak  ; eob is renamed cnt from here on
+%if ARCH_X86_64
+    mov            dstbakq, dstq  ; keep the original dst (x86-32 re-reads it from dstm instead)
+    movsxd            cntq, cntd
+%endif
+%ifdef PIC
+    lea               ptrq, [%5_8x8]
+    movzx             cntd, byte [ptrq+cntq-1]  ; cnt = table[eob - 1]: number of pass-1 iterations to run
+%else
+    movzx             cntd, byte [%5_8x8+cntq-1]  ; cnt = table[eob - 1]: number of pass-1 iterations to run
+%endif
+    mov              skipd, 2  ; skip = 2 - cnt: pass-1 iterations that are replaced by zero fill
+    sub              skipd, cntd
+    mov               ptrq, rsp  ; pass-1 output goes to the start of the stack buffer
+    PRELOAD             14, pd_8192, rnd
+    PRELOAD             15, pd_3fff, mask
+.loop_1:
+    %2_1D           blockq, reg_rnd, reg_mask  ; first 1-D pass, reading the coefficient block
+
+    TRANSPOSE4x4D        0, 1, 2, 3, 6
+    mova  [ptrq+ 0*mmsize], m0  ; even slots take the transposed first four outputs
+    mova  [ptrq+ 2*mmsize], m1
+    mova  [ptrq+ 4*mmsize], m2
+    mova  [ptrq+ 6*mmsize], m3
+    UNSCRATCH            6, 8, rsp+17*mmsize  ; fetch the output that the 1-D macro left in scratch slot 8
+    TRANSPOSE4x4D        4, 5, 6, 7, 0
+    mova  [ptrq+ 1*mmsize], m4  ; odd slots take the transposed last four outputs
+    mova  [ptrq+ 3*mmsize], m5
+    mova  [ptrq+ 5*mmsize], m6
+    mova  [ptrq+ 7*mmsize], m7
+    add               ptrq, 8 * mmsize
+    add             blockq, mmsize  ; next group of input columns
+    dec               cntd
+    jg .loop_1
+
+    ; zero-pad the remainder (skipped cols)
+    test             skipd, skipd
+    jz .end
+    add              skipd, skipd  ; each .loop_z iteration clears 4*mmsize, half of one pass-1 iteration
+    lea             blockq, [blockq+skipq*(mmsize/2)]  ; advance blockq as if the skipped iterations had run, so it ends at block+2*mmsize
+    pxor                m0, m0
+.loop_z:
+    mova   [ptrq+mmsize*0], m0
+    mova   [ptrq+mmsize*1], m0
+    mova   [ptrq+mmsize*2], m0
+    mova   [ptrq+mmsize*3], m0
+    add               ptrq, 4 * mmsize
+    dec              skipd
+    jg .loop_z
+.end:
+
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak  ; skip is no longer needed; its slot is renamed stride3
+    lea           stride3q, [strideq*3]
+    mov               cntd, 2  ; second pass always runs two iterations
+    mov               ptrq, rsp  ; second pass reads the intermediate buffer from its start
+.loop_2:
+    %4_1D             ptrq, reg_rnd, reg_mask  ; second 1-D pass, reading the stack buffer
+
+    pxor                m6, m6
+    PRELOAD              9, pd_16, srnd
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m6, reg_max, reg_srnd, 5  ; macro defined outside this view; presumably rounds, shifts by 5, adds to dst and clips -- confirm
+    lea               dstq, [dstq+strideq*4]  ; move down four rows
+    UNSCRATCH            0, 8, rsp+17*mmsize  ; fetch the output left in scratch slot 8
+    UNSCRATCH            1, 13, rsp+16*mmsize, max
+    UNSCRATCH            2, 9, pd_16, srnd
+    ROUND_AND_STORE_4x4  4, 5, 0, 7, m6, m1, m2, 5
+    add               ptrq, 16  ; next 16 bytes of the intermediate buffer
+%if ARCH_X86_64
+    lea               dstq, [dstbakq+8]  ; back to the top row, 8 bytes to the right
+%else
+    mov               dstq, dstm  ; x86-32: reload the original dst argument from the stack
+    add               dstq, 8
+%endif
+    dec               cntd
+    jg .loop_2
+
+    ; m6 is still zero
+    ZERO_BLOCK blockq-2*mmsize, 32, 8, m6  ; blockq is block+2*mmsize here, so this addresses the block start
+    RET
+
+cglobal vp9_%1_%3_8x8_add_12, 4, 6 + ARCH_X86_64, 16, \
+                              16 * mmsize + ARCH_X86_32 * 6 * mmsize, \
+                              dst, stride, block, eob
+    mova                m0, [pw_4095]  ; only this constant differs from the _10 variant
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_%3_8x8_add_10 %+ SUFFIX).body  ; tail-jump into the shared body
+%endmacro
+
+INIT_XMM sse2
+IADST8_FN idct,  IDCT8,  iadst, IADST8, row  ; vp9_idct_iadst_8x8_add_10/_12, eob lookup in row_8x8
+IADST8_FN iadst, IADST8, idct,  IDCT8,  col  ; vp9_iadst_idct_8x8_add_10/_12, eob lookup in col_8x8
+IADST8_FN iadst, IADST8, iadst, IADST8, default  ; vp9_iadst_iadst_8x8_add_10/_12, eob lookup in default_8x8
+
+%macro IDCT16_1D 1-4 4 * mmsize, 65, 67 ; src, src_stride, stack_offset, mm32bit_stack_offset
+    IDCT8_1D            %1, [pd_8192], [pd_3fff], %2 * 2, %4    ; m0-3=t0-3a, m4-5/m8|r67/m7=t4-7
+    ; SCRATCH            6, 8, rsp+(%4+0)*mmsize    ; t6
+    SCRATCH              0, 15, rsp+(%4+7)*mmsize   ; t0a
+    SCRATCH              1, 14, rsp+(%4+6)*mmsize   ; t1a
+    SCRATCH              2, 13, rsp+(%4+5)*mmsize   ; t2a
+    SCRATCH              3, 12, rsp+(%4+4)*mmsize   ; t3a
+    SCRATCH              4, 11, rsp+(%4+3)*mmsize   ; t4
+    mova [rsp+(%3+0)*mmsize], m5                    ; t5
+    mova [rsp+(%3+1)*mmsize], m7                    ; t7
+
+    mova                m0, [%1+ 1*%2]              ; in1
+    mova                m3, [%1+ 7*%2]              ; in7
+    mova                m4, [%1+ 9*%2]              ; in9
+    mova                m7, [%1+15*%2]              ; in15
+
+    SUMSUB_MUL           0, 7, 1, 2, 16305,  1606   ; m0=t15a, m7=t8a
+    SUMSUB_MUL           4, 3, 1, 2, 10394, 12665   ; m4=t14a, m3=t9a
+    SUMSUB_BA         d, 3, 7, 1                    ; m3=t8, m7=t9
+    SUMSUB_BA         d, 4, 0, 1                    ; m4=t15,m0=t14
+    SUMSUB_MUL           0, 7, 1, 2, 15137,  6270   ; m0=t14a, m7=t9a
+
+    mova                m1, [%1+ 3*%2]              ; in3
+    mova                m2, [%1+ 5*%2]              ; in5
+    mova                m5, [%1+11*%2]              ; in11
+    mova                m6, [%1+13*%2]              ; in13
+
+    SCRATCH              0,  9, rsp+(%4+1)*mmsize  ; park t14a to free m0 as a temporary
+    SCRATCH              7, 10, rsp+(%4+2)*mmsize  ; park t9a to free m7 as a temporary
+
+    SUMSUB_MUL           2, 5, 0, 7, 14449,  7723   ; m2=t13a, m5=t10a
+    SUMSUB_MUL           6, 1, 0, 7,  4756, 15679   ; m6=t12a, m1=t11a
+    SUMSUB_BA         d, 5, 1, 0                    ; m5=t11,m1=t10
+    SUMSUB_BA         d, 2, 6, 0                    ; m2=t12,m6=t13
+    NEGD                m1                          ; m1=-t10
+    SUMSUB_MUL           1, 6, 0, 7, 15137,  6270   ; m1=t13a, m6=t10a
+
+    UNSCRATCH            7, 10, rsp+(%4+2)*mmsize  ; t9a back into m7
+    SUMSUB_BA         d, 5, 3, 0                    ; m5=t8a, m3=t11a
+    SUMSUB_BA         d, 6, 7, 0                    ; m6=t9,  m7=t10
+    SUMSUB_BA         d, 2, 4, 0                    ; m2=t15a,m4=t12a
+    SCRATCH              5, 10, rsp+(%4+2)*mmsize  ; park t8a
+    SUMSUB_MUL           4, 3, 0, 5, 11585, 11585   ; m4=t12, m3=t11
+    UNSCRATCH            0, 9, rsp+(%4+1)*mmsize  ; t14a back into m0
+    SUMSUB_BA         d, 1, 0, 5                    ; m1=t14, m0=t13
+    SCRATCH              6, 9, rsp+(%4+1)*mmsize  ; park t9
+    SUMSUB_MUL           0, 7, 6, 5, 11585, 11585   ; m0=t13a,m7=t10a
+
+    ; order: 15|r74,14|r73,13|r72,12|r71,11|r70,r65,8|r67,r66,10|r69,9|r68,7,3,4,0,1,2
+    ; free: 6,5
+
+    UNSCRATCH            5, 15, rsp+(%4+7)*mmsize  ; final butterflies: each parked even-half term is paired with an odd-half term
+    SUMSUB_BA         d, 2, 5, 6                    ; m2=out0, m5=out15
+    SCRATCH              5, 15, rsp+(%4+7)*mmsize
+    UNSCRATCH            5, 14, rsp+(%4+6)*mmsize
+    SUMSUB_BA         d, 1, 5, 6                    ; m1=out1, m5=out14
+    SCRATCH              5, 14, rsp+(%4+6)*mmsize
+    UNSCRATCH            5, 13, rsp+(%4+5)*mmsize
+    SUMSUB_BA         d, 0, 5, 6                    ; m0=out2, m5=out13
+    SCRATCH              5, 13, rsp+(%4+5)*mmsize
+    UNSCRATCH            5, 12, rsp+(%4+4)*mmsize
+    SUMSUB_BA         d, 4, 5, 6                    ; m4=out3, m5=out12
+    SCRATCH              5, 12, rsp+(%4+4)*mmsize
+    UNSCRATCH            5, 11, rsp+(%4+3)*mmsize
+    SUMSUB_BA         d, 3, 5, 6                    ; m3=out4, m5=out11
+    SCRATCH              4, 11, rsp+(%4+3)*mmsize  ; out3 takes over slot 11
+    mova                m4, [rsp+(%3+0)*mmsize]  ; reload t5
+    SUMSUB_BA         d, 7, 4, 6                    ; m7=out5, m4=out10
+    mova [rsp+(%3+0)*mmsize], m5  ; out11 goes to the plain stack slot that held t5
+    UNSCRATCH            5, 8, rsp+(%4+0)*mmsize  ; t6, left in slot 8 by IDCT8_1D
+    UNSCRATCH            6, 9, rsp+(%4+1)*mmsize  ; t9
+    SCRATCH              2, 8, rsp+(%4+0)*mmsize  ; out0 into slot 8
+    SCRATCH              1, 9, rsp+(%4+1)*mmsize  ; out1 into slot 9
+    UNSCRATCH            1, 10, rsp+(%4+2)*mmsize  ; t8a
+    SCRATCH              0, 10, rsp+(%4+2)*mmsize  ; out2 into slot 10
+    mova                m0, [rsp+(%3+1)*mmsize]  ; reload t7
+    SUMSUB_BA         d, 6, 5, 2                    ; m6=out6, m5=out9
+    SUMSUB_BA         d, 1, 0, 2                    ; m1=out7, m0=out8
+
+    SWAP                 0, 3, 1, 7, 2, 6, 4  ; rename m0-6 so they hold out4-10 in order
+
+    ; output order: 8-11|r67-70=out0-3
+    ;               0-6,r65=out4-11
+    ;               12-15|r71-74=out12-15
+%endmacro
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \
+                                    67 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                    dst, stride, block, eob
+    mova                m0, [pw_1023]  ; m0 = pw_1023 constant (presumably the 10-bit pixel maximum -- confirm)
+    cmp               eobd, 1
+    jg .idctfull  ; eob > 1: run the full two-pass transform
+
+    ; dc-only - the 10bit version can be done entirely in 32bit, since the max
+    ; coef values are 17+sign bit, and the coef is 14bit, so 31+sign easily
+    ; fits in 32bit
+    DEFINE_ARGS dst, stride, block, coef
+    pxor                m2, m2
+    DC_ONLY              6, m2  ; DC_ONLY is defined outside this view; result is read from coef below
+    movd                m1, coefd  ; broadcast the low 16 bits of coef to all 8 words of m1
+    pshuflw             m1, m1, q0000
+    punpcklqdq          m1, m1
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 8  ; 8 iterations, dst advances 2 rows each = 16 rows
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0, dstq,         mmsize  ; first row of the pair
+    STORE_2x8            3, 4, 1, m2, m0, dstq+strideq, mmsize  ; second row of the pair
+    lea               dstq, [dstq+strideq*2]
+    dec               cntd
+    jg .loop_dc
+    RET
+
+.idctfull:  ; also entered from the _12 variant with m0 = pw_4095
+    mova   [rsp+64*mmsize], m0  ; keep m0 in stack slot 64; it is passed to ROUND_AND_STORE_4x4 in the second pass
+    DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak  ; eob is renamed cnt from here on
+%if ARCH_X86_64
+    mov            dstbakq, dstq  ; keep the original dst (x86-32 uses dstm instead)
+    movsxd            cntq, cntd
+%endif
+%ifdef PIC
+    lea               ptrq, [default_16x16]
+    movzx             cntd, byte [ptrq+cntq-1]  ; cnt = default_16x16[eob - 1]: number of pass-1 iterations to run
+%else
+    movzx             cntd, byte [default_16x16+cntq-1]  ; cnt = default_16x16[eob - 1]: number of pass-1 iterations to run
+%endif
+    mov              skipd, 4  ; skip = 4 - cnt: pass-1 iterations that are replaced by zero fill
+    sub              skipd, cntd
+    mov               ptrq, rsp  ; pass-1 output goes to the start of the stack buffer
+.loop_1:
+    IDCT16_1D       blockq  ; first 1-D pass, reading the coefficient block
+
+    TRANSPOSE4x4D        0, 1, 2, 3, 7  ; out4-7
+    mova  [ptrq+ 1*mmsize], m0
+    mova  [ptrq+ 5*mmsize], m1
+    mova  [ptrq+ 9*mmsize], m2
+    mova  [ptrq+13*mmsize], m3
+    mova                m7, [rsp+65*mmsize]  ; out11 was left in stack slot 65 by IDCT16_1D
+    TRANSPOSE4x4D        4, 5, 6, 7, 0  ; out8-11
+    mova  [ptrq+ 2*mmsize], m4
+    mova  [ptrq+ 6*mmsize], m5
+    mova  [ptrq+10*mmsize], m6
+    mova  [ptrq+14*mmsize], m7
+    UNSCRATCH               0, 8, rsp+67*mmsize  ; out0-3 from scratch slots 8-11
+    UNSCRATCH               1, 9, rsp+68*mmsize
+    UNSCRATCH               2, 10, rsp+69*mmsize
+    UNSCRATCH               3, 11, rsp+70*mmsize
+    TRANSPOSE4x4D        0, 1, 2, 3, 7
+    mova  [ptrq+ 0*mmsize], m0
+    mova  [ptrq+ 4*mmsize], m1
+    mova  [ptrq+ 8*mmsize], m2
+    mova  [ptrq+12*mmsize], m3
+    UNSCRATCH               4, 12, rsp+71*mmsize  ; out12-15 from scratch slots 12-15
+    UNSCRATCH               5, 13, rsp+72*mmsize
+    UNSCRATCH               6, 14, rsp+73*mmsize
+    UNSCRATCH               7, 15, rsp+74*mmsize
+    TRANSPOSE4x4D        4, 5, 6, 7, 0
+    mova  [ptrq+ 3*mmsize], m4
+    mova  [ptrq+ 7*mmsize], m5
+    mova  [ptrq+11*mmsize], m6
+    mova  [ptrq+15*mmsize], m7
+    add               ptrq, 16 * mmsize
+    add             blockq, mmsize  ; next group of input columns
+    dec               cntd
+    jg .loop_1
+
+    ; zero-pad the remainder (skipped cols)
+    test             skipd, skipd
+    jz .end
+    add              skipd, skipd  ; each .loop_z iteration clears 8*mmsize, half of one pass-1 iteration
+    lea             blockq, [blockq+skipq*(mmsize/2)]  ; advance blockq as if the skipped iterations had run, so it ends at block+4*mmsize
+    pxor                m0, m0
+.loop_z:
+    mova   [ptrq+mmsize*0], m0
+    mova   [ptrq+mmsize*1], m0
+    mova   [ptrq+mmsize*2], m0
+    mova   [ptrq+mmsize*3], m0
+    mova   [ptrq+mmsize*4], m0
+    mova   [ptrq+mmsize*5], m0
+    mova   [ptrq+mmsize*6], m0
+    mova   [ptrq+mmsize*7], m0
+    add               ptrq, 8 * mmsize
+    dec              skipd
+    jg .loop_z
+.end:
+
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak  ; skip is no longer needed; its slot is renamed stride3
+    lea           stride3q, [strideq*3]
+    mov               cntd, 4  ; second pass always runs four iterations
+    mov               ptrq, rsp  ; second pass reads the intermediate buffer from its start
+.loop_2:
+    IDCT16_1D         ptrq  ; second 1-D pass, reading the stack buffer
+
+    pxor               m7, m7
+    lea               dstq, [dstq+strideq*4]  ; rows 4-7 first: m0-3 hold out4-7
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m7, [rsp+64*mmsize], [pd_32], 6  ; macro defined outside this view; presumably rounds, shifts by 6, adds to dst and clips -- confirm
+    lea               dstq, [dstq+strideq*4]  ; rows 8-11
+    mova                m0, [rsp+65*mmsize]  ; out11 from stack slot 65
+    mova                m1, [rsp+64*mmsize]  ; the constant saved at .idctfull
+    mova                m2, [pd_32]
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, m1, m2, 6
+
+%if ARCH_X86_64
+    DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst  ; swap the names so that dstq temporarily refers to the saved original pointer
+%else
+    mov               dstq, dstm  ; x86-32: reload the current column base from the stack argument
+%endif
+    UNSCRATCH               0, 8, rsp+67*mmsize  ; rows 0-3: out0-3 from scratch slots 8-11
+    UNSCRATCH               4, 9, rsp+68*mmsize
+    UNSCRATCH               5, 10, rsp+69*mmsize
+    UNSCRATCH               3, 11, rsp+70*mmsize
+    ROUND_AND_STORE_4x4  0, 4, 5, 3, m7, m1, m2, 6
+%if ARCH_X86_64
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak  ; restore the normal names
+    lea               dstq, [dstbakq+stride3q*4]  ; rows 12-15 (3*stride*4 = 12 rows down)
+%else
+    lea               dstq, [dstq+stride3q*4]  ; rows 12-15 (3*stride*4 = 12 rows down)
+%endif
+    UNSCRATCH               4, 12, rsp+71*mmsize  ; out12-15 from scratch slots 12-15
+    UNSCRATCH               5, 13, rsp+72*mmsize
+    UNSCRATCH               6, 14, rsp+73*mmsize
+    UNSCRATCH               0, 15, rsp+74*mmsize
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, m1, m2, 6
+
+    add               ptrq, mmsize  ; next mmsize bytes of the intermediate buffer
+%if ARCH_X86_64
+    add            dstbakq, 8  ; next output column group: 8 bytes to the right
+    mov               dstq, dstbakq
+%else
+    add         dword dstm, 8  ; x86-32 keeps the column base in the stack argument itself
+    mov               dstq, dstm
+%endif
+    dec               cntd
+    jg .loop_2
+
+    ; m7 is still zero
+    ZERO_BLOCK blockq-4*mmsize, 64, 16, m7  ; blockq is block+4*mmsize here, so this addresses the block start
+    RET
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_16x16_add_12, 4, 6 + ARCH_X86_64, 16, \
+                                    67 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                    dst, stride, block, eob
+    mova                m0, [pw_4095]  ; m0 = pw_4095 constant (presumably the 12-bit pixel maximum -- confirm)
+    cmp               eobd, 1
+    jg mangle(private_prefix %+ _ %+ vp9_idct_idct_16x16_add_10 %+ SUFFIX).idctfull  ; eob > 1: reuse the full-transform path of the _10 function
+
+    ; dc-only - unfortunately, this one can overflow, since coefs are 19+sign
+    ; bits, and 19+14+sign does not fit in 32bit, so we do 2-stage multiplies
+    DEFINE_ARGS dst, stride, block, coef, coefl
+    pxor                m2, m2  ; m2 = 0, used both to clear the DC coefficient and as an operand of STORE_2x8
+    DC_ONLY_64BIT        6, m2  ; coef = rounded DC value with a final shift of 6
+    movd                m1, coefd  ; broadcast the low 16 bits of coef to all 8 words of m1
+    pshuflw             m1, m1, q0000
+    punpcklqdq          m1, m1
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 8  ; 8 iterations, dst advances 2 rows each = 16 rows
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0, dstq,         mmsize  ; first row of the pair
+    STORE_2x8            3, 4, 1, m2, m0, dstq+strideq, mmsize  ; second row of the pair
+    lea               dstq, [dstq+strideq*2]
+    dec               cntd
+    jg .loop_dc
+    RET
+
+; r65-69 are available for spills
+; r70-77 are available on x86-32 only (x86-64 should use m8-15)
+; output should be in m8-11|r70-73, m0-6,r65 and m12-15|r74-77
+%macro IADST16_1D 1 ; src -- one 1-D pass of the 16-point IADST on dwords; input rows are 4*mmsize apart
+    mova                m0, [%1+ 0*4*mmsize]        ; in0
+    mova                m1, [%1+ 7*4*mmsize]        ; in7
+    mova                m2, [%1+ 8*4*mmsize]        ; in8
+    mova                m3, [%1+15*4*mmsize]        ; in15
+    SUMSUB_MUL_D         3, 0, 4, 5, 16364,  804    ; m3/4=t0, m0/5=t1
+    SUMSUB_MUL_D         1, 2, 6, 7, 11003, 12140   ; m1/6=t8, m2/7=t9
+    SCRATCH              0, 8, rsp+70*mmsize  ; free m0 as a temporary for the next SUMSUB_PACK_D
+    SUMSUB_PACK_D        1, 3, 6, 4, 0              ; m1=t0a, m3=t8a
+    UNSCRATCH            0, 8, rsp+70*mmsize
+    SUMSUB_PACK_D        2, 0, 7, 5, 4              ; m2=t1a, m0=t9a
+    mova   [rsp+67*mmsize], m1  ; park t0a (r67), t1a (slot 9), t8a (slot 12), t9a (slot 13)
+    SCRATCH              2, 9, rsp+71*mmsize
+    SCRATCH              3, 12, rsp+74*mmsize
+    SCRATCH              0, 13, rsp+75*mmsize
+
+    mova                m0, [%1+ 3*4*mmsize]        ; in3
+    mova                m1, [%1+ 4*4*mmsize]        ; in4
+    mova                m2, [%1+11*4*mmsize]        ; in11
+    mova                m3, [%1+12*4*mmsize]        ; in12
+    SUMSUB_MUL_D         2, 1, 4, 5, 14811,  7005   ; m2/4=t4, m1/5=t5
+    SUMSUB_MUL_D         0, 3, 6, 7,  5520, 15426   ; m0/6=t12, m3/7=t13
+    SCRATCH              1, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        0, 2, 6, 4, 1              ; m0=t4a, m2=t12a
+    UNSCRATCH            1, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        3, 1, 7, 5, 4              ; m3=t5a, m1=t13a
+    SCRATCH              0, 15, rsp+77*mmsize  ; park t4a (slot 15) and t5a (slot 11)
+    SCRATCH              3, 11, rsp+73*mmsize
+
+    UNSCRATCH            0, 12, rsp+74*mmsize       ; t8a
+    UNSCRATCH            3, 13, rsp+75*mmsize       ; t9a
+    SUMSUB_MUL_D         0, 3, 4, 5, 16069,  3196   ; m0/4=t8, m3/5=t9
+    SUMSUB_MUL_D         1, 2, 6, 7,  3196, 16069   ; m1/6=t13, m2/7=t12
+    SCRATCH              1, 12, rsp+74*mmsize
+    SUMSUB_PACK_D        2, 0, 7, 4, 1              ; m2=t8a, m0=t12a
+    UNSCRATCH            1, 12, rsp+74*mmsize
+    SUMSUB_PACK_D        1, 3, 6, 5, 4              ; m1=t9a, m3=t13a
+    mova   [rsp+65*mmsize], m2  ; park t8a (r65), t9a (r66), t12a (slot 8), t13a (slot 12)
+    mova   [rsp+66*mmsize], m1
+    SCRATCH              0, 8, rsp+70*mmsize
+    SCRATCH              3, 12, rsp+74*mmsize
+
+    mova                m0, [%1+ 2*4*mmsize]        ; in2
+    mova                m1, [%1+ 5*4*mmsize]        ; in5
+    mova                m2, [%1+10*4*mmsize]        ; in10
+    mova                m3, [%1+13*4*mmsize]        ; in13
+    SUMSUB_MUL_D         3, 0, 4, 5, 15893,  3981   ; m3/4=t2, m0/5=t3
+    SUMSUB_MUL_D         1, 2, 6, 7,  8423, 14053   ; m1/6=t10, m2/7=t11
+    SCRATCH              0, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        1, 3, 6, 4, 0              ; m1=t2a, m3=t10a
+    UNSCRATCH            0, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        2, 0, 7, 5, 4              ; m2=t3a, m0=t11a
+    mova   [rsp+68*mmsize], m1  ; park t2a (r68), t3a (r69), t10a (slot 13), t11a (slot 14)
+    mova   [rsp+69*mmsize], m2
+    SCRATCH              3, 13, rsp+75*mmsize
+    SCRATCH              0, 14, rsp+76*mmsize
+
+    mova                m0, [%1+ 1*4*mmsize]        ; in1
+    mova                m1, [%1+ 6*4*mmsize]        ; in6
+    mova                m2, [%1+ 9*4*mmsize]        ; in9
+    mova                m3, [%1+14*4*mmsize]        ; in14
+    SUMSUB_MUL_D         2, 1, 4, 5, 13160,  9760   ; m2/4=t6, m1/5=t7
+    SUMSUB_MUL_D         0, 3, 6, 7,  2404, 16207   ; m0/6=t14, m3/7=t15
+    SCRATCH              1, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        0, 2, 6, 4, 1              ; m0=t6a, m2=t14a
+    UNSCRATCH            1, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        3, 1, 7, 5, 4              ; m3=t7a, m1=t15a
+
+    UNSCRATCH            4, 13, rsp+75*mmsize       ; t10a
+    UNSCRATCH            5, 14, rsp+76*mmsize       ; t11a
+    SCRATCH              0, 13, rsp+75*mmsize  ; park t6a (slot 13) and t7a (slot 14)
+    SCRATCH              3, 14, rsp+76*mmsize
+    SUMSUB_MUL_D         4, 5, 6, 7,  9102, 13623   ; m4/6=t10, m5/7=t11
+    SUMSUB_MUL_D         1, 2, 0, 3, 13623,  9102   ; m1/0=t15, m2/3=t14
+    SCRATCH              0, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        2, 4, 3, 6, 0              ; m2=t10a, m4=t14a
+    UNSCRATCH            0, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        1, 5, 0, 7, 6              ; m1=t11a, m5=t15a
+
+    UNSCRATCH            0, 8, rsp+70*mmsize        ; t12a
+    UNSCRATCH            3, 12, rsp+74*mmsize       ; t13a
+    SCRATCH              2, 8, rsp+70*mmsize  ; park t10a (slot 8) and t11a (slot 12)
+    SCRATCH              1, 12, rsp+74*mmsize
+    SUMSUB_MUL_D         0, 3, 1, 2, 15137,  6270   ; m0/1=t12, m3/2=t13
+    SUMSUB_MUL_D         5, 4, 7, 6,  6270, 15137   ; m5/7=t15, m4/6=t14
+    SCRATCH              2, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        4, 0, 6, 1, 2              ; m4=out2, m0=t14a
+    UNSCRATCH            2, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        5, 3, 7, 2, 1              ; m5=-out13, m3=t15a
+    NEGD                m5                          ; m5=out13
+
+    UNSCRATCH            1, 9, rsp+71*mmsize        ; t1a
+    mova                m2, [rsp+68*mmsize]         ; t2a
+    UNSCRATCH            6, 13, rsp+75*mmsize       ; t6a
+    UNSCRATCH            7, 14, rsp+76*mmsize       ; t7a
+    SCRATCH              4, 10, rsp+72*mmsize  ; out2 into its final slot 10
+    SCRATCH              5, 13, rsp+75*mmsize  ; out13 into its final slot 13
+    UNSCRATCH            4, 15, rsp+77*mmsize       ; t4a
+    UNSCRATCH            5, 11, rsp+73*mmsize       ; t5a
+    SCRATCH              0, 14, rsp+76*mmsize  ; park t14a (slot 14) and t15a (slot 15)
+    SCRATCH              3, 15, rsp+77*mmsize
+    mova                m0, [rsp+67*mmsize]         ; t0a
+    SUMSUB_BA         d, 4, 0, 3                    ; m4=t0, m0=t4
+    SUMSUB_BA         d, 5, 1, 3                    ; m5=t1, m1=t5
+    SUMSUB_BA         d, 6, 2, 3                    ; m6=t2, m2=t6
+    SCRATCH              4, 9, rsp+71*mmsize  ; park t0 in slot 9 to free m4 as a temporary
+    mova                m3, [rsp+69*mmsize]         ; t3a
+    SUMSUB_BA         d, 7, 3, 4                    ; m7=t3, m3=t7
+
+    mova   [rsp+67*mmsize], m5  ; park t1 (r67), t2 (r68), t3 (r69)
+    mova   [rsp+68*mmsize], m6
+    mova   [rsp+69*mmsize], m7
+    SUMSUB_MUL_D         0, 1, 4, 5, 15137,  6270   ; m0/4=t4a, m1/5=t5a
+    SUMSUB_MUL_D         3, 2, 7, 6,  6270, 15137   ; m3/7=t7a, m2/6=t6a
+    SCRATCH              1, 11, rsp+73*mmsize
+    SUMSUB_PACK_D        2, 0, 6, 4, 1              ; m2=-out3, m0=t6
+    NEGD                m2                          ; m2=out3
+    UNSCRATCH            1, 11, rsp+73*mmsize
+    SUMSUB_PACK_D        3, 1, 7, 5, 4              ; m3=out12, m1=t7
+    SCRATCH              2, 11, rsp+73*mmsize  ; out3 into its final slot 11
+    UNSCRATCH            2, 12, rsp+74*mmsize       ; t11a
+    SCRATCH              3, 12, rsp+74*mmsize  ; out12 into its final slot 12
+
+    UNSCRATCH            3, 8, rsp+70*mmsize        ; t10a
+    mova                m4, [rsp+65*mmsize]         ; t8a
+    mova                m5, [rsp+66*mmsize]         ; t9a
+    SUMSUB_BA         d, 3, 4, 6                    ; m3=-out1, m4=t10
+    NEGD                m3                          ; m3=out1
+    SUMSUB_BA         d, 2, 5, 6                    ; m2=out14, m5=t11
+    UNSCRATCH            6, 9, rsp+71*mmsize        ; t0
+    UNSCRATCH            7, 14, rsp+76*mmsize       ; t14a
+    SCRATCH              3, 9, rsp+71*mmsize  ; out1 into its final slot 9
+    SCRATCH              2, 14, rsp+76*mmsize  ; out14 into its final slot 14
+
+    SUMSUB_MUL           1, 0, 2, 3, 11585, 11585   ; m1=out4, m0=out11
+    mova   [rsp+65*mmsize], m0  ; out11 into its final stack slot r65
+    SUMSUB_MUL           5, 4, 2, 3, 11585, 11585   ; m5=out6, m4=out9
+    UNSCRATCH            0, 15, rsp+77*mmsize       ; t15a
+    SUMSUB_MUL           7, 0, 2, 3, 11585, m11585  ; m7=out10, m0=out5
+
+    mova                m2, [rsp+68*mmsize]         ; t2
+    SUMSUB_BA         d, 2, 6, 3                    ; m2=out0, m6=t2a
+    SCRATCH              2, 8, rsp+70*mmsize  ; out0 into its final slot 8
+    mova                m2, [rsp+67*mmsize]         ; t1
+    mova                m3, [rsp+69*mmsize]         ; t3
+    mova   [rsp+67*mmsize], m7  ; spill out10 to free m7 as a temporary
+    SUMSUB_BA         d, 3, 2, 7                    ; m3=-out15, m2=t3a
+    NEGD                m3                          ; m3=out15
+    SCRATCH              3, 15, rsp+77*mmsize  ; out15 into its final slot 15
+    SUMSUB_MUL           6, 2, 7, 3, 11585, m11585  ; m6=out8, m2=out7
+    mova                m7, [rsp+67*mmsize]  ; reload out10
+
+    SWAP                 0, 1  ; rename m0-6 so they hold out4-10 in order
+    SWAP                 2, 5, 4, 6, 7, 3
+%endmacro
+
+%macro IADST16_FN 7 ; pass-1 name, pass-1 macro prefix, pass-1 scratch stack slot, pass-2 name, pass-2 macro prefix, pass-2 scratch stack slot, eob lookup table prefix
+cglobal vp9_%1_%4_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \
+                                70 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                dst, stride, block, eob
+    mova                m0, [pw_1023]  ; m0 = pw_1023 constant (presumably the 10-bit pixel maximum -- confirm)
+
+.body:  ; shared entry point: the _12 variant below jumps here with m0 = pw_4095
+    mova   [rsp+64*mmsize], m0  ; keep m0 in stack slot 64; it is passed to ROUND_AND_STORE_4x4 in the second pass
+    DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak  ; eob is renamed cnt from here on
+%if ARCH_X86_64
+    mov            dstbakq, dstq  ; keep the original dst (x86-32 uses dstm instead)
+    movsxd            cntq, cntd
+%endif
+%ifdef PIC
+    lea               ptrq, [%7_16x16]
+    movzx             cntd, byte [ptrq+cntq-1]  ; cnt = table[eob - 1]: number of pass-1 iterations to run
+%else
+    movzx             cntd, byte [%7_16x16+cntq-1]  ; cnt = table[eob - 1]: number of pass-1 iterations to run
+%endif
+    mov              skipd, 4  ; skip = 4 - cnt: pass-1 iterations that are replaced by zero fill
+    sub              skipd, cntd
+    mov               ptrq, rsp  ; pass-1 output goes to the start of the stack buffer
+.loop_1:
+    %2_1D           blockq  ; first 1-D pass, reading the coefficient block
+
+    TRANSPOSE4x4D        0, 1, 2, 3, 7  ; out4-7
+    mova  [ptrq+ 1*mmsize], m0
+    mova  [ptrq+ 5*mmsize], m1
+    mova  [ptrq+ 9*mmsize], m2
+    mova  [ptrq+13*mmsize], m3
+    mova                m7, [rsp+65*mmsize]  ; out11 is left in stack slot 65 by both 1-D macros
+    TRANSPOSE4x4D        4, 5, 6, 7, 0  ; out8-11
+    mova  [ptrq+ 2*mmsize], m4
+    mova  [ptrq+ 6*mmsize], m5
+    mova  [ptrq+10*mmsize], m6
+    mova  [ptrq+14*mmsize], m7
+    UNSCRATCH               0, 8, rsp+(%3+0)*mmsize  ; out0-3 from scratch slots 8-11 (stack base differs per 1-D macro)
+    UNSCRATCH               1, 9, rsp+(%3+1)*mmsize
+    UNSCRATCH               2, 10, rsp+(%3+2)*mmsize
+    UNSCRATCH               3, 11, rsp+(%3+3)*mmsize
+    TRANSPOSE4x4D        0, 1, 2, 3, 7
+    mova  [ptrq+ 0*mmsize], m0
+    mova  [ptrq+ 4*mmsize], m1
+    mova  [ptrq+ 8*mmsize], m2
+    mova  [ptrq+12*mmsize], m3
+    UNSCRATCH               4, 12, rsp+(%3+4)*mmsize  ; out12-15 from scratch slots 12-15
+    UNSCRATCH               5, 13, rsp+(%3+5)*mmsize
+    UNSCRATCH               6, 14, rsp+(%3+6)*mmsize
+    UNSCRATCH               7, 15, rsp+(%3+7)*mmsize
+    TRANSPOSE4x4D        4, 5, 6, 7, 0
+    mova  [ptrq+ 3*mmsize], m4
+    mova  [ptrq+ 7*mmsize], m5
+    mova  [ptrq+11*mmsize], m6
+    mova  [ptrq+15*mmsize], m7
+    add               ptrq, 16 * mmsize
+    add             blockq, mmsize  ; next group of input columns
+    dec               cntd
+    jg .loop_1
+
+    ; zero-pad the remainder (skipped cols)
+    test             skipd, skipd
+    jz .end
+    add              skipd, skipd  ; each .loop_z iteration clears 8*mmsize, half of one pass-1 iteration
+    lea             blockq, [blockq+skipq*(mmsize/2)]  ; advance blockq as if the skipped iterations had run, so it ends at block+4*mmsize
+    pxor                m0, m0
+.loop_z:
+    mova   [ptrq+mmsize*0], m0
+    mova   [ptrq+mmsize*1], m0
+    mova   [ptrq+mmsize*2], m0
+    mova   [ptrq+mmsize*3], m0
+    mova   [ptrq+mmsize*4], m0
+    mova   [ptrq+mmsize*5], m0
+    mova   [ptrq+mmsize*6], m0
+    mova   [ptrq+mmsize*7], m0
+    add               ptrq, 8 * mmsize
+    dec              skipd
+    jg .loop_z
+.end:
+
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak  ; skip is no longer needed; its slot is renamed stride3
+    lea           stride3q, [strideq*3]
+    mov               cntd, 4  ; second pass always runs four iterations
+    mov               ptrq, rsp  ; second pass reads the intermediate buffer from its start
+.loop_2:
+    %5_1D             ptrq  ; second 1-D pass, reading the stack buffer
+
+    pxor                m7, m7
+    lea               dstq, [dstq+strideq*4]  ; rows 4-7 first: m0-3 hold out4-7
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m7, [rsp+64*mmsize], [pd_32], 6  ; macro defined outside this view; presumably rounds, shifts by 6, adds to dst and clips -- confirm
+    lea               dstq, [dstq+strideq*4]  ; rows 8-11
+    mova                m0, [rsp+65*mmsize]  ; out11 from stack slot 65
+    mova                m1, [rsp+64*mmsize]  ; the constant saved at .body
+    mova                m2, [pd_32]
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, m1, m2, 6
+
+%if ARCH_X86_64
+    DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst  ; swap the names so that dstq temporarily refers to the saved original pointer
+%else
+    mov               dstq, dstm  ; x86-32: reload the current column base from the stack argument
+%endif
+    UNSCRATCH               0, 8, rsp+(%6+0)*mmsize  ; rows 0-3: out0-3 from scratch slots 8-11
+    UNSCRATCH               4, 9, rsp+(%6+1)*mmsize
+    UNSCRATCH               5, 10, rsp+(%6+2)*mmsize
+    UNSCRATCH               3, 11, rsp+(%6+3)*mmsize
+    ROUND_AND_STORE_4x4  0, 4, 5, 3, m7, m1, m2, 6
+%if ARCH_X86_64
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak  ; restore the normal names
+    lea               dstq, [dstbakq+stride3q*4]  ; rows 12-15 (3*stride*4 = 12 rows down)
+%else
+    lea               dstq, [dstq+stride3q*4]  ; rows 12-15 (3*stride*4 = 12 rows down)
+%endif
+    UNSCRATCH               4, 12, rsp+(%6+4)*mmsize  ; out12-15 from scratch slots 12-15
+    UNSCRATCH               5, 13, rsp+(%6+5)*mmsize
+    UNSCRATCH               6, 14, rsp+(%6+6)*mmsize
+    UNSCRATCH               0, 15, rsp+(%6+7)*mmsize
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, m1, m2, 6
+
+    add               ptrq, mmsize  ; next mmsize bytes of the intermediate buffer
+%if ARCH_X86_64
+    add            dstbakq, 8  ; next output column group: 8 bytes to the right
+    mov               dstq, dstbakq
+%else
+    add         dword dstm, 8  ; x86-32 keeps the column base in the stack argument itself
+    mov               dstq, dstm
+%endif
+    dec               cntd
+    jg .loop_2
+
+    ; m7 is still zero
+    ZERO_BLOCK blockq-4*mmsize, 64, 16, m7  ; blockq is block+4*mmsize here, so this addresses the block start
+    RET
+
+cglobal vp9_%1_%4_16x16_add_12, 4, 6 + ARCH_X86_64, 16, \
+                                70 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                dst, stride, block, eob
+    mova                m0, [pw_4095]  ; only this constant differs from the _10 variant
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_%4_16x16_add_10 %+ SUFFIX).body  ; tail-jump into the shared body
+%endmacro
+
+INIT_XMM sse2                                              ; select the SSE2/XMM register set for the instantiations below
+IADST16_FN idct,  IDCT16,  67, iadst, IADST16, 70, row     ; emits vp9_idct_iadst_16x16_add_12 (and jumps into the _10 variant's .body)
+IADST16_FN iadst, IADST16, 70, idct,  IDCT16,  67, col     ; emits vp9_iadst_idct_16x16_add_12 (and jumps into the _10 variant's .body)
+IADST16_FN iadst, IADST16, 70, iadst, IADST16, 70, default ; emits vp9_iadst_iadst_16x16_add_12 (and jumps into the _10 variant's .body)
+
+%macro IDCT32_1D 2-3 8 * mmsize; pass[1/2], src, src_stride (src_stride defaults to 8 * mmsize)
+    IDCT16_1D %2, 2 * %3, 272, 257                  ; 16-point stage over src with doubled stride; NOTE(review): 272/257 look like stack slot indices - confirm against IDCT16_1D
+%if ARCH_X86_64
+    mova  [rsp+257*mmsize], m8                      ; x86-64 only: save m8-m15 to slots 257-264
+    mova  [rsp+258*mmsize], m9
+    mova  [rsp+259*mmsize], m10
+    mova  [rsp+260*mmsize], m11
+    mova  [rsp+261*mmsize], m12
+    mova  [rsp+262*mmsize], m13
+    mova  [rsp+263*mmsize], m14
+    mova  [rsp+264*mmsize], m15
+%endif
+    mova  [rsp+265*mmsize], m0                      ; save m0-m6 to slots 265-271
+    mova  [rsp+266*mmsize], m1
+    mova  [rsp+267*mmsize], m2
+    mova  [rsp+268*mmsize], m3
+    mova  [rsp+269*mmsize], m4
+    mova  [rsp+270*mmsize], m5
+    mova  [rsp+271*mmsize], m6                      ; slot 272 (read later as t11a) is not written here; presumably filled by IDCT16_1D - confirm
+
+    ; r257-260: t0-3
+    ; r265-272: t4/5a/6a/7/8/9a/10/11a
+    ; r261-264: t12a/13/14a/15
+    ; r273-274 is free as scratch space, and 275-282 mirrors m8-15 on 32bit
+
+    mova                m0, [%2+ 1*%3]              ; in1
+    mova                m1, [%2+15*%3]              ; in15
+    mova                m2, [%2+17*%3]              ; in17
+    mova                m3, [%2+31*%3]              ; in31
+    SUMSUB_MUL           0, 3, 4, 5, 16364,  804    ; m0=t31a, m3=t16a
+    SUMSUB_MUL           2, 1, 4, 5, 11003, 12140   ; m2=t30a, m1=t17a
+    SUMSUB_BA         d, 1, 3, 4                    ; m1=t16, m3=t17
+    SUMSUB_BA         d, 2, 0, 4                    ; m2=t31, m0=t30
+    SUMSUB_MUL           0, 3, 4, 5, 16069,  3196   ; m0=t30a, m3=t17a
+    SCRATCH              0, 8, rsp+275*mmsize       ; park t30a (m8 or slot 275)
+    SCRATCH              2, 9, rsp+276*mmsize       ; park t31 (m9 or slot 276)
+
+    ; end of stage 1-3 first quart
+
+    mova                m0, [%2+ 7*%3]              ; in7
+    mova                m2, [%2+ 9*%3]              ; in9
+    mova                m4, [%2+23*%3]              ; in23
+    mova                m5, [%2+25*%3]              ; in25
+    SUMSUB_MUL           2, 4, 6, 7, 14811,  7005   ; m2=t29a, m4=t18a
+    SUMSUB_MUL           5, 0, 6, 7,  5520, 15426   ; m5=t28a, m0=t19a
+    SUMSUB_BA         d, 4, 0, 6                    ; m4=t19, m0=t18
+    SUMSUB_BA         d, 2, 5, 6                    ; m2=t28, m5=t29
+    SUMSUB_MUL           5, 0, 6, 7,  3196, m16069  ; m5=t29a, m0=t18a
+
+    ; end of stage 1-3 second quart
+
+    SUMSUB_BA         d, 4, 1, 6                    ; m4=t16a, m1=t19a
+    SUMSUB_BA         d, 0, 3, 6                    ; m0=t17, m3=t18
+    UNSCRATCH            6, 8, rsp+275*mmsize       ; t30a
+    UNSCRATCH            7, 9, rsp+276*mmsize       ; t31
+    mova  [rsp+273*mmsize], m4                      ; slot 273 = t16a
+    mova  [rsp+274*mmsize], m0                      ; slot 274 = t17
+    SUMSUB_BA         d, 2, 7, 0                    ; m2=t31a, m7=t28a
+    SUMSUB_BA         d, 5, 6, 0                    ; m5=t30, m6=t29
+    SUMSUB_MUL           6, 3, 0, 4, 15137,  6270   ; m6=t29a, m3=t18a
+    SUMSUB_MUL           7, 1, 0, 4, 15137,  6270   ; m7=t28, m1=t19
+    SCRATCH              3, 10, rsp+277*mmsize
+    SCRATCH              1, 11, rsp+278*mmsize
+    SCRATCH              7, 12, rsp+279*mmsize
+    SCRATCH              6, 13, rsp+280*mmsize
+    SCRATCH              5, 14, rsp+281*mmsize
+    SCRATCH              2, 15, rsp+282*mmsize
+
+    ; end of stage 4-5 first half
+
+    mova                m0, [%2+ 5*%3]              ; in5
+    mova                m1, [%2+11*%3]              ; in11
+    mova                m2, [%2+21*%3]              ; in21
+    mova                m3, [%2+27*%3]              ; in27
+    SUMSUB_MUL           0, 3, 4, 5, 15893,  3981   ; m0=t27a, m3=t20a
+    SUMSUB_MUL           2, 1, 4, 5,  8423, 14053   ; m2=t26a, m1=t21a
+    SUMSUB_BA         d, 1, 3, 4                    ; m1=t20, m3=t21
+    SUMSUB_BA         d, 2, 0, 4                    ; m2=t27, m0=t26
+    SUMSUB_MUL           0, 3, 4, 5,  9102, 13623   ; m0=t26a, m3=t21a
+    SCRATCH              0, 8, rsp+275*mmsize
+    SCRATCH              2, 9, rsp+276*mmsize
+
+    ; end of stage 1-3 third quart
+
+    mova                m0, [%2+ 3*%3]              ; in3
+    mova                m2, [%2+13*%3]              ; in13
+    mova                m4, [%2+19*%3]              ; in19
+    mova                m5, [%2+29*%3]              ; in29
+    SUMSUB_MUL           2, 4, 6, 7, 13160,  9760   ; m2=t25a, m4=t22a
+    SUMSUB_MUL           5, 0, 6, 7,  2404, 16207   ; m5=t24a, m0=t23a
+    SUMSUB_BA         d, 4, 0, 6                    ; m4=t23, m0=t22
+    SUMSUB_BA         d, 2, 5, 6                    ; m2=t24, m5=t25
+    SUMSUB_MUL           5, 0, 6, 7, 13623, m9102   ; m5=t25a, m0=t22a
+
+    ; end of stage 1-3 fourth quart
+
+    SUMSUB_BA         d, 1, 4, 6                    ; m1=t23a, m4=t20a
+    SUMSUB_BA         d, 3, 0, 6                    ; m3=t22, m0=t21
+    UNSCRATCH            6, 8, rsp+275*mmsize       ; t26a
+    UNSCRATCH            7, 9, rsp+276*mmsize       ; t27
+    SCRATCH              3, 8, rsp+275*mmsize
+    SCRATCH              1, 9, rsp+276*mmsize
+    SUMSUB_BA         d, 7, 2, 1                    ; m7=t24a, m2=t27a
+    SUMSUB_BA         d, 6, 5, 1                    ; m6=t25, m5=t26
+    SUMSUB_MUL           2, 4, 1, 3,  6270, m15137  ; m2=t27, m4=t20
+    SUMSUB_MUL           5, 0, 1, 3,  6270, m15137  ; m5=t26a, m0=t21a
+
+    ; end of stage 4-5 second half
+
+    UNSCRATCH            1, 12, rsp+279*mmsize      ; t28
+    UNSCRATCH            3, 13, rsp+280*mmsize      ; t29a
+    SCRATCH              4, 12, rsp+279*mmsize
+    SCRATCH              0, 13, rsp+280*mmsize
+    SUMSUB_BA         d, 5, 3, 0                    ; m5=t29, m3=t26
+    SUMSUB_BA         d, 2, 1, 0                    ; m2=t28a, m1=t27a
+    UNSCRATCH            0, 14, rsp+281*mmsize      ; t30
+    UNSCRATCH            4, 15, rsp+282*mmsize      ; t31a
+    SCRATCH              2, 14, rsp+281*mmsize
+    SCRATCH              5, 15, rsp+282*mmsize
+    SUMSUB_BA         d, 6, 0, 2                    ; m6=t30a, m0=t25a
+    SUMSUB_BA         d, 7, 4, 2                    ; m7=t31, m4=t24
+
+    mova                m2, [rsp+273*mmsize]        ; t16a
+    mova                m5, [rsp+274*mmsize]        ; t17
+    mova  [rsp+273*mmsize], m6                      ; slot 273 now holds t30a
+    mova  [rsp+274*mmsize], m7                      ; slot 274 now holds t31
+    UNSCRATCH            6, 10, rsp+277*mmsize      ; t18a
+    UNSCRATCH            7, 11, rsp+278*mmsize      ; t19
+    SCRATCH              4, 10, rsp+277*mmsize
+    SCRATCH              0, 11, rsp+278*mmsize
+    UNSCRATCH            4, 12, rsp+279*mmsize      ; t20
+    UNSCRATCH            0, 13, rsp+280*mmsize      ; t21a
+    SCRATCH              3, 12, rsp+279*mmsize
+    SCRATCH              1, 13, rsp+280*mmsize
+    SUMSUB_BA         d, 0, 6, 1                    ; m0=t18, m6=t21
+    SUMSUB_BA         d, 4, 7, 1                    ; m4=t19a, m7=t20a
+    UNSCRATCH            3, 8, rsp+275*mmsize       ; t22
+    UNSCRATCH            1, 9, rsp+276*mmsize       ; t23a
+    SCRATCH              0, 8, rsp+275*mmsize
+    SCRATCH              4, 9, rsp+276*mmsize
+    SUMSUB_BA         d, 3, 5, 0                    ; m3=t17a, m5=t22a
+    SUMSUB_BA         d, 1, 2, 0                    ; m1=t16, m2=t23
+
+    ; end of stage 6
+
+    UNSCRATCH            0, 10, rsp+277*mmsize      ; t24
+    UNSCRATCH            4, 11, rsp+278*mmsize      ; t25a
+    SCRATCH              1, 10, rsp+277*mmsize
+    SCRATCH              3, 11, rsp+278*mmsize
+    SUMSUB_MUL           0, 2, 1, 3, 11585, 11585   ; m0=t24a, m2=t23a
+    SUMSUB_MUL           4, 5, 1, 3, 11585, 11585   ; m4=t25, m5=t22
+    UNSCRATCH            1, 12, rsp+279*mmsize      ; t26
+    UNSCRATCH            3, 13, rsp+280*mmsize      ; t27a
+    SCRATCH              0, 12, rsp+279*mmsize
+    SCRATCH              4, 13, rsp+280*mmsize
+    SUMSUB_MUL           3, 7, 0, 4, 11585, 11585   ; m3=t27, m7=t20
+    SUMSUB_MUL           1, 6, 0, 4, 11585, 11585   ; m1=t26a, m6=t21a
+
+    ; end of stage 7
+
+    mova                m0, [rsp+269*mmsize]        ; t8
+    mova                m4, [rsp+270*mmsize]        ; t9a
+    mova  [rsp+269*mmsize], m1                      ; t26a
+    mova  [rsp+270*mmsize], m3                      ; t27
+    mova                m3, [rsp+271*mmsize]        ; t10
+    SUMSUB_BA         d, 2, 0, 1                    ; m2=out8, m0=out23
+    SUMSUB_BA         d, 5, 4, 1                    ; m5=out9, m4=out22
+    SUMSUB_BA         d, 6, 3, 1                    ; m6=out10, m3=out21
+    mova                m1, [rsp+272*mmsize]        ; t11a
+    mova  [rsp+271*mmsize], m0                      ; slot 271 = out23 (m0 is needed as a temporary below)
+    SUMSUB_BA         d, 7, 1, 0                    ; m7=out11, m1=out20
+
+%if %1 == 1
+    TRANSPOSE4x4D        2, 5, 6, 7, 0              ; pass 1: transpose out8-11 and store to [ptrq]
+    mova  [ptrq+ 2*mmsize], m2
+    mova  [ptrq+10*mmsize], m5
+    mova  [ptrq+18*mmsize], m6
+    mova  [ptrq+26*mmsize], m7
+%else ; %1 == 2
+    pxor                m0, m0
+    lea               dstq, [dstq+strideq*8]        ; dstq += 8*stride (rows for out8-11)
+    ROUND_AND_STORE_4x4  2, 5, 6, 7, m0, [rsp+256*mmsize], [pd_32], 6
+%endif
+    mova                m2, [rsp+271*mmsize]        ; reload out23
+%if %1 == 1
+    TRANSPOSE4x4D        1, 3, 4, 2, 0              ; pass 1: transpose out20-23 and store to [ptrq]
+    mova  [ptrq+ 5*mmsize], m1
+    mova  [ptrq+13*mmsize], m3
+    mova  [ptrq+21*mmsize], m4
+    mova  [ptrq+29*mmsize], m2
+%else ; %1 == 2
+    lea               dstq, [dstq+stride3q*4]       ; dstq += 12*stride (rows for out20-23)
+    ROUND_AND_STORE_4x4  1, 3, 4, 2, m0, [rsp+256*mmsize], [pd_32], 6
+%endif
+
+    ; end of last stage + store for out8-11 and out20-23
+
+    UNSCRATCH            0, 9, rsp+276*mmsize       ; t19a
+    UNSCRATCH            1, 8, rsp+275*mmsize       ; t18
+    UNSCRATCH            2, 11, rsp+278*mmsize      ; t17a
+    UNSCRATCH            3, 10, rsp+277*mmsize      ; t16
+    mova                m7, [rsp+261*mmsize]        ; t12a
+    mova                m6, [rsp+262*mmsize]        ; t13
+    mova                m5, [rsp+263*mmsize]        ; t14a
+    SUMSUB_BA         d, 0, 7, 4                    ; m0=out12, m7=out19
+    SUMSUB_BA         d, 1, 6, 4                    ; m1=out13, m6=out18
+    SUMSUB_BA         d, 2, 5, 4                    ; m2=out14, m5=out17
+    mova                m4, [rsp+264*mmsize]        ; t15
+    SCRATCH              7, 8, rsp+275*mmsize       ; park out19 so m7 can serve as a temporary
+    SUMSUB_BA         d, 3, 4, 7                    ; m3=out15, m4=out16
+
+%if %1 == 1
+    TRANSPOSE4x4D        0, 1, 2, 3, 7              ; pass 1: transpose out12-15 and store to [ptrq]
+    mova  [ptrq+ 3*mmsize], m0
+    mova  [ptrq+11*mmsize], m1
+    mova  [ptrq+19*mmsize], m2
+    mova  [ptrq+27*mmsize], m3
+%else ; %1 == 2
+%if ARCH_X86_64
+    SWAP                 7, 9
+    lea               dstq, [dstbakq+stride3q*4]    ; dstq = dstbak + 12*stride (rows for out12-15)
+%else ; x86-32
+    pxor                m7, m7
+    mov               dstq, dstm                    ; x86-32: reload the dst base from dstm
+    lea               dstq, [dstq+stride3q*4]
+%endif
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+    UNSCRATCH            0, 8, rsp+275*mmsize       ; out19
+%if %1 == 1
+    TRANSPOSE4x4D        4, 5, 6, 0, 7              ; pass 1: transpose out16-19 and store to [ptrq]
+    mova  [ptrq+ 4*mmsize], m4
+    mova  [ptrq+12*mmsize], m5
+    mova  [ptrq+20*mmsize], m6
+    mova  [ptrq+28*mmsize], m0
+%else ; %1 == 2
+    lea               dstq, [dstq+strideq*4]        ; dstq += 4*stride (rows for out16-19)
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+
+    ; end of last stage + store for out12-19
+
+%if ARCH_X86_64
+    SWAP                 7, 8
+%endif
+    mova                m7, [rsp+257*mmsize]        ; t0
+    mova                m6, [rsp+258*mmsize]        ; t1
+    mova                m5, [rsp+259*mmsize]        ; t2
+    mova                m4, [rsp+260*mmsize]        ; t3
+    mova                m0, [rsp+274*mmsize]        ; t31
+    mova                m1, [rsp+273*mmsize]        ; t30a
+    UNSCRATCH            2, 15, rsp+282*mmsize      ; t29
+    SUMSUB_BA         d, 0, 7, 3                    ; m0=out0, m7=out31
+    SUMSUB_BA         d, 1, 6, 3                    ; m1=out1, m6=out30
+    SUMSUB_BA         d, 2, 5, 3                    ; m2=out2, m5=out29
+    SCRATCH              0, 9, rsp+276*mmsize       ; park out0 so m0 can serve as a temporary
+    UNSCRATCH            3, 14, rsp+281*mmsize      ; t28a
+    SUMSUB_BA         d, 3, 4, 0                    ; m3=out3, m4=out28
+
+%if %1 == 1
+    TRANSPOSE4x4D        4, 5, 6, 7, 0              ; pass 1: transpose out28-31 and store to [ptrq]
+    mova  [ptrq+ 7*mmsize], m4
+    mova  [ptrq+15*mmsize], m5
+    mova  [ptrq+23*mmsize], m6
+    mova  [ptrq+31*mmsize], m7
+%else ; %1 == 2
+%if ARCH_X86_64
+    SWAP                 0, 8
+%else ; x86-32
+    pxor                m0, m0
+%endif
+    lea               dstq, [dstq+stride3q*4]       ; dstq += 12*stride (rows for out28-31)
+    ROUND_AND_STORE_4x4  4, 5, 6, 7, m0, [rsp+256*mmsize], [pd_32], 6
+%endif
+    UNSCRATCH            7, 9, rsp+276*mmsize       ; out0
+%if %1 == 1
+    TRANSPOSE4x4D        7, 1, 2, 3, 0              ; pass 1: transpose out0-3 and store to [ptrq]
+    mova  [ptrq+ 0*mmsize], m7
+    mova  [ptrq+ 8*mmsize], m1
+    mova  [ptrq+16*mmsize], m2
+    mova  [ptrq+24*mmsize], m3
+%else ; %1 == 2
+%if ARCH_X86_64
+    DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst
+%else ; x86-32
+    mov               dstq, dstm                    ; x86-32: reload the dst base (rows for out0-3)
+%endif
+    ROUND_AND_STORE_4x4  7, 1, 2, 3, m0, [rsp+256*mmsize], [pd_32], 6
+%if ARCH_X86_64
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+%endif
+%endif
+
+    ; end of last stage + store for out0-3 and out28-31
+
+%if ARCH_X86_64
+    SWAP                 0, 8
+%endif
+    mova                m7, [rsp+265*mmsize]        ; t4
+    mova                m6, [rsp+266*mmsize]        ; t5a
+    mova                m5, [rsp+267*mmsize]        ; t6a
+    mova                m4, [rsp+268*mmsize]        ; t7
+    mova                m0, [rsp+270*mmsize]        ; t27
+    mova                m1, [rsp+269*mmsize]        ; t26a
+    UNSCRATCH            2, 13, rsp+280*mmsize      ; t25
+    SUMSUB_BA         d, 0, 7, 3                    ; m0=out4, m7=out27
+    SUMSUB_BA         d, 1, 6, 3                    ; m1=out5, m6=out26
+    SUMSUB_BA         d, 2, 5, 3                    ; m2=out6, m5=out25
+    UNSCRATCH            3, 12, rsp+279*mmsize      ; t24a
+    SCRATCH              7, 9, rsp+276*mmsize       ; park out27 so m7 can serve as a temporary
+    SUMSUB_BA         d, 3, 4, 7                    ; m3=out7, m4=out24
+
+%if %1 == 1
+    TRANSPOSE4x4D        0, 1, 2, 3, 7              ; pass 1: transpose out4-7 and store to [ptrq]
+    mova  [ptrq+ 1*mmsize], m0
+    mova  [ptrq+ 9*mmsize], m1
+    mova  [ptrq+17*mmsize], m2
+    mova  [ptrq+25*mmsize], m3
+%else ; %1 == 2
+%if ARCH_X86_64
+    SWAP                 7, 8
+    lea               dstq, [dstbakq+strideq*4]     ; dstq = dstbak + 4*stride (rows for out4-7)
+%else ; x86-32
+    pxor                m7, m7
+    lea               dstq, [dstq+strideq*4]        ; dstq (reloaded from dstm above) += 4*stride
+%endif
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+    UNSCRATCH            0, 9, rsp+276*mmsize       ; out27
+%if %1 == 1
+    TRANSPOSE4x4D        4, 5, 6, 0, 7              ; pass 1: transpose out24-27 and store to [ptrq]
+    mova  [ptrq+ 6*mmsize], m4
+    mova  [ptrq+14*mmsize], m5
+    mova  [ptrq+22*mmsize], m6
+    mova  [ptrq+30*mmsize], m0
+%else ; %1 == 2
+%if ARCH_X86_64
+    lea               dstq, [dstbakq+stride3q*8]    ; dstq = dstbak + 24*stride (rows for out24-27)
+%else
+    mov               dstq, dstm
+    lea               dstq, [dstq+stride3q*8]
+%endif
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+
+    ; end of last stage + store for out4-7 and out24-27
+%endmacro
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_32x32_add_10, 4, 6 + ARCH_X86_64, 16, \
+                                    275 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                    dst, stride, block, eob
+    mova                m0, [pw_1023]               ; presumably the 10-bit pixel maximum used for clamping - confirm
+    cmp               eobd, 1
+    jg .idctfull                                    ; eob > 1: run the full two-pass transform
+
+    ; dc-only - the 10bit version can be done entirely in 32bit, since the max
+    ; coef values are 17+sign bit, and the coef is 14bit, so 31+sign easily
+    ; fits in 32bit
+    DEFINE_ARGS dst, stride, block, coef
+    pxor                m2, m2
+    DC_ONLY              6, m2
+    movd                m1, coefd
+    pshuflw             m1, m1, q0000               ; broadcast the low word of coef ...
+    punpcklqdq          m1, m1                      ; ... to all 8 word lanes of m1
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 32                      ; one iteration per row, 32 rows
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0, dstq,          mmsize
+    STORE_2x8            3, 4, 1, m2, m0, dstq+mmsize*2, mmsize
+    add               dstq, strideq
+    dec               cntd
+    jg .loop_dc
+    RET
+
+.idctfull:                                          ; also entered from the _12 variant with m0 = [pw_4095]
+    mova  [rsp+256*mmsize], m0                      ; slot 256 = the vector loaded into m0 at entry
+    DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak
+%if ARCH_X86_64
+    mov            dstbakq, dstq
+    movsxd            cntq, cntd                    ; cnt occupies the eob argument slot; sign-extend it for indexing
+%endif
+%ifdef PIC
+    lea               ptrq, [default_32x32]
+    movzx             cntd, byte [ptrq+cntq-1]      ; cnt = default_32x32[eob - 1]
+%else
+    movzx             cntd, byte [default_32x32+cntq-1] ; cnt = default_32x32[eob - 1]
+%endif
+    mov              skipd, 8
+    sub              skipd, cntd                    ; skip = 8 - cnt = pass-1 iterations not executed
+    mov               ptrq, rsp                     ; pass-1 output is written starting at rsp
+.loop_1:
+    IDCT32_1D            1, blockq
+
+    add               ptrq, 32 * mmsize
+    add             blockq, mmsize
+    dec               cntd
+    jg .loop_1
+
+    ; zero-pad the remainder (skipped cols)
+    test             skipd, skipd
+    jz .end
+    shl              skipd, 2                       ; 4 zeroing iterations (8*mmsize each) per skipped 32*mmsize slice
+    lea             blockq, [blockq+skipq*(mmsize/4)] ; skip is already x4, so block advances mmsize per skipped iteration
+    pxor                m0, m0
+.loop_z:
+    mova   [ptrq+mmsize*0], m0
+    mova   [ptrq+mmsize*1], m0
+    mova   [ptrq+mmsize*2], m0
+    mova   [ptrq+mmsize*3], m0
+    mova   [ptrq+mmsize*4], m0
+    mova   [ptrq+mmsize*5], m0
+    mova   [ptrq+mmsize*6], m0
+    mova   [ptrq+mmsize*7], m0
+    add               ptrq, 8 * mmsize
+    dec              skipd
+    jg .loop_z
+.end:
+
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+    lea           stride3q, [strideq*3]
+    mov               cntd, 8                       ; pass 2 runs 8 times
+    mov               ptrq, rsp                     ; pass 2 reads the pass-1 output back from rsp
+.loop_2:
+    IDCT32_1D            2, ptrq
+
+    add               ptrq, mmsize
+%if ARCH_X86_64
+    add            dstbakq, 8                       ; advance the dst base by 8 bytes for the next iteration
+    mov               dstq, dstbakq
+%else
+    add         dword dstm, 8                       ; x86-32 keeps the dst base in dstm
+    mov               dstq, dstm
+%endif
+    dec               cntd
+    jg .loop_2
+
+    ; m7 is still zero
+    ZERO_BLOCK blockq-8*mmsize, 128, 32, m7         ; blockq advanced 8*mmsize in total above, so rewind to the block start
+    RET
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_32x32_add_12, 4, 6 + ARCH_X86_64, 16, \
+                                    275 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                    dst, stride, block, eob
+    mova                m0, [pw_4095]               ; presumably the 12-bit pixel maximum used for clamping - confirm
+    cmp               eobd, 1
+    jg mangle(private_prefix %+ _ %+ vp9_idct_idct_32x32_add_10 %+ SUFFIX).idctfull ; eob > 1: share the 10-bit full-transform body
+
+    ; dc-only - unfortunately, this one can overflow, since coefs are 19+sign
+    ; bpp, and 19+14+sign does not fit in 32bit, so we do 2-stage multiplies
+    DEFINE_ARGS dst, stride, block, coef, coefl
+    pxor                m2, m2
+    DC_ONLY_64BIT        6, m2
+    movd                m1, coefd
+    pshuflw             m1, m1, q0000               ; broadcast the low word of coef ...
+    punpcklqdq          m1, m1                      ; ... to all 8 word lanes of m1
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 32                      ; one iteration per row, 32 rows
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0, dstq,          mmsize
+    STORE_2x8            3, 4, 1, m2, m0, dstq+mmsize*2, mmsize
+    add               dstq, strideq
+    dec               cntd
+    jg .loop_dc
+    RET
diff --git a/media/ffvpx/libavcodec/x86/vp9itxfm_template.asm b/media/ffvpx/libavcodec/x86/vp9itxfm_template.asm
new file mode 100644
index 0000000000..d2f2257d84
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9itxfm_template.asm
@@ -0,0 +1,142 @@
+;******************************************************************************
+;* VP9 IDCT SIMD optimizations
+;*
+;* Copyright (C) 2013 Clément Bœsch <u pkh me>
+;* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%macro VP9_IWHT4_1D 0 ; one 1-D pass on word lanes of m0-m3, in place; m4 and m5 are clobbered as temporaries
+    SWAP                 1, 2, 3
+    paddw               m0, m2
+    psubw               m3, m1
+    psubw               m4, m0, m3
+    psraw               m4, 1                       ; m4 = (m0 - m3) >> 1, arithmetic shift
+    psubw               m5, m4, m1
+    SWAP                 5, 1                       ; the value just computed in m5 becomes the new m1
+    psubw               m4, m2
+    SWAP                 4, 2                       ; the value in m4 becomes the new m2
+    psubw               m0, m1
+    paddw               m3, m2
+    SWAP                 3, 2, 1
+%endmacro
+
+; (a*x + b*y + round) >> shift, with shift fixed at 14; input is word pairs, output is dwords
+%macro VP9_MULSUB_2W_2X 5 ; dst1, dst2/src, round, coefs1, coefs2
+    pmaddwd            m%1, m%2, %4                 ; dst1 = src word pairs * coefs1, summed per dword
+    pmaddwd            m%2,  %5                     ; dst2 = src word pairs * coefs2 (overwrites src)
+    paddd              m%1,  %3                     ; add the rounding term to both results
+    paddd              m%2,  %3
+    psrad              m%1,  14                     ; arithmetic shift right by 14
+    psrad              m%2,  14
+%endmacro
+
+%macro VP9_MULSUB_2W_4X 7 ; dst1, dst2, coef1, coef2, rnd, tmp1/src, tmp2 (the constants used are named pw_m<coef1>_<coef2> and pw_<coef2>_<coef1>)
+    VP9_MULSUB_2W_2X    %7,  %6,  %5, [pw_m%3_%4], [pw_%4_%3] ; process tmp1/src; results land in tmp2 and tmp1
+    VP9_MULSUB_2W_2X    %1,  %2,  %5, [pw_m%3_%4], [pw_%4_%3] ; process dst2 as source; results land in dst1 and dst2
+    packssdw           m%1, m%7                     ; pack dwords back to signed-saturated words: dst1 | tmp2
+    packssdw           m%2, m%6                     ; dst2 | tmp1
+%endmacro
+
+%macro VP9_UNPACK_MULSUB_2W_4X 7-9 ; dst1, dst2, (src1, src2,) coef1, coef2, rnd, tmp1, tmp2
+%if %0 == 7
+    punpckhwd          m%6, m%2, m%1                ; 7-arg form: dst1/dst2 are also the sources; tmp1 = high-half word interleave
+    punpcklwd          m%2, m%1                     ; dst2 = low-half word interleave
+    VP9_MULSUB_2W_4X   %1, %2, %3, %4, %5, %6, %7
+%else
+    punpckhwd          m%8, m%4, m%3                ; 9-arg form: interleave the separate src1/src2 registers instead
+    punpcklwd          m%2, m%4, m%3
+    VP9_MULSUB_2W_4X   %1, %2, %5, %6, %7, %8, %9
+%endif
+%endmacro
+
+%macro VP9_IDCT4_1D_FINALIZE 0 ; final butterflies on m0-m3 (word lanes); the third SUMSUB_BA argument is m4
+    SUMSUB_BA            w, 3, 2, 4                         ; m3=t3+t0, m2=-t3+t0
+    SUMSUB_BA            w, 1, 0, 4                         ; m1=t2+t1, m0=-t2+t1
+    SWAP                 0, 3, 2                            ; 3102 -> 0123
+%endmacro
+
+%macro VP9_IDCT4_1D 0 ; in/out: m0-m3; reads m6 as the pmulhrsw multiplier (ssse3) and m7 as the rounding term; m4/m5 are temporaries
+%if cpuflag(ssse3)
+    SUMSUB_BA            w, 2, 0, 4                         ; m2=IN(0)+IN(2) m0=IN(0)-IN(2)
+    pmulhrsw            m2, m6                              ; m2=t0
+    pmulhrsw            m0, m6                              ; m0=t1
+%else ; <= sse2
+    VP9_UNPACK_MULSUB_2W_4X 0, 2, 11585, 11585, m7, 4, 5    ; m0=t1, m2=t0
+%endif
+    VP9_UNPACK_MULSUB_2W_4X 1, 3, 15137, 6270, m7, 4, 5     ; m1=t2, m3=t3
+    VP9_IDCT4_1D_FINALIZE
+%endmacro
+
+%macro VP9_IADST4_1D 0 ; in/out: m0-m3 (moved to/from xmm via movq2dq/movdq2q); xmm5 is read as the rounding term and never written here
+    movq2dq           xmm0, m0                  ; copy the four inputs into xmm0-xmm3
+    movq2dq           xmm1, m1
+    movq2dq           xmm2, m2
+    movq2dq           xmm3, m3
+%if cpuflag(ssse3)
+    paddw               m3, m0                  ; ssse3 out2 path: m3 += m0 (m2 is subtracted below)
+%endif
+    punpcklwd         xmm0, xmm1                ; interleave words of inputs 0 and 1
+    punpcklwd         xmm2, xmm3                ; interleave words of inputs 2 and 3
+    pmaddwd           xmm1, xmm0, [pw_5283_13377]
+    pmaddwd           xmm4, xmm0, [pw_9929_13377]
+%if notcpuflag(ssse3)
+    pmaddwd           xmm6, xmm0, [pw_13377_0]  ; non-ssse3 out2 path is accumulated in xmm6/xmm7
+%endif
+    pmaddwd           xmm0, [pw_15212_m13377]
+    pmaddwd           xmm3, xmm2, [pw_15212_9929]
+%if notcpuflag(ssse3)
+    pmaddwd           xmm7, xmm2, [pw_m13377_13377]
+%endif
+    pmaddwd           xmm2, [pw_m5283_m15212]
+%if cpuflag(ssse3)
+    psubw               m3, m2                  ; m3 = m0 + m3 - m2, scaled by pw_13377x2 below
+%else
+    paddd             xmm6, xmm7
+%endif
+    paddd             xmm0, xmm2
+    paddd             xmm3, xmm5                ; add the rounding term held in xmm5
+    paddd             xmm2, xmm5
+%if notcpuflag(ssse3)
+    paddd             xmm6, xmm5
+%endif
+    paddd             xmm1, xmm3
+    paddd             xmm0, xmm3
+    paddd             xmm4, xmm2
+    psrad             xmm1, 14                  ; arithmetic shift right by 14
+    psrad             xmm0, 14
+    psrad             xmm4, 14
+%if cpuflag(ssse3)
+    pmulhrsw            m3, [pw_13377x2]        ; out2
+%else
+    psrad             xmm6, 14
+%endif
+    packssdw          xmm0, xmm0                ; pack dwords back to signed-saturated words
+    packssdw          xmm1, xmm1
+    packssdw          xmm4, xmm4
+%if notcpuflag(ssse3)
+    packssdw          xmm6, xmm6
+%endif
+    movdq2q             m0, xmm0                ; out3
+    movdq2q             m1, xmm1                ; out0
+    movdq2q             m2, xmm4                ; out1
+%if notcpuflag(ssse3)
+    movdq2q             m3, xmm6                ; out2
+%endif
+    SWAP                 0, 1, 2, 3
+%endmacro
diff --git a/media/ffvpx/libavcodec/x86/vp9lpf.asm b/media/ffvpx/libavcodec/x86/vp9lpf.asm
new file mode 100644
index 0000000000..4e7ede2235
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9lpf.asm
@@ -0,0 +1,1211 @@
+;******************************************************************************
+;* VP9 loop filter SIMD optimizations
+;*
+;* Copyright (C) 2013-2014 Clément Bœsch <u pkh me>
+;* Copyright (C) 2014 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+cextern pb_3
+cextern pb_80
+
+pb_4:   times 16 db 0x04                ; 16-byte vectors with every byte set to the named value
+pb_10:  times 16 db 0x10
+pb_40:  times 16 db 0x40
+pb_81:  times 16 db 0x81
+pb_f8:  times 16 db 0xf8
+pb_fe:  times 16 db 0xfe
+pb_ff:  times 16 db 0xff
+
+cextern pw_4
+cextern pw_8
+
+; with mix functions, two 8-bit thresholds are stored in a 16-bit storage,
+; the following mask is used to splat both in the same register
+mask_mix: times 8 db 0                  ; bytes 0-7 = 0
+          times 8 db 1                  ; bytes 8-15 = 1
+
+mask_mix84: times 8 db 0xff             ; bytes 0-7 = 0xff
+            times 8 db 0x00             ; bytes 8-15 = 0x00
+mask_mix48: times 8 db 0x00             ; bytes 0-7 = 0x00
+            times 8 db 0xff             ; bytes 8-15 = 0xff
+
+SECTION .text
+
+%macro SCRATCH 3 ; reg index, alternate reg index, spill address (written without brackets)
+%ifdef m8
+    SWAP                %1, %2          ; m8 is defined: swap the register names %1 and %2, no memory access
+%else
+    mova              [%3], m%1         ; m8 is not defined: store m%1 to the spill address
+%endif
+%endmacro
+
+%macro UNSCRATCH 3 ; reg index, alternate reg index, spill address (written without brackets)
+%ifdef m8
+    SWAP                %1, %2          ; m8 is defined: swap the register names back, no memory access
+%else
+    mova               m%1, [%3]        ; m8 is not defined: reload m%1 from the spill address
+%endif
+%endmacro
+
+; %1 = abs(%2-%3), computed on unsigned bytes as the OR of the two saturating differences
+%macro ABSSUB 4 ; dst, src1 (RO), src2 (RO), tmp
+%ifdef m8
+    psubusb             %1, %3, %2      ; dst = src2 - src1, saturating at 0
+    psubusb             %4, %2, %3      ; tmp = src1 - src2, saturating at 0
+%else
+    mova                %1, %3          ; two-operand form: copy first, then subtract in place
+    mova                %4, %2
+    psubusb             %1, %2
+    psubusb             %4, %3
+%endif
+    por                 %1, %4          ; one of the two differences is zero, so OR yields the absolute difference
+%endmacro
+
+; %1 = %1>%2 (per-byte mask from the signed compare pcmpgtb)
+%macro CMP_GT 2-3 ; src/dst, cmp, pb_80
+%if %0 == 3
+    pxor                %1, %3          ; only when a third argument is given: xor it into src/dst before comparing
+%endif
+    pcmpgtb             %1, %2          ; signed byte compare; %2 is used as passed
+%endmacro
+
+; %1 = abs(%2-%3) > %4; the 6th argument defaults to [pb_80] and %5 is clobbered
+%macro ABSSUB_GT 5-6 [pb_80]; dst, src1, src2, cmp, tmp, [pb_80]
+    ABSSUB              %1, %2, %3, %5      ; dst = abs(src1-src2)
+    CMP_GT              %1, %4, %6          ; dst > cmp
+%endmacro
+
+%macro MASK_APPLY 4 ; %1=new_data/dst %2=old_data %3=mask %4=tmp (clobbered); bitwise select between new and old
+    pand                %1, %3              ; new &= mask
+    pandn               %4, %3, %2          ; tmp = ~mask & old
+    por                 %1, %4              ; new&mask | old&~mask
+%endmacro
+
+%macro UNPACK 4 ; h/l, dst, src1, src2: dst = punpck{h,l}bw of src1 and src2
+%ifdef m8
+    punpck%1bw          %2, %3, %4          ; three-operand form when m8 is defined
+%else
+    mova                %2, %3              ; otherwise copy src1 into dst first ...
+    punpck%1bw          %2, %4              ; ... and interleave in place
+%endif
+%endmacro
+
+%macro FILTER_SUBx2_ADDx2 11 ; %1=dst %2=h/l %3=cache %4=stack_off %5=sub1 %6=sub2 %7=add1
+                             ; %8=add2 %9=rshift, [unpack], [unpack_is_mem_on_x86_32]
+    psubw               %3, [rsp+%4+%5*mmsize*2]    ; cache -= stack entry sub1 (entries are mmsize*2 bytes apart)
+    psubw               %3, [rsp+%4+%6*mmsize*2]    ; cache -= stack entry sub2
+    paddw               %3, [rsp+%4+%7*mmsize*2]    ; cache += stack entry add1
+%ifnidn %10, ""
+%if %11 == 0
+    punpck%2bw          %1, %10, m0                 ; add2 is not on the stack yet: unpack it from %10 against m0
+%else
+    UNPACK          %2, %1, %10, m0
+%endif
+    mova [rsp+%4+%8*mmsize*2], %1                   ; store the unpacked words as stack entry add2
+    paddw               %3, %1                      ; cache += add2
+%else
+    paddw               %3, [rsp+%4+%8*mmsize*2]    ; add2 is already on the stack
+%endif
+    psraw               %1, %3, %9                  ; dst = cache >> rshift; cache itself keeps the running sum
+%endmacro
+
+; FIXME interleave l/h better (for instruction pairing)
+%macro FILTER_INIT 9 ; tmp1, tmp2, cacheL, cacheH, dstp, stack_off, filterid, mask, source
+    FILTER%7_INIT       %1, l, %3, %6 +      0      ; low half: expands to FILTER<filterid>_INIT into tmp1/cacheL
+    FILTER%7_INIT       %2, h, %4, %6 + mmsize      ; high half, stack offset shifted by mmsize, into tmp2/cacheH
+    packuswb            %1, %2                      ; pack both halves to unsigned-saturated bytes
+    MASK_APPLY          %1, %9, %8, %2              ; take the filtered bytes where mask is set, source elsewhere
+    mova                %5, %1                      ; write the result to dstp
+%endmacro
+
+
+%macro FILTER_UPDATE 12-16 "", "", "", 0 ; tmp1, tmp2, cacheL, cacheH, dstp, stack_off, -, -, +, +, rshift,
+                                         ; mask, [source], [unpack + src], [unpack_is_mem_on_x86_32]
+; FIXME interleave this properly with the subx2/addx2
+%ifnidn %15, ""
+%if %16 == 0 || ARCH_X86_64
+    mova               %14, %15                     ; load the unpack source (%15) into %14, unless it is flagged as memory on x86-32
+%endif
+%endif
+    FILTER_SUBx2_ADDx2  %1, l, %3, %6 +      0, %7, %8, %9, %10, %11, %14, %16 ; low half
+    FILTER_SUBx2_ADDx2  %2, h, %4, %6 + mmsize, %7, %8, %9, %10, %11, %14, %16 ; high half, stack offset shifted by mmsize
+    packuswb            %1, %2                      ; pack both halves to unsigned-saturated bytes
+%ifnidn %13, ""
+    MASK_APPLY          %1, %13, %12, %2            ; explicit source given: unfiltered bytes come from %13
+%else
+    MASK_APPLY          %1, %5, %12, %2             ; no source given: unfiltered bytes come from dstp
+%endif
+    mova                %5, %1                      ; write the result to dstp
+%endmacro
+
+%macro SRSHIFT3B_2X 4 ; reg1, reg2, [pb_10], tmp; per-byte shift right by 3 with sign extension, on two registers at once
+    mova                %4, [pb_f8]
+    pand                %1, %4          ; clear the low 3 bits of each byte so nothing crosses into the next byte
+    pand                %2, %4
+    psrlq               %1, 3           ; logical 64-bit shift; thanks to the mask each byte is shifted independently
+    psrlq               %2, 3
+    pxor                %1, %3          ; (x ^ 0x10) - 0x10 with %3 = pb_10 sign-extends each 5-bit result
+    pxor                %2, %3
+    psubb               %1, %3
+    psubb               %2, %3
+%endmacro
+
+%macro EXTRACT_POS_NEG 3 ; i8, neg, pos; splits signed bytes into magnitudes of the negatives (%2) and the non-negative values (%3)
+    pxor                %3, %3
+    pxor                %2, %2
+    pcmpgtb             %3, %1                          ; i8 < 0 mask
+    psubb               %2, %1                          ; neg values (only the originally - will be kept)
+    pand                %2, %3                          ; negative values of i8 (but stored as +)
+    pandn               %3, %1                          ; positive values of i8
+%endmacro
+
+; clip_u8(u8 + i8), built from unsigned saturating add and subtract
+%macro SIGN_ADD 4 ; dst, u8, i8, tmp1
+    EXTRACT_POS_NEG     %3, %4, %1                      ; tmp1 = negative magnitudes, dst = non-negative values
+    paddusb             %1, %2                          ; add the positives
+    psubusb             %1, %4                          ; sub the negatives
+%endmacro
+
+; clip_u8(u8 - i8), built from unsigned saturating add and subtract
+%macro SIGN_SUB 4 ; dst, u8, i8, tmp1
+    EXTRACT_POS_NEG     %3, %1, %4                      ; dst = negative magnitudes, tmp1 = non-negative values
+    paddusb             %1, %2                          ; add the negatives
+    psubusb             %1, %4                          ; sub the positives
+%endmacro
+
+%macro FILTER6_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack_off; reads rp3/rp1/rp0/rq0, m1 (p2) and m0 as the second unpack operand
+    UNPACK          %2, %1, rp3, m0                     ; p3: B->W
+    mova [rsp+%4+0*mmsize*2], %1                        ; keep the widened p3 as stack entry 0
+    paddw               %3, %1, %1                      ; p3*2
+    paddw               %3, %1                          ; p3*3
+    punpck%2bw          %1, m1,  m0                     ; p2: B->W
+    mova [rsp+%4+1*mmsize*2], %1                        ; stack entry 1 = p2
+    paddw               %3, %1                          ; p3*3 + p2
+    paddw               %3, %1                          ; p3*3 + p2*2
+    UNPACK          %2, %1, rp1, m0                     ; p1: B->W
+    mova [rsp+%4+2*mmsize*2], %1                        ; stack entry 2 = p1
+    paddw               %3, %1                          ; p3*3 + p2*2 + p1
+    UNPACK          %2, %1, rp0, m0                     ; p0: B->W
+    mova [rsp+%4+3*mmsize*2], %1                        ; stack entry 3 = p0
+    paddw               %3, %1                          ; p3*3 + p2*2 + p1 + p0
+    UNPACK          %2, %1, rq0, m0                     ; q0: B->W
+    mova [rsp+%4+4*mmsize*2], %1                        ; stack entry 4 = q0
+    paddw               %3, %1                          ; p3*3 + p2*2 + p1 + p0 + q0
+    paddw               %3, [pw_4]                      ; p3*3 + p2*2 + p1 + p0 + q0 + 4
+    psraw               %1, %3, 3                       ; (p3*3 + p2*2 + p1 + p0 + q0 + 4) >> 3
+%endmacro
+
+%macro FILTER14_INIT 4 ; %1=dst %2=h/l (half to unpack) %3=cache (running sum), %4=stack_off; reads m0 as unpack operand, m2 as p7, m3 as p6
+    punpck%2bw          %1, m2, m0                      ; p7: B->W
+    mova [rsp+%4+ 8*mmsize*2], %1                       ; keep the p7 words in stack slot 8
+    psllw               %3, %1, 3                       ; p7*8
+    psubw               %3, %1                          ; p7*7
+    punpck%2bw          %1, m3, m0                      ; p6: B->W
+    mova [rsp+%4+ 9*mmsize*2], %1                       ; keep the p6 words in stack slot 9
+    paddw               %3, %1                          ; p7*7 + p6
+    paddw               %3, %1                          ; p7*7 + p6*2
+    UNPACK          %2, %1, rp5, m0                     ; p5: B->W
+    mova [rsp+%4+10*mmsize*2], %1                       ; keep the p5 words in stack slot 10
+    paddw               %3, %1                          ; p7*7 + p6*2 + p5
+    UNPACK          %2, %1, rp4, m0                     ; p4: B->W
+    mova [rsp+%4+11*mmsize*2], %1                       ; keep the p4 words in stack slot 11
+    paddw               %3, %1                          ; p7*7 + p6*2 + p5 + p4
+    paddw               %3, [rsp+%4+ 0*mmsize*2]        ; p7*7 + p6*2 + p5 + p4 + p3 (slots 0..4 are the ones FILTER6_INIT stores)
+    paddw               %3, [rsp+%4+ 1*mmsize*2]        ; p7*7 + p6*2 + p5 + .. + p2
+    paddw               %3, [rsp+%4+ 2*mmsize*2]        ; p7*7 + p6*2 + p5 + .. + p1
+    paddw               %3, [rsp+%4+ 3*mmsize*2]        ; p7*7 + p6*2 + p5 + .. + p0
+    paddw               %3, [rsp+%4+ 4*mmsize*2]        ; p7*7 + p6*2 + p5 + .. + p0 + q0
+    paddw               %3, [pw_8]                      ; p7*7 + p6*2 + p5 + .. + p0 + q0 + 8
+    psraw               %1, %3, 4                       ; (p7*7 + p6*2 + p5 + .. + p0 + q0 + 8) >> 4; %3 keeps the unshifted sum
+%endmacro
+
+%macro TRANSPOSE16x16B 17 ; %1-%16=numbers of the registers holding the 16 rows, %17=memory spill slot (one register wide)
+    mova %17, m%16                  ; spill m%16: it is handed to SBUTTERFLY as last argument (apparently a scratch register)
+    SBUTTERFLY bw,  %1,  %2,  %16
+    SBUTTERFLY bw,  %3,  %4,  %16
+    SBUTTERFLY bw,  %5,  %6,  %16
+    SBUTTERFLY bw,  %7,  %8,  %16
+    SBUTTERFLY bw,  %9,  %10, %16
+    SBUTTERFLY bw,  %11, %12, %16
+    SBUTTERFLY bw,  %13, %14, %16
+    mova m%16,  %17                 ; restore m%16 for its own byte-level butterfly
+    mova  %17, m%14                 ; m%14 takes over the scratch role, so spill it
+    SBUTTERFLY bw,  %15, %16, %14
+    SBUTTERFLY wd,  %1,  %3,  %14   ; word-level stage
+    SBUTTERFLY wd,  %2,  %4,  %14
+    SBUTTERFLY wd,  %5,  %7,  %14
+    SBUTTERFLY wd,  %6,  %8,  %14
+    SBUTTERFLY wd,  %9,  %11, %14
+    SBUTTERFLY wd,  %10, %12, %14
+    SBUTTERFLY wd,  %13, %15, %14
+    mova m%14,  %17                 ; restore m%14
+    mova  %17, m%12                 ; m%12 takes over the scratch role, so spill it
+    SBUTTERFLY wd,  %14, %16, %12
+    SBUTTERFLY dq,  %1,  %5,  %12   ; dword-level stage
+    SBUTTERFLY dq,  %2,  %6,  %12
+    SBUTTERFLY dq,  %3,  %7,  %12
+    SBUTTERFLY dq,  %4,  %8,  %12
+    SBUTTERFLY dq,  %9,  %13, %12
+    SBUTTERFLY dq,  %10, %14, %12
+    SBUTTERFLY dq,  %11, %15, %12
+    mova m%12, %17                  ; restore m%12
+    mova  %17, m%8                  ; m%8 takes over the scratch role, so spill it
+    SBUTTERFLY dq,  %12, %16, %8
+    SBUTTERFLY qdq, %1,  %9,  %8    ; qword-level stage
+    SBUTTERFLY qdq, %2,  %10, %8
+    SBUTTERFLY qdq, %3,  %11, %8
+    SBUTTERFLY qdq, %4,  %12, %8
+    SBUTTERFLY qdq, %5,  %13, %8
+    SBUTTERFLY qdq, %6,  %14, %8
+    SBUTTERFLY qdq, %7,  %15, %8
+    mova m%8, %17                   ; restore m%8
+    mova %17, m%1                   ; m%1 takes over the scratch role, so spill it
+    SBUTTERFLY qdq, %8,  %16, %1
+    mova m%1, %17                   ; restore m%1
+    SWAP %2,  %9                    ; register renaming only (presumably puts the transposed rows back in index order)
+    SWAP %3,  %5
+    SWAP %4,  %13
+    SWAP %6,  %11
+    SWAP %8,  %15
+    SWAP %12, %14
+%endmacro
+
+%macro TRANSPOSE8x8B 13 ; %1-%8=register numbers, %9=memory source for m%7, %10=a/u (movdqa/movdqu for %9), %11=memory spill slot, %12/%13=memory dst for low/high half of m%2
+    SBUTTERFLY bw,  %1, %2, %7      ; m%7 has not been loaded yet, so it is passed as SBUTTERFLY's last (apparently scratch) argument
+    movdq%10 m%7, %9                ; now fetch the row that belongs in m%7 from memory
+    movdqa %11, m%2                 ; spill m%2 so that it can take over the scratch role
+    SBUTTERFLY bw,  %3, %4, %2
+    SBUTTERFLY bw,  %5, %6, %2
+    SBUTTERFLY bw,  %7, %8, %2
+    SBUTTERFLY wd,  %1, %3, %2
+    movdqa m%2, %11                 ; restore m%2
+    movdqa %11, m%3                 ; spill m%3, the next scratch register
+    SBUTTERFLY wd,  %2, %4, %3
+    SBUTTERFLY wd,  %5, %7, %3
+    SBUTTERFLY wd,  %6, %8, %3
+    SBUTTERFLY dq, %1, %5, %3
+    SBUTTERFLY dq, %2, %6, %3
+    movdqa m%3, %11                 ; restore m%3
+    movh   %12, m%2                 ; write m%2 out (low half to %12, high half to %13) ...
+    movhps %13, m%2                 ; ... so that m%2 can serve as scratch for the last two butterflies
+    SBUTTERFLY dq, %3, %7, %2
+    SBUTTERFLY dq, %4, %8, %2
+    SWAP %2, %5                     ; register renaming only; callers then store m%1-m%4 and m%6-m%8 themselves
+    SWAP %4, %7
+%endmacro
+
+%macro DEFINE_REAL_P7_TO_Q7 0-1 0 ; %1=optional byte offset (default 0). Points P7..Q7 at 16 consecutive rows addressed via dstq (= row P3) and dst2q (= row Q4)
+%define P7 dstq  + 4*mstrideq  + %1
+%define P6 dstq  +   mstride3q + %1
+%define P5 dstq  + 2*mstrideq  + %1
+%define P4 dstq  +   mstrideq  + %1
+%define P3 dstq                + %1
+%define P2 dstq  +    strideq  + %1
+%define P1 dstq  + 2* strideq  + %1
+%define P0 dstq  +    stride3q + %1
+%define Q0 dstq  + 4* strideq  + %1
+%define Q1 dst2q +   mstride3q + %1
+%define Q2 dst2q + 2*mstrideq  + %1
+%define Q3 dst2q +   mstrideq  + %1
+%define Q4 dst2q               + %1
+%define Q5 dst2q +    strideq  + %1
+%define Q6 dst2q + 2* strideq  + %1
+%define Q7 dst2q +    stride3q + %1
+%endmacro
+
+%macro DEFINE_TRANSPOSED_P7_TO_Q7 0-1 0 ; %1=optional byte offset (default 0). Points P3..Q3 at stack slots 0..7 (rsp + k*mmsize)
+%define P3 rsp +  0*mmsize + %1
+%define P2 rsp +  1*mmsize + %1
+%define P1 rsp +  2*mmsize + %1
+%define P0 rsp +  3*mmsize + %1
+%define Q0 rsp +  4*mmsize + %1
+%define Q1 rsp +  5*mmsize + %1
+%define Q2 rsp +  6*mmsize + %1
+%define Q3 rsp +  7*mmsize + %1
+%if mmsize == 16 ; the outer rows get stack slots 8..15 only here; otherwise their previous definitions stay in effect
+%define P7 rsp +  8*mmsize + %1
+%define P6 rsp +  9*mmsize + %1
+%define P5 rsp + 10*mmsize + %1
+%define P4 rsp + 11*mmsize + %1
+%define Q4 rsp + 12*mmsize + %1
+%define Q5 rsp + 13*mmsize + %1
+%define Q6 rsp + 14*mmsize + %1
+%define Q7 rsp + 15*mmsize + %1
+%endif
+%endmacro
+
+; ..............AB -> AAAAAAAABBBBBBBB (the two lowest bytes are each replicated into one half of the register)
+%macro SPLATB_MIX 1-2 [mask_mix] ; %1=register (in/out), %2=pshufb control (register or memory), only used with ssse3
+%if cpuflag(ssse3)
+    pshufb     %1, %2               ; single shuffle, driven by the control bytes in %2
+%else
+    punpcklbw  %1, %1               ; low bytes (low to high) B A -> B B A A
+    punpcklwd  %1, %1               ; -> B B B B A A A A
+    punpckldq  %1, %1               ; -> 8 x B followed by 8 x A
+%endif
+%endmacro
+
+%macro LOOPFILTER 5 ; %1=v/h %2=size1 %3+%4=stack, %5=mmx/32bit stack only
+%assign %%ext 0
+%if ARCH_X86_32 || mmsize == 8 ; the extra %5 bytes of stack are requested only on x86-32 or with 8-byte (MMX) registers
+%assign %%ext %5
+%endif
+
+%if UNIX64 ; dst, stride, E, I and H are all named register arguments
+cglobal vp9_loop_filter_%1_%2_ %+ mmsize, 5, 9, 16, %3 + %4 + %%ext, dst, stride, E, I, H, mstride, dst2, stride3, mstride3
+%else
+%if WIN64 ; H is not a named register argument here: it is read through r4m (see Hd below)
+cglobal vp9_loop_filter_%1_%2_ %+ mmsize, 4, 8, 16, %3 + %4 + %%ext, dst, stride, E, I, mstride, dst2, stride3, mstride3
+%else ; x86-32: E, I and H are read through r2m, r3m and r4m
+cglobal vp9_loop_filter_%1_%2_ %+ mmsize, 2, 6, 16, %3 + %4 + %%ext, dst, stride, mstride, dst2, stride3, mstride3
+%define Ed dword r2m
+%define Id dword r3m
+%endif
+%define Hd dword r4m
+%endif
+
+    mov               mstrideq, strideq
+    neg               mstrideq                          ; mstride = -stride
+
+    lea               stride3q, [strideq*3]             ; stride3 = 3*stride
+    lea              mstride3q, [mstrideq*3]            ; mstride3 = -3*stride
+
+%ifidn %1, h
+%if %2 != 16
+%if mmsize == 16
+%define movx movh
+%else
+%define movx mova
+%endif
+    lea                   dstq, [dstq + 4*strideq - 4]  ; 4 rows down (the row that P3 names) and 4 bytes to the left
+%else
+%define movx movu
+    lea                   dstq, [dstq + 4*strideq - 8] ; go from top center (h pos) to center left (v pos)
+%endif
+%else
+    lea                   dstq, [dstq + 4*mstrideq]     ; 4 rows up: dstq is row P3, the incoming dst becomes row Q0
+%endif
+    ; FIXME we shouldn't need two dst registers if mmsize == 8
+    lea                  dst2q, [dstq + 8*strideq]      ; dst2q is row Q4, 8 rows below dstq
+
+    DEFINE_REAL_P7_TO_Q7
+
+%ifidn %1, h
+    movx                    m0, [P7]                    ; horizontal case: load the rows, then transpose them into stack slots
+    movx                    m1, [P6]
+    movx                    m2, [P5]
+    movx                    m3, [P4]
+    movx                    m4, [P3]
+    movx                    m5, [P2]
+%if (ARCH_X86_64 && mmsize == 16) || %2 > 16 ; otherwise m6 is left unloaded here and P1 is fetched from memory further down
+    movx                    m6, [P1]
+%endif
+    movx                    m7, [P0]
+%ifdef m8
+    movx                    m8, [Q0]
+    movx                    m9, [Q1]
+    movx                   m10, [Q2]
+    movx                   m11, [Q3]
+    movx                   m12, [Q4]
+    movx                   m13, [Q5]
+    movx                   m14, [Q6]
+    movx                   m15, [Q7]
+    DEFINE_TRANSPOSED_P7_TO_Q7
+%if %2 == 16
+    TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp] ; [rsp] is the macro's spill slot
+    mova           [P7],  m0                            ; from here on P7..Q7 name the transposed stack slots
+    mova           [P6],  m1
+    mova           [P5],  m2
+    mova           [P4],  m3
+%else ; %2 == 44/48/84/88
+    ; 8x16 transpose
+    punpcklbw        m0,  m1                            ; interleave the low 8 bytes of each pair of rows
+    punpcklbw        m2,  m3
+    punpcklbw        m4,  m5
+    punpcklbw        m6,  m7
+    punpcklbw        m8,  m9
+    punpcklbw       m10, m11
+    punpcklbw       m12, m13
+    punpcklbw       m14, m15
+    TRANSPOSE8x8W     0, 2, 4, 6, 8, 10, 12, 14, 15
+    SWAP              0,  4                             ; register renaming only: the results are stored from m4..m11 below
+    SWAP              2,  5
+    SWAP              0,  6
+    SWAP              0,  7
+    SWAP             10,  9
+    SWAP             12, 10
+    SWAP             14, 11
+%endif ; %2
+    mova           [P3],  m4                            ; store p3..q3 to the transposed stack slots
+    mova           [P2],  m5
+    mova           [P1],  m6
+    mova           [P0],  m7
+    mova           [Q0],  m8
+    mova           [Q1],  m9
+    mova           [Q2], m10
+    mova           [Q3], m11
+%if %2 == 16
+    mova           [Q4], m12
+    mova           [Q5], m13
+    mova           [Q6], m14
+    mova           [Q7], m15
+%endif ; %2
+%else ; x86-32
+%if %2 == 16
+    TRANSPOSE8x8B    0, 1, 2, 3, 4, 5, 6, 7, [P1], u, [rsp+%3+%4], [rsp+64], [rsp+80] ; first pass: rows P7..P0; m6 is loaded from [P1] inside
+    DEFINE_TRANSPOSED_P7_TO_Q7
+    movh          [P7], m0                              ; first 8 bytes of each transposed slot (Q0/Q1 were written by the macro)
+    movh          [P5], m1
+    movh          [P3], m2
+    movh          [P1], m3
+    movh          [Q2], m5
+    movh          [Q4], m6
+    movh          [Q6], m7
+    movhps        [P6], m0
+    movhps        [P4], m1
+    movhps        [P2], m2
+    movhps        [P0], m3
+    movhps        [Q3], m5
+    movhps        [Q5], m6
+    movhps        [Q7], m7
+    DEFINE_REAL_P7_TO_Q7
+    movx                    m0, [Q0]                    ; second pass: rows Q0..Q7 (Q6 is loaded inside TRANSPOSE8x8B)
+    movx                    m1, [Q1]
+    movx                    m2, [Q2]
+    movx                    m3, [Q3]
+    movx                    m4, [Q4]
+    movx                    m5, [Q5]
+    movx                    m7, [Q7]
+    TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [Q6], u, [rsp+%3+%4], [rsp+72], [rsp+88] ; m6 is loaded from [Q6] inside
+    DEFINE_TRANSPOSED_P7_TO_Q7 8                        ; +8: this pass fills bytes 8..15 of each transposed slot
+    movh          [P7], m0
+    movh          [P5], m1
+    movh          [P3], m2
+    movh          [P1], m3
+    movh          [Q2], m5
+    movh          [Q4], m6
+    movh          [Q6], m7
+    movhps        [P6], m0
+    movhps        [P4], m1
+    movhps        [P2], m2
+    movhps        [P0], m3
+    movhps        [Q3], m5
+    movhps        [Q5], m6
+    movhps        [Q7], m7
+    DEFINE_TRANSPOSED_P7_TO_Q7
+%elif %2 > 16 ; %2 == 44/48/84/88
+    punpcklbw        m0, m1                             ; interleave row pairs; only 8 registers, so the Q rows are loaded in batches
+    punpcklbw        m2, m3
+    punpcklbw        m4, m5
+    punpcklbw        m6, m7
+    movx             m1, [Q0]
+    movx             m3, [Q1]
+    movx             m5, [Q2]
+    movx             m7, [Q3]
+    punpcklbw        m1, m3
+    punpcklbw        m5, m7
+    movx             m3, [Q4]
+    movx             m7, [Q5]
+    punpcklbw        m3, m7
+    mova          [rsp], m3                             ; park the Q4/Q5 pair at [rsp]; it is passed to TRANSPOSE8x8W as a memory operand
+    movx             m3, [Q6]
+    movx             m7, [Q7]
+    punpcklbw        m3, m7
+    DEFINE_TRANSPOSED_P7_TO_Q7
+    TRANSPOSE8x8W     0, 2, 4, 6, 1, 5, 7, 3, [rsp], [Q0], 1
+    mova           [P3],  m0                            ; store the transposed rows ([Q0] was handed to TRANSPOSE8x8W above)
+    mova           [P2],  m2
+    mova           [P1],  m4
+    mova           [P0],  m6
+    mova           [Q1],  m5
+    mova           [Q2],  m7
+    mova           [Q3],  m3
+%else ; %2 == 4 || %2 == 8
+    SBUTTERFLY       bw, 0, 1, 6
+    SBUTTERFLY       bw, 2, 3, 6
+    SBUTTERFLY       bw, 4, 5, 6
+    mova [rsp+4*mmsize], m5                             ; save m5: it is handed out as scratch register below
+    mova             m6, [P1]                           ; P1 was skipped by the loads above
+    SBUTTERFLY       bw, 6, 7, 5
+    DEFINE_TRANSPOSED_P7_TO_Q7
+    TRANSPOSE4x4W     0, 2, 4, 6, 5
+    mova           [P3], m0
+    mova           [P2], m2
+    mova           [P1], m4
+    mova           [P0], m6
+    mova             m5, [rsp+4*mmsize]                 ; bring the saved m5 back for the second 4x4 transpose
+    TRANSPOSE4x4W     1, 3, 5, 7, 0
+    mova           [Q0], m1
+    mova           [Q1], m3
+    mova           [Q2], m5
+    mova           [Q3], m7
+%endif ; %2
+%endif ; x86-32/64
+%endif ; %1 == h
+
+    ; calc fm mask
+%if %2 == 16 || mmsize == 8 ; one I and one E value for the whole register
+%if cpuflag(ssse3)
+    pxor                m0, m0
+%endif
+    SPLATB_REG          m2, I, m0                       ; I I I I ...
+    SPLATB_REG          m3, E, m0                       ; E E E E ...
+%else ; I and E each carry two byte values, one per half of the register (see SPLATB_MIX)
+%if cpuflag(ssse3)
+    mova                m0, [mask_mix]
+%endif
+    movd                m2, Id
+    movd                m3, Ed
+    SPLATB_MIX          m2, m0
+    SPLATB_MIX          m3, m0
+%endif
+    mova                m0, [pb_80]                     ; sign-bit constant: pcmpgtb compares signed bytes, so operands get ^ 0x80 first
+    pxor                m2, m0                          ; I ^ 0x80
+    pxor                m3, m0                          ; E ^ 0x80
+%ifdef m8
+%ifidn %1, v
+    mova                m8, [P3]
+    mova                m9, [P2]
+    mova               m10, [P1]
+    mova               m11, [P0]
+    mova               m12, [Q0]
+    mova               m13, [Q1]
+    mova               m14, [Q2]
+    mova               m15, [Q3]
+%else
+    ; In case of horizontal, P3..Q3 are already present in some registers due
+    ; to the previous transpose, so we just swap registers.
+    SWAP                 8,  4, 12
+    SWAP                 9,  5, 13
+    SWAP                10,  6, 14
+    SWAP                11,  7, 15
+%endif
+%define rp3 m8
+%define rp2 m9
+%define rp1 m10
+%define rp0 m11
+%define rq0 m12
+%define rq1 m13
+%define rq2 m14
+%define rq3 m15
+%else
+%define rp3 [P3]
+%define rp2 [P2]
+%define rp1 [P1]
+%define rp0 [P0]
+%define rq0 [Q0]
+%define rq1 [Q1]
+%define rq2 [Q2]
+%define rq3 [Q3]
+%endif
+    ABSSUB_GT           m5, rp3, rp2, m2, m7, m0        ; m5 = abs(p3-p2) <= I
+    ABSSUB_GT           m1, rp2, rp1, m2, m7, m0        ; m1 = abs(p2-p1) <= I
+    por                 m5, m1
+    ABSSUB_GT           m1, rp1, rp0, m2, m7, m0        ; m1 = abs(p1-p0) <= I
+    por                 m5, m1
+    ABSSUB_GT           m1, rq0, rq1, m2, m7, m0        ; m1 = abs(q1-q0) <= I
+    por                 m5, m1
+    ABSSUB_GT           m1, rq1, rq2, m2, m7, m0        ; m1 = abs(q2-q1) <= I
+    por                 m5, m1
+    ABSSUB_GT           m1, rq2, rq3, m2, m7, m0        ; m1 = abs(q3-q2) <= I
+    por                 m5, m1
+    ABSSUB              m1, rp0, rq0, m7                ; abs(p0-q0)
+    paddusb             m1, m1                          ; abs(p0-q0) * 2
+    ABSSUB              m2, rp1, rq1, m7                ; abs(p1-q1)
+    pand                m2, [pb_fe]                     ; drop lsb so shift can work
+    psrlq               m2, 1                           ; abs(p1-q1)/2
+    paddusb             m1, m2                          ; abs(p0-q0)*2 + abs(p1-q1)/2
+    pxor                m1, m0                          ; ^ 0x80 for the signed compare
+    pcmpgtb             m1, m3                          ; set where abs(p0-q0)*2 + abs(p1-q1)/2 > E
+    por                 m1, m5                          ; fm final value
+    SWAP                 1, 3                           ; the mask is called m3 from now on
+    pxor                m3, [pb_ff]                     ; invert the ORed mask (assuming pb_ff is all ones)
+
+    ; (m3: fm, m8..15: p3 p2 p1 p0 q0 q1 q2 q3)
+    ; calc flat8in (if not 44_16) and hev masks
+%if %2 != 44 && %2 != 4 ; every size except 44 and 4 needs flat8in in addition to hev
+    mova                m6, [pb_81]                     ; [1 1 1 1 ...] ^ 0x80
+    ABSSUB_GT           m2, rp3, rp0, m6, m5            ; abs(p3 - p0) <= 1
+%ifdef m8
+    mova                m8, [pb_80]                     ; m8 is reused as a constant register from here on
+%define rb80 m8
+%else
+%define rb80 [pb_80]
+%endif
+    ABSSUB_GT           m1, rp2, rp0, m6, m5, rb80      ; abs(p2 - p0) <= 1
+    por                 m2, m1
+    ABSSUB              m4, rp1, rp0, m5                ; abs(p1 - p0)
+%if %2 <= 16 ; single H value for the whole register
+%if cpuflag(ssse3)
+    pxor                m0, m0
+%endif
+    SPLATB_REG          m7, H, m0                       ; H H H H ...
+%else ; two H values, one per half of the register
+    movd                m7, Hd
+    SPLATB_MIX          m7
+%endif
+    pxor                m7, rb80                        ; H ^ 0x80 for the signed compares
+    pxor                m4, rb80                        ; abs(p1 - p0) ^ 0x80
+    pcmpgtb             m0, m4, m7                      ; abs(p1 - p0) > H (1/2 hev condition)
+    CMP_GT              m4, m6                          ; abs(p1 - p0) <= 1
+    por                 m2, m4                          ; (flat8in)
+    ABSSUB              m4, rq1, rq0, m1                ; abs(q1 - q0)
+    pxor                m4, rb80
+    pcmpgtb             m5, m4, m7                      ; abs(q1 - q0) > H (2/2 hev condition)
+    por                 m0, m5                          ; hev final value
+    CMP_GT              m4, m6                          ; abs(q1 - q0) <= 1
+    por                 m2, m4                          ; (flat8in)
+    ABSSUB_GT           m1, rq2, rq0, m6, m5, rb80      ; abs(q2 - q0) <= 1
+    por                 m2, m1
+    ABSSUB_GT           m1, rq3, rq0, m6, m5, rb80      ; abs(q3 - q0) <= 1
+    por                 m2, m1                          ; flat8in final value
+    pxor                m2, [pb_ff]                     ; invert the ORed mask
+%if %2 == 84 || %2 == 48
+    pand                m2, [mask_mix%2]                ; mask_mix84 / mask_mix48: presumably keeps flat8in only in the 8-wide half
+%endif
+%else ; %2 == 44 || %2 == 4: hev only
+    mova                m6, [pb_80]
+%if %2 == 44
+    movd                m7, Hd
+    SPLATB_MIX          m7
+%else
+%if cpuflag(ssse3)
+    pxor                m0, m0
+%endif
+    SPLATB_REG          m7, H, m0                       ; H H H H ...
+%endif
+    pxor                m7, m6                          ; H ^ 0x80 for the signed compares
+    ABSSUB              m4, rp1, rp0, m1                ; abs(p1 - p0)
+    pxor                m4, m6
+    pcmpgtb             m0, m4, m7                      ; abs(p1 - p0) > H (1/2 hev condition)
+    ABSSUB              m4, rq1, rq0, m1                ; abs(q1 - q0)
+    pxor                m4, m6
+    pcmpgtb             m5, m4, m7                      ; abs(q1 - q0) > H (2/2 hev condition)
+    por                 m0, m5                          ; hev final value
+%endif
+
+%if %2 == 16 ; flat8out is only computed for size 16
+    ; (m0: hev, m2: flat8in, m3: fm, m6: pb_81, m9..15: p2 p1 p0 q0 q1 q2 q3)
+    ; calc flat8out mask
+%ifdef m8
+    mova                m8, [P7]
+    mova                m9, [P6]
+%define rp7 m8
+%define rp6 m9
+%else
+%define rp7 [P7]
+%define rp6 [P6]
+%endif
+    ABSSUB_GT           m1, rp7, rp0, m6, m5            ; abs(p7 - p0) <= 1
+    ABSSUB_GT           m7, rp6, rp0, m6, m5            ; abs(p6 - p0) <= 1
+    por                 m1, m7
+%ifdef m8
+    mova                m8, [P5]                        ; m8/m9 are reloaded with the next pair of rows
+    mova                m9, [P4]
+%define rp5 m8
+%define rp4 m9
+%else
+%define rp5 [P5]
+%define rp4 [P4]
+%endif
+    ABSSUB_GT           m7, rp5, rp0, m6, m5            ; abs(p5 - p0) <= 1
+    por                 m1, m7
+    ABSSUB_GT           m7, rp4, rp0, m6, m5            ; abs(p4 - p0) <= 1
+    por                 m1, m7
+%ifdef m8
+    mova                m14, [Q4]                       ; m14/m15 (q2/q3 so far) are overwritten with q4/q5
+    mova                m15, [Q5]
+%define rq4 m14
+%define rq5 m15
+%else
+%define rq4 [Q4]
+%define rq5 [Q5]
+%endif
+    ABSSUB_GT           m7, rq4, rq0, m6, m5            ; abs(q4 - q0) <= 1
+    por                 m1, m7
+    ABSSUB_GT           m7, rq5, rq0, m6, m5            ; abs(q5 - q0) <= 1
+    por                 m1, m7
+%ifdef m8
+    mova                m14, [Q6]                       ; m14/m15 are reloaded with q6/q7
+    mova                m15, [Q7]
+%define rq6 m14
+%define rq7 m15
+%else
+%define rq6 [Q6]
+%define rq7 [Q7]
+%endif
+    ABSSUB_GT           m7, rq6, rq0, m6, m5            ; abs(q6 - q0) <= 1
+    por                 m1, m7
+    ABSSUB_GT           m7, rq7, rq0, m6, m5            ; abs(q7 - q0) <= 1
+    por                 m1, m7                          ; flat8out final value
+    pxor                m1, [pb_ff]                     ; invert the ORed mask
+%endif
+
+    ; if (fm) {
+    ;     if (out && in) filter_14()
+    ;     else if (in)   filter_6()
+    ;     else if (hev)  filter_2()
+    ;     else           filter_4()
+    ; }
+    ;
+    ; f14:                                                                            fm &  out &  in
+    ; f6:  fm & ~f14 & in        => fm & ~(out & in) & in                          => fm & ~out &  in
+    ; f2:  fm & ~f14 & ~f6 & hev => fm & ~(out & in) & ~(~out & in) & hev          => fm &  ~in &  hev
+    ; f4:  fm & ~f14 & ~f6 & ~f2 => fm & ~(out & in) & ~(~out & in) & ~(~in & hev) => fm &  ~in & ~hev
+
+    ; (m0: hev, [m1: flat8out], [m2: flat8in], m3: fm, m8..15: p5 p4 p1 p0 q0 q1 q6 q7)
+    ; filter2()
+%if %2 != 44 && %2 != 4
+    mova                m6, [pb_80]                     ; already in m6 if 44_16
+    SCRATCH              2, 15, rsp+%3+%4               ; park flat8in: read back below as m15, or as [rsp+%3+%4] without m8..m15
+%if %2 == 16
+    SCRATCH              1,  8, rsp+%3+%4+16            ; park flat8out: read back later as m8, or as [rsp+%3+%4+16]
+%endif
+%endif
+    pxor                m2, m6, rq0                     ; q0 ^ 0x80
+    pxor                m4, m6, rp0                     ; p0 ^ 0x80
+    psubsb              m2, m4                          ; (signed) q0 - p0
+    pxor                m4, m6, rp1                     ; p1 ^ 0x80
+    pxor                m5, m6, rq1                     ; q1 ^ 0x80
+    psubsb              m4, m5                          ; (signed) p1 - q1
+    paddsb              m4, m2                          ;   (q0 - p0) + (p1 - q1)
+    paddsb              m4, m2                          ; 2*(q0 - p0) + (p1 - q1)
+    paddsb              m4, m2                          ; 3*(q0 - p0) + (p1 - q1)
+    paddsb              m6, m4, [pb_4]                  ; m6: f1 = clip(f + 4, 127)
+    paddsb              m4, [pb_3]                      ; m4: f2 = clip(f + 3, 127)
+%ifdef m8
+    mova                m14, [pb_10]                    ; will be reused in filter4()
+%define rb10 m14
+%else
+%define rb10 [pb_10]
+%endif
+    SRSHIFT3B_2X        m6, m4, rb10, m7                ; f1 and f2 sign byte shift by 3
+    SIGN_SUB            m7, rq0, m6, m5                 ; m7 = q0 - f1
+    SIGN_ADD            m1, rp0, m4, m5                 ; m1 = p0 + f2
+%if %2 != 44 && %2 != 4
+%ifdef m8
+    pandn               m6, m15, m3                     ;  ~mask(in) & mask(fm)
+%else
+    mova                m6, [rsp+%3+%4]                 ; mask(in), parked on the stack by SCRATCH above
+    pandn               m6, m3                          ;  ~mask(in) & mask(fm)
+%endif
+    pand                m6, m0                          ; (~mask(in) & mask(fm)) & mask(hev)
+%else
+    pand                m6, m3, m0                      ; mask(fm) & mask(hev): there is no flat8in for these sizes
+%endif
+    MASK_APPLY          m7, rq0, m6, m5                 ; m7 = filter2(q0) & mask / we write it in filter4()
+    MASK_APPLY          m1, rp0, m6, m5                 ; m1 = filter2(p0) & mask / we write it in filter4()
+
+    ; (m0: hev, m1: p0', m2: q0-p0, m3: fm, m7: q0', [m8: flat8out], m10..13: p1 p0 q0 q1, m14: pb_10, [m15: flat8in], )
+    ; filter4()
+    mova                m4, m2                          ; (q0 - p0)
+    paddsb              m2, m4                          ; 2 * (q0 - p0)
+    paddsb              m2, m4                          ; 3 * (q0 - p0)
+    paddsb              m6, m2, [pb_4]                  ; m6:  f1 = clip(f + 4, 127)
+    paddsb              m2, [pb_3]                      ; m2: f2 = clip(f + 3, 127)
+    SRSHIFT3B_2X        m6, m2, rb10, m4                ; f1 and f2 sign byte shift by 3
+%if %2 != 44 && %2 != 4
+%ifdef m8
+    pandn               m5, m15, m3                     ;               ~mask(in) & mask(fm)
+%else
+    mova                m5, [rsp+%3+%4]                 ; mask(in), parked on the stack in filter2()
+    pandn               m5, m3                          ;               ~mask(in) & mask(fm)
+%endif
+    pandn               m0, m5                          ; ~mask(hev) & (~mask(in) & mask(fm))
+%else
+    pandn               m0, m3                          ; ~mask(hev) & mask(fm)
+%endif
+    SIGN_SUB            m5, rq0, m6, m4                 ; q0 - f1
+    MASK_APPLY          m5, m7, m0, m4                  ; filter4(q0) & mask
+    mova                [Q0], m5                        ; q0 written back (m7 carried the filter2() candidate)
+    SIGN_ADD            m7, rp0, m2, m4                 ; p0 + f2
+    MASK_APPLY          m7, m1, m0, m4                  ; filter4(p0) & mask
+    mova                [P0], m7                        ; p0 written back (m1 carried the filter2() candidate)
+    paddb               m6, [pb_80]                     ;
+    pxor                m1, m1                          ;   f=(f1+1)>>1
+    pavgb               m6, m1                          ;
+    psubb               m6, [pb_40]                     ;
+    SIGN_ADD            m1, rp1, m6, m2                 ; p1 + f
+    SIGN_SUB            m4, rq1, m6, m2                 ; q1 - f
+    MASK_APPLY          m1, rp1, m0, m2                 ; m1 = filter4(p1)
+    MASK_APPLY          m4, rq1, m0, m2                 ; m4 = filter4(q1)
+    mova                [P1], m1
+    mova                [Q1], m4
+
+%if %2 != 44 && %2 != 4
+    UNSCRATCH            2, 15, rsp+%3+%4               ; flat8in back into m2
+%endif
+
+    ; ([m1: flat8out], m2: flat8in, m3: fm, m10..13: p1 p0 q0 q1)
+    ; filter6()
+%if %2 != 44 && %2 != 4
+    pxor                m0, m0                          ; m0 = 0: second operand of the byte->word unpacks in FILTER6_INIT/FILTER14_INIT
+%if %2 != 16
+    pand                m3, m2                          ; mask(fm) & mask(in)
+%else
+    pand                m2, m3                          ;               mask(fm) & mask(in)
+%ifdef m8
+    pandn               m3, m8, m2                      ; ~mask(out) & (mask(fm) & mask(in))
+%else
+    mova                m3, [rsp+%3+%4+16]              ; mask(out), parked on the stack in filter2()
+    pandn               m3, m2                          ; ~mask(out) & (mask(fm) & mask(in))
+%endif
+%endif
+%ifdef m8
+    mova               m14, [P3]                        ; m14/m9 are reloaded: p3 and q3 are needed again
+    mova                m9, [Q3]
+%define rp3 m14
+%define rq3 m9
+%else
+%define rp3 [P3]
+%define rq3 [Q3]
+%endif
+    mova                m1, [P2]                        ; p2 (FILTER6_INIT unpacks it from m1)
+    FILTER_INIT         m4, m5, m6, m7, [P2], %4, 6,             m3,  m1             ; [p2]
+    mova                m1, [Q2]                        ; q2, passed to the [p0] and [q2] updates below
+    FILTER_UPDATE       m4, m5, m6, m7, [P1], %4, 0, 1, 2, 5, 3, m3,  "", rq1, "", 1 ; [p1] -p3 -p2 +p1 +q1
+    FILTER_UPDATE       m4, m5, m6, m7, [P0], %4, 0, 2, 3, 6, 3, m3,  "", m1         ; [p0] -p3 -p1 +p0 +q2
+    FILTER_UPDATE       m4, m5, m6, m7, [Q0], %4, 0, 3, 4, 7, 3, m3,  "", rq3, "", 1 ; [q0] -p3 -p0 +q0 +q3
+    FILTER_UPDATE       m4, m5, m6, m7, [Q1], %4, 1, 4, 5, 7, 3, m3,  ""             ; [q1] -p2 -q0 +q1 +q3
+    FILTER_UPDATE       m4, m5, m6, m7, [Q2], %4, 2, 5, 6, 7, 3, m3,  m1             ; [q2] -p1 -q1 +q2 +q3
+%endif
+
+%if %2 == 16
+    UNSCRATCH            1,  8, rsp+%3+%4+16            ; flat8out back into m1
+%endif
+
+    ; (m0: 0, [m1: flat8out], m2: fm & flat8in, m8..15: q2 q3 p1 p0 q0 q1 p3 p2)
+    ; filter14()
+    ;
+    ;                            m2  m3  m8  m9 m14 m15 m10 m11 m12 m13
+    ;
+    ;                                    q2  q3  p3  p2  p1  p0  q0  q1
+    ; p6  -7                     p7  p6  p5  p4   .   .   .   .   .
+    ; p5  -6  -p7 -p6 +p5 +q1     .   .   .                           .
+    ; p4  -5  -p7 -p5 +p4 +q2     .       .   .                      q2
+    ; p3  -4  -p7 -p4 +p3 +q3     .           .   .                  q3
+    ; p2  -3  -p7 -p3 +p2 +q4     .               .   .              q4
+    ; p1  -2  -p7 -p2 +p1 +q5     .                   .   .          q5
+    ; p0  -1  -p7 -p1 +p0 +q6     .                       .   .      q6
+    ; q0  +0  -p7 -p0 +q0 +q7     .                           .   .  q7
+    ; q1  +1  -p6 -q0 +q1 +q7    q1   .                           .   .
+    ; q2  +2  -p5 -q1 +q2 +q7     .  q2   .                           .
+    ; q3  +3  -p4 -q2 +q3 +q7         .  q3   .                       .
+    ; q4  +4  -p3 -q3 +q4 +q7             .  q4   .                   .
+    ; q5  +5  -p2 -q4 +q5 +q7                 .  q5   .               .
+    ; q6  +6  -p1 -q5 +q6 +q7                     .  q6   .           .
+
+%if %2 == 16
+    pand            m1, m2                                                              ; mask(out) & (mask(fm) & mask(in))
+    mova            m2, [P7]                                                            ; p7 (FILTER14_INIT unpacks it from m2)
+    mova            m3, [P6]                                                            ; p6 (FILTER14_INIT unpacks it from m3)
+%ifdef m8
+    mova            m8, [P5]
+    mova            m9, [P4]
+%define rp5 m8
+%define rp4 m9
+%define rp5s m8
+%define rp4s m9
+%define rp3s m14
+%define rq4 m8
+%define rq5 m9
+%define rq6 m14
+%define rq7 m15
+%define rq4s m8
+%define rq5s m9
+%define rq6s m14
+%else
+%define rp5 [P5]
+%define rp4 [P4]
+%define rp5s ""
+%define rp4s ""
+%define rp3s ""
+%define rq4 [Q4]
+%define rq5 [Q5]
+%define rq6 [Q6]
+%define rq7 [Q7]
+%define rq4s ""
+%define rq5s ""
+%define rq6s ""
+%endif
+    FILTER_INIT     m4, m5, m6, m7, [P6], %4, 14,                m1,  m3            ; [p6]
+    FILTER_UPDATE   m4, m5, m6, m7, [P5], %4,  8,  9, 10,  5, 4, m1, rp5s           ; [p5] -p7 -p6 +p5 +q1
+    FILTER_UPDATE   m4, m5, m6, m7, [P4], %4,  8, 10, 11,  6, 4, m1, rp4s           ; [p4] -p7 -p5 +p4 +q2
+    FILTER_UPDATE   m4, m5, m6, m7, [P3], %4,  8, 11,  0,  7, 4, m1, rp3s           ; [p3] -p7 -p4 +p3 +q3
+    FILTER_UPDATE   m4, m5, m6, m7, [P2], %4,  8,  0,  1, 12, 4, m1,  "", rq4, [Q4], 1 ; [p2] -p7 -p3 +p2 +q4
+    FILTER_UPDATE   m4, m5, m6, m7, [P1], %4,  8,  1,  2, 13, 4, m1,  "", rq5, [Q5], 1 ; [p1] -p7 -p2 +p1 +q5
+    FILTER_UPDATE   m4, m5, m6, m7, [P0], %4,  8,  2,  3, 14, 4, m1,  "", rq6, [Q6], 1 ; [p0] -p7 -p1 +p0 +q6
+    FILTER_UPDATE   m4, m5, m6, m7, [Q0], %4,  8,  3,  4, 15, 4, m1,  "", rq7, [Q7], 1 ; [q0] -p7 -p0 +q0 +q7
+    FILTER_UPDATE   m4, m5, m6, m7, [Q1], %4,  9,  4,  5, 15, 4, m1,  ""            ; [q1] -p6 -q0 +q1 +q7
+    FILTER_UPDATE   m4, m5, m6, m7, [Q2], %4, 10,  5,  6, 15, 4, m1,  ""            ; [q2] -p5 -q1 +q2 +q7
+    FILTER_UPDATE   m4, m5, m6, m7, [Q3], %4, 11,  6,  7, 15, 4, m1,  ""            ; [q3] -p4 -q2 +q3 +q7
+    FILTER_UPDATE   m4, m5, m6, m7, [Q4], %4,  0,  7, 12, 15, 4, m1, rq4s           ; [q4] -p3 -q3 +q4 +q7
+    FILTER_UPDATE   m4, m5, m6, m7, [Q5], %4,  1, 12, 13, 15, 4, m1, rq5s           ; [q5] -p2 -q4 +q5 +q7
+    FILTER_UPDATE   m4, m5, m6, m7, [Q6], %4,  2, 13, 14, 15, 4, m1, rq6s           ; [q6] -p1 -q5 +q6 +q7
+%endif
+
+%ifidn %1, h
+%if %2 == 16
+    mova                    m0, [P7]                    ; horizontal, size 16: reload the transposed stack slots and transpose back
+    mova                    m1, [P6]
+    mova                    m2, [P5]
+    mova                    m3, [P4]
+    mova                    m4, [P3]
+    mova                    m5, [P2]
+%if ARCH_X86_64
+    mova                    m6, [P1]
+%endif
+    mova                    m7, [P0]
+%if ARCH_X86_64
+    mova                    m8, [Q0]
+    mova                    m9, [Q1]
+    mova                   m10, [Q2]
+    mova                   m11, [Q3]
+    mova                   m12, [Q4]
+    mova                   m13, [Q5]
+    mova                   m14, [Q6]
+    mova                   m15, [Q7]
+    TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp] ; [rsp] is the macro's spill slot
+    DEFINE_REAL_P7_TO_Q7
+    movu  [P7],  m0                                     ; P7..Q7 are dstq/dst2q-relative addresses again
+    movu  [P6],  m1
+    movu  [P5],  m2
+    movu  [P4],  m3
+    movu  [P3],  m4
+    movu  [P2],  m5
+    movu  [P1],  m6
+    movu  [P0],  m7
+    movu  [Q0],  m8
+    movu  [Q1],  m9
+    movu  [Q2], m10
+    movu  [Q3], m11
+    movu  [Q4], m12
+    movu  [Q5], m13
+    movu  [Q6], m14
+    movu  [Q7], m15
+%else
+    DEFINE_REAL_P7_TO_Q7
+    TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [rsp+32], a, [rsp+%3+%4], [Q0], [Q1] ; first pass; m6 is loaded from [rsp+32] inside
+    movh   [P7],  m0                                    ; 8 bytes per row (Q0/Q1 were written inside the macro)
+    movh   [P5],  m1
+    movh   [P3],  m2
+    movh   [P1],  m3
+    movh   [Q2],  m5
+    movh   [Q4],  m6
+    movh   [Q6],  m7
+    movhps [P6],  m0
+    movhps [P4],  m1
+    movhps [P2],  m2
+    movhps [P0],  m3
+    movhps [Q3],  m5
+    movhps [Q5],  m6
+    movhps [Q7],  m7
+    DEFINE_TRANSPOSED_P7_TO_Q7
+    mova                    m0, [Q0]                    ; second pass: transposed slots Q0..Q7 (m6 is loaded inside TRANSPOSE8x8B)
+    mova                    m1, [Q1]
+    mova                    m2, [Q2]
+    mova                    m3, [Q3]
+    mova                    m4, [Q4]
+    mova                    m5, [Q5]
+    mova                    m7, [Q7]
+    DEFINE_REAL_P7_TO_Q7 8                              ; +8: the second pass is stored 8 bytes further right in each row
+    TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [rsp+224], a, [rsp+%3+%4], [Q0], [Q1] ; m6 is loaded from [rsp+224] inside
+    movh   [P7],  m0
+    movh   [P5],  m1
+    movh   [P3],  m2
+    movh   [P1],  m3
+    movh   [Q2],  m5
+    movh   [Q4],  m6
+    movh   [Q6],  m7
+    movhps [P6],  m0
+    movhps [P4],  m1
+    movhps [P2],  m2
+    movhps [P0],  m3
+    movhps [Q3],  m5
+    movhps [Q5],  m6
+    movhps [Q7],  m7
+%endif
+%elif %2 == 44 || %2 == 4
+    SWAP 0, 1   ; m0 = p1
+    SWAP 1, 7   ; m1 = p0
+    SWAP 2, 5   ; m2 = q0
+    SWAP 3, 4   ; m3 = q1
+    DEFINE_REAL_P7_TO_Q7 2 ; +2: only p1 p0 q0 q1 are stored, 4 bytes per row, 2 bytes after the start of the loaded rows
+    SBUTTERFLY  bw, 0, 1, 4
+    SBUTTERFLY  bw, 2, 3, 4
+    SBUTTERFLY  wd, 0, 2, 4
+    SBUTTERFLY  wd, 1, 3, 4
+%if mmsize == 16
+    movd  [P7], m0                                      ; each register supplies four consecutive rows, one dword at a time
+    movd  [P3], m2
+    movd  [Q0], m1
+    movd  [Q4], m3
+    psrldq  m0, 4                                       ; move on to the next dword
+    psrldq  m1, 4
+    psrldq  m2, 4
+    psrldq  m3, 4
+    movd  [P6], m0
+    movd  [P2], m2
+    movd  [Q1], m1
+    movd  [Q5], m3
+    psrldq  m0, 4
+    psrldq  m1, 4
+    psrldq  m2, 4
+    psrldq  m3, 4
+    movd  [P5], m0
+    movd  [P1], m2
+    movd  [Q2], m1
+    movd  [Q6], m3
+    psrldq  m0, 4
+    psrldq  m1, 4
+    psrldq  m2, 4
+    psrldq  m3, 4
+    movd  [P4], m0
+    movd  [P0], m2
+    movd  [Q3], m1
+    movd  [Q7], m3
+%else
+    movd  [P7], m0                                      ; 8-byte registers: each one supplies two consecutive rows
+    movd  [P5], m2
+    movd  [P3], m1
+    movd  [P1], m3
+    psrlq   m0, 32                                      ; move on to the upper dword
+    psrlq   m2, 32
+    psrlq   m1, 32
+    psrlq   m3, 32
+    movd  [P6], m0
+    movd  [P4], m2
+    movd  [P2], m1
+    movd  [P0], m3
+%endif
+%else
+    ; the following code does a transpose of 8 full lines to 16 half
+    ; lines (high part). It is inlined to avoid the need of a staging area
+    mova                    m0, [P3]
+    mova                    m1, [P2]
+    mova                    m2, [P1]
+    mova                    m3, [P0]
+    mova                    m4, [Q0]
+    mova                    m5, [Q1]
+%ifdef m8
+    mova                    m6, [Q2]
+%endif
+    mova                    m7, [Q3]
+    DEFINE_REAL_P7_TO_Q7
+%ifdef m8
+    SBUTTERFLY  bw,  0,  1, 8
+    SBUTTERFLY  bw,  2,  3, 8
+    SBUTTERFLY  bw,  4,  5, 8
+    SBUTTERFLY  bw,  6,  7, 8
+    SBUTTERFLY  wd,  0,  2, 8
+    SBUTTERFLY  wd,  1,  3, 8
+    SBUTTERFLY  wd,  4,  6, 8
+    SBUTTERFLY  wd,  5,  7, 8
+    SBUTTERFLY  dq,  0,  4, 8
+    SBUTTERFLY  dq,  1,  5, 8
+    SBUTTERFLY  dq,  2,  6, 8
+    SBUTTERFLY  dq,  3,  7, 8
+%else
+    SBUTTERFLY  bw,  0,  1, 6                           ; m6 has not been loaded yet and is passed as the scratch argument
+    mova [rsp+mmsize*4], m1                             ; spill m1 so it can take over as scratch register
+    mova        m6, [rsp+mmsize*6]                      ; transposed slot 6 (Q2), addressed directly since P*/Q* were just redefined
+    SBUTTERFLY  bw,  2,  3, 1
+    SBUTTERFLY  bw,  4,  5, 1
+    SBUTTERFLY  bw,  6,  7, 1
+    SBUTTERFLY  wd,  0,  2, 1
+    mova [rsp+mmsize*6], m2                             ; spill m2, the next scratch register
+    mova        m1, [rsp+mmsize*4]                      ; restore m1
+    SBUTTERFLY  wd,  1,  3, 2
+    SBUTTERFLY  wd,  4,  6, 2
+    SBUTTERFLY  wd,  5,  7, 2
+    SBUTTERFLY  dq,  0,  4, 2
+    SBUTTERFLY  dq,  1,  5, 2
+%if mmsize == 16
+    movh      [Q0], m1                                  ; store m1 right away: it is the scratch register of the last two butterflies
+    movhps    [Q1], m1
+%else
+    mova      [P3], m1                                  ; store m1 right away: it is the scratch register of the last two butterflies
+%endif
+    mova        m2, [rsp+mmsize*6]                      ; restore m2
+    SBUTTERFLY  dq,  2,  6, 1
+    SBUTTERFLY  dq,  3,  7, 1
+%endif
+    SWAP         3, 6                                   ; register renaming only, ahead of the stores below
+    SWAP         1, 4
+%if mmsize == 16
+    movh      [P7], m0                                  ; each register supplies two rows: low half, then high half
+    movhps    [P6], m0
+    movh      [P5], m1
+    movhps    [P4], m1
+    movh      [P3], m2
+    movhps    [P2], m2
+    movh      [P1], m3
+    movhps    [P0], m3
+%ifdef m8
+    movh      [Q0], m4                                  ; without m8, Q0/Q1 were already stored in the transpose above
+    movhps    [Q1], m4
+%endif
+    movh      [Q2], m5
+    movhps    [Q3], m5
+    movh      [Q4], m6
+    movhps    [Q5], m6
+    movh      [Q6], m7
+    movhps    [Q7], m7
+%else
+    mova      [P7], m0                                  ; 8-byte registers: one row each ([P3] was stored in the transpose above)
+    mova      [P6], m1
+    mova      [P5], m2
+    mova      [P4], m3
+    mova      [P2], m5
+    mova      [P1], m6
+    mova      [P0], m7
+%endif
+%endif
+%endif
+
+    RET
+%endmacro
+
+%macro LPF_16_VH 5 ; %1=size, %2=stack, %3=additional stack for the h variant, %4=mmx/32bit-only stack, %5=cpu flag passed to INIT_XMM
+INIT_XMM %5
+LOOPFILTER v, %1, %2,  0, %4 ; the v variant passes 0 as LOOPFILTER's second stack amount
+LOOPFILTER h, %1, %2, %3, %4
+%endmacro
+
+%macro LPF_16_VH_ALL_OPTS 4 ; %1-%4 are forwarded unchanged to LPF_16_VH, once per instruction set
+LPF_16_VH %1, %2, %3, %4, sse2
+LPF_16_VH %1, %2, %3, %4, ssse3
+LPF_16_VH %1, %2, %3, %4, avx
+%endmacro
+
+LPF_16_VH_ALL_OPTS 16, 512, 256, 32 ; arguments: size, stack, additional h stack, mmx/32bit-only stack
+LPF_16_VH_ALL_OPTS 44,   0, 128,  0
+LPF_16_VH_ALL_OPTS 48, 256, 128, 16
+LPF_16_VH_ALL_OPTS 84, 256, 128, 16
+LPF_16_VH_ALL_OPTS 88, 256, 128, 16
+
+INIT_MMX mmxext
+LOOPFILTER v, 4,   0,  0, 0 ; MMX variants, sizes 4 and 8: LOOPFILTER is invoked directly
+LOOPFILTER h, 4,   0, 64, 0
+LOOPFILTER v, 8, 128,  0, 8
+LOOPFILTER h, 8, 128, 64, 8
diff --git a/media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm b/media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm
new file mode 100644
index 0000000000..c0888170c9
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm
@@ -0,0 +1,823 @@
+;******************************************************************************
+;* VP9 loop filter SIMD optimizations
+;*
+;* Copyright (C) 2015 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pw_511: times 16 dw 511                     ; upper signed clip bound used by filter_4 for 10 bpp
+pw_2047: times 16 dw 2047                   ; upper signed clip bound used by filter_4 for 12 bpp
+pw_16384: times 16 dw 16384                 ; pmulhrsw multiplier giving (x+1)>>1 on ssse3 and later
+pw_m512: times 16 dw -512                   ; lower signed clip bound for 10 bpp
+pw_m2048: times 16 dw -2048                 ; lower signed clip bound for 12 bpp
+
+cextern pw_1
+cextern pw_3
+cextern pw_4
+cextern pw_8
+cextern pw_16
+cextern pw_256
+cextern pw_1023
+cextern pw_4095
+cextern pw_m1
+
+SECTION .text
+
+%macro SCRATCH 3-4 ; src reg number, x86-64 reg number, x86-32 memory slot[, name: defines reg_<name>]
+%if ARCH_X86_64
+    SWAP                %1, %2                      ; x86-64: value is kept in a register (m%2), nothing is stored
+%if %0 == 4
+%define reg_%4 m%2
+%endif
+%else
+    mova              [%3], m%1                     ; x86-32: spill to the memory slot
+%if %0 == 4
+%define reg_%4 [%3]
+%endif
+%endif
+%endmacro
+
+%macro UNSCRATCH 3-4 ; inverse of SCRATCH: dst reg number, x86-64 reg number, x86-32 memory slot[, name to undefine]
+%if ARCH_X86_64
+    SWAP                %1, %2
+%else
+    mova               m%1, [%3]                    ; x86-32: reload from the memory slot
+%endif
+%if %0 == 4
+%undef reg_%4
+%endif
+%endmacro
+
+%macro PRELOAD 2-3 ; x86-64 reg number, memory operand[, name: defines reg_<name>]
+%if ARCH_X86_64
+    mova               m%1, [%2]                    ; x86-64: load once, reg_<name> then refers to the register
+%if %0 == 3
+%define reg_%3 m%1
+%endif
+%elif %0 == 3 ; x86-32: nothing is loaded, reg_<name> refers to the memory operand itself
+%define reg_%3 [%2]
+%endif
+%endmacro
+
+; calculate p or q portion of flat8out; in: m0=q0 (or p0), m4-7=q4-7 (or p4-7), reg_F; out: m7=!flat8out; m4-6 are overwritten, m2-3 are passed to ABS2 as temporaries
+%macro FLAT8OUT_HALF 0
+    psubw               m4, m0                      ; q4-q0
+    psubw               m5, m0                      ; q5-q0
+    psubw               m6, m0                      ; q6-q0
+    psubw               m7, m0                      ; q7-q0
+    ABS2                m4, m5, m2, m3              ; abs(q4-q0) | abs(q5-q0)
+    ABS2                m6, m7, m2, m3              ; abs(q6-q0) | abs(q7-q0)
+    pcmpgtw             m4, reg_F                   ; abs(q4-q0) > F
+    pcmpgtw             m5, reg_F                   ; abs(q5-q0) > F
+    pcmpgtw             m6, reg_F                   ; abs(q6-q0) > F
+    pcmpgtw             m7, reg_F                   ; abs(q7-q0) > F
+    por                 m5, m4
+    por                 m7, m6
+    por                 m7, m5                      ; !flat8out, q portion
+%endmacro
+
+; calculate p or q portion of flat8in/hev/fm (excluding mb_edge condition)
+%macro FLAT8IN_HALF 1 ; %1=wd; in: m0-3=q0-3 (or p0-3), reg_I/H/F; out: m2=!fm portion, m7=hev portion, m4=!flat8in portion (wd > 4 only)
+%if %1 > 4
+    psubw               m4, m3, m0                  ; q3-q0
+    psubw               m5, m2, m0                  ; q2-q0
+    ABS2                m4, m5, m6, m7              ; abs(q3-q0) | abs(q2-q0)
+    pcmpgtw             m4, reg_F                   ; abs(q3-q0) > F
+    pcmpgtw             m5, reg_F                   ; abs(q2-q0) > F
+%endif
+    psubw               m3, m2                      ; q3-q2
+    psubw               m2, m1                      ; q2-q1
+    ABS2                m3, m2, m6, m7              ; abs(q3-q2) | abs(q2-q1)
+    pcmpgtw             m3, reg_I                   ; abs(q3-q2) > I
+    pcmpgtw             m2, reg_I                   ; abs(q2-q1) > I
+%if %1 > 4
+    por                 m4, m5
+%endif
+    por                 m2, m3
+    psubw               m3, m1, m0                  ; q1-q0
+    ABS1                m3, m5                      ; abs(q1-q0)
+%if %1 > 4
+    pcmpgtw             m6, m3, reg_F               ; abs(q1-q0) > F
+%endif
+    pcmpgtw             m7, m3, reg_H               ; abs(q1-q0) > H
+    pcmpgtw             m3, reg_I                   ; abs(q1-q0) > I
+%if %1 > 4
+    por                 m4, m6                      ; m4 = any flat8in difference > F
+%endif
+    por                 m2, m3                      ; m2 = any neighbour difference > I
+%endmacro
+
+; one step in filter_14/filter_6
+;
+; take sum $reg, downshift, apply mask and write into dst
+;
+; if sub2/add1-2 are present, add/sub as appropriate to prepare for the next
+; step's sum $reg. This is omitted for the last row in each filter.
+;
+; if dont_store is set, don't write the result into memory, instead keep the
+; values in register so we can write it out later
+%macro FILTER_STEP 6-10 "", "", "", 0 ; tmp, reg, mask, shift, dst, \
+                                      ; src/sub1, sub2, add1, add2, dont_store
+    psrlw               %1, %2, %4                  ; tmp = sum >> shift
+    psubw               %1, %6                      ; abs->delta
+%ifnidn %7, "" ; sub2 given: update the running sum as sum - src - sub2 + add1 + add2
+    psubw               %2, %6
+    psubw               %2, %7
+    paddw               %2, %8
+    paddw               %2, %9
+%endif
+    pand                %1, reg_%3                  ; apply mask
+%if %10 == 1 ; dont_store: result is accumulated into the src operand, dst is not written
+    paddw               %6, %1                      ; delta->abs
+%else
+    paddw               %1, %6                      ; delta->abs
+    mova              [%5], %1
+%endif
+%endmacro
+
+; FIXME avx2 versions for 16_16 and mix2_{4,8}{4,8}
+
+%macro LOOP_FILTER 3 ; dir[h/v], wd[4/8/16], bpp[10/12]
+
+%if ARCH_X86_64
+%if %2 == 16
+%assign %%num_xmm_regs 16
+%elif %2 == 8
+%assign %%num_xmm_regs 15
+%else ; %2 == 4
+%assign %%num_xmm_regs 14
+%endif ; %2
+%assign %%bak_mem 0
+%else ; ARCH_X86_32
+%assign %%num_xmm_regs 8
+%if %2 == 16
+%assign %%bak_mem 7
+%elif %2 == 8
+%assign %%bak_mem 6
+%else ; %2 == 4
+%assign %%bak_mem 5
+%endif ; %2
+%endif ; ARCH_X86_64/32
+
+%if %2 == 16
+%ifidn %1, v
+%assign %%num_gpr_regs 6
+%else ; %1 == h
+%assign %%num_gpr_regs 5
+%endif ; %1
+%assign %%wd_mem 6
+%else ; %2 == 8/4
+%assign %%num_gpr_regs 5
+%if ARCH_X86_32 && %2 == 8
+%assign %%wd_mem 2
+%else ; ARCH_X86_64 || %2 == 4
+%assign %%wd_mem 0
+%endif ; ARCH_X86_64/32 etc.
+%endif ; %2
+
+%ifidn %1, v
+%assign %%tsp_mem 0
+%elif %2 == 16 ; && %1 == h
+%assign %%tsp_mem 16
+%else ; %1 == h && %2 == 8/4
+%assign %%tsp_mem 8
+%endif ; %1/%2
+
+%assign %%off %%wd_mem
+%assign %%tspoff %%bak_mem+%%wd_mem
+%assign %%stack_mem ((%%bak_mem+%%wd_mem+%%tsp_mem)*mmsize)
+
+%if %3 == 10
+%define %%maxsgn 511
+%define %%minsgn m512
+%define %%maxusgn 1023
+%define %%maxf 4
+%else ; %3 == 12
+%define %%maxsgn 2047
+%define %%minsgn m2048
+%define %%maxusgn 4095
+%define %%maxf 16
+%endif ; %3
+
+cglobal vp9_loop_filter_%1_%2_%3, 5, %%num_gpr_regs, %%num_xmm_regs, %%stack_mem, dst, stride, E, I, H
+    ; prepare E, I and H masks
+    shl                 Ed, %3-8                    ; E <<= bit_depth - 8
+    shl                 Id, %3-8                    ; I <<= bit_depth - 8
+    shl                 Hd, %3-8                    ; H <<= bit_depth - 8
+%if cpuflag(ssse3)
+    mova                m0, [pw_256]
+%endif
+    movd                m1, Ed
+    movd                m2, Id
+    movd                m3, Hd
+%if cpuflag(ssse3)
+    pshufb              m1, m0                      ; E << (bit_depth - 8)
+    pshufb              m2, m0                      ; I << (bit_depth - 8)
+    pshufb              m3, m0                      ; H << (bit_depth - 8)
+%else
+    punpcklwd           m1, m1
+    punpcklwd           m2, m2
+    punpcklwd           m3, m3
+    pshufd              m1, m1, q0000
+    pshufd              m2, m2, q0000
+    pshufd              m3, m3, q0000
+%endif
+    SCRATCH              1,  8, rsp+(%%off+0)*mmsize,  E
+    SCRATCH              2,  9, rsp+(%%off+1)*mmsize,  I
+    SCRATCH              3, 10, rsp+(%%off+2)*mmsize,  H
+%if %2 > 4
+    PRELOAD                 11, pw_ %+ %%maxf, F
+%endif
+
+    ; set up variables to load data
+%ifidn %1, v
+    DEFINE_ARGS dst8, stride, stride3, dst0, dst4, dst12
+    lea           stride3q, [strideq*3]
+    neg            strideq
+%if %2 == 16
+    lea              dst0q, [dst8q+strideq*8]
+%else
+    lea              dst4q, [dst8q+strideq*4]
+%endif
+    neg            strideq
+%if %2 == 16
+    lea             dst12q, [dst8q+strideq*4]
+    lea              dst4q, [dst0q+strideq*4]
+%endif
+
+%if %2 == 16
+%define %%p7 dst0q
+%define %%p6 dst0q+strideq
+%define %%p5 dst0q+strideq*2
+%define %%p4 dst0q+stride3q
+%endif
+%define %%p3 dst4q
+%define %%p2 dst4q+strideq
+%define %%p1 dst4q+strideq*2
+%define %%p0 dst4q+stride3q
+%define %%q0 dst8q
+%define %%q1 dst8q+strideq
+%define %%q2 dst8q+strideq*2
+%define %%q3 dst8q+stride3q
+%if %2 == 16
+%define %%q4 dst12q
+%define %%q5 dst12q+strideq
+%define %%q6 dst12q+strideq*2
+%define %%q7 dst12q+stride3q
+%endif
+%else ; %1 == h
+    DEFINE_ARGS dst0, stride, stride3, dst4
+    lea           stride3q, [strideq*3]
+    lea              dst4q, [dst0q+strideq*4]
+
+%define %%p3 rsp+(%%tspoff+0)*mmsize
+%define %%p2 rsp+(%%tspoff+1)*mmsize
+%define %%p1 rsp+(%%tspoff+2)*mmsize
+%define %%p0 rsp+(%%tspoff+3)*mmsize
+%define %%q0 rsp+(%%tspoff+4)*mmsize
+%define %%q1 rsp+(%%tspoff+5)*mmsize
+%define %%q2 rsp+(%%tspoff+6)*mmsize
+%define %%q3 rsp+(%%tspoff+7)*mmsize
+
+%if %2 < 16
+    movu                m0, [dst0q+strideq*0-8]
+    movu                m1, [dst0q+strideq*1-8]
+    movu                m2, [dst0q+strideq*2-8]
+    movu                m3, [dst0q+stride3q -8]
+    movu                m4, [dst4q+strideq*0-8]
+    movu                m5, [dst4q+strideq*1-8]
+    movu                m6, [dst4q+strideq*2-8]
+    movu                m7, [dst4q+stride3q -8]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, 12
+%else
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, [%%p0], [%%q0]
+%endif
+
+    mova            [%%p3], m0
+    mova            [%%p2], m1
+    mova            [%%p1], m2
+    mova            [%%p0], m3
+%if ARCH_X86_64
+    mova            [%%q0], m4
+%endif
+    mova            [%%q1], m5
+    mova            [%%q2], m6
+    mova            [%%q3], m7
+
+    ; FIXME investigate if we can _not_ load q0-3 below if h, and adjust register
+    ; order here accordingly
+%else ; %2 == 16
+
+%define %%p7 rsp+(%%tspoff+ 8)*mmsize
+%define %%p6 rsp+(%%tspoff+ 9)*mmsize
+%define %%p5 rsp+(%%tspoff+10)*mmsize
+%define %%p4 rsp+(%%tspoff+11)*mmsize
+%define %%q4 rsp+(%%tspoff+12)*mmsize
+%define %%q5 rsp+(%%tspoff+13)*mmsize
+%define %%q6 rsp+(%%tspoff+14)*mmsize
+%define %%q7 rsp+(%%tspoff+15)*mmsize
+
+    mova                m0, [dst0q+strideq*0-16]
+    mova                m1, [dst0q+strideq*1-16]
+    mova                m2, [dst0q+strideq*2-16]
+    mova                m3, [dst0q+stride3q -16]
+    mova                m4, [dst4q+strideq*0-16]
+    mova                m5, [dst4q+strideq*1-16]
+%if ARCH_X86_64
+    mova                m6, [dst4q+strideq*2-16]
+%endif
+    mova                m7, [dst4q+stride3q -16]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, 12
+%else
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, [dst4q+strideq*2-16], [%%p3], 1
+%endif
+
+    mova            [%%p7], m0
+    mova            [%%p6], m1
+    mova            [%%p5], m2
+    mova            [%%p4], m3
+%if ARCH_X86_64
+    mova            [%%p3], m4
+%endif
+    mova            [%%p2], m5
+    mova            [%%p1], m6
+    mova            [%%p0], m7
+
+    mova                m0, [dst0q+strideq*0]
+    mova                m1, [dst0q+strideq*1]
+    mova                m2, [dst0q+strideq*2]
+    mova                m3, [dst0q+stride3q ]
+    mova                m4, [dst4q+strideq*0]
+    mova                m5, [dst4q+strideq*1]
+%if ARCH_X86_64
+    mova                m6, [dst4q+strideq*2]
+%endif
+    mova                m7, [dst4q+stride3q ]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, 12
+%else
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, [dst4q+strideq*2], [%%q4], 1
+%endif
+
+    mova            [%%q0], m0
+    mova            [%%q1], m1
+    mova            [%%q2], m2
+    mova            [%%q3], m3
+%if ARCH_X86_64
+    mova            [%%q4], m4
+%endif
+    mova            [%%q5], m5
+    mova            [%%q6], m6
+    mova            [%%q7], m7
+
+    ; FIXME investigate if we can _not_ load q0|q4-7 below if h, and adjust register
+    ; order here accordingly
+%endif ; %2
+%endif ; %1
+
+    ; load q0|q4-7 data
+    mova                m0, [%%q0]
+%if %2 == 16
+    mova                m4, [%%q4]
+    mova                m5, [%%q5]
+    mova                m6, [%%q6]
+    mova                m7, [%%q7]
+
+    ; flat8out q portion
+    FLAT8OUT_HALF
+    SCRATCH              7, 15, rsp+(%%off+6)*mmsize, F8O
+%endif
+
+    ; load q1-3 data
+    mova                m1, [%%q1]
+    mova                m2, [%%q2]
+    mova                m3, [%%q3]
+
+    ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+    ; r9[m15]=!flat8out[q]
+    ; m12-14=free
+    ; m0-3=q0-q3
+    ; m4-7=free
+
+    ; flat8in|fm|hev q portion
+    FLAT8IN_HALF        %2
+    SCRATCH              7, 13, rsp+(%%off+4)*mmsize, HEV
+%if %2 > 4
+    SCRATCH              4, 14, rsp+(%%off+5)*mmsize, F8I
+%endif
+
+    ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+    ; r9[m15]=!flat8out[q]
+    ; r10[m13]=hev[q]
+    ; r11[m14]=!flat8in[q]
+    ; m2=!fm[q]
+    ; m0,1=q0-q1
+    ; m2-7=free
+    ; m12=free
+
+    ; load p0-1
+    mova                m3, [%%p0]
+    mova                m4, [%%p1]
+
+    ; fm mb_edge portion
+    psubw               m5, m3, m0                  ; p0-q0 (only the absolute value is used)
+    psubw               m6, m4, m1                  ; p1-q1 (only the absolute value is used)
+%if ARCH_X86_64
+    ABS2                m5, m6, m7, m12             ; abs(q0-p0) | abs(q1-p1)
+%else
+    ABS1                m5, m7                      ; abs(q0-p0)
+    ABS1                m6, m7                      ; abs(q1-p1)
+%endif
+    paddw               m5, m5
+    psraw               m6, 1
+    paddw               m6, m5                      ; abs(q0-p0)*2+(abs(q1-p1)>>1)
+    pcmpgtw             m6, reg_E
+    por                 m2, m6
+    SCRATCH              2, 12, rsp+(%%off+3)*mmsize, FM
+
+    ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+    ; r9[m15]=!flat8out[q]
+    ; r10[m13]=hev[q]
+    ; r11[m14]=!flat8in[q]
+    ; r12[m12]=!fm[q]
+    ; m3-4=p0-1
+    ; m0-2/5-7=free
+
+    ; load p4-7 data
+    SWAP                 3, 0                       ; p0
+    SWAP                 4, 1                       ; p1
+%if %2 == 16
+    mova                m7, [%%p7]
+    mova                m6, [%%p6]
+    mova                m5, [%%p5]
+    mova                m4, [%%p4]
+
+    ; flat8out p portion
+    FLAT8OUT_HALF
+    por                 m7, reg_F8O
+    SCRATCH              7, 15, rsp+(%%off+6)*mmsize, F8O
+%endif
+
+    ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+    ; r9[m15]=!flat8out
+    ; r10[m13]=hev[q]
+    ; r11[m14]=!flat8in[q]
+    ; r12[m12]=!fm[q]
+    ; m0-1=p0-1
+    ; m2-7=free
+
+    ; load p2-3 data
+    mova                m2, [%%p2]
+    mova                m3, [%%p3]
+
+    ; flat8in|fm|hev p portion
+    FLAT8IN_HALF        %2
+    por                 m7, reg_HEV
+%if %2 > 4
+    por                 m4, reg_F8I
+%endif
+    por                 m2, reg_FM
+%if %2 > 4
+    por                 m4, m2                      ; !flat8|!fm
+%if %2 == 16
+    por                 m5, m4, reg_F8O             ; !flat16|!fm
+    pandn               m2, m4                      ; filter4_mask
+    pandn               m4, m5                      ; filter8_mask
+    pxor                m5, [pw_m1]                 ; filter16_mask
+    SCRATCH              5, 15, rsp+(%%off+6)*mmsize, F16M
+%else
+    pandn               m2, m4                      ; filter4_mask
+    pxor                m4, [pw_m1]                 ; filter8_mask
+%endif
+    SCRATCH              4, 14, rsp+(%%off+5)*mmsize, F8M
+%else
+    pxor                m2, [pw_m1]                 ; filter4_mask
+%endif
+    SCRATCH              7, 13, rsp+(%%off+4)*mmsize, HEV
+    SCRATCH              2, 12, rsp+(%%off+3)*mmsize, F4M
+
+    ; r9[m15]=filter16_mask
+    ; r10[m13]=hev
+    ; r11[m14]=filter8_mask
+    ; r12[m12]=filter4_mask
+    ; m0,1=p0-p1
+    ; m2-7=free
+    ; m8-11=free
+
+%if %2 > 4
+%if %2 == 16
+    ; filter_14
+    mova                m2, [%%p7]
+    mova                m3, [%%p6]
+    mova                m6, [%%p5]
+    mova                m7, [%%p4]
+    PRELOAD              8, %%p3, P3
+    PRELOAD              9, %%p2, P2
+%endif
+    PRELOAD             10, %%q0, Q0
+    PRELOAD             11, %%q1, Q1
+%if %2 == 16
+    psllw               m4, m2, 3
+    paddw               m5, m3, m3
+    paddw               m4, m6
+    paddw               m5, m7
+    paddw               m4, reg_P3
+    paddw               m5, reg_P2
+    paddw               m4, m1
+    paddw               m5, m0
+    paddw               m4, reg_Q0                  ; q0+p1+p3+p5+p7*8
+    psubw               m5, m2                      ; p0+p2+p4+p6*2-p7
+    paddw               m4, [pw_8]
+    paddw               m5, m4                      ; q0+p0+p1+p2+p3+p4+p5+p6*2+p7*7+8
+
+    ; below, we use r0-5 for storing pre-filter pixels for subsequent subtraction
+    ; at the end of the filter
+
+    mova    [rsp+0*mmsize], m3
+    FILTER_STEP         m4, m5, F16M, 4, %%p6, m3,     m2,             m6,     reg_Q1
+%endif
+    mova                m3, [%%q2]
+%if %2 == 16
+    mova    [rsp+1*mmsize], m6
+    FILTER_STEP         m4, m5, F16M, 4, %%p5, m6,     m2,             m7,     m3
+%endif
+    mova                m6, [%%q3]
+%if %2 == 16
+    mova    [rsp+2*mmsize], m7
+    FILTER_STEP         m4, m5, F16M, 4, %%p4, m7,     m2,             reg_P3, m6
+    mova                m7, [%%q4]
+%if ARCH_X86_64
+    mova    [rsp+3*mmsize], reg_P3
+%else
+    mova                m4, reg_P3
+    mova    [rsp+3*mmsize], m4
+%endif
+    FILTER_STEP         m4, m5, F16M, 4, %%p3, reg_P3, m2,             reg_P2, m7
+    PRELOAD              8, %%q5, Q5
+%if ARCH_X86_64
+    mova    [rsp+4*mmsize], reg_P2
+%else
+    mova                m4, reg_P2
+    mova    [rsp+4*mmsize], m4
+%endif
+    FILTER_STEP         m4, m5, F16M, 4, %%p2, reg_P2, m2,             m1,     reg_Q5
+    PRELOAD              9, %%q6, Q6
+    mova    [rsp+5*mmsize], m1
+    FILTER_STEP         m4, m5, F16M, 4, %%p1, m1,     m2,             m0,     reg_Q6
+    mova                m1, [%%q7]
+    FILTER_STEP         m4, m5, F16M, 4, %%p0, m0,     m2,             reg_Q0, m1,     1
+    FILTER_STEP         m4, m5, F16M, 4, %%q0, reg_Q0, [rsp+0*mmsize], reg_Q1, m1,     ARCH_X86_64
+    FILTER_STEP         m4, m5, F16M, 4, %%q1, reg_Q1, [rsp+1*mmsize], m3,     m1,     ARCH_X86_64
+    FILTER_STEP         m4, m5, F16M, 4, %%q2, m3,     [rsp+2*mmsize], m6,     m1,     1
+    FILTER_STEP         m4, m5, F16M, 4, %%q3, m6,     [rsp+3*mmsize], m7,     m1
+    FILTER_STEP         m4, m5, F16M, 4, %%q4, m7,     [rsp+4*mmsize], reg_Q5, m1
+    FILTER_STEP         m4, m5, F16M, 4, %%q5, reg_Q5, [rsp+5*mmsize], reg_Q6, m1
+    FILTER_STEP         m4, m5, F16M, 4, %%q6, reg_Q6
+
+    mova                m7, [%%p1]
+%else
+    SWAP                 1, 7
+%endif
+
+    mova                m2, [%%p3]
+    mova                m1, [%%p2]
+
+    ; reg_Q0-1 (m10-m11)
+    ; m0=p0
+    ; m1=p2
+    ; m2=p3
+    ; m3=q2
+    ; m4-5=free
+    ; m6=q3
+    ; m7=p1
+    ; m8-9 unused
+
+    ; filter_6
+    psllw               m4, m2, 2
+    paddw               m5, m1, m1
+    paddw               m4, m7
+    psubw               m5, m2
+    paddw               m4, m0
+    paddw               m5, reg_Q0
+    paddw               m4, [pw_4]
+    paddw               m5, m4
+
+%if ARCH_X86_64
+    mova                m8, m1
+    mova                m9, m7
+%else
+    mova    [rsp+0*mmsize], m1
+    mova    [rsp+1*mmsize], m7
+%endif
+%ifidn %1, v
+    FILTER_STEP         m4, m5, F8M, 3, %%p2, m1,     m2,             m7,     reg_Q1
+%else
+    FILTER_STEP         m4, m5, F8M, 3, %%p2, m1,     m2,             m7,     reg_Q1, 1
+%endif
+    FILTER_STEP         m4, m5, F8M, 3, %%p1, m7,     m2,             m0,     m3, 1
+    FILTER_STEP         m4, m5, F8M, 3, %%p0, m0,     m2,             reg_Q0, m6, 1
+%if ARCH_X86_64
+    FILTER_STEP         m4, m5, F8M, 3, %%q0, reg_Q0, m8,             reg_Q1, m6, ARCH_X86_64
+    FILTER_STEP         m4, m5, F8M, 3, %%q1, reg_Q1, m9,             m3,     m6, ARCH_X86_64
+%else
+    FILTER_STEP         m4, m5, F8M, 3, %%q0, reg_Q0, [rsp+0*mmsize], reg_Q1, m6, ARCH_X86_64
+    FILTER_STEP         m4, m5, F8M, 3, %%q1, reg_Q1, [rsp+1*mmsize], m3,     m6, ARCH_X86_64
+%endif
+    FILTER_STEP         m4, m5, F8M, 3, %%q2, m3
+
+    UNSCRATCH            2, 10, %%q0
+    UNSCRATCH            6, 11, %%q1
+%else
+    SWAP                 1, 7
+    mova                m2, [%%q0]
+    mova                m6, [%%q1]
+%endif
+    UNSCRATCH            3, 13, rsp+(%%off+4)*mmsize, HEV
+
+    ; m0=p0
+    ; m1=p2
+    ; m2=q0
+    ; m3=hev_mask
+    ; m4-5=free
+    ; m6=q1
+    ; m7=p1
+
+    ; filter_4
+    psubw               m4, m7, m6              ; p1-q1
+    psubw               m5, m2, m0              ; q0-p0
+    pand                m4, m3
+    pminsw              m4, [pw_ %+ %%maxsgn]
+    pmaxsw              m4, [pw_ %+ %%minsgn]   ; clip_intp2(p1-q1, 9) -> f
+    paddw               m4, m5
+    paddw               m5, m5
+    paddw               m4, m5                  ; 3*(q0-p0)+f
+    pminsw              m4, [pw_ %+ %%maxsgn]
+    pmaxsw              m4, [pw_ %+ %%minsgn]   ; clip_intp2(3*(q0-p0)+f, 9) -> f
+    pand                m4, reg_F4M
+    paddw               m5, m4, [pw_4]
+    paddw               m4, [pw_3]
+    pminsw              m5, [pw_ %+ %%maxsgn]
+    pminsw              m4, [pw_ %+ %%maxsgn]
+    psraw               m5, 3                   ; min_intp2(f+4, 9)>>3 -> f1
+    psraw               m4, 3                   ; min_intp2(f+3, 9)>>3 -> f2
+    psubw               m2, m5                  ; q0-f1
+    paddw               m0, m4                  ; p0+f2
+    pandn               m3, m5                  ; f1 & !hev (for p1/q1 adj)
+    pxor                m4, m4
+    mova                m5, [pw_ %+ %%maxusgn]
+    pmaxsw              m2, m4
+    pmaxsw              m0, m4
+    pminsw              m2, m5
+    pminsw              m0, m5
+%if cpuflag(ssse3)
+    pmulhrsw            m3, [pw_16384]          ; (f1+1)>>1
+%else
+    paddw               m3, [pw_1]
+    psraw               m3, 1
+%endif
+    paddw               m7, m3                  ; p1+f
+    psubw               m6, m3                  ; q1-f
+    pmaxsw              m7, m4
+    pmaxsw              m6, m4
+    pminsw              m7, m5
+    pminsw              m6, m5
+
+    ; store
+%ifidn %1, v
+    mova            [%%p1], m7
+    mova            [%%p0], m0
+    mova            [%%q0], m2
+    mova            [%%q1], m6
+%else ; %1 == h
+%if %2 == 4
+    TRANSPOSE4x4W        7, 0, 2, 6, 1
+    movh   [dst0q+strideq*0-4], m7
+    movhps [dst0q+strideq*1-4], m7
+    movh   [dst0q+strideq*2-4], m0
+    movhps [dst0q+stride3q -4], m0
+    movh   [dst4q+strideq*0-4], m2
+    movhps [dst4q+strideq*1-4], m2
+    movh   [dst4q+strideq*2-4], m6
+    movhps [dst4q+stride3q -4], m6
+%elif %2 == 8
+    mova                m3, [%%p3]
+    mova                m4, [%%q2]
+    mova                m5, [%%q3]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        3, 1, 7, 0, 2, 6, 4, 5, 8
+%else
+    TRANSPOSE8x8W        3, 1, 7, 0, 2, 6, 4, 5, [%%q2], [%%q0], 1
+    mova                m2, [%%q0]
+%endif
+
+    movu [dst0q+strideq*0-8], m3
+    movu [dst0q+strideq*1-8], m1
+    movu [dst0q+strideq*2-8], m7
+    movu [dst0q+stride3q -8], m0
+    movu [dst4q+strideq*0-8], m2
+    movu [dst4q+strideq*1-8], m6
+    movu [dst4q+strideq*2-8], m4
+    movu [dst4q+stride3q -8], m5
+%else ; %2 == 16
+    SCRATCH              2, 8, %%q0
+    SCRATCH              6, 9, %%q1
+    mova                m2, [%%p7]
+    mova                m3, [%%p6]
+    mova                m4, [%%p5]
+    mova                m5, [%%p4]
+    mova                m6, [%%p3]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        2, 3, 4, 5, 6, 1, 7, 0, 10
+%else
+    mova            [%%p1], m7
+    TRANSPOSE8x8W        2, 3, 4, 5, 6, 1, 7, 0, [%%p1], [dst4q+strideq*0-16], 1
+%endif
+
+    mova [dst0q+strideq*0-16], m2
+    mova [dst0q+strideq*1-16], m3
+    mova [dst0q+strideq*2-16], m4
+    mova [dst0q+stride3q -16], m5
+%if ARCH_X86_64
+    mova [dst4q+strideq*0-16], m6
+%endif
+    mova [dst4q+strideq*1-16], m1
+    mova [dst4q+strideq*2-16], m7
+    mova [dst4q+stride3q -16], m0
+
+    UNSCRATCH            2, 8, %%q0
+    UNSCRATCH            6, 9, %%q1
+    mova                m0, [%%q2]
+    mova                m1, [%%q3]
+    mova                m3, [%%q4]
+    mova                m4, [%%q5]
+%if ARCH_X86_64
+    mova                m5, [%%q6]
+%endif
+    mova                m7, [%%q7]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        2, 6, 0, 1, 3, 4, 5, 7, 8
+%else
+    TRANSPOSE8x8W        2, 6, 0, 1, 3, 4, 5, 7, [%%q6], [dst4q+strideq*0], 1
+%endif
+
+    mova [dst0q+strideq*0], m2
+    mova [dst0q+strideq*1], m6
+    mova [dst0q+strideq*2], m0
+    mova [dst0q+stride3q ], m1
+%if ARCH_X86_64
+    mova [dst4q+strideq*0], m3
+%endif
+    mova [dst4q+strideq*1], m4
+    mova [dst4q+strideq*2], m5
+    mova [dst4q+stride3q ], m7
+%endif ; %2
+%endif ; %1
+    RET
+%endmacro
+
+%macro LOOP_FILTER_CPUSETS 3 ; dir, wd, bpp: expand LOOP_FILTER once each for sse2, ssse3 and avx
+INIT_XMM sse2
+LOOP_FILTER %1, %2, %3
+INIT_XMM ssse3
+LOOP_FILTER %1, %2, %3
+INIT_XMM avx
+LOOP_FILTER %1, %2, %3
+%endmacro
+
+%macro LOOP_FILTER_WDSETS 2 ; dir, bpp: expand LOOP_FILTER_CPUSETS for wd 4, 8 and 16
+LOOP_FILTER_CPUSETS %1,  4, %2
+LOOP_FILTER_CPUSETS %1,  8, %2
+LOOP_FILTER_CPUSETS %1, 16, %2
+%endmacro
+
+LOOP_FILTER_WDSETS h, 10 ; each line emits wd 4/8/16 for sse2, ssse3 and avx (9 functions)
+LOOP_FILTER_WDSETS v, 10
+LOOP_FILTER_WDSETS h, 12
+LOOP_FILTER_WDSETS v, 12
diff --git a/media/ffvpx/libavcodec/x86/vp9mc.asm b/media/ffvpx/libavcodec/x86/vp9mc.asm
new file mode 100644
index 0000000000..efc4cfbef1
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9mc.asm
@@ -0,0 +1,680 @@
+;******************************************************************************
+;* VP9 motion compensation SIMD optimizations
+;*
+;* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+
+cextern pw_256
+cextern pw_64
+
+%macro F8_SSSE3_TAPS 8 ; emits one 8-tap filter as 4 lines of 32 bytes: each line is a byte-sized tap pair repeated 16 times
+times 16 db %1, %2 ; taps 0,1 (byte offset 0)
+times 16 db %3, %4 ; taps 2,3 (byte offset 32)
+times 16 db %5, %6 ; taps 4,5 (byte offset 64)
+times 16 db %7, %8 ; taps 6,7 (byte offset 96)
+%endmacro
+
+%macro F8_SSE2_TAPS 8 ; emits one 8-tap filter as 8 lines of 16 bytes: each tap is a 16-bit word repeated 8 times
+times 8 dw %1 ; tap 0 (byte offset 0)
+times 8 dw %2 ; tap 1 (byte offset 16)
+times 8 dw %3 ; tap 2 (byte offset 32)
+times 8 dw %4 ; tap 3 (byte offset 48)
+times 8 dw %5 ; tap 4 (byte offset 64)
+times 8 dw %6 ; tap 5 (byte offset 80)
+times 8 dw %7 ; tap 6 (byte offset 96)
+times 8 dw %8 ; tap 7 (byte offset 112)
+%endmacro
+
+%macro F8_16BPP_TAPS 8 ; emits one 8-tap filter as 4 lines of 32 bytes: each line is a 16-bit tap pair repeated 8 times
+times 8 dw %1, %2 ; taps 0,1 (byte offset 0)
+times 8 dw %3, %4 ; taps 2,3 (byte offset 32)
+times 8 dw %5, %6 ; taps 4,5 (byte offset 64)
+times 8 dw %7, %8 ; taps 6,7 (byte offset 96)
+%endmacro
+
+%macro FILTER 1 ; emits table filters_%1: 3 groups (smooth, regular, sharp) of 15 rows, each row encoded by whatever F8_TAPS is defined as at expansion time
+const filters_%1 ; smooth; in every row of the table the 8 taps sum to 128
+                    F8_TAPS -3, -1,  32,  64,  38,   1, -3,  0
+                    F8_TAPS -2, -2,  29,  63,  41,   2, -3,  0
+                    F8_TAPS -2, -2,  26,  63,  43,   4, -4,  0
+                    F8_TAPS -2, -3,  24,  62,  46,   5, -4,  0
+                    F8_TAPS -2, -3,  21,  60,  49,   7, -4,  0
+                    F8_TAPS -1, -4,  18,  59,  51,   9, -4,  0
+                    F8_TAPS -1, -4,  16,  57,  53,  12, -4, -1
+                    F8_TAPS -1, -4,  14,  55,  55,  14, -4, -1
+                    F8_TAPS -1, -4,  12,  53,  57,  16, -4, -1
+                    F8_TAPS  0, -4,   9,  51,  59,  18, -4, -1
+                    F8_TAPS  0, -4,   7,  49,  60,  21, -3, -2
+                    F8_TAPS  0, -4,   5,  46,  62,  24, -3, -2
+                    F8_TAPS  0, -4,   4,  43,  63,  26, -2, -2
+                    F8_TAPS  0, -3,   2,  41,  63,  29, -2, -2
+                    F8_TAPS  0, -3,   1,  38,  64,  32, -1, -3
+                    ; regular
+                    F8_TAPS  0,  1,  -5, 126,   8,  -3,  1,  0
+                    F8_TAPS -1,  3, -10, 122,  18,  -6,  2,  0
+                    F8_TAPS -1,  4, -13, 118,  27,  -9,  3, -1
+                    F8_TAPS -1,  4, -16, 112,  37, -11,  4, -1
+                    F8_TAPS -1,  5, -18, 105,  48, -14,  4, -1
+                    F8_TAPS -1,  5, -19,  97,  58, -16,  5, -1
+                    F8_TAPS -1,  6, -19,  88,  68, -18,  5, -1
+                    F8_TAPS -1,  6, -19,  78,  78, -19,  6, -1
+                    F8_TAPS -1,  5, -18,  68,  88, -19,  6, -1
+                    F8_TAPS -1,  5, -16,  58,  97, -19,  5, -1
+                    F8_TAPS -1,  4, -14,  48, 105, -18,  5, -1
+                    F8_TAPS -1,  4, -11,  37, 112, -16,  4, -1
+                    F8_TAPS -1,  3,  -9,  27, 118, -13,  4, -1
+                    F8_TAPS  0,  2,  -6,  18, 122, -10,  3, -1
+                    F8_TAPS  0,  1,  -3,   8, 126,  -5,  1,  0
+                    ; sharp
+                    F8_TAPS -1,  3,  -7, 127,   8,  -3,  1,  0
+                    F8_TAPS -2,  5, -13, 125,  17,  -6,  3, -1
+                    F8_TAPS -3,  7, -17, 121,  27, -10,  5, -2
+                    F8_TAPS -4,  9, -20, 115,  37, -13,  6, -2
+                    F8_TAPS -4, 10, -23, 108,  48, -16,  8, -3
+                    F8_TAPS -4, 10, -24, 100,  59, -19,  9, -3
+                    F8_TAPS -4, 11, -24,  90,  70, -21, 10, -4
+                    F8_TAPS -4, 11, -23,  80,  80, -23, 11, -4
+                    F8_TAPS -4, 10, -21,  70,  90, -24, 11, -4
+                    F8_TAPS -3,  9, -19,  59, 100, -24, 10, -4
+                    F8_TAPS -3,  8, -16,  48, 108, -23, 10, -4
+                    F8_TAPS -2,  6, -13,  37, 115, -20,  9, -4
+                    F8_TAPS -2,  5, -10,  27, 121, -17,  7, -3
+                    F8_TAPS -1,  3,  -6,  17, 125, -13,  5, -2
+                    F8_TAPS  0,  1,  -3,   8, 127,  -7,  3, -1
+%endmacro
+
+%define F8_TAPS F8_SSSE3_TAPS ; row encoder picked up by the next FILTER expansion
+; int8_t ff_filters_ssse3[3][15][4][32]: [filter group][row][tap pair][16 x (tap, tap) bytes]
+FILTER ssse3
+%define F8_TAPS F8_SSE2_TAPS ; switch the row encoder to one word-sized tap per 16-byte line
+; int16_t ff_filters_sse2[3][15][8][8]: [filter group][row][tap][8 copies of the tap]
+FILTER sse2
+%define F8_TAPS F8_16BPP_TAPS ; switch the row encoder to word-sized tap pairs
+; int16_t ff_filters_16bpp[3][15][4][16]: [filter group][row][tap pair][8 x (tap, tap) words]
+FILTER 16bpp
+
+SECTION .text
+
+%macro filter_sse2_h_fn 1 ; %1 = put|avg. 8-tap horizontal filter on 8-bit pixels using 16-bit multiplies (pmullw); one row per loop iteration
+%assign %%px mmsize/2 ; becomes part of the function name (4 for MMX, 8 for XMM in the instantiations below)
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h, filtery
+    pxor        m5, m5 ; m5 = 0: the zero half for the byte->word unpacks below
+    mova        m6, [pw_64] ; added before the >>7 below (pw_64 is external; presumably words of 64, i.e. a rounding bias)
+    mova        m7, [filteryq+  0] ; tap 0; the eight taps are read 16 bytes apart (assumes an F8_SSE2_TAPS row - confirm against caller)
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+ 16] ; taps 1-7 are cached in m8-m14 only in this configuration
+    mova        m9, [filteryq+ 32]
+    mova       m10, [filteryq+ 48]
+    mova       m11, [filteryq+ 64]
+    mova       m12, [filteryq+ 80]
+    mova       m13, [filteryq+ 96]
+    mova       m14, [filteryq+112]
+%endif
+.loop:
+    movh        m0, [srcq-3] ; loads at byte offsets -3..+4 around src, one per tap
+    movh        m1, [srcq-2]
+    movh        m2, [srcq-1]
+    movh        m3, [srcq+0]
+    movh        m4, [srcq+1]
+    punpcklbw   m0, m5 ; zero-extend the pixels to 16-bit words
+    punpcklbw   m1, m5
+    punpcklbw   m2, m5
+    punpcklbw   m3, m5
+    punpcklbw   m4, m5
+    pmullw      m0, m7 ; pixel * tap, low 16 bits of each product
+%if ARCH_X86_64 && mmsize > 8
+    pmullw      m1, m8
+    pmullw      m2, m9
+    pmullw      m3, m10
+    pmullw      m4, m11
+%else
+    pmullw      m1, [filteryq+ 16] ; taps are used straight from memory here
+    pmullw      m2, [filteryq+ 32]
+    pmullw      m3, [filteryq+ 48]
+    pmullw      m4, [filteryq+ 64]
+%endif
+    paddw       m0, m1 ; m0 accumulates taps 0,1,4,5 (and later the bias)
+    paddw       m2, m3 ; m2 accumulates taps 2,3 (and later 6,7)
+    paddw       m0, m4
+    movh        m1, [srcq+2] ; m1/m3/m4 are free again: pixels for taps 5,6,7
+    movh        m3, [srcq+3]
+    movh        m4, [srcq+4]
+    add       srcq, sstrideq ; all loads of this row are done: step to the next source row
+    punpcklbw   m1, m5
+    punpcklbw   m3, m5
+    punpcklbw   m4, m5
+%if ARCH_X86_64 && mmsize > 8
+    pmullw      m1, m12
+    pmullw      m3, m13
+    pmullw      m4, m14
+%else
+    pmullw      m1, [filteryq+ 80]
+    pmullw      m3, [filteryq+ 96]
+    pmullw      m4, [filteryq+112]
+%endif
+    paddw       m0, m1
+    paddw       m3, m4
+    paddw       m0, m6 ; add the pw_64 constant
+    paddw       m2, m3
+    paddsw      m0, m2 ; the two partial sums are merged with signed saturation
+    psraw       m0, 7 ; arithmetic shift right by 7 (the taps of each table row sum to 128)
+%ifidn %1, avg
+    movh        m1, [dstq] ; avg only: fetch the existing destination pixels
+%endif
+    packuswb    m0, m0 ; narrow to bytes with unsigned saturation (clamps to 0..255)
+%ifidn %1, avg
+    pavgb       m0, m1 ; avg only: rounded byte average with the destination
+%endif
+    movh    [dstq], m0
+    add       dstq, dstrideq
+    dec         hd
+    jg .loop ; keep going while the decremented row count is still positive
+    RET
+%endmacro
+
+INIT_MMX mmxext ; MMX registers: %%px = mmsize/2 = 4, so these are the ..._h_4_8 functions
+filter_sse2_h_fn put
+filter_sse2_h_fn avg
+
+INIT_XMM sse2 ; XMM registers: %%px = 8, so these are the ..._h_8_8 functions
+filter_sse2_h_fn put
+filter_sse2_h_fn avg
+
+%macro filter_h_fn 1 ; %1 = put|avg. 8-tap horizontal filter on 8-bit pixels using pmaddubsw on interleaved pixel pairs; one row per loop iteration
+%assign %%px mmsize/2 ; becomes part of the function name
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 11, dst, dstride, src, sstride, h, filtery
+    mova        m6, [pw_256] ; multiplier for the pmulhrsw below (pw_256 is external; presumably words of 256)
+    mova        m7, [filteryq+ 0] ; tap pair (0,1); the four pairs are read 32 bytes apart (assumes an F8_SSSE3_TAPS row - confirm against caller)
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+32] ; pairs (2,3), (4,5), (6,7) are cached in m8-m10 only in this configuration
+    mova        m9, [filteryq+64]
+    mova       m10, [filteryq+96]
+%endif
+.loop:
+    movh        m0, [srcq-3] ; loads at byte offsets -3..+4 around src, one per tap
+    movh        m1, [srcq-2]
+    movh        m2, [srcq-1]
+    movh        m3, [srcq+0]
+    movh        m4, [srcq+1]
+    movh        m5, [srcq+2]
+    punpcklbw   m0, m1 ; interleave the pixels for taps 0 and 1
+    punpcklbw   m2, m3 ; ... taps 2 and 3
+    movh        m1, [srcq+3] ; m1/m3 are free again: pixels for taps 6 and 7
+    movh        m3, [srcq+4]
+    add       srcq, sstrideq
+    punpcklbw   m4, m5 ; ... taps 4 and 5
+    punpcklbw   m1, m3 ; ... taps 6 and 7
+    pmaddubsw   m0, m7 ; unsigned pixel * signed tap, the two products of each pair summed into a word
+%if ARCH_X86_64 && mmsize > 8
+    pmaddubsw   m2, m8
+    pmaddubsw   m4, m9
+    pmaddubsw   m1, m10
+%else
+    pmaddubsw   m2, [filteryq+32] ; tap pairs are used straight from memory here
+    pmaddubsw   m4, [filteryq+64]
+    pmaddubsw   m1, [filteryq+96]
+%endif
+    paddw       m0, m4 ; pairs (0,1) + (4,5)
+    paddw       m2, m1 ; pairs (2,3) + (6,7)
+    paddsw      m0, m2 ; the two partial sums are merged with signed saturation
+    pmulhrsw    m0, m6 ; with a multiplier of 256 this computes (x + 64) >> 7
+%ifidn %1, avg
+    movh        m1, [dstq] ; avg only: fetch the existing destination pixels
+%endif
+    packuswb    m0, m0 ; narrow to bytes with unsigned saturation
+%ifidn %1, avg
+    pavgb       m0, m1 ; avg only: rounded byte average with the destination
+%endif
+    movh    [dstq], m0
+    add       dstq, dstrideq
+    dec         hd
+    jg .loop ; keep going while the decremented row count is still positive
+    RET
+%endmacro
+
+INIT_MMX ssse3 ; MMX registers: %%px = 4
+filter_h_fn put
+filter_h_fn avg
+
+INIT_XMM ssse3 ; XMM registers: %%px = 8
+filter_h_fn put
+filter_h_fn avg
+
+%if ARCH_X86_64
+%macro filter_hx2_fn 1 ; %1 = put|avg. Full-register-width variant of the pmaddubsw horizontal filter: both unpack halves are processed; only assembled when ARCH_X86_64
+%assign %%px mmsize ; becomes part of the function name
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 14, dst, dstride, src, sstride, h, filtery
+    mova       m13, [pw_256] ; multiplier for the pmulhrsw below (pw_256 is external; presumably words of 256)
+    mova        m8, [filteryq+ 0] ; tap pairs (0,1), (2,3), (4,5), (6,7), read 32 bytes apart
+    mova        m9, [filteryq+32]
+    mova       m10, [filteryq+64]
+    mova       m11, [filteryq+96]
+.loop:
+    movu        m0, [srcq-3] ; unaligned full-width loads at byte offsets -3..+4, one per tap
+    movu        m1, [srcq-2]
+    movu        m2, [srcq-1]
+    movu        m3, [srcq+0]
+    movu        m4, [srcq+1]
+    movu        m5, [srcq+2]
+    movu        m6, [srcq+3]
+    movu        m7, [srcq+4]
+    add       srcq, sstrideq
+    SBUTTERFLY  bw, 0, 1, 12 ; byte-interleave m0/m1 into a low part (m0) and a high part (m1); 12 is presumably the scratch register (macro is defined in x86util.asm)
+    SBUTTERFLY  bw, 2, 3, 12
+    SBUTTERFLY  bw, 4, 5, 12
+    SBUTTERFLY  bw, 6, 7, 12
+    pmaddubsw   m0, m8 ; each tap pair is applied to both parts
+    pmaddubsw   m1, m8
+    pmaddubsw   m2, m9
+    pmaddubsw   m3, m9
+    pmaddubsw   m4, m10
+    pmaddubsw   m5, m10
+    pmaddubsw   m6, m11
+    pmaddubsw   m7, m11
+    paddw       m0, m4 ; pairs (0,1) + (4,5)
+    paddw       m1, m5
+    paddw       m2, m6 ; pairs (2,3) + (6,7)
+    paddw       m3, m7
+    paddsw      m0, m2 ; partial sums are merged with signed saturation
+    paddsw      m1, m3
+    pmulhrsw    m0, m13 ; with a multiplier of 256 this computes (x + 64) >> 7
+    pmulhrsw    m1, m13
+    packuswb    m0, m1 ; narrow both parts to bytes with unsigned saturation
+%ifidn %1, avg
+    pavgb       m0, [dstq] ; avg only: rounded byte average with the destination
+%endif
+    mova    [dstq], m0 ; aligned store of the full register
+    add       dstq, dstrideq
+    dec         hd
+    jg .loop ; keep going while the decremented row count is still positive
+    RET
+%endmacro
+
+INIT_XMM ssse3 ; %%px = mmsize = 16
+filter_hx2_fn put
+filter_hx2_fn avg
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2 ; %%px = mmsize = 32; only built when HAVE_AVX2_EXTERNAL is set
+filter_hx2_fn put
+filter_hx2_fn avg
+%endif
+
+%endif ; ARCH_X86_64
+
+%macro filter_sse2_v_fn 1 ; %1 = put|avg. 8-tap vertical filter on 8-bit pixels using 16-bit multiplies (pmullw); one output row per loop iteration
+%assign %%px mmsize/2 ; becomes part of the function name
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 15, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, filtery, src4, sstride3
+    mov   filteryq, r5mp ; x86-32: only 4 arguments are auto-loaded, so fetch the 6th (filtery) by hand
+%define hd r4mp ; x86-32: the 5th argument (h) is used in place from its argument slot
+%endif
+    pxor        m5, m5 ; m5 = 0: the zero half for the byte->word unpacks below
+    mova        m6, [pw_64] ; added before the >>7 below (pw_64 is external; presumably words of 64, i.e. a rounding bias)
+    lea  sstride3q, [sstrideq*3]
+    lea      src4q, [srcq+sstrideq] ; src4 = row +1
+    sub       srcq, sstride3q ; src = row -3; src/src4 with offsets 0..3*stride cover rows -3..+4
+    mova        m7, [filteryq+  0] ; tap 0; the eight taps are read 16 bytes apart (assumes an F8_SSE2_TAPS row - confirm against caller)
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+ 16] ; taps 1-7 are cached in m8-m14 only in this configuration
+    mova        m9, [filteryq+ 32]
+    mova       m10, [filteryq+ 48]
+    mova       m11, [filteryq+ 64]
+    mova       m12, [filteryq+ 80]
+    mova       m13, [filteryq+ 96]
+    mova       m14, [filteryq+112]
+%endif
+.loop:
+    ; FIXME maybe reuse loads from previous rows, or just
+    ; more generally unroll this to prevent multiple loads of
+    ; the same data?
+    movh        m0, [srcq] ; rows -3, -2, -1, 0 relative to the output row
+    movh        m1, [srcq+sstrideq]
+    movh        m2, [srcq+sstrideq*2]
+    movh        m3, [srcq+sstride3q]
+    add       srcq, sstrideq
+    movh        m4, [src4q] ; row +1
+    punpcklbw   m0, m5 ; zero-extend the pixels to 16-bit words
+    punpcklbw   m1, m5
+    punpcklbw   m2, m5
+    punpcklbw   m3, m5
+    punpcklbw   m4, m5
+    pmullw      m0, m7 ; pixel * tap, low 16 bits of each product
+%if ARCH_X86_64 && mmsize > 8
+    pmullw      m1, m8
+    pmullw      m2, m9
+    pmullw      m3, m10
+    pmullw      m4, m11
+%else
+    pmullw      m1, [filteryq+ 16] ; taps are used straight from memory here
+    pmullw      m2, [filteryq+ 32]
+    pmullw      m3, [filteryq+ 48]
+    pmullw      m4, [filteryq+ 64]
+%endif
+    paddw       m0, m1 ; m0 accumulates taps 0,1,4,5 (and later the bias)
+    paddw       m2, m3 ; m2 accumulates taps 2,3 (and later 6,7)
+    paddw       m0, m4
+    movh        m1, [src4q+sstrideq] ; rows +2, +3, +4
+    movh        m3, [src4q+sstrideq*2]
+    movh        m4, [src4q+sstride3q]
+    add      src4q, sstrideq
+    punpcklbw   m1, m5
+    punpcklbw   m3, m5
+    punpcklbw   m4, m5
+%if ARCH_X86_64 && mmsize > 8
+    pmullw      m1, m12
+    pmullw      m3, m13
+    pmullw      m4, m14
+%else
+    pmullw      m1, [filteryq+ 80]
+    pmullw      m3, [filteryq+ 96]
+    pmullw      m4, [filteryq+112]
+%endif
+    paddw       m0, m1
+    paddw       m3, m4
+    paddw       m0, m6 ; add the pw_64 constant
+    paddw       m2, m3
+    paddsw      m0, m2 ; the two partial sums are merged with signed saturation
+    psraw       m0, 7 ; arithmetic shift right by 7 (the taps of each table row sum to 128)
+%ifidn %1, avg
+    movh        m1, [dstq] ; avg only: fetch the existing destination pixels
+%endif
+    packuswb    m0, m0 ; narrow to bytes with unsigned saturation
+%ifidn %1, avg
+    pavgb       m0, m1 ; avg only: rounded byte average with the destination
+%endif
+    movh    [dstq], m0
+    add       dstq, dstrideq
+    dec         hd
+    jg .loop ; keep going while the decremented row count is still positive
+    RET
+%endmacro
+
+INIT_MMX mmxext ; MMX registers: %%px = mmsize/2 = 4
+filter_sse2_v_fn put
+filter_sse2_v_fn avg
+
+INIT_XMM sse2 ; XMM registers: %%px = 8
+filter_sse2_v_fn put
+filter_sse2_v_fn avg
+
+%macro filter_v_fn 1 ; %1 = put|avg. 8-tap vertical filter on 8-bit pixels using pmaddubsw on interleaved row pairs; one output row per loop iteration
+%assign %%px mmsize/2 ; becomes part of the function name
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 11, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 11, dst, dstride, src, sstride, filtery, src4, sstride3
+    mov   filteryq, r5mp ; x86-32: only 4 arguments are auto-loaded, so fetch the 6th (filtery) by hand
+%define hd r4mp ; x86-32: the 5th argument (h) is used in place from its argument slot
+%endif
+    mova        m6, [pw_256] ; multiplier for the pmulhrsw below (pw_256 is external; presumably words of 256)
+    lea  sstride3q, [sstrideq*3]
+    lea      src4q, [srcq+sstrideq] ; src4 = row +1
+    sub       srcq, sstride3q ; src = row -3; src/src4 with offsets 0..3*stride cover rows -3..+4
+    mova        m7, [filteryq+ 0] ; tap pair (0,1); the four pairs are read 32 bytes apart (assumes an F8_SSSE3_TAPS row - confirm against caller)
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+32] ; pairs (2,3), (4,5), (6,7) are cached in m8-m10 only in this configuration
+    mova        m9, [filteryq+64]
+    mova       m10, [filteryq+96]
+%endif
+.loop:
+    ; FIXME maybe reuse loads from previous rows, or just more generally
+    ; unroll this to prevent multiple loads of the same data?
+    movh        m0, [srcq] ; rows -3, -2, -1, 0 relative to the output row
+    movh        m1, [srcq+sstrideq]
+    movh        m2, [srcq+sstrideq*2]
+    movh        m3, [srcq+sstride3q]
+    movh        m4, [src4q] ; rows +1, +2
+    movh        m5, [src4q+sstrideq]
+    punpcklbw   m0, m1 ; interleave the rows for taps 0 and 1
+    punpcklbw   m2, m3 ; ... taps 2 and 3
+    movh        m1, [src4q+sstrideq*2] ; m1/m3 are free again: rows +3, +4
+    movh        m3, [src4q+sstride3q]
+    add       srcq, sstrideq
+    add      src4q, sstrideq
+    punpcklbw   m4, m5 ; ... taps 4 and 5
+    punpcklbw   m1, m3 ; ... taps 6 and 7
+    pmaddubsw   m0, m7 ; unsigned pixel * signed tap, the two products of each pair summed into a word
+%if ARCH_X86_64 && mmsize > 8
+    pmaddubsw   m2, m8
+    pmaddubsw   m4, m9
+    pmaddubsw   m1, m10
+%else
+    pmaddubsw   m2, [filteryq+32] ; tap pairs are used straight from memory here
+    pmaddubsw   m4, [filteryq+64]
+    pmaddubsw   m1, [filteryq+96]
+%endif
+    paddw       m0, m4 ; pairs (0,1) + (4,5)
+    paddw       m2, m1 ; pairs (2,3) + (6,7)
+    paddsw      m0, m2 ; the two partial sums are merged with signed saturation
+    pmulhrsw    m0, m6 ; with a multiplier of 256 this computes (x + 64) >> 7
+%ifidn %1, avg
+    movh        m1, [dstq] ; avg only: fetch the existing destination pixels
+%endif
+    packuswb    m0, m0 ; narrow to bytes with unsigned saturation
+%ifidn %1, avg
+    pavgb       m0, m1 ; avg only: rounded byte average with the destination
+%endif
+    movh    [dstq], m0
+    add       dstq, dstrideq
+    dec         hd
+    jg .loop ; keep going while the decremented row count is still positive
+    RET
+%endmacro
+
+INIT_MMX ssse3 ; MMX registers: %%px = 4
+filter_v_fn put
+filter_v_fn avg
+
+INIT_XMM ssse3 ; XMM registers: %%px = 8
+filter_v_fn put
+filter_v_fn avg
+
+%if ARCH_X86_64
+
+%macro filter_vx2_fn 1 ; %1 = put|avg. Full-register-width variant of the pmaddubsw vertical filter: both unpack halves are processed; only assembled when ARCH_X86_64
+%assign %%px mmsize ; becomes part of the function name
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 14, dst, dstride, src, sstride, h, filtery, src4, sstride3
+    mova       m13, [pw_256] ; multiplier for the pmulhrsw below (pw_256 is external; presumably words of 256)
+    lea  sstride3q, [sstrideq*3]
+    lea      src4q, [srcq+sstrideq] ; src4 = row +1
+    sub       srcq, sstride3q ; src = row -3; src/src4 with offsets 0..3*stride cover rows -3..+4
+    mova        m8, [filteryq+ 0] ; tap pairs (0,1), (2,3), (4,5), (6,7), read 32 bytes apart
+    mova        m9, [filteryq+32]
+    mova       m10, [filteryq+64]
+    mova       m11, [filteryq+96]
+.loop:
+    ; FIXME maybe reuse loads from previous rows, or just
+    ; more generally unroll this to prevent multiple loads of
+    ; the same data?
+    movu        m0, [srcq] ; unaligned full-width loads of rows -3..0
+    movu        m1, [srcq+sstrideq]
+    movu        m2, [srcq+sstrideq*2]
+    movu        m3, [srcq+sstride3q]
+    movu        m4, [src4q] ; rows +1..+4
+    movu        m5, [src4q+sstrideq]
+    movu        m6, [src4q+sstrideq*2]
+    movu        m7, [src4q+sstride3q]
+    add       srcq, sstrideq
+    add      src4q, sstrideq
+    SBUTTERFLY  bw, 0, 1, 12 ; byte-interleave m0/m1 into a low part (m0) and a high part (m1); 12 is presumably the scratch register (macro is defined in x86util.asm)
+    SBUTTERFLY  bw, 2, 3, 12
+    SBUTTERFLY  bw, 4, 5, 12
+    SBUTTERFLY  bw, 6, 7, 12
+    pmaddubsw   m0, m8 ; each tap pair is applied to both parts
+    pmaddubsw   m1, m8
+    pmaddubsw   m2, m9
+    pmaddubsw   m3, m9
+    pmaddubsw   m4, m10
+    pmaddubsw   m5, m10
+    pmaddubsw   m6, m11
+    pmaddubsw   m7, m11
+    paddw       m0, m4 ; pairs (0,1) + (4,5)
+    paddw       m1, m5
+    paddw       m2, m6 ; pairs (2,3) + (6,7)
+    paddw       m3, m7
+    paddsw      m0, m2 ; partial sums are merged with signed saturation
+    paddsw      m1, m3
+    pmulhrsw    m0, m13 ; with a multiplier of 256 this computes (x + 64) >> 7
+    pmulhrsw    m1, m13
+    packuswb    m0, m1 ; narrow both parts to bytes with unsigned saturation
+%ifidn %1, avg
+    pavgb       m0, [dstq] ; avg only: rounded byte average with the destination
+%endif
+    mova    [dstq], m0 ; aligned store of the full register
+    add       dstq, dstrideq
+    dec         hd
+    jg .loop ; keep going while the decremented row count is still positive
+    RET
+%endmacro
+
+INIT_XMM ssse3 ; %%px = mmsize = 16
+filter_vx2_fn put
+filter_vx2_fn avg
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2 ; %%px = mmsize = 32; only built when HAVE_AVX2_EXTERNAL is set
+filter_vx2_fn put
+filter_vx2_fn avg
+%endif
+
+%endif ; ARCH_X86_64
+
+%macro fpel_fn 6-8 0, 4 ; copy (put) or average (avg) blocks without filtering. %1 put|avg, %2 row width in bytes, %3-%5 offsets of the 2nd-4th load/store (pasted after s/d), %6 rows per iteration, %7 avg element size 8|16 (default 0), %8 register count for the multi-register-per-row case (default 4)
+%if %2 == 4
+%define %%srcfn movh ; 4-byte rows use movh for both loads and stores
+%define %%dstfn movh
+%else
+%define %%srcfn movu ; otherwise: unaligned loads, aligned stores
+%define %%dstfn mova
+%endif
+
+%if %7 == 8
+%define %%pavg pavgb ; byte averaging, function name gets the _8 suffix
+%define %%szsuf _8
+%elif %7 == 16
+%define %%pavg pavgw ; word averaging, function name gets the _16 suffix
+%define %%szsuf _16
+%else
+%define %%szsuf
+%endif
+
+%if %2 <= mmsize
+cglobal vp9_%1%2 %+ %%szsuf, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
+    lea  sstride3q, [sstrideq*3] ; a row fits in one register: precompute 3*stride for the 4th row
+    lea  dstride3q, [dstrideq*3]
+%else
+cglobal vp9_%1%2 %+ %%szsuf, 5, 5, %8, dst, dstride, src, sstride, h
+%endif
+.loop:
+    %%srcfn     m0, [srcq] ; s%3..s%5 expand to sstrideq-based or numeric offsets depending on the instantiation
+    %%srcfn     m1, [srcq+s%3]
+    %%srcfn     m2, [srcq+s%4]
+    %%srcfn     m3, [srcq+s%5]
+%if %2/mmsize == 8
+    %%srcfn     m4, [srcq+mmsize*4] ; a row spans eight registers: load the remaining four
+    %%srcfn     m5, [srcq+mmsize*5]
+    %%srcfn     m6, [srcq+mmsize*6]
+    %%srcfn     m7, [srcq+mmsize*7]
+%endif
+    lea       srcq, [srcq+sstrideq*%6] ; advance by the %6 rows handled in this iteration
+%ifidn %1, avg
+    %%pavg      m0, [dstq] ; rounded average with the destination, taken as a memory operand
+    %%pavg      m1, [dstq+d%3]
+    %%pavg      m2, [dstq+d%4]
+%if %2 == 4
+    %%srcfn     m4, [dstq+d%5] ; 4-byte rows: the 4th destination row is loaded with movh first (presumably so a wider memory operand cannot read past the last row - TODO confirm)
+    %%pavg      m3, m4
+%else
+    %%pavg      m3, [dstq+d%5]
+%endif
+%if %2/mmsize == 8
+    %%pavg      m4, [dstq+mmsize*4]
+    %%pavg      m5, [dstq+mmsize*5]
+    %%pavg      m6, [dstq+mmsize*6]
+    %%pavg      m7, [dstq+mmsize*7]
+%endif
+%endif
+    %%dstfn [dstq], m0
+    %%dstfn [dstq+d%3], m1
+    %%dstfn [dstq+d%4], m2
+    %%dstfn [dstq+d%5], m3
+%if %2/mmsize == 8
+    %%dstfn [dstq+mmsize*4], m4
+    %%dstfn [dstq+mmsize*5], m5
+    %%dstfn [dstq+mmsize*6], m6
+    %%dstfn [dstq+mmsize*7], m7
+%endif
+    lea       dstq, [dstq+dstrideq*%6]
+    sub         hd, %6
+    jnz .loop ; loops until h reaches exactly 0, so h must be a multiple of %6
+    RET
+%endmacro
+
+%define d16 16 ; fpel_fn pastes s/d in front of its offset arguments; these let s16/d16/s32/d32 resolve when an argument such as mmsize has already expanded to 16 or 32
+%define s16 16
+%define d32 32
+%define s32 32
+INIT_MMX mmx ; plain copies, no size suffix (%7 defaults to 0)
+fpel_fn put, 4,  strideq, strideq*2, stride3q, 4
+fpel_fn put, 8,  strideq, strideq*2, stride3q, 4
+INIT_MMX mmxext ; byte-averaging variants (_8 suffix)
+fpel_fn avg, 4,  strideq, strideq*2, stride3q, 4, 8
+fpel_fn avg, 8,  strideq, strideq*2, stride3q, 4, 8
+INIT_XMM sse
+fpel_fn put, 16, strideq, strideq*2, stride3q, 4 ; one register per row, 4 rows per iteration
+fpel_fn put, 32, mmsize,  strideq,   strideq+mmsize, 2 ; two registers per row, 2 rows per iteration
+fpel_fn put, 64, mmsize,  mmsize*2,  mmsize*3, 1 ; four registers per row, 1 row per iteration
+fpel_fn put, 128, mmsize, mmsize*2,  mmsize*3, 1, 0, 8 ; eight registers per row, hence %8 = 8
+INIT_XMM sse2
+fpel_fn avg, 16, strideq, strideq*2, stride3q, 4, 8
+fpel_fn avg, 32, mmsize,  strideq,   strideq+mmsize, 2, 8
+fpel_fn avg, 64, mmsize,  mmsize*2,  mmsize*3, 1, 8
+INIT_YMM avx ; same shapes with 32-byte registers
+fpel_fn put, 32, strideq, strideq*2, stride3q, 4
+fpel_fn put, 64, mmsize,  strideq,   strideq+mmsize, 2
+fpel_fn put, 128, mmsize, mmsize*2,     mmsize*3, 1
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+fpel_fn avg, 32, strideq, strideq*2, stride3q, 4, 8
+fpel_fn avg, 64, mmsize,  strideq,   strideq+mmsize, 2, 8
+%endif
+INIT_MMX mmxext ; word-averaging variants (_16 suffix) start here
+fpel_fn avg,  8,  strideq, strideq*2, stride3q, 4, 16
+INIT_XMM sse2
+fpel_fn avg,  16, strideq, strideq*2, stride3q, 4, 16
+fpel_fn avg,  32, mmsize,  strideq,   strideq+mmsize, 2, 16
+fpel_fn avg,  64, mmsize,  mmsize*2,  mmsize*3, 1, 16
+fpel_fn avg, 128, mmsize,  mmsize*2,  mmsize*3, 1, 16, 8
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+fpel_fn avg,  32, strideq, strideq*2, stride3q, 4, 16
+fpel_fn avg,  64, mmsize,  strideq,   strideq+mmsize, 2, 16
+fpel_fn avg, 128, mmsize,  mmsize*2,  mmsize*3, 1, 16
+%endif
+%undef s16 ; the helper defines are only needed for the expansions above
+%undef d16
+%undef s32
+%undef d32
diff --git a/media/ffvpx/libavcodec/x86/vp9mc_16bpp.asm b/media/ffvpx/libavcodec/x86/vp9mc_16bpp.asm
new file mode 100644
index 0000000000..9a462eaf80
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9mc_16bpp.asm
@@ -0,0 +1,431 @@
+;******************************************************************************
+;* VP9 MC SIMD optimizations
+;*
+;* Copyright (c) 2015 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+
+pd_64: times 8 dd 64 ; eight dwords of 64 (32 bytes): the constant added before the >>7 in the filters below
+
+cextern pw_1023
+cextern pw_4095
+
+SECTION .text
+
+%macro filter_h4_fn 1-2 12 ; %1 = put|avg, %2 = register count passed to cglobal (default 12). 8-tap horizontal filter for rows of four 16-bit pixels; emits a _10 entry and a _12 entry that share one body
+cglobal vp9_%1_8tap_1d_h_4_10, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+    mova        m5, [pw_1023] ; upper clamp used by pminsw below (pw_1023 is external; presumably words of 1023)
+.body:
+%if notcpuflag(sse4) && ARCH_X86_64
+    pxor       m11, m11 ; zero for the pmaxsw lower clamp (needed only without sse4)
+%endif
+    mova        m6, [pd_64] ; dwords of 64, added before the >>7
+    mova        m7, [filteryq+ 0] ; tap pair (0,1); the four pairs are read 32 bytes apart (assumes an F8_16BPP_TAPS row - confirm against caller)
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+32] ; pairs (2,3), (4,5), (6,7) are cached in m8-m10 only in this configuration
+    mova        m9, [filteryq+64]
+    mova       m10, [filteryq+96]
+%endif
+.loop:
+    movh        m0, [srcq-6] ; byte offsets step by 2, i.e. one 16-bit pixel per tap
+    movh        m1, [srcq-4]
+    movh        m2, [srcq-2]
+    movh        m3, [srcq+0]
+    movh        m4, [srcq+2]
+    punpcklwd   m0, m1 ; interleave the pixels for taps 0 and 1
+    punpcklwd   m2, m3 ; ... taps 2 and 3
+    pmaddwd     m0, m7 ; pixel * tap, each pair summed into a dword
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m2, m8
+%else
+    pmaddwd     m2, [filteryq+32]
+%endif
+    movu        m1, [srcq+4] ; full-width loads; only the low halves are consumed by the punpcklwd below
+    movu        m3, [srcq+6]
+    paddd       m0, m2
+    movu        m2, [srcq+8]
+    add       srcq, sstrideq
+    punpcklwd   m4, m1 ; ... taps 4 and 5
+    punpcklwd   m3, m2 ; ... taps 6 and 7
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m4, m9
+    pmaddwd     m3, m10
+%else
+    pmaddwd     m4, [filteryq+64]
+    pmaddwd     m3, [filteryq+96]
+%endif
+    paddd       m0, m4
+    paddd       m0, m3
+    paddd       m0, m6 ; + 64
+    psrad       m0, 7 ; arithmetic shift right by 7
+%if cpuflag(sse4)
+    packusdw    m0, m0 ; narrow to words with unsigned saturation
+%else
+    packssdw    m0, m0 ; narrow with signed saturation; negatives are clamped by pmaxsw below
+%endif
+%ifidn %1, avg
+    movh        m1, [dstq] ; avg only: fetch the existing destination pixels
+%endif
+    pminsw      m0, m5 ; clamp to the maximum held in m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+    pmaxsw      m0, m11 ; clamp negatives to 0
+%else
+    pxor        m2, m2 ; x86-32: build the zero in m2 on every iteration
+    pmaxsw      m0, m2
+%endif
+%endif
+%ifidn %1, avg
+    pavgw       m0, m1 ; avg only: rounded word average with the destination
+%endif
+    movh    [dstq], m0
+    add       dstq, dstrideq
+    dec         hd
+    jg .loop ; keep going while the decremented row count is still positive
+    RET
+
+cglobal vp9_%1_8tap_1d_h_4_12, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+    mova        m5, [pw_4095] ; only the upper clamp differs (pw_4095 is external; presumably words of 4095)
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_h_4_10 %+ SUFFIX).body ; continue in the _10 function, right after its own clamp load
+%endmacro
+
+INIT_XMM sse2 ; each expansion emits both the _10 and the _12 entry point
+filter_h4_fn put
+filter_h4_fn avg
+
+%macro filter_h_fn 1-2 12 ; %1 = put|avg, %2 = register count passed to cglobal (default 12). 8-tap horizontal filter for full-register rows of 16-bit pixels; emits a _10 entry and a _12 entry that share one body
+%assign %%px mmsize/2 ; becomes part of the function name
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _10, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+    mova        m5, [pw_1023] ; upper clamp used by pminsw below (pw_1023 is external; presumably words of 1023)
+.body:
+%if notcpuflag(sse4) && ARCH_X86_64
+    pxor       m11, m11 ; zero for the pmaxsw lower clamp (needed only without sse4)
+%endif
+    mova        m6, [pd_64] ; dwords of 64, added before the >>7
+    mova        m7, [filteryq+ 0] ; tap pair (0,1); the four pairs are read 32 bytes apart (assumes an F8_16BPP_TAPS row - confirm against caller)
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+32] ; pairs (2,3), (4,5), (6,7) are cached in m8-m10 only in this configuration
+    mova        m9, [filteryq+64]
+    mova       m10, [filteryq+96]
+%endif
+.loop:
+    movu        m0, [srcq-6] ; loads alternate between the even output pixels (accumulated in m0) and the odd ones (m1)
+    movu        m1, [srcq-4]
+    movu        m2, [srcq-2]
+    movu        m3, [srcq+0]
+    movu        m4, [srcq+2]
+    pmaddwd     m0, m7 ; taps (0,1), even outputs: adjacent pixels already form the pairs, so no interleave is needed
+    pmaddwd     m1, m7 ; taps (0,1), odd outputs
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m2, m8 ; taps (2,3), even
+    pmaddwd     m3, m8 ; taps (2,3), odd
+    pmaddwd     m4, m9 ; taps (4,5), even
+%else
+    pmaddwd     m2, [filteryq+32]
+    pmaddwd     m3, [filteryq+32]
+    pmaddwd     m4, [filteryq+64]
+%endif
+    paddd       m0, m2
+    paddd       m1, m3
+    paddd       m0, m4
+    movu        m2, [srcq+4]
+    movu        m3, [srcq+6]
+    movu        m4, [srcq+8]
+    add       srcq, sstrideq
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m2, m9 ; taps (4,5), odd
+    pmaddwd     m3, m10 ; taps (6,7), even
+    pmaddwd     m4, m10 ; taps (6,7), odd
+%else
+    pmaddwd     m2, [filteryq+64]
+    pmaddwd     m3, [filteryq+96]
+    pmaddwd     m4, [filteryq+96]
+%endif
+    paddd       m1, m2
+    paddd       m0, m3
+    paddd       m1, m4
+    paddd       m0, m6 ; + 64
+    paddd       m1, m6
+    psrad       m0, 7 ; arithmetic shift right by 7
+    psrad       m1, 7
+%if cpuflag(sse4)
+    packusdw    m0, m0 ; narrow to words with unsigned saturation
+    packusdw    m1, m1
+%else
+    packssdw    m0, m0 ; narrow with signed saturation; negatives are clamped by pmaxsw below
+    packssdw    m1, m1
+%endif
+    punpcklwd   m0, m1 ; re-interleave even and odd results
+    pminsw      m0, m5 ; clamp to the maximum held in m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+    pmaxsw      m0, m11 ; clamp negatives to 0
+%else
+    pxor        m2, m2 ; x86-32: build the zero in m2 on every iteration
+    pmaxsw      m0, m2
+%endif
+%endif
+%ifidn %1, avg
+    pavgw       m0, [dstq] ; avg only: rounded word average with the destination
+%endif
+    mova    [dstq], m0 ; aligned store of the full register
+    add       dstq, dstrideq
+    dec         hd
+    jg .loop ; keep going while the decremented row count is still positive
+    RET
+
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _12, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+    mova        m5, [pw_4095] ; only the upper clamp differs (pw_4095 is external; presumably words of 4095)
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_h_ %+ %%px %+ _10 %+ SUFFIX).body ; continue in the _10 function, right after its own clamp load
+%endmacro
+
+INIT_XMM sse2 ; %%px = mmsize/2 = 8
+filter_h_fn put
+filter_h_fn avg
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2 ; %%px = 16; only built when HAVE_AVX2_EXTERNAL is set
+filter_h_fn put
+filter_h_fn avg
+%endif
+
+%macro filter_v4_fn 1-2 12 ; %1 = put|avg, %2 = register count passed to cglobal (default 12). 8-tap vertical filter for rows of four 16-bit pixels; emits a _10 entry and a _12 entry that share one body
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_4_10, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_4_10, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+    mov   filteryq, r5mp ; x86-32: only 4 arguments are auto-loaded, so fetch the 6th (filtery) by hand
+%define hd r4mp ; x86-32: the 5th argument (h) is used in place from its argument slot
+%endif
+    mova        m5, [pw_1023] ; upper clamp used by pminsw below (pw_1023 is external; presumably words of 1023)
+.body:
+%if notcpuflag(sse4) && ARCH_X86_64
+    pxor       m11, m11 ; zero for the pmaxsw lower clamp (needed only without sse4)
+%endif
+    mova        m6, [pd_64] ; dwords of 64, added before the >>7
+    lea  sstride3q, [sstrideq*3]
+    lea      src4q, [srcq+sstrideq] ; src4 = row +1
+    sub       srcq, sstride3q ; src = row -3; src/src4 with offsets 0..3*stride cover rows -3..+4
+    mova        m7, [filteryq+  0] ; tap pair (0,1); the four pairs are read 32 bytes apart (assumes an F8_16BPP_TAPS row - confirm against caller)
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+ 32] ; pairs (2,3), (4,5), (6,7) are cached in m8-m10 only in this configuration
+    mova        m9, [filteryq+ 64]
+    mova       m10, [filteryq+ 96]
+%endif
+.loop:
+    ; FIXME maybe reuse loads from previous rows, or just
+    ; more generally unroll this to prevent multiple loads of
+    ; the same data?
+    movh        m0, [srcq] ; rows -3, -2, -1, 0 relative to the output row
+    movh        m1, [srcq+sstrideq]
+    movh        m2, [srcq+sstrideq*2]
+    movh        m3, [srcq+sstride3q]
+    add       srcq, sstrideq
+    movh        m4, [src4q] ; row +1
+    punpcklwd   m0, m1 ; interleave the rows for taps 0 and 1
+    punpcklwd   m2, m3 ; ... taps 2 and 3
+    pmaddwd     m0, m7 ; pixel * tap, each pair summed into a dword
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m2, m8
+%else
+    pmaddwd     m2, [filteryq+ 32]
+%endif
+    movh        m1, [src4q+sstrideq] ; rows +2, +3
+    movh        m3, [src4q+sstrideq*2]
+    paddd       m0, m2
+    movh        m2, [src4q+sstride3q] ; row +4
+    add      src4q, sstrideq
+    punpcklwd   m4, m1 ; ... taps 4 and 5
+    punpcklwd   m3, m2 ; ... taps 6 and 7
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m4, m9
+    pmaddwd     m3, m10
+%else
+    pmaddwd     m4, [filteryq+ 64]
+    pmaddwd     m3, [filteryq+ 96]
+%endif
+    paddd       m0, m4
+    paddd       m0, m3
+    paddd       m0, m6 ; + 64
+    psrad       m0, 7 ; arithmetic shift right by 7
+%if cpuflag(sse4)
+    packusdw    m0, m0 ; narrow to words with unsigned saturation
+%else
+    packssdw    m0, m0 ; narrow with signed saturation; negatives are clamped by pmaxsw below
+%endif
+%ifidn %1, avg
+    movh        m1, [dstq] ; avg only: fetch the existing destination pixels
+%endif
+    pminsw      m0, m5 ; clamp to the maximum held in m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+    pmaxsw      m0, m11 ; clamp negatives to 0
+%else
+    pxor        m2, m2 ; x86-32: build the zero in m2 on every iteration
+    pmaxsw      m0, m2
+%endif
+%endif
+%ifidn %1, avg
+    pavgw       m0, m1 ; avg only: rounded word average with the destination
+%endif
+    movh    [dstq], m0
+    add       dstq, dstrideq
+    dec         hd
+    jg .loop ; keep going while the decremented row count is still positive
+    RET
+
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_4_12, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_4_12, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+    mov   filteryq, r5mp ; same manual filtery load as the _10 entry; the loop using hd lives in the shared body
+%endif
+    mova        m5, [pw_4095] ; only the upper clamp differs (pw_4095 is external; presumably words of 4095)
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_v_4_10 %+ SUFFIX).body ; continue in the _10 function, right after its own clamp load
+%endmacro
+
+INIT_XMM sse2 ; each expansion emits both the _10 and the _12 entry point
+filter_v4_fn put
+filter_v4_fn avg
+
+%macro filter_v_fn 1-2 13 ; %1 = put|avg, %2 = register count passed to cglobal (default 13). 8-tap vertical filter for full-register rows of 16-bit pixels; emits a _10 entry and a _12 entry that share one body
+%assign %%px mmsize/2 ; becomes part of the function name
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _10, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _10, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+    mov   filteryq, r5mp ; x86-32: only 4 arguments are auto-loaded, so fetch the 6th (filtery) by hand
+%define hd r4mp ; x86-32: the 5th argument (h) is used in place from its argument slot
+%endif
+    mova        m5, [pw_1023] ; upper clamp used by pminsw below (pw_1023 is external; presumably words of 1023)
+.body:
+%if notcpuflag(sse4) && ARCH_X86_64
+    pxor       m12, m12 ; zero for the pmaxsw lower clamp (needed only without sse4)
+%endif
+%if ARCH_X86_64
+    mova       m11, [pd_64] ; the +64 constant lives in m11 here because m6 is handed to SBUTTERFLY below
+%endif
+    lea  sstride3q, [sstrideq*3]
+    lea      src4q, [srcq+sstrideq] ; src4 = row +1
+    sub       srcq, sstride3q ; src = row -3; src/src4 with offsets 0..3*stride cover rows -3..+4
+    mova        m7, [filteryq+  0] ; tap pair (0,1); the four pairs are read 32 bytes apart (assumes an F8_16BPP_TAPS row - confirm against caller)
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+ 32] ; pairs (2,3), (4,5), (6,7) are cached in m8-m10 only in this configuration
+    mova        m9, [filteryq+ 64]
+    mova       m10, [filteryq+ 96]
+%endif
+.loop:
+    ; FIXME maybe reuse loads from previous rows, or just
+    ; more generally unroll this to prevent multiple loads of
+    ; the same data?
+    movu        m0, [srcq] ; rows -3, -2, -1, 0 relative to the output row
+    movu        m1, [srcq+sstrideq]
+    movu        m2, [srcq+sstrideq*2]
+    movu        m3, [srcq+sstride3q]
+    add       srcq, sstrideq
+    movu        m4, [src4q] ; row +1
+    SBUTTERFLY  wd, 0, 1, 6 ; word-interleave rows for taps 0/1 into a low part (m0) and a high part (m1); 6 is presumably the scratch register (macro is defined in x86util.asm)
+    SBUTTERFLY  wd, 2, 3, 6 ; same for taps 2/3
+    pmaddwd     m0, m7 ; pixel * tap, each pair summed into a dword
+    pmaddwd     m1, m7
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m2, m8
+    pmaddwd     m3, m8
+%else
+    pmaddwd     m2, [filteryq+ 32]
+    pmaddwd     m3, [filteryq+ 32]
+%endif
+    paddd       m0, m2 ; m0 accumulates the low part, m1 the high part
+    paddd       m1, m3
+    movu        m2, [src4q+sstrideq] ; rows +2, +3
+    movu        m3, [src4q+sstrideq*2]
+    SBUTTERFLY  wd, 4, 2, 6 ; rows +1/+2 for taps 4/5
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m4, m9
+    pmaddwd     m2, m9
+%else
+    pmaddwd     m4, [filteryq+ 64]
+    pmaddwd     m2, [filteryq+ 64]
+%endif
+    paddd       m0, m4
+    paddd       m1, m2
+    movu        m4, [src4q+sstride3q] ; row +4
+    add      src4q, sstrideq
+    SBUTTERFLY  wd, 3, 4, 6 ; rows +3/+4 for taps 6/7
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m3, m10
+    pmaddwd     m4, m10
+%else
+    pmaddwd     m3, [filteryq+ 96]
+    pmaddwd     m4, [filteryq+ 96]
+%endif
+    paddd       m0, m3
+    paddd       m1, m4
+%if ARCH_X86_64
+    paddd       m0, m11 ; + 64
+    paddd       m1, m11
+%else
+    paddd       m0, [pd_64] ; x86-32: the constant is added straight from memory
+    paddd       m1, [pd_64]
+%endif
+    psrad       m0, 7 ; arithmetic shift right by 7
+    psrad       m1, 7
+%if cpuflag(sse4)
+    packusdw    m0, m1 ; narrow both parts to words with unsigned saturation
+%else
+    packssdw    m0, m1 ; narrow with signed saturation; negatives are clamped by pmaxsw below
+%endif
+    pminsw      m0, m5 ; clamp to the maximum held in m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+    pmaxsw      m0, m12 ; clamp negatives to 0
+%else
+    pxor        m2, m2 ; x86-32: build the zero in m2 on every iteration
+    pmaxsw      m0, m2
+%endif
+%endif
+%ifidn %1, avg
+    pavgw       m0, [dstq] ; avg only: rounded word average with the destination
+%endif
+    mova    [dstq], m0 ; aligned store of the full register
+    add       dstq, dstrideq
+    dec         hd
+    jg .loop ; keep going while the decremented row count is still positive
+    RET
+
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+    mov   filteryq, r5mp ; same manual filtery load as the _10 entry; the loop using hd lives in the shared body
+%endif
+    mova        m5, [pw_4095] ; only the upper clamp differs (pw_4095 is external; presumably words of 4095)
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_v_ %+ %%px %+ _10 %+ SUFFIX).body ; continue in the _10 function, right after its own clamp load
+%endmacro
+
+INIT_XMM sse2 ; %%px = mmsize/2 = 8
+filter_v_fn put
+filter_v_fn avg
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2 ; %%px = 16; only built when HAVE_AVX2_EXTERNAL is set
+filter_v_fn put
+filter_v_fn avg
+%endif
diff --git a/media/ffvpx/libavcodec/x86/vpx_arith.h b/media/ffvpx/libavcodec/x86/vpx_arith.h
new file mode 100644
index 0000000000..d9e4c0dec4
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vpx_arith.h
@@ -0,0 +1,55 @@
+/**
+ * VP5 and VP6 compatible video decoder (arith decoder)
+ *
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2010  Eli Friedman
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_VPX_ARITH_H
+#define AVCODEC_X86_VPX_ARITH_H
+
+#include "libavutil/x86/asm.h"
+
+#if HAVE_INLINE_ASM && HAVE_FAST_CMOV && HAVE_6REGS
+#include "libavutil/attributes.h"
+
+#define vpx_rac_get_prob vpx_rac_get_prob /* self-define makes this version detectable with #ifdef; NOTE(review): presumably how a generic fallback is suppressed - confirm */
+static av_always_inline int vpx_rac_get_prob(VPXRangeCoder *c, uint8_t prob) /* branchless x86 bit decode: returns 1 if the code word is at or above the split derived from prob, else 0; updates c->high and c->code_word */
+{
+    unsigned int code_word = vpx_rac_renorm(c); /* NOTE(review): presumably renormalizes c and returns the current code word - helper not visible here */
+    unsigned int low = 1 + (((c->high - 1) * prob) >> 8); /* split point: 1 + (high - 1) * prob / 256 */
+    unsigned int low_shift = low << 16; /* split point moved up 16 bits; this is what code_word is compared against */
+    int bit = 0; /* cleared up front because setae below writes only the low byte of the register */
+    c->code_word = code_word; /* the asm takes c->code_word as a read-write operand, so it must hold the fresh value */
+
+    __asm__( /* AT&T order: source first, destination second */
+        "subl  %4, %1      \n\t" /* c->high      -= low                                        */
+        "subl  %3, %2      \n\t" /* c->code_word -= low_shift; carry set if it was smaller     */
+        "setae %b0         \n\t" /* bit = (code_word >= low_shift); flags are left untouched   */
+        "cmovb %4, %1      \n\t" /* carry set: c->high = low (discard the subtraction)         */
+        "cmovb %5, %2      \n\t" /* carry set: c->code_word = saved code_word (undo the sub)   */
+        : "+q"(bit), "+&r"(c->high), "+&r"(c->code_word) /* %0 (byte-addressable reg), %1, %2; '&' keeps them out of the input registers, which are still read after these are written */
+        : "r"(low_shift), "r"(low), "r"(code_word) /* %3, %4, %5 */
+    );
+
+    return bit;
+}
+#endif
+
+#endif /* AVCODEC_X86_VPX_ARITH_H */
diff --git a/media/ffvpx/libavcodec/xiph.c b/media/ffvpx/libavcodec/xiph.c
new file mode 100644
index 0000000000..218b0813e9
--- /dev/null
+++ b/media/ffvpx/libavcodec/xiph.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2007 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include "libavutil/error.h"
+#include "libavutil/intreadwrite.h"
+#include "xiph.h"
+
+int avpriv_split_xiph_headers(const uint8_t *extradata, int extradata_size,
+                          int first_header_size, const uint8_t *header_start[3],
+                          int header_len[3])
+{
+    int i;
+    /* Two layouts are accepted; header_start[] entries point into extradata (nothing is copied). */
+    if (extradata_size >= 6 && AV_RB16(extradata) == first_header_size) {
+        int overall_len = 6; /* three headers, each preceded by a 16-bit big-endian length */
+        for (i=0; i<3; i++) {
+            header_len[i] = AV_RB16(extradata);
+            extradata += 2;
+            header_start[i] = extradata;
+            if (overall_len > extradata_size - header_len[i])
+                return AVERROR_INVALIDDATA;
+            extradata   += header_len[i]; /* advance only after the length is known to fit, so no out-of-bounds pointer is ever formed */
+            overall_len += header_len[i];
+        }
+    } else if (extradata_size >= 3 && extradata_size < INT_MAX - 0x1ff && extradata[0] == 2) { /* size cap keeps overall_len from overflowing int */
+        int overall_len = 3; /* the leading 0x02 byte plus one terminating size byte for each of the two coded lengths */
+        extradata++;
+        for (i=0; i<2; i++, extradata++) {
+            header_len[i] = 0;
+            for (; overall_len < extradata_size && *extradata==0xff; extradata++) { /* every 0xff byte adds 255 and the length continues */
+                header_len[i] += 0xff;
+                overall_len   += 0xff + 1;
+            }
+            header_len[i] += *extradata; /* the byte that ends the run is added as-is */
+            overall_len   += *extradata;
+            if (overall_len > extradata_size)
+                return AVERROR_INVALIDDATA;
+        }
+        header_len[2] = extradata_size - overall_len; /* the third header is whatever remains */
+        header_start[0] = extradata;
+        header_start[1] = header_start[0] + header_len[0];
+        header_start[2] = header_start[1] + header_len[1];
+    } else {
+        return AVERROR_INVALIDDATA; /* neither layout matched; same error code as the other failure paths */
+    }
+    return 0;
+}
diff --git a/media/ffvpx/libavcodec/xiph.h b/media/ffvpx/libavcodec/xiph.h
new file mode 100644
index 0000000000..4ab2469528
--- /dev/null
+++ b/media/ffvpx/libavcodec/xiph.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2007 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_XIPH_H
+#define AVCODEC_XIPH_H
+
+#include <stdint.h>
+
+/**
+ * Split a single extradata buffer into the three headers that most
+ * Xiph codecs (e.g. Theora and Vorbis) use. No data is copied.
+ * Works both with Matroska's packing and lavc's packing.
+ *
+ * @param[in] extradata The single chunk that combines all three headers
+ * @param[in] extradata_size The size of the extradata buffer in bytes
+ * @param[in] first_header_size The size of the first header, used to
+ * differentiate between the Matroska packing and lavc packing.
+ * @param[out] header_start Pointers into extradata marking the start of each of the three headers.
+ * @param[out] header_len The size in bytes of each of the three headers.
+ * @return Zero on success, a negative value on error (the output arrays may then be partially filled).
+ */
+int avpriv_split_xiph_headers(const uint8_t *extradata, int extradata_size,
+                              int first_header_size, const uint8_t *header_start[3],
+                              int header_len[3]);
+
+#endif /* AVCODEC_XIPH_H */
diff --git a/media/ffvpx/libavcodec/xvididct.h b/media/ffvpx/libavcodec/xvididct.h
new file mode 100644
index 0000000000..e0bc1a2b91
--- /dev/null
+++ b/media/ffvpx/libavcodec/xvididct.h
@@ -0,0 +1,36 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_XVIDIDCT_H
+#define AVCODEC_XVIDIDCT_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+#include "idctdsp.h"
+
+void ff_xvid_idct(int16_t *const in); /* NOTE(review): presumably transforms the coefficient block at `in` in place - confirm in xvididct.c */
+
+void ff_xvid_idct_init(IDCTDSPContext *c, AVCodecContext *avctx); /* NOTE(review): presumably sets up the xvid IDCT entries of c according to avctx - confirm */
+
+void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
+                           unsigned high_bit_depth); /* x86-named init hook; takes an extra high_bit_depth argument compared with ff_xvid_idct_init */
+void ff_xvid_idct_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
+                            unsigned high_bit_depth); /* MIPS-named init hook with the same signature as the x86 one */
+
+#endif /* AVCODEC_XVIDIDCT_H */