24 files changed, 6239 insertions, 0 deletions
diff --git a/third_party/dav1d/tests/checkasm/arm/checkasm_32.S b/third_party/dav1d/tests/checkasm/arm/checkasm_32.S
new file mode 100644
index 0000000000..a186ef8fc2
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/arm/checkasm_32.S
@@ -0,0 +1,201 @@
+/******************************************************************************
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2015 Martin Storsjo
+ * Copyright © 2015 Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#define PRIVATE_PREFIX checkasm_
+
+#include "src/arm/asm.S"
+#include "src/arm/32/util.S"
+
+const register_init, align=3
+        .quad 0x21f86d66c8ca00ce  @ first of 8 reference patterns, loaded into d8-d15 and (the first 32 bytes) into r4-r11
+        .quad 0x75b6ba21077c48ad
+        .quad 0xed56bb2dcb3c7736
+        .quad 0x8bda43d3fd1a7e06
+        .quad 0xb64a9c9e5d318408
+        .quad 0xdf9a54b303f1d3a3
+        .quad 0x4a75479abd64e097
+        .quad 0x249214109d5d1c88
+endconst
+
+const error_message_fpscr
+        .asciz "failed to preserve register FPSCR, changed bits: %x"
+error_message_gpr:  @ the integer argument is the number of the clobbered core register
+        .asciz "failed to preserve register r%d"
+error_message_vfp:  @ the integer argument is the number of the clobbered d register
+        .asciz "failed to preserve register d%d"
+error_message_stack:  @ takes no argument
+        .asciz "failed to preserve stack"
+endconst
+
+@ Max number of args used by any asm function (the first 4 are passed in registers).
+#define MAX_ARGS 15
+
+#define ARG_STACK 4*(MAX_ARGS - 4)
+
+@ Round the argument area plus the bytes already pushed up to a multiple of 8
+@ to preserve the stack alignment, then add 8 for the stack canary reference.
+#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed + 8)
+
+.macro clobbercheck variant
+.equ pushed, 4*9  @ bytes pushed by the prologue so far: r4-r11 and lr
+function checked_call_\variant, export=1
+        push            {r4-r11, lr}
+.ifc \variant, vfp
+        vpush           {d8-d15}
+        fmrx            r4,  FPSCR  @ remember the FPSCR at entry, it is compared after the call
+        push            {r4}
+.equ pushed, pushed + 16*4 + 4  @ plus 64 bytes for d8-d15 and 4 bytes for the FPSCR word
+.endif
+
+        movrel          r12, register_init
+.ifc \variant, vfp
+        vldm            r12, {d8-d15}
+.endif
+        ldm             r12, {r4-r11}  @ the callee-saved registers now hold the reference patterns
+
+        sub             sp,  sp,  #ARG_STACK_A
+.equ pos, 0
+.rept MAX_ARGS-4
+        ldr             r12, [sp, #ARG_STACK_A + pushed + 8 + pos]  @ copy one stack argument, skipping the first two caller stack words
+        str             r12, [sp, #pos]
+.equ pos, pos + 4
+.endr
+
+        @ For stack overflows, the callee is free to overwrite the parameters
+        @ that were passed on the stack (if any), so we can only check after
+        @ that point. First figure out how many parameters the function
+        @ really took on the stack:
+        ldr             r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
+        @ Load the first non-parameter value from the stack, that should be
+        @ left untouched by the function. Store a copy of it inverted, so that
+        @ e.g. overwriting everything with zero would be noticed.
+        ldr             r12, [sp, r12, lsl #2]
+        mvn             r12, r12
+        str             r12, [sp, #ARG_STACK_A - 4]
+
+        mov             r12, r0  @ incoming r0 is the function to call
+        mov             r0,  r2  @ incoming r2 and r3 become its first two arguments
+        mov             r1,  r3
+        ldrd            r2,  r3,  [sp, #ARG_STACK_A + pushed]  @ its third and fourth arguments are the first two caller stack words
+        @ Call the target function
+        blx             r12
+
+        @ Load the number of stack parameters, stack canary and its reference
+        ldr             r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
+        ldr             r2,  [sp, r12, lsl #2]
+        ldr             r3,  [sp, #ARG_STACK_A - 4]
+
+        add             sp,  sp,  #ARG_STACK_A
+        push            {r0, r1}  @ keep the return value of the callee safe during the checks
+
+        mvn             r3,  r3  @ undo the inversion of the stored reference
+        cmp             r2,  r3
+        bne             5f
+
+        movrel          r12, register_init
+.ifc \variant, vfp
+.macro check_reg_vfp, dreg, offset
+        ldrd            r2,  r3,  [r12, #8 * (\offset)]
+        vmov            r0,  lr,  \dreg
+        eor             r2,  r2,  r0
+        eor             r3,  r3,  lr
+        orrs            r2,  r2,  r3  @ nonzero if either half differs from the reference
+        bne             4f
+.endm
+
+.irp n, 8, 9, 10, 11, 12, 13, 14, 15
+        @ keep track of the checked double/SIMD register
+        mov             r1,  #\n
+        check_reg_vfp   d\n, \n-8
+.endr
+.purgem check_reg_vfp
+
+        fmrx            r1,  FPSCR
+        ldr             r3,  [sp, #8]  @ FPSCR saved in the prologue, located above the pushed r0 and r1
+        eor             r1,  r1,  r3
+        @ Ignore changes in bits 0-4 and 7
+        bic             r1,  r1,  #0x9f
+        @ Ignore changes in the topmost 5 bits
+        bics            r1,  r1,  #0xf8000000
+        bne             3f
+.endif
+
+        @ keep track of the checked GPR
+        mov             r1,  #4
+.macro check_reg reg1, reg2=
+        ldrd            r2,  r3,  [r12], #8
+        eors            r2,  r2,  \reg1
+        bne             2f
+        add             r1,  r1,  #1
+.ifnb \reg2
+        eors            r3,  r3,  \reg2
+        bne             2f
+.endif
+        add             r1,  r1,  #1
+.endm
+        check_reg       r4,  r5
+        check_reg       r6,  r7
+@ r9 is a volatile register in the ios ABI
+#ifdef __APPLE__
+        check_reg       r8
+#else
+        check_reg       r8,  r9
+#endif
+        check_reg       r10, r11
+.purgem check_reg
+
+        b               0f
+5:  @ the stack canary was modified
+        movrel          r0, error_message_stack
+        b               1f
+4:  @ a d register was modified, r1 holds its number
+        movrel          r0, error_message_vfp
+        b               1f
+3:  @ the FPSCR was modified, r1 holds the changed bits
+        movrel          r0, error_message_fpscr
+        b               1f
+2:  @ a core register was modified, r1 holds its number
+        movrel          r0, error_message_gpr
+1:
+#ifdef PREFIX
+        bl              _checkasm_fail_func
+#else
+        bl              checkasm_fail_func
+#endif
+0:
+        pop             {r0, r1}  @ restore the return value of the callee
+.ifc \variant, vfp
+        pop             {r2}
+        fmxr            FPSCR, r2
+        vpop            {d8-d15}
+.endif
+        pop             {r4-r11, pc}
+endfunc
+.endm
+
+clobbercheck vfp
diff --git a/third_party/dav1d/tests/checkasm/arm/checkasm_64.S b/third_party/dav1d/tests/checkasm/arm/checkasm_64.S
new file mode 100644
index 0000000000..25749145a5
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/arm/checkasm_64.S
@@ -0,0 +1,211 @@
+/******************************************************************************
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2015 Martin Storsjo
+ * Copyright © 2015 Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#define PRIVATE_PREFIX checkasm_
+
+#include "src/arm/asm.S"
+#include "src/arm/64/util.S"
+
+const register_init, align=4
+        .quad 0x21f86d66c8ca00ce  // the first 8 values are loaded into d8-d15 by checked_call
+        .quad 0x75b6ba21077c48ad
+        .quad 0xed56bb2dcb3c7736
+        .quad 0x8bda43d3fd1a7e06
+        .quad 0xb64a9c9e5d318408
+        .quad 0xdf9a54b303f1d3a3
+        .quad 0x4a75479abd64e097
+        .quad 0x249214109d5d1c88
+        .quad 0x1a1b2550a612b48c  // the remaining 10 values are loaded into x19-x28
+        .quad 0x79445c159ce79064
+        .quad 0x2eed899d5a28ddcd
+        .quad 0x86b2536fcd8cf636
+        .quad 0xb0856806085e7943
+        .quad 0x3f2bf84fc0fcca4e
+        .quad 0xacbd382dcf5b8de2
+        .quad 0xd229e1f5b281303f
+        .quad 0x71aeaff20b095fd9
+        .quad 0xab63e2e11fa38ed9
+endconst
+
+
+const error_message_register
+        .asciz "failed to preserve register"
+error_message_stack:  // reported when the stack canary check fails
+        .asciz "stack clobbered"
+endconst
+
+
+// max number of args used by any asm function.
+#define MAX_ARGS 15
+
+#define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15)
+
+function stack_clobber, export=1
+        mov             x3,  sp  // remember the stack pointer so that it can be restored
+        mov             x2,  #CLOBBER_STACK  // number of bytes below sp that get overwritten
+1:
+        stp             x0,  x1,  [sp, #-16]!  // store the values passed in x0 and x1, 16 bytes per step
+        subs            x2,  x2,  #16
+        b.gt            1b
+        mov             sp,  x3
+        ret
+endfunc
+
+// Stack argument area rounded up to 16 bytes, + 16 for stack canary reference. Grouped explicitly because the relative precedence of & and + differs between assemblers.
+#define ARG_STACK (((8*(MAX_ARGS - 8) + 15) & ~15) + 16)
+
+function checked_call, export=1
+        stp             x29, x30, [sp, #-16]!
+        mov             x29, sp  // x29 + 16 is the stack pointer of the caller, where its stack arguments start
+        stp             x19, x20, [sp, #-16]!
+        stp             x21, x22, [sp, #-16]!
+        stp             x23, x24, [sp, #-16]!
+        stp             x25, x26, [sp, #-16]!
+        stp             x27, x28, [sp, #-16]!
+        stp             d8,  d9,  [sp, #-16]!
+        stp             d10, d11, [sp, #-16]!
+        stp             d12, d13, [sp, #-16]!
+        stp             d14, d15, [sp, #-16]!
+
+        movrel          x9, register_init
+        ldp             d8,  d9,  [x9], #16  // fill the callee-saved registers with the reference patterns
+        ldp             d10, d11, [x9], #16
+        ldp             d12, d13, [x9], #16
+        ldp             d14, d15, [x9], #16
+        ldp             x19, x20, [x9], #16
+        ldp             x21, x22, [x9], #16
+        ldp             x23, x24, [x9], #16
+        ldp             x25, x26, [x9], #16
+        ldp             x27, x28, [x9], #16
+
+        sub             sp,  sp,  #ARG_STACK  // room for the copied stack arguments and the canary reference
+.equ pos, 0
+.rept MAX_ARGS-8
+        // Skip the first 8 args, that are loaded into registers
+        ldr             x9, [x29, #16 + 8*8 + pos]
+        str             x9, [sp, #pos]
+.equ pos, pos + 8
+.endr
+
+        // Fill x8-x17 with garbage. This doesn't have to be preserved,
+        // but avoids relying on them having any particular value.
+        movrel          x9, register_init
+        ldp             x10, x11, [x9], #32
+        ldp             x12, x13, [x9], #32
+        ldp             x14, x15, [x9], #32
+        ldp             x16, x17, [x9], #32
+        ldp             x8,  x9,  [x9]
+
+        // For stack overflows, the callee is free to overwrite the parameters
+        // that were passed on the stack (if any), so we can only check after
+        // that point. First figure out how many parameters the function
+        // really took on the stack:
+        ldr             w2,  [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
+        // Load the first non-parameter value from the stack, that should be
+        // left untouched by the function. Store a copy of it inverted, so that
+        // e.g. overwriting everything with zero would be noticed.
+        ldr             x2,  [sp, x2, lsl #3]
+        mvn             x2,  x2
+        str             x2,  [sp, #ARG_STACK-8]
+
+        // Load the in-register arguments
+        mov             x12, x0  // incoming x0 is the function to call
+        ldp             x0,  x1,  [x29, #16]  // the first 8 caller stack slots become x0-x7
+        ldp             x2,  x3,  [x29, #32]
+        ldp             x4,  x5,  [x29, #48]
+        ldp             x6,  x7,  [x29, #64]
+        // Call the target function
+        blr             x12
+
+        // Load the number of stack parameters, stack canary and its reference
+        ldr             w2,  [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
+        ldr             x2,  [sp, x2, lsl #3]
+        ldr             x3,  [sp, #ARG_STACK-8]
+
+        add             sp,  sp,  #ARG_STACK
+        stp             x0,  x1,  [sp, #-16]!  // keep the return value of the callee safe during the checks
+
+        mvn             x3,  x3  // undo the inversion of the stored reference
+        cmp             x2,  x3
+        b.ne            2f
+
+        movrel          x9, register_init
+        movi            v3.8h,  #0  // v3 accumulates the differences found in d8-d15
+
+.macro check_reg_neon reg1, reg2
+        ldr             q1,  [x9], #16
+        uzp1            v2.2d,  v\reg1\().2d, v\reg2\().2d
+        eor             v1.16b, v1.16b, v2.16b
+        orr             v3.16b, v3.16b, v1.16b
+.endm
+        check_reg_neon  8,  9
+        check_reg_neon  10, 11
+        check_reg_neon  12, 13
+        check_reg_neon  14, 15
+        uqxtn           v3.8b,  v3.8h  // saturating narrow keeps every nonzero halfword nonzero within the low 64 bits
+        umov            x3,  v3.d[0]  // x3 collects all differences, zero means everything was preserved
+
+.macro check_reg reg1, reg2
+        ldp             x0,  x1,  [x9], #16
+        eor             x0,  x0,  \reg1
+        eor             x1,  x1,  \reg2
+        orr             x3,  x3,  x0
+        orr             x3,  x3,  x1
+.endm
+        check_reg       x19, x20
+        check_reg       x21, x22
+        check_reg       x23, x24
+        check_reg       x25, x26
+        check_reg       x27, x28
+
+        cbz             x3,  0f
+
+        movrel          x0, error_message_register
+        b               1f
+2:
+        movrel          x0, error_message_stack
+1:
+#ifdef PREFIX
+        bl              _checkasm_fail_func
+#else
+        bl              checkasm_fail_func
+#endif
+0:
+        ldp             x0,  x1,  [sp], #16  // restore the return value of the callee
+        ldp             d14, d15, [sp], #16
+        ldp             d12, d13, [sp], #16
+        ldp             d10, d11, [sp], #16
+        ldp             d8,  d9,  [sp], #16
+        ldp             x27, x28, [sp], #16
+        ldp             x25, x26, [sp], #16
+        ldp             x23, x24, [sp], #16
+        ldp             x21, x22, [sp], #16
+        ldp             x19, x20, [sp], #16
+        ldp             x29, x30, [sp], #16
+        ret
+endfunc
diff --git a/third_party/dav1d/tests/checkasm/cdef.c b/third_party/dav1d/tests/checkasm/cdef.c
new file mode 100644
index 0000000000..9a90e313da
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/cdef.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "tests/checkasm/checkasm.h"
+
+#include <string.h>
+#include <stdio.h>
+
+#include "common/dump.h"
+
+#include "src/levels.h"
+#include "src/cdef.h"
+
+static int to_binary(int x) { /* decimal number whose digits are the 4 low bits of x */
+    return 1000 * ((x >> 3) & 1) + 100 * ((x >> 2) & 1) + 10 * ((x >> 1) & 1) + (x & 1);
+}
+
+static void init_tmp(pixel *buf, int n, const int bitdepth_max) {
+    /* A single draw selects the fill pattern used for the whole buffer. */
+    const int mode = rnd() & 7;
+    for (pixel *const end = buf + n; buf < end; buf++) {
+        if (mode == 0)      /* near-zero pixels: check for cdef_filter underflows */
+            *buf = rnd() & 1;
+        else if (mode == 1) /* near-maximum pixels: check for cdef_filter overflows */
+            *buf = bitdepth_max - (rnd() & 1);
+        else                /* unconstrained random pixels */
+            *buf = rnd() & bitdepth_max;
+    }
+}
+
+static void check_cdef_filter(const cdef_fn fn, const int w, const int h) { /* compare fn against the reference for one w x h block size */
+    ALIGN_STK_64(pixel, c_src,   16 * 10 + 16, ), *const c_dst = c_src + 8;
+    ALIGN_STK_64(pixel, a_src,   16 * 10 + 16, ), *const a_dst = a_src + 8;
+    ALIGN_STK_64(pixel, top_buf, 16 *  2 + 16, ), *const top = top_buf + 8;
+    ALIGN_STK_64(pixel, bot_buf, 16 *  2 + 16, ), *const bot = bot_buf + 8;
+    ALIGN_STK_16(pixel, left, 8,[2]);
+    const ptrdiff_t stride = 16 * sizeof(pixel);
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel (*left)[2],
+                 const pixel *top, const pixel *bot, int pri_strength,
+                 int sec_strength, int dir, int damping,
+                 enum CdefEdgeFlags edges HIGHBD_DECL_SUFFIX);
+
+    for (int s = 0x1; s <= 0x3; s++) { /* bit 1 enables the primary strength, bit 0 the secondary strength */
+        if (check_func(fn, "cdef_filter_%dx%d_%02d_%dbpc", w, h, to_binary(s), BITDEPTH)) {
+            for (int dir = 0; dir < 8; dir++) {
+                for (enum CdefEdgeFlags edges = 0x0; edges <= 0xf; edges++) { /* all 16 edge flag combinations */
+#if BITDEPTH == 16
+                    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                    const int bitdepth_max = 0xff;
+#endif
+                    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
+
+                    init_tmp(c_src, 16 * 10 + 16, bitdepth_max);
+                    init_tmp(top_buf, 16 * 2 + 16, bitdepth_max);
+                    init_tmp(bot_buf, 16 * 2 + 16, bitdepth_max);
+                    init_tmp((pixel *) left, 8 * 2, bitdepth_max);
+                    memcpy(a_src, c_src, (16 * 10 + 16) * sizeof(pixel)); /* both versions start from identical pixels */
+
+                    const int pri_strength = s & 2 ? (1 + (rnd() % 15)) << bitdepth_min_8 : 0;
+                    const int sec_strength = s & 1 ? 1 << ((rnd() % 3) + bitdepth_min_8) : 0;
+                    const int damping = 3 + (rnd() & 3) + bitdepth_min_8 - (w == 4 || (rnd() & 1));
+                    call_ref(c_dst, stride, left, top, bot, pri_strength, sec_strength,
+                             dir, damping, edges HIGHBD_TAIL_SUFFIX);
+                    call_new(a_dst, stride, left, top, bot, pri_strength, sec_strength,
+                             dir, damping, edges HIGHBD_TAIL_SUFFIX);
+                    if (checkasm_check_pixel(c_dst, stride, a_dst, stride, w, h, "dst")) {
+                        fprintf(stderr, "strength = %d:%d, dir = %d, damping = %d, edges = %04d\n",
+                                pri_strength, sec_strength, dir, damping, to_binary(edges));
+                        return; /* stop at the first mismatch */
+                    }
+                    if (dir == 7 && (edges == 0x5 || edges == 0xa || edges == 0xf)) /* benchmark only these three edge combinations, once per strength mode */
+                        bench_new(alternate(c_dst, a_dst), stride, left, top, bot, pri_strength,
+                                  sec_strength, dir, damping, edges HIGHBD_TAIL_SUFFIX);
+                }
+            }
+        }
+    }
+}
+
+static void check_cdef_direction(const cdef_dir_fn fn) { /* compare fn against the reference on one random 8x8 block */
+    ALIGN_STK_64(pixel, src, 8 * 8,);
+
+    declare_func(int, pixel *src, ptrdiff_t dst_stride, unsigned *var
+                 HIGHBD_DECL_SUFFIX);
+
+    if (check_func(fn, "cdef_dir_%dbpc", BITDEPTH)) {
+        unsigned c_var, a_var;
+#if BITDEPTH == 16
+        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+        const int bitdepth_max = 0xff;
+#endif
+        init_tmp(src, 64, bitdepth_max);
+
+        const int c_dir = call_ref(src, 8 * sizeof(pixel), &c_var HIGHBD_TAIL_SUFFIX);
+        const int a_dir = call_new(src, 8 * sizeof(pixel), &a_var HIGHBD_TAIL_SUFFIX);
+        if (c_var != a_var || c_dir != a_dir) { /* both the returned direction and the variance must match */
+            if (fail()) {
+                hex_fdump(stderr, src, 8 * sizeof(pixel), 8, 8, "src");
+                fprintf(stderr, "c_dir %d a_dir %d\n", c_dir, a_dir);
+            }
+        }
+        bench_new(src, 8 * sizeof(pixel), &a_var HIGHBD_TAIL_SUFFIX);
+    }
+    report("cdef_dir");
+}
+
+void bitfn(checkasm_check_cdef)(void) { /* runs the cdef direction check and the three filter checks */
+    Dav1dCdefDSPContext c;
+    bitfn(dav1d_cdef_dsp_init)(&c);
+
+    check_cdef_direction(c.dir);
+
+    check_cdef_filter(c.fb[0], 8, 8); /* each fb[] entry is checked with its own block size */
+    check_cdef_filter(c.fb[1], 4, 8);
+    check_cdef_filter(c.fb[2], 4, 4);
+    report("cdef_filter"); /* one report covers all three filter sizes */
+}
diff --git a/third_party/dav1d/tests/checkasm/checkasm.c b/third_party/dav1d/tests/checkasm/checkasm.c
new file mode 100644
index 0000000000..bca2158b5b
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/checkasm.c
@@ -0,0 +1,986 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "tests/checkasm/checkasm.h"
+
+#include <math.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "src/cpu.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#define COLOR_RED    FOREGROUND_RED
+#define COLOR_GREEN  FOREGROUND_GREEN
+#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
+#else
+#include <unistd.h>
+#include <signal.h>
+#include <time.h>
+#include <pthread.h>
+#ifdef HAVE_PTHREAD_NP_H
+#include <pthread_np.h>
+#endif
+#ifdef __APPLE__
+#include <mach/mach_time.h>
+#endif
+#define COLOR_RED    1
+#define COLOR_GREEN  2
+#define COLOR_YELLOW 3
+#endif
+
+/* List of tests to invoke, terminated by an all-zero entry */
+static const struct {
+    const char *name;
+    void (*func)(void);
+} tests[] = {
+    { "msac", checkasm_check_msac },
+    { "refmvs", checkasm_check_refmvs },
+#if CONFIG_8BPC
+    { "cdef_8bpc", checkasm_check_cdef_8bpc },
+    { "filmgrain_8bpc", checkasm_check_filmgrain_8bpc },
+    { "ipred_8bpc", checkasm_check_ipred_8bpc },
+    { "itx_8bpc", checkasm_check_itx_8bpc },
+    { "loopfilter_8bpc", checkasm_check_loopfilter_8bpc },
+    { "looprestoration_8bpc", checkasm_check_looprestoration_8bpc },
+    { "mc_8bpc", checkasm_check_mc_8bpc },
+#endif
+#if CONFIG_16BPC
+    { "cdef_16bpc", checkasm_check_cdef_16bpc },
+    { "filmgrain_16bpc", checkasm_check_filmgrain_16bpc },
+    { "ipred_16bpc", checkasm_check_ipred_16bpc },
+    { "itx_16bpc", checkasm_check_itx_16bpc },
+    { "loopfilter_16bpc", checkasm_check_loopfilter_16bpc },
+    { "looprestoration_16bpc", checkasm_check_looprestoration_16bpc },
+    { "mc_16bpc", checkasm_check_mc_16bpc },
+#endif
+    { 0 }
+};
+
+/* List of cpu flags to check, terminated by an all-zero entry; cpu_suffix() scans it from the last entry to the first */
+static const struct {
+    const char *name;
+    const char *suffix;
+    unsigned flag;
+} cpus[] = {
+#if ARCH_X86
+    { "SSE2",               "sse2",      DAV1D_X86_CPU_FLAG_SSE2 },
+    { "SSSE3",              "ssse3",     DAV1D_X86_CPU_FLAG_SSSE3 },
+    { "SSE4.1",             "sse4",      DAV1D_X86_CPU_FLAG_SSE41 },
+    { "AVX2",               "avx2",      DAV1D_X86_CPU_FLAG_AVX2 },
+    { "AVX-512 (Ice Lake)", "avx512icl", DAV1D_X86_CPU_FLAG_AVX512ICL },
+#elif ARCH_AARCH64 || ARCH_ARM
+    { "NEON",               "neon",      DAV1D_ARM_CPU_FLAG_NEON },
+#elif ARCH_PPC64LE
+    { "VSX",                "vsx",       DAV1D_PPC_CPU_FLAG_VSX },
+#endif
+    { 0 }
+};
+
+typedef struct CheckasmFuncVersion {
+    struct CheckasmFuncVersion *next; /* next version in a singly linked list, NULL at the end */
+    void *func;
+    int ok;
+    unsigned cpu; /* cpu flag of this version, mapped to a name suffix by cpu_suffix() */
+    int iterations; /* divisor applied to cycles in avg_cycles_per_call() */
+    uint64_t cycles; /* accumulated timing, averaged in avg_cycles_per_call() */
+} CheckasmFuncVersion;
+
+/* Binary search tree node */
+typedef struct CheckasmFunc {
+    struct CheckasmFunc *child[2]; /* subtrees, visited in the order child[0], node, child[1] when printing */
+    CheckasmFuncVersion versions; /* the first version is embedded, further ones are chained through next */
+    uint8_t color; /* 0 = red, 1 = black */
+    char name[]; /* function name, stored in a flexible array member */
+} CheckasmFunc;
+
+/* Internal state */
+static struct {
+    CheckasmFunc *funcs; /* root of the function tree, released by destroy_func_tree() */
+    CheckasmFunc *current_func;
+    CheckasmFuncVersion *current_func_ver;
+    const char *current_test_name;
+    int num_checked;
+    int num_failed;
+    double nop_time; /* timer overhead that avg_cycles_per_call() subtracts */
+    unsigned cpu_flag;
+    const char *cpu_flag_name;
+    const char *test_pattern;
+    const char *function_pattern;
+    unsigned seed;
+    int bench;
+    int bench_c; /* makes print_benchs() also list functions that have a single flag-less version */
+    int verbose;
+    int function_listing;
+    int catch_signals;
+    int suffix_length;
+    int max_function_name_length; /* used by print_benchs() to align its output columns */
+#if ARCH_X86_64
+    void (*simd_warmup)(void);
+#endif
+} state;
+
+/* float compare support code: gives access to the raw 32-bit pattern of a float */
+typedef union {
+    float f;
+    uint32_t i;
+} intfloat;
+
+static uint32_t xs_state[4];
+
+static void xor128_srand(unsigned seed) {
+    xs_state[0] = seed;              /* seed as-is */
+    xs_state[1] = seed ^ 0x0000ffff; /* low half inverted */
+    xs_state[2] = seed ^ 0xffff0000; /* high half inverted */
+    xs_state[3] = ~seed;             /* fully inverted */
+}
+
+// xor128 from Marsaglia, George (July 2003). "Xorshift RNGs".
+//             Journal of Statistical Software. 8 (14).
+//             doi:10.18637/jss.v008.i14.
+int xor128_rand(void) {
+    uint32_t mix = xs_state[0];
+    const uint32_t last = xs_state[3];
+
+    /* slide the state window down by one word */
+    xs_state[0] = xs_state[1];
+    xs_state[1] = xs_state[2];
+    xs_state[2] = last;
+    mix ^= mix << 11;
+    mix ^= mix >> 8;
+    const uint32_t next = last ^ (last >> 19) ^ mix;
+    xs_state[3] = next;
+    return next >> 1;
+}
+
+static int is_negative(const intfloat u) {
+    return (u.i & 0x80000000u) != 0; /* 1 if the sign bit of the float is set */
+}
+
+int float_near_ulp(const float a, const float b, const unsigned max_ulp) {
+    const intfloat bits_a = { .f = a };
+    const intfloat bits_b = { .f = b };
+
+    // Bit patterns of opposite sign cannot be compared as integers, so
+    // only exact equality is accepted there (this covers -0.0 == +0.0).
+    if (is_negative(bits_a) ^ is_negative(bits_b))
+        return a == b;
+
+    // Same sign: the distance in ULPs is the difference between the raw
+    // bit patterns, widened to 64 bits so that the subtraction cannot
+    // wrap around.
+    const int64_t distance = (int64_t)bits_a.i - bits_b.i;
+
+    return llabs(distance) <= max_ulp;
+}
+
+int float_near_ulp_array(const float *const a, const float *const b,
+                         const unsigned max_ulp, const int len)
+{
+    int i = 0;
+    while (i < len && float_near_ulp(a[i], b[i], max_ulp))
+        i++;
+    /* the scan only reaches the end if every pair passed */
+    return i >= len;
+}
+
+int float_near_abs_eps(const float a, const float b, const float eps) {
+    return eps > fabsf(a - b); /* strictly closer than eps */
+}
+
+int float_near_abs_eps_array(const float *const a, const float *const b,
+                             const float eps, const int len)
+{
+    int i = 0;
+    while (i < len && float_near_abs_eps(a[i], b[i], eps))
+        i++;
+    /* the scan only reaches the end if every pair passed */
+    return i >= len;
+}
+
+int float_near_abs_eps_ulp(const float a, const float b, const float eps,
+                           const unsigned max_ulp)
+{
+    return float_near_ulp(a, b, max_ulp) ? 1 : float_near_abs_eps(a, b, eps);
+}
+
+int float_near_abs_eps_array_ulp(const float *const a, const float *const b,
+                                 const float eps, const unsigned max_ulp,
+                                 const int len)
+{
+    int i = 0;
+    while (i < len && float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp))
+        i++;
+    /* the scan only reaches the end if every pair passed */
+    return i >= len;
+}
+
+/* Print colored text to stderr if the terminal supports it */
+static void color_printf(const int color, const char *const fmt, ...) {
+    static int8_t use_color = -1; /* -1 = not probed yet, 0 = plain output, 1 = colored output */
+    va_list arg;
+
+#ifdef _WIN32
+    static HANDLE con;
+    static WORD org_attributes; /* console attributes to restore after printing */
+
+    if (use_color < 0) {
+        CONSOLE_SCREEN_BUFFER_INFO con_info;
+        con = GetStdHandle(STD_ERROR_HANDLE);
+        if (con && con != INVALID_HANDLE_VALUE &&
+            GetConsoleScreenBufferInfo(con, &con_info))
+        {
+            org_attributes = con_info.wAttributes;
+            use_color = 1;
+        } else
+            use_color = 0;
+    }
+    if (use_color)
+        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) |
+                                (color & 0x0f)); /* replace only the low 4 attribute bits */
+#else
+    if (use_color < 0) {
+        const char *const term = getenv("TERM");
+        use_color = term && strcmp(term, "dumb") && isatty(2); /* TERM must be set and not "dumb", and fd 2 must be a terminal */
+    }
+    if (use_color)
+        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07); /* bit 3 is the first parameter, the low 3 bits select the color */
+#endif
+
+    va_start(arg, fmt);
+    vfprintf(stderr, fmt, arg);
+    va_end(arg);
+
+    if (use_color) { /* switch back to the previous or default attributes */
+#ifdef _WIN32
+        SetConsoleTextAttribute(con, org_attributes);
+#else
+        fprintf(stderr, "\x1b[0m");
+#endif
+    }
+}
+
+/* Recursively free a function tree along with every version list */
+static void destroy_func_tree(CheckasmFunc *const f) {
+    if (!f)
+        return;
+
+    /* The first version is embedded in the node itself, only the
+     * chained ones are freed individually. */
+    for (CheckasmFuncVersion *v = f->versions.next, *next; v; v = next) {
+        next = v->next;
+        free(v);
+    }
+    destroy_func_tree(f->child[0]);
+    destroy_func_tree(f->child[1]);
+    free(f);
+}
+
+/* calloc() wrapper: returns zeroed memory, or frees the function tree and exits with status 1 */
+static void *checkasm_malloc(const size_t size) {
+    void *const block = calloc(1, size);
+    if (block)
+        return block;
+
+    fprintf(stderr, "checkasm: malloc failed\n");
+    destroy_func_tree(state.funcs);
+    exit(1);
+}
+
+/* Map a cpu flag mask to the suffix of the last matching table entry, "c" if none matches */
+static const char *cpu_suffix(const unsigned cpu) {
+    /* start on the terminating entry so that the first test hits the last real one */
+    int i = (int)(sizeof(cpus) / sizeof(*cpus)) - 1;
+    while (--i >= 0)
+        if (cpu & cpus[i].flag) return cpus[i].suffix;
+    return "c";
+}
+
+#ifdef readtime
+static int cmp_nop(const void *a, const void *b) {
+    return (int)*(const uint16_t*)a - (int)*(const uint16_t*)b; /* ascending order for qsort() */
+}
+
+/* Measure the overhead of the timing code (in decicycles) */
+static double measure_nop_time(void) {
+    enum { SAMPLES = 10000, LO = SAMPLES / 4, HI = SAMPLES - LO };
+    uint16_t delta[SAMPLES];
+
+    for (int n = 0; n < SAMPLES; n++) {
+        const uint64_t start = readtime();
+        delta[n] = (uint16_t) (readtime() - start);
+    }
+
+    /* average the middle half of the sorted samples to discard outliers */
+    qsort(delta, SAMPLES, sizeof(*delta), cmp_nop);
+    int total = 0;
+    for (int n = LO; n < HI; n++) total += delta[n];
+    return total / (double)(HI - LO);
+}
+
+/* Mean cost of one call net of timer overhead; 0.0 if unmeasured or <= 0 */
+static double avg_cycles_per_call(const CheckasmFuncVersion *const v) {
+    if (!v->iterations)
+        return 0.0;
+    /* 4 calls per iteration; nop_time is subtracted once per iteration */
+    const double net = (double)v->cycles / v->iterations - state.nop_time;
+    return net > 0.0 ? net / 4.0 : 0.0;
+}
+
+/* Print benchmark results via an in-order walk of the function tree */
+static void print_benchs(const CheckasmFunc *const f) {
+    if (!f)
+        return;
+    print_benchs(f->child[0]);
+
+    /* Skip functions without timing data; a function whose only version
+     * has cpu == 0 is printed only when bench_c is set */
+    const CheckasmFuncVersion *ver = &f->versions;
+    if (ver->iterations && (state.bench_c || ver->cpu || ver->next)) {
+        const double baseline = avg_cycles_per_call(ver);
+        for (; ver; ver = ver->next) {
+            const int name_width = printf("%s_%s:", f->name, cpu_suffix(ver->cpu));
+            const int pad_length = 10 + state.max_function_name_length - name_width;
+            const double cycles = avg_cycles_per_call(ver);
+            const double ratio = cycles ? baseline / cycles : 0.0;
+            printf("%*.1f (%5.2fx)\n", imax(pad_length, 0), cycles, ratio);
+        }
+    }
+    print_benchs(f->child[1]);
+}
+#endif
+
+/* List every function in tree order as "name (suffix, suffix, ...)" */
+static void print_functions(const CheckasmFunc *const f) {
+    if (!f)
+        return;
+    print_functions(f->child[0]);
+    printf("%s (%s", f->name, cpu_suffix(f->versions.cpu));
+    for (const CheckasmFuncVersion *ver = f->versions.next; ver; ver = ver->next)
+        printf(", %s", cpu_suffix(ver->cpu));
+    printf(")\n");
+    print_functions(f->child[1]);
+}
+
+#define is_digit(x) ((x) >= '0' && (x) <= '9')
+
+/* Bytewise comparison, except that names diverging inside a run of digits
+ * are ordered by run length so that e.g. "w8" sorts before "w16" */
+static int cmp_func_names(const char *a, const char *b) {
+    const char *const start = a;
+    int ascii_diff;
+
+    /* Skip the common prefix, then any digits both names still share */
+    while (!(ascii_diff = *(const unsigned char*)a -
+                          *(const unsigned char*)b) && *a)
+        a++, b++;
+    while (is_digit(*a) && is_digit(*b))
+        a++, b++;
+
+    /* Inside a number, the name with digits remaining is the larger one */
+    const int digit_diff = is_digit(*a) - is_digit(*b);
+    return a > start && is_digit(a[-1]) && digit_diff ? digit_diff : ascii_diff;
+}
+
+/* Perform a tree rotation in the specified direction and return the new root */
+static CheckasmFunc *rotate_tree(CheckasmFunc *const f, const int dir) {
+    CheckasmFunc *const r = f->child[dir^1]; /* opposite child: the new root */
+    f->child[dir^1] = r->child[dir];         /* f adopts r's subtree on side dir */
+    r->child[dir] = f;                       /* f becomes r's child on side dir */
+    r->color = f->color;                     /* r takes over f's color... */
+    f->color = 0;                            /* ...and f gets color 0 (red) */
+    return r;
+}
+
+#define is_red(f) ((f) && !(f)->color) /* color 0 = red; NULL is not red */
+
+/* Balance a left-leaning red-black tree at the specified node */
+static void balance_tree(CheckasmFunc **const root) {
+    CheckasmFunc *const f = *root;
+
+    if (is_red(f->child[0]) && is_red(f->child[1])) { /* both children red */
+        f->color ^= 1; /* flip this node's color... */
+        f->child[0]->color = f->child[1]->color = 1; /* ...and blacken both */
+    }
+    else if (!is_red(f->child[0]) && is_red(f->child[1])) /* lone right red */
+        *root = rotate_tree(f, 0); /* Rotate left */
+    else if (is_red(f->child[0]) && is_red(f->child[0]->child[0])) /* 2 left reds */
+        *root = rotate_tree(f, 1); /* Rotate right */
+}
+
+/* Get a node with the specified name, creating it if it doesn't exist */
+static CheckasmFunc *get_func(CheckasmFunc **const root, const char *const name) {
+    CheckasmFunc *f = *root;
+
+    if (f) {
+        /* Search the tree for a matching node */
+        const int cmp = cmp_func_names(name, f->name);
+        if (cmp) {
+            f = get_func(&f->child[cmp > 0], name); /* <0: child[0], >0: child[1] */
+
+            /* Rebalance the tree on the way up if a new node was inserted */
+            if (!f->versions.func) /* still zeroed, i.e. just allocated */
+                balance_tree(root);
+        }
+    } else {
+        /* Allocate and insert a new node into the tree */
+        const size_t name_length = strlen(name) + 1; /* includes the NUL */
+        f = *root = checkasm_malloc(offsetof(CheckasmFunc, name) + name_length);
+        memcpy(f->name, name, name_length); /* name is stored inside the node */
+    }
+
+    return f;
+}
+
+checkasm_context checkasm_context_buf;
+
+/* Crash handling: attempt to catch crashes and handle them
+ * gracefully instead of just aborting abruptly. */
+#ifdef _WIN32
+static LONG NTAPI signal_handler(EXCEPTION_POINTERS *const e) {
+    if (!state.catch_signals) /* catching is not enabled: pass it on */
+        return EXCEPTION_CONTINUE_SEARCH;
+
+    const char *err; /* failure text handed to checkasm_fail_func() */
+    switch (e->ExceptionRecord->ExceptionCode) {
+    case EXCEPTION_FLT_DIVIDE_BY_ZERO:
+    case EXCEPTION_INT_DIVIDE_BY_ZERO:
+        err = "fatal arithmetic error";
+        break;
+    case EXCEPTION_ILLEGAL_INSTRUCTION:
+    case EXCEPTION_PRIV_INSTRUCTION:
+        err = "illegal instruction";
+        break;
+    case EXCEPTION_ACCESS_VIOLATION:
+    case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
+    case EXCEPTION_DATATYPE_MISALIGNMENT:
+    case EXCEPTION_IN_PAGE_ERROR:
+    case EXCEPTION_STACK_OVERFLOW:
+        err = "segmentation fault";
+        break;
+    default: /* any other exception code is left to other handlers */
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+    state.catch_signals = 0; /* disable catching before reporting */
+    checkasm_fail_func(err);
+    checkasm_load_context(); /* restores the state saved by checkasm_save_context() */
+    return EXCEPTION_CONTINUE_EXECUTION; /* never reached, but shuts up gcc */
+}
+#else
+static void signal_handler(const int s) {
+    if (state.catch_signals) {
+        state.catch_signals = 0; /* disable catching before reporting */
+        checkasm_fail_func(s == SIGFPE ? "fatal arithmetic error" :
+                           s == SIGILL ? "illegal instruction" :
+                                         "segmentation fault");
+        checkasm_load_context(); /* longjmp() back to the saved context */
+    } else {
+        /* fall back to the default signal handler */
+        static const struct sigaction default_sa = { .sa_handler = SIG_DFL };
+        sigaction(s, &default_sa, NULL);
+        raise(s); /* re-deliver the signal with the default action installed */
+    }
+}
+#endif
+
+/* Match str against pattern, where '*' matches any (possibly empty) run
+ * of characters. Returns 0 on a match and non-zero otherwise. */
+static int wildstrcmp(const char *str, const char *pattern) {
+    const char *rest = strchr(pattern, '*');
+    if (!rest) /* no wildcard left: require an exact match */
+        return strcmp(str, pattern);
+    const size_t prefix_len = rest - pattern;
+    if (strncmp(str, pattern, prefix_len)) return 1;
+    do rest++; while (*rest == '*'); /* collapse consecutive wildcards */
+    if (!*rest) return 0; /* trailing '*' matches the remainder */
+    /* Try each non-empty suffix of str against the rest of the pattern */
+    for (str += prefix_len; *str && wildstrcmp(str, rest); str++);
+    return !*str;
+}
+
+/* Add flag to the active cpu flags and run all selected tests, unless that
+ * left the effective flags unchanged. A zero flag always runs the tests. */
+static void check_cpu_flag(const char *const name, unsigned flag) {
+    const unsigned prev_flags = state.cpu_flag;
+    flag |= prev_flags;
+    dav1d_set_cpu_flags_mask(flag);
+    state.cpu_flag = dav1d_get_cpu_flags();
+    if (flag && state.cpu_flag == prev_flags)
+        return; /* nothing new got enabled */
+
+    state.cpu_flag_name = name;
+    state.suffix_length = (int)strlen(cpu_suffix(flag)) + 1;
+    for (int i = 0; tests[i].func; i++) {
+        const char *const test_name = tests[i].name;
+        if (!state.test_pattern || !wildstrcmp(test_name, state.test_pattern)) {
+            xor128_srand(state.seed);
+            state.current_test_name = test_name;
+            tests[i].func();
+        }
+    }
+}
+
+/* Print the pending CPU flag heading, then clear it so it appears once */
+static void print_cpu_name(void) {
+    if (!state.cpu_flag_name)
+        return;
+    color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
+    state.cpu_flag_name = NULL;
+}
+
+static unsigned get_seed(void) { /* default rng seed, derived from a clock */
+#ifdef _WIN32
+    LARGE_INTEGER i;
+    QueryPerformanceCounter(&i);
+    return i.LowPart; /* low part of the performance counter value */
+#elif defined(__APPLE__)
+    return (unsigned) mach_absolute_time(); /* truncated to unsigned */
+#else
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (unsigned) (1000000000ULL * ts.tv_sec + ts.tv_nsec); /* ns, truncated */
+#endif
+}
+
+int main(int argc, char *argv[]) {
+    state.seed = get_seed();
+
+    while (argc > 1) { /* argv[1] is always the next unparsed argument */
+        if (!strncmp(argv[1], "--help", 6) || !strcmp(argv[1], "-h")) {
+            fprintf(stderr,
+                    "checkasm [options] <random seed>\n"
+                    "    <random seed>              Numeric value to seed the rng\n"
+                    "Options:\n"
+                    "    --affinity=<cpu>           Run the process on CPU <cpu>\n"
+                    "    --test=<pattern> -t        Test only <pattern>\n"
+                    "    --function=<pattern> -f    Test only the functions matching <pattern>\n"
+                    "    --bench -b                 Benchmark the tested functions\n"
+                    "    --list-functions           List available functions\n"
+                    "    --list-tests               List available tests\n"
+                    "    --bench-c -c               Benchmark the C-only functions\n"
+                    "    --verbose -v               Print verbose output\n");
+            return 0;
+        } else if (!strcmp(argv[1], "--bench-c") || !strcmp(argv[1], "-c")) {
+            state.bench_c = 1;
+        } else if (!strcmp(argv[1], "--bench") || !strcmp(argv[1], "-b")) {
+#ifndef readtime
+            fprintf(stderr,
+                    "checkasm: --bench is not supported on your system\n");
+            return 1;
+#endif
+            state.bench = 1;
+        } else if (!strncmp(argv[1], "--test=", 7)) {
+            state.test_pattern = argv[1] + 7;
+        } else if (!strcmp(argv[1], "-t")) {
+            state.test_pattern = argc > 2 ? argv[2] : ""; /* "" if -t is last */
+            argc--; /* consume the pattern argument as well */
+            argv++;
+        } else if (!strncmp(argv[1], "--function=", 11)) {
+            state.function_pattern = argv[1] + 11;
+        } else if (!strcmp(argv[1], "-f")) {
+            state.function_pattern = argc > 2 ? argv[2] : ""; /* "" if -f is last */
+            argc--; /* consume the pattern argument as well */
+            argv++;
+        } else if (!strcmp(argv[1], "--list-functions")) {
+            state.function_listing = 1;
+        } else if (!strcmp(argv[1], "--list-tests")) {
+            for (int i = 0; tests[i].name; i++)
+                printf("%s\n", tests[i].name);
+            return 0;
+        } else if (!strcmp(argv[1], "--verbose") || !strcmp(argv[1], "-v")) {
+            state.verbose = 1;
+        } else if (!strncmp(argv[1], "--affinity=", 11)) {
+            unsigned long affinity = strtoul(argv[1] + 11, NULL, 16);
+#ifdef _WIN32
+            BOOL (WINAPI *spdcs)(HANDLE, const ULONG*, ULONG) =
+                (void*)GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "SetProcessDefaultCpuSets");
+            HANDLE process = GetCurrentProcess();
+            int affinity_err;
+            if (spdcs) {
+                affinity_err = !spdcs(process, (ULONG[]){ affinity + 256 }, 1);
+            } else {
+                if (affinity < sizeof(DWORD_PTR) * 8)
+                    affinity_err = !SetProcessAffinityMask(process, (DWORD_PTR)1 << affinity);
+                else
+                    affinity_err = 1;
+            }
+            if (affinity_err) {
+                fprintf(stderr, "checkasm: invalid cpu affinity (%lu)\n", affinity);
+                return 1;
+            } else {
+                fprintf(stderr, "checkasm: running on cpu %lu\n", affinity);
+            }
+#elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(CPU_SET)
+            cpu_set_t set;
+            CPU_ZERO(&set);
+            CPU_SET(affinity, &set);
+            if (pthread_setaffinity_np(pthread_self(), sizeof(set), &set)) {
+                fprintf(stderr, "checkasm: invalid cpu affinity (%lu)\n", affinity);
+                return 1;
+            } else {
+                fprintf(stderr, "checkasm: running on cpu %lu\n", affinity);
+            }
+#else
+            (void)affinity;
+            fprintf(stderr,
+                    "checkasm: --affinity is not supported on your system\n");
+            return 1;
+#endif
+        } else { /* anything unrecognized is parsed as the decimal seed */
+            state.seed = (unsigned) strtoul(argv[1], NULL, 10);
+        }
+
+        argc--;
+        argv++;
+    }
+
+#if TRIM_DSP_FUNCTIONS
+    fprintf(stderr, "checkasm: reference functions unavailable\n");
+    return 0;
+#endif
+
+    dav1d_init_cpu();
+
+#ifdef _WIN32
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+    AddVectoredExceptionHandler(0, signal_handler);
+#endif
+#else
+    const struct sigaction sa = {
+        .sa_handler = signal_handler,
+        .sa_flags = SA_NODEFER,
+    };
+    sigaction(SIGBUS,  &sa, NULL);
+    sigaction(SIGFPE,  &sa, NULL);
+    sigaction(SIGILL,  &sa, NULL);
+    sigaction(SIGSEGV, &sa, NULL);
+#endif
+
+#ifdef readtime
+    if (state.bench) {
+        static int testing = 0; /* still 1 if we get back here via a crash */
+        checkasm_save_context();
+        if (!testing) {
+            checkasm_set_signal_handler_state(1);
+            testing = 1;
+            readtime(); /* probe: may fault if the counter is inaccessible */
+            checkasm_set_signal_handler_state(0);
+        } else {
+            fprintf(stderr, "checkasm: unable to access cycle counter\n");
+            return 1;
+        }
+    }
+#endif
+
+    int ret = 0;
+
+    if (!state.function_listing) {
+#if ARCH_X86_64
+        void checkasm_warmup_avx2(void);
+        void checkasm_warmup_avx512(void);
+        const unsigned cpu_flags = dav1d_get_cpu_flags();
+        if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX512ICL)
+            state.simd_warmup = checkasm_warmup_avx512;
+        else if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX2)
+            state.simd_warmup = checkasm_warmup_avx2;
+        checkasm_simd_warmup();
+#endif
+#if ARCH_X86
+        unsigned checkasm_init_x86(char *name);
+        char name[48];
+        const unsigned cpuid = checkasm_init_x86(name);
+        for (size_t len = strlen(name); len && name[len-1] == ' '; len--)
+            name[len-1] = '\0'; /* trim trailing whitespace */
+        fprintf(stderr, "checkasm: %s (%08X) using random seed %u\n", name, cpuid, state.seed);
+#else
+        fprintf(stderr, "checkasm: using random seed %u\n", state.seed);
+#endif
+    }
+
+    check_cpu_flag(NULL, 0); /* pass with no cpu flags enabled */
+    for (int i = 0; cpus[i].flag; i++)
+        check_cpu_flag(cpus[i].name, cpus[i].flag);
+
+    if (state.function_listing) {
+        print_functions(state.funcs);
+    } else if (state.num_failed) {
+        fprintf(stderr, "checkasm: %d of %d tests failed\n",
+                state.num_failed, state.num_checked);
+        ret = 1;
+    } else {
+        if (state.num_checked)
+            fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
+        else
+            fprintf(stderr, "checkasm: no tests to perform\n");
+#ifdef readtime
+        if (state.bench && state.max_function_name_length) {
+            state.nop_time = measure_nop_time();
+            if (state.verbose)
+                printf("nop:%*.1f\n", state.max_function_name_length + 6, state.nop_time);
+            print_benchs(state.funcs);
+        }
+#endif
+    }
+
+    destroy_func_tree(state.funcs);
+    return ret; /* 1 if any test failed, 0 otherwise */
+}
+
+/* Decide whether or not the specified function needs to be tested and
+ * allocate/initialize data structures if needed. Returns a pointer to a
+ * reference function if the function should be tested, otherwise NULL */
+void *checkasm_check_func(void *const func, const char *const name, ...) {
+    char name_buf[256];
+    va_list arg;
+
+    va_start(arg, name);
+    int name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
+    va_end(arg);
+
+    if (!func || name_length <= 0 || (size_t)name_length >= sizeof(name_buf) ||
+        (state.function_pattern && wildstrcmp(name_buf, state.function_pattern)))
+    { /* no function, empty/failed/truncated name, or filtered out */
+        return NULL;
+    }
+
+    state.current_func = get_func(&state.funcs, name_buf);
+
+    state.funcs->color = 1; /* root is always color 1 (not red) */
+    CheckasmFuncVersion *v = &state.current_func->versions;
+    void *ref = func; /* with no earlier version, func is its own reference */
+
+    if (v->func) { /* at least one version is already registered */
+        CheckasmFuncVersion *prev;
+        do {
+            /* Only test functions that haven't already been tested */
+            if (v->func == func)
+                return NULL;
+
+            if (v->ok) /* the last version that has not failed wins */
+                ref = v->func;
+
+            prev = v;
+        } while ((v = v->next));
+
+        v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
+    }
+
+    name_length += state.suffix_length; /* room for the "_<suffix>" part */
+    if (name_length > state.max_function_name_length)
+        state.max_function_name_length = name_length;
+
+    v->func = func;
+    v->ok = 1;
+    v->cpu = state.cpu_flag;
+    state.current_func_ver = v;
+    if (state.function_listing) /* Save function names without running tests */
+        return NULL;
+
+    xor128_srand(state.seed);
+
+    if (state.cpu_flag) /* versions registered with cpu_flag == 0 are not counted */
+        state.num_checked++;
+
+    return ref;
+}
+
+/* Benchmark only when requested and while no test has failed so far */
+int checkasm_bench_func(void) {
+    return state.bench && !state.num_failed;
+}
+
+/* Record and print the first failure of the current function version
+ * (versions with cpu == 0 are ignored). Returns the verbose setting. */
+int checkasm_fail_func(const char *const msg, ...) {
+    CheckasmFuncVersion *const ver = state.current_func_ver;
+
+    if (ver && ver->cpu && ver->ok) {
+        print_cpu_name();
+        fprintf(stderr, "   %s_%s (", state.current_func->name,
+                cpu_suffix(ver->cpu));
+
+        va_list arg;
+        va_start(arg, msg);
+        vfprintf(stderr, msg, arg);
+        va_end(arg);
+        fprintf(stderr, ")\n");
+
+        ver->ok = 0;
+        state.num_failed++;
+    }
+    return state.verbose;
+}
+
+/* Accumulate one benchmark measurement into the current function version */
+void checkasm_update_bench(const int iterations, const uint64_t cycles) {
+    state.current_func_ver->cycles     += cycles;
+    state.current_func_ver->iterations += iterations;
+}
+
+/* Print the outcome of all tests performed since
+ * the last time this function was called */
+void checkasm_report(const char *const name, ...) {
+    static int prev_checked, prev_failed; /* totals at the previous report */
+    static size_t max_length; /* longest test + report name measured so far */
+
+    if (state.num_checked > prev_checked) { /* something was checked since then */
+        int pad_length = (int) max_length + 4;
+        va_list arg;
+
+        print_cpu_name();
+        pad_length -= fprintf(stderr, " - %s.", state.current_test_name);
+        va_start(arg, name);
+        pad_length -= vfprintf(stderr, name, arg);
+        va_end(arg);
+        fprintf(stderr, "%*c", imax(pad_length, 0) + 2, '['); /* padded '[' */
+
+        if (state.num_failed == prev_failed) /* no new failures */
+            color_printf(COLOR_GREEN, "OK");
+        else
+            color_printf(COLOR_RED, "FAILED");
+        fprintf(stderr, "]\n");
+
+        prev_checked = state.num_checked;
+        prev_failed  = state.num_failed;
+    } else if (!state.cpu_flag) {
+        /* Calculate the amount of padding required
+         * to make the output vertically aligned */
+        size_t length = strlen(state.current_test_name);
+        va_list arg;
+
+        va_start(arg, name);
+        length += vsnprintf(NULL, 0, name, arg); /* formatted length only */
+        va_end(arg);
+
+        if (length > max_length)
+            max_length = length;
+    }
+}
+
+void checkasm_set_signal_handler_state(const int enabled) {
+    state.catch_signals = enabled; /* read by signal_handler() to decide whether to catch */
+}
+
+/* Fail once per comparison; returns 1 if the caller should skip details */
+static int check_err(const char *const file, const int line,
+                     const char *const name, const int w, const int h,
+                     int *const err)
+{
+    if (!*err) {
+        if (!checkasm_fail_func("%s:%d", file, line))
+            return 1;
+        *err = 1;
+        fprintf(stderr, "%s (%dx%d):\n", name, w, h);
+    }
+    return 0;
+}
+
+#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
+int checkasm_check_##type(const char *const file, const int line, \
+                          const type *buf1, ptrdiff_t stride1, \
+                          const type *buf2, ptrdiff_t stride2, \
+                          const int w, int h, const char *const name, \
+                          const int align_w, const int align_h, \
+                          const int padding) \
+{ \
+    int aligned_w = (w + align_w - 1) & ~(align_w - 1); /* round up; mask form assumes a power of two */ \
+    int aligned_h = (h + align_h - 1) & ~(align_h - 1); \
+    int err = 0; /* set by check_err() once a mismatch was reported */ \
+    stride1 /= sizeof(*buf1); /* strides are used as element counts below */ \
+    stride2 /= sizeof(*buf2); \
+    int y = 0; \
+    for (y = 0; y < h; y++) /* find the first differing row, if any */ \
+        if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \
+            break; \
+    if (y != h) { /* mismatch inside the w x h area: dump both buffers */ \
+        if (check_err(file, line, name, w, h, &err)) \
+            return 1; \
+        for (y = 0; y < h; y++) { \
+            for (int x = 0; x < w; x++) \
+                fprintf(stderr, " " fmt, buf1[x]); \
+            fprintf(stderr, "    "); \
+            for (int x = 0; x < w; x++) \
+                fprintf(stderr, " " fmt, buf2[x]); \
+            fprintf(stderr, "    "); \
+            for (int x = 0; x < w; x++) /* 'x' marks differing elements */ \
+                fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
+            buf1 += stride1; \
+            buf2 += stride2; \
+            fprintf(stderr, "\n"); \
+        } \
+        buf1 -= h*stride1; /* undo the row advance of the dump loop */ \
+        buf2 -= h*stride2; \
+    } \
+    for (y = -padding; y < 0; y++) /* padding rows above the block */ \
+        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
+                   (w + 2*padding)*sizeof(*buf1))) { \
+            if (check_err(file, line, name, w, h, &err)) \
+                return 1; \
+            fprintf(stderr, " overwrite above\n"); \
+            break; \
+        } \
+    for (y = aligned_h; y < aligned_h + padding; y++) /* rows below aligned_h */ \
+        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
+                   (w + 2*padding)*sizeof(*buf1))) { \
+            if (check_err(file, line, name, w, h, &err)) \
+                return 1; \
+            fprintf(stderr, " overwrite below\n"); \
+            break; \
+        } \
+    for (y = 0; y < h; y++) /* padding columns left of each row */ \
+        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
+                   padding*sizeof(*buf1))) { \
+            if (check_err(file, line, name, w, h, &err)) \
+                return 1; \
+            fprintf(stderr, " overwrite left\n"); \
+            break; \
+        } \
+    for (y = 0; y < h; y++) /* padding columns right of aligned_w */ \
+        if (memcmp(&buf1[y*stride1 + aligned_w], &buf2[y*stride2 + aligned_w], \
+                   padding*sizeof(*buf1))) { \
+            if (check_err(file, line, name, w, h, &err)) \
+                return 1; \
+            fprintf(stderr, " overwrite right\n"); \
+            break; \
+        } \
+    return err; /* 0: identical, 1: a mismatch was reported */ \
+}
+
+DEF_CHECKASM_CHECK_FUNC(int8_t,   "%4d")  /* defines checkasm_check_int8_t() */
+DEF_CHECKASM_CHECK_FUNC(int16_t,  "%6d")  /* signed types are dumped in decimal */
+DEF_CHECKASM_CHECK_FUNC(int32_t,  "%9d")
+DEF_CHECKASM_CHECK_FUNC(uint8_t,  "%02x") /* unsigned types in zero-padded hex */
+DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
+DEF_CHECKASM_CHECK_FUNC(uint32_t, "%08x")
+
+#if ARCH_X86_64
+void checkasm_simd_warmup(void) /* runs the registered warmup routine, if any */
+{
+    if (state.simd_warmup) /* NULL when no warmup routine was registered */
+        state.simd_warmup();
+}
+#endif
diff --git a/third_party/dav1d/tests/checkasm/checkasm.h b/third_party/dav1d/tests/checkasm/checkasm.h
new file mode 100644
index 0000000000..29c1dbe2b9
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/checkasm.h
@@ -0,0 +1,379 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_TESTS_CHECKASM_CHECKASM_H
+#define DAV1D_TESTS_CHECKASM_CHECKASM_H
+
+#include "config.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#if ARCH_X86_64 && defined(_WIN32)
+/* setjmp/longjmp on 64-bit Windows will try to use SEH to unwind the stack,
+ * which doesn't work for assembly functions without unwind information. */
+#include <windows.h>
+#define checkasm_context CONTEXT
+#define checkasm_save_context() RtlCaptureContext(&checkasm_context_buf)
+#define checkasm_load_context() RtlRestoreContext(&checkasm_context_buf, NULL)
+#else
+#include <setjmp.h>
+#define checkasm_context jmp_buf
+#define checkasm_save_context() setjmp(checkasm_context_buf)
+#define checkasm_load_context() longjmp(checkasm_context_buf, 1)
+#endif
+
+#include "include/common/attributes.h"
+#include "include/common/bitdepth.h"
+#include "include/common/intops.h"
+
+int xor128_rand(void);
+#define rnd xor128_rand
+
+#define decl_check_bitfns(name) \
+name##_8bpc(void); \
+name##_16bpc(void)
+
+void checkasm_check_msac(void);
+void checkasm_check_refmvs(void);
+decl_check_bitfns(void checkasm_check_cdef);
+decl_check_bitfns(void checkasm_check_filmgrain);
+decl_check_bitfns(void checkasm_check_ipred);
+decl_check_bitfns(void checkasm_check_itx);
+decl_check_bitfns(void checkasm_check_loopfilter);
+decl_check_bitfns(void checkasm_check_looprestoration);
+decl_check_bitfns(void checkasm_check_mc);
+
+void *checkasm_check_func(void *func, const char *name, ...);
+int checkasm_bench_func(void);
+int checkasm_fail_func(const char *msg, ...);
+void checkasm_update_bench(int iterations, uint64_t cycles);
+void checkasm_report(const char *name, ...);
+void checkasm_set_signal_handler_state(int enabled);
+extern checkasm_context checkasm_context_buf;
+
+/* float compare utilities */
+int float_near_ulp(float a, float b, unsigned max_ulp);
+int float_near_abs_eps(float a, float b, float eps);
+int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp);
+int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
+                         int len);
+int float_near_abs_eps_array(const float *a, const float *b, float eps,
+                             int len);
+int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
+                                 unsigned max_ulp, int len);
+
+#define BENCH_RUNS (1 << 12) /* Trade-off between accuracy and speed */
+
+/* Set func_new to func and func_ref to checkasm_check_func()'s result (NULL = skip) */
+#define check_func(func, ...)\
+    (func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
+
+/* Declare the function prototype (return type first, then the parameters,
+ * names optional), the func_ref/func_new pointers used by check_func(), and
+ * save the context that checkasm_load_context() returns to. */
+#define declare_func(ret, ...)\
+    declare_new(ret, __VA_ARGS__)\
+    void *func_ref, *func_new;\
+    typedef ret func_type(__VA_ARGS__);\
+    checkasm_save_context()
+
+/* Indicate that the current test has failed */
+#define fail() checkasm_fail_func("%s:%d", __FILE__, __LINE__)
+
+/* Print the test outcome */
+#define report checkasm_report
+
+/* Call the reference function with signal catching enabled for the call */
+#define call_ref(...)\
+    (checkasm_set_signal_handler_state(1),\
+     ((func_type *)func_ref)(__VA_ARGS__));\
+    checkasm_set_signal_handler_state(0)
+
+#if HAVE_ASM
+#if ARCH_X86
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <intrin.h>
+#define readtime() (_mm_lfence(), __rdtsc())
+#else
+static inline uint64_t readtime(void) { /* lfence + rdtsc */
+    uint32_t lo, hi; /* rdtsc result halves, taken from EAX and EDX */
+    __asm__ __volatile__("lfence\nrdtsc" : "=a"(lo), "=d"(hi));
+    return ((uint64_t)hi << 32) | lo;
+}
+#define readtime readtime
+#endif
+#elif (ARCH_AARCH64 || ARCH_ARM) && defined(__APPLE__)
+#include <mach/mach_time.h>
+#define readtime() mach_absolute_time()
+#elif ARCH_AARCH64
+#ifdef _MSC_VER
+#include <windows.h>
+#define readtime() (_InstructionSynchronizationBarrier(), ReadTimeStampCounter())
+#else
+static inline uint64_t readtime(void) {
+    uint64_t ticks;
+    /* Reading pmccntr_el0 requires user mode access to the cycle counter
+     * to have been enabled, which can only be done from kernel space.
+     * cntvct_el0 could be read instead; that register might also be
+     * readable (depending on kernel version), but it has much worse
+     * precision (it's a fixed 50 MHz timer). */
+    __asm__ __volatile__("isb\nmrs %0, pmccntr_el0"
+                         : "=r"(ticks)
+                         :: "memory");
+    return ticks;
+}
+#define readtime readtime
+#endif
+#elif ARCH_ARM && !defined(_MSC_VER) && __ARM_ARCH >= 7
+static inline uint64_t readtime(void) {
+    uint32_t cycle_counter; /* 32-bit value, zero-extended on return */
+    /* This requires enabling user mode access to the cycle counter (which
+     * can only be done from kernel space). */
+    __asm__ __volatile__("isb\nmrc p15, 0, %0, c9, c13, 0"
+                         : "=r"(cycle_counter)
+                         :: "memory");
+    return cycle_counter; /* the upper 32 bits of the result are always zero */
+}
+#define readtime readtime
+#elif ARCH_PPC64LE
+static inline uint64_t readtime(void) { /* 64-bit value from two 32-bit SPR reads */
+    uint32_t tbu, tbl, temp;
+
+    __asm__ __volatile__(
+        "1:\n"
+        "mfspr %2,269\n" /* temp = upper half, first read */
+        "mfspr %0,268\n" /* tbl  = lower half */
+        "mfspr %1,269\n" /* tbu  = upper half, second read */
+        "cmpw   %2,%1\n" /* did the upper half change in between? */
+        "bne    1b\n"    /* then retry from the start */
+    : "=r"(tbl), "=r"(tbu), "=r"(temp)
+    :
+    : "cc");
+
+    return (((uint64_t)tbu) << 32) | (uint64_t)tbl;
+}
+#define readtime readtime
+#endif
+
+/* Verifies that clobbered callee-saved registers
+ * are properly saved and restored */
+void checkasm_checked_call(void *func, ...);
+
+#if ARCH_X86_64
+/* YMM and ZMM registers on x86 are turned off to save power when they haven't
+ * been used for some period of time. When they are used there will be a
+ * "warmup" period during which performance will be reduced and inconsistent
+ * which is problematic when trying to benchmark individual functions. We can
+ * work around this by periodically issuing "dummy" instructions that use
+ * those registers to keep them powered on. */
+void checkasm_simd_warmup(void);
+
+/* The upper 32 bits of 32-bit data types are undefined when passed as function
+ * parameters. In practice those bits usually end up being zero which may hide
+ * certain bugs, such as using a register containing undefined bits as a pointer
+ * offset, so we want to intentionally clobber those bits with junk to expose
+ * any issues. The following set of macros automatically calculates a bitmask
+ * specifying which parameters should have their upper halves clobbered. */
+#ifdef _WIN32
+/* Integer and floating-point parameters share "register slots". */
+#define IGNORED_FP_ARGS 0
+#else
+/* Up to 8 floating-point parameters are passed in XMM registers, which are
+ * handled orthogonally from integer parameters passed in GPR registers. */
+#define IGNORED_FP_ARGS 8
+#endif
+#ifdef HAVE_C11_GENERIC
+#define clobber_type(arg) _Generic((void (*)(void*, arg))NULL,\
+     void (*)(void*, int32_t ): clobber_mask |= 1 << mpos++,\
+     void (*)(void*, uint32_t): clobber_mask |= 1 << mpos++,\
+     void (*)(void*, float   ): mpos += (fp_args++ >= IGNORED_FP_ARGS),\
+     void (*)(void*, double  ): mpos += (fp_args++ >= IGNORED_FP_ARGS),\
+     default:                   mpos++)
+#define init_clobber_mask(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, ...)\
+    unsigned clobber_mask = 0;\
+    {\
+        int mpos = 0, fp_args = 0;\
+        clobber_type(a); clobber_type(b); clobber_type(c); clobber_type(d);\
+        clobber_type(e); clobber_type(f); clobber_type(g); clobber_type(h);\
+        clobber_type(i); clobber_type(j); clobber_type(k); clobber_type(l);\
+        clobber_type(m); clobber_type(n); clobber_type(o); clobber_type(p);\
+    }
+#else
+/* Skip parameter clobbering on compilers without support for _Generic() */
+#define init_clobber_mask(...) unsigned clobber_mask = 0
+#endif
+#define declare_new(ret, ...)\
+    ret (*checked_call)(__VA_ARGS__, int, int, int, int, int, int, int,\
+                        int, int, int, int, int, int, int, int, int,\
+                        void*, unsigned) =\
+        (void*)checkasm_checked_call;\
+    init_clobber_mask(__VA_ARGS__, void*, void*, void*, void*,\
+                      void*, void*, void*, void*, void*, void*,\
+                      void*, void*, void*, void*, void*);
+#define call_new(...)\
+    (checkasm_set_signal_handler_state(1),\
+     checkasm_simd_warmup(),\
+     checked_call(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8,\
+                  7, 6, 5, 4, 3, 2, 1, func_new, clobber_mask));\
+    checkasm_set_signal_handler_state(0)
+#elif ARCH_X86_32
+#define declare_new(ret, ...)\
+    ret (*checked_call)(void *, __VA_ARGS__, int, int, int, int, int, int,\
+                        int, int, int, int, int, int, int, int, int) =\
+        (void *)checkasm_checked_call;
+#define call_new(...)\
+    (checkasm_set_signal_handler_state(1),\
+     checked_call(func_new, __VA_ARGS__, 15, 14, 13, 12,\
+                  11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1));\
+    checkasm_set_signal_handler_state(0)
+#elif ARCH_ARM
+/* Use a dummy argument, to offset the real parameters by 2, not only 1.
+ * This makes sure that potential 8-byte-alignment of parameters is kept
+ * the same even when the extra parameters have been removed. */
+void checkasm_checked_call_vfp(void *func, int dummy, ...);
+#define declare_new(ret, ...)\
+    ret (*checked_call)(void *, int dummy, __VA_ARGS__,\
+                        int, int, int, int, int, int, int, int,\
+                        int, int, int, int, int, int, int) =\
+    (void *)checkasm_checked_call_vfp;
+#define call_new(...)\
+    (checkasm_set_signal_handler_state(1),\
+     checked_call(func_new, 0, __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0));\
+    checkasm_set_signal_handler_state(0)
+#elif ARCH_AARCH64 && !defined(__APPLE__)
+void checkasm_stack_clobber(uint64_t clobber, ...);
+#define declare_new(ret, ...)\
+    ret (*checked_call)(void *, int, int, int, int, int, int, int,\
+                        __VA_ARGS__, int, int, int, int, int, int, int, int,\
+                        int, int, int, int, int, int, int) =\
+    (void *)checkasm_checked_call;
+#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
+#define call_new(...)\
+    (checkasm_set_signal_handler_state(1),\
+     checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+                            CLOB, CLOB, CLOB, CLOB, CLOB),\
+     checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\
+                  7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));\
+    checkasm_set_signal_handler_state(0)
+#else
+#define declare_new(ret, ...)
+#define call_new(...)\
+    (checkasm_set_signal_handler_state(1),\
+     ((func_type *)func_new)(__VA_ARGS__));\
+    checkasm_set_signal_handler_state(0)
+#endif
+#else /* HAVE_ASM */
+#define declare_new(ret, ...)
+/* Call the function */
+#define call_new(...)\
+    (checkasm_set_signal_handler_state(1),\
+     ((func_type *)func_new)(__VA_ARGS__));\
+    checkasm_set_signal_handler_state(0)
+#endif /* HAVE_ASM */
+
+/* Benchmark the function: time BENCH_RUNS samples of 4 calls each, or just call_new() once if checkasm_bench_func() is false */
+#ifdef readtime
+#define bench_new(...)\
+    do {\
+        if (checkasm_bench_func()) {\
+            func_type *const tfunc = func_new;\
+            checkasm_set_signal_handler_state(1);\
+            uint64_t tsum = 0; /* sum of the accepted samples */\
+            int tcount = 0; /* number of accepted samples */\
+            for (int ti = 0; ti < BENCH_RUNS; ti++) {\
+                uint64_t t = readtime(); /* start of one 4-call sample */\
+                int talt = 0; (void)talt; /* read by alternate(); flipped between calls */\
+                tfunc(__VA_ARGS__);\
+                talt = 1;\
+                tfunc(__VA_ARGS__);\
+                talt = 0;\
+                tfunc(__VA_ARGS__);\
+                talt = 1;\
+                tfunc(__VA_ARGS__);\
+                t = readtime() - t; /* elapsed readtime() units for the 4 calls */\
+                if (t*tcount <= tsum*4 && ti > 0) { /* skip run 0 and samples above 4x the running mean */\
+                    tsum += t;\
+                    tcount++;\
+                }\
+            }\
+            checkasm_set_signal_handler_state(0);\
+            checkasm_update_bench(tcount, tsum);\
+        } else {\
+            const int talt = 0; (void)talt; /* keeps alternate() usable in the arguments */\
+            call_new(__VA_ARGS__);\
+        }\
+    } while (0)
+#else /* readtime is not defined: bench_new() expands to an empty statement */
+#define bench_new(...) do {} while (0)
+#endif
+
+/* Alternates between two pointers. Intended to be used within bench_new()
+ * calls for functions which modify their input buffer(s) to ensure that
+ * throughput, and not latency, is measured. */
+#define alternate(a, b) (talt ? (b) : (a)) /* talt is the local that bench_new() declares and toggles */
+
+#define ROUND_UP(x,a) (((x)+((a)-1)) & ~((a)-1)) /* rounds x up to a multiple of a; the mask form is only correct when a is a power of two */
+#define PIXEL_RECT(name, w, h) /* sets up name##_buf (via ALIGN_STK_64), name##_stride and name */ \
+    ALIGN_STK_64(pixel, name##_buf, ((h)+32)*(ROUND_UP(w,64)+64) + 64,); /* h+32 rows of ROUND_UP(w,64)+64 pixels, plus 64 */ \
+    ptrdiff_t name##_stride = sizeof(pixel)*(ROUND_UP(w,64)+64); /* row pitch in bytes */ \
+    (void)name##_stride; /* avoids an unused-variable warning when the stride is not referenced */ \
+    pixel *name = name##_buf + (ROUND_UP(w,64)+64)*16 + 64 /* name points 16 rows plus 64 pixels into name##_buf */
+
+#define CLEAR_PIXEL_RECT(name) /* fills every byte of name##_buf with 0x99 */ \
+    memset(name##_buf, 0x99, sizeof(name##_buf)) \
+
+#define DECL_CHECKASM_CHECK_FUNC(type) \
+int checkasm_check_##type(const char *const file, const int line, \
+                          const type *const buf1, const ptrdiff_t stride1, \
+                          const type *const buf2, const ptrdiff_t stride2, \
+                          const int w, const int h, const char *const name, \
+                          const int align_w, const int align_h, \
+                          const int padding)
+/* Declare checkasm_check_<type>() for each of the element types below. */
+DECL_CHECKASM_CHECK_FUNC(int8_t);
+DECL_CHECKASM_CHECK_FUNC(int16_t);
+DECL_CHECKASM_CHECK_FUNC(int32_t);
+DECL_CHECKASM_CHECK_FUNC(uint8_t);
+DECL_CHECKASM_CHECK_FUNC(uint16_t);
+DECL_CHECKASM_CHECK_FUNC(uint32_t);
+
+#define CONCAT(a,b) a ## b
+/* checkasm_check2() prepends the call site's __FILE__ and __LINE__; checkasm_check() also appends 0, 0, 0 for align_w, align_h, padding. */
+#define checkasm_check2(prefix, ...) CONCAT(checkasm_check_, prefix)(__FILE__, __LINE__, __VA_ARGS__)
+#define checkasm_check(prefix, ...) checkasm_check2(prefix, __VA_ARGS__, 0, 0, 0)
+/* *_padded appends align_w = 1, align_h = 1, padding = 8; *_padded_align appends only padding = 8, so callers pass align_w and align_h themselves. */
+#ifdef BITDEPTH
+#define checkasm_check_pixel(...) checkasm_check(PIXEL_TYPE, __VA_ARGS__)
+#define checkasm_check_pixel_padded(...) checkasm_check2(PIXEL_TYPE, __VA_ARGS__, 1, 1, 8)
+#define checkasm_check_pixel_padded_align(...) checkasm_check2(PIXEL_TYPE, __VA_ARGS__, 8)
+#define checkasm_check_coef(...)  checkasm_check(COEF_TYPE,  __VA_ARGS__)
+#endif
+
+#endif /* DAV1D_TESTS_CHECKASM_CHECKASM_H */
diff --git a/third_party/dav1d/tests/checkasm/filmgrain.c b/third_party/dav1d/tests/checkasm/filmgrain.c
new file mode 100644
index 0000000000..638e83fd11
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/filmgrain.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright © 2019, VideoLAN and dav1d authors
+ * Copyright © 2019, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "tests/checkasm/checkasm.h"
+
+#include <string.h>
+
+#include "src/levels.h"
+#include "src/filmgrain.h"
+#define UNIT_TEST 1
+#include "src/fg_apply_tmpl.c"
+
+#if BITDEPTH == 8 /* grain LUT buffers are checked as int8_t in the 8 bpc build, as int16_t otherwise */
+#define checkasm_check_entry(...) checkasm_check(int8_t, __VA_ARGS__)
+#else /* BITDEPTH != 8 */
+#define checkasm_check_entry(...) checkasm_check(int16_t, __VA_ARGS__)
+#endif /* BITDEPTH == 8 */
+
+static const char ss_name[][4] = { // test-name suffix, indexed by (pixel layout - 1)
+    [DAV1D_PIXEL_LAYOUT_I420 - 1] = "420",
+    [DAV1D_PIXEL_LAYOUT_I422 - 1] = "422",
+    [DAV1D_PIXEL_LAYOUT_I444 - 1] = "444",
+};
+
+static void check_gen_grny(const Dav1dFilmGrainDSPContext *const dsp) {
+    ALIGN_STK_16(entry, grain_lut_c, GRAIN_HEIGHT,[GRAIN_WIDTH]);
+    ALIGN_STK_16(entry, grain_lut_a, GRAIN_HEIGHT + 1,[GRAIN_WIDTH]);
+    // Tests dsp->generate_grain_y for ar_coeff_lag = 0..3; each case runs only if check_func() accepts it.
+    declare_func(void, entry grain_lut[][GRAIN_WIDTH],
+                 const Dav1dFilmGrainData *data HIGHBD_DECL_SUFFIX);
+
+    for (int i = 0; i < 4; i++) {
+        if (check_func(dsp->generate_grain_y, "gen_grain_y_ar%d_%dbpc", i, BITDEPTH)) {
+            ALIGN_STK_16(Dav1dFilmGrainData, fg_data, 1,);
+            fg_data[0].seed = rnd() & 0xFFFF;
+            // bitdepth_max exists only in the 16 bpc build; presumably consumed by HIGHBD_TAIL_SUFFIX (TODO confirm).
+#if BITDEPTH == 16
+            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#endif
+            // Random shifts, then 2 * lag * (lag + 1) luma AR coefficients, each in -128..127.
+            fg_data[0].grain_scale_shift = rnd() & 3;
+            fg_data[0].ar_coeff_shift = (rnd() & 3) + 6;
+            fg_data[0].ar_coeff_lag = i;
+            const int num_y_pos = 2 * fg_data[0].ar_coeff_lag * (fg_data[0].ar_coeff_lag + 1);
+            for (int n = 0; n < num_y_pos; n++)
+                fg_data[0].ar_coeffs_y[n] = (rnd() & 0xff) - 128;
+            // call_ref() is given grain_lut_c and call_new() grain_lut_a; the two LUTs are then checked.
+            call_ref(grain_lut_c, fg_data HIGHBD_TAIL_SUFFIX);
+            call_new(grain_lut_a, fg_data HIGHBD_TAIL_SUFFIX);
+            checkasm_check_entry(grain_lut_c[0], sizeof(entry) * GRAIN_WIDTH,
+                                 grain_lut_a[0], sizeof(entry) * GRAIN_WIDTH,
+                                 GRAIN_WIDTH, GRAIN_HEIGHT, "grain_lut");
+
+            bench_new(grain_lut_a, fg_data HIGHBD_TAIL_SUFFIX);
+        }
+    }
+
+    report("gen_grain_y");
+}
+
+static void check_gen_grnuv(const Dav1dFilmGrainDSPContext *const dsp) {
+    ALIGN_STK_16(entry, grain_lut_y, GRAIN_HEIGHT + 1,[GRAIN_WIDTH]);
+    ALIGN_STK_16(entry, grain_lut_c, GRAIN_HEIGHT,    [GRAIN_WIDTH]);
+    ALIGN_STK_16(entry, grain_lut_a, GRAIN_HEIGHT + 1,[GRAIN_WIDTH]);
+    // Tests dsp->generate_grain_uv[] for each layout index (0..2) combined with ar_coeff_lag = 0..3.
+    declare_func(void, entry grain_lut[][GRAIN_WIDTH],
+                 const entry grain_lut_y[][GRAIN_WIDTH],
+                 const Dav1dFilmGrainData *data, intptr_t uv HIGHBD_DECL_SUFFIX);
+
+    for (int layout_idx = 0; layout_idx < 3; layout_idx++) {
+        const enum Dav1dPixelLayout layout = layout_idx + 1;
+        const int ss_x = layout != DAV1D_PIXEL_LAYOUT_I444;
+        const int ss_y = layout == DAV1D_PIXEL_LAYOUT_I420;
+        // ss_x / ss_y pick the checked region below: 44 columns / 38 rows when set, the full LUT size otherwise.
+        for (int i = 0; i < 4; i++) {
+            if (check_func(dsp->generate_grain_uv[layout_idx],
+                           "gen_grain_uv_ar%d_%dbpc_%s",
+                           i, BITDEPTH, ss_name[layout_idx]))
+            {
+                ALIGN_STK_16(Dav1dFilmGrainData, fg_data, 1,);
+                fg_data[0].seed = rnd() & 0xFFFF;
+
+#if BITDEPTH == 16
+                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#endif
+                // Random luma parameters; grain_lut_y is generated from them and passed to the functions under test.
+                fg_data[0].num_y_points = rnd() & 1;
+                fg_data[0].grain_scale_shift = rnd() & 3;
+                fg_data[0].ar_coeff_shift = (rnd() & 3) + 6;
+                fg_data[0].ar_coeff_lag = i;
+                const int num_y_pos = 2 * fg_data[0].ar_coeff_lag * (fg_data[0].ar_coeff_lag + 1);
+                for (int n = 0; n < num_y_pos; n++)
+                    fg_data[0].ar_coeffs_y[n] = (rnd() & 0xff) - 128;
+                dsp->generate_grain_y(grain_lut_y, fg_data HIGHBD_TAIL_SUFFIX);
+                // One extra chroma coefficient is randomized when num_y_points != 0; otherwise that entry is zeroed.
+                const int uv = rnd() & 1;
+                const int num_uv_pos = num_y_pos + !!fg_data[0].num_y_points;
+                for (int n = 0; n < num_uv_pos; n++)
+                    fg_data[0].ar_coeffs_uv[uv][n] = (rnd() & 0xff) - 128;
+                if (!fg_data[0].num_y_points)
+                    fg_data[0].ar_coeffs_uv[uv][num_uv_pos] = 0;
+                memset(grain_lut_c, 0xff, sizeof(grain_lut_c)); // same 0xff fill in both outputs before the calls
+                memset(grain_lut_a, 0xff, sizeof(grain_lut_a));
+                call_ref(grain_lut_c, grain_lut_y, fg_data, uv HIGHBD_TAIL_SUFFIX);
+                call_new(grain_lut_a, grain_lut_y, fg_data, uv HIGHBD_TAIL_SUFFIX);
+                int w = ss_x ? 44 : GRAIN_WIDTH;
+                int h = ss_y ? 38 : GRAIN_HEIGHT;
+                checkasm_check_entry(grain_lut_c[0], sizeof(entry) * GRAIN_WIDTH,
+                                     grain_lut_a[0], sizeof(entry) * GRAIN_WIDTH,
+                                     w, h, "grain_lut");
+
+                bench_new(grain_lut_a, grain_lut_y, fg_data, uv HIGHBD_TAIL_SUFFIX);
+            }
+        }
+    }
+
+    report("gen_grain_uv");
+}
+
+static void check_fgy_sbrow(const Dav1dFilmGrainDSPContext *const dsp) {
+    PIXEL_RECT(c_dst, 128, 32);
+    PIXEL_RECT(a_dst, 128, 32);
+    PIXEL_RECT(src,   128, 32);
+    const ptrdiff_t stride = c_dst_stride;
+    // Tests dsp->fgy_32x32xn on randomized source rows, with overlap_flag both 0 and 1.
+    declare_func(void, pixel *dst_row, const pixel *src_row, ptrdiff_t stride,
+                 const Dav1dFilmGrainData *data, size_t pw,
+                 const uint8_t scaling[SCALING_SIZE],
+                 const entry grain_lut[][GRAIN_WIDTH],
+                 int bh, int row_num HIGHBD_DECL_SUFFIX);
+
+    if (check_func(dsp->fgy_32x32xn, "fgy_32x32xn_%dbpc", BITDEPTH)) {
+        ALIGN_STK_16(Dav1dFilmGrainData, fg_data, 16,);
+        ALIGN_STK_16(entry, grain_lut, GRAIN_HEIGHT + 1,[GRAIN_WIDTH]);
+        ALIGN_STK_64(uint8_t, scaling, SCALING_SIZE,);
+        fg_data[0].seed = rnd() & 0xFFFF;
+
+#if BITDEPTH == 16
+        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+        const int bitdepth_max = 0xff;
+#endif
+        // Random grain parameters; grain_lut is then built with dsp->generate_grain_y.
+        fg_data[0].grain_scale_shift = rnd() & 3;
+        fg_data[0].ar_coeff_shift = (rnd() & 3) + 6;
+        fg_data[0].ar_coeff_lag = rnd() & 3;
+        const int num_y_pos = 2 * fg_data[0].ar_coeff_lag * (fg_data[0].ar_coeff_lag + 1);
+        for (int n = 0; n < num_y_pos; n++)
+            fg_data[0].ar_coeffs_y[n] = (rnd() & 0xff) - 128;
+        dsp->generate_grain_y(grain_lut, fg_data HIGHBD_TAIL_SUFFIX);
+        // Scaling points: x = 0xff * n / num_y_points plus a random offset below 0xff / num_y_points; y = random byte.
+        fg_data[0].num_y_points = 2 + (rnd() % 13);
+        const int pad = 0xff / fg_data[0].num_y_points;
+        for (int n = 0; n < fg_data[0].num_y_points; n++) {
+            fg_data[0].y_points[n][0] = 0xff * n / fg_data[0].num_y_points;
+            fg_data[0].y_points[n][0] += rnd() % pad;
+            fg_data[0].y_points[n][1] = rnd() & 0xff;
+        }
+        generate_scaling(bitdepth_from_max(bitdepth_max), fg_data[0].y_points,
+                         fg_data[0].num_y_points, scaling);
+
+        fg_data[0].clip_to_restricted_range = rnd() & 1;
+        fg_data[0].scaling_shift = (rnd() & 3) + 8;
+        for (fg_data[0].overlap_flag = 0; fg_data[0].overlap_flag <= 1;
+             fg_data[0].overlap_flag++)
+        { // one case with overlap_flag == 0, three (i = 0..2) with overlap_flag == 1
+            for (int i = 0; i <= 2 * fg_data[0].overlap_flag; i++) {
+                int w, h, row_num;
+                if (fg_data[0].overlap_flag) {
+                    w = 35 + (rnd() % 93);
+                    if (i == 0) {
+                        row_num = 0; // i == 0: first row
+                        h = 1 + (rnd() % 31);
+                    } else {
+                        row_num = 1 + (rnd() & 0x7ff); // i > 0: any row_num other than 0
+                        if (i == 1) {
+                            h = 3 + (rnd() % 30); // i == 1: heights of at least 3
+                        } else {
+                            h = 1 + (rnd() & 1); // i == 2: heights 1..2
+                        }
+                    }
+                } else {
+                    w = 1 + (rnd() & 127); // 1..128
+                    h = 1 + (rnd() & 31); // 1..32
+                    row_num = rnd() & 0x7ff; // 0..2047
+                }
+                // Fill 32 rows x 128 columns of src with random bits; only the first w columns are masked to bitdepth_max.
+                for (int y = 0; y < 32; y++) {
+                    // Src pixels past the right edge can be uninitialized
+                    for (int x = 0; x < 128; x++)
+                        src[y * PXSTRIDE(stride) + x] = rnd();
+                    for (int x = 0; x < w; x++)
+                        src[y * PXSTRIDE(stride) + x] &= bitdepth_max;
+                }
+                // Give both destinations the same 0x99 byte fill before the two calls.
+                CLEAR_PIXEL_RECT(c_dst);
+                CLEAR_PIXEL_RECT(a_dst);
+                call_ref(c_dst, src, stride, fg_data, w, scaling, grain_lut, h,
+                         row_num HIGHBD_TAIL_SUFFIX);
+                call_new(a_dst, src, stride, fg_data, w, scaling, grain_lut, h,
+                         row_num HIGHBD_TAIL_SUFFIX);
+
+                checkasm_check_pixel_padded_align(c_dst, stride, a_dst, stride,
+                                                  w, h, "dst", 32, 2);
+            }
+        }
+        fg_data[0].overlap_flag = 1; // the benchmark below runs with overlap enabled
+        for (int y = 0; y < 32; y++) {
+            // Make sure all pixels are in range
+            for (int x = 0; x < 128; x++)
+                src[y * PXSTRIDE(stride) + x] &= bitdepth_max;
+        }
+        bench_new(a_dst, src, stride, fg_data, 64, scaling, grain_lut, 32,
+                  1 HIGHBD_TAIL_SUFFIX);
+    }
+
+    report("fgy_32x32xn");
+}
+
+static void check_fguv_sbrow(const Dav1dFilmGrainDSPContext *const dsp) {
+    PIXEL_RECT(c_dst,    128, 32);
+    PIXEL_RECT(a_dst,    128, 32);
+    PIXEL_RECT(src,      128, 32);
+    PIXEL_RECT(luma_src, 128, 32);
+    const ptrdiff_t lstride = luma_src_stride;
+    // Tests dsp->fguv_32x32xn[] per layout index (0..2), with chroma_scaling_from_luma (csfl) 0 and 1.
+    declare_func(void, pixel *dst_row, const pixel *src_row, ptrdiff_t stride,
+                 const Dav1dFilmGrainData *data, size_t pw,
+                 const uint8_t scaling[SCALING_SIZE],
+                 const entry grain_lut[][GRAIN_WIDTH], int bh, int row_num,
+                 const pixel *luma_row, ptrdiff_t luma_stride, int uv_pl,
+                 int is_identity HIGHBD_DECL_SUFFIX);
+
+    for (int layout_idx = 0; layout_idx < 3; layout_idx++) {
+        const enum Dav1dPixelLayout layout = layout_idx + 1;
+        const int ss_x = layout != DAV1D_PIXEL_LAYOUT_I444;
+        const int ss_y = layout == DAV1D_PIXEL_LAYOUT_I420;
+        const ptrdiff_t stride = c_dst_stride;
+
+        for (int csfl = 0; csfl <= 1; csfl++) {
+            if (check_func(dsp->fguv_32x32xn[layout_idx],
+                           "fguv_32x32xn_%dbpc_%s_csfl%d",
+                           BITDEPTH, ss_name[layout_idx], csfl))
+            {
+                ALIGN_STK_16(Dav1dFilmGrainData, fg_data, 1,);
+                ALIGN_STK_16(entry, grain_lut, 2,[GRAIN_HEIGHT + 1][GRAIN_WIDTH]); // [0] = luma LUT, [1] = chroma LUT
+                ALIGN_STK_64(uint8_t, scaling, SCALING_SIZE,);
+
+                fg_data[0].seed = rnd() & 0xFFFF;
+
+#if BITDEPTH == 16
+                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                const int bitdepth_max = 0xff;
+#endif
+                const int uv_pl = rnd() & 1;
+                const int is_identity = rnd() & 1;
+                // num_y_points is nonzero only in the csfl case; both LUTs are generated from these parameters.
+                fg_data[0].grain_scale_shift = rnd() & 3;
+                fg_data[0].ar_coeff_shift = (rnd() & 3) + 6;
+                fg_data[0].ar_coeff_lag = rnd() & 3;
+                fg_data[0].num_y_points = csfl ? 2 + (rnd() % 13) : 0;
+                const int num_y_pos = 2 * fg_data[0].ar_coeff_lag * (fg_data[0].ar_coeff_lag + 1);
+                for (int n = 0; n < num_y_pos; n++)
+                    fg_data[0].ar_coeffs_y[n] = (rnd() & 0xff) - 128;
+                const int num_uv_pos = num_y_pos + 1;
+                for (int n = 0; n < num_uv_pos; n++)
+                    fg_data[0].ar_coeffs_uv[uv_pl][n] = (rnd() & 0xff) - 128;
+                dsp->generate_grain_y(grain_lut[0], fg_data HIGHBD_TAIL_SUFFIX);
+                dsp->generate_grain_uv[layout_idx](grain_lut[1], grain_lut[0],
+                                                   fg_data, uv_pl HIGHBD_TAIL_SUFFIX);
+                // The scaling LUT is built from the luma points when csfl is set, else from this plane's chroma points.
+                if (csfl) {
+                    const int pad = 0xff / fg_data[0].num_y_points;
+                    for (int n = 0; n < fg_data[0].num_y_points; n++) {
+                        fg_data[0].y_points[n][0] = 0xff * n / fg_data[0].num_y_points;
+                        fg_data[0].y_points[n][0] += rnd() % pad;
+                        fg_data[0].y_points[n][1] = rnd() & 0xff;
+                    }
+                    generate_scaling(bitdepth_from_max(bitdepth_max), fg_data[0].y_points,
+                                     fg_data[0].num_y_points, scaling);
+                } else {
+                    fg_data[0].num_uv_points[uv_pl] = 2 + (rnd() % 9);
+                    const int pad = 0xff / fg_data[0].num_uv_points[uv_pl];
+                    for (int n = 0; n < fg_data[0].num_uv_points[uv_pl]; n++) {
+                        fg_data[0].uv_points[uv_pl][n][0] = 0xff * n / fg_data[0].num_uv_points[uv_pl];
+                        fg_data[0].uv_points[uv_pl][n][0] += rnd() % pad;
+                        fg_data[0].uv_points[uv_pl][n][1] = rnd() & 0xff;
+                    }
+                    generate_scaling(bitdepth_from_max(bitdepth_max), fg_data[0].uv_points[uv_pl],
+                                     fg_data[0].num_uv_points[uv_pl], scaling);
+                    // Multipliers in -128..127 and offset in -256..255; only set in this non-csfl branch.
+                    fg_data[0].uv_mult[uv_pl] = (rnd() & 0xff) - 128;
+                    fg_data[0].uv_luma_mult[uv_pl] = (rnd() & 0xff) - 128;
+                    fg_data[0].uv_offset[uv_pl] = (rnd() & 0x1ff) - 256;
+                }
+
+                fg_data[0].clip_to_restricted_range = rnd() & 1;
+                fg_data[0].scaling_shift = (rnd() & 3) + 8;
+                fg_data[0].chroma_scaling_from_luma = csfl;
+                for (fg_data[0].overlap_flag = 0; fg_data[0].overlap_flag <= 1;
+                     fg_data[0].overlap_flag++)
+                { // one case with overlap_flag == 0, three (i = 0..2) with overlap_flag == 1
+                    for (int i = 0; i <= 2 * fg_data[0].overlap_flag; i++) {
+                        int w, h, row_num;
+                        if (fg_data[0].overlap_flag) {
+                            w = (36 >> ss_x) + (rnd() % (92 >> ss_x));
+                            if (i == 0) {
+                                row_num = 0; // i == 0: first row
+                                h = 1 + (rnd() & (31 >> ss_y));
+                            } else {
+                                row_num = 1 + (rnd() & 0x7ff); // i > 0: any row_num other than 0
+                                if (i == 1) {
+                                    h = (ss_y ? 2 : 3) + (rnd() % (ss_y ? 15 : 30));
+                                } else {
+                                    h = ss_y ? 1 : 1 + (rnd() & 1);
+                                }
+                            }
+                        } else {
+                            w = 1 + (rnd() & (127 >> ss_x));
+                            h = 1 + (rnd() & (31 >> ss_y));
+                            row_num = rnd() & 0x7ff;
+                        }
+                        // Randomize src and luma_src fully, then mask the first w chroma and (w << ss_x) luma columns.
+                        for (int y = 0; y < 32; y++) {
+                            // Src pixels past the right edge can be uninitialized
+                            for (int x = 0; x < 128; x++) {
+                                src[y * PXSTRIDE(stride) + x] = rnd();
+                                luma_src[y * PXSTRIDE(lstride) + x] = rnd();
+                            }
+                            for (int x = 0; x < w; x++)
+                                src[y * PXSTRIDE(stride) + x] &= bitdepth_max;
+                            for (int x = 0; x < (w << ss_x); x++)
+                                luma_src[y * PXSTRIDE(lstride) + x] &= bitdepth_max;
+                        }
+                        // Give both destinations the same 0x99 byte fill before the two calls.
+                        CLEAR_PIXEL_RECT(c_dst);
+                        CLEAR_PIXEL_RECT(a_dst);
+                        call_ref(c_dst, src, stride, fg_data, w, scaling, grain_lut[1], h,
+                                 row_num, luma_src, lstride, uv_pl, is_identity HIGHBD_TAIL_SUFFIX);
+                        call_new(a_dst, src, stride, fg_data, w, scaling, grain_lut[1], h,
+                                 row_num, luma_src, lstride, uv_pl, is_identity HIGHBD_TAIL_SUFFIX);
+
+                        checkasm_check_pixel_padded_align(c_dst, stride,
+                                                          a_dst, stride,
+                                                          w, h, "dst",
+                                                          32 >> ss_x, 4);
+                    }
+                }
+
+                fg_data[0].overlap_flag = 1; // the benchmark below runs with overlap enabled
+                for (int y = 0; y < 32; y++) {
+                    // Make sure all pixels are in range
+                    for (int x = 0; x < 128; x++) {
+                        src[y * PXSTRIDE(stride) + x] &= bitdepth_max;
+                        luma_src[y * PXSTRIDE(lstride) + x] &= bitdepth_max;
+                    }
+                }
+                bench_new(a_dst, src, stride, fg_data, 64 >> ss_x, scaling, grain_lut[1], 32 >> ss_y,
+                          1, luma_src, lstride, uv_pl, is_identity HIGHBD_TAIL_SUFFIX);
+            }
+        }
+    }
+
+    report("fguv_32x32xn");
+}
+
+void bitfn(checkasm_check_filmgrain)(void) {
+    Dav1dFilmGrainDSPContext c;
+    // Initialize c with dav1d_film_grain_dsp_init(), then hand it to each test group in turn.
+    bitfn(dav1d_film_grain_dsp_init)(&c);
+
+    check_gen_grny(&c);
+    check_gen_grnuv(&c);
+    check_fgy_sbrow(&c);
+    check_fguv_sbrow(&c);
+}
diff --git a/third_party/dav1d/tests/checkasm/ipred.c b/third_party/dav1d/tests/checkasm/ipred.c
new file mode 100644
index 0000000000..3676b809b3
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/ipred.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "tests/checkasm/checkasm.h"
+#include "src/ipred.h"
+#include "src/levels.h"
+
+#include <stdio.h>
+/* Mode name used in the "intra_pred_%s_w%d_%dbpc" test names, indexed by prediction mode. */
+static const char *const intra_pred_mode_names[N_IMPL_INTRA_PRED_MODES] = {
+    [DC_PRED]       = "dc",
+    [DC_128_PRED]   = "dc_128",
+    [TOP_DC_PRED]   = "dc_top",
+    [LEFT_DC_PRED]  = "dc_left",
+    [HOR_PRED]      = "h",
+    [VERT_PRED]     = "v",
+    [PAETH_PRED]    = "paeth",
+    [SMOOTH_PRED]   = "smooth",
+    [SMOOTH_V_PRED] = "smooth_v",
+    [SMOOTH_H_PRED] = "smooth_h",
+    [Z1_PRED]       = "z1",
+    [Z2_PRED]       = "z2",
+    [Z3_PRED]       = "z3",
+    [FILTER_PRED]   = "filter"
+};
+/* Layout name used in the "cfl_ac_%s_w%d_%dbpc" test names, indexed by layout - 1. */
+static const char *const cfl_ac_names[3] = { "420", "422", "444" };
+/* Mode name used in the "cfl_pred_%s_w%d_%dbpc" test names; only the four DC variants are set. */
+static const char *const cfl_pred_mode_names[DC_128_PRED + 1] = {
+    [DC_PRED]       = "cfl",
+    [DC_128_PRED]   = "cfl_128",
+    [TOP_DC_PRED]   = "cfl_top",
+    [LEFT_DC_PRED]  = "cfl_left",
+};
+/* Angle offsets drawn at random for Z1..Z3; check_intra_pred() adds 90 * (mode - Z1_PRED). */
+static const uint8_t z_angles[27] = {
+     3,  6,  9,
+    14, 17, 20, 23, 26, 29, 32,
+    36, 39, 42, 45, 48, 51, 54,
+    58, 61, 64, 67, 70, 73, 76,
+    81, 84, 87
+};
+/* Tests each intra_pred[mode] at every block size against the reference, on random edge pixels. */
+static void check_intra_pred(Dav1dIntraPredDSPContext *const c) {
+    PIXEL_RECT(c_dst, 64, 64); /* destination passed to call_ref() */
+    PIXEL_RECT(a_dst, 64, 64); /* destination passed to call_new() */
+    ALIGN_STK_64(pixel, topleft_buf, 257,);
+    pixel *const topleft = topleft_buf + 128; /* lets the code below use negative indices */
+
+    declare_func(void, pixel *dst, ptrdiff_t stride, const pixel *topleft,
+                 int width, int height, int angle, int max_width, int max_height
+                 HIGHBD_DECL_SUFFIX);
+
+    for (int mode = 0; mode < N_IMPL_INTRA_PRED_MODES; mode++) {
+        int bpc_min = BITDEPTH, bpc_max = BITDEPTH; /* default: a single pass with bpc = BITDEPTH */
+        if (mode == FILTER_PRED && BITDEPTH == 16) { /* exception: two passes, bpc 10 and 12 */
+            bpc_min = 10;
+            bpc_max = 12;
+        }
+        for (int bpc = bpc_min; bpc <= bpc_max; bpc += 2)
+            for (int w = 4; w <= (mode == FILTER_PRED ? 32 : 64); w <<= 1)
+                if (check_func(c->intra_pred[mode], "intra_pred_%s_w%d_%dbpc",
+                    intra_pred_mode_names[mode], w, bpc))
+                {
+                    for (int h = imax(w / 4, 4); h <= imin(w * 4,
+                        (mode == FILTER_PRED ? 32 : 64)); h <<= 1)
+                    { /* h runs from max(w / 4, 4) up to min(w * 4, size cap) in powers of two */
+                        const ptrdiff_t stride = c_dst_stride;
+                        int nb_iters = (mode >= Z1_PRED && mode <= Z3_PRED) ? 5 : 1; /* 5 draws for Z1..Z3 */
+
+                        for (int iter = 0; iter < nb_iters; iter++) {
+                            int a = 0, maxw = 0, maxh = 0;
+                            if (mode >= Z1_PRED && mode <= Z3_PRED) { /* angle */
+                                a = (90 * (mode - Z1_PRED) + z_angles[rnd() % 27]) |
+                                    (rnd() & 0x600); /* plus random bits 9 and 10 */
+                                if (mode == Z2_PRED) {
+                                    maxw = rnd(), maxh = rnd();
+                                    maxw = 1 + (maxw & (maxw & 4096 ? 4095 : w - 1)); /* 1..4096 or 1..w */
+                                    maxh = 1 + (maxh & (maxh & 4096 ? 4095 : h - 1)); /* 1..4096 or 1..h */
+                                }
+                            } else if (mode == FILTER_PRED) /* filter_idx */
+                                a = (rnd() % 5) | (rnd() & ~511); /* low bits: rnd() % 5; bits 9+: random */
+
+                            int bitdepth_max;
+                            if (bpc == 16) /* bpc came from BITDEPTH == 16: pick 10- or 12-bit max at random */
+                                bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+                            else
+                                bitdepth_max = (1 << bpc) - 1;
+
+                            for (int i = -h * 2; i <= w * 2; i++) /* fills topleft[-2h .. 2w] */
+                                topleft[i] = rnd() & bitdepth_max;
+
+                            CLEAR_PIXEL_RECT(c_dst);
+                            CLEAR_PIXEL_RECT(a_dst);
+                            call_ref(c_dst, stride, topleft, w, h, a, maxw, maxh
+                                     HIGHBD_TAIL_SUFFIX);
+                            call_new(a_dst, stride, topleft, w, h, a, maxw, maxh
+                                     HIGHBD_TAIL_SUFFIX);
+                            if (checkasm_check_pixel_padded(c_dst, stride,
+                                                            a_dst, stride,
+                                                            w, h, "dst"))
+                            { /* non-zero result: print the random parameters and leave the iter loop */
+                                if (mode == Z1_PRED || mode == Z3_PRED)
+                                    fprintf(stderr, "angle = %d (0x%03x)\n",
+                                            a & 0x1ff, a & 0x600);
+                                else if (mode == Z2_PRED)
+                                    fprintf(stderr, "angle = %d (0x%03x), "
+                                            "max_width = %d, max_height = %d\n",
+                                            a & 0x1ff, a & 0x600, maxw, maxh);
+                                else if (mode == FILTER_PRED)
+                                    fprintf(stderr, "filter_idx = %d\n", a & 0x1ff);
+                                break;
+                            }
+
+                            bench_new(a_dst, stride, topleft, w, h, a, 128, 128
+                                      HIGHBD_TAIL_SUFFIX); /* benchmarked with max_width/max_height = 128 */
+                        }
+                    }
+                }
+    }
+    report("intra_pred");
+}
+/* Tests cfl_ac[layout - 1] for each layout, block size and w_pad/h_pad combination. */
+static void check_cfl_ac(Dav1dIntraPredDSPContext *const c) {
+    ALIGN_STK_64(int16_t, c_dst, 32 * 32,); /* output of call_ref() */
+    ALIGN_STK_64(int16_t, a_dst, 32 * 32,); /* output of call_new() */
+    ALIGN_STK_64(pixel, luma, 32 * 32,);    /* input, addressed as 32 pixels per row */
+
+    declare_func(void, int16_t *ac, const pixel *y, ptrdiff_t stride,
+                 int w_pad, int h_pad, int cw, int ch);
+
+    for (int layout = 1; layout <= DAV1D_PIXEL_LAYOUT_I444; layout++) { /* layout 0 is not tested */
+        const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420; /* set only for I420 */
+        const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444; /* set for every layout except I444 */
+        const int h_step = 2 >> ss_hor, v_step = 2 >> ss_ver; /* amount each padding loop steps by */
+        for (int w = 4; w <= (32 >> ss_hor); w <<= 1)
+            if (check_func(c->cfl_ac[layout - 1], "cfl_ac_%s_w%d_%dbpc",
+                cfl_ac_names[layout - 1], w, BITDEPTH))
+            {
+                for (int h = imax(w / 4, 4);
+                     h <= imin(w * 4, (32 >> ss_ver)); h <<= 1)
+                {
+                    const ptrdiff_t stride = 32 * sizeof(pixel);
+                    for (int w_pad = imax((w >> 2) - h_step, 0);
+                         w_pad >= 0; w_pad -= h_step)
+                    { /* both padding loops start at their largest value and count down to 0 */
+                        for (int h_pad = imax((h >> 2) - v_step, 0);
+                             h_pad >= 0; h_pad -= v_step)
+                        {
+#if BITDEPTH == 16
+                            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                            const int bitdepth_max = 0xff;
+#endif
+                            for (int y = 0; y < (h << ss_ver); y++) /* input area is w x h scaled */
+                                for (int x = 0; x < (w << ss_hor); x++) /* up by the subsampling */
+                                    luma[y * 32 + x] = rnd() & bitdepth_max;
+
+                            call_ref(c_dst, luma, stride, w_pad, h_pad, w, h);
+                            call_new(a_dst, luma, stride, w_pad, h_pad, w, h);
+                            checkasm_check(int16_t, c_dst, w * sizeof(*c_dst),
+                                                    a_dst, w * sizeof(*a_dst),
+                                                    w, h, "dst"); /* compares the w x h outputs */
+                        }
+                    }
+
+                    bench_new(a_dst, luma, stride, 0, 0, w, h); /* benchmarked without padding */
+                }
+            }
+    }
+    report("cfl_ac");
+}
+/* Tests cfl_pred[mode] at each block size with random edge pixels, ac values and alpha. */
+static void check_cfl_pred(Dav1dIntraPredDSPContext *const c) {
+    PIXEL_RECT(c_dst, 32, 32); /* destination passed to call_ref() */
+    PIXEL_RECT(a_dst, 32, 32); /* destination passed to call_new() */
+    ALIGN_STK_64(int16_t, ac, 32 * 32,);
+    ALIGN_STK_64(pixel, topleft_buf, 257,);
+    pixel *const topleft = topleft_buf + 128; /* lets the code below use negative indices */
+
+    declare_func(void, pixel *dst, ptrdiff_t stride, const pixel *topleft,
+                 int width, int height, const int16_t *ac, int alpha
+                 HIGHBD_DECL_SUFFIX);
+
+    for (int mode = 0; mode <= DC_128_PRED; mode += 1 + 2 * !mode) /* visits 0, then 3, 4, ... */
+        for (int w = 4; w <= 32; w <<= 1)
+            if (check_func(c->cfl_pred[mode], "cfl_pred_%s_w%d_%dbpc",
+                cfl_pred_mode_names[mode], w, BITDEPTH))
+            {
+                for (int h = imax(w / 4, 4); h <= imin(w * 4, 32); h <<= 1)
+                {
+#if BITDEPTH == 16
+                    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                    const int bitdepth_max = 0xff;
+#endif
+
+                    int alpha = ((rnd() & 15) + 1) * (1 - (rnd() & 2)); /* magnitude 1..16, random sign */
+
+                    for (int i = -h * 2; i <= w * 2; i++) /* fills topleft[-2h .. 2w] */
+                        topleft[i] = rnd() & bitdepth_max;
+
+                    int luma_avg = w * h >> 1; /* starts at half the count so the division rounds */
+                    for (int i = 0; i < w * h; i++)
+                        luma_avg += ac[i] = rnd() & (bitdepth_max << 3);
+                    luma_avg /= w * h;
+                    for (int i = 0; i < w * h; i++)
+                        ac[i] -= luma_avg; /* subtract the average from every ac value */
+
+                    CLEAR_PIXEL_RECT(c_dst);
+                    CLEAR_PIXEL_RECT(a_dst);
+
+                    call_ref(c_dst, c_dst_stride, topleft, w, h, ac, alpha
+                             HIGHBD_TAIL_SUFFIX);
+                    call_new(a_dst, a_dst_stride, topleft, w, h, ac, alpha
+                             HIGHBD_TAIL_SUFFIX);
+                    checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
+                                                w, h, "dst");
+
+                    bench_new(a_dst, a_dst_stride, topleft, w, h, ac, alpha
+                              HIGHBD_TAIL_SUFFIX);
+                }
+            }
+    report("cfl_pred");
+}
+/* Tests pal_pred at each block size with a random 8-entry palette and a random index map. */
+static void check_pal_pred(Dav1dIntraPredDSPContext *const c) {
+    PIXEL_RECT(c_dst, 64, 64); /* destination passed to call_ref() */
+    PIXEL_RECT(a_dst, 64, 64); /* destination passed to call_new() */
+    ALIGN_STK_64(uint8_t, idx, 64 * 64,); /* palette indices; the first w * h are filled per test */
+    ALIGN_STK_16(uint16_t, pal, 8,);
+
+    declare_func(void, pixel *dst, ptrdiff_t stride, const uint16_t *pal,
+                 const uint8_t *idx, int w, int h);
+
+    for (int w = 4; w <= 64; w <<= 1)
+        if (check_func(c->pal_pred, "pal_pred_w%d_%dbpc", w, BITDEPTH))
+            for (int h = imax(w / 4, 4); h <= imin(w * 4, 64); h <<= 1)
+            {
+#if BITDEPTH == 16
+                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                const int bitdepth_max = 0xff;
+#endif
+
+                for (int i = 0; i < 8; i++)
+                    pal[i] = rnd() & bitdepth_max; /* palette entries masked to the bit depth */
+
+                for (int i = 0; i < w * h; i++)
+                    idx[i] = rnd() & 7; /* 0..7, i.e. always inside the 8-entry palette */
+
+                CLEAR_PIXEL_RECT(c_dst);
+                CLEAR_PIXEL_RECT(a_dst);
+
+                call_ref(c_dst, c_dst_stride, pal, idx, w, h);
+                call_new(a_dst, a_dst_stride, pal, idx, w, h);
+                checkasm_check_pixel_padded(c_dst, c_dst_stride,
+                                            a_dst, a_dst_stride, w, h, "dst");
+
+                bench_new(a_dst, a_dst_stride, pal, idx, w, h);
+            }
+    report("pal_pred");
+}
+
+void bitfn(checkasm_check_ipred)(void) {
+    Dav1dIntraPredDSPContext dsp;
+    bitfn(dav1d_intra_pred_dsp_init)(&dsp);
+    /* Entry point: run the four intra prediction checkers on the initialised context. */
+    check_intra_pred(&dsp);
+    check_cfl_ac(&dsp);
+    check_cfl_pred(&dsp);
+    check_pal_pred(&dsp);
+}
diff --git a/third_party/dav1d/tests/checkasm/itx.c b/third_party/dav1d/tests/checkasm/itx.c
new file mode 100644
index 0000000000..c7cc411ff5
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/itx.c
@@ -0,0 +1,318 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "tests/checkasm/checkasm.h"
+
+#include <math.h>
+
+#include "src/itx.h"
+#include "src/levels.h"
+#include "src/scan.h"
+#include "src/tables.h"
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+#ifndef M_SQRT1_2
+#define M_SQRT1_2 0.707106781186547524401
+#endif
+/* 1-D transform kinds that the 2-D transform types below are described in terms of. */
+enum Tx1D { DCT, ADST, FLIPADST, IDENTITY, WHT };
+/* Pair of 1-D kinds for each TxfmType; the test names print [0] first, then [1]. */
+static const uint8_t itx_1d_types[N_TX_TYPES_PLUS_LL][2] = {
+    [DCT_DCT]           = { DCT,      DCT      },
+    [ADST_DCT]          = { DCT,      ADST     },
+    [DCT_ADST]          = { ADST,     DCT      },
+    [ADST_ADST]         = { ADST,     ADST     },
+    [FLIPADST_DCT]      = { DCT,      FLIPADST },
+    [DCT_FLIPADST]      = { FLIPADST, DCT      },
+    [FLIPADST_FLIPADST] = { FLIPADST, FLIPADST },
+    [ADST_FLIPADST]     = { FLIPADST, ADST     },
+    [FLIPADST_ADST]     = { ADST,     FLIPADST },
+    [IDTX]              = { IDENTITY, IDENTITY },
+    [V_DCT]             = { IDENTITY, DCT      },
+    [H_DCT]             = { DCT,      IDENTITY },
+    [V_ADST]            = { IDENTITY, ADST     },
+    [H_ADST]            = { ADST,     IDENTITY },
+    [V_FLIPADST]        = { IDENTITY, FLIPADST },
+    [H_FLIPADST]        = { FLIPADST, IDENTITY },
+    [WHT_WHT]           = { WHT,      WHT      },
+};
+/* Name of each Tx1D kind as it appears in the "inv_txfm_add_..." test names. */
+static const char *const itx_1d_names[5] = {
+    [DCT]      = "dct",
+    [ADST]     = "adst",
+    [FLIPADST] = "flipadst",
+    [IDENTITY] = "identity",
+    [WHT]      = "wht"
+};
+/* Scale applied after the first forward pass in ftx(); indexed by ctz(w * h) - 4. */
+static const double scaling_factors[9] = {
+    4.0000,             /*  4x4                          */
+    4.0000 * M_SQRT1_2, /*  4x8   8x4                    */
+    2.0000,             /*  4x16  8x8  16x4              */
+    2.0000 * M_SQRT1_2, /*        8x16 16x8              */
+    1.0000,             /*        8x32 16x16 32x8        */
+    0.5000 * M_SQRT1_2, /*             16x32 32x16       */
+    0.2500,             /*             16x64 32x32 64x16 */
+    0.1250 * M_SQRT1_2, /*                   32x64 64x32 */
+    0.0625,             /*                         64x64 */
+};
+
+/* FIXME: Ensure that those forward transforms are similar to the real AV1
+ * transforms. The FLIPADST currently uses the ADST forward transform for
+ * example which is obviously "incorrect", but we're just using it for now
+ * since it does produce coefficients in the correct range at least. */
+/* Forward DCT-II of in[0..sz-1] into out[0..sz-1]; out[0] is additionally scaled by 1/sqrt(2). */
+static void fdct_1d(double *const out, const double *const in, const int sz) {
+    const double two_sz = sz * 2.0; /* loop-invariant divisor of the cosine argument */
+    for (int k = 0; k < sz; k++) {
+        out[k] = 0.0; /* the sum is accumulated directly in the output slot */
+        for (int n = 0; n < sz; n++)
+            out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / two_sz);
+    }
+    out[0] *= M_SQRT1_2;
+}
+/* Forward ADST of in[0..sz-1] into out[0..sz-1]; sz == 4 uses its own sine argument.
+ * See "Towards jointly optimal spatial prediction and adaptive transform in
+ * video/image coding", by J. Han, A. Saxena, and K. Rose
+ * IEEE Proc. ICASSP, pp. 726-729, Mar. 2010.
+ * and "A Butterfly Structured Design of The Hybrid Transform Coding Scheme",
+ * by Jingning Han, Yaowu Xu, and Debargha Mukherjee
+ * http://research.google.com/pubs/archive/41418.pdf */
+static void fadst_1d(double *const out, const double *const in, const int sz) {
+    for (int k = 0; k < sz; k++) {
+        out[k] = 0.0;
+        for (int n = 0; n < sz; n++) {
+            const double frac = sz == 4 ? (    n + 1) * (2 * k + 1) / (8.0 + 1.0)
+                                        : (2 * n + 1) * (2 * k + 1) / (sz * 4.0);
+            out[k] += in[n] * sin(M_PI * frac);
+        }
+    }
+}
+/* 4-point forward WHT: reads in[0..3] up front, then writes out[0..3]. */
+static void fwht4_1d(double *const out, const double *const in)
+{
+    const double a = in[0], b = in[1], c = in[2], d = in[3];
+    const double sum_ab  = a + b;
+    const double diff_dc = d - c;
+    const double half    = (sum_ab - diff_dc) * 0.5;
+    const double u1 = half - b, u2 = half - c;
+    out[0] = sum_ab - u2;
+    out[1] = u2;
+    out[2] = diff_dc + u1;
+    out[3] = u1;
+}
+/* Picks a random eob inside sub-block `subsh`, zeroes the coefficients after it and returns eob. */
+static int copy_subcoefs(coef *coeff,
+                         const enum RectTxfmSize tx, const enum TxfmType txtp,
+                         const int sw, const int sh, const int subsh)
+{
+    /* copy the topleft coefficients such that the return value (being the
+     * coefficient scantable index for the eob token) guarantees that only
+     * the topleft $sub out of $sz (where $sz >= $sub) coefficients in both
+     * dimensions are non-zero. This leads to branching to specific optimized
+     * simd versions (e.g. dc-only) so that we get full asm coverage in this
+     * test */
+
+    const enum TxClass tx_class = dav1d_tx_type_class[txtp];
+    const uint16_t *const scan = dav1d_scans[tx];
+    const int sub_high = subsh > 0 ? subsh * 8 - 1 : 0; /* largest x/y kept; 0 leaves only index 0 */
+    const int sub_low  = subsh > 1 ? sub_high - 8 : 0;  /* x/y above this is past the previous sub-block */
+    int n, eob;
+
+    for (n = 0, eob = 0; n < sw * sh; n++) { /* walk positions in the order used by this tx_class */
+        int rc, rcx, rcy;
+        if (tx_class == TX_CLASS_2D)
+            rc = scan[n], rcx = rc % sh, rcy = rc / sh;
+        else if (tx_class == TX_CLASS_H)
+            rcx = n % sh, rcy = n / sh, rc = n;
+        else /* tx_class == TX_CLASS_V */
+            rcx = n / sw, rcy = n % sw, rc = rcy * sh + rcx;
+
+        /* Pick a random eob within this sub-itx */
+        if (rcx > sub_high || rcy > sub_high) {
+            break; /* upper boundary */
+        } else if (!eob && (rcx > sub_low || rcy > sub_low))
+            eob = n; /* lower boundary */
+    }
+
+    if (eob)
+        eob += rnd() % (n - eob - 1); /* move eob forward by a random amount below n - eob - 1 */
+    if (tx_class == TX_CLASS_2D)
+        for (n = eob + 1; n < sw * sh; n++) /* zero everything after eob in scan order */
+            coeff[scan[n]] = 0;
+    else if (tx_class == TX_CLASS_H)
+        for (n = eob + 1; n < sw * sh; n++) /* zero everything after eob in index order */
+            coeff[n] = 0;
+    else /* tx_class == TX_CLASS_V */ {
+        for (int rcx = eob / sw, rcy = eob % sw; rcx < sh; rcx++, rcy = -1)
+            while (++rcy < sw) /* starts one step after eob's own position */
+                coeff[rcy * sh + rcx] = 0;
+        n = sw * sh;
+    }
+    for (; n < 32 * 32; n++) /* n == sw * sh here: the rest of the buffer gets random values */
+        coeff[n] = rnd();
+    return eob;
+}
+/* Fills buf with coefficients from a two-pass forward transform of random input; returns the eob. */
+static int ftx(coef *const buf, const enum RectTxfmSize tx,
+               const enum TxfmType txtp, const int w, const int h,
+               const int subsh, const int bitdepth_max)
+{
+    double out[64 * 64], temp[64 * 64];
+    const double scale = scaling_factors[ctz(w * h) - 4]; /* table index derived from w * h */
+    const int sw = imin(w, 32), sh = imin(h, 32); /* at most 32x32 coefficients are copied out */
+
+    for (int i = 0; i < h; i++) { /* first pass: a w-point transform of each random input row */
+        double in[64], temp_out[64];
+
+        for (int i = 0; i < w; i++) /* this i shadows the row index for the fill loop only */
+            in[i] = (rnd() & (2 * bitdepth_max + 1)) - bitdepth_max;
+
+        switch (itx_1d_types[txtp][0]) {
+        case DCT:
+            fdct_1d(temp_out, in, w);
+            break;
+        case ADST:
+        case FLIPADST: /* shares the ADST forward transform, as the FIXME above explains */
+            fadst_1d(temp_out, in, w);
+            break;
+        case WHT:
+            fwht4_1d(temp_out, in);
+            break;
+        case IDENTITY:
+            memcpy(temp_out, in, w * sizeof(*temp_out));
+            break;
+        }
+
+        for (int j = 0; j < w; j++)
+            temp[j * h + i] = temp_out[j] * scale; /* scaled and stored transposed (h per run) */
+    }
+
+    for (int i = 0; i < w; i++) { /* second pass: an h-point transform of each run of temp */
+        switch (itx_1d_types[txtp][0]) { /* NOTE(review): also [0], not [1] - confirm this is intended */
+        case DCT:
+            fdct_1d(&out[i * h], &temp[i * h], h);
+            break;
+        case ADST:
+        case FLIPADST:
+            fadst_1d(&out[i * h], &temp[i * h], h);
+            break;
+        case WHT:
+            fwht4_1d(&out[i * h], &temp[i * h]);
+            break;
+        case IDENTITY:
+            memcpy(&out[i * h], &temp[i * h], h * sizeof(*out));
+            break;
+        }
+    }
+
+    for (int y = 0; y < sh; y++)
+        for (int x = 0; x < sw; x++)
+            buf[y * sw + x] = (coef) (out[y * w + x] + 0.5); /* 0.5 is added before the cast */
+
+    return copy_subcoefs(buf, tx, txtp, sw, sh, subsh); /* trims the tail and yields the eob */
+}
+/* Tests itxfm_add[tx][txtp] for every transform type, subsh value and bit depth of size tx. */
+static void check_itxfm_add(Dav1dInvTxfmDSPContext *const c,
+                            const enum RectTxfmSize tx)
+{
+    ALIGN_STK_64(coef, coeff, 2, [32 * 32]); /* coeff[0] goes to call_ref(), coeff[1] to call_new() */
+    PIXEL_RECT(c_dst, 64, 64);
+    PIXEL_RECT(a_dst, 64, 64);
+
+    static const uint8_t subsh_iters[5] = { 2, 2, 3, 5, 5 }; /* subsh count, by max(lw, lh) */
+
+    const int w = dav1d_txfm_dimensions[tx].w * 4; /* presumably stored in 4-pixel units - confirm */
+    const int h = dav1d_txfm_dimensions[tx].h * 4;
+    const int subsh_max = subsh_iters[imax(dav1d_txfm_dimensions[tx].lw,
+                                           dav1d_txfm_dimensions[tx].lh)];
+#if BITDEPTH == 16
+    const int bpc_min = 10, bpc_max = 12;
+#else
+    const int bpc_min = 8, bpc_max = 8;
+#endif
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, coef *coeff,
+                 int eob HIGHBD_DECL_SUFFIX);
+
+    for (int bpc = bpc_min; bpc <= bpc_max; bpc += 2) {
+        bitfn(dav1d_itx_dsp_init)(c, bpc); /* the context is re-initialised for each bit depth */
+        for (enum TxfmType txtp = 0; txtp < N_TX_TYPES_PLUS_LL; txtp++)
+            for (int subsh = 0; subsh < subsh_max; subsh++)
+                if (check_func(c->itxfm_add[tx][txtp],
+                               "inv_txfm_add_%dx%d_%s_%s_%d_%dbpc",
+                               w, h, itx_1d_names[itx_1d_types[txtp][0]],
+                               itx_1d_names[itx_1d_types[txtp][1]], subsh,
+                               bpc))
+                {
+                    const int bitdepth_max = (1 << bpc) - 1;
+                    const int eob = ftx(coeff[0], tx, txtp, w, h, subsh, bitdepth_max);
+                    memcpy(coeff[1], coeff[0], sizeof(*coeff)); /* both calls get identical input */
+
+                    CLEAR_PIXEL_RECT(c_dst);
+                    CLEAR_PIXEL_RECT(a_dst);
+
+                    for (int y = 0; y < h; y++) /* same random starting pixels in both destinations */
+                        for (int x = 0; x < w; x++)
+                            c_dst[y*PXSTRIDE(c_dst_stride) + x] =
+                            a_dst[y*PXSTRIDE(a_dst_stride) + x] = rnd() & bitdepth_max;
+
+                    call_ref(c_dst, c_dst_stride, coeff[0], eob
+                             HIGHBD_TAIL_SUFFIX);
+                    call_new(a_dst, a_dst_stride, coeff[1], eob
+                             HIGHBD_TAIL_SUFFIX);
+
+                    checkasm_check_pixel_padded(c_dst, c_dst_stride,
+                                                a_dst, a_dst_stride,
+                                                w, h, "dst");
+                    if (memcmp(coeff[0], coeff[1], sizeof(*coeff))) /* coefficient buffers must match too */
+                        fail();
+
+                    bench_new(alternate(c_dst, a_dst), a_dst_stride,
+                              alternate(coeff[0], coeff[1]), eob HIGHBD_TAIL_SUFFIX);
+                }
+    }
+    report("add_%dx%d", w, h);
+}
+/* Entry point: runs check_itxfm_add() for every transform size, in the order listed below. */
+void bitfn(checkasm_check_itx)(void) {
+    /* Begin with an all-zero context so that unused function pointer elements are zero. */
+    Dav1dInvTxfmDSPContext dsp = { { { 0 } } };
+
+    static const uint8_t txfm_size_order[N_RECT_TX_SIZES] = {
+        TX_4X4,   RTX_4X8,  RTX_4X16,
+        RTX_8X4,  TX_8X8,   RTX_8X16,  RTX_8X32,
+        RTX_16X4, RTX_16X8, TX_16X16,  RTX_16X32, RTX_16X64,
+                  RTX_32X8, RTX_32X16, TX_32X32,  RTX_32X64,
+                            RTX_64X16, RTX_64X32, TX_64X64
+    };
+
+    for (const uint8_t *tx = txfm_size_order; tx < txfm_size_order + N_RECT_TX_SIZES; tx++)
+        check_itxfm_add(&dsp, *tx);
+}
diff --git a/third_party/dav1d/tests/checkasm/loopfilter.c b/third_party/dav1d/tests/checkasm/loopfilter.c
new file mode 100644
index 0000000000..7d70d0648c
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/loopfilter.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "tests/checkasm/checkasm.h"
+
+#include <string.h>
+
+#include "src/levels.h"
+#include "src/loopfilter.h"
+// Fills dst[-8 * stride .. +7 * stride] with one of four random edge patterns derived from E and I.
+static void init_lpf_border(pixel *const dst, const ptrdiff_t stride,
+                            int E, int I, const int bitdepth_max)
+{
+    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
+    const int F = 1 << bitdepth_min_8; // spread used by the two "flat" patterns
+    E <<= bitdepth_min_8; // E and I are shifted up by the same amount
+    I <<= bitdepth_min_8;
+
+    const int filter_type = rnd() % 4; // selects one of the four cases below
+    const int edge_diff = rnd() % ((E + 2) * 4) - 2 * (E + 2); // added to dst[0] to form dst[-1]
+    switch (filter_type) {
+    case 0: // random, unfiltered
+        for (int i = -8; i < 8; i++)
+            dst[i * stride] = rnd() & bitdepth_max;
+        break;
+    case 1: // long flat
+        dst[-8 * stride] = rnd() & bitdepth_max; // the outermost pixel on each side is fully random
+        dst[+7 * stride] = rnd() & bitdepth_max;
+        dst[+0 * stride] = rnd() & bitdepth_max;
+        dst[-1 * stride] = iclip_pixel(dst[+0 * stride] + edge_diff);
+        for (int i = 1; i < 7; i++) { // six more pixels per side, each derived from its edge pixel
+            dst[-(1 + i) * stride] = iclip_pixel(dst[-1 * stride] +
+                                                 rnd() % (2 * (F + 1)) - (F + 1));
+            dst[+(0 + i) * stride] = iclip_pixel(dst[+0 * stride] +
+                                                 rnd() % (2 * (F + 1)) - (F + 1));
+        }
+        break;
+    case 2: // short flat
+        for (int i = 4; i < 8; i++) { // the four outer pixels per side are fully random
+            dst[-(1 + i) * stride] = rnd() & bitdepth_max;
+            dst[+(0 + i) * stride] = rnd() & bitdepth_max;
+        }
+        dst[+0 * stride] = rnd() & bitdepth_max;
+        dst[-1 * stride] = iclip_pixel(dst[+0 * stride] + edge_diff);
+        for (int i = 1; i < 4; i++) { // three more pixels per side, each derived from its edge pixel
+            dst[-(1 + i) * stride] = iclip_pixel(dst[-1 * stride] +
+                                                 rnd() % (2 * (F + 1)) - (F + 1));
+            dst[+(0 + i) * stride] = iclip_pixel(dst[+0 * stride] +
+                                                 rnd() % (2 * (F + 1)) - (F + 1));
+        }
+        break;
+    case 3: // normal or hev
+        for (int i = 4; i < 8; i++) { // the four outer pixels per side are fully random
+            dst[-(1 + i) * stride] = rnd() & bitdepth_max;
+            dst[+(0 + i) * stride] = rnd() & bitdepth_max;
+        }
+        dst[+0 * stride] = rnd() & bitdepth_max;
+        dst[-1 * stride] = iclip_pixel(dst[+0 * stride] + edge_diff);
+        for (int i = 1; i < 4; i++) { // each pixel is derived from its neighbour closer to the edge
+            dst[-(1 + i) * stride] = iclip_pixel(dst[-(0 + i) * stride] +
+                                                 rnd() % (2 * (I + 1)) - (I + 1));
+            dst[+(0 + i) * stride] = iclip_pixel(dst[+(i - 1) * stride] +
+                                                 rnd() % (2 * (I + 1)) - (I + 1));
+        }
+        break;
+    }
+}
+// Tests one superblock loop filter (fn) on randomly built edges, once per filter strength.
+static void check_lpf_sb(loopfilter_sb_fn fn, const char *const name,
+                         const int n_blks, const int lf_idx,
+                         const int is_chroma, const int dir)
+{
+    ALIGN_STK_64(pixel, c_dst_mem, 128 * 16,); // pixels filtered by call_ref()
+    ALIGN_STK_64(pixel, a_dst_mem, 128 * 16,); // pixels filtered by call_new()
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const uint32_t *mask,
+                 const uint8_t (*l)[4], ptrdiff_t b4_stride,
+                 const Av1FilterLUT *lut, int w HIGHBD_DECL_SUFFIX);
+
+    pixel *a_dst, *c_dst;
+    ptrdiff_t stride, b4_stride;
+    int w, h;
+    if (dir) { // dir != 0: rows are n_blks * 4 pixels wide and dst starts 8 rows in
+        a_dst = a_dst_mem + n_blks * 4 * 8;
+        c_dst = c_dst_mem + n_blks * 4 * 8;
+        w = n_blks * 4;
+        h = 16;
+        b4_stride = 32;
+    } else { // dir == 0: rows are 16 pixels wide and dst starts 8 pixels in
+        a_dst = a_dst_mem + 8;
+        c_dst = c_dst_mem + 8;
+        w = 16;
+        h = n_blks * 4;
+        b4_stride = 2;
+    }
+    stride = w * sizeof(pixel);
+
+    Av1FilterLUT lut;
+    const int sharp = rnd() & 7; // random sharpness 0..7, fixed for this whole call
+    for (int level = 0; level < 64; level++) { // fill lut.i / lut.e for each of the 64 levels
+        int limit = level;
+
+        if (sharp > 0) {
+            limit >>= (sharp + 3) >> 2;
+            limit = imin(limit, 9 - sharp);
+        }
+        limit = imax(limit, 1); // the limit never drops below 1
+
+        lut.i[level] = limit;
+        lut.e[level] = 2 * (level + 2) + limit;
+    }
+    lut.sharp[0] = (sharp + 3) >> 2;
+    lut.sharp[1] = sharp ? 9 - sharp : 0xff;
+
+    const int n_strengths = is_chroma ? 2 : 3; // number of filter widths tested
+    for (int i = 0; i < n_strengths; i++) {
+        if (check_func(fn, "%s_w%d_%dbpc", name,
+                       is_chroma ? 4 + 2 * i : 4 << i, BITDEPTH))
+        {
+            uint32_t vmask[4] = { 0 };
+            uint8_t l[32 * 2][4];
+
+            for (int j = 0; j < n_blks; j++) {
+                const int idx = rnd() % (i + 2); // 0 leaves block j without any mask bit
+                if (idx) vmask[idx - 1] |= 1U << j;
+                if (dir) { // two random levels per block; their placement in l[] depends on dir
+                    l[j][lf_idx] = rnd() & 63;
+                    l[j + 32][lf_idx] = rnd() & 63;
+                } else {
+                    l[j * 2][lf_idx] = rnd() & 63;
+                    l[j * 2 + 1][lf_idx] = rnd() & 63;
+                }
+            }
+#if BITDEPTH == 16
+            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+            const int bitdepth_max = 0xff;
+#endif
+
+            for (int px = 0; px < 4 * n_blks; px++) { // one border per pixel position along the edge
+                const int x = px >> 2; // index of the 4-pixel block this position belongs to
+                int L; // level for this block: the second entry if non-zero, else the first
+                if (dir) {
+                    L = l[32 + x][lf_idx] ? l[32 + x][lf_idx] : l[x][lf_idx];
+                } else {
+                    L = l[2 * x + 1][lf_idx] ? l[2 * x + 1][lf_idx] : l[2 * x][lf_idx];
+                }
+                init_lpf_border(c_dst + px * (dir ? 1 : 16), dir ? n_blks * 4 : 1,
+                                lut.e[L], lut.i[L], bitdepth_max);
+            }
+            memcpy(a_dst_mem, c_dst_mem, 128 * sizeof(pixel) * 16); // both start from the same pixels
+
+            call_ref(c_dst, stride, vmask,
+                     (const uint8_t(*)[4]) &l[dir ? 32 : 1][lf_idx],
+                     b4_stride, &lut, n_blks HIGHBD_TAIL_SUFFIX);
+            call_new(a_dst, stride, vmask,
+                     (const uint8_t(*)[4]) &l[dir ? 32 : 1][lf_idx],
+                     b4_stride, &lut, n_blks HIGHBD_TAIL_SUFFIX);
+
+            checkasm_check_pixel(c_dst_mem, stride, a_dst_mem, stride,
+                                 w, h, "dst");
+            bench_new(alternate(c_dst, a_dst), stride, vmask,
+                      (const uint8_t(*)[4]) &l[dir ? 32 : 1][lf_idx],
+                      b4_stride, &lut, n_blks HIGHBD_TAIL_SUFFIX);
+        }
+    }
+    report("%s", name); // name is caller data, so it must not be used as the format string
+}
+
+void bitfn(checkasm_check_loopfilter)(void) {
+    // Entry point: initialise the loop filter context, then test each of its four sb filters.
+    Dav1dLoopFilterDSPContext dsp;
+    bitfn(dav1d_loop_filter_dsp_init)(&dsp);
+    // Arguments after the name are n_blks, lf_idx, is_chroma and dir.
+    check_lpf_sb(dsp.loop_filter_sb[0][0], "lpf_h_sb_y", 32, 0, 0, 0);
+    check_lpf_sb(dsp.loop_filter_sb[0][1], "lpf_v_sb_y", 32, 1, 0, 1);
+    check_lpf_sb(dsp.loop_filter_sb[1][0], "lpf_h_sb_uv", 16, 2, 1, 0);
+    check_lpf_sb(dsp.loop_filter_sb[1][1], "lpf_v_sb_uv", 16, 2, 1, 1);
+}
diff --git a/third_party/dav1d/tests/checkasm/looprestoration.c b/third_party/dav1d/tests/checkasm/looprestoration.c
new file mode 100644
index 0000000000..d84f3c476a
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/looprestoration.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "tests/checkasm/checkasm.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "src/levels.h"
+#include "src/looprestoration.h"
+#include "src/tables.h"
+
+static int to_binary(int x) { /* low 4 bits of x spelled as decimal digits, e.g. 5 -> 101 */
+    return 1000 * !!(x & 8) + 100 * !!(x & 4) + 10 * !!(x & 2) + (x & 1);
+}
+
+static void init_tmp(pixel *buf, const ptrdiff_t stride,
+                     const int w, const int h, const int bitdepth_max)
+{
+    /* Randomly offset 8x8 checkerboard of 0 / bitdepth_max, with random
+     * noise XORed into the low bits of every pixel. */
+    const int noise_mask = bitdepth_max >> 4;
+    const int x_off = rnd() & 7;
+    const int y_off = rnd() & 7;
+    for (int y = 0; y < h; y++, buf += PXSTRIDE(stride))
+        for (int x = 0; x < w; x++) {
+            const int bright = ((x + x_off) ^ (y + y_off)) & 8;
+            buf[x] = (bright ? bitdepth_max : 0) ^ (rnd() & noise_mask);
+        }
+}
+
+static void check_wiener(Dav1dLoopRestorationDSPContext *const c, const int bpc) {
+    ALIGN_STK_64(pixel, c_src, 448 * 64 + 64,), *const c_dst = c_src + 64;
+    ALIGN_STK_64(pixel, a_src, 448 * 64 + 64,), *const a_dst = a_src + 64;
+    ALIGN_STK_64(pixel, edge_buf, 448 * 8 + 64,), *const h_edge = edge_buf + 64;
+    pixel left[64][4];
+    LooprestorationParams params;
+    int16_t (*const filter)[8] = params.filter;
+    /* Checks c->wiener[0] (7-tap) and c->wiener[1] (5-tap) for every edge-flag value. */
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride,
+                 const pixel (*const left)[4],
+                 const pixel *lpf, int w, int h,
+                 const LooprestorationParams *params,
+                 enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX);
+
+    for (int t = 0; t < 2; t++) { /* t = 0: 7-tap, t = 1: 5-tap (outer taps 0) */
+        if (check_func(c->wiener[t], "wiener_%dtap_%dbpc", t ? 5 : 7, bpc)) {
+            filter[0][0] = filter[0][6] = t ? 0 : (rnd() & 15) - 5;
+            filter[0][1] = filter[0][5] = (rnd() & 31) - 23;
+            filter[0][2] = filter[0][4] = (rnd() & 63) - 17;
+            filter[0][3] = -(filter[0][0] + filter[0][1] + filter[0][2]) * 2; /* taps 0..6 sum to 0 */
+#if BITDEPTH != 8
+            filter[0][3] += 128; /* ... or to 128 */
+#endif
+            /* filter[1] is built the same way; its taps 0..6 always sum to 128. */
+            filter[1][0] = filter[1][6] = t ? 0 : (rnd() & 15) - 5;
+            filter[1][1] = filter[1][5] = (rnd() & 31) - 23;
+            filter[1][2] = filter[1][4] = (rnd() & 63) - 17;
+            filter[1][3] = 128 - (filter[1][0] + filter[1][1] + filter[1][2]) * 2;
+
+            const int base_w = 1 + (rnd() % 384);
+            const int base_h = 1 + (rnd() & 63); /* 1..64 */
+            const int bitdepth_max = (1 << bpc) - 1;
+
+            init_tmp(c_src, 448 * sizeof(pixel), 448, 64, bitdepth_max);
+            init_tmp(edge_buf, 448 * sizeof(pixel), 448, 8, bitdepth_max);
+            init_tmp((pixel *) left, 4 * sizeof(pixel), 4, 64, bitdepth_max);
+            /* Try every edge-flag value 0..0xf; full size is used when the edge exists. */
+            for (enum LrEdgeFlags edges = 0; edges <= 0xf; edges++) {
+                const int w = edges & LR_HAVE_RIGHT ? 256 : base_w;
+                const int h = edges & LR_HAVE_BOTTOM ? 64 : base_h;
+                /* Make a_src identical to c_src before each pair of calls. */
+                memcpy(a_src, c_src, 448 * 64 * sizeof(pixel));
+
+                call_ref(c_dst, 448 * sizeof(pixel), left,
+                         h_edge, w, h, &params, edges HIGHBD_TAIL_SUFFIX);
+                call_new(a_dst, 448 * sizeof(pixel), left,
+                         h_edge, w, h, &params, edges HIGHBD_TAIL_SUFFIX);
+                if (checkasm_check_pixel(c_dst, 448 * sizeof(pixel),
+                                         a_dst, 448 * sizeof(pixel),
+                                         w, h, "dst"))
+                {
+                    fprintf(stderr, "size = %dx%d, edges = %04d\n",
+                            w, h, to_binary(edges));
+                    break; /* stop at the first mismatching edge combination */
+                }
+            }
+            bench_new(alternate(c_dst, a_dst), 448 * sizeof(pixel), left,
+                      h_edge, 256, 64, &params, 0xf HIGHBD_TAIL_SUFFIX);
+        }
+    }
+}
+
+static void check_sgr(Dav1dLoopRestorationDSPContext *const c, const int bpc) {
+    ALIGN_STK_64(pixel, c_src, 448 * 64 + 64,), *const c_dst = c_src + 64;
+    ALIGN_STK_64(pixel, a_src, 448 * 64 + 64,), *const a_dst = a_src + 64;
+    ALIGN_STK_64(pixel, edge_buf, 448 * 8 + 64,), *const h_edge = edge_buf + 64;
+    pixel left[64][4];
+    LooprestorationParams params;
+    /* Checks c->sgr[0..2] ("5x5", "3x3", "mix") for every edge-flag value 0..0xf. */
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride,
+                 const pixel (*const left)[4],
+                 const pixel *lpf, int w, int h,
+                 const LooprestorationParams *params,
+                 enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX);
+    /* Per variant: test-name part and the dav1d_sgr_params row it is run with. */
+    static const struct { char name[4]; uint8_t idx; } sgr_data[3] = {
+        { "5x5", 14 },
+        { "3x3", 10 },
+        { "mix",  0 },
+    };
+
+    for (int i = 0; i < 3; i++) {
+        if (check_func(c->sgr[i], "sgr_%s_%dbpc", sgr_data[i].name, bpc)) {
+            const uint16_t *const sgr_params = dav1d_sgr_params[sgr_data[i].idx];
+            params.sgr.s0 = sgr_params[0];
+            params.sgr.s1 = sgr_params[1];
+            params.sgr.w0 = sgr_params[0] ? (rnd() & 127) - 96 : 0; /* -96..31, or 0 */
+            params.sgr.w1 = (sgr_params[1] ? 160 - (rnd() & 127) : 33) - params.sgr.w0;
+
+            const int base_w = 1 + (rnd() % 384);
+            const int base_h = 1 + (rnd() & 63); /* 1..64 */
+            const int bitdepth_max = (1 << bpc) - 1;
+
+            init_tmp(c_src, 448 * sizeof(pixel), 448, 64, bitdepth_max);
+            init_tmp(edge_buf, 448 * sizeof(pixel), 448, 8, bitdepth_max);
+            init_tmp((pixel *) left, 4 * sizeof(pixel), 4, 64, bitdepth_max);
+            /* Full width / height is used whenever the right / bottom edge exists. */
+            for (enum LrEdgeFlags edges = 0; edges <= 0xf; edges++) {
+                const int w = edges & LR_HAVE_RIGHT ? 256 : base_w;
+                const int h = edges & LR_HAVE_BOTTOM ? 64 : base_h;
+                /* Make a_src identical to c_src before each pair of calls. */
+                memcpy(a_src, c_src, 448 * 64 * sizeof(pixel));
+
+                call_ref(c_dst, 448 * sizeof(pixel), left, h_edge,
+                         w, h, &params, edges HIGHBD_TAIL_SUFFIX);
+                call_new(a_dst, 448 * sizeof(pixel), left, h_edge,
+                         w, h, &params, edges HIGHBD_TAIL_SUFFIX);
+                if (checkasm_check_pixel(c_dst, 448 * sizeof(pixel),
+                                         a_dst, 448 * sizeof(pixel),
+                                         w, h, "dst"))
+                {
+                    fprintf(stderr, "size = %dx%d, edges = %04d\n",
+                            w, h, to_binary(edges));
+                    break; /* stop at the first mismatching edge combination */
+                }
+            }
+            bench_new(alternate(c_dst, a_dst), 448 * sizeof(pixel), left,
+                      h_edge, 256, 64, &params, 0xf HIGHBD_TAIL_SUFFIX);
+        }
+    }
+}
+
+void bitfn(checkasm_check_looprestoration)(void) {
+    /* 16 bpc builds exercise 10 and 12 bit; other builds exercise 8 bit only. */
+    const int first_bpc = BITDEPTH == 16 ? 10 : 8;
+    const int last_bpc  = BITDEPTH == 16 ? 12 : 8;
+    Dav1dLoopRestorationDSPContext dsp;
+
+    /* The DSP context is re-initialised for every bit depth. */
+    for (int depth = first_bpc; depth <= last_bpc; depth += 2) {
+        bitfn(dav1d_loop_restoration_dsp_init)(&dsp, depth);
+        check_wiener(&dsp, depth);
+    }
+    report("wiener");
+
+    for (int depth = first_bpc; depth <= last_bpc; depth += 2) {
+        bitfn(dav1d_loop_restoration_dsp_init)(&dsp, depth);
+        check_sgr(&dsp, depth);
+    }
+    report("sgr");
+}
diff --git a/third_party/dav1d/tests/checkasm/mc.c b/third_party/dav1d/tests/checkasm/mc.c
new file mode 100644
index 0000000000..047ef7b4a4
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/mc.c
@@ -0,0 +1,790 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "tests/checkasm/checkasm.h"
+
+#include "src/levels.h"
+#include "src/mc.h"
+
+static const char *const filter_names[] = { /* test-name part, indexed by filter */
+    "8tap_regular",        "8tap_regular_smooth", "8tap_regular_sharp",
+    "8tap_sharp_regular",  "8tap_sharp_smooth",   "8tap_sharp",
+    "8tap_smooth_regular", "8tap_smooth",         "8tap_smooth_sharp",
+    "bilinear"
+};
+/* mxy_names[mxy]: bit 0 of mxy enables mx, bit 1 enables my; scaled_paths[p]: test-name suffix. */
+static const char *const mxy_names[] = { "0", "h", "v", "hv" };
+static const char *const scaled_paths[] = { "", "_dy1", "_dy2" };
+
+static int mc_h_next(const int h) {
+    /* Next block height to test. Starting from 2 this yields
+     * 2, 4, 6, 8, 12, 16, 24, 32, 64, 128, ... */
+
+    /* 4, 8, 16 -> 6, 12, 24: step to 1.5x. */
+    if (h == 4 || h == 8 || h == 16)
+        return h + h / 2;
+
+    /* 6, 12, 24 -> 8, 16, 32: clear the lowest set bit, then double. */
+    if (h == 6 || h == 12 || h == 24)
+        return (h & (h - 1)) * 2;
+
+    return h * 2;
+}
+
+static void check_mc(Dav1dMCDSPContext *const c) { /* checks c->mc[] per filter, width, mxy */
+    ALIGN_STK_64(pixel, src_buf, 135 * 135,);
+    PIXEL_RECT(c_dst, 128, 128);
+    PIXEL_RECT(a_dst, 128, 128);
+    const pixel *src = src_buf + 135 * 3 + 3; /* 3 rows and 3 columns into src_buf */
+    const ptrdiff_t src_stride = 135 * sizeof(pixel);
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
+                 ptrdiff_t src_stride, int w, int h, int mx, int my
+                 HIGHBD_DECL_SUFFIX);
+
+    for (int filter = 0; filter < N_2D_FILTERS; filter++)
+        for (int w = 2; w <= 128; w <<= 1) {
+            for (int mxy = 0; mxy < 4; mxy++)
+                if (check_func(c->mc[filter], "mc_%s_w%d_%s_%dbpc",
+                    filter_names[filter], w, mxy_names[mxy], BITDEPTH))
+                {
+                    const int h_min = w <= 32 ? 2 : w / 4;
+                    const int h_max = imax(imin(w * 4, 128), 32);
+                    for (int h = h_min; h <= h_max; h = mc_h_next(h)) {
+                        const int mx = (mxy & 1) ? rnd() % 15 + 1 : 0; /* 0 unless bit 0 set */
+                        const int my = (mxy & 2) ? rnd() % 15 + 1 : 0; /* 0 unless bit 1 set */
+#if BITDEPTH == 16
+                        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                        const int bitdepth_max = 0xff;
+#endif
+                        /* Fresh random source pixels for every height. */
+                        for (int i = 0; i < 135 * 135; i++)
+                            src_buf[i] = rnd() & bitdepth_max;
+
+                        CLEAR_PIXEL_RECT(c_dst);
+                        CLEAR_PIXEL_RECT(a_dst);
+
+                        call_ref(c_dst, c_dst_stride, src, src_stride, w, h,
+                                 mx, my HIGHBD_TAIL_SUFFIX);
+                        call_new(a_dst, a_dst_stride, src, src_stride, w, h,
+                                 mx, my HIGHBD_TAIL_SUFFIX);
+                        checkasm_check_pixel_padded(c_dst, c_dst_stride,
+                                                    a_dst, a_dst_stride,
+                                                    w, h, "dst");
+                        /* Only the regular 8-tap and bilinear filters are benchmarked. */
+                        if (filter == FILTER_2D_8TAP_REGULAR ||
+                            filter == FILTER_2D_BILINEAR)
+                        {
+                            bench_new(a_dst, a_dst_stride, src, src_stride, w, h,
+                                      mx, my HIGHBD_TAIL_SUFFIX);
+                        }
+                    }
+                }
+        }
+    report("mc");
+}
+
+/* Worst-case pattern in the top-left 8x8 corner, random pixels elsewhere (135x135). */
+static void generate_mct_input(pixel *const buf, const int bitdepth_max) {
+    static const int8_t pattern[8] = { -1,  0, -1,  0,  0, -1,  0, -1 };
+    const int sign = -(rnd() & 1); /* 0 or -1: randomly inverts the corner pattern */
+    pixel *px = buf;
+    for (int y = 0; y < 135; y++)
+        for (int x = 0; x < 135; x++, px++)
+            *px = (x < 8 && y < 8 ? pattern[x] ^ pattern[y] ^ sign
+                                  : rnd()) & bitdepth_max;
+}
+
+static void check_mct(Dav1dMCDSPContext *const c) { /* checks c->mct[] per filter, width, mxy */
+    ALIGN_STK_64(pixel, src_buf, 135 * 135,);
+    ALIGN_STK_64(int16_t, c_tmp, 128 * 128,);
+    ALIGN_STK_64(int16_t, a_tmp, 128 * 128,);
+    const pixel *src = src_buf + 135 * 3 + 3; /* 3 rows and 3 columns into src_buf */
+    const ptrdiff_t src_stride = 135 * sizeof(pixel);
+
+    declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
+                 int w, int h, int mx, int my HIGHBD_DECL_SUFFIX);
+
+    for (int filter = 0; filter < N_2D_FILTERS; filter++)
+        for (int w = 4; w <= 128; w <<= 1)
+            for (int mxy = 0; mxy < 4; mxy++)
+                if (check_func(c->mct[filter], "mct_%s_w%d_%s_%dbpc",
+                    filter_names[filter], w, mxy_names[mxy], BITDEPTH))
+                    for (int h = imax(w / 4, 4); h <= imin(w * 4, 128); h <<= 1)
+                    {
+                        const int mx = (mxy & 1) ? rnd() % 15 + 1 : 0; /* 0 unless bit 0 set */
+                        const int my = (mxy & 2) ? rnd() % 15 + 1 : 0; /* 0 unless bit 1 set */
+#if BITDEPTH == 16
+                        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                        const int bitdepth_max = 0xff;
+#endif
+                        generate_mct_input(src_buf, bitdepth_max);
+
+                        call_ref(c_tmp, src, src_stride, w, h,
+                                 mx, my HIGHBD_TAIL_SUFFIX);
+                        call_new(a_tmp, src, src_stride, w, h,
+                                 mx, my HIGHBD_TAIL_SUFFIX);
+                        checkasm_check(int16_t, c_tmp, w * sizeof(*c_tmp),
+                                                a_tmp, w * sizeof(*a_tmp),
+                                                w, h, "tmp");
+                        /* Only the regular 8-tap and bilinear filters are benchmarked. */
+                        if (filter == FILTER_2D_8TAP_REGULAR ||
+                            filter == FILTER_2D_BILINEAR)
+                        {
+                            bench_new(a_tmp, src, src_stride, w, h,
+                                      mx, my HIGHBD_TAIL_SUFFIX);
+                        }
+                    }
+    report("mct");
+}
+
+static void check_mc_scaled(Dav1dMCDSPContext *const c) { /* checks c->mc_scaled[] */
+    ALIGN_STK_64(pixel, src_buf, 263 * 263,);
+    PIXEL_RECT(c_dst, 128, 128);
+    PIXEL_RECT(a_dst, 128, 128);
+    const pixel *src = src_buf + 263 * 3 + 3; /* 3 rows and 3 columns into src_buf */
+    const ptrdiff_t src_stride = 263 * sizeof(pixel);
+#if BITDEPTH == 16
+    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff; /* picked once per call */
+#else
+    const int bitdepth_max = 0xff;
+#endif
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
+                 ptrdiff_t src_stride, int w, int h,
+                 int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX);
+
+    for (int filter = 0; filter < N_2D_FILTERS; filter++)
+        for (int w = 2; w <= 128; w <<= 1) {
+            for (int p = 0; p < 3; ++p) { /* p = 0: random dy; p = 1, 2: dy = p << 10 */
+                if (check_func(c->mc_scaled[filter], "mc_scaled_%s_w%d%s_%dbpc",
+                               filter_names[filter], w, scaled_paths[p], BITDEPTH))
+                {
+                    const int h_min = w <= 32 ? 2 : w / 4;
+                    const int h_max = imax(imin(w * 4, 128), 32);
+                    for (int h = h_min; h <= h_max; h = mc_h_next(h)) {
+                        const int mx = rnd() % 1024;
+                        const int my = rnd() % 1024;
+                        const int dx = rnd() % 2048 + 1;
+                        const int dy = !p
+                            ? rnd() % 2048 + 1
+                            : p << 10; // ystep=1.0 and ystep=2.0 paths
+                        /* Fresh random source pixels for every height. */
+                        for (int k = 0; k < 263 * 263; k++)
+                            src_buf[k] = rnd() & bitdepth_max;
+
+                        CLEAR_PIXEL_RECT(c_dst);
+                        CLEAR_PIXEL_RECT(a_dst);
+
+                        call_ref(c_dst, c_dst_stride, src, src_stride,
+                                 w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
+                        call_new(a_dst, a_dst_stride, src, src_stride,
+                                 w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
+                        checkasm_check_pixel_padded(c_dst, c_dst_stride,
+                                                    a_dst, a_dst_stride,
+                                                    w, h, "dst");
+                        /* Only the regular 8-tap and bilinear filters are benchmarked. */
+                        if (filter == FILTER_2D_8TAP_REGULAR ||
+                            filter == FILTER_2D_BILINEAR)
+                            bench_new(a_dst, a_dst_stride, src, src_stride,
+                                      w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
+                    }
+                }
+            }
+        }
+    report("mc_scaled");
+}
+
+static void check_mct_scaled(Dav1dMCDSPContext *const c) { /* checks c->mct_scaled[] */
+    ALIGN_STK_64(pixel, src_buf, 263 * 263,);
+    ALIGN_STK_64(int16_t, c_tmp,   128 * 128,);
+    ALIGN_STK_64(int16_t, a_tmp,   128 * 128,);
+    const pixel *src = src_buf + 263 * 3 + 3; /* 3 rows and 3 columns into src_buf */
+    const ptrdiff_t src_stride = 263 * sizeof(pixel);
+#if BITDEPTH == 16
+    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff; /* picked once per call */
+#else
+    const int bitdepth_max = 0xff;
+#endif
+
+    declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
+                 int w, int h, int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX);
+
+    for (int filter = 0; filter < N_2D_FILTERS; filter++)
+        for (int w = 4; w <= 128; w <<= 1)
+            for (int p = 0; p < 3; ++p) { /* p = 0: random dy; p = 1, 2: dy = p << 10 */
+                if (check_func(c->mct_scaled[filter], "mct_scaled_%s_w%d%s_%dbpc",
+                               filter_names[filter], w, scaled_paths[p], BITDEPTH))
+                {
+                    const int h_min = imax(w / 4, 4);
+                    const int h_max = imin(w * 4, 128);
+                    for (int h = h_min; h <= h_max; h = mc_h_next(h)) {
+                        const int mx = rnd() % 1024;
+                        const int my = rnd() % 1024;
+                        const int dx = rnd() % 2048 + 1;
+                        const int dy = !p
+                            ? rnd() % 2048 + 1
+                            : p << 10; // ystep=1.0 and ystep=2.0 paths
+                        /* Fresh random source pixels for every height. */
+                        for (int k = 0; k < 263 * 263; k++)
+                            src_buf[k] = rnd() & bitdepth_max;
+
+                        call_ref(c_tmp, src, src_stride,
+                                 w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
+                        call_new(a_tmp, src, src_stride,
+                                 w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
+                        checkasm_check(int16_t, c_tmp, w * sizeof(*c_tmp),
+                                                a_tmp, w * sizeof(*a_tmp),
+                                                w, h, "tmp");
+                        /* Only the regular 8-tap and bilinear filters are benchmarked. */
+                        if (filter == FILTER_2D_8TAP_REGULAR ||
+                            filter == FILTER_2D_BILINEAR)
+                            bench_new(a_tmp, src, src_stride,
+                                      w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
+                    }
+                }
+            }
+    report("mct_scaled");
+}
+
+static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf,
+                     int16_t (*const tmp)[128 * 128], const int bitdepth_max)
+{   /* Each tmp[n] gets the 128x128 sharp 8-tap mct output of a fresh input in buf. */
+    const pixel *const src = buf + 135 * 3 + 3;
+    for (int n = 0; n < 2; n++) {
+        generate_mct_input(buf, bitdepth_max);
+        c->mct[FILTER_2D_8TAP_SHARP](tmp[n], src, 135 * sizeof(pixel),
+                                      128, 128, 8, 8 HIGHBD_TAIL_SUFFIX);
+    }
+}
+
+static void check_avg(Dav1dMCDSPContext *const c) { /* checks c->avg per width and height */
+    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
+    PIXEL_RECT(c_dst, 135, 135); /* doubles as the 135x135 scratch input of init_tmp() */
+    PIXEL_RECT(a_dst, 128, 128);
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
+                 const int16_t *tmp2, int w, int h HIGHBD_DECL_SUFFIX);
+
+    for (int w = 4; w <= 128; w <<= 1)
+        if (check_func(c->avg, "avg_w%d_%dbpc", w, BITDEPTH)) {
+            for (int h = imax(w / 4, 4); h <= imin(w * 4, 128); h <<= 1)
+            {
+#if BITDEPTH == 16
+                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                const int bitdepth_max = 0xff;
+#endif
+                /* Fill tmp[0] and tmp[1]; c_dst is only scratch here and is cleared below. */
+                init_tmp(c, c_dst, tmp, bitdepth_max);
+
+                CLEAR_PIXEL_RECT(c_dst);
+                CLEAR_PIXEL_RECT(a_dst);
+
+                call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
+                call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
+                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
+                                            w, h, "dst");
+
+                bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
+            }
+        }
+    report("avg");
+}
+
+static void check_w_avg(Dav1dMCDSPContext *const c) { /* checks c->w_avg per width and height */
+    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
+    PIXEL_RECT(c_dst, 135, 135); /* doubles as the 135x135 scratch input of init_tmp() */
+    PIXEL_RECT(a_dst, 128, 128);
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
+                 const int16_t *tmp2, int w, int h, int weight HIGHBD_DECL_SUFFIX);
+
+    for (int w = 4; w <= 128; w <<= 1)
+        if (check_func(c->w_avg, "w_avg_w%d_%dbpc", w, BITDEPTH)) {
+            for (int h = imax(w / 4, 4); h <= imin(w * 4, 128); h <<= 1)
+            {
+                int weight = rnd() % 15 + 1; /* new random weight for every height */
+#if BITDEPTH == 16
+                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                const int bitdepth_max = 0xff;
+#endif
+                init_tmp(c, c_dst, tmp, bitdepth_max); /* c_dst is scratch; cleared below */
+
+                CLEAR_PIXEL_RECT(c_dst);
+                CLEAR_PIXEL_RECT(a_dst);
+
+                call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
+                call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
+                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
+                                            w, h, "dst");
+
+                bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
+            }
+        }
+    report("w_avg");
+}
+
+static void check_mask(Dav1dMCDSPContext *const c) { /* checks c->mask per width and height */
+    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
+    PIXEL_RECT(c_dst, 135, 135); /* doubles as the 135x135 scratch input of init_tmp() */
+    PIXEL_RECT(a_dst, 128, 128);
+    ALIGN_STK_64(uint8_t, mask,  128 * 128,);
+    /* The mask is randomised once and shared by every size tested below. */
+    for (int i = 0; i < 128 * 128; i++)
+        mask[i] = rnd() % 65;
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
+                 const int16_t *tmp2, int w, int h, const uint8_t *mask
+                 HIGHBD_DECL_SUFFIX);
+
+    for (int w = 4; w <= 128; w <<= 1)
+        if (check_func(c->mask, "mask_w%d_%dbpc", w, BITDEPTH)) {
+            for (int h = imax(w / 4, 4); h <= imin(w * 4, 128); h <<= 1)
+            {
+#if BITDEPTH == 16
+                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                const int bitdepth_max = 0xff;
+#endif
+                init_tmp(c, c_dst, tmp, bitdepth_max); /* c_dst is scratch; cleared below */
+
+                CLEAR_PIXEL_RECT(c_dst);
+                CLEAR_PIXEL_RECT(a_dst);
+
+                call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
+                call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
+                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
+                                            w, h, "dst");
+
+                bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
+            }
+        }
+    report("mask");
+}
+
+static void check_w_mask(Dav1dMCDSPContext *const c) { /* checks c->w_mask[0..2] */
+    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
+    PIXEL_RECT(c_dst, 135, 135); /* doubles as the 135x135 scratch input of init_tmp() */
+    PIXEL_RECT(a_dst, 128, 128);
+    ALIGN_STK_64(uint8_t, c_mask, 128 * 128,);
+    ALIGN_STK_64(uint8_t, a_mask, 128 * 128,);
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
+                 const int16_t *tmp2, int w, int h, uint8_t *mask, int sign
+                 HIGHBD_DECL_SUFFIX);
+    /* Per w_mask[i]: number used in the test name, and the right-shifts of w / h for the mask. */
+    static const uint16_t ss[] = { 444, 422, 420 };
+    static const uint8_t ss_hor[] = { 0, 1, 1 };
+    static const uint8_t ss_ver[] = { 0, 0, 1 };
+
+    for (int i = 0; i < 3; i++)
+        for (int w = 4; w <= 128; w <<= 1)
+            if (check_func(c->w_mask[i], "w_mask_%d_w%d_%dbpc", ss[i], w,
+                           BITDEPTH))
+            {
+                for (int h = imax(w / 4, 4); h <= imin(w * 4, 128); h <<= 1)
+                {
+                    int sign = rnd() & 1; /* 0 or 1 */
+#if BITDEPTH == 16
+                    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                    const int bitdepth_max = 0xff;
+#endif
+                    init_tmp(c, c_dst, tmp, bitdepth_max); /* c_dst is scratch; cleared below */
+
+                    CLEAR_PIXEL_RECT(c_dst);
+                    CLEAR_PIXEL_RECT(a_dst);
+
+                    call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h,
+                             c_mask, sign HIGHBD_TAIL_SUFFIX);
+                    call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h,
+                             a_mask, sign HIGHBD_TAIL_SUFFIX);
+                    checkasm_check_pixel_padded(c_dst, c_dst_stride,
+                                                a_dst, a_dst_stride,
+                                                w, h, "dst");
+                    checkasm_check(uint8_t, c_mask, w >> ss_hor[i],
+                                            a_mask, w >> ss_hor[i],
+                                            w >> ss_hor[i], h >> ss_ver[i],
+                                            "mask");
+
+                    bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h,
+                              a_mask, sign HIGHBD_TAIL_SUFFIX);
+                }
+            }
+    report("w_mask");
+}
+
+static void check_blend(Dav1dMCDSPContext *const c) { /* checks c->blend per width and height */
+    ALIGN_STK_64(pixel, tmp, 32 * 32,);
+    PIXEL_RECT(c_dst, 32, 32);
+    PIXEL_RECT(a_dst, 32, 32);
+    ALIGN_STK_64(uint8_t, mask, 32 * 32,);
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
+                 int w, int h, const uint8_t *mask);
+
+    for (int w = 4; w <= 32; w <<= 1) {
+        if (check_func(c->blend, "blend_w%d_%dbpc", w, BITDEPTH))
+            for (int h = imax(w / 2, 4); h <= imin(w * 2, 32); h <<= 1) {
+#if BITDEPTH == 16
+                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                const int bitdepth_max = 0xff;
+#endif
+                for (int i = 0; i < 32 * 32; i++) {
+                    tmp[i] = rnd() & bitdepth_max;
+                    mask[i] = rnd() % 65;
+                }
+
+                CLEAR_PIXEL_RECT(c_dst);
+                CLEAR_PIXEL_RECT(a_dst);
+                /* Seed both destinations with the same random w x h pixels. */
+                for (int y = 0; y < h; y++)
+                    for (int x = 0; x < w; x++)
+                        c_dst[y*PXSTRIDE(c_dst_stride) + x] =
+                        a_dst[y*PXSTRIDE(a_dst_stride) + x] = rnd() & bitdepth_max;
+
+                call_ref(c_dst, c_dst_stride, tmp, w, h, mask);
+                call_new(a_dst, a_dst_stride, tmp, w, h, mask);
+                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
+                                            w, h, "dst");
+
+                bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h, mask);
+            }
+    }
+    report("blend");
+}
+
+static void check_blend_v(Dav1dMCDSPContext *const c) { /* checks c->blend_v per width, height */
+    ALIGN_STK_64(pixel, tmp,   32 * 128,);
+    PIXEL_RECT(c_dst, 32, 128);
+    PIXEL_RECT(a_dst, 32, 128);
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
+                 int w, int h);
+
+    for (int w = 2; w <= 32; w <<= 1) {
+        if (check_func(c->blend_v, "blend_v_w%d_%dbpc", w, BITDEPTH))
+            for (int h = 2; h <= (w == 2 ? 64 : 128); h <<= 1) { /* w == 2 stops at h = 64 */
+#if BITDEPTH == 16
+                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                const int bitdepth_max = 0xff;
+#endif
+
+                CLEAR_PIXEL_RECT(c_dst);
+                CLEAR_PIXEL_RECT(a_dst);
+                /* Seed both destinations with the same random w x h pixels. */
+                for (int y = 0; y < h; y++)
+                    for (int x = 0; x < w; x++)
+                        c_dst[y*PXSTRIDE(c_dst_stride) + x] =
+                        a_dst[y*PXSTRIDE(a_dst_stride) + x] = rnd() & bitdepth_max;
+
+                for (int i = 0; i < 32 * 128; i++)
+                    tmp[i] = rnd() & bitdepth_max;
+
+                call_ref(c_dst, c_dst_stride, tmp, w, h);
+                call_new(a_dst, a_dst_stride, tmp, w, h);
+                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
+                                            w, h, "dst");
+
+                bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h);
+            }
+    }
+    report("blend_v");
+}
+
+static void check_blend_h(Dav1dMCDSPContext *const c) { /* checks c->blend_h per width, height */
+    ALIGN_STK_64(pixel, tmp,   128 * 32,);
+    PIXEL_RECT(c_dst, 128, 32);
+    PIXEL_RECT(a_dst, 128, 32);
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
+                 int w, int h);
+
+    for (int w = 2; w <= 128; w <<= 1) {
+        if (check_func(c->blend_h, "blend_h_w%d_%dbpc", w, BITDEPTH))
+            for (int h = (w == 128 ? 4 : 2); h <= 32; h <<= 1) { /* w == 128 starts at h = 4 */
+#if BITDEPTH == 16
+                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+                const int bitdepth_max = 0xff;
+#endif
+                CLEAR_PIXEL_RECT(c_dst);
+                CLEAR_PIXEL_RECT(a_dst);
+                /* Seed both destinations with the same random w x h pixels. */
+                for (int y = 0; y < h; y++)
+                    for (int x = 0; x < w; x++)
+                        c_dst[y*PXSTRIDE(c_dst_stride) + x] =
+                        a_dst[y*PXSTRIDE(a_dst_stride) + x] = rnd() & bitdepth_max;
+
+                for (int i = 0; i < 128 * 32; i++)
+                    tmp[i] = rnd() & bitdepth_max;
+
+                call_ref(c_dst, c_dst_stride, tmp, w, h);
+                call_new(a_dst, a_dst_stride, tmp, w, h);
+                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
+                                            w, h, "dst");
+
+                bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h);
+            }
+    }
+    report("blend_h");
+}
+
+/* Compare the reference and the optimized warp8x8 on a single random 15x15
+ * source block with random mx/my and abcd values, then benchmark the new
+ * implementation with the same arguments. */
+static void check_warp8x8(Dav1dMCDSPContext *const c) {
+    ALIGN_STK_64(pixel, src_buf, 15 * 15,);
+    PIXEL_RECT(c_dst, 8, 8);
+    PIXEL_RECT(a_dst, 8, 8);
+    int16_t abcd[4];
+    const ptrdiff_t src_stride = 15 * sizeof(pixel);
+    /* the functions get a pointer 3 rows and 3 columns into the buffer */
+    const pixel *const src = &src_buf[3 * 15 + 3];
+
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
+                 ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
+                 HIGHBD_DECL_SUFFIX);
+
+    if (check_func(c->warp8x8, "warp_8x8_%dbpc", BITDEPTH)) {
+        /* rnd() draw order: mx, my, bit depth (16 bpc only), abcd, source */
+        const int mx = (rnd() & 0x1fff) - 0xa00;
+        const int my = (rnd() & 0x1fff) - 0xa00;
+#if BITDEPTH == 16
+        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+        const int bitdepth_max = 0xff;
+#endif
+
+        for (int16_t *coef = abcd; coef != abcd + 4; coef++)
+            *coef = (rnd() & 0x1fff) - 0xa00;
+
+        for (pixel *px = src_buf; px != src_buf + 15 * 15; px++)
+            *px = rnd() & bitdepth_max;
+
+        CLEAR_PIXEL_RECT(c_dst);
+        CLEAR_PIXEL_RECT(a_dst);
+
+        call_ref(c_dst, c_dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
+        call_new(a_dst, a_dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
+        checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
+                                    8, 8, "dst");
+
+        bench_new(a_dst, a_dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
+    }
+    report("warp8x8");
+}
+
+/* Same as the warp8x8 check, but for warp8x8t, which writes an 8x8 block of
+ * int16_t values (stride 8, in elements) instead of pixels. */
+static void check_warp8x8t(Dav1dMCDSPContext *const c) {
+    ALIGN_STK_64(pixel, src_buf, 15 * 15,);
+    ALIGN_STK_64(int16_t, c_tmp,  8 *  8,);
+    ALIGN_STK_64(int16_t, a_tmp,  8 *  8,);
+    int16_t abcd[4];
+    const ptrdiff_t src_stride = 15 * sizeof(pixel);
+    /* the functions get a pointer 3 rows and 3 columns into the buffer */
+    const pixel *const src = &src_buf[3 * 15 + 3];
+
+    declare_func(void, int16_t *tmp, ptrdiff_t tmp_stride, const pixel *src,
+                 ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
+                 HIGHBD_DECL_SUFFIX);
+
+    if (check_func(c->warp8x8t, "warp_8x8t_%dbpc", BITDEPTH)) {
+        /* rnd() draw order: mx, my, bit depth (16 bpc only), abcd, source */
+        const int mx = (rnd() & 0x1fff) - 0xa00;
+        const int my = (rnd() & 0x1fff) - 0xa00;
+#if BITDEPTH == 16
+        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+        const int bitdepth_max = 0xff;
+#endif
+
+        for (int16_t *coef = abcd; coef != abcd + 4; coef++)
+            *coef = (rnd() & 0x1fff) - 0xa00;
+
+        for (pixel *px = src_buf; px != src_buf + 15 * 15; px++)
+            *px = rnd() & bitdepth_max;
+
+        call_ref(c_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
+        call_new(a_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
+
+        /* the comparison helper takes its strides in bytes */
+        const ptrdiff_t tmp_bytes = 8 * sizeof(int16_t);
+        checkasm_check(int16_t, c_tmp, tmp_bytes,
+                                a_tmp, tmp_bytes,
+                                8, 8, "tmp");
+
+        bench_new(a_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
+    }
+    report("warp8x8t");
+}
+
+/* Bit flags telling which sides of a block have real image data available;
+ * the flags are combined with bitwise OR (0xf == all four sides). */
+enum EdgeFlags {
+    HAVE_TOP    = 1 << 0,
+    HAVE_BOTTOM = 1 << 1,
+    HAVE_LEFT   = 1 << 2,
+    HAVE_RIGHT  = 1 << 3,
+};
+
+/* Pick the image size and block position along one axis.
+ *
+ * have_before / have_after are non-zero when the image has data on the
+ * low-coordinate (left/top) resp. high-coordinate (right/bottom) side of the
+ * block. The image size *idim is 160 when at least one side is available and
+ * a random value in [1, bdim - 2] otherwise; *pos is then chosen so that the
+ * block sticks out of the image exactly on the unavailable side(s). */
+static void random_offset_1d(int *const pos, int *const idim, const int bdim,
+                             const int have_before, const int have_after)
+{
+    if (!have_before && !have_after) {
+        /* image strictly smaller than the block: overhang on both sides */
+        *idim = 1 + (rnd() % (bdim - 2));
+        assert(bdim - 1 > *idim);
+        *pos = -(1 + (rnd() % (bdim - *idim - 1)));
+        return;
+    }
+
+    *idim = 160;
+    if (have_before && have_after) {
+        /* block fully inside the image */
+        assert(bdim <= *idim);
+        *pos = rnd() % (*idim - bdim + 1);
+    } else if (have_before) {
+        /* block overhangs the high-coordinate edge by at least one sample */
+        *pos = (*idim - bdim) + 1 + (rnd() % (bdim - 1));
+    } else {
+        /* block starts at a negative coordinate */
+        *pos = -(1 + (rnd() % (bdim - 1)));
+    }
+}
+
+/* Choose random image dimensions (*iw, *ih) and a block position (*x, *y) for
+ * a bw x bh block so that the sides flagged in edge are the ones with image
+ * data. The horizontal axis is drawn before the vertical one. */
+static void random_offset_for_edge(int *const x, int *const y,
+                                   const int bw, const int bh,
+                                   int *const iw, int *const ih,
+                                   const enum EdgeFlags edge)
+{
+    random_offset_1d(x, iw, bw, edge & HAVE_LEFT, edge & HAVE_RIGHT);
+    random_offset_1d(y, ih, bh, edge & HAVE_TOP,  edge & HAVE_BOTTOM);
+}
+
+/* Compare the reference and the optimized emu_edge for every power-of-two
+ * width in [4, 128], a range of heights around it, and every edge-flag
+ * combination except 0xf. The source image is a 160x160 random buffer and
+ * the destinations use a stride of 192 pixels. */
+static void check_emuedge(Dav1dMCDSPContext *const c) {
+    ALIGN_STK_64(pixel, c_dst, 135 * 192,);
+    ALIGN_STK_64(pixel, a_dst, 135 * 192,);
+    ALIGN_STK_64(pixel, src,   160 * 160,);
+    const ptrdiff_t dst_stride = 192 * sizeof(pixel);
+    const ptrdiff_t src_stride = 160 * sizeof(pixel);
+
+    for (pixel *px = src; px != src + 160 * 160; px++)
+        *px = rnd() & ((1U << BITDEPTH) - 1);
+
+    declare_func(void, intptr_t bw, intptr_t bh, intptr_t iw, intptr_t ih,
+                 intptr_t x, intptr_t y,
+                 pixel *dst, ptrdiff_t dst_stride,
+                 const pixel *src, ptrdiff_t src_stride);
+
+    int x, y, iw, ih;
+    for (int w = 4; w <= 128; w *= 2) {
+        if (!check_func(c->emu_edge, "emu_edge_w%d_%dbpc", w, BITDEPTH))
+            continue;
+
+        const int h_min = imax(w / 4, 4);
+        const int h_max = imin(w * 4, 128);
+        for (int h = h_min; h <= h_max; h *= 2) {
+            // we skip 0xf, since it implies that we don't need emu_edge
+            for (enum EdgeFlags edge = 0; edge < 0xf; edge++) {
+                /* block size is the nominal size plus up to 7 extra samples */
+                const int bw = w + (rnd() & 7);
+                const int bh = h + (rnd() & 7);
+                random_offset_for_edge(&x, &y, bw, bh, &iw, &ih, edge);
+                call_ref(bw, bh, iw, ih, x, y,
+                         c_dst, dst_stride, src, src_stride);
+                call_new(bw, bh, iw, ih, x, y,
+                         a_dst, dst_stride, src, src_stride);
+                checkasm_check_pixel(c_dst, dst_stride,
+                                     a_dst, dst_stride,
+                                     bw, bh, "dst");
+            }
+        }
+
+        /* benchmark the four single-flag cases with a (w + 7) square block */
+        const int bench_dim = w + 7;
+        for (enum EdgeFlags edge = 1; edge < 0xf; edge <<= 1) {
+            random_offset_for_edge(&x, &y, bench_dim, bench_dim, &iw, &ih, edge);
+            bench_new(bench_dim, bench_dim, iw, ih, x, y,
+                      a_dst, dst_stride, src, src_stride);
+        }
+    }
+    report("emu_edge");
+}
+
+/* Compute the initial horizontal position passed to the resize function as
+ * mx0, for scaling in_w input samples to out_w output samples with a
+ * per-sample step of `step` (14 fractional bits, as implied by the << 14).
+ * The result is reduced to its low 14 bits. */
+static int get_upscale_x0(const int in_w, const int out_w, const int step) {
+    /* half of the accumulated step error over the whole output row */
+    const int half_err = (out_w * step - (in_w << 14)) >> 1;
+    /* rounding term minus the size difference in 13-bit fixed point */
+    const int num = (out_w >> 1) - ((out_w - in_w) << 13);
+    return (num / out_w + 128 - half_err) & 0x3fff;
+}
+
+/* Compare the reference and the optimized resize on one random upscale.
+ *
+ * The source is up to 512 pixels wide and 64 rows tall. A random source
+ * width in [16, 512] is scaled by w_den / 8 with w_den in [9, 16], so the
+ * destination is at most 1024 pixels wide. The benchmark always produces a
+ * 512 pixel wide destination. */
+static void check_resize(Dav1dMCDSPContext *const c) {
+    PIXEL_RECT(c_dst, 1024, 64);
+    PIXEL_RECT(a_dst, 1024, 64);
+    ALIGN_STK_64(pixel, src, 512 * 64,);
+
+    const int height = 64;
+    const int max_src_width = 512;
+    const ptrdiff_t src_stride = 512 * sizeof(pixel);
+
+    /* Parameter names follow the order used by every call below:
+     * destination width, row count, then source width. */
+    declare_func(void, pixel *dst, ptrdiff_t dst_stride,
+                 const pixel *src, ptrdiff_t src_stride,
+                 int dst_w, int h, int src_w, int dx, int mx0
+                 HIGHBD_DECL_SUFFIX);
+
+    if (check_func(c->resize, "resize_%dbpc", BITDEPTH)) {
+#if BITDEPTH == 16
+        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+        const int bitdepth_max = 0xff;
+#endif
+
+        for (int i = 0; i < max_src_width * height; i++)
+            src[i] = rnd() & bitdepth_max;
+
+        const int w_den = 9 + (rnd() & 7);
+        const int src_w = 16 + (rnd() % (max_src_width - 16 + 1));
+        const int dst_w = w_den * src_w >> 3;
+        /* rounded ratio ref_sz / this_sz with 14 fractional bits */
+#define scale_fac(ref_sz, this_sz) \
+    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
+        const int dx = scale_fac(src_w, dst_w);
+#undef scale_fac
+        const int mx0 = get_upscale_x0(src_w, dst_w, dx);
+
+        CLEAR_PIXEL_RECT(c_dst);
+        CLEAR_PIXEL_RECT(a_dst);
+
+        call_ref(c_dst, c_dst_stride, src, src_stride,
+                 dst_w, height, src_w, dx, mx0 HIGHBD_TAIL_SUFFIX);
+        call_new(a_dst, a_dst_stride, src, src_stride,
+                 dst_w, height, src_w, dx, mx0 HIGHBD_TAIL_SUFFIX);
+        checkasm_check_pixel_padded_align(c_dst, c_dst_stride, a_dst, a_dst_stride,
+                                          dst_w, height, "dst", 16, 1);
+
+        /* fixed 512 pixel output; source width follows from the same ratio */
+        bench_new(a_dst, a_dst_stride, src, src_stride,
+                  512, height, 512 * 8 / w_den, dx, mx0 HIGHBD_TAIL_SUFFIX);
+    }
+
+    report("resize");
+}
+
+void bitfn(checkasm_check_mc)(void) { /* runs every mc check in this file, in a fixed order */
+    Dav1dMCDSPContext c;              /* function table handed to each check */
+    bitfn(dav1d_mc_dsp_init)(&c);     /* presumably fills c for this bit depth -- TODO confirm */
+
+    check_mc(&c);
+    check_mct(&c);
+    check_mc_scaled(&c);
+    check_mct_scaled(&c);
+    check_avg(&c);
+    check_w_avg(&c);
+    check_mask(&c);
+    check_w_mask(&c);
+    check_blend(&c);
+    check_blend_v(&c);
+    check_blend_h(&c);
+    check_warp8x8(&c);
+    check_warp8x8t(&c);
+    check_emuedge(&c);
+    check_resize(&c);
+}
diff --git a/third_party/dav1d/tests/checkasm/msac.c b/third_party/dav1d/tests/checkasm/msac.c
new file mode 100644
index 0000000000..b9c89b47cf
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/msac.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright © 2019, VideoLAN and dav1d authors
+ * Copyright © 2019, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "tests/checkasm/checkasm.h"
+
+#include "src/cpu.h"
+#include "src/msac.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#define BUF_SIZE 8192
+
+/* The normal code doesn't use function pointers, so this test declares its
+ * own pointer types and a local table holding one pointer per msac decode
+ * function it checks. */
+typedef unsigned (*decode_symbol_adapt_fn)(MsacContext *s, uint16_t *cdf,
+                                           size_t n_symbols);
+typedef unsigned (*decode_adapt_fn)(MsacContext *s, uint16_t *cdf);
+typedef unsigned (*decode_bool_equi_fn)(MsacContext *s);
+typedef unsigned (*decode_bool_fn)(MsacContext *s, unsigned f);
+
+typedef struct {
+    decode_symbol_adapt_fn decode_symbol_adapt4;  /* checked by check_decode_symbol() */
+    decode_symbol_adapt_fn decode_symbol_adapt8;  /* checked by check_decode_symbol() */
+    decode_symbol_adapt_fn decode_symbol_adapt16; /* checked by check_decode_symbol() */
+    decode_adapt_fn        decode_bool_adapt;     /* checked by check_decode_bool_adapt() */
+    decode_bool_equi_fn    decode_bool_equi;      /* checked by check_decode_bool_equi() */
+    decode_bool_fn         decode_bool;           /* checked by check_decode_bool() */
+    decode_adapt_fn        decode_hi_tok;         /* checked by check_decode_hi_tok() */
+} MsacDSPContext;
+
+/* Fill a 16-entry CDF array for an n-symbol alphabet: entries above index n
+ * are random padding, entry n (the adaptation count) is zero, and entries
+ * n-1 .. 0 are strictly increasing towards index 0, each below 32768. */
+static void randomize_cdf(uint16_t *const cdf, const int n) {
+    int i = 15;
+    while (i > n)
+        cdf[i--] = rnd(); // padding
+    cdf[i] = 0;           // count
+    for (;;) {
+        const int above = cdf[i];
+        /* add at least 1, leaving room for the i - 1 entries still to come */
+        cdf[i - 1] = above + rnd() % (32768 - above - i) + 1;
+        if (--i <= 0)
+            break;
+    }
+}
+
+/* Compare two decoder states field by field, since memcmp() on structs can
+ * have weird behavior due to padding etc.
+ * Returns 0 when every compared field matches and 1 otherwise. */
+static int msac_cmp(const MsacContext *const a, const MsacContext *const b) {
+    const int same = a->buf_pos == b->buf_pos &&
+                     a->buf_end == b->buf_end &&
+                     a->dif == b->dif &&
+                     a->rng == b->rng &&
+                     a->cnt == b->cnt &&
+                     a->allow_update_cdf == b->allow_update_cdf;
+    return !same;
+}
+
+/* Print to stderr everything that differs between the reference and the
+ * optimized run.
+ *
+ * c_res, a_res:  decoded values returned by the two implementations
+ * a, b:          decoder states after the call
+ * cdf_a, cdf_b:  CDF arrays after the call; only read when num_cdf != 0
+ * num_cdf:       number of symbols; entries 0..num_cdf are compared and, on
+ *                mismatch, printed with an 'x' under each differing entry */
+static void msac_dump(unsigned c_res, unsigned a_res,
+                      const MsacContext *const a, const MsacContext *const b,
+                      const uint16_t *const cdf_a, const uint16_t *const cdf_b,
+                      const int num_cdf)
+{
+    if (c_res != a_res)
+        fprintf(stderr, "c_res %u a_res %u\n", c_res, a_res);
+    /* %p expects a pointer to void, hence the explicit casts */
+    if (a->buf_pos != b->buf_pos)
+        fprintf(stderr, "buf_pos %p vs %p\n",
+                (const void *) a->buf_pos, (const void *) b->buf_pos);
+    if (a->buf_end != b->buf_end)
+        fprintf(stderr, "buf_end %p vs %p\n",
+                (const void *) a->buf_end, (const void *) b->buf_end);
+    if (a->dif != b->dif)
+        fprintf(stderr, "dif %zx vs %zx\n", a->dif, b->dif);
+    if (a->rng != b->rng)
+        fprintf(stderr, "rng %u vs %u\n", a->rng, b->rng);
+    if (a->cnt != b->cnt)
+        fprintf(stderr, "cnt %d vs %d\n", a->cnt, b->cnt);
+    /* report the flag only when the two states disagree, like every other
+     * field (a plain truthiness test would miss a == 0, b != 0) */
+    if (a->allow_update_cdf != b->allow_update_cdf)
+        fprintf(stderr, "allow_update_cdf %d vs %d\n",
+                a->allow_update_cdf, b->allow_update_cdf);
+    if (num_cdf && memcmp(cdf_a, cdf_b, sizeof(*cdf_a) * (num_cdf + 1))) {
+        fprintf(stderr, "cdf:\n");
+        for (int i = 0; i <= num_cdf; i++)
+            fprintf(stderr, " %5u", cdf_a[i]);
+        fprintf(stderr, "\n");
+        for (int i = 0; i <= num_cdf; i++)
+            fprintf(stderr, " %5u", cdf_b[i]);
+        fprintf(stderr, "\n");
+        for (int i = 0; i <= num_cdf; i++)
+            fprintf(stderr, "     %c", cdf_a[i] != cdf_b[i] ? 'x' : '.');
+        fprintf(stderr, "\n");
+    }
+}
+
+/* Check c->decode_symbol_adapt<n> for every symbol count in [n_min, n_max].
+ * Each count is run twice: first with 1 and then with 0 as the last argument
+ * of dav1d_msac_init(). Every run decodes 64 symbols and compares the return
+ * value, the decoder state and the first nsym + 1 CDF entries after each
+ * call. Only the second pass with nsym == n - 1 is benchmarked.
+ * Expects c, buf, s_c, s_a and cdf to be in scope at the expansion site. */
+#define CHECK_SYMBOL_ADAPT(n, n_min, n_max) do {                           \
+    if (!check_func(c->decode_symbol_adapt##n,                             \
+                    "msac_decode_symbol_adapt%d", n))                      \
+        break;                                                             \
+    for (int update = 0; update <= 1; update++) {                          \
+        for (int nsym = n_min; nsym <= n_max; nsym++) {                    \
+            dav1d_msac_init(&s_c, buf, BUF_SIZE, !update);                 \
+            s_a = s_c;                                                     \
+            randomize_cdf(cdf[0], nsym);                                   \
+            memcpy(cdf[1], cdf[0], sizeof(*cdf));                          \
+            for (int iter = 0; iter < 64; iter++) {                        \
+                const unsigned ref_res = call_ref(&s_c, cdf[0], nsym);     \
+                const unsigned new_res = call_new(&s_a, cdf[1], nsym);     \
+                const int differs =                                        \
+                    ref_res != new_res || msac_cmp(&s_c, &s_a) ||          \
+                    memcmp(cdf[0], cdf[1], sizeof(**cdf) * (nsym + 1));    \
+                if (differs && fail())                                     \
+                    msac_dump(ref_res, new_res, &s_c, &s_a,                \
+                              cdf[0], cdf[1], nsym);                       \
+            }                                                              \
+            if (update && nsym == n - 1)                                   \
+                bench_new(alternate(&s_c, &s_a),                           \
+                          alternate(cdf[0], cdf[1]), nsym);                \
+        }                                                                  \
+    }                                                                      \
+} while (0)
+
+/* Check the three adaptive symbol decoders (4, 8 and 16 symbol variants),
+ * each over its own range of symbol counts. */
+static void check_decode_symbol(MsacDSPContext *const c, uint8_t *const buf) {
+    MsacContext s_c;
+    MsacContext s_a;
+    /* cdf[0] is used by the reference call, cdf[1] by the new one */
+    ALIGN_STK_32(uint16_t, cdf, 2, [16]);
+
+    declare_func(unsigned, MsacContext *s, uint16_t *cdf, size_t n_symbols);
+
+    CHECK_SYMBOL_ADAPT( 4, 1,  4);
+    CHECK_SYMBOL_ADAPT( 8, 1,  7);
+    CHECK_SYMBOL_ADAPT(16, 3, 15);
+
+    report("decode_symbol");
+}
+
+/* Check decode_bool_adapt: 64 decodes per pass, one pass with 1 and one with
+ * 0 as the last argument of dav1d_msac_init(). Return value, decoder state
+ * and the two-entry CDF are compared after every call; only the second pass
+ * is benchmarked. */
+static void check_decode_bool_adapt(MsacDSPContext *const c, uint8_t *const buf) {
+    MsacContext s_c, s_a;
+
+    declare_func(unsigned, MsacContext *s, uint16_t *cdf);
+    if (!check_func(c->decode_bool_adapt, "msac_decode_bool_adapt"))
+        return;
+
+    uint16_t cdf[2][2];
+    for (int update = 0; update <= 1; update++) {
+        dav1d_msac_init(&s_c, buf, BUF_SIZE, !update);
+        s_a = s_c;
+
+        /* same random value in [1, 32767] and a zero second entry for both */
+        const uint16_t start = rnd() % 32767 + 1;
+        cdf[1][0] = start;
+        cdf[0][0] = start;
+        cdf[1][1] = 0;
+        cdf[0][1] = 0;
+
+        for (int iter = 0; iter < 64; iter++) {
+            const unsigned ref_res = call_ref(&s_c, cdf[0]);
+            const unsigned new_res = call_new(&s_a, cdf[1]);
+            const int differs = ref_res != new_res || msac_cmp(&s_c, &s_a) ||
+                                memcmp(cdf[0], cdf[1], sizeof(*cdf));
+            if (differs && fail())
+                msac_dump(ref_res, new_res, &s_c, &s_a, cdf[0], cdf[1], 1);
+        }
+        if (update)
+            bench_new(alternate(&s_c, &s_a), alternate(cdf[0], cdf[1]));
+    }
+}
+
+/* Check decode_bool_equi: 64 decodes from the shared random buffer, comparing
+ * the return value and the decoder state after every call. */
+static void check_decode_bool_equi(MsacDSPContext *const c, uint8_t *const buf) {
+    MsacContext s_c, s_a;
+
+    declare_func(unsigned, MsacContext *s);
+    if (!check_func(c->decode_bool_equi, "msac_decode_bool_equi"))
+        return;
+
+    dav1d_msac_init(&s_c, buf, BUF_SIZE, 1);
+    s_a = s_c;
+    for (int iter = 0; iter < 64; iter++) {
+        const unsigned ref_res = call_ref(&s_c);
+        const unsigned new_res = call_new(&s_a);
+        const int differs = ref_res != new_res || msac_cmp(&s_c, &s_a);
+        if (differs && fail())
+            msac_dump(ref_res, new_res, &s_c, &s_a, NULL, NULL, 0);
+    }
+    bench_new(alternate(&s_c, &s_a));
+}
+
+/* Check decode_bool: 64 decodes, each with a fresh random 15-bit f, comparing
+ * the return value and the decoder state after every call. The benchmark
+ * uses the fixed value f = 16384. */
+static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) {
+    MsacContext s_c, s_a;
+
+    declare_func(unsigned, MsacContext *s, unsigned f);
+    if (!check_func(c->decode_bool, "msac_decode_bool"))
+        return;
+
+    dav1d_msac_init(&s_c, buf, BUF_SIZE, 1);
+    s_a = s_c;
+    for (int iter = 0; iter < 64; iter++) {
+        const unsigned f = rnd() & 0x7fff;
+        const unsigned ref_res = call_ref(&s_c, f);
+        const unsigned new_res = call_new(&s_a, f);
+        const int differs = ref_res != new_res || msac_cmp(&s_c, &s_a);
+        if (differs && fail())
+            msac_dump(ref_res, new_res, &s_c, &s_a, NULL, NULL, 0);
+    }
+    bench_new(alternate(&s_c, &s_a), 16384);
+}
+
+/* Run the three boolean-decoder checks in order and report them together
+ * under the name "decode_bool". */
+static void check_decode_bool_funcs(MsacDSPContext *const c, uint8_t *const buf) {
+    typedef void (*bool_check_fn)(MsacDSPContext *c, uint8_t *buf);
+    static const bool_check_fn checks[] = {
+        check_decode_bool_adapt,
+        check_decode_bool_equi,
+        check_decode_bool,
+    };
+
+    for (size_t i = 0; i < sizeof(checks) / sizeof(*checks); i++)
+        checks[i](c, buf);
+    report("decode_bool");
+}
+
+/* Check decode_hi_tok with a random 3-symbol CDF: up to 64 decodes per pass,
+ * one pass with 1 and one with 0 as the last argument of dav1d_msac_init().
+ * A pass stops at the first mismatch; only the second pass is benchmarked. */
+static void check_decode_hi_tok(MsacDSPContext *const c, uint8_t *const buf) {
+    ALIGN_STK_16(uint16_t, cdf, 2, [16]);
+    MsacContext s_c, s_a;
+
+    declare_func(unsigned, MsacContext *s, uint16_t *cdf);
+    if (check_func(c->decode_hi_tok, "msac_decode_hi_tok")) {
+        for (int update = 0; update <= 1; update++) {
+            dav1d_msac_init(&s_c, buf, BUF_SIZE, !update);
+            s_a = s_c;
+            randomize_cdf(cdf[0], 3);
+            memcpy(cdf[1], cdf[0], sizeof(*cdf));
+
+            for (int iter = 0; iter < 64; iter++) {
+                const unsigned ref_res = call_ref(&s_c, cdf[0]);
+                const unsigned new_res = call_new(&s_a, cdf[1]);
+                const int differs = ref_res != new_res ||
+                                    msac_cmp(&s_c, &s_a) ||
+                                    memcmp(cdf[0], cdf[1], sizeof(*cdf));
+                if (!differs)
+                    continue;
+                if (fail())
+                    msac_dump(ref_res, new_res, &s_c, &s_a, cdf[0], cdf[1], 3);
+                break;
+            }
+
+            if (update)
+                bench_new(alternate(&s_c, &s_a), alternate(cdf[0], cdf[1]));
+        }
+    }
+    report("decode_hi_tok");
+}
+
+/* Entry point of the msac checks: start from the C implementations, replace
+ * them with the NEON / SSE2 / AVX2 versions when the build and the CPU flags
+ * allow it, then run all checks on one shared random input buffer. */
+void checkasm_check_msac(void) {
+    MsacDSPContext c = {
+        .decode_symbol_adapt4  = dav1d_msac_decode_symbol_adapt_c,
+        .decode_symbol_adapt8  = dav1d_msac_decode_symbol_adapt_c,
+        .decode_symbol_adapt16 = dav1d_msac_decode_symbol_adapt_c,
+        .decode_bool_adapt     = dav1d_msac_decode_bool_adapt_c,
+        .decode_bool_equi      = dav1d_msac_decode_bool_equi_c,
+        .decode_bool           = dav1d_msac_decode_bool_c,
+        .decode_hi_tok         = dav1d_msac_decode_hi_tok_c,
+    };
+
+#if (ARCH_AARCH64 || ARCH_ARM) && HAVE_ASM
+    if (dav1d_get_cpu_flags() & DAV1D_ARM_CPU_FLAG_NEON) {
+        c.decode_symbol_adapt4  = dav1d_msac_decode_symbol_adapt4_neon;
+        c.decode_symbol_adapt8  = dav1d_msac_decode_symbol_adapt8_neon;
+        c.decode_symbol_adapt16 = dav1d_msac_decode_symbol_adapt16_neon;
+        c.decode_bool_adapt     = dav1d_msac_decode_bool_adapt_neon;
+        c.decode_bool_equi      = dav1d_msac_decode_bool_equi_neon;
+        c.decode_bool           = dav1d_msac_decode_bool_neon;
+        c.decode_hi_tok         = dav1d_msac_decode_hi_tok_neon;
+    }
+#elif ARCH_X86 && HAVE_ASM
+    if (dav1d_get_cpu_flags() & DAV1D_X86_CPU_FLAG_SSE2) {
+        c.decode_symbol_adapt4  = dav1d_msac_decode_symbol_adapt4_sse2;
+        c.decode_symbol_adapt8  = dav1d_msac_decode_symbol_adapt8_sse2;
+        c.decode_symbol_adapt16 = dav1d_msac_decode_symbol_adapt16_sse2;
+        c.decode_bool_adapt     = dav1d_msac_decode_bool_adapt_sse2;
+        c.decode_bool_equi      = dav1d_msac_decode_bool_equi_sse2;
+        c.decode_bool           = dav1d_msac_decode_bool_sse2;
+        c.decode_hi_tok         = dav1d_msac_decode_hi_tok_sse2;
+    }
+
+#if ARCH_X86_64
+    /* only the 16-symbol decoder has an AVX2 version here */
+    if (dav1d_get_cpu_flags() & DAV1D_X86_CPU_FLAG_AVX2) {
+        c.decode_symbol_adapt16 = dav1d_msac_decode_symbol_adapt16_avx2;
+    }
+#endif
+#endif
+
+    /* random bytes that every check decodes from */
+    uint8_t buf[BUF_SIZE];
+    for (uint8_t *p = buf; p != buf + BUF_SIZE; p++)
+        *p = rnd();
+
+    check_decode_symbol(&c, buf);
+    check_decode_bool_funcs(&c, buf);
+    check_decode_hi_tok(&c, buf);
+}
diff --git a/third_party/dav1d/tests/checkasm/refmvs.c b/third_party/dav1d/tests/checkasm/refmvs.c
new file mode 100644
index 0000000000..f21c81f85a
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/refmvs.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright © 2021, VideoLAN and dav1d authors
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "tests/checkasm/checkasm.h"
+#include "src/refmvs.h"
+
+#include <stdio.h>
+
+/* Generate a random signed value: the low spel_bits bits are fully random,
+ * then higher bits are added one at a time, stopping on a coin flip or once
+ * total_bits is reached. The sign is random as well. */
+static inline int gen_mv(const int total_bits, int spel_bits) {
+    int magnitude = rnd() & ((1 << spel_bits) - 1);
+    // stopping early on a coin flip makes it relatively more likely to be
+    // close to zero (fpel) than far away
+    for (;;) {
+        magnitude |= (rnd() & 1) << spel_bits;
+        if (!(rnd() & 1))
+            break;
+        if (++spel_bits >= total_bits)
+            break;
+    }
+    if (rnd() & 1)
+        return -magnitude;
+    return magnitude;
+}
+
+/* Compare the reference and the optimized save_tmvs on a random window
+ * (rows row_start8..row_end8, columns col_start8..col_end8) tiled with
+ * random blocks, then benchmark the new implementation on a 128x16 area for
+ * each block size from BS_4x4 up to N_BS_SIZES. */
+static void check_save_tmvs(const Dav1dRefmvsDSPContext *const c) {
+    refmvs_block *rr[31];
+    refmvs_block r[31 * 256];
+    ALIGN_STK_64(refmvs_temporal_block, c_rp, 128 * 16,);
+    ALIGN_STK_64(refmvs_temporal_block, a_rp, 128 * 16,);
+    uint8_t ref_sign[7];
+
+    /* 31 row pointers into r, 256 entries apart */
+    for (int row = 0; row < 31; row++)
+        rr[row] = r + row * 256;
+
+    declare_func(void, refmvs_temporal_block *rp, const ptrdiff_t stride,
+                 refmvs_block *const *const rr, const uint8_t *const ref_sign,
+                 int col_end8, int row_end8, int col_start8, int row_start8);
+
+    if (check_func(c->save_tmvs, "save_tmvs")) {
+        const int row_start8 = rnd() & 7;
+        const int row_end8 = 8 + (rnd() & 7);
+        const int col_start8 = rnd() & 31;
+        const int col_end8 = 96 + (rnd() & 31);
+
+        for (uint8_t *sign = ref_sign; sign != ref_sign + 7; sign++)
+            *sign = rnd() & 1;
+
+        for (int y = row_start8; y < row_end8; y++) {
+            int x = col_start8;
+            while (x < col_end8) {
+                int bs = rnd() % N_BS_SIZES;
+                /* advance through the size table until the block fits into
+                 * the columns that remain */
+                while (x + ((dav1d_block_dimensions[bs][0] + 1) >> 1) > col_end8)
+                    bs++;
+                const int bw8 = (dav1d_block_dimensions[bs][0] + 1) >> 1;
+
+                rr[y * 2][x * 2 + 1] = (refmvs_block) {
+                    .mv.mv[0].x = gen_mv(14, 10),
+                    .mv.mv[0].y = gen_mv(14, 10),
+                    .mv.mv[1].x = gen_mv(14, 10),
+                    .mv.mv[1].y = gen_mv(14, 10),
+                    .ref.ref = { (rnd() % 9) - 1, (rnd() % 9) - 1 },
+                    .bs = bs
+                };
+
+                /* sentinel values in the reference output under this block */
+                for (const int x_end = x + bw8; x < x_end; x++) {
+                    c_rp[y * 128 + x].mv.n = 0xdeadbeef;
+                    c_rp[y * 128 + x].ref = 0xdd;
+                }
+            }
+        }
+
+        call_ref(c_rp + row_start8 * 128, 128, rr, ref_sign,
+                 col_end8, row_end8, col_start8, row_start8);
+        call_new(a_rp + row_start8 * 128, 128, rr, ref_sign,
+                 col_end8, row_end8, col_start8, row_start8);
+
+        for (int y = row_start8; y < row_end8; y++) {
+            for (int x = col_start8; x < col_end8; x++) {
+                const refmvs_temporal_block *const ref_out = &c_rp[y * 128 + x];
+                const refmvs_temporal_block *const new_out = &a_rp[y * 128 + x];
+                if (ref_out->mv.n == new_out->mv.n && ref_out->ref == new_out->ref)
+                    continue;
+                if (!fail())
+                    continue;
+                fprintf(stderr, "[%d][%d] c_rp.mv.x = 0x%x a_rp.mv.x = 0x%x\n",
+                        y, x, ref_out->mv.x, new_out->mv.x);
+                fprintf(stderr, "[%d][%d] c_rp.mv.y = 0x%x a_rp.mv.y = 0x%x\n",
+                        y, x, ref_out->mv.y, new_out->mv.y);
+                fprintf(stderr, "[%d][%d] c_rp.ref = %u a_rp.ref = %u\n",
+                        y, x, ref_out->ref, new_out->ref);
+            }
+        }
+
+        for (int bs = BS_4x4; bs < N_BS_SIZES; bs++) {
+            const int bw8 = (dav1d_block_dimensions[bs][0] + 1) >> 1;
+            /* re-tile 16 rows of 128 columns with blocks of this size */
+            for (int y = 0; y < 16; y++) {
+                for (int x = 0; x < 128; x += bw8) {
+                    refmvs_block *const blk = &rr[y * 2][x * 2 + 1];
+                    blk->ref.ref[0] = (rnd() % 9) - 1;
+                    blk->ref.ref[1] = (rnd() % 9) - 1;
+                    blk->bs = bs;
+                }
+            }
+            bench_new(alternate(c_rp, a_rp), 128, rr, ref_sign, 128, 16, 0, 0);
+        }
+    }
+
+    report("save_tmvs");
+}
+
+/* Compare the reference and the optimized splat_mv for every power-of-two
+ * width in [1, 32] and power-of-two height between w / 4 and w * 4 (clamped
+ * to [1, 32]), writing into two 32x32 grids of refmvs_block. */
+static void check_splat_mv(const Dav1dRefmvsDSPContext *const c) {
+    ALIGN_STK_64(refmvs_block, c_buf, 32 * 32,);
+    ALIGN_STK_64(refmvs_block, a_buf, 32 * 32,);
+    refmvs_block *c_dst[32];
+    refmvs_block *a_dst[32];
+    const size_t stride = 32 * sizeof(refmvs_block);
+
+    /* one row pointer per 32-entry row of each grid */
+    for (int row = 0; row < 32; row++) {
+        a_dst[row] = &a_buf[32 * row];
+        c_dst[row] = &c_buf[32 * row];
+    }
+
+    declare_func(void, refmvs_block **rr, const refmvs_block *rmv,
+                 int bx4, int bw4, int bh4);
+
+    for (int w = 1; w <= 32; w <<= 1) {
+        if (!check_func(c->splat_mv, "splat_mv_w%d", w))
+            continue;
+
+        const int h_min = imax(w / 4, 1);
+        const int h_max = imin(w * 4, 32);
+        /* width of the written area expressed in 32-bit words */
+        const int w_uint32 = w * sizeof(refmvs_block) / sizeof(uint32_t);
+
+        for (int h = h_min; h <= h_max; h <<= 1) {
+            /* random multiple of w below 32 */
+            const int offset = (int) ((unsigned) w * rnd()) & 31;
+            union {
+                refmvs_block rmv;
+                uint32_t u32[3];
+            } ALIGN(tmp, 16);
+            for (int k = 0; k < 3; k++)
+                tmp.u32[k] = rnd();
+
+            call_ref(c_dst, &tmp.rmv, offset, w, h);
+            call_new(a_dst, &tmp.rmv, offset, w, h);
+            checkasm_check(uint32_t, (uint32_t*)(c_buf + offset), stride,
+                                     (uint32_t*)(a_buf + offset), stride,
+                                     w_uint32, h, "dst");
+
+            bench_new(a_dst, &tmp.rmv, 0, w, h);
+        }
+    }
+    report("splat_mv");
+}
+
+void checkasm_check_refmvs(void) {     /* runs both refmvs checks in this file */
+    Dav1dRefmvsDSPContext c;           /* function table handed to each check */
+    dav1d_refmvs_dsp_init(&c);         /* presumably fills c with the functions to test -- TODO confirm */
+
+    check_save_tmvs(&c);
+    check_splat_mv(&c);
+}
diff --git a/third_party/dav1d/tests/checkasm/x86/checkasm.asm b/third_party/dav1d/tests/checkasm/x86/checkasm.asm
new file mode 100644
index 0000000000..8f19ef97f7
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/x86/checkasm.asm
@@ -0,0 +1,475 @@
+; Copyright © 2018, VideoLAN and dav1d authors
+; Copyright © 2018, Two Orioles, LLC
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+;
+; 1. Redistributions of source code must retain the above copyright notice, this
+;    list of conditions and the following disclaimer.
+;
+; 2. Redistributions in binary form must reproduce the above copyright notice,
+;    this list of conditions and the following disclaimer in the documentation
+;    and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%include "config.asm"
+%undef private_prefix
+%define private_prefix checkasm
+%include "ext/x86/x86inc.asm"
+
+SECTION_RODATA 16
+
+%if ARCH_X86_64
+; just random numbers to reduce the chance of incidental match
+%if WIN64
+x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
+x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
+x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
+x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
+x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
+x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
+x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
+x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
+x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
+x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
+n7:  dq 0x21f86d66c8ca00ce
+n8:  dq 0x75b6ba21077c48ad
+%endif
+n9:  dq 0xed56bb2dcb3c7736
+n10: dq 0x8bda43d3fd1a7e06
+n11: dq 0xb64a9c9e5d318408
+n12: dq 0xdf9a54b303f1d3a3
+n13: dq 0x4a75479abd64e097
+n14: dq 0x249214109d5d1c88
+%endif
+
+errmsg_stack: db "stack corruption", 0
+errmsg_register: db "failed to preserve register:%s", 0
+errmsg_vzeroupper: db "missing vzeroupper", 0
+
+SECTION .bss
+
+check_vzeroupper: resd 1
+
+SECTION .text
+
+cextern fail_func
+
+; max number of args used by any asm function.
+; (max_args % 4) must equal 3 for stack alignment
+%define max_args 15
+
+%if UNIX64
+    DECLARE_REG_TMP 0
+%else
+    DECLARE_REG_TMP 4
+%endif
+
+;-----------------------------------------------------------------------------
+; unsigned checkasm_init_x86(char *name)
+;-----------------------------------------------------------------------------
+cglobal init_x86, 0, 5
+; Writes an identification string for the CPU into the buffer passed as the
+; first argument (brand string if available, otherwise the manufacturer id),
+; sets check_vzeroupper when dirty YMM state can be detected via xgetbv, and
+; returns the cpuid leaf 1 signature in eax (0 if leaf 1 is unavailable).
+%if ARCH_X86_64
+    ; cpuid overwrites ebx, so rbx is saved here and restored before RET
+    push          rbx
+%endif
+    movifnidn      t0, r0mp
+    mov           eax, 0x80000000
+    cpuid
+    cmp           eax, 0x80000004
+    jb .no_brand ; processor brand string not supported
+    ; leaves 0x80000002..0x80000004 each return 16 bytes of the brand string
+    mov           eax, 0x80000002
+    cpuid
+    mov     [t0+4* 0], eax
+    mov     [t0+4* 1], ebx
+    mov     [t0+4* 2], ecx
+    mov     [t0+4* 3], edx
+    mov           eax, 0x80000003
+    cpuid
+    mov     [t0+4* 4], eax
+    mov     [t0+4* 5], ebx
+    mov     [t0+4* 6], ecx
+    mov     [t0+4* 7], edx
+    mov           eax, 0x80000004
+    cpuid
+    mov     [t0+4* 8], eax
+    mov     [t0+4* 9], ebx
+    mov     [t0+4*10], ecx
+    mov     [t0+4*11], edx
+    ; leaf 0: eax = highest supported standard leaf, consumed at .check_xcr1
+    xor           eax, eax
+    cpuid
+    jmp .check_xcr1
+.no_brand: ; use manufacturer id as a fallback
+    xor           eax, eax
+    mov      [t0+4*3], eax ; zero the dword following the 12-byte id
+    cpuid
+    mov      [t0+4*0], ebx
+    mov      [t0+4*1], edx
+    mov      [t0+4*2], ecx
+.check_xcr1:
+    ; eax = highest supported standard cpuid leaf on both paths
+    test          eax, eax
+    jz .end2 ; cpuid leaf 1 not supported
+    mov           t0d, eax ; max leaf
+    mov           eax, 1
+    cpuid
+    and           ecx, 0x18000000
+    cmp           ecx, 0x18000000
+    jne .end2 ; osxsave/avx not supported
+    cmp           t0d, 13 ; cpuid leaf 13 not supported
+    jb .end2
+    ; the name pointer in t0 is no longer needed; reuse t0 for the return value
+    mov           t0d, eax ; cpuid signature
+    mov           eax, 13
+    mov           ecx, 1
+    cpuid
+    test           al, 0x04
+    jz .end ; xcr1 not supported
+    mov           ecx, 1
+    xgetbv
+    test           al, 0x04
+    jnz .end ; always-dirty ymm state
+    ; ecx is still 1 here, so this stores a non-zero "enabled" flag
+%if ARCH_X86_64 == 0 && PIC
+    LEA           eax, check_vzeroupper
+    mov         [eax], ecx
+%else
+    mov [check_vzeroupper], ecx
+%endif
+.end:
+    mov           eax, t0d
+.end2:
+%if ARCH_X86_64
+    pop           rbx
+%endif
+    RET
+
+%if ARCH_X86_64
+%if WIN64
+    %define stack_param rsp+32 ; shadow space
+    %define num_fn_args rsp+stack_offset+17*8
+    %assign num_reg_args 4
+    %assign free_regs 7
+    %assign clobber_mask_stack_bit 16
+    DECLARE_REG_TMP 4
+%else
+    %define stack_param rsp
+    %define num_fn_args rsp+stack_offset+11*8
+    %assign num_reg_args 6
+    %assign free_regs 9
+    %assign clobber_mask_stack_bit 64
+    DECLARE_REG_TMP 7
+%endif
+
+; Conditionally replaces the upper 32 bits of a 64-bit register with the
+; pattern held in r8, keeping the lower 32 bits intact.
+;   %1 = register to modify
+;   %2 = bit to test in the low byte of the clobber mask (r9b); the register
+;        is only modified when that bit is set
+; Expects r8 and r9 to be set up by the caller; overwrites r13 and the flags.
+%macro CLOBBER_UPPER 2 ; reg, mask_bit
+    mov          r13d, %1d ; r13 = zero-extended low half of the register
+    or            r13, r8
+    test          r9b, %2
+    cmovnz         %1, r13
+%endmacro
+
+;-----------------------------------------------------------------------------
+; checkasm_checked_call (x86-64)
+;
+; Calls the function under test with its arguments forwarded, then verifies
+; that the callee did not overwrite the canaries placed above its stack
+; parameters, preserved the registers preloaded with the n*/x* constants, and
+; (when check_vzeroupper is set) did not leave dirty YMM state behind.
+; On a violation fail_func is called with a message; the callee's rax/rdx
+; return value is kept in r10/r11 around that call and restored afterwards.
+; NOTE(review): rN/tN/mN are presumably the x86inc register aliases rather
+; than raw hardware register numbers - confirm against ext/x86/x86inc.asm.
+;-----------------------------------------------------------------------------
+cglobal checked_call, 2, 15, 16, max_args*8+64+8
+    ; The argument count, the function pointer and the clobber mask are read
+    ; from the caller's stack: count at num_fn_args, then count*8 bytes
+    ; further the function pointer, followed by the clobber mask.
+    mov          r10d, [num_fn_args]
+    mov            r8, 0xdeadbeef00000000
+    mov           r9d, [num_fn_args+r10*8+8] ; clobber_mask
+    mov            t0, [num_fn_args+r10*8]   ; func
+
+    ; Clobber the upper halves of 32-bit parameters
+    CLOBBER_UPPER  r0, 1
+    CLOBBER_UPPER  r1, 2
+    CLOBBER_UPPER  r2, 4
+    CLOBBER_UPPER  r3, 8
+%if UNIX64
+    CLOBBER_UPPER  r4, 16
+    CLOBBER_UPPER  r5, 32
+%else ; WIN64
+    ; preload m6..m15 with the x6..x15 constants; they are compared again
+    ; after the call
+%assign i 6
+%rep 16-6
+    mova       m %+ i, [x %+ i]
+    %assign i i+1
+%endrep
+%endif
+
+    ; r10d = max(number of args - num_reg_args, 0)
+    xor          r11d, r11d
+    sub          r10d, num_reg_args
+    cmovs        r10d, r11d ; num stack args
+
+    ; write stack canaries to the area above parameters passed on the stack
+    ; (canary value = bitwise NOT of this function's return address)
+    mov           r12, [rsp+stack_offset] ; return address
+    not           r12
+%assign i 0
+%rep 8 ; 64 bytes
+    mov [stack_param+(r10+i)*8], r12
+    %assign i i+1
+%endrep
+
+    test         r10d, r10d
+    jz .stack_setup_done ; no stack parameters
+.copy_stack_parameter:
+    ; r11 = index of the stack parameter being copied into the new frame
+    mov           r12, [stack_param+stack_offset+8+r11*8]
+    CLOBBER_UPPER r12, clobber_mask_stack_bit
+    ; shift the mask so the same bit position selects the next parameter
+    shr           r9d, 1
+    mov [stack_param+r11*8], r12
+    inc          r11d
+    cmp          r11d, r10d
+    jl .copy_stack_parameter
+.stack_setup_done:
+
+    ; preload r14 down to r<free_regs> with the n* constants
+%assign i 14
+%rep 15-free_regs
+    mov        r %+ i, [n %+ i]
+    %assign i i-1
+%endrep
+    call           t0
+
+    ; check for stack corruption
+    mov           r0d, [num_fn_args]
+    xor           r3d, r3d
+    sub           r0d, num_reg_args
+    cmovs         r0d, r3d ; num stack args
+
+    ; r3 = expected canary; r4 accumulates (canary XOR expected) over all
+    ; eight slots and is zero only if every canary is intact
+    mov            r3, [rsp+stack_offset]
+    mov            r4, [stack_param+r0*8]
+    not            r3
+    xor            r4, r3
+%assign i 1
+%rep 6
+    mov            r5, [stack_param+(r0+i)*8]
+    xor            r5, r3
+    or             r4, r5
+    %assign i i+1
+%endrep
+    xor            r3, [stack_param+(r0+7)*8]
+    or             r4, r3
+    jz .stack_ok
+    ; Save the return value located in rdx:rax first to prevent clobbering.
+    mov           r10, rax
+    mov           r11, rdx
+    lea            r0, [errmsg_stack]
+    jmp .fail
+.stack_ok:
+    ; r3 and r4 are both zero here (r4 == 0 after OR-ing r3 into it), which
+    ; the bit accumulation below relies on
+
+    ; check for failure to preserve registers
+    ; each iteration shifts r3d left by one and adds a 1 bit on mismatch
+%assign i 14
+%rep 15-free_regs
+    cmp        r %+ i, [n %+ i]
+    setne         r4b
+    lea           r3d, [r4+r3*2]
+    %assign i i-1
+%endrep
+%if WIN64
+    lea            r0, [rsp+32] ; account for shadow space
+    mov            r5, r0
+    test          r3d, r3d
+    jz .gpr_ok
+%else
+    test          r3d, r3d
+    jz .gpr_xmm_ok
+    mov            r0, rsp
+%endif
+    ; Build a string of " rN" names at r0: every name is written
+    ; unconditionally, but r0 only advances past it when the register's
+    ; mismatch bit is set, so names of preserved registers get overwritten.
+%assign i free_regs
+%rep 15-free_regs
+%if i < 10
+    mov    dword [r0], " r0" + (i << 16)
+    lea            r4, [r0+3]
+%else
+    mov    dword [r0], " r10" + ((i - 10) << 24)
+    lea            r4, [r0+4]
+%endif
+    test          r3b, 1 << (i - free_regs)
+    cmovnz         r0, r4
+    %assign i i+1
+%endrep
+%if WIN64 ; xmm registers
+.gpr_ok:
+    ; XOR each register with its expected constant, then reduce the
+    ; differences with saturating packs into m6
+%assign i 6
+%rep 16-6
+    pxor       m %+ i, [x %+ i]
+    %assign i i+1
+%endrep
+    packsswb       m6, m7
+    packsswb       m8, m9
+    packsswb      m10, m11
+    packsswb      m12, m13
+    packsswb      m14, m15
+    packsswb       m6, m6
+    packsswb       m8, m10
+    packsswb      m12, m14
+    packsswb       m6, m6
+    packsswb       m8, m12
+    packsswb       m6, m8
+    pxor           m7, m7
+    pcmpeqb        m6, m7
+    pmovmskb      r3d, m6
+    cmp           r3d, 0xffff
+    je .xmm_ok
+    ; append " xmmN" names using the same write-then-maybe-advance scheme;
+    ; here a clear bit i in r3d (non-zero difference) advances r0
+    mov           r7d, " xmm"
+%assign i 6
+%rep 16-6
+    mov        [r0+0], r7d
+%if i < 10
+    mov   byte [r0+4], "0" + i
+    lea            r4, [r0+5]
+%else
+    mov   word [r0+4], "10" + ((i - 10) << 8)
+    lea            r4, [r0+6]
+%endif
+    test          r3d, 1 << i
+    cmovz          r0, r4
+    %assign i i+1
+%endrep
+.xmm_ok:
+    ; r5 = start of the name buffer; equal pointers mean nothing was appended
+    cmp            r0, r5
+    je .gpr_xmm_ok
+    mov     byte [r0], 0
+    mov           r11, rdx
+    mov            r1, r5
+%else
+    mov     byte [r0], 0
+    mov           r11, rdx
+    mov            r1, rsp
+%endif
+    ; r1 = NUL-terminated list of register names for the "%s" in the message
+    mov           r10, rax
+    lea            r0, [errmsg_register]
+    jmp .fail
+.gpr_xmm_ok:
+    ; Check for dirty YMM state, i.e. missing vzeroupper
+    mov           ecx, [check_vzeroupper]
+    test          ecx, ecx
+    jz .ok ; not supported, skip
+    ; xgetbv overwrites eax/edx, so stash the callee's return value first
+    mov           r10, rax
+    mov           r11, rdx
+    xgetbv
+    test           al, 0x04
+    jz .restore_retval ; clean ymm state
+    lea            r0, [errmsg_vzeroupper]
+    vzeroupper
+.fail:
+    ; Call fail_func() with a descriptive message to mark it as a failure.
+    xor           eax, eax
+    call fail_func
+.restore_retval:
+    mov           rax, r10
+    mov           rdx, r11
+.ok:
+    RET
+
+; trigger a warmup of vector units
+; Emits a `warmup` function for the instruction set selected by the preceding
+; INIT_* line. The function zeroes m0 and multiplies it by itself; the result
+; is discarded, only the execution of vector instructions matters.
+%macro WARMUP 0
+cglobal warmup, 0, 0
+    xorps          m0, m0
+    mulps          m0, m0
+    RET
+%endmacro
+
+INIT_YMM avx2
+WARMUP
+INIT_ZMM avx512
+WARMUP
+
+%else
+
+; just random numbers to reduce the chance of incidental match
+%assign n3 0x6549315c
+%assign n4 0xe02f3e23
+%assign n5 0xb78d0d1d
+%assign n6 0x33627ba7
+
+;-----------------------------------------------------------------------------
+; void checkasm_checked_call(void *func, ...)
+;-----------------------------------------------------------------------------
+cglobal checked_call, 1, 7
+    ; r3 = canary value (bitwise NOT of the return address)
+    ; r1 = number of stack parameters, r2 = number of canaries to push
+    mov            r3, [esp+stack_offset]      ; return address
+    mov            r1, [esp+stack_offset+17*4] ; num_stack_params
+    mov            r2, 27
+    not            r3
+    sub            r2, r1
+    ; canaries plus parameters add up to 27 dwords, released again at .ok
+.push_canary:
+    push           r3
+    dec            r2
+    jg .push_canary
+    ; esp drops by 4 on every push, so the fixed offset below walks downwards
+    ; through the caller's parameters.
+    ; NOTE(review): the loop body runs at least once; presumably
+    ; num_stack_params is always >= 1 - confirm against the C caller.
+.push_parameter:
+    push dword [esp+32*4]
+    dec            r1
+    jg .push_parameter
+    ; preload r3..r6 with known constants to detect clobbering by the callee
+    mov            r3, n3
+    mov            r4, n4
+    mov            r5, n5
+    mov            r6, n6
+    call           r0
+
+    ; check for failure to preserve registers
+    ; r3 ends up holding one flag byte per register:
+    ; byte 3 = r3, byte 2 = r4, byte 1 = r5, byte 0 = r6
+    cmp            r3, n3
+    setne         r3h
+    cmp            r4, n4
+    setne         r3b
+    shl           r3d, 16
+    cmp            r5, n5
+    setne         r3h
+    cmp            r6, n6
+    setne         r3b
+    test           r3, r3
+    jz .gpr_ok
+    ; build the list of register names at esp+16 and store its address as the
+    ; second stack argument for fail_func
+    lea            r1, [esp+16]
+    mov       [esp+4], r1
+    ; every name is written unconditionally; r1 only advances past it when
+    ; that register's flag byte is set
+%assign i 3
+%rep 4
+    mov    dword [r1], " r0" + (i << 16)
+    lea            r4, [r1+3]
+    test           r3, 1 << ((6 - i) * 8)
+    cmovnz         r1, r4
+    %assign i i+1
+%endrep
+    mov     byte [r1], 0
+    ; keep the callee's edx:eax return value in r6:r5 across fail_func
+    mov            r5, eax
+    mov            r6, edx
+    LEA            r1, errmsg_register
+    jmp .fail
+.gpr_ok:
+    ; check for stack corruption
+    mov            r3, [esp+48*4] ; num_stack_params
+    mov            r6, [esp+31*4] ; return address
+    mov            r4, [esp+r3*4]
+    sub            r3, 26
+    not            r6
+    xor            r4, r6
+    ; r3 counts up from (num_stack_params - 26) to 0; r4 accumulates
+    ; (canary XOR expected) and stays zero only if every canary is intact
+.check_canary:
+    mov            r5, [esp+(r3+27)*4]
+    xor            r5, r6
+    or             r4, r5
+    inc            r3
+    jl .check_canary
+    ; keep the callee's edx:eax return value in r6:r5
+    mov            r5, eax
+    mov            r6, edx
+    test           r4, r4
+    jz .stack_ok
+    LEA            r1, errmsg_stack
+    jmp .fail
+.stack_ok:
+    ; check for dirty YMM state, i.e. missing vzeroupper
+    LEA           ecx, check_vzeroupper
+    mov           ecx, [ecx]
+    test          ecx, ecx
+    jz .ok ; not supported, skip
+    xgetbv
+    test           al, 0x04
+    jz .ok ; clean ymm state
+    LEA            r1, errmsg_vzeroupper
+    vzeroupper
+.fail:
+    ; first stack argument = message
+    mov         [esp], r1
+    call fail_func
+.ok:
+    ; drop the 27 dwords pushed above and restore the callee's return value
+    add           esp, 27*4
+    mov           eax, r5
+    mov           edx, r6
+    RET
+
+%endif ; ARCH_X86_64
diff --git a/third_party/dav1d/tests/dav1d_argon.bash b/third_party/dav1d/tests/dav1d_argon.bash
new file mode 100755
index 0000000000..0c35663834
--- /dev/null
+++ b/third_party/dav1d/tests/dav1d_argon.bash
@@ -0,0 +1,175 @@
+#!/usr/bin/env bash
+
+DAV1D="tools/dav1d"
+ARGON_DIR='.'
+FILMGRAIN=1
+CPUMASK=-1
+THREADS=0
+JOBS=1
+
+# Print the command line help to stderr and terminate with exit status 1.
+usage() {
+    NAME=$(basename "$0")
+    {
+        printf "Usage:   %s [-d dav1d] [-a argondir] [-g \$filmgrain] [-c \$cpumask] [-t threads] [-j jobs] [DIRECTORY]...\n" "$NAME"
+        printf "Example: %s -d /path/to/dav1d -a /path/to/argon/ -g 0 -c avx2 profile0_core\n" "$NAME"
+        printf "Used to verify that dav1d can decode the Argon AV1 test vectors correctly.\n\n"
+        printf " DIRECTORY one or more dirs in the argon folder to check against\n"
+        printf "             (default: everything except large scale tiles and stress files)\n"
+        printf " -d dav1d  path to dav1d executable (default: tools/dav1d)\n"
+        printf " -a dir    path to argon dir (default: 'tests/argon' if found; '.' otherwise)\n"
+        printf " -g \$num   enable filmgrain (default: 1)\n"
+        printf " -c \$mask  use restricted cpumask (default: -1)\n"
+        printf " -t \$num   number of threads per dav1d (default: 0)\n"
+        printf " -j \$num   number of parallel dav1d processes (default: 1)\n\n"
+    } >&2
+    exit 1
+}
+
+# Print all arguments as a single bold bright-red line on stderr, then
+# terminate the script with exit status 1.
+error() {
+    >&2 printf "\033[1;91m%s\033[0m\n" "$*"
+    exit 1
+}
+
+# Report a verification mismatch for file $1 and count it in the global
+# `failed` counter. "\033[1K\r" erases the current line up to the cursor and
+# returns to column 0, so the message replaces the progress line.
+fail() {
+    printf "\033[1K\rMismatch in %s\n" "$1"
+    (( failed++ ))
+}
+
+# Partition the global `pids` array: PIDs that no longer accept signal 0 are
+# collected in `done_pids`, the rest stay in `pids` (via `new_pids`).
+check_pids() {
+    new_pids=()
+    done_pids=()
+    for p in "${pids[@]}"; do
+        if ! kill -0 "$p" 2>/dev/null; then
+            done_pids+=("$p")
+        else
+            new_pids+=("$p")
+        fi
+    done
+    pids=("${new_pids[@]}")
+}
+
+# Wait for every PID given as an argument. For each one that exits with a
+# non-zero status, look up the file name stored in the variable "file<pid>"
+# and report it through fail.
+wait_pids() {
+    pid_list=("$@")
+    for p in "${pid_list[@]}"; do
+        wait "$p" && continue
+        local file_varname="file$p"
+        fail "${!file_varname}"
+    done
+}
+
+block_pids() {
+    while [ ${#pids[@]} -ge "$JOBS" ]; do
+        check_pids
+        if [ ${#done_pids} -eq 0 ]; then
+            sleep 0.2
+        else
+            wait_pids "${done_pids[@]}"
+        fi
+    done
+}
+
+# Wait for every job still tracked in `pids`, recording failures via wait_pids.
+wait_all_pids() {
+    wait_pids "${pids[@]}"
+}
+
+# find tests/argon
+# (an "argon" directory next to this script replaces the '.' default; an
+# explicit -a option below still takes precedence)
+tests_dir=$(dirname "$(readlink -f "$0")")
+if [ -d "$tests_dir/argon" ]; then
+    ARGON_DIR="$tests_dir/argon"
+fi
+
+# The leading ':' in the optstring selects getopts' silent error mode: an
+# unknown option arrives as '?' with the offending letter in OPTARG, and a
+# missing option argument arrives as ':' (handled by the catch-all branch).
+while getopts ":d:a:g:c:t:j:" opt; do
+    case "$opt" in
+        d)
+            DAV1D="$OPTARG"
+            ;;
+        a)
+            ARGON_DIR="$OPTARG"
+            ;;
+        g)
+            FILMGRAIN="$OPTARG"
+            ;;
+        c)
+            CPUMASK="$OPTARG"
+            ;;
+        t)
+            THREADS="$OPTARG"
+            ;;
+        j)
+            JOBS="$OPTARG"
+            ;;
+        \?)
+            printf "Error! Invalid option: -%s\n" "$OPTARG" >&2
+            usage
+            ;;
+        *)
+            usage
+            ;;
+    esac
+done
+# Drop the parsed options; the remaining arguments are directory names.
+shift $((OPTIND-1))
+
+if [ "$#" -eq 0 ]; then
+    # Everything except large scale tiles and stress files.
+    dirs=("$ARGON_DIR/profile0_core"       "$ARGON_DIR/profile0_core_special"
+          "$ARGON_DIR/profile0_not_annexb" "$ARGON_DIR/profile0_not_annexb_special"
+          "$ARGON_DIR/profile1_core"       "$ARGON_DIR/profile1_core_special"
+          "$ARGON_DIR/profile1_not_annexb" "$ARGON_DIR/profile1_not_annexb_special"
+          "$ARGON_DIR/profile2_core"       "$ARGON_DIR/profile2_core_special"
+          "$ARGON_DIR/profile2_not_annexb" "$ARGON_DIR/profile2_not_annexb_special"
+          "$ARGON_DIR/profile_switching")
+else
+    mapfile -t dirs < <(printf "${ARGON_DIR}/%s\n" "$@" | sort -u)
+fi
+
+# Run "$DAV1D -v" once: its output becomes part of the final summary line,
+# and a failing invocation aborts the script before any file is checked.
+ver_info="dav1d $("$DAV1D" -v 2>&1) filmgrain=$FILMGRAIN cpumask=$CPUMASK" || error "Error! Can't run $DAV1D"
+files=()
+
+# Collect the .obu streams of every selected directory; ${d/%\//} strips a
+# trailing slash from the directory name first.
+for d in "${dirs[@]}"; do
+    if [ -d "$d/streams" ]; then
+        files+=("${d/%\//}"/streams/*.obu)
+    fi
+done
+
+if [ ${#files[@]} -eq 0 ]; then
+    error "Error! No files found at ${dirs[*]}"
+fi
+
+# Verify every collected stream against its reference md5, either serially or
+# with up to $JOBS dav1d processes in the background.
+failed=0
+pids=()
+for i in "${!files[@]}"; do
+    f="${files[i]}"
+    # The md5 file sits in a sibling directory of "streams" (chosen by the
+    # film grain setting) and has the same name with an .md5 extension.
+    if [ "$FILMGRAIN" -eq 0 ]; then
+        md5=${f/\/streams\//\/md5_no_film_grain\/}
+    else
+        md5=${f/\/streams\//\/md5_ref\/}
+    fi
+    md5=$(<"${md5/%obu/md5}") || error "Error! Can't read md5 ${md5} for file ${f}"
+    # Keep only the text before the first space of the md5 file's content.
+    md5=${md5/ */}
+
+    printf "\033[1K\r[%3d%% %d/%d] Verifying %s" "$(((i+1)*100/${#files[@]}))" "$((i+1))" "${#files[@]}" "$f"
+    cmd=("$DAV1D" -i "$f" --filmgrain "$FILMGRAIN" --verify "$md5" --cpumask "$CPUMASK" --threads "$THREADS" -q)
+    if [ "$JOBS" -gt 1 ]; then
+        "${cmd[@]}" 2>/dev/null &
+        p=$!
+        pids+=("$p")
+        # Remember which file this PID verifies; wait_pids reads it back
+        # through the variable name "file<pid>".
+        declare "file$p=$f"
+        block_pids
+    else
+        if ! "${cmd[@]}" 2>/dev/null; then
+            fail "$f"
+        fi
+    fi
+done
+
+# Reap the background jobs that are still running (no-op when pids is empty).
+wait_all_pids
+
+if [ "$failed" -ne 0 ]; then
+    printf "\033[1K\r%d/%d files \033[1;91mfailed\033[0m to verify" "$failed" "${#files[@]}"
+else
+    printf "\033[1K\r%d files \033[1;92msuccessfully\033[0m verified" "${#files[@]}"
+fi
+# SECONDS is bash's count of seconds since the shell was started.
+printf " in %dm%ds (%s)\n" "$((SECONDS/60))" "$((SECONDS%60))" "$ver_info"
+
+# The exit status is the number of failed files.
+# NOTE(review): exit statuses are reduced modulo 256, so a failure count that
+# is a multiple of 256 would be reported as success - confirm if this matters.
+exit $failed
diff --git a/third_party/dav1d/tests/header_test.c b/third_party/dav1d/tests/header_test.c
new file mode 100644
index 0000000000..2cd9eca7f9
--- /dev/null
+++ b/third_party/dav1d/tests/header_test.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include DAV1D_TEST_HEADER
+
+/* Intentionally empty: this translation unit only has to compile with the
+ * header named by DAV1D_TEST_HEADER included on its own. */
+int main(void)
+{
+    return 0;
+}
diff --git a/third_party/dav1d/tests/libfuzzer/alloc_fail.c b/third_party/dav1d/tests/libfuzzer/alloc_fail.c
new file mode 100644
index 0000000000..ddd1dd71ab
--- /dev/null
+++ b/third_party/dav1d/tests/libfuzzer/alloc_fail.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "alloc_fail.h"
+
+static int fail_probability;
+
+/* Seeds rand() with `seed` and stores the failure threshold that the
+ * __wrap_* functions compare against rand(). A `probability` of RAND_MAX or
+ * more is halved repeatedly until it is strictly below RAND_MAX. */
+void dav1d_setup_alloc_fail(unsigned seed, unsigned probability) {
+    unsigned threshold = probability;
+
+    srand(seed);
+
+    for (; threshold >= RAND_MAX; threshold >>= 1) {}
+
+    fail_probability = threshold;
+}
+
+void * __wrap_malloc(size_t);
+
+/* malloc() replacement that returns NULL whenever rand() falls below
+ * fail_probability and forwards to malloc() otherwise. */
+void * __wrap_malloc(size_t sz) {
+    const int inject_failure = rand() < fail_probability;
+    return inject_failure ? NULL : malloc(sz);
+}
+
+#if defined(HAVE_POSIX_MEMALIGN)
+int __wrap_posix_memalign(void **memptr, size_t alignment, size_t size);
+
+/* posix_memalign() replacement that returns ENOMEM whenever rand() falls
+ * below fail_probability and forwards to posix_memalign() otherwise. */
+int __wrap_posix_memalign(void **memptr, size_t alignment, size_t size) {
+    const int inject_failure = rand() < fail_probability;
+    return inject_failure ? ENOMEM : posix_memalign(memptr, alignment, size);
+}
+#else
+#error "HAVE_POSIX_MEMALIGN required"
+#endif
+
+int __wrap_pthread_create(pthread_t *, const pthread_attr_t *,
+                          void *(*) (void *), void *);
+
+/* pthread_create() replacement that fails with EAGAIN at the configured
+ * failure probability plus an extra RAND_MAX/16, and forwards to
+ * pthread_create() otherwise. */
+int __wrap_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+                          void *(*start_routine) (void *), void *arg)
+{
+    // The extra share is subtracted from rand() instead of being added to
+    // fail_probability: the sum can exceed INT_MAX when RAND_MAX == INT_MAX
+    // (signed overflow), whereas rand() - RAND_MAX/16 cannot overflow.
+    if (rand() - RAND_MAX/16 < fail_probability)
+        return EAGAIN;
+
+    return pthread_create(thread, attr, start_routine, arg);
+}
+
+int __wrap_pthread_mutex_init(pthread_mutex_t *,
+                              const pthread_mutexattr_t *);
+
+/* pthread_mutex_init() replacement that fails with ENOMEM at the configured
+ * failure probability plus an extra RAND_MAX/8, and forwards to
+ * pthread_mutex_init() otherwise. */
+int __wrap_pthread_mutex_init(pthread_mutex_t *restrict mutex,
+                              const pthread_mutexattr_t *restrict attr)
+{
+    // The extra share is subtracted from rand() instead of being added to
+    // fail_probability: the sum can exceed INT_MAX when RAND_MAX == INT_MAX
+    // (signed overflow), whereas rand() - RAND_MAX/8 cannot overflow.
+    if (rand() - RAND_MAX/8 < fail_probability)
+        return ENOMEM;
+
+    return pthread_mutex_init(mutex, attr);
+}
+
+int __wrap_pthread_cond_init(pthread_cond_t *,
+                             const pthread_condattr_t *);
+
+/* pthread_cond_init() replacement that fails with ENOMEM at the configured
+ * failure probability plus an extra RAND_MAX/16, and forwards to
+ * pthread_cond_init() otherwise. */
+int __wrap_pthread_cond_init(pthread_cond_t *restrict cond,
+                             const pthread_condattr_t *restrict attr)
+{
+    // The extra share is subtracted from rand() instead of being added to
+    // fail_probability: the sum can exceed INT_MAX when RAND_MAX == INT_MAX
+    // (signed overflow), whereas rand() - RAND_MAX/16 cannot overflow.
+    if (rand() - RAND_MAX/16 < fail_probability)
+        return ENOMEM;
+
+    return pthread_cond_init(cond, attr);
+}
diff --git a/third_party/dav1d/tests/libfuzzer/alloc_fail.h b/third_party/dav1d/tests/libfuzzer/alloc_fail.h
new file mode 100644
index 0000000000..5ace870beb
--- /dev/null
+++ b/third_party/dav1d/tests/libfuzzer/alloc_fail.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_TESTS_LIBFUZZER_ALLOC_FAIL_H
+#define DAV1D_TESTS_LIBFUZZER_ALLOC_FAIL_H
+
+#include <dav1d/common.h>
+
+/* Seeds rand() with `seed` and sets the threshold at which the __wrap_*
+ * functions in alloc_fail.c report a failure: a wrapper fails when rand()
+ * returns a value below `probability` (the pthread wrappers add a fixed
+ * extra share). Values of RAND_MAX or more are halved until below RAND_MAX. */
+DAV1D_API void dav1d_setup_alloc_fail(unsigned seed, unsigned probability);
+
+#endif /* DAV1D_TESTS_LIBFUZZER_ALLOC_FAIL_H */
diff --git a/third_party/dav1d/tests/libfuzzer/dav1d_fuzzer.c b/third_party/dav1d/tests/libfuzzer/dav1d_fuzzer.c
new file mode 100644
index 0000000000..c894636f1e
--- /dev/null
+++ b/third_party/dav1d/tests/libfuzzer/dav1d_fuzzer.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <dav1d/dav1d.h>
+#include "src/cpu.h"
+#include "dav1d_fuzzer.h"
+
+#ifdef DAV1D_ALLOC_FAIL
+
+#include "alloc_fail.h"
+
+/* Hashes `len` bytes starting at `c`: the hash starts at 5381 and for every
+ * byte is multiplied by 33 and then XORed with that byte. */
+static unsigned djb_xor(const uint8_t * c, size_t len) {
+    unsigned hash = 5381;
+    while (len--)
+        hash = (hash * 33) ^ *c++;
+    return hash;
+}
+#endif
+
+/* Reads a 32-bit little-endian value from the four bytes at `p`. */
+static unsigned r32le(const uint8_t *const p) {
+    uint32_t value = 0;
+    // Accumulate from the most significant byte (p[3]) down to p[0].
+    for (int i = 3; i >= 0; i--)
+        value = (value << 8) | p[i];
+    return value;
+}
+
+// Value assigned to Dav1dSettings.frame_size_limit in LLVMFuzzerTestOneInput.
+// Parenthesized so the macro expands to a single value in any expression.
+#define DAV1D_FUZZ_MAX_SIZE (4096 * 4096)
+
+// search for "--cpumask xxx" in argv and remove both parameters
+/* Scans argv for the first "--cpumask" option. If the following argument
+ * parses completely as a number (hexadecimal after a "0x" prefix, otherwise
+ * with strtoul base auto-detection), it is applied through
+ * dav1d_set_cpu_flags_mask(). The option and its value are then removed from
+ * argv by shifting the later arguments down, and *argc is updated.
+ * Always returns 0. */
+int LLVMFuzzerInitialize(int *argc, char ***argv) {
+    char **const args = *argv;
+    const int n_args = *argc;
+    int idx = 1;
+
+    // Advance to "--cpumask", or to n_args if the option is absent.
+    while (idx < n_args && strcmp(args[idx], "--cpumask"))
+        idx++;
+
+    if (idx < n_args) {
+        const char *value = args[idx + 1];
+        if (value) {
+            char *end;
+            unsigned mask;
+            if (strncmp(value, "0x", 2)) {
+                mask = (unsigned) strtoul(value, &end, 0);
+            } else {
+                value += 2;
+                mask = (unsigned) strtoul(value, &end, 16);
+            }
+            // Only accept the value if it was consumed entirely.
+            if (end != value && !end[0])
+                dav1d_set_cpu_flags_mask(mask);
+        }
+    }
+
+    // Close the two-slot gap; this is a no-op when the option was not found.
+    while (idx < n_args - 2) {
+        args[idx] = args[idx + 2];
+        idx++;
+    }
+
+    *argc = idx;
+
+    return 0;
+}
+
+
+// expects ivf input
+
+/* libFuzzer entry point. Treats `data` as an IVF file: skips the 32-byte file
+ * header, then walks the frames (12-byte frame header whose first four bytes
+ * are the little-endian frame size, followed by the payload) and feeds each
+ * payload to a dav1d decoder, draining pictures as it goes.
+ * Frames are skipped until one parses as a sequence header. Always returns 0. */
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+    Dav1dSettings settings = { 0 };
+    Dav1dContext * ctx = NULL;
+    Dav1dPicture pic;
+    const uint8_t *ptr = data;
+    int have_seq_hdr = 0;
+    int err;
+
+    dav1d_version();
+
+    if (size < 32) goto end;
+#ifdef DAV1D_ALLOC_FAIL
+    // Derive the failure-injection seed/probability and the threading
+    // settings deterministically from a hash of the 32-byte header.
+    unsigned h = djb_xor(ptr, 32);
+    unsigned seed = h;
+    unsigned probability = h > (RAND_MAX >> 5) ? RAND_MAX >> 5 : h;
+    int max_frame_delay = (h & 0xf) + 1;
+    int n_threads = ((h >> 4) & 0x7) + 1;
+    if (max_frame_delay > 5) max_frame_delay = 1;
+    if (n_threads > 3) n_threads = 1;
+#endif
+    ptr += 32; // skip ivf header
+
+    dav1d_default_settings(&settings);
+
+#ifdef DAV1D_MT_FUZZING
+    settings.max_frame_delay = settings.n_threads = 4;
+#elif defined(DAV1D_ALLOC_FAIL)
+    settings.max_frame_delay = max_frame_delay;
+    settings.n_threads = n_threads;
+    dav1d_setup_alloc_fail(seed, probability);
+#else
+    settings.max_frame_delay = settings.n_threads = 1;
+#endif
+#if defined(DAV1D_FUZZ_MAX_SIZE)
+    settings.frame_size_limit = DAV1D_FUZZ_MAX_SIZE;
+#endif
+
+    err = dav1d_open(&ctx, &settings);
+    if (err < 0) goto end;
+
+    // Loop while a complete 12-byte frame header is still available
+    // (size >= 32 here, so size - 12 cannot wrap).
+    while (ptr <= data + size - 12) {
+        Dav1dData buf;
+        uint8_t *p;
+
+        size_t frame_size = r32le(ptr);
+        ptr += 12;
+
+        // Stop at a frame whose payload would extend past the input.
+        if (frame_size > size || ptr > data + size - frame_size)
+            break;
+
+        if (!frame_size) continue;
+
+        if (!have_seq_hdr) {
+            Dav1dSequenceHeader seq = { 0 };
+            // Note: this `err` shadows the function-level one.
+            int err = dav1d_parse_sequence_header(&seq, ptr, frame_size);
+            // skip frames until we see a sequence header
+            if  (err != 0) {
+                ptr += frame_size;
+                continue;
+            }
+            have_seq_hdr = 1;
+        }
+
+        // copy frame data to a new buffer to catch reads past the end of input
+        p = dav1d_data_create(&buf, frame_size);
+        if (!p) goto cleanup;
+        memcpy(p, ptr, frame_size);
+        ptr += frame_size;
+
+        // Alternate between sending data and fetching pictures until the
+        // buffer is consumed; any error other than EAGAIN ends the loop.
+        do {
+            if ((err = dav1d_send_data(ctx, &buf)) < 0) {
+                if (err != DAV1D_ERR(EAGAIN))
+                    break;
+            }
+            memset(&pic, 0, sizeof(pic));
+            err = dav1d_get_picture(ctx, &pic);
+            if (err == 0) {
+                dav1d_picture_unref(&pic);
+            } else if (err != DAV1D_ERR(EAGAIN)) {
+                break;
+            }
+        } while (buf.sz > 0);
+
+        // Release whatever was left unconsumed after an early break.
+        if (buf.sz > 0)
+            dav1d_data_unref(&buf);
+    }
+
+    memset(&pic, 0, sizeof(pic));
+    if ((err = dav1d_get_picture(ctx, &pic)) == 0) {
+        /* Test calling dav1d_picture_unref() after dav1d_close() */
+        // Fetch and release further pictures until EAGAIN is returned, while
+        // `pic` stays referenced until after the context is closed.
+        do {
+            Dav1dPicture pic2 = { 0 };
+            if ((err = dav1d_get_picture(ctx, &pic2)) == 0)
+                dav1d_picture_unref(&pic2);
+        } while (err != DAV1D_ERR(EAGAIN));
+
+        dav1d_close(&ctx);
+        dav1d_picture_unref(&pic);
+        return 0;
+    }
+
+cleanup:
+    dav1d_close(&ctx);
+end:
+    return 0;
+}
diff --git a/third_party/dav1d/tests/libfuzzer/dav1d_fuzzer.h b/third_party/dav1d/tests/libfuzzer/dav1d_fuzzer.h
new file mode 100644
index 0000000000..0cbbad46b0
--- /dev/null
+++ b/third_party/dav1d/tests/libfuzzer/dav1d_fuzzer.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_TESTS_LIBFUZZER_DAV1D_FUZZER_H
+#define DAV1D_TESTS_LIBFUZZER_DAV1D_FUZZER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+int LLVMFuzzerInitialize(int *argc, char ***argv); /* one-time setup; main.c treats a non-zero return as failure */
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); /* runs one test case over the size bytes at data */
+
+#endif /* DAV1D_TESTS_LIBFUZZER_DAV1D_FUZZER_H */
diff --git a/third_party/dav1d/tests/libfuzzer/main.c b/third_party/dav1d/tests/libfuzzer/main.c
new file mode 100644
index 0000000000..8647738666
--- /dev/null
+++ b/third_party/dav1d/tests/libfuzzer/main.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "dav1d_fuzzer.h"
+
+// Standalone driver; expects ivf input. Loads argv[1] into memory and returns
+// LLVMFuzzerTestOneInput()'s result (1 if init fails, -1 on any other error).
+int main(int argc, char *argv[]) {
+    int ret = -1;
+    FILE *f = NULL;
+    int64_t fsize;
+    const char *filename = NULL;
+    uint8_t *data = NULL;
+    size_t size = 0;
+
+    if (LLVMFuzzerInitialize(&argc, &argv))
+        return 1;
+
+    if (argc != 2) {
+        fprintf(stdout, "Usage:\n%s fuzzing_testcase.ivf\n", argv[0]);
+        return -1;
+    }
+    filename = argv[1];
+
+    if (!(f = fopen(filename, "rb"))) {
+        fprintf(stderr, "failed to open %s: %s\n", filename, strerror(errno));
+        goto error;
+    }
+
+    if (fseeko(f, 0, SEEK_END) == -1) {
+        fprintf(stderr, "fseek(%s, 0, SEEK_END) failed: %s\n", filename,
+                strerror(errno));
+        goto error;
+    }
+    if ((fsize = ftello(f)) == -1) {
+        fprintf(stderr, "ftell(%s) failed: %s\n", filename, strerror(errno));
+        goto error;
+    }
+    rewind(f);
+
+    if (fsize < 0 || fsize > INT_MAX) {
+        fprintf(stderr, "%s is too large: %"PRId64"\n", filename, fsize);
+        goto error;
+    }
+    size = (size_t)fsize;
+
+    // malloc(0) may return NULL without being an error; never request 0 bytes
+    if (!(data = malloc(size ? size : 1))) {
+        fprintf(stderr, "failed to allocate: %zu bytes\n", size);
+        goto error;
+    }
+    // fread() returns the item count: read `size` 1-byte items and compare
+    if (fread(data, 1, size, f) != size) {
+        fprintf(stderr, "failed to read %zu bytes from %s: %s\n", size,
+                filename, strerror(errno));
+        goto error;
+    }
+
+    ret = LLVMFuzzerTestOneInput(data, size);
+
+error:
+    free(data);
+    if (f) fclose(f);
+    return ret;
+}
diff --git a/third_party/dav1d/tests/libfuzzer/meson.build b/third_party/dav1d/tests/libfuzzer/meson.build
new file mode 100644
index 0000000000..45d28562c1
--- /dev/null
+++ b/third_party/dav1d/tests/libfuzzer/meson.build
@@ -0,0 +1,101 @@
+# Copyright © 2020, VideoLAN and dav1d authors
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#
+# Build definition for the dav1d fuzzing binaries
+#
+
+if fuzzing_engine == 'none' and not have_fseeko
+    subdir_done()  # the standalone driver (main.c, added below) calls fseeko
+endif
+
+dav1d_fuzzer_sources =  files('dav1d_fuzzer.c')
+fuzzer_ldflags = []
+fuzzer_link_lang = {}
+# optional extra linker flags from the 'fuzzer_ldflags' build option
+if get_option('fuzzer_ldflags') != ''
+    fuzzer_ldflags += [get_option('fuzzer_ldflags')]
+endif
+# engine-specific driver source and link settings
+if fuzzing_engine == 'none'
+    dav1d_fuzzer_sources += files('main.c')
+elif fuzzing_engine == 'libfuzzer'
+    fuzzer_ldflags += ['-fsanitize=fuzzer']
+elif fuzzing_engine == 'oss-fuzz'
+    # libFuzzingEngine needs c++
+    add_languages('cpp')
+    fuzzer_link_lang = {'link_language': 'cpp'}
+endif
+# default fuzzer binary, linked against libdav1d
+dav1d_fuzzer = executable('dav1d_fuzzer',
+    dav1d_fuzzer_sources,
+    include_directories: dav1d_inc_dirs,
+    link_args: fuzzer_ldflags,
+    link_with : libdav1d,
+    build_by_default: true,
+    dependencies : [thread_dependency],
+    kwargs: fuzzer_link_lang
+    )
+# same sources built with -DDAV1D_MT_FUZZING
+dav1d_fuzzer_mt = executable('dav1d_fuzzer_mt',
+    dav1d_fuzzer_sources,
+    include_directories: dav1d_inc_dirs,
+    c_args: ['-DDAV1D_MT_FUZZING'],
+    link_args: fuzzer_ldflags,
+    link_with : libdav1d,
+    build_by_default: true,
+    dependencies : [thread_dependency],
+    kwargs: fuzzer_link_lang
+    )
+# allocation-failure variant: needs objcopy, no LTO, a static libdav1d and posix_memalign
+objcopy = find_program('objcopy',
+                       required: false)
+if (objcopy.found() and
+    not get_option('b_lto') and
+    get_option('default_library') == 'static' and
+    cc.has_function('posix_memalign', prefix : '#include <stdlib.h>', args : test_args))
+    # copy of libdav1d whose allocation/pthread init symbols are renamed to __wrap_*
+    libdav1d_af = custom_target('libdav1d_af',
+                                input: libdav1d,
+                                output: 'libdav1d_af.a',
+                                depends: libdav1d,
+                                command: [objcopy,
+                                          '--redefine-sym', 'malloc=__wrap_malloc',
+                                          '--redefine-sym', 'posix_memalign=__wrap_posix_memalign',
+                                          '--redefine-sym', 'pthread_create=__wrap_pthread_create',
+                                          '--redefine-sym', 'pthread_cond_init=__wrap_pthread_cond_init',
+                                          '--redefine-sym', 'pthread_mutex_init=__wrap_pthread_mutex_init',
+                                          '@INPUT@', '@OUTPUT@'])
+    # not built by default; alloc_fail.c presumably provides the __wrap_* functions
+    dav1d_fuzzer_mem = executable('dav1d_fuzzer_mem',
+        dav1d_fuzzer_sources + ['alloc_fail.c'],
+        include_directories: dav1d_inc_dirs,
+        c_args: ['-DDAV1D_ALLOC_FAIL'],
+        link_args: fuzzer_ldflags + [join_paths(libdav1d_af.full_path())],
+        link_depends: libdav1d_af,
+        build_by_default: false,
+        dependencies : [thread_dependency],
+        kwargs: fuzzer_link_lang
+        )
+endif
diff --git a/third_party/dav1d/tests/meson.build b/third_party/dav1d/tests/meson.build
new file mode 100644
index 0000000000..90de417727
--- /dev/null
+++ b/third_party/dav1d/tests/meson.build
@@ -0,0 +1,152 @@
+# Copyright © 2018, VideoLAN and dav1d authors
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#
+# Build definition for the dav1d tests
+#
+
+# Leave subdir if tests are disabled
+if not get_option('enable_tests')
+    subdir_done()
+endif
+# checkasm is only built when asm is enabled
+if is_asm_enabled
+    checkasm_sources = files(
+        'checkasm/checkasm.c',
+        'checkasm/msac.c',
+        'checkasm/refmvs.c',
+    )
+    # sources built once per bitdepth (see the foreach below)
+    checkasm_tmpl_sources = files(
+        'checkasm/cdef.c',
+        'checkasm/filmgrain.c',
+        'checkasm/ipred.c',
+        'checkasm/itx.c',
+        'checkasm/loopfilter.c',
+        'checkasm/looprestoration.c',
+        'checkasm/mc.c',
+    )
+    # compile the templated sources with -DBITDEPTH=<n> for each bitdepth and keep the objects
+    checkasm_bitdepth_objs = []
+    foreach bitdepth : dav1d_bitdepths
+        checkasm_bitdepth_lib = static_library(
+            'checkasm_bitdepth_@0@'.format(bitdepth),
+            checkasm_tmpl_sources,
+            include_directories: dav1d_inc_dirs,
+            dependencies : [stdatomic_dependencies],
+            c_args: ['-DBITDEPTH=@0@'.format(bitdepth)],
+            install: false,
+            build_by_default: false,
+        )
+        checkasm_bitdepth_objs += checkasm_bitdepth_lib.extract_all_objects(recursive: true)
+    endforeach
+    # architecture-specific assembly belonging to the checkasm harness itself
+    checkasm_asm_objs = []
+    checkasm_asm_sources = []
+    if host_machine.cpu_family() == 'aarch64' or host_machine.cpu() == 'arm64'
+        checkasm_asm_sources += files('checkasm/arm/checkasm_64.S')
+    elif host_machine.cpu_family().startswith('arm')
+        checkasm_asm_sources += files('checkasm/arm/checkasm_32.S')
+    elif host_machine.cpu_family().startswith('x86')
+        checkasm_asm_objs += nasm_gen.process(files('checkasm/x86/checkasm.asm'))
+    endif
+    # .S files go through gaspp_gen when use_gaspp is set, else are added as regular sources
+    if use_gaspp
+        checkasm_asm_objs += gaspp_gen.process(checkasm_asm_sources)
+    else
+        checkasm_sources += checkasm_asm_sources
+    endif
+    # checkasm takes libdav1d's objects directly instead of linking the library
+    checkasm = executable('checkasm',
+        checkasm_sources,
+        checkasm_asm_objs,
+
+        objects: [
+            checkasm_bitdepth_objs,
+            libdav1d.extract_all_objects(recursive: true),
+            ],
+
+        include_directories: dav1d_inc_dirs,
+        build_by_default: false,
+        dependencies : [
+            thread_dependency,
+            rt_dependency,
+            libdl_dependency,
+            libm_dependency,
+            ],
+        )
+    # run serially as a test (180 s timeout); also registered as a --bench benchmark (3600 s)
+    test('checkasm', checkasm, suite: 'checkasm', timeout: 180, is_parallel: false)
+    benchmark('checkasm', checkasm, suite: 'checkasm', timeout: 3600, args: '--bench')
+endif
+
+c99_extension_flag = cc.first_supported_argument(
+    '-Werror=c11-extensions',
+    '-Werror=c99-c11-compat',
+    '-Wc11-extensions',
+    '-Wc99-c11-compat',
+)
+# c99_extension_flag: first of the flags above that the compiler supports
+# one header_test.c executable and 'headers' suite test per entry of dav1d_api_headers
+foreach header : dav1d_api_headers
+    target = header + '_test'
+
+    header_test_exe = executable(target,
+        'header_test.c',
+        include_directories: dav1d_inc_dirs,
+        c_args: ['-DDAV1D_TEST_HEADER="@0@"'.format(header), c99_extension_flag],
+        build_by_default: true
+    )
+
+    test(target, header_test_exe, suite: 'headers')
+endforeach
+
+
+# fuzzing binaries
+subdir('libfuzzer')
+
+# seek stress test binary, depends on dav1d cli tool (reuses its cli parser and ivf input objects)
+if get_option('enable_tools')
+    seek_stress_sources = files('seek_stress.c')
+    seek_stress = executable('seek_stress',
+        seek_stress_sources, rev_target,
+        objects: [
+            dav1d.extract_objects('dav1d_cli_parse.c'),
+            dav1d_input_objs.extract_objects('input/input.c', 'input/ivf.c'),
+        ],
+        include_directories: [dav1d_inc_dirs, include_directories('../tools')],
+        link_with: libdav1d,
+        dependencies: [
+            thread_dependency,
+            rt_dependency,
+            getopt_dependency,
+            libm_dependency,
+        ],
+    )
+endif
+
+# Include dav1d test data repository with additional tests
+if get_option('testdata_tests')
+    subdir('dav1d-test-data')
+endif
diff --git a/third_party/dav1d/tests/seek_stress.c b/third_party/dav1d/tests/seek_stress.c
new file mode 100644
index 0000000000..a85ec86886
--- /dev/null
+++ b/third_party/dav1d/tests/seek_stress.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright © 2020, VideoLAN and dav1d authors
+ * Copyright © 2020, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "vcs_version.h"
+#include "cli_config.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "dav1d/dav1d.h"
+#include "input/input.h"
+#include "input/demuxer.h"
+#include "dav1d_cli_parse.h"
+
+#define NUM_RAND_SEEK 3
+#define NUM_REL_SEEK  4
+#define NUM_END_SEEK  2
+
+const Demuxer annexb_demuxer = { .name = "" }; // stub: only the ivf demuxer is linked (see tests/meson.build)
+const Demuxer section5_demuxer = { .name = "" }; // stub, same reason; presumably referenced by input.c
+
+#ifdef _WIN32
+#include <windows.h>
+#elif defined(__APPLE__)
+#include <mach/mach_time.h>
+#else
+#include <time.h>
+#endif
+// Time-derived seed for xor128_srand(); truncation to unsigned is intentional.
+static unsigned get_seed(void) {
+#ifdef _WIN32
+    return GetTickCount();
+#elif defined(__APPLE__)
+    return (unsigned) mach_absolute_time();
+#elif defined(HAVE_CLOCK_GETTIME)
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (unsigned) (1000000000ULL * ts.tv_sec + ts.tv_nsec);
+#else
+    return (unsigned) time(NULL); // no clock_gettime(): still return a value
+#endif
+}
+
+// xor128 generator state, filled in by xor128_srand().
+static uint32_t xs_state[4];
+
+// Derives all four state words from the seed and its bitwise complement.
+static void xor128_srand(unsigned seed) {
+    const unsigned inv = ~seed;
+    const unsigned hi = 0xffff0000, lo = 0x0000ffff;
+    xs_state[0] = seed;
+    xs_state[1] = (seed & hi) | (inv  & lo);
+    xs_state[2] = (inv  & hi) | (seed & lo);
+    xs_state[3] = inv;
+}
+
+// One step of xor128 from Marsaglia, George (July 2003). "Xorshift RNGs".
+// Journal of Statistical Software. 8 (14). doi:10.18637/jss.v008.i14.
+// Returns the new state word shifted right by one bit.
+static int xor128_rand(void) {
+    uint32_t t = xs_state[0];
+    uint32_t w = xs_state[3];
+    t ^= t << 11;
+    t ^= t >> 8;
+    w ^= w >> 19;
+    for (int i = 0; i < 3; i++) xs_state[i] = xs_state[i + 1];
+    xs_state[3] = w ^= t;
+    return w >> 1;
+}
+
+// Feeds `data` to the decoder and tries to fetch one picture, releasing it
+// immediately. EAGAIN from either call is not an error. Returns 0 on success
+// or the negative dav1d error code after printing it to stderr.
+static inline int decode_frame(Dav1dPicture *const p,
+                               Dav1dContext *const c, Dav1dData *const data)
+{
+    memset(p, 0, sizeof(*p));
+
+    int res = dav1d_send_data(c, data);
+    if (res >= 0 || res == DAV1D_ERR(EAGAIN)) {
+        res = dav1d_get_picture(c, p);
+        if (res >= 0) {
+            dav1d_picture_unref(p);
+            return 0;
+        }
+        if (res == DAV1D_ERR(EAGAIN)) return 0;
+    }
+
+    fprintf(stderr, "Error decoding frame: %s\n", strerror(DAV1D_ERR(res)));
+    return res;
+}
+
+// Decodes a random number of frames, fewer than (int)(5 * fps). Stops early
+// when input_read() fails or yields no data (returns 0) or when decode_frame()
+// fails (returns its error).
+static int decode_rand(DemuxerContext *const in, Dav1dContext *const c,
+                       Dav1dData *const data, const double fps)
+{
+    Dav1dPicture pic;
+    int res = 0, left = xor128_rand() % (int)(fps * 5);
+    while (left-- > 0 && !(res = decode_frame(&pic, c, data)))
+        if (input_read(in, data) || !data->sz) break;
+    return res;
+}
+
+static int decode_all(DemuxerContext *const in,
+                      Dav1dContext *const c, Dav1dData *const data)
+{
+    Dav1dPicture pic;
+    for (;;) { // until a decode error (returned) or end of input (returns 0)
+        const int err = decode_frame(&pic, c, data);
+        if (err || input_read(in, data) || !data->sz) return err;
+    }
+}
+
+static int seek(DemuxerContext *const in, Dav1dContext *const c,
+                const uint64_t pts, Dav1dData *const data)
+{
+    Dav1dSequenceHeader seq;
+    int err = input_seek(in, pts);
+    if (err) return err; // decoder is not flushed when the seek itself fails
+    while (!(err = input_read(in, data)) && // skip to the next sequence header
+           dav1d_parse_sequence_header(&seq, data->data, data->sz)) {}
+    dav1d_flush(c); // also reached when input_read() failed
+    return err;
+}
+
+// Seek stress test over the ivf input named on the command line. Returns
+// EXIT_FAILURE only on a version mismatch or when dav1d_open() fails.
+int main(const int argc, char *const *const argv) {
+    const char *version = dav1d_version();
+    if (strcmp(version, DAV1D_VERSION)) {
+        fprintf(stderr, "Version mismatch (library: %s, executable: %s)\n",
+                version, DAV1D_VERSION);
+        return EXIT_FAILURE;
+    }
+
+    CLISettings cli_settings;
+    Dav1dSettings lib_settings;
+    DemuxerContext *in;
+    Dav1dContext *c;
+    Dav1dData data = { 0 }; // its timestamp is read even if every seek fails
+    unsigned total, i_fps[2], i_timebase[2];
+    double timebase, spf, fps;
+    uint64_t pts;
+
+    xor128_srand(get_seed());
+    parse(argc, argv, &cli_settings, &lib_settings);
+
+    if (input_open(&in, "ivf", cli_settings.inputfile,
+                   i_fps, &total, i_timebase) < 0)
+        return EXIT_SUCCESS;
+    const int bad_hdr = !i_timebase[0] || !i_timebase[1] || // used as divisors
+                        !i_fps[0] || !i_fps[1] || !total;   // and as a modulus
+    if (bad_hdr || dav1d_open(&c, &lib_settings)) {
+        input_close(in); // don't leak the demuxer on these early exits
+        return bad_hdr ? EXIT_SUCCESS : EXIT_FAILURE;
+    }
+
+    timebase = (double)i_timebase[1] / i_timebase[0];
+    spf = (double)i_fps[1] / i_fps[0];
+    fps = (double)i_fps[0] / i_fps[1];
+    if (fps < 1) goto end;
+
+#define FRAME_OFFSET_TO_PTS(foff) \
+    (uint64_t)llround(((foff) * spf) * 1000000000.0)
+#define TS_TO_PTS(ts) \
+    (uint64_t)llround(((ts) * timebase) * 1000000000.0)
+
+    // seek at random pts
+    for (int i = 0; i < NUM_RAND_SEEK; i++) {
+        pts = FRAME_OFFSET_TO_PTS(xor128_rand() % total);
+        if (seek(in, c, pts, &data)) continue;
+        if (decode_rand(in, c, &data, fps)) goto end;
+    }
+    pts = TS_TO_PTS(data.m.timestamp);
+
+    // seek left / right randomly with random intervals within 1s
+    for (int i = 0, tries = 0;
+         i - tries < NUM_REL_SEEK && tries < NUM_REL_SEEK / 2; i++) {
+        const int sign = xor128_rand() & 1 ? -1 : +1;
+        const float diff = (xor128_rand() % 100) / 100.f;
+        int64_t new_pts = pts + sign * FRAME_OFFSET_TO_PTS(diff * fps);
+        const int64_t new_ts = llround(new_pts / (timebase * 1000000000.0));
+        new_pts = TS_TO_PTS(new_ts);
+        if (new_pts < 0 || (uint64_t)new_pts >= FRAME_OFFSET_TO_PTS(total)) {
+            if (seek(in, c, FRAME_OFFSET_TO_PTS(total / 2), &data)) break;
+            pts = TS_TO_PTS(data.m.timestamp);
+            tries++;
+            continue;
+        }
+        if (seek(in, c, new_pts, &data))
+            if (seek(in, c, 0, &data)) goto end;
+        if (decode_rand(in, c, &data, fps)) goto end;
+        pts = TS_TO_PTS(data.m.timestamp);
+    }
+
+    unsigned shift = 0; // frames to back off from the end until a seek works
+    do {
+        if (shift >= total) goto end; // even offset 0 failed: stop retrying
+        shift = total - shift < 5 ? total : shift + 5;
+    } while (seek(in, c, FRAME_OFFSET_TO_PTS(total - shift), &data));
+
+    // simulate seeking after the end of the file
+    for (int i = 0; i < NUM_END_SEEK; i++) {
+        if (seek(in, c, FRAME_OFFSET_TO_PTS(total - shift), &data)) goto end;
+        if (decode_all(in, c, &data)) goto end;
+        int num_flush = 1 + 64 + xor128_rand() % 64;
+        while (num_flush--) dav1d_flush(c);
+    }
+
+end:
+    input_close(in);
+    dav1d_close(&c);
+    return EXIT_SUCCESS;
+}